Diffstat (limited to 'src/librbd/api')
-rw-r--r--  src/librbd/api/Config.cc       |  233
-rw-r--r--  src/librbd/api/Config.h        |   37
-rw-r--r--  src/librbd/api/DiffIterate.cc  |  378
-rw-r--r--  src/librbd/api/DiffIterate.h   |   66
-rw-r--r--  src/librbd/api/Group.cc        | 1287
-rw-r--r--  src/librbd/api/Group.h         |   60
-rw-r--r--  src/librbd/api/Image.cc        | 1015
-rw-r--r--  src/librbd/api/Image.h         |   85
-rw-r--r--  src/librbd/api/Io.cc           |  555
-rw-r--r--  src/librbd/api/Io.h            |   65
-rw-r--r--  src/librbd/api/Migration.cc    | 2126
-rw-r--r--  src/librbd/api/Migration.h     |  113
-rw-r--r--  src/librbd/api/Mirror.cc       | 2104
-rw-r--r--  src/librbd/api/Mirror.h        |  126
-rw-r--r--  src/librbd/api/Namespace.cc    |  235
-rw-r--r--  src/librbd/api/Namespace.h     |   33
-rw-r--r--  src/librbd/api/Pool.cc         |  375
-rw-r--r--  src/librbd/api/Pool.h          |   38
-rw-r--r--  src/librbd/api/PoolMetadata.cc |  156
-rw-r--r--  src/librbd/api/PoolMetadata.h  |   37
-rw-r--r--  src/librbd/api/Snapshot.cc     |  444
-rw-r--r--  src/librbd/api/Snapshot.h      |   67
-rw-r--r--  src/librbd/api/Trash.cc        |  759
-rw-r--r--  src/librbd/api/Trash.h         |   53
-rw-r--r--  src/librbd/api/Utils.cc        |  102
-rw-r--r--  src/librbd/api/Utils.h         |   28
26 files changed, 10577 insertions, 0 deletions
diff --git a/src/librbd/api/Config.cc b/src/librbd/api/Config.cc
new file mode 100644
index 000000000..8148607e3
--- /dev/null
+++ b/src/librbd/api/Config.cc
@@ -0,0 +1,233 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Config.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/api/PoolMetadata.h"
+#include "librbd/image/GetMetadataRequest.h"
+#include <algorithm>
+#include <boost/algorithm/string/predicate.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::Config: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+const uint32_t MAX_KEYS = 64;
+
+typedef std::map<std::string_view, std::pair<std::string, config_source_t>> Parent;
+
+static std::set<std::string_view> EXCLUDE_OPTIONS {
+ "rbd_auto_exclusive_lock_until_manual_request",
+ "rbd_default_format",
+ "rbd_default_pool",
+ "rbd_discard_on_zeroed_write_same",
+ "rbd_op_thread_timeout",
+ "rbd_op_threads",
+ "rbd_tracing",
+ "rbd_validate_names",
+ "rbd_validate_pool",
+ "rbd_mirror_pool_replayers_refresh_interval",
+ "rbd_config_pool_override_update_timestamp"
+ };
+static std::set<std::string_view> EXCLUDE_IMAGE_OPTIONS {
+ "rbd_default_clone_format",
+ "rbd_default_data_pool",
+ "rbd_default_features",
+ "rbd_default_format",
+ "rbd_default_order",
+ "rbd_default_stripe_count",
+ "rbd_default_stripe_unit",
+ "rbd_journal_order",
+ "rbd_journal_pool",
+ "rbd_journal_splay_width"
+ };
+
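+// Options maps each applicable "rbd_*" option name to its current value and
+// the source it was resolved from (config, pool metadata or image metadata).
+// A sketch of typical usage (illustrative only):
+//
+//   Options opts(io_ctx, false);  // collect all eligible rbd_* options
+//   if (opts.init() == 0) {       // fill config values, apply pool overrides
+//     auto it = opts.find("rbd_cache");
+//     // it->second.first is the value, it->second.second is the source
+//   }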
+struct Options : Parent {
+ librados::IoCtx m_io_ctx;
+
+ Options(librados::IoCtx& io_ctx, bool image_apply_only_options) {
+ m_io_ctx.dup(io_ctx);
+ m_io_ctx.set_namespace("");
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_io_ctx.cct());
+
+ const std::string rbd_key_prefix("rbd_");
+ const std::string rbd_mirror_key_prefix("rbd_mirror_");
+ auto& schema = cct->_conf.get_schema();
+ for (auto& pair : schema) {
+ if (!boost::starts_with(pair.first, rbd_key_prefix)) {
+ continue;
+ } else if (EXCLUDE_OPTIONS.count(pair.first) != 0) {
+ continue;
+ } else if (image_apply_only_options &&
+ EXCLUDE_IMAGE_OPTIONS.count(pair.first) != 0) {
+ continue;
+ } else if (image_apply_only_options &&
+ boost::starts_with(pair.first, rbd_mirror_key_prefix)) {
+ continue;
+ }
+
+ insert({pair.first, {}});
+ }
+ }
+
+ int init() {
+ CephContext *cct = (CephContext *)m_io_ctx.cct();
+
+ for (auto& [k,v] : *this) {
+ int r = cct->_conf.get_val(k, &v.first);
+ ceph_assert(r == 0);
+ v.second = RBD_CONFIG_SOURCE_CONFIG;
+ }
+
+ std::string last_key = ImageCtx::METADATA_CONF_PREFIX;
+ bool more_results = true;
+
+ while (more_results) {
+ std::map<std::string, bufferlist> pairs;
+
+ int r = librbd::api::PoolMetadata<>::list(m_io_ctx, last_key, MAX_KEYS,
+ &pairs);
+ if (r < 0) {
+ return r;
+ }
+
+ if (pairs.empty()) {
+ break;
+ }
+
+ more_results = (pairs.size() == MAX_KEYS);
+ last_key = pairs.rbegin()->first;
+
+ for (auto kv : pairs) {
+ std::string key;
+ if (!util::is_metadata_config_override(kv.first, &key)) {
+ more_results = false;
+ break;
+ }
+ auto it = find(key);
+ if (it != end()) {
+ it->second = {{kv.second.c_str(), kv.second.length()},
+ RBD_CONFIG_SOURCE_POOL};
+ }
+ }
+ }
+ return 0;
+ }
+};
+
+} // anonymous namespace
+
+template <typename I>
+bool Config<I>::is_option_name(librados::IoCtx& io_ctx,
+ const std::string &name) {
+ Options opts(io_ctx, false);
+
+ return (opts.find(name) != opts.end());
+}
+
+template <typename I>
+int Config<I>::list(librados::IoCtx& io_ctx,
+ std::vector<config_option_t> *options) {
+ Options opts(io_ctx, false);
+
+ int r = opts.init();
+ if (r < 0) {
+ return r;
+ }
+
+ for (auto& [k,v] : opts) {
+ options->push_back({std::string{k}, v.first, v.second});
+ }
+
+ return 0;
+}
+
+template <typename I>
+bool Config<I>::is_option_name(I *image_ctx, const std::string &name) {
+ Options opts(image_ctx->md_ctx, true);
+
+ return (opts.find(name) != opts.end());
+}
+
+template <typename I>
+int Config<I>::list(I *image_ctx, std::vector<config_option_t> *options) {
+ CephContext *cct = image_ctx->cct;
+ Options opts(image_ctx->md_ctx, true);
+
+ int r = opts.init();
+ if (r < 0) {
+ return r;
+ }
+
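+  // overlay image-level metadata overrides on top of the config and pool
+  // values gathered by opts.init(); matching keys are re-tagged with
+  // RBD_CONFIG_SOURCE_IMAGE below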
+ std::map<std::string, bufferlist> pairs;
+ C_SaferCond ctx;
+ auto req = image::GetMetadataRequest<I>::create(
+ image_ctx->md_ctx, image_ctx->header_oid, true,
+ ImageCtx::METADATA_CONF_PREFIX, ImageCtx::METADATA_CONF_PREFIX, 0U, &pairs,
+ &ctx);
+ req->send();
+
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed reading image metadata: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ for (auto kv : pairs) {
+ std::string key;
+ if (!util::is_metadata_config_override(kv.first, &key)) {
+ break;
+ }
+ auto it = opts.find(key);
+ if (it != opts.end()) {
+ it->second = {{kv.second.c_str(), kv.second.length()},
+ RBD_CONFIG_SOURCE_IMAGE};
+ }
+ }
+
+ for (auto& [k,v] : opts) {
+ options->push_back({std::string{k}, v.first, v.second});
+ }
+
+ return 0;
+}
+
+template <typename I>
+void Config<I>::apply_pool_overrides(librados::IoCtx& io_ctx,
+ ConfigProxy* config) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ Options opts(io_ctx, false);
+ int r = opts.init();
+ if (r < 0) {
+ lderr(cct) << "failed to read pool config overrides: " << cpp_strerror(r)
+ << dendl;
+ return;
+ }
+
+ for (auto& [k,v] : opts) {
+ if (v.second == RBD_CONFIG_SOURCE_POOL) {
+ r = config->set_val(k, v.first);
+ if (r < 0) {
+ lderr(cct) << "failed to override pool config " << k << "="
+ << v.first << ": " << cpp_strerror(r) << dendl;
+ }
+ }
+ }
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Config<librbd::ImageCtx>;
diff --git a/src/librbd/api/Config.h b/src/librbd/api/Config.h
new file mode 100644
index 000000000..83225d287
--- /dev/null
+++ b/src/librbd/api/Config.h
@@ -0,0 +1,37 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_CONFIG_H
+#define CEPH_LIBRBD_API_CONFIG_H
+
+#include "common/config_fwd.h"
+#include "include/common_fwd.h"
+#include "include/rbd/librbd.hpp"
+#include "include/rados/librados_fwd.hpp"
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace api {
+
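+// Queries and applies RBD configuration overrides stored at the pool and
+// image level. A minimal pool-level listing sketch (illustrative only):
+//
+//   std::vector<librbd::config_option_t> options;
+//   int r = librbd::api::Config<>::list(io_ctx, &options);
+//   // on success, each entry carries the option name, value and source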
+template <typename ImageCtxT = librbd::ImageCtx>
+class Config {
+public:
+ static bool is_option_name(librados::IoCtx& io_ctx, const std::string &name);
+ static int list(librados::IoCtx& io_ctx,
+ std::vector<config_option_t> *options);
+
+ static bool is_option_name(ImageCtxT *image_ctx, const std::string &name);
+ static int list(ImageCtxT *image_ctx, std::vector<config_option_t> *options);
+
+ static void apply_pool_overrides(librados::IoCtx& io_ctx,
+ ConfigProxy* config);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Config<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_CONFIG_H
diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc
new file mode 100644
index 000000000..b400b5d5a
--- /dev/null
+++ b/src/librbd/api/DiffIterate.cc
@@ -0,0 +1,378 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/DiffIterate.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/internal.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+#include "librbd/object_map/DiffRequest.h"
+#include "include/rados/librados.hpp"
+#include "include/interval_set.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "common/Throttle.h"
+#include "osdc/Striper.h"
+#include <boost/tuple/tuple.hpp>
+#include <list>
+#include <map>
+#include <vector>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::DiffIterate: "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
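+// Per-invocation state shared by all in-flight C_DiffObject requests; the
+// OrderedThrottle caps concurrency at rbd_concurrent_management_ops and
+// releases completions in submission (i.e. offset) order.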
+struct DiffContext {
+ DiffIterate<>::Callback callback;
+ void *callback_arg;
+ bool whole_object;
+ bool include_parent;
+ uint64_t from_snap_id;
+ uint64_t end_snap_id;
+ OrderedThrottle throttle;
+
+ template <typename I>
+ DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
+ void *callback_arg, bool _whole_object, bool _include_parent,
+ uint64_t _from_snap_id, uint64_t _end_snap_id)
+ : callback(callback), callback_arg(callback_arg),
+ whole_object(_whole_object), include_parent(_include_parent),
+ from_snap_id(_from_snap_id), end_snap_id(_end_snap_id),
+ throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) {
+ }
+};
+
+template <typename I>
+class C_DiffObject : public Context {
+public:
+ C_DiffObject(I &image_ctx, DiffContext &diff_context, uint64_t image_offset,
+ uint64_t image_length)
+ : m_image_ctx(image_ctx), m_cct(image_ctx.cct),
+ m_diff_context(diff_context), m_image_offset(image_offset),
+ m_image_length(image_length) {
+ }
+
+ void send() {
+ Context* ctx = m_diff_context.throttle.start_op(this);
+ auto aio_comp = io::AioCompletion::create_and_start(
+ ctx, util::get_image_ctx(&m_image_ctx), io::AIO_TYPE_GENERIC);
+ int list_snaps_flags = 0;
+ if (!m_diff_context.include_parent || m_diff_context.from_snap_id != 0) {
+ list_snaps_flags |= io::LIST_SNAPS_FLAG_DISABLE_LIST_FROM_PARENT;
+ }
+ if (m_diff_context.whole_object) {
+ list_snaps_flags |= io::LIST_SNAPS_FLAG_WHOLE_OBJECT;
+ }
+ auto req = io::ImageDispatchSpec::create_list_snaps(
+ m_image_ctx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START,
+ aio_comp, {{m_image_offset, m_image_length}}, io::ImageArea::DATA,
+ {m_diff_context.from_snap_id, m_diff_context.end_snap_id},
+ list_snaps_flags, &m_snapshot_delta, {});
+ req->send();
+ }
+
+protected:
+ typedef boost::tuple<uint64_t, size_t, bool> Diff;
+ typedef std::list<Diff> Diffs;
+
+ void finish(int r) override {
+ CephContext *cct = m_cct;
+
+ if (r < 0) {
+ ldout(cct, 20) << "list_snaps failed: " << m_image_offset << "~"
+ << m_image_length << ": " << cpp_strerror(r) << dendl;
+ }
+
+ Diffs diffs;
+ ldout(cct, 20) << "image extent " << m_image_offset << "~"
+ << m_image_length << ": list_snaps complete" << dendl;
+
+ compute_diffs(&diffs);
+ for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
+ r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
+ m_diff_context.callback_arg);
+ if (r < 0) {
+ break;
+ }
+ }
+ m_diff_context.throttle.end_op(r);
+ }
+
+private:
+ I& m_image_ctx;
+ CephContext *m_cct;
+ DiffContext &m_diff_context;
+ uint64_t m_image_offset;
+ uint64_t m_image_length;
+
+ io::SnapshotDelta m_snapshot_delta;
+
+ void compute_diffs(Diffs *diffs) {
+ CephContext *cct = m_cct;
+
+ // merge per-snapshot deltas into an aggregate
+ io::SparseExtents aggregate_snapshot_extents;
+ for (auto& [key, snapshot_extents] : m_snapshot_delta) {
+ for (auto& snapshot_extent : snapshot_extents) {
+ auto state = snapshot_extent.get_val().state;
+
+ // ignore DNE object (and parent)
+ if ((state == io::SPARSE_EXTENT_STATE_DNE) ||
+ (key == io::INITIAL_WRITE_READ_SNAP_IDS &&
+ state == io::SPARSE_EXTENT_STATE_ZEROED)) {
+ continue;
+ }
+
+ aggregate_snapshot_extents.insert(
+ snapshot_extent.get_off(), snapshot_extent.get_len(),
+ {state, snapshot_extent.get_len()});
+ }
+ }
+
+ // build delta callback set
+ for (auto& snapshot_extent : aggregate_snapshot_extents) {
+ ldout(cct, 20) << "off=" << snapshot_extent.get_off() << ", "
+ << "len=" << snapshot_extent.get_len() << ", "
+ << "state=" << snapshot_extent.get_val().state << dendl;
+ diffs->emplace_back(
+ snapshot_extent.get_off(), snapshot_extent.get_len(),
+ snapshot_extent.get_val().state == io::SPARSE_EXTENT_STATE_DATA);
+ }
+ }
+};
+
+int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
+ // it's possible for a discard to create a hole in the parent image -- ignore
+ if (exists) {
+ interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
+ diff->insert(off, len);
+ }
+ return 0;
+}
+
+} // anonymous namespace
+
+template <typename I>
+int DiffIterate<I>::diff_iterate(I *ictx,
+ const cls::rbd::SnapshotNamespace& from_snap_namespace,
+ const char *fromsnapname,
+ uint64_t off, uint64_t len,
+ bool include_parent, bool whole_object,
+ int (*cb)(uint64_t, size_t, int, void *),
+ void *arg)
+{
+ ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
+ << " len = " << len << dendl;
+
+ if (!ictx->data_ctx.is_valid()) {
+ return -ENODEV;
+ }
+
+ // ensure previous writes are visible to listsnaps
+ C_SaferCond flush_ctx;
+ {
+ std::shared_lock owner_locker{ictx->owner_lock};
+ auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx,
+ io::AIO_TYPE_FLUSH);
+ auto req = io::ImageDispatchSpec::create_flush(
+ *ictx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START,
+ aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
+ req->send();
+ }
+ int r = flush_ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+
+ r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ ictx->image_lock.lock_shared();
+ r = clip_io(ictx, off, &len, io::ImageArea::DATA);
+ ictx->image_lock.unlock_shared();
+ if (r < 0) {
+ return r;
+ }
+
+ DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
+ include_parent, whole_object, cb, arg);
+ r = command.execute();
+ return r;
+}
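+
+// An illustrative diff_iterate() callback (count_cb is hypothetical): it
+// tallies the bytes of changed data and never aborts the iteration.
+//
+//   int count_cb(uint64_t off, size_t len, int exists, void *arg) {
+//     if (exists) {
+//       *static_cast<uint64_t*>(arg) += len;
+//     }
+//     return 0;  // a negative return stops the iteration early
+//   }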
+
+template <typename I>
+int DiffIterate<I>::execute() {
+ CephContext* cct = m_image_ctx.cct;
+
+ ceph_assert(m_image_ctx.data_ctx.is_valid());
+
+ librados::snap_t from_snap_id = 0;
+ librados::snap_t end_snap_id;
+ uint64_t from_size = 0;
+ uint64_t end_size;
+ {
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ if (m_from_snap_name) {
+ from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace,
+ m_from_snap_name);
+ from_size = m_image_ctx.get_image_size(from_snap_id);
+ }
+ end_snap_id = m_image_ctx.snap_id;
+ end_size = m_image_ctx.get_image_size(end_snap_id);
+ }
+
+ if (from_snap_id == CEPH_NOSNAP) {
+ return -ENOENT;
+ }
+ if (from_snap_id == end_snap_id) {
+ // no diff.
+ return 0;
+ }
+ if (from_snap_id >= end_snap_id) {
+ return -EINVAL;
+ }
+
+ int r;
+ bool fast_diff_enabled = false;
+ BitVector<2> object_diff_state;
+ interval_set<uint64_t> parent_diff;
+ if (m_whole_object) {
+ C_SaferCond ctx;
+ auto req = object_map::DiffRequest<I>::create(&m_image_ctx, from_snap_id,
+ end_snap_id,
+ &object_diff_state, &ctx);
+ req->send();
+
+ r = ctx.wait();
+ if (r < 0) {
+ ldout(cct, 5) << "fast diff disabled" << dendl;
+ } else {
+ ldout(cct, 5) << "fast diff enabled" << dendl;
+ fast_diff_enabled = true;
+
+ // check parent overlap only if we are comparing to the beginning of time
+ if (m_include_parent && from_snap_id == 0) {
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ uint64_t raw_overlap = 0;
+ m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &raw_overlap);
+ auto overlap = m_image_ctx.reduce_parent_overlap(raw_overlap, false);
+ if (overlap.first > 0 && overlap.second == io::ImageArea::DATA) {
+ ldout(cct, 10) << " first getting parent diff" << dendl;
+ DiffIterate diff_parent(*m_image_ctx.parent, {}, nullptr, 0,
+ overlap.first, true, true, &simple_diff_cb,
+ &parent_diff);
+ r = diff_parent.execute();
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+ }
+ }
+
+ ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
+ << end_snap_id << " size from " << from_size
+ << " to " << end_size << dendl;
+ DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
+ m_whole_object, m_include_parent, from_snap_id,
+ end_snap_id);
+
+ uint64_t period = m_image_ctx.get_stripe_period();
+ uint64_t off = m_offset;
+ uint64_t left = m_length;
+
+ while (left > 0) {
+ uint64_t period_off = off - (off % period);
+ uint64_t read_len = std::min(period_off + period - off, left);
+
+ if (fast_diff_enabled) {
+ // map to extents
+ std::map<object_t,std::vector<ObjectExtent> > object_extents;
+ Striper::file_to_extents(cct, m_image_ctx.format_string,
+ &m_image_ctx.layout, off, read_len, 0,
+ object_extents, 0);
+
+ // get diff info for each object and merge adjacent stripe units
+ // into an aggregate (this also sorts them)
+ io::SparseExtents aggregate_sparse_extents;
+ for (auto& [object, extents] : object_extents) {
+ const uint64_t object_no = extents.front().objectno;
+ uint8_t diff_state = object_diff_state[object_no];
+ ldout(cct, 20) << "object " << object << ": diff_state="
+ << (int)diff_state << dendl;
+
+ if (diff_state == object_map::DIFF_STATE_HOLE &&
+ from_snap_id == 0 && !parent_diff.empty()) {
+ // no data in child object -- report parent diff instead
+ for (auto& oe : extents) {
+ for (auto& be : oe.buffer_extents) {
+ interval_set<uint64_t> o;
+ o.insert(off + be.first, be.second);
+ o.intersection_of(parent_diff);
+ ldout(cct, 20) << " reporting parent overlap " << o << dendl;
+ for (auto e = o.begin(); e != o.end(); ++e) {
+ aggregate_sparse_extents.insert(e.get_start(), e.get_len(),
+ {io::SPARSE_EXTENT_STATE_DATA,
+ e.get_len()});
+ }
+ }
+ }
+ } else if (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ||
+ diff_state == object_map::DIFF_STATE_DATA_UPDATED) {
+ auto state = (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ?
+ io::SPARSE_EXTENT_STATE_ZEROED : io::SPARSE_EXTENT_STATE_DATA);
+ for (auto& oe : extents) {
+ for (auto& be : oe.buffer_extents) {
+ aggregate_sparse_extents.insert(off + be.first, be.second,
+ {state, be.second});
+ }
+ }
+ }
+ }
+
+ for (const auto& se : aggregate_sparse_extents) {
+ ldout(cct, 20) << "off=" << se.get_off() << ", len=" << se.get_len()
+ << ", state=" << se.get_val().state << dendl;
+ r = m_callback(se.get_off(), se.get_len(),
+ se.get_val().state == io::SPARSE_EXTENT_STATE_DATA,
+ m_callback_arg);
+ if (r < 0) {
+ return r;
+ }
+ }
+ } else {
+ auto diff_object = new C_DiffObject<I>(m_image_ctx, diff_context, off,
+ read_len);
+ diff_object->send();
+
+ if (diff_context.throttle.pending_error()) {
+ r = diff_context.throttle.wait_for_ret();
+ return r;
+ }
+ }
+
+ left -= read_len;
+ off += read_len;
+ }
+
+ r = diff_context.throttle.wait_for_ret();
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::DiffIterate<librbd::ImageCtx>;
diff --git a/src/librbd/api/DiffIterate.h b/src/librbd/api/DiffIterate.h
new file mode 100644
index 000000000..e6074d9cb
--- /dev/null
+++ b/src/librbd/api/DiffIterate.h
@@ -0,0 +1,66 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_DIFF_ITERATE_H
+#define CEPH_LIBRBD_API_DIFF_ITERATE_H
+
+#include "include/int_types.h"
+#include "common/bit_vector.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace api {
+
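+// Iterates over the regions of an image that changed between a starting
+// snapshot (or the beginning of time when fromsnapname is null) and the
+// currently open snapshot or head. The callback receives the image offset,
+// the extent length, an "exists" flag and the user argument; a negative
+// return value stops the iteration early.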
+template <typename ImageCtxT = librbd::ImageCtx>
+class DiffIterate {
+public:
+ typedef int (*Callback)(uint64_t, size_t, int, void *);
+
+ static int diff_iterate(ImageCtxT *ictx,
+ const cls::rbd::SnapshotNamespace& from_snap_namespace,
+ const char *fromsnapname,
+ uint64_t off, uint64_t len, bool include_parent,
+ bool whole_object,
+ int (*cb)(uint64_t, size_t, int, void *),
+ void *arg);
+
+private:
+ ImageCtxT &m_image_ctx;
+ cls::rbd::SnapshotNamespace m_from_snap_namespace;
+ const char* m_from_snap_name;
+ uint64_t m_offset;
+ uint64_t m_length;
+ bool m_include_parent;
+ bool m_whole_object;
+ Callback m_callback;
+ void *m_callback_arg;
+
+ DiffIterate(ImageCtxT &image_ctx,
+ const cls::rbd::SnapshotNamespace& from_snap_namespace,
+ const char *from_snap_name, uint64_t off, uint64_t len,
+ bool include_parent, bool whole_object, Callback callback,
+ void *callback_arg)
+ : m_image_ctx(image_ctx), m_from_snap_namespace(from_snap_namespace),
+ m_from_snap_name(from_snap_name), m_offset(off),
+ m_length(len), m_include_parent(include_parent),
+ m_whole_object(whole_object), m_callback(callback),
+ m_callback_arg(callback_arg)
+ {
+ }
+
+ int execute();
+
+ int diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,
+ BitVector<2>* object_diff_state);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::DiffIterate<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_DIFF_ITERATE_H
diff --git a/src/librbd/api/Group.cc b/src/librbd/api/Group.cc
new file mode 100644
index 000000000..e5f3da69c
--- /dev/null
+++ b/src/librbd/api/Group.cc
@@ -0,0 +1,1287 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/Cond.h"
+#include "common/errno.h"
+
+#include "librbd/ExclusiveLock.h"
+#include "librbd/api/Group.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/internal.h"
+#include "librbd/io/AioCompletion.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Group: " << __func__ << ": "
+
+using std::map;
+using std::pair;
+using std::set;
+using std::string;
+using std::vector;
+// list binds to list() here, so std::list is explicitly used below
+
+using ceph::bufferlist;
+using librados::snap_t;
+using librados::IoCtx;
+using librados::Rados;
+
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+template <typename I>
+snap_t get_group_snap_id(I* ictx,
+ const cls::rbd::SnapshotNamespace& in_snap_namespace) {
+ ceph_assert(ceph_mutex_is_locked(ictx->image_lock));
+ auto it = ictx->snap_ids.lower_bound({cls::rbd::GroupSnapshotNamespace{},
+ ""});
+ for (; it != ictx->snap_ids.end(); ++it) {
+ if (it->first.first == in_snap_namespace) {
+ return it->second;
+ } else if (!std::holds_alternative<cls::rbd::GroupSnapshotNamespace>(
+ it->first.first)) {
+ break;
+ }
+ }
+ return CEPH_NOSNAP;
+}
+
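+// note: despite the name this is not an RFC 4122 UUID -- it concatenates the
+// hex RADOS instance id with a random 32-bit value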
+string generate_uuid(librados::IoCtx& io_ctx)
+{
+ Rados rados(io_ctx);
+ uint64_t bid = rados.get_instance_id();
+
+ uint32_t extra = rand() % 0xFFFFFFFF;
+ std::ostringstream bid_ss;
+ bid_ss << std::hex << bid << std::hex << extra;
+ return bid_ss.str();
+}
+
+int group_snap_list(librados::IoCtx& group_ioctx, const char *group_name,
+ std::vector<cls::rbd::GroupSnapshot> *cls_snaps)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+
+ string group_id;
+ vector<string> ind_snap_names;
+
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
+ group_name, &group_id);
+ if (r < 0) {
+ lderr(cct) << "error reading group id object: "
+ << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ string group_header_oid = util::group_header_name(group_id);
+
+ const int max_read = 1024;
+ cls::rbd::GroupSnapshot snap_last;
+
+ for (;;) {
+ vector<cls::rbd::GroupSnapshot> snaps_page;
+
+ r = cls_client::group_snap_list(&group_ioctx, group_header_oid,
+ snap_last, max_read, &snaps_page);
+
+ if (r < 0) {
+ lderr(cct) << "error reading snap list from group: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ cls_snaps->insert(cls_snaps->end(), snaps_page.begin(), snaps_page.end());
+ if (snaps_page.size() < max_read) {
+ break;
+ }
+ snap_last = *snaps_page.rbegin();
+ }
+
+ return 0;
+}
+
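+// builds the name of the image-level snapshot backing a group snapshot,
+// e.g. ".group.<pool-id-hex>_<group-id>_<snap-id>"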
+std::string calc_ind_image_snap_name(uint64_t pool_id,
+ const std::string &group_id,
+ const std::string &snap_id)
+{
+ std::stringstream ind_snap_name_stream;
+ ind_snap_name_stream << ".group." << std::hex << pool_id << "_"
+ << group_id << "_" << snap_id;
+ return ind_snap_name_stream.str();
+}
+
+int group_image_list(librados::IoCtx& group_ioctx, const char *group_name,
+ std::vector<cls::rbd::GroupImageStatus> *image_ids)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+
+ string group_id;
+
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
+ group_name, &group_id);
+ if (r < 0) {
+ lderr(cct) << "error reading group id object: "
+ << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ string group_header_oid = util::group_header_name(group_id);
+
+ ldout(cct, 20) << "listing images in group name "
+ << group_name << " group id " << group_header_oid << dendl;
+ image_ids->clear();
+
+ const int max_read = 1024;
+ cls::rbd::GroupImageSpec start_last;
+ do {
+ std::vector<cls::rbd::GroupImageStatus> image_ids_page;
+
+ r = cls_client::group_image_list(&group_ioctx, group_header_oid,
+ start_last, max_read, &image_ids_page);
+
+ if (r < 0) {
+ lderr(cct) << "error reading image list from group: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ image_ids->insert(image_ids->end(),
+ image_ids_page.begin(), image_ids_page.end());
+
+ if (image_ids_page.size() > 0)
+ start_last = image_ids_page.rbegin()->spec;
+
+ r = image_ids_page.size();
+ } while (r == max_read);
+
+ return 0;
+}
+
+int group_image_remove(librados::IoCtx& group_ioctx, string group_id,
+ librados::IoCtx& image_ioctx, string image_id)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+
+ string group_header_oid = util::group_header_name(group_id);
+
+ string image_header_oid = util::header_name(image_id);
+
+ ldout(cct, 20) << "removing image " << image_id
+ << " image id " << image_header_oid << dendl;
+
+ cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id());
+
+ cls::rbd::GroupImageStatus incomplete_st(image_id, image_ioctx.get_id(),
+ cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE);
+
+ cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id());
+
+ int r = cls_client::group_image_set(&group_ioctx, group_header_oid,
+ incomplete_st);
+
+ if (r < 0) {
+ lderr(cct) << "couldn't put image into removing state: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+
+ r = cls_client::image_group_remove(&image_ioctx, image_header_oid,
+ group_spec);
+ if ((r < 0) && (r != -ENOENT)) {
+ lderr(cct) << "couldn't remove group reference from image"
+ << cpp_strerror(-r) << dendl;
+ return r;
+ } else if (r >= 0) {
+ ImageWatcher<>::notify_header_update(image_ioctx, image_header_oid);
+ }
+
+ r = cls_client::group_image_remove(&group_ioctx, group_header_oid, spec);
+ if (r < 0) {
+ lderr(cct) << "couldn't remove image from group"
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+int group_snap_remove_by_record(librados::IoCtx& group_ioctx,
+ const cls::rbd::GroupSnapshot& group_snap,
+ const std::string& group_id,
+ const std::string& group_header_oid) {
+
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+ std::vector<C_SaferCond*> on_finishes;
+ int r, ret_code;
+
+ std::vector<librbd::ImageCtx*> ictxs;
+
+ cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id,
+ group_snap.id};
+
+ ldout(cct, 20) << "Removing snapshots" << dendl;
+ int snap_count = group_snap.snaps.size();
+
+ for (int i = 0; i < snap_count; ++i) {
+ librbd::IoCtx image_io_ctx;
+ r = util::create_ioctx(group_ioctx, "image", group_snap.snaps[i].pool, {},
+ &image_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::ImageCtx* image_ctx = new ImageCtx("", group_snap.snaps[i].image_id,
+ nullptr, image_io_ctx, false);
+
+ C_SaferCond* on_finish = new C_SaferCond;
+
+ image_ctx->state->open(0, on_finish);
+
+ ictxs.push_back(image_ctx);
+ on_finishes.push_back(on_finish);
+ }
+
+ ret_code = 0;
+ for (int i = 0; i < snap_count; ++i) {
+ r = on_finishes[i]->wait();
+ delete on_finishes[i];
+ if (r < 0) {
+ ictxs[i] = nullptr;
+ ret_code = r;
+ }
+ }
+ if (ret_code != 0) {
+ goto finish;
+ }
+
+ ldout(cct, 20) << "Opened participating images. " <<
+ "Deleting snapshots themselves." << dendl;
+
+ for (int i = 0; i < snap_count; ++i) {
+ ImageCtx *ictx = ictxs[i];
+ on_finishes[i] = new C_SaferCond;
+
+ std::string snap_name;
+ ictx->image_lock.lock_shared();
+ snap_t snap_id = get_group_snap_id(ictx, ne);
+ r = ictx->get_snap_name(snap_id, &snap_name);
+ ictx->image_lock.unlock_shared();
+
+ if (r >= 0) {
+ ldout(cct, 20) << "removing individual snapshot from image " << ictx->name
+ << dendl;
+ ictx->operations->snap_remove(ne, snap_name, on_finishes[i]);
+ } else {
+ // We are ok to ignore missing image snapshots. The snapshot could have
+ // been inconsistent in the first place.
+ on_finishes[i]->complete(0);
+ }
+ }
+
+ for (int i = 0; i < snap_count; ++i) {
+ r = on_finishes[i]->wait();
+ delete on_finishes[i];
+ if (r < 0 && r != -ENOENT) {
+ // if previous attempts to remove this snapshot failed then the image's
+ // snapshot may not exist
+ lderr(cct) << "Failed deleting image snapshot. Ret code: " << r << dendl;
+ ret_code = r;
+ }
+ }
+
+ if (ret_code != 0) {
+ goto finish;
+ }
+
+ ldout(cct, 20) << "Removed images snapshots removing snapshot record."
+ << dendl;
+
+ r = cls_client::group_snap_remove(&group_ioctx, group_header_oid,
+ group_snap.id);
+ if (r < 0) {
+ ret_code = r;
+ goto finish;
+ }
+
+finish:
+ for (int i = 0; i < snap_count; ++i) {
+ if (ictxs[i] != nullptr) {
+ ictxs[i]->state->close();
+ }
+ }
+ return ret_code;
+}
+
+int group_snap_rollback_by_record(librados::IoCtx& group_ioctx,
+ const cls::rbd::GroupSnapshot& group_snap,
+ const std::string& group_id,
+ const std::string& group_header_oid,
+ ProgressContext& pctx) {
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+ std::vector<C_SaferCond*> on_finishes;
+ int r, ret_code;
+
+ std::vector<librbd::ImageCtx*> ictxs;
+
+ cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id,
+ group_snap.id};
+
+ ldout(cct, 20) << "Rolling back snapshots" << dendl;
+ int snap_count = group_snap.snaps.size();
+
+ for (int i = 0; i < snap_count; ++i) {
+ librados::IoCtx image_io_ctx;
+ r = util::create_ioctx(group_ioctx, "image", group_snap.snaps[i].pool, {},
+ &image_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::ImageCtx* image_ctx = new ImageCtx("", group_snap.snaps[i].image_id,
+ nullptr, image_io_ctx, false);
+
+ C_SaferCond* on_finish = new C_SaferCond;
+
+ image_ctx->state->open(0, on_finish);
+
+ ictxs.push_back(image_ctx);
+ on_finishes.push_back(on_finish);
+ }
+
+ ret_code = 0;
+ for (int i = 0; i < snap_count; ++i) {
+ r = on_finishes[i]->wait();
+ delete on_finishes[i];
+ if (r < 0) {
+ ictxs[i] = nullptr;
+ ret_code = r;
+ }
+ }
+ if (ret_code != 0) {
+ goto finish;
+ }
+
+ ldout(cct, 20) << "Requesting exclusive locks for images" << dendl;
+ for (auto ictx: ictxs) {
+ std::shared_lock owner_lock{ictx->owner_lock};
+ if (ictx->exclusive_lock != nullptr) {
+ ictx->exclusive_lock->block_requests(-EBUSY);
+ }
+ }
+ for (int i = 0; i < snap_count; ++i) {
+ ImageCtx *ictx = ictxs[i];
+ std::shared_lock owner_lock{ictx->owner_lock};
+
+ on_finishes[i] = new C_SaferCond;
+ if (ictx->exclusive_lock != nullptr) {
+ ictx->exclusive_lock->acquire_lock(on_finishes[i]);
+ }
+ }
+
+ ret_code = 0;
+ for (int i = 0; i < snap_count; ++i) {
+ r = 0;
+ ImageCtx *ictx = ictxs[i];
+ if (ictx->exclusive_lock != nullptr) {
+ r = on_finishes[i]->wait();
+ }
+ delete on_finishes[i];
+ if (r < 0) {
+ ret_code = r;
+ }
+ }
+ if (ret_code != 0) {
+ goto finish;
+ }
+
+ for (int i = 0; i < snap_count; ++i) {
+ ImageCtx *ictx = ictxs[i];
+ on_finishes[i] = new C_SaferCond;
+
+ std::shared_lock owner_locker{ictx->owner_lock};
+ std::string snap_name;
+ ictx->image_lock.lock_shared();
+ snap_t snap_id = get_group_snap_id(ictx, ne);
+ r = ictx->get_snap_name(snap_id, &snap_name);
+ ictx->image_lock.unlock_shared();
+
+ if (r >= 0) {
+ ldout(cct, 20) << "rolling back to individual snapshot for image " << ictx->name
+ << dendl;
+ ictx->operations->execute_snap_rollback(ne, snap_name, pctx, on_finishes[i]);
+ } else {
+ on_finishes[i]->complete(r);
+ }
+ }
+
+ for (int i = 0; i < snap_count; ++i) {
+ r = on_finishes[i]->wait();
+ delete on_finishes[i];
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "Failed rolling back group to snapshot. Ret code: " << r << dendl;
+ ret_code = r;
+ }
+ }
+
+finish:
+ for (int i = 0; i < snap_count; ++i) {
+ if (ictxs[i] != nullptr) {
+ ictxs[i]->state->close();
+ }
+ }
+ return ret_code;
+}
+
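+// notify_quiesce()/notify_unquiesce() bracket group snapshot creation: every
+// request id handed out by notify_quiesce() must eventually be passed back
+// to notify_unquiesce(), on the success and the error paths alike.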
+template <typename I>
+void notify_unquiesce(std::vector<I*> &ictxs,
+ const std::vector<uint64_t> &requests) {
+ if (requests.empty()) {
+ return;
+ }
+
+ ceph_assert(requests.size() == ictxs.size());
+ int image_count = ictxs.size();
+ std::vector<C_SaferCond> on_finishes(image_count);
+
+ for (int i = 0; i < image_count; ++i) {
+ ImageCtx *ictx = ictxs[i];
+
+ ictx->image_watcher->notify_unquiesce(requests[i], &on_finishes[i]);
+ }
+
+ for (int i = 0; i < image_count; ++i) {
+ on_finishes[i].wait();
+ }
+}
+
+template <typename I>
+int notify_quiesce(std::vector<I*> &ictxs, ProgressContext &prog_ctx,
+ std::vector<uint64_t> *requests) {
+ int image_count = ictxs.size();
+ std::vector<C_SaferCond> on_finishes(image_count);
+
+ requests->resize(image_count);
+ for (int i = 0; i < image_count; ++i) {
+ auto ictx = ictxs[i];
+
+ ictx->image_watcher->notify_quiesce(&(*requests)[i], prog_ctx,
+ &on_finishes[i]);
+ }
+
+ int ret_code = 0;
+ for (int i = 0; i < image_count; ++i) {
+ int r = on_finishes[i].wait();
+ if (r < 0) {
+ ret_code = r;
+ }
+ }
+
+ if (ret_code != 0) {
+ notify_unquiesce(ictxs, *requests);
+ }
+
+ return ret_code;
+}
+
+} // anonymous namespace
+
+template <typename I>
+int Group<I>::image_remove_by_id(librados::IoCtx& group_ioctx,
+ const char *group_name,
+ librados::IoCtx& image_ioctx,
+ const char *image_id)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+ ldout(cct, 20) << "io_ctx=" << &group_ioctx
+ << " group name " << group_name << " image "
+ << &image_ioctx << " id " << image_id << dendl;
+
+ string group_id;
+
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name,
+ &group_id);
+ if (r < 0) {
+ lderr(cct) << "error reading group id object: "
+ << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ ldout(cct, 20) << "removing image from group name " << group_name
+ << " group id " << group_id << dendl;
+
+ return group_image_remove(group_ioctx, group_id, image_ioctx, string(image_id));
+}
+
+template <typename I>
+int Group<I>::create(librados::IoCtx& io_ctx, const char *group_name)
+{
+ CephContext *cct = (CephContext *)io_ctx.cct();
+
+ string id = generate_uuid(io_ctx);
+
+ ldout(cct, 2) << "adding group to directory..." << dendl;
+
+ int r = cls_client::group_dir_add(&io_ctx, RBD_GROUP_DIRECTORY, group_name,
+ id);
+ if (r < 0) {
+ lderr(cct) << "error adding group to directory: "
+ << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ string header_oid = util::group_header_name(id);
+
+ r = io_ctx.create(header_oid, true);
+ if (r < 0) {
+ lderr(cct) << "error creating group header: " << cpp_strerror(r) << dendl;
+ goto err_remove_from_dir;
+ }
+
+ return 0;
+
+err_remove_from_dir:
+ int remove_r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY,
+ group_name, id);
+ if (remove_r < 0) {
+ lderr(cct) << "error cleaning up group from rbd_directory "
+ << "object after creation failed: " << cpp_strerror(remove_r)
+ << dendl;
+ }
+
+ return r;
+}
+
+template <typename I>
+int Group<I>::remove(librados::IoCtx& io_ctx, const char *group_name)
+{
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << "group_remove " << &io_ctx << " " << group_name << dendl;
+
+ std::string group_id;
+ int r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY,
+ std::string(group_name), &group_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error getting id of group" << dendl;
+ return r;
+ }
+ string group_header_oid = util::group_header_name(group_id);
+
+ std::vector<cls::rbd::GroupSnapshot> snaps;
+ r = group_snap_list(io_ctx, group_name, &snaps);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error listing group snapshots" << dendl;
+ return r;
+ }
+
+ for (auto &snap : snaps) {
+ r = group_snap_remove_by_record(io_ctx, snap, group_id, group_header_oid);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ std::vector<cls::rbd::GroupImageStatus> images;
+ r = group_image_list(io_ctx, group_name, &images);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error listing group images" << dendl;
+ return r;
+ }
+
+ for (auto image : images) {
+ IoCtx image_ioctx;
+ r = util::create_ioctx(io_ctx, "image", image.spec.pool_id, {},
+ &image_ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = group_image_remove(io_ctx, group_id, image_ioctx, image.spec.image_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error removing image from a group" << dendl;
+ return r;
+ }
+ }
+
+ string header_oid = util::group_header_name(group_id);
+
+ r = io_ctx.remove(header_oid);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error removing header: " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+
+ r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY,
+ group_name, group_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error removing group from directory" << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Group<I>::list(IoCtx& io_ctx, vector<string> *names)
+{
+ CephContext *cct = (CephContext *)io_ctx.cct();
+ ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl;
+
+ int max_read = 1024;
+ string last_read = "";
+ int r;
+ do {
+ map<string, string> groups;
+ r = cls_client::group_dir_list(&io_ctx, RBD_GROUP_DIRECTORY, last_read,
+ max_read, &groups);
+ if (r < 0) {
+ if (r != -ENOENT) {
+ lderr(cct) << "error listing group in directory: "
+ << cpp_strerror(r) << dendl;
+ } else {
+ r = 0;
+ }
+ return r;
+ }
+ for (pair<string, string> group : groups) {
+ names->push_back(group.first);
+ }
+ if (!groups.empty()) {
+ last_read = groups.rbegin()->first;
+ }
+ r = groups.size();
+ } while (r == max_read);
+
+ return 0;
+}
+
+template <typename I>
+int Group<I>::image_add(librados::IoCtx& group_ioctx, const char *group_name,
+ librados::IoCtx& image_ioctx, const char *image_name)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+ ldout(cct, 20) << "io_ctx=" << &group_ioctx
+ << " group name " << group_name << " image "
+ << &image_ioctx << " name " << image_name << dendl;
+
+ if (group_ioctx.get_namespace() != image_ioctx.get_namespace()) {
+ lderr(cct) << "group and image cannot be in different namespaces" << dendl;
+ return -EINVAL;
+ }
+
+ string group_id;
+
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name,
+ &group_id);
+ if (r < 0) {
+ lderr(cct) << "error reading group id object: "
+ << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ string group_header_oid = util::group_header_name(group_id);
+
+
+ ldout(cct, 20) << "adding image to group name " << group_name
+ << " group id " << group_header_oid << dendl;
+
+ string image_id;
+
+ r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name,
+ &image_id);
+ if (r < 0) {
+ lderr(cct) << "error reading image id object: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+
+ string image_header_oid = util::header_name(image_id);
+
+ ldout(cct, 20) << "adding image " << image_name
+ << " image id " << image_header_oid << dendl;
+
+ cls::rbd::GroupImageStatus incomplete_st(
+ image_id, image_ioctx.get_id(),
+ cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE);
+ cls::rbd::GroupImageStatus attached_st(
+ image_id, image_ioctx.get_id(), cls::rbd::GROUP_IMAGE_LINK_STATE_ATTACHED);
+
+ r = cls_client::group_image_set(&group_ioctx, group_header_oid,
+ incomplete_st);
+
+ cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id());
+
+ if (r < 0) {
+ lderr(cct) << "error adding image reference to group: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+
+ r = cls_client::image_group_add(&image_ioctx, image_header_oid, group_spec);
+ if (r < 0) {
+ lderr(cct) << "error adding group reference to image: "
+ << cpp_strerror(-r) << dendl;
+ cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id());
+ cls_client::group_image_remove(&group_ioctx, group_header_oid, spec);
+ // Ignore errors in the clean up procedure.
+ return r;
+ }
+ ImageWatcher<>::notify_header_update(image_ioctx, image_header_oid);
+
+ r = cls_client::group_image_set(&group_ioctx, group_header_oid,
+ attached_st);
+
+ return r;
+}
+
+template <typename I>
+int Group<I>::image_remove(librados::IoCtx& group_ioctx, const char *group_name,
+ librados::IoCtx& image_ioctx, const char *image_name)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+ ldout(cct, 20) << "io_ctx=" << &group_ioctx
+ << " group name " << group_name << " image "
+ << &image_ioctx << " name " << image_name << dendl;
+
+ if (group_ioctx.get_namespace() != image_ioctx.get_namespace()) {
+ lderr(cct) << "group and image cannot be in different namespaces" << dendl;
+ return -EINVAL;
+ }
+
+ string group_id;
+
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name,
+ &group_id);
+ if (r < 0) {
+ lderr(cct) << "error reading group id object: "
+ << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ ldout(cct, 20) << "removing image from group name " << group_name
+ << " group id " << group_id << dendl;
+
+ string image_id;
+ r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name,
+ &image_id);
+ if (r < 0) {
+ lderr(cct) << "error reading image id object: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+
+ r = group_image_remove(group_ioctx, group_id, image_ioctx, image_id);
+
+ return r;
+}
+
+template <typename I>
+int Group<I>::image_list(librados::IoCtx& group_ioctx,
+ const char *group_name,
+ std::vector<group_image_info_t>* images)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+ ldout(cct, 20) << "io_ctx=" << &group_ioctx
+ << " group name " << group_name << dendl;
+
+ std::vector<cls::rbd::GroupImageStatus> image_ids;
+
+  int r = group_image_list(group_ioctx, group_name, &image_ids);
+  if (r < 0) {
+    return r;
+  }
+
+ for (auto image_id : image_ids) {
+ IoCtx ioctx;
+    r = util::create_ioctx(group_ioctx, "image", image_id.spec.pool_id, {},
+                           &ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string image_name;
+ r = cls_client::dir_get_name(&ioctx, RBD_DIRECTORY,
+ image_id.spec.image_id, &image_name);
+ if (r < 0) {
+ return r;
+ }
+
+ images->push_back(
+ group_image_info_t {
+ image_name,
+ ioctx.get_id(),
+ static_cast<group_image_state_t>(image_id.state)});
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Group<I>::rename(librados::IoCtx& io_ctx, const char *src_name,
+ const char *dest_name)
+{
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << "group_rename " << &io_ctx << " " << src_name
+ << " -> " << dest_name << dendl;
+
+ std::string group_id;
+ int r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY,
+ std::string(src_name), &group_id);
+ if (r < 0) {
+ if (r != -ENOENT)
+ lderr(cct) << "error getting id of group" << dendl;
+ return r;
+ }
+
+ r = cls_client::group_dir_rename(&io_ctx, RBD_GROUP_DIRECTORY,
+ src_name, dest_name, group_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error renaming group from directory" << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+
+template <typename I>
+int Group<I>::image_get_group(I *ictx, group_info_t *group_info)
+{
+ int r = ictx->state->refresh_if_required();
+ if (r < 0)
+ return r;
+
+ if (RBD_GROUP_INVALID_POOL != ictx->group_spec.pool_id) {
+ IoCtx ioctx;
+ r = util::create_ioctx(ictx->md_ctx, "group", ictx->group_spec.pool_id, {},
+ &ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string group_name;
+ r = cls_client::dir_get_name(&ioctx, RBD_GROUP_DIRECTORY,
+ ictx->group_spec.group_id, &group_name);
+ if (r < 0)
+ return r;
+ group_info->pool = ioctx.get_id();
+ group_info->name = group_name;
+ } else {
+ group_info->pool = RBD_GROUP_INVALID_POOL;
+ group_info->name = "";
+ }
+
+ return 0;
+}
+
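+// Group snapshot creation proceeds in stages: record an INCOMPLETE group
+// snapshot, open every member image, send quiesce notifications, acquire
+// exclusive locks, create the individual image snapshots, then mark the
+// record COMPLETE and unquiesce. Each error path unwinds whatever stages
+// have completed so far.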
+template <typename I>
+int Group<I>::snap_create(librados::IoCtx& group_ioctx,
+ const char *group_name, const char *snap_name,
+ uint32_t flags) {
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+
+ string group_id;
+ cls::rbd::GroupSnapshot group_snap;
+ vector<cls::rbd::ImageSnapshotSpec> image_snaps;
+ std::string ind_snap_name;
+
+ std::vector<librbd::ImageCtx*> ictxs;
+ std::vector<C_SaferCond*> on_finishes;
+ std::vector<uint64_t> quiesce_requests;
+ NoOpProgressContext prog_ctx;
+ uint64_t internal_flags = 0;
+
+ int r = util::snap_create_flags_api_to_internal(cct, flags, &internal_flags);
+ if (r < 0) {
+ return r;
+ }
+ internal_flags &= ~(SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE |
+ SNAP_CREATE_FLAG_IGNORE_NOTIFY_QUIESCE_ERROR);
+
+ r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name,
+ &group_id);
+ if (r < 0) {
+ lderr(cct) << "error reading group id object: "
+ << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ std::vector<cls::rbd::GroupImageStatus> images;
+ r = group_image_list(group_ioctx, group_name, &images);
+ if (r < 0) {
+ return r;
+ }
+ int image_count = images.size();
+
+ ldout(cct, 20) << "Found " << image_count << " images in group" << dendl;
+
+ image_snaps = vector<cls::rbd::ImageSnapshotSpec>(image_count,
+ cls::rbd::ImageSnapshotSpec());
+
+ for (int i = 0; i < image_count; ++i) {
+ image_snaps[i].pool = images[i].spec.pool_id;
+ image_snaps[i].image_id = images[i].spec.image_id;
+ }
+
+ string group_header_oid = util::group_header_name(group_id);
+
+ group_snap.id = generate_uuid(group_ioctx);
+ group_snap.name = string(snap_name);
+ group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE;
+ group_snap.snaps = image_snaps;
+
+ cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id,
+ group_snap.id};
+
+ r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap);
+ if (r == -EEXIST) {
+ lderr(cct) << "snapshot with this name already exists: "
+ << cpp_strerror(r)
+ << dendl;
+ }
+ int ret_code = 0;
+ if (r < 0) {
+ ret_code = r;
+ goto finish;
+ }
+
+ for (auto image: images) {
+ librbd::IoCtx image_io_ctx;
+ r = util::create_ioctx(group_ioctx, "image", image.spec.pool_id, {},
+ &image_io_ctx);
+ if (r < 0) {
+ ret_code = r;
+ goto finish;
+ }
+
+ ldout(cct, 20) << "Opening image with id " << image.spec.image_id << dendl;
+
+ librbd::ImageCtx* image_ctx = new ImageCtx("", image.spec.image_id.c_str(),
+ nullptr, image_io_ctx, false);
+
+ C_SaferCond* on_finish = new C_SaferCond;
+
+ image_ctx->state->open(0, on_finish);
+
+ ictxs.push_back(image_ctx);
+ on_finishes.push_back(on_finish);
+ }
+ ldout(cct, 20) << "Issued open request waiting for the completion" << dendl;
+ ret_code = 0;
+ for (int i = 0; i < image_count; ++i) {
+
+ ldout(cct, 20) << "Waiting for completion on on_finish: " <<
+ on_finishes[i] << dendl;
+
+ r = on_finishes[i]->wait();
+ delete on_finishes[i];
+ if (r < 0) {
+ ictxs[i] = nullptr;
+ ret_code = r;
+ }
+ }
+ if (ret_code != 0) {
+ goto remove_record;
+ }
+
+ if ((flags & RBD_SNAP_CREATE_SKIP_QUIESCE) == 0) {
+ ldout(cct, 20) << "Sending quiesce notification" << dendl;
+ ret_code = notify_quiesce(ictxs, prog_ctx, &quiesce_requests);
+ if (ret_code != 0 && (flags & RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR) == 0) {
+ goto remove_record;
+ }
+ }
+
+ ldout(cct, 20) << "Requesting exclusive locks for images" << dendl;
+
+ for (auto ictx: ictxs) {
+ std::shared_lock owner_lock{ictx->owner_lock};
+ if (ictx->exclusive_lock != nullptr) {
+ ictx->exclusive_lock->block_requests(-EBUSY);
+ }
+ }
+ for (int i = 0; i < image_count; ++i) {
+ ImageCtx *ictx = ictxs[i];
+ std::shared_lock owner_lock{ictx->owner_lock};
+
+ on_finishes[i] = new C_SaferCond;
+ if (ictx->exclusive_lock != nullptr) {
+ ictx->exclusive_lock->acquire_lock(on_finishes[i]);
+ }
+ }
+
+ ret_code = 0;
+ for (int i = 0; i < image_count; ++i) {
+ r = 0;
+ ImageCtx *ictx = ictxs[i];
+ if (ictx->exclusive_lock != nullptr) {
+ r = on_finishes[i]->wait();
+ }
+ delete on_finishes[i];
+ if (r < 0) {
+ ret_code = r;
+ }
+ }
+ if (ret_code != 0) {
+ notify_unquiesce(ictxs, quiesce_requests);
+ goto remove_record;
+ }
+
+ ind_snap_name = calc_ind_image_snap_name(group_ioctx.get_id(), group_id,
+ group_snap.id);
+
+ for (int i = 0; i < image_count; ++i) {
+ ImageCtx *ictx = ictxs[i];
+
+ C_SaferCond* on_finish = new C_SaferCond;
+
+ std::shared_lock owner_locker{ictx->owner_lock};
+ ictx->operations->execute_snap_create(
+ ne, ind_snap_name.c_str(), on_finish, 0,
+ SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE, prog_ctx);
+
+ on_finishes[i] = on_finish;
+ }
+
+ ret_code = 0;
+ for (int i = 0; i < image_count; ++i) {
+ r = on_finishes[i]->wait();
+ delete on_finishes[i];
+ if (r < 0) {
+ ret_code = r;
+ } else {
+ ImageCtx *ictx = ictxs[i];
+ ictx->image_lock.lock_shared();
+ snap_t snap_id = get_group_snap_id(ictx, ne);
+ ictx->image_lock.unlock_shared();
+ if (snap_id == CEPH_NOSNAP) {
+ ldout(cct, 20) << "Couldn't find created snapshot with namespace: "
+ << ne << dendl;
+ ret_code = -ENOENT;
+ } else {
+ image_snaps[i].snap_id = snapid_t(snap_id);
+ image_snaps[i].pool = ictx->md_ctx.get_id();
+ image_snaps[i].image_id = ictx->id;
+ }
+ }
+ }
+ if (ret_code != 0) {
+ goto remove_image_snaps;
+ }
+
+ group_snap.snaps = image_snaps;
+ group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE;
+
+ r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap);
+ if (r < 0) {
+ ret_code = r;
+ goto remove_image_snaps;
+ }
+
+ ldout(cct, 20) << "Sending unquiesce notification" << dendl;
+ notify_unquiesce(ictxs, quiesce_requests);
+
+ goto finish;
+
+remove_image_snaps:
+ notify_unquiesce(ictxs, quiesce_requests);
+
+ for (int i = 0; i < image_count; ++i) {
+ ImageCtx *ictx = ictxs[i];
+ ldout(cct, 20) << "Removing individual snapshot with name: " <<
+ ind_snap_name << dendl;
+
+ on_finishes[i] = new C_SaferCond;
+ std::string snap_name;
+ ictx->image_lock.lock_shared();
+ snap_t snap_id = get_group_snap_id(ictx, ne);
+ r = ictx->get_snap_name(snap_id, &snap_name);
+ ictx->image_lock.unlock_shared();
+ if (r >= 0) {
+ ictx->operations->snap_remove(ne, snap_name.c_str(), on_finishes[i]);
+ } else {
+ // Ignore missing image snapshots. The whole snapshot could have been
+ // inconsistent.
+ on_finishes[i]->complete(0);
+ }
+ }
+
+ for (int i = 0, n = on_finishes.size(); i < n; ++i) {
+ r = on_finishes[i]->wait();
+ delete on_finishes[i];
+    if (r < 0 && r != -ENOENT) {
+      // if previous attempts to remove this snapshot failed then the
+      // image's snapshot may not exist
+ lderr(cct) << "Failed cleaning up image snapshot. Ret code: " << r << dendl;
+ // just report error, but don't abort the process
+ }
+ }
+
+remove_record:
+ r = cls_client::group_snap_remove(&group_ioctx, group_header_oid,
+ group_snap.id);
+ if (r < 0) {
+ lderr(cct) << "error while cleaning up group snapshot" << dendl;
+ // we ignore return value in clean up
+ }
+
+finish:
+ for (int i = 0, n = ictxs.size(); i < n; ++i) {
+ if (ictxs[i] != nullptr) {
+ ictxs[i]->state->close();
+ }
+ }
+ return ret_code;
+}
+
+template <typename I>
+int Group<I>::snap_remove(librados::IoCtx& group_ioctx, const char *group_name,
+ const char *snap_name)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+
+ string group_id;
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
+ group_name, &group_id);
+ if (r < 0) {
+ lderr(cct) << "error reading group id object: "
+ << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ std::vector<cls::rbd::GroupSnapshot> snaps;
+ r = group_snap_list(group_ioctx, group_name, &snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::GroupSnapshot *group_snap = nullptr;
+ for (auto &snap : snaps) {
+ if (snap.name == string(snap_name)) {
+ group_snap = &snap;
+ break;
+ }
+ }
+ if (group_snap == nullptr) {
+ return -ENOENT;
+ }
+
+ string group_header_oid = util::group_header_name(group_id);
+ r = group_snap_remove_by_record(group_ioctx, *group_snap, group_id,
+ group_header_oid);
+ return r;
+}
+
+template <typename I>
+int Group<I>::snap_rename(librados::IoCtx& group_ioctx, const char *group_name,
+ const char *old_snap_name,
+ const char *new_snap_name) {
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+ if (0 == strcmp(old_snap_name, new_snap_name))
+ return -EEXIST;
+
+ std::string group_id;
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
+ group_name, &group_id);
+ if (r == -ENOENT) {
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "error reading group id object: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ std::vector<cls::rbd::GroupSnapshot> group_snaps;
+ r = group_snap_list(group_ioctx, group_name, &group_snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::GroupSnapshot group_snap;
+ for (auto &snap : group_snaps) {
+ if (snap.name == old_snap_name) {
+ group_snap = snap;
+ break;
+ }
+ }
+
+ if (group_snap.id.empty()) {
+ return -ENOENT;
+ }
+
+ std::string group_header_oid = util::group_header_name(group_id);
+ group_snap.name = new_snap_name;
+ r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Group<I>::snap_list(librados::IoCtx& group_ioctx, const char *group_name,
+ std::vector<group_snap_info_t> *snaps)
+{
+ std::vector<cls::rbd::GroupSnapshot> cls_snaps;
+
+ int r = group_snap_list(group_ioctx, group_name, &cls_snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ for (auto snap : cls_snaps) {
+ snaps->push_back(
+ group_snap_info_t {
+ snap.name,
+ static_cast<group_snap_state_t>(snap.state)});
+
+ }
+ return 0;
+}
+
+template <typename I>
+int Group<I>::snap_rollback(librados::IoCtx& group_ioctx,
+ const char *group_name, const char *snap_name,
+ ProgressContext& pctx)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+
+ string group_id;
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
+ group_name, &group_id);
+ if (r < 0) {
+ lderr(cct) << "error reading group id object: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ std::vector<cls::rbd::GroupSnapshot> snaps;
+ r = group_snap_list(group_ioctx, group_name, &snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::GroupSnapshot *group_snap = nullptr;
+ for (auto &snap : snaps) {
+ if (snap.name == string(snap_name)) {
+ group_snap = &snap;
+ break;
+ }
+ }
+ if (group_snap == nullptr) {
+ return -ENOENT;
+ }
+
+ string group_header_oid = util::group_header_name(group_id);
+ r = group_snap_rollback_by_record(group_ioctx, *group_snap, group_id,
+ group_header_oid, pctx);
+ return r;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Group<librbd::ImageCtx>;
diff --git a/src/librbd/api/Group.h b/src/librbd/api/Group.h
new file mode 100644
index 000000000..9d3abcc59
--- /dev/null
+++ b/src/librbd/api/Group.h
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_GROUP_H
+#define CEPH_LIBRBD_API_GROUP_H
+
+#include "include/rbd/librbd.hpp"
+#include "include/rados/librados_fwd.hpp"
+#include <string>
+#include <vector>
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+
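+// Static entry points for RBD group operations (membership and group
+// snapshots), backing the public librbd group API. A minimal sketch
+// (illustrative only):
+//
+//   librbd::api::Group<>::create(io_ctx, "mygroup");
+//   librbd::api::Group<>::image_add(io_ctx, "mygroup", image_io_ctx, "img");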
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Group {
+
+ static int create(librados::IoCtx& io_ctx, const char *group_name);
+ static int remove(librados::IoCtx& io_ctx, const char *group_name);
+ static int list(librados::IoCtx& io_ctx, std::vector<std::string> *names);
+ static int rename(librados::IoCtx& io_ctx, const char *src_group_name,
+ const char *dest_group_name);
+
+ static int image_add(librados::IoCtx& group_ioctx, const char *group_name,
+ librados::IoCtx& image_ioctx, const char *image_name);
+ static int image_remove(librados::IoCtx& group_ioctx, const char *group_name,
+ librados::IoCtx& image_ioctx, const char *image_name);
+ static int image_remove_by_id(librados::IoCtx& group_ioctx,
+ const char *group_name,
+ librados::IoCtx& image_ioctx,
+ const char *image_id);
+ static int image_list(librados::IoCtx& group_ioctx, const char *group_name,
+ std::vector<group_image_info_t> *images);
+
+ static int image_get_group(ImageCtxT *ictx, group_info_t *group_info);
+
+ static int snap_create(librados::IoCtx& group_ioctx,
+ const char *group_name, const char *snap_name,
+ uint32_t flags);
+ static int snap_remove(librados::IoCtx& group_ioctx,
+ const char *group_name, const char *snap_name);
+ static int snap_rename(librados::IoCtx& group_ioctx, const char *group_name,
+ const char *old_snap_name, const char *new_snap_name);
+ static int snap_list(librados::IoCtx& group_ioctx, const char *group_name,
+ std::vector<group_snap_info_t> *snaps);
+ static int snap_rollback(librados::IoCtx& group_ioctx,
+ const char *group_name, const char *snap_name,
+ ProgressContext& pctx);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Group<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_GROUP_H
diff --git a/src/librbd/api/Image.cc b/src/librbd/api/Image.cc
new file mode 100644
index 000000000..19dc5aa68
--- /dev/null
+++ b/src/librbd/api/Image.cc
@@ -0,0 +1,1015 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Image.h"
+#include "include/rados/librados.hpp"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/DeepCopyRequest.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Trash.h"
+#include "librbd/api/Utils.h"
+#include "librbd/crypto/FormatRequest.h"
+#include "librbd/crypto/LoadRequest.h"
+#include "librbd/deep_copy/Handler.h"
+#include "librbd/image/CloneRequest.h"
+#include "librbd/image/RemoveRequest.h"
+#include "librbd/image/PreRemoveRequest.h"
+#include "librbd/io/ImageDispatcherInterface.h"
+#include "librbd/io/ObjectDispatcherInterface.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Image: " << __func__ << ": "
+
+using std::map;
+using std::string;
+using librados::snap_t;
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+bool compare_by_pool(const librbd::linked_image_spec_t& lhs,
+ const librbd::linked_image_spec_t& rhs)
+{
+ if (lhs.pool_id != rhs.pool_id) {
+ return lhs.pool_id < rhs.pool_id;
+ } else if (lhs.pool_namespace != rhs.pool_namespace) {
+ return lhs.pool_namespace < rhs.pool_namespace;
+ }
+ return false;
+}
+
+bool compare(const librbd::linked_image_spec_t& lhs,
+ const librbd::linked_image_spec_t& rhs)
+{
+ if (lhs.pool_name != rhs.pool_name) {
+ return lhs.pool_name < rhs.pool_name;
+ } else if (lhs.pool_id != rhs.pool_id) {
+ return lhs.pool_id < rhs.pool_id;
+ } else if (lhs.pool_namespace != rhs.pool_namespace) {
+ return lhs.pool_namespace < rhs.pool_namespace;
+ } else if (lhs.image_name != rhs.image_name) {
+ return lhs.image_name < rhs.image_name;
+ } else if (lhs.image_id != rhs.image_id) {
+ return lhs.image_id < rhs.image_id;
+ }
+ return false;
+}
+
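+// helper: open the image without its parent and run the pre-remove
+// state machine to verify removal can proceed (e.g. it reports -ECHILD
+// when clones still depend on the image's snapshots)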
+template <typename I>
+int pre_remove_image(librados::IoCtx& io_ctx, const std::string& image_id) {
+ I *image_ctx = I::create("", image_id, nullptr, io_ctx, false);
+ int r = image_ctx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
+ if (r < 0) {
+ return r;
+ }
+
+ C_SaferCond ctx;
+ auto req = image::PreRemoveRequest<I>::create(image_ctx, false, &ctx);
+ req->send();
+
+ r = ctx.wait();
+ image_ctx->state->close();
+ return r;
+}
+
+} // anonymous namespace
+
+template <typename I>
+int64_t Image<I>::get_data_pool_id(I *ictx) {
+ if (ictx->data_ctx.is_valid()) {
+ return ictx->data_ctx.get_id();
+ }
+
+ int64_t pool_id;
+ int r = cls_client::get_data_pool(&ictx->md_ctx, ictx->header_oid, &pool_id);
+ if (r < 0) {
+ CephContext *cct = ictx->cct;
+ lderr(cct) << "error getting data pool ID: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return pool_id;
+}
+
+template <typename I>
+int Image<I>::get_op_features(I *ictx, uint64_t *op_features) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "image_ctx=" << ictx << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ std::shared_lock image_locker{ictx->image_lock};
+ *op_features = ictx->op_features;
+ return 0;
+}
+
+template <typename I>
+int Image<I>::list_images(librados::IoCtx& io_ctx,
+ std::vector<image_spec_t> *images) {
+ CephContext *cct = (CephContext *)io_ctx.cct();
+ ldout(cct, 20) << "list " << &io_ctx << dendl;
+
+ int r;
+ images->clear();
+
+ if (io_ctx.get_namespace().empty()) {
+ bufferlist bl;
+ r = io_ctx.read(RBD_DIRECTORY, bl, 0, 0);
+ if (r == -ENOENT) {
+ return 0;
+ } else if (r < 0) {
+ lderr(cct) << "error listing v1 images: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ // V1 format images are in a tmap
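+    // (a tmap is an encoded header bufferlist followed by a sorted
+    // string->bufferlist map; only the keys -- the image names -- matter)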
+ if (bl.length()) {
+ auto p = bl.cbegin();
+ bufferlist header;
+ std::map<std::string, bufferlist> m;
+ decode(header, p);
+ decode(m, p);
+ for (auto& it : m) {
+ images->push_back({.id ="", .name = it.first});
+ }
+ }
+ }
+
+ // V2 format images
+ std::map<std::string, std::string> image_names_to_ids;
+ r = list_images_v2(io_ctx, &image_names_to_ids);
+ if (r < 0) {
+ lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (const auto& img_pair : image_names_to_ids) {
+ images->push_back({.id = img_pair.second,
+ .name = img_pair.first});
+ }
+
+ // include V2 images in a partially removed state
+ std::vector<librbd::trash_image_info_t> trash_images;
+ r = Trash<I>::list(io_ctx, trash_images, false);
+ if (r < 0 && r != -EOPNOTSUPP) {
+ lderr(cct) << "error listing trash images: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (const auto& trash_image : trash_images) {
+ if (trash_image.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+ images->push_back({.id = trash_image.id,
+ .name = trash_image.name});
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Image<I>::list_images_v2(librados::IoCtx& io_ctx, ImageNameToIds *images) {
+ CephContext *cct = (CephContext *)io_ctx.cct();
+ ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl;
+
+ // new format images are accessed by class methods
+ int r;
+ int max_read = 1024;
+ string last_read = "";
+ do {
+ map<string, string> images_page;
+ r = cls_client::dir_list(&io_ctx, RBD_DIRECTORY, last_read, max_read,
+ &images_page);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error listing image in directory: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (r == -ENOENT) {
+ break;
+ }
+ for (map<string, string>::const_iterator it = images_page.begin();
+ it != images_page.end(); ++it) {
+ images->insert(*it);
+ }
+ if (!images_page.empty()) {
+ last_read = images_page.rbegin()->first;
+ }
+ r = images_page.size();
+ } while (r == max_read);
+
+ return 0;
+}
+
+template <typename I>
+int Image<I>::get_parent(I *ictx,
+ librbd::linked_image_spec_t *parent_image,
+ librbd::snap_spec_t *parent_snap) {
+ auto cct = ictx->cct;
+ ldout(cct, 20) << "image_ctx=" << ictx << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ std::shared_lock image_locker{ictx->image_lock};
+
+ bool release_image_lock = false;
+ BOOST_SCOPE_EXIT_ALL(ictx, &release_image_lock) {
+ if (release_image_lock) {
+ ictx->parent->image_lock.unlock_shared();
+ }
+ };
+
+ // if a migration is in-progress, the true parent is the parent
+ // of the migration source image
+ auto parent = ictx->parent;
+ if (!ictx->migration_info.empty() && ictx->parent != nullptr) {
+ release_image_lock = true;
+ ictx->parent->image_lock.lock_shared();
+
+ parent = ictx->parent->parent;
+ }
+
+ if (parent == nullptr) {
+ return -ENOENT;
+ }
+
+ parent_image->pool_id = parent->md_ctx.get_id();
+ parent_image->pool_name = parent->md_ctx.get_pool_name();
+ parent_image->pool_namespace = parent->md_ctx.get_namespace();
+
+ std::shared_lock parent_image_locker{parent->image_lock};
+ parent_snap->id = parent->snap_id;
+ parent_snap->namespace_type = RBD_SNAP_NAMESPACE_TYPE_USER;
+ if (parent->snap_id != CEPH_NOSNAP) {
+ auto snap_info = parent->get_snap_info(parent->snap_id);
+ if (snap_info == nullptr) {
+ lderr(cct) << "error finding parent snap name: " << cpp_strerror(r)
+ << dendl;
+ return -ENOENT;
+ }
+
+ parent_snap->namespace_type = static_cast<snap_namespace_type_t>(
+ cls::rbd::get_snap_namespace_type(snap_info->snap_namespace));
+ parent_snap->name = snap_info->name;
+ }
+
+ parent_image->image_id = parent->id;
+ parent_image->image_name = parent->name;
+ parent_image->trash = true;
+
+ librbd::trash_image_info_t trash_info;
+ r = Trash<I>::get(parent->md_ctx, parent->id, &trash_info);
+ if (r == -ENOENT || r == -EOPNOTSUPP) {
+ parent_image->trash = false;
+ } else if (r < 0) {
+ lderr(cct) << "error looking up trash status: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Image<I>::list_children(I *ictx,
+ std::vector<librbd::linked_image_spec_t> *images) {
+ images->clear();
+ return list_descendants(ictx, 1, images);
+}
+
+template <typename I>
+int Image<I>::list_children(I *ictx,
+ const cls::rbd::ParentImageSpec &parent_spec,
+ std::vector<librbd::linked_image_spec_t> *images) {
+ images->clear();
+ return list_descendants(ictx, parent_spec, 1, images);
+}
+
+template <typename I>
+int Image<I>::list_descendants(
+ librados::IoCtx& io_ctx, const std::string &image_id,
+ const std::optional<size_t> &max_level,
+ std::vector<librbd::linked_image_spec_t> *images) {
+ ImageCtx *ictx = new librbd::ImageCtx("", image_id, nullptr,
+ io_ctx, true);
+ CephContext *cct = ictx->cct;
+ int r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ return 0;
+ }
+ lderr(cct) << "failed to open descendant " << image_id
+ << " from pool " << io_ctx.get_pool_name() << ":"
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = list_descendants(ictx, max_level, images);
+
+ int r1 = ictx->state->close();
+ if (r1 < 0) {
+ lderr(cct) << "error when closing descendant " << image_id
+ << " from pool " << io_ctx.get_pool_name() << ":"
+ << cpp_strerror(r1) << dendl;
+ }
+
+ return r;
+}
+
+template <typename I>
+int Image<I>::list_descendants(
+ I *ictx, const std::optional<size_t> &max_level,
+ std::vector<librbd::linked_image_spec_t> *images) {
+ std::shared_lock l{ictx->image_lock};
+ std::vector<librados::snap_t> snap_ids;
+ if (ictx->snap_id != CEPH_NOSNAP) {
+ snap_ids.push_back(ictx->snap_id);
+ } else {
+ snap_ids = ictx->snaps;
+ }
+ for (auto snap_id : snap_ids) {
+ cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(),
+ ictx->md_ctx.get_namespace(),
+ ictx->id, snap_id};
+ int r = list_descendants(ictx, parent_spec, max_level, images);
+ if (r < 0) {
+ return r;
+ }
+ }
+ return 0;
+}
+
+template <typename I>
+int Image<I>::list_descendants(
+ I *ictx, const cls::rbd::ParentImageSpec &parent_spec,
+ const std::optional<size_t> &max_level,
+ std::vector<librbd::linked_image_spec_t> *images) {
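+  // consume one level from the depth budget before recursing; a
+  // max_level of 1 thus yields direct children only, while an unset
+  // max_level recurses without bound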
+ auto child_max_level = max_level;
+ if (child_max_level) {
+    if (*child_max_level == 0) {
+ return 0;
+ }
+ (*child_max_level)--;
+ }
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ // no children for non-layered or old format image
+ if (!ictx->test_features(RBD_FEATURE_LAYERING, ictx->image_lock)) {
+ return 0;
+ }
+
+ librados::Rados rados(ictx->md_ctx);
+
+ // search all pools for clone v1 children dependent on this snapshot
+ std::list<std::pair<int64_t, std::string> > pools;
+ int r = rados.pool_list2(pools);
+ if (r < 0) {
+ lderr(cct) << "error listing pools: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (auto& it : pools) {
+ int64_t base_tier;
+ r = rados.pool_get_base_tier(it.first, &base_tier);
+ if (r == -ENOENT) {
+ ldout(cct, 1) << "pool " << it.second << " no longer exists" << dendl;
+ continue;
+ } else if (r < 0) {
+ lderr(cct) << "error retrieving base tier for pool " << it.second
+ << dendl;
+ return r;
+ }
+ if (it.first != base_tier) {
+ // pool is a cache; skip it
+ continue;
+ }
+
+ IoCtx ioctx;
+ r = librbd::util::create_ioctx(
+ ictx->md_ctx, "child image", it.first, {}, &ioctx);
+ if (r == -ENOENT) {
+ continue;
+ } else if (r < 0) {
+ return r;
+ }
+
+ std::set<std::string> image_ids;
+ r = cls_client::get_children(&ioctx, RBD_CHILDREN, parent_spec,
+ image_ids);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error reading list of children from pool " << it.second
+ << dendl;
+ return r;
+ }
+
+ for (auto& image_id : image_ids) {
+ images->push_back({
+ it.first, "", ictx->md_ctx.get_namespace(), image_id, "", false});
+ r = list_descendants(ioctx, image_id, child_max_level, images);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+
+ // retrieve clone v2 children attached to this snapshot
+ IoCtx parent_io_ctx;
+ r = librbd::util::create_ioctx(
+ ictx->md_ctx, "parent image",parent_spec.pool_id,
+ parent_spec.pool_namespace, &parent_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::ChildImageSpecs child_images;
+ r = cls_client::children_list(
+ &parent_io_ctx, librbd::util::header_name(parent_spec.image_id),
+ parent_spec.snap_id, &child_images);
+ if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) {
+ lderr(cct) << "error retrieving children: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (auto& child_image : child_images) {
+ images->push_back({
+ child_image.pool_id, "", child_image.pool_namespace,
+ child_image.image_id, "", false});
+ if (!child_max_level || *child_max_level > 0) {
+ IoCtx ioctx;
+ r = librbd::util::create_ioctx(
+ ictx->md_ctx, "child image", child_image.pool_id,
+ child_image.pool_namespace, &ioctx);
+ if (r == -ENOENT) {
+ continue;
+ } else if (r < 0) {
+ return r;
+ }
+ r = list_descendants(ioctx, child_image.image_id, child_max_level,
+ images);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+
+ // batch lookups by pool + namespace
+ std::sort(images->begin(), images->end(), compare_by_pool);
+
+ int64_t child_pool_id = -1;
+ librados::IoCtx child_io_ctx;
+ std::map<std::string, std::pair<std::string, bool>> child_image_id_to_info;
+ for (auto& image : *images) {
+ if (child_pool_id == -1 || child_pool_id != image.pool_id ||
+ child_io_ctx.get_namespace() != image.pool_namespace) {
+ r = librbd::util::create_ioctx(
+ ictx->md_ctx, "child image", image.pool_id, image.pool_namespace,
+ &child_io_ctx);
+ if (r == -ENOENT) {
+ image.pool_name = "";
+ image.image_name = "";
+ continue;
+ } else if (r < 0) {
+ return r;
+ }
+ child_pool_id = image.pool_id;
+
+ child_image_id_to_info.clear();
+
+ std::map<std::string, std::string> image_names_to_ids;
+ r = list_images_v2(child_io_ctx, &image_names_to_ids);
+ if (r < 0) {
+ lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (auto& [name, id] : image_names_to_ids) {
+ child_image_id_to_info.insert({id, {name, false}});
+ }
+
+ std::vector<librbd::trash_image_info_t> trash_images;
+ r = Trash<I>::list(child_io_ctx, trash_images, false);
+ if (r < 0 && r != -EOPNOTSUPP) {
+ lderr(cct) << "error listing trash images: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ for (auto& it : trash_images) {
+ child_image_id_to_info.insert({
+ it.id,
+          {it.name,
+           it.source != RBD_TRASH_IMAGE_SOURCE_REMOVING}});
+ }
+ }
+
+ auto it = child_image_id_to_info.find(image.image_id);
+ if (it == child_image_id_to_info.end()) {
+ lderr(cct) << "error looking up name for image id "
+ << image.image_id << " in pool "
+ << child_io_ctx.get_pool_name()
+ << (image.pool_namespace.empty() ?
+ "" : "/" + image.pool_namespace) << dendl;
+ return -ENOENT;
+ }
+
+ image.pool_name = child_io_ctx.get_pool_name();
+ image.image_name = it->second.first;
+ image.trash = it->second.second;
+ }
+
+ // final sort by pool + image names
+ std::sort(images->begin(), images->end(), compare);
+ return 0;
+}
+
+template <typename I>
+int Image<I>::deep_copy(I *src, librados::IoCtx& dest_md_ctx,
+ const char *destname, ImageOptions& opts,
+ ProgressContext &prog_ctx) {
+ CephContext *cct = (CephContext *)dest_md_ctx.cct();
+ ldout(cct, 20) << src->name
+ << (src->snap_name.length() ? "@" + src->snap_name : "")
+ << " -> " << destname << " opts = " << opts << dendl;
+
+ uint64_t features;
+ uint64_t src_size;
+ {
+ std::shared_lock image_locker{src->image_lock};
+
+ if (!src->migration_info.empty()) {
+ lderr(cct) << "cannot deep copy migrating image" << dendl;
+ return -EBUSY;
+ }
+
+ features = src->features;
+ src_size = src->get_image_size(src->snap_id);
+ }
+ uint64_t format = 2;
+ if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) {
+ opts.set(RBD_IMAGE_OPTION_FORMAT, format);
+ }
+ if (format == 1) {
+ lderr(cct) << "old format not supported for destination image" << dendl;
+ return -EINVAL;
+ }
+ uint64_t stripe_unit = src->stripe_unit;
+ if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) {
+ opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
+ }
+ uint64_t stripe_count = src->stripe_count;
+ if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) {
+ opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
+ }
+ uint64_t order = src->order;
+ if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) {
+ opts.set(RBD_IMAGE_OPTION_ORDER, order);
+ }
+ if (opts.get(RBD_IMAGE_OPTION_FEATURES, &features) != 0) {
+ opts.set(RBD_IMAGE_OPTION_FEATURES, features);
+ }
+ if (features & ~RBD_FEATURES_ALL) {
+ lderr(cct) << "librbd does not support requested features" << dendl;
+ return -ENOSYS;
+ }
+
+ uint64_t flatten = 0;
+ if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) {
+ opts.unset(RBD_IMAGE_OPTION_FLATTEN);
+ }
+
+ cls::rbd::ParentImageSpec parent_spec;
+ if (flatten > 0) {
+ parent_spec.pool_id = -1;
+ } else {
+ std::shared_lock image_locker{src->image_lock};
+
+ // use oldest snapshot or HEAD for parent spec
+ if (!src->snap_info.empty()) {
+ parent_spec = src->snap_info.begin()->second.parent.spec;
+ } else {
+ parent_spec = src->parent_md.spec;
+ }
+ }
+
+ int r;
+ if (parent_spec.pool_id == -1) {
+ r = create(dest_md_ctx, destname, "", src_size, opts, "", "", false);
+ } else {
+ librados::IoCtx parent_io_ctx;
+ r = librbd::util::create_ioctx(
+ src->md_ctx, "parent image", parent_spec.pool_id,
+ parent_spec.pool_namespace, &parent_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ ConfigProxy config{cct->_conf};
+ api::Config<I>::apply_pool_overrides(dest_md_ctx, &config);
+
+ C_SaferCond ctx;
+ std::string dest_id = librbd::util::generate_image_id(dest_md_ctx);
+ auto *req = image::CloneRequest<I>::create(
+ config, parent_io_ctx, parent_spec.image_id, "", {}, parent_spec.snap_id,
+ dest_md_ctx, destname, dest_id, opts, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL,
+ "", "", src->op_work_queue, &ctx);
+ req->send();
+ r = ctx.wait();
+ }
+ if (r < 0) {
+ lderr(cct) << "header creation failed" << dendl;
+ return r;
+ }
+ opts.set(RBD_IMAGE_OPTION_ORDER, static_cast<uint64_t>(order));
+
+ auto dest = new I(destname, "", nullptr, dest_md_ctx, false);
+ r = dest->state->open(0);
+ if (r < 0) {
+ lderr(cct) << "failed to read newly created header" << dendl;
+ return r;
+ }
+
+ C_SaferCond lock_ctx;
+ {
+ std::unique_lock locker{dest->owner_lock};
+
+ if (dest->exclusive_lock == nullptr ||
+ dest->exclusive_lock->is_lock_owner()) {
+ lock_ctx.complete(0);
+ } else {
+ dest->exclusive_lock->acquire_lock(&lock_ctx);
+ }
+ }
+
+ r = lock_ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to request exclusive lock: " << cpp_strerror(r)
+ << dendl;
+ dest->state->close();
+ return r;
+ }
+
+ r = deep_copy(src, dest, flatten > 0, prog_ctx);
+
+ int close_r = dest->state->close();
+ if (r == 0 && close_r < 0) {
+ r = close_r;
+ }
+ return r;
+}
+
+template <typename I>
+int Image<I>::deep_copy(I *src, I *dest, bool flatten,
+ ProgressContext &prog_ctx) {
+ // ensure previous writes are visible to dest
+ C_SaferCond flush_ctx;
+ {
+ std::shared_lock owner_locker{src->owner_lock};
+ auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, src,
+ io::AIO_TYPE_FLUSH);
+ auto req = io::ImageDispatchSpec::create_flush(
+ *src, io::IMAGE_DISPATCH_LAYER_INTERNAL_START,
+ aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
+ req->send();
+ }
+ int r = flush_ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+
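+  // copy from the beginning of time (snap id 0) up to the source's
+  // currently set snapshot (HEAD when no snapshot is set)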
+ librados::snap_t snap_id_start = 0;
+ librados::snap_t snap_id_end;
+ {
+ std::shared_lock image_locker{src->image_lock};
+ snap_id_end = src->snap_id;
+ }
+
+ AsioEngine asio_engine(src->md_ctx);
+
+ C_SaferCond cond;
+ SnapSeqs snap_seqs;
+ deep_copy::ProgressHandler progress_handler{&prog_ctx};
+ auto req = DeepCopyRequest<I>::create(
+ src, dest, snap_id_start, snap_id_end, 0U, flatten, boost::none,
+ asio_engine.get_work_queue(), &snap_seqs, &progress_handler, &cond);
+ req->send();
+ r = cond.wait();
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Image<I>::snap_set(I *ictx,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const char *snap_name) {
+ ldout(ictx->cct, 20) << "snap_set " << ictx << " snap = "
+ << (snap_name ? snap_name : "NULL") << dendl;
+
+ // ignore return value, since we may be set to a non-existent
+ // snapshot and the user is trying to fix that
+ ictx->state->refresh_if_required();
+
+ uint64_t snap_id = CEPH_NOSNAP;
+ std::string name(snap_name == nullptr ? "" : snap_name);
+ if (!name.empty()) {
+ std::shared_lock image_locker{ictx->image_lock};
+ snap_id = ictx->get_snap_id(snap_namespace, snap_name);
+ if (snap_id == CEPH_NOSNAP) {
+ return -ENOENT;
+ }
+ }
+
+ return snap_set(ictx, snap_id);
+}
+
+template <typename I>
+int Image<I>::snap_set(I *ictx, uint64_t snap_id) {
+ ldout(ictx->cct, 20) << "snap_set " << ictx << " "
+ << "snap_id=" << snap_id << dendl;
+
+ // ignore return value, since we may be set to a non-existent
+ // snapshot and the user is trying to fix that
+ ictx->state->refresh_if_required();
+
+ C_SaferCond ctx;
+ ictx->state->snap_set(snap_id, &ctx);
+ int r = ctx.wait();
+ if (r < 0) {
+ if (r != -ENOENT) {
+ lderr(ictx->cct) << "failed to " << (snap_id == CEPH_NOSNAP ? "un" : "")
+ << "set snapshot: " << cpp_strerror(r) << dendl;
+ }
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Image<I>::remove(IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext& prog_ctx)
+{
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << "name=" << image_name << dendl;
+
+ // look up the V2 image id based on the image name
+ std::string image_id;
+ int r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name,
+ &image_id);
+ if (r == -ENOENT) {
+ // check if it already exists in trash from an aborted trash remove attempt
+ std::vector<trash_image_info_t> trash_entries;
+ r = Trash<I>::list(io_ctx, trash_entries, false);
+ if (r < 0) {
+ return r;
+ }
+ for (auto& entry : trash_entries) {
+ if (entry.name == image_name &&
+ entry.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+ cls::rbd::TrashImageSpec spec;
+ r = cls_client::trash_get(&io_ctx, entry.id, &spec);
+ if (r < 0) {
+ lderr(cct) << "error getting image id " << entry.id
+ << " info from trash: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ if (spec.state == cls::rbd::TRASH_IMAGE_STATE_MOVING) {
+ r = Trash<I>::move(io_ctx, entry.source, entry.name, entry.id, 0);
+ if (r < 0) {
+ return r;
+ }
+ }
+ return Trash<I>::remove(io_ctx, entry.id, true, prog_ctx);
+ }
+ }
+
+ // fall-through if we failed to locate the image in the V2 directory and
+ // trash
+ } else if (r < 0) {
+ lderr(cct) << "failed to retrieve image id: " << cpp_strerror(r) << dendl;
+ return r;
+ } else {
+ // attempt to move the image to the trash (and optionally immediately
+ // delete the image)
+ ConfigProxy config(cct->_conf);
+ Config<I>::apply_pool_overrides(io_ctx, &config);
+
+ rbd_trash_image_source_t trash_image_source =
+ RBD_TRASH_IMAGE_SOURCE_REMOVING;
+ uint64_t expire_seconds = 0;
+ if (config.get_val<bool>("rbd_move_to_trash_on_remove")) {
+ // keep the image in the trash upon remove requests
+ trash_image_source = RBD_TRASH_IMAGE_SOURCE_USER;
+ expire_seconds = config.get_val<uint64_t>(
+ "rbd_move_to_trash_on_remove_expire_seconds");
+ } else {
+ // attempt to pre-validate the removal before moving to trash and
+ // removing
+ r = pre_remove_image<I>(io_ctx, image_id);
+ if (r == -ECHILD) {
+ if (config.get_val<bool>("rbd_move_parent_to_trash_on_remove")) {
+ // keep the image in the trash until the last child is removed
+ trash_image_source = RBD_TRASH_IMAGE_SOURCE_USER_PARENT;
+ } else {
+ lderr(cct) << "image has snapshots - not removing" << dendl;
+ return -ENOTEMPTY;
+ }
+ } else if (r < 0 && r != -ENOENT) {
+ return r;
+ }
+ }
+
+ r = Trash<I>::move(io_ctx, trash_image_source, image_name, image_id,
+ expire_seconds);
+ if (r >= 0) {
+ if (trash_image_source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+ // proceed with attempting to immediately remove the image
+ r = Trash<I>::remove(io_ctx, image_id, true, prog_ctx);
+
+ if (r == -ENOTEMPTY || r == -EBUSY || r == -EMLINK) {
+ // best-effort try to restore the image if the removal
+ // failed for possible expected reasons
+ Trash<I>::restore(io_ctx, {cls::rbd::TRASH_IMAGE_SOURCE_REMOVING},
+ image_id, image_name);
+ }
+ }
+ return r;
+ } else if (r < 0 && r != -EOPNOTSUPP) {
+ return r;
+ }
+
+ // fall-through if trash isn't supported
+ }
+
+ AsioEngine asio_engine(io_ctx);
+
+ // might be a V1 image format that cannot be moved to the trash
+ // and would not have been listed in the V2 directory -- or the OSDs
+ // are too old and don't support the trash feature
+ C_SaferCond cond;
+ auto req = librbd::image::RemoveRequest<I>::create(
+ io_ctx, image_name, "", false, false, prog_ctx,
+ asio_engine.get_work_queue(), &cond);
+ req->send();
+
+ return cond.wait();
+}
+
+template <typename I>
+int Image<I>::flatten_children(I *ictx, const char* snap_name,
+ ProgressContext& pctx) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "children flatten " << ictx->name << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ std::shared_lock l{ictx->image_lock};
+ snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(),
+ snap_name);
+
+ cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(),
+ ictx->md_ctx.get_namespace(),
+ ictx->id, snap_id};
+ std::vector<librbd::linked_image_spec_t> child_images;
+ r = list_children(ictx, parent_spec, &child_images);
+ if (r < 0) {
+ return r;
+ }
+
+ size_t size = child_images.size();
+ if (size == 0) {
+ return 0;
+ }
+
+ librados::IoCtx child_io_ctx;
+ int64_t child_pool_id = -1;
+ size_t i = 0;
+  for (auto &child_image : child_images) {
+ std::string pool = child_image.pool_name;
+ if (child_pool_id == -1 ||
+ child_pool_id != child_image.pool_id ||
+ child_io_ctx.get_namespace() != child_image.pool_namespace) {
+ r = librbd::util::create_ioctx(
+ ictx->md_ctx, "child image", child_image.pool_id,
+ child_image.pool_namespace, &child_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ child_pool_id = child_image.pool_id;
+ }
+
+ ImageCtx *imctx = new ImageCtx("", child_image.image_id, nullptr,
+ child_io_ctx, false);
+ r = imctx->state->open(0);
+ if (r < 0) {
+ lderr(cct) << "error opening image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if ((imctx->features & RBD_FEATURE_DEEP_FLATTEN) == 0 &&
+ !imctx->snaps.empty()) {
+ lderr(cct) << "snapshot in-use by " << pool << "/" << imctx->name
+ << dendl;
+ imctx->state->close();
+ return -EBUSY;
+ }
+
+ librbd::NoOpProgressContext prog_ctx;
+ r = imctx->operations->flatten(prog_ctx);
+ if (r < 0) {
+ lderr(cct) << "error flattening image: " << pool << "/"
+ << (child_image.pool_namespace.empty() ?
+ "" : "/" + child_image.pool_namespace)
+ << child_image.image_name << cpp_strerror(r) << dendl;
+ imctx->state->close();
+ return r;
+ }
+
+ r = imctx->state->close();
+ if (r < 0) {
+ lderr(cct) << "failed to close image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ pctx.update_progress(++i, size);
+ ceph_assert(i <= size);
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Image<I>::encryption_format(I* ictx, encryption_format_t format,
+ encryption_options_t opts, size_t opts_size,
+ bool c_api) {
+ crypto::EncryptionFormat<I>* result_format;
+ auto r = util::create_encryption_format(
+ ictx->cct, format, opts, opts_size, c_api, &result_format);
+ if (r != 0) {
+ return r;
+ }
+
+ C_SaferCond cond;
+ auto req = librbd::crypto::FormatRequest<I>::create(
+ ictx, std::unique_ptr<crypto::EncryptionFormat<I>>(result_format),
+ &cond);
+ req->send();
+ return cond.wait();
+}
+
+template <typename I>
+int Image<I>::encryption_load(I* ictx, const encryption_spec_t *specs,
+ size_t spec_count, bool c_api) {
+ std::vector<std::unique_ptr<crypto::EncryptionFormat<I>>> formats;
+
+ for (size_t i = 0; i < spec_count; ++i) {
+ crypto::EncryptionFormat<I>* result_format;
+ auto r = util::create_encryption_format(
+ ictx->cct, specs[i].format, specs[i].opts, specs[i].opts_size,
+ c_api, &result_format);
+ if (r != 0) {
+ return r;
+ }
+
+ formats.emplace_back(result_format);
+ }
+
+ C_SaferCond cond;
+ auto req = librbd::crypto::LoadRequest<I>::create(
+ ictx, std::move(formats), &cond);
+ req->send();
+ return cond.wait();
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Image<librbd::ImageCtx>;
diff --git a/src/librbd/api/Image.h b/src/librbd/api/Image.h
new file mode 100644
index 000000000..29398d6cd
--- /dev/null
+++ b/src/librbd/api/Image.h
@@ -0,0 +1,85 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef LIBRBD_API_IMAGE_H
+#define LIBRBD_API_IMAGE_H
+
+#include "include/rbd/librbd.hpp"
+#include "include/rados/librados_fwd.hpp"
+#include "librbd/Types.h"
+#include <map>
+#include <set>
+#include <string>
+
+namespace librbd {
+
+class ImageOptions;
+class ProgressContext;
+
+struct ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Image {
+ typedef std::map<std::string, std::string> ImageNameToIds;
+
+ static int64_t get_data_pool_id(ImageCtxT *ictx);
+
+ static int get_op_features(ImageCtxT *ictx, uint64_t *op_features);
+
+ static int list_images(librados::IoCtx& io_ctx,
+ std::vector<image_spec_t> *images);
+ static int list_images_v2(librados::IoCtx& io_ctx,
+ ImageNameToIds *images);
+
+ static int get_parent(ImageCtxT *ictx,
+ librbd::linked_image_spec_t *parent_image,
+ librbd::snap_spec_t *parent_snap);
+
+ static int list_children(ImageCtxT *ictx,
+ std::vector<librbd::linked_image_spec_t> *images);
+ static int list_children(ImageCtxT *ictx,
+ const cls::rbd::ParentImageSpec &parent_spec,
+ std::vector<librbd::linked_image_spec_t> *images);
+
+ static int list_descendants(IoCtx& io_ctx, const std::string &image_id,
+ const std::optional<size_t> &max_level,
+ std::vector<librbd::linked_image_spec_t> *images);
+ static int list_descendants(ImageCtxT *ictx,
+ const std::optional<size_t> &max_level,
+ std::vector<librbd::linked_image_spec_t> *images);
+ static int list_descendants(ImageCtxT *ictx,
+ const cls::rbd::ParentImageSpec &parent_spec,
+ const std::optional<size_t> &max_level,
+ std::vector<librbd::linked_image_spec_t> *images);
+
+ static int deep_copy(ImageCtxT *ictx, librados::IoCtx& dest_md_ctx,
+ const char *destname, ImageOptions& opts,
+ ProgressContext &prog_ctx);
+ static int deep_copy(ImageCtxT *src, ImageCtxT *dest, bool flatten,
+ ProgressContext &prog_ctx);
+
+ static int snap_set(ImageCtxT *ictx,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const char *snap_name);
+ static int snap_set(ImageCtxT *ictx, uint64_t snap_id);
+
+ static int remove(librados::IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext& prog_ctx);
+
+  static int flatten_children(ImageCtxT *ictx, const char* snap_name,
+                              ProgressContext& pctx);
+
+ static int encryption_format(ImageCtxT *ictx, encryption_format_t format,
+ encryption_options_t opts, size_t opts_size,
+ bool c_api);
+ static int encryption_load(ImageCtxT *ictx, const encryption_spec_t *specs,
+ size_t spec_count, bool c_api);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Image<librbd::ImageCtx>;
+
+#endif // LIBRBD_API_IMAGE_H
diff --git a/src/librbd/api/Io.cc b/src/librbd/api/Io.cc
new file mode 100644
index 000000000..c1bd38fc0
--- /dev/null
+++ b/src/librbd/api/Io.cc
@@ -0,0 +1,555 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Io.h"
+#include "include/intarith.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "common/EventTrace.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/Utils.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+#include "librbd/io/Types.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Io " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
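+// reject I/O up front when the image's separate data pool could not be
+// opened -- nothing can be dispatched without a valid data context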
+template <typename I>
+bool is_valid_io(I& image_ctx, io::AioCompletion* aio_comp) {
+ auto cct = image_ctx.cct;
+
+ if (!image_ctx.data_ctx.is_valid()) {
+ lderr(cct) << "missing data pool" << dendl;
+
+ aio_comp->fail(-ENODEV);
+ return false;
+ }
+
+ return true;
+}
+
+} // anonymous namespace
+
+template <typename I>
+ssize_t Io<I>::read(
+ I &image_ctx, uint64_t off, uint64_t len, io::ReadResult &&read_result,
+ int op_flags) {
+ auto cct = image_ctx.cct;
+
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
+ << "len = " << len << dendl;
+
+ C_SaferCond ctx;
+ auto aio_comp = io::AioCompletion::create(&ctx);
+ aio_read(image_ctx, aio_comp, off, len, std::move(read_result), op_flags,
+ false);
+ return ctx.wait();
+}
+
+template <typename I>
+ssize_t Io<I>::write(
+ I &image_ctx, uint64_t off, uint64_t len, bufferlist &&bl, int op_flags) {
+ auto cct = image_ctx.cct;
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
+ << "len = " << len << dendl;
+
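+  // clamp the request to the image size: an offset past the end is
+  // rejected and a length overrunning the end is shrunk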
+ image_ctx.image_lock.lock_shared();
+ int r = clip_io(util::get_image_ctx(&image_ctx), off, &len,
+ io::ImageArea::DATA);
+ image_ctx.image_lock.unlock_shared();
+ if (r < 0) {
+ lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ C_SaferCond ctx;
+ auto aio_comp = io::AioCompletion::create(&ctx);
+ aio_write(image_ctx, aio_comp, off, len, std::move(bl), op_flags, false);
+
+ r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return len;
+}
+
+template <typename I>
+ssize_t Io<I>::discard(
+ I &image_ctx, uint64_t off, uint64_t len,
+ uint32_t discard_granularity_bytes) {
+ auto cct = image_ctx.cct;
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
+ << "len = " << len << dendl;
+
+ image_ctx.image_lock.lock_shared();
+ int r = clip_io(util::get_image_ctx(&image_ctx), off, &len,
+ io::ImageArea::DATA);
+ image_ctx.image_lock.unlock_shared();
+ if (r < 0) {
+ lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ C_SaferCond ctx;
+ auto aio_comp = io::AioCompletion::create(&ctx);
+ aio_discard(image_ctx, aio_comp, off, len, discard_granularity_bytes, false);
+
+ r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return len;
+}
+
+template <typename I>
+ssize_t Io<I>::write_same(
+ I &image_ctx, uint64_t off, uint64_t len, bufferlist &&bl, int op_flags) {
+ auto cct = image_ctx.cct;
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
+ << "len = " << len << ", data_len " << bl.length() << dendl;
+
+ image_ctx.image_lock.lock_shared();
+ int r = clip_io(util::get_image_ctx(&image_ctx), off, &len,
+ io::ImageArea::DATA);
+ image_ctx.image_lock.unlock_shared();
+ if (r < 0) {
+ lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ C_SaferCond ctx;
+ auto aio_comp = io::AioCompletion::create(&ctx);
+ aio_write_same(image_ctx, aio_comp, off, len, std::move(bl), op_flags, false);
+
+ r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return len;
+}
+
+template <typename I>
+ssize_t Io<I>::write_zeroes(I& image_ctx, uint64_t off, uint64_t len,
+ int zero_flags, int op_flags) {
+ auto cct = image_ctx.cct;
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
+ << "len = " << len << dendl;
+
+ image_ctx.image_lock.lock_shared();
+ int r = clip_io(util::get_image_ctx(&image_ctx), off, &len,
+ io::ImageArea::DATA);
+ image_ctx.image_lock.unlock_shared();
+ if (r < 0) {
+ lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ C_SaferCond ctx;
+ auto aio_comp = io::AioCompletion::create(&ctx);
+ aio_write_zeroes(image_ctx, aio_comp, off, len, zero_flags, op_flags, false);
+
+ r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return len;
+}
+
+template <typename I>
+ssize_t Io<I>::compare_and_write(
+ I &image_ctx, uint64_t off, uint64_t len, bufferlist &&cmp_bl,
+ bufferlist &&bl, uint64_t *mismatch_off, int op_flags) {
+ auto cct = image_ctx.cct;
+ ldout(cct, 20) << "compare_and_write ictx=" << &image_ctx << ", off="
+ << off << ", " << "len = " << len << dendl;
+
+ image_ctx.image_lock.lock_shared();
+ int r = clip_io(util::get_image_ctx(&image_ctx), off, &len,
+ io::ImageArea::DATA);
+ image_ctx.image_lock.unlock_shared();
+ if (r < 0) {
+ lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ C_SaferCond ctx;
+ auto aio_comp = io::AioCompletion::create(&ctx);
+ aio_compare_and_write(image_ctx, aio_comp, off, len, std::move(cmp_bl),
+ std::move(bl), mismatch_off, op_flags, false);
+
+ r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return len;
+}
+
+template <typename I>
+int Io<I>::flush(I &image_ctx) {
+ auto cct = image_ctx.cct;
+ ldout(cct, 20) << "ictx=" << &image_ctx << dendl;
+
+ C_SaferCond ctx;
+ auto aio_comp = io::AioCompletion::create(&ctx);
+ aio_flush(image_ctx, aio_comp, false);
+
+ int r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+void Io<I>::aio_read(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off,
+ uint64_t len, io::ReadResult &&read_result, int op_flags,
+ bool native_async) {
+ auto cct = image_ctx.cct;
+ FUNCTRACE(cct);
+ ZTracer::Trace trace;
+ if (image_ctx.blkin_trace_all) {
+ trace.init("io: read", &image_ctx.trace_endpoint);
+ trace.event("init");
+ }
+
+ aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_READ);
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", "
+ << "completion=" << aio_comp << ", off=" << off << ", "
+ << "len=" << len << ", " << "flags=" << op_flags << dendl;
+
+ if (native_async && image_ctx.event_socket.is_valid()) {
+ aio_comp->set_event_notify(true);
+ }
+
+ if (!is_valid_io(image_ctx, aio_comp)) {
+ return;
+ }
+
+ auto req = io::ImageDispatchSpec::create_read(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
+ {{off, len}}, io::ImageArea::DATA, std::move(read_result),
+ image_ctx.get_data_io_context(), op_flags, 0, trace);
+ req->send();
+}
+
+template <typename I>
+void Io<I>::aio_write(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off,
+ uint64_t len, bufferlist &&bl, int op_flags,
+ bool native_async) {
+ auto cct = image_ctx.cct;
+ FUNCTRACE(cct);
+ ZTracer::Trace trace;
+ if (image_ctx.blkin_trace_all) {
+ trace.init("io: write", &image_ctx.trace_endpoint);
+ trace.event("init");
+ }
+
+ aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_WRITE);
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", "
+ << "completion=" << aio_comp << ", off=" << off << ", "
+ << "len=" << len << ", flags=" << op_flags << dendl;
+
+ if (native_async && image_ctx.event_socket.is_valid()) {
+ aio_comp->set_event_notify(true);
+ }
+
+ if (!is_valid_io(image_ctx, aio_comp)) {
+ return;
+ }
+
+ auto req = io::ImageDispatchSpec::create_write(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
+ {{off, len}}, io::ImageArea::DATA, std::move(bl), op_flags, trace);
+ req->send();
+}
+
+template <typename I>
+void Io<I>::aio_discard(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off,
+ uint64_t len, uint32_t discard_granularity_bytes,
+ bool native_async) {
+ auto cct = image_ctx.cct;
+ FUNCTRACE(cct);
+ ZTracer::Trace trace;
+ if (image_ctx.blkin_trace_all) {
+ trace.init("io: discard", &image_ctx.trace_endpoint);
+ trace.event("init");
+ }
+
+ aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_DISCARD);
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", "
+ << "completion=" << aio_comp << ", off=" << off << ", "
+ << "len=" << len << dendl;
+
+ if (native_async && image_ctx.event_socket.is_valid()) {
+ aio_comp->set_event_notify(true);
+ }
+
+ if (!is_valid_io(image_ctx, aio_comp)) {
+ return;
+ }
+
+ auto req = io::ImageDispatchSpec::create_discard(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
+ {{off, len}}, io::ImageArea::DATA, discard_granularity_bytes, trace);
+ req->send();
+}
+
+template <typename I>
+void Io<I>::aio_write_same(I &image_ctx, io::AioCompletion *aio_comp,
+ uint64_t off, uint64_t len, bufferlist &&bl,
+ int op_flags, bool native_async) {
+ auto cct = image_ctx.cct;
+ FUNCTRACE(cct);
+ ZTracer::Trace trace;
+ if (image_ctx.blkin_trace_all) {
+ trace.init("io: writesame", &image_ctx.trace_endpoint);
+ trace.event("init");
+ }
+
+ aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_WRITESAME);
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", "
+ << "completion=" << aio_comp << ", off=" << off << ", "
+ << "len=" << len << ", data_len = " << bl.length() << ", "
+ << "flags=" << op_flags << dendl;
+
+ if (native_async && image_ctx.event_socket.is_valid()) {
+ aio_comp->set_event_notify(true);
+ }
+
+ if (!is_valid_io(image_ctx, aio_comp)) {
+ return;
+ }
+
+ auto req = io::ImageDispatchSpec::create_write_same(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
+ {{off, len}}, io::ImageArea::DATA, std::move(bl), op_flags, trace);
+ req->send();
+}
+
+template <typename I>
+void Io<I>::aio_write_zeroes(I& image_ctx, io::AioCompletion *aio_comp,
+ uint64_t off, uint64_t len, int zero_flags,
+ int op_flags, bool native_async) {
+ auto cct = image_ctx.cct;
+ FUNCTRACE(cct);
+ ZTracer::Trace trace;
+ if (image_ctx.blkin_trace_all) {
+ trace.init("io: write_zeroes", &image_ctx.trace_endpoint);
+ trace.event("init");
+ }
+
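+  // a thick-provision request is serviced as a write-same of zeroes
+  // rather than a discard so the range stays fully allocated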
+ auto io_type = io::AIO_TYPE_DISCARD;
+ if ((zero_flags & RBD_WRITE_ZEROES_FLAG_THICK_PROVISION) != 0) {
+ zero_flags &= ~RBD_WRITE_ZEROES_FLAG_THICK_PROVISION;
+ io_type = io::AIO_TYPE_WRITESAME;
+ }
+
+ aio_comp->init_time(util::get_image_ctx(&image_ctx), io_type);
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", "
+ << "completion=" << aio_comp << ", off=" << off << ", "
+ << "len=" << len << dendl;
+
+ if (native_async && image_ctx.event_socket.is_valid()) {
+ aio_comp->set_event_notify(true);
+ }
+
+ // validate the supported flags
+ if (zero_flags != 0U) {
+ aio_comp->fail(-EINVAL);
+ return;
+ }
+
+ if (!is_valid_io(image_ctx, aio_comp)) {
+ return;
+ }
+
+ if (io_type == io::AIO_TYPE_WRITESAME) {
+    // write-same needs to be aligned to its buffer but librbd has never
+    // forced block alignment. Hide that requirement from the user by
+    // issuing plain zeroed writes for any unaligned head and tail.
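+    // e.g. off=100, len=2000 with the 512-byte buffer below splits into
+    // a prepend write [100, 512), a write-same [512, 2048) and an
+    // append write [2048, 2100)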
+ const uint64_t data_length = 512;
+ uint64_t write_same_offset = p2roundup(off, data_length);
+ uint64_t write_same_offset_end = p2align(off + len, data_length);
+ uint64_t write_same_length = 0;
+ if (write_same_offset_end > write_same_offset) {
+ write_same_length = write_same_offset_end - write_same_offset;
+ }
+
+ uint64_t prepend_offset = off;
+ uint64_t prepend_length = write_same_offset - off;
+ uint64_t append_offset = write_same_offset + write_same_length;
+ uint64_t append_length = len - prepend_length - write_same_length;
+ ldout(cct, 20) << "prepend_offset=" << prepend_offset << ", "
+ << "prepend_length=" << prepend_length << ", "
+ << "write_same_offset=" << write_same_offset << ", "
+ << "write_same_length=" << write_same_length << ", "
+ << "append_offset=" << append_offset << ", "
+ << "append_length=" << append_length << dendl;
+ ceph_assert(prepend_length + write_same_length + append_length == len);
+
+ if (write_same_length <= data_length) {
+ // unaligned or small write-zeroes request -- use single write
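+      // (a write-same would span at most one repetition of the zero
+      // buffer here, so it saves nothing over a plain zeroed write)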
+ bufferlist bl;
+ bl.append_zero(len);
+
+ aio_comp->aio_type = io::AIO_TYPE_WRITE;
+ auto req = io::ImageDispatchSpec::create_write(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
+ {{off, len}}, io::ImageArea::DATA, std::move(bl), op_flags, trace);
+ req->send();
+ return;
+ } else if (prepend_length == 0 && append_length == 0) {
+ // fully aligned -- use a single write-same image request
+ bufferlist bl;
+ bl.append_zero(data_length);
+
+ auto req = io::ImageDispatchSpec::create_write_same(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
+ {{off, len}}, io::ImageArea::DATA, std::move(bl), op_flags, trace);
+ req->send();
+ return;
+ }
+
+ // to reach this point, we need at least one prepend/append write along with
+ // a write-same -- therefore we will need to wrap the provided AioCompletion
+ auto request_count = 1;
+ if (prepend_length > 0) {
+ ++request_count;
+ }
+ if (append_length > 0) {
+ ++request_count;
+ }
+
+ ceph_assert(request_count > 1);
+ aio_comp->start_op();
+ aio_comp->set_request_count(request_count);
+
+ if (prepend_length > 0) {
+ bufferlist bl;
+ bl.append_zero(prepend_length);
+
+ Context* prepend_ctx = new io::C_AioRequest(aio_comp);
+ auto prepend_aio_comp = io::AioCompletion::create_and_start(
+ prepend_ctx, &image_ctx, io::AIO_TYPE_WRITE);
+ auto prepend_req = io::ImageDispatchSpec::create_write(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, prepend_aio_comp,
+ {{prepend_offset, prepend_length}}, io::ImageArea::DATA,
+ std::move(bl), op_flags, trace);
+ prepend_req->send();
+ }
+
+ if (append_length > 0) {
+ bufferlist bl;
+ bl.append_zero(append_length);
+
+ Context* append_ctx = new io::C_AioRequest(aio_comp);
+ auto append_aio_comp = io::AioCompletion::create_and_start(
+ append_ctx, &image_ctx, io::AIO_TYPE_WRITE);
+ auto append_req = io::ImageDispatchSpec::create_write(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, append_aio_comp,
+ {{append_offset, append_length}}, io::ImageArea::DATA,
+ std::move(bl), op_flags, trace);
+ append_req->send();
+ }
+
+ bufferlist bl;
+ bl.append_zero(data_length);
+
+ Context* write_same_ctx = new io::C_AioRequest(aio_comp);
+ auto write_same_aio_comp = io::AioCompletion::create_and_start(
+ write_same_ctx, &image_ctx, io::AIO_TYPE_WRITESAME);
+ auto req = io::ImageDispatchSpec::create_write_same(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, write_same_aio_comp,
+ {{write_same_offset, write_same_length}}, io::ImageArea::DATA,
+ std::move(bl), op_flags, trace);
+ req->send();
+ return;
+ }
+
+ // enable partial discard (zeroing) of objects
+ uint32_t discard_granularity_bytes = 0;
+
+ auto req = io::ImageDispatchSpec::create_discard(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
+ {{off, len}}, io::ImageArea::DATA, discard_granularity_bytes, trace);
+ req->send();
+}
+
+template <typename I>
+void Io<I>::aio_compare_and_write(I &image_ctx, io::AioCompletion *aio_comp,
+ uint64_t off, uint64_t len,
+ bufferlist &&cmp_bl,
+ bufferlist &&bl, uint64_t *mismatch_off,
+ int op_flags, bool native_async) {
+ auto cct = image_ctx.cct;
+ FUNCTRACE(cct);
+ ZTracer::Trace trace;
+ if (image_ctx.blkin_trace_all) {
+ trace.init("io: compare_and_write", &image_ctx.trace_endpoint);
+ trace.event("init");
+ }
+
+ aio_comp->init_time(util::get_image_ctx(&image_ctx),
+ io::AIO_TYPE_COMPARE_AND_WRITE);
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", "
+ << "completion=" << aio_comp << ", off=" << off << ", "
+ << "len=" << len << dendl;
+
+ if (native_async && image_ctx.event_socket.is_valid()) {
+ aio_comp->set_event_notify(true);
+ }
+
+ if (!is_valid_io(image_ctx, aio_comp)) {
+ return;
+ }
+
+ auto req = io::ImageDispatchSpec::create_compare_and_write(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
+ {{off, len}}, io::ImageArea::DATA, std::move(cmp_bl), std::move(bl),
+ mismatch_off, op_flags, trace);
+ req->send();
+}
+
+template <typename I>
+void Io<I>::aio_flush(I &image_ctx, io::AioCompletion *aio_comp,
+ bool native_async) {
+ auto cct = image_ctx.cct;
+ FUNCTRACE(cct);
+ ZTracer::Trace trace;
+ if (image_ctx.blkin_trace_all) {
+ trace.init("io: flush", &image_ctx.trace_endpoint);
+ trace.event("init");
+ }
+
+ aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_FLUSH);
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", "
+ << "completion=" << aio_comp << dendl;
+
+ if (native_async && image_ctx.event_socket.is_valid()) {
+ aio_comp->set_event_notify(true);
+ }
+
+ if (!is_valid_io(image_ctx, aio_comp)) {
+ return;
+ }
+
+ auto req = io::ImageDispatchSpec::create_flush(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
+ io::FLUSH_SOURCE_USER, trace);
+ req->send();
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Io<librbd::ImageCtx>;
diff --git a/src/librbd/api/Io.h b/src/librbd/api/Io.h
new file mode 100644
index 000000000..4e2ec5028
--- /dev/null
+++ b/src/librbd/api/Io.h
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef LIBRBD_API_IO_H
+#define LIBRBD_API_IO_H
+
+#include "include/int_types.h"
+#include "librbd/io/ReadResult.h"
+
+namespace librbd {
+
+struct ImageCtx;
+namespace io { struct AioCompletion; }
+
+namespace api {
+
+template<typename ImageCtxT = ImageCtx>
+struct Io {
+ static ssize_t read(ImageCtxT &image_ctx, uint64_t off, uint64_t len,
+ io::ReadResult &&read_result, int op_flags);
+ static ssize_t write(ImageCtxT &image_ctx, uint64_t off, uint64_t len,
+ bufferlist &&bl, int op_flags);
+ static ssize_t discard(ImageCtxT &image_ctx, uint64_t off, uint64_t len,
+ uint32_t discard_granularity_bytes);
+ static ssize_t write_same(ImageCtxT &image_ctx, uint64_t off, uint64_t len,
+ bufferlist &&bl, int op_flags);
+ static ssize_t write_zeroes(ImageCtxT &image_ctx, uint64_t off, uint64_t len,
+ int zero_flags, int op_flags);
+ static ssize_t compare_and_write(ImageCtxT &image_ctx, uint64_t off,
+ uint64_t len, bufferlist &&cmp_bl,
+ bufferlist &&bl, uint64_t *mismatch_off,
+ int op_flags);
+ static int flush(ImageCtxT &image_ctx);
+
+ static void aio_read(ImageCtxT &image_ctx, io::AioCompletion *c, uint64_t off,
+ uint64_t len, io::ReadResult &&read_result, int op_flags,
+ bool native_async);
+ static void aio_write(ImageCtxT &image_ctx, io::AioCompletion *c,
+ uint64_t off, uint64_t len, bufferlist &&bl,
+ int op_flags, bool native_async);
+ static void aio_discard(ImageCtxT &image_ctx, io::AioCompletion *c,
+ uint64_t off, uint64_t len,
+ uint32_t discard_granularity_bytes,
+ bool native_async);
+ static void aio_write_same(ImageCtxT &image_ctx, io::AioCompletion *c,
+ uint64_t off, uint64_t len, bufferlist &&bl,
+ int op_flags, bool native_async);
+ static void aio_write_zeroes(ImageCtxT &image_ctx, io::AioCompletion *c,
+ uint64_t off, uint64_t len, int zero_flags,
+ int op_flags, bool native_async);
+ static void aio_compare_and_write(ImageCtxT &image_ctx, io::AioCompletion *c,
+ uint64_t off, uint64_t len,
+ bufferlist &&cmp_bl, bufferlist &&bl,
+ uint64_t *mismatch_off, int op_flags,
+ bool native_async);
+ static void aio_flush(ImageCtxT &image_ctx, io::AioCompletion *c,
+ bool native_async);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Io<librbd::ImageCtx>;
+
+#endif // LIBRBD_API_IO_H
diff --git a/src/librbd/api/Migration.cc b/src/librbd/api/Migration.cc
new file mode 100644
index 000000000..957c872ac
--- /dev/null
+++ b/src/librbd/api/Migration.cc
@@ -0,0 +1,2126 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Migration.h"
+#include "include/rados/librados.hpp"
+#include "include/stringify.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Group.h"
+#include "librbd/api/Image.h"
+#include "librbd/api/Snapshot.h"
+#include "librbd/api/Trash.h"
+#include "librbd/deep_copy/Handler.h"
+#include "librbd/deep_copy/ImageCopyRequest.h"
+#include "librbd/deep_copy/MetadataCopyRequest.h"
+#include "librbd/deep_copy/SnapshotCopyRequest.h"
+#include "librbd/exclusive_lock/Policy.h"
+#include "librbd/image/AttachChildRequest.h"
+#include "librbd/image/AttachParentRequest.h"
+#include "librbd/image/CloneRequest.h"
+#include "librbd/image/CreateRequest.h"
+#include "librbd/image/DetachChildRequest.h"
+#include "librbd/image/DetachParentRequest.h"
+#include "librbd/image/ListWatchersRequest.h"
+#include "librbd/image/RemoveRequest.h"
+#include "librbd/image/Types.h"
+#include "librbd/internal.h"
+#include "librbd/migration/FormatInterface.h"
+#include "librbd/migration/OpenSourceImageRequest.h"
+#include "librbd/migration/NativeFormat.h"
+#include "librbd/mirror/DisableRequest.h"
+#include "librbd/mirror/EnableRequest.h"
+
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::Migration: " << __func__ << ": "
+
+namespace librbd {
+
+inline bool operator==(const linked_image_spec_t& lhs,
+                       const linked_image_spec_t& rhs) {
+  return (lhs.pool_id == rhs.pool_id &&
+          lhs.pool_namespace == rhs.pool_namespace &&
+          lhs.image_id == rhs.image_id);
+}
+
+namespace api {
+
+using util::create_rados_callback;
+
+namespace {
+
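+// forwards progress to the user-supplied callback and mirrors it into
+// the migration header's state description, coalescing updates so that
+// at most one header write is in flight at a time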
+class MigrationProgressContext : public ProgressContext {
+public:
+ MigrationProgressContext(librados::IoCtx& io_ctx,
+ const std::string &header_oid,
+ cls::rbd::MigrationState state,
+ ProgressContext *prog_ctx)
+ : m_io_ctx(io_ctx), m_header_oid(header_oid), m_state(state),
+ m_prog_ctx(prog_ctx), m_cct(reinterpret_cast<CephContext*>(io_ctx.cct())),
+ m_lock(ceph::make_mutex(
+ util::unique_lock_name("librbd::api::MigrationProgressContext",
+ this))) {
+ ceph_assert(m_prog_ctx != nullptr);
+ }
+
+ ~MigrationProgressContext() {
+ wait_for_in_flight_updates();
+ }
+
+ int update_progress(uint64_t offset, uint64_t total) override {
+ ldout(m_cct, 20) << "offset=" << offset << ", total=" << total << dendl;
+
+ m_prog_ctx->update_progress(offset, total);
+
+ std::string description = stringify(offset * 100 / total) + "% complete";
+
+ send_state_description_update(description);
+
+ return 0;
+ }
+
+private:
+ librados::IoCtx& m_io_ctx;
+ std::string m_header_oid;
+ cls::rbd::MigrationState m_state;
+ ProgressContext *m_prog_ctx;
+
+ CephContext* m_cct;
+ mutable ceph::mutex m_lock;
+ ceph::condition_variable m_cond;
+ std::string m_state_description;
+ bool m_pending_update = false;
+ int m_in_flight_state_updates = 0;
+
+ void send_state_description_update(const std::string &description) {
+ std::lock_guard locker{m_lock};
+
+ if (description == m_state_description) {
+ return;
+ }
+
+ m_state_description = description;
+
+ if (m_in_flight_state_updates > 0) {
+ m_pending_update = true;
+ return;
+ }
+
+ set_state_description();
+ }
+
+ void set_state_description() {
+ ldout(m_cct, 20) << "state_description=" << m_state_description << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ librados::ObjectWriteOperation op;
+ cls_client::migration_set_state(&op, m_state, m_state_description);
+
+ using klass = MigrationProgressContext;
+ librados::AioCompletion *comp =
+ create_rados_callback<klass, &klass::handle_set_state_description>(this);
+ int r = m_io_ctx.aio_operate(m_header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+
+ m_in_flight_state_updates++;
+ }
+
+ void handle_set_state_description(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_in_flight_state_updates--;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to update migration state: " << cpp_strerror(r)
+ << dendl;
+ } else if (m_pending_update) {
+ set_state_description();
+ m_pending_update = false;
+ } else {
+ m_cond.notify_all();
+ }
+ }
+
+ void wait_for_in_flight_updates() {
+ std::unique_lock locker{m_lock};
+
+ ldout(m_cct, 20) << "m_in_flight_state_updates="
+ << m_in_flight_state_updates << dendl;
+ m_pending_update = false;
+ m_cond.wait(locker, [this] { return m_in_flight_state_updates <= 0; });
+ }
+};
+
+int trash_search(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+ const std::string &image_name, std::string *image_id) {
+ std::vector<trash_image_info_t> entries;
+
+ int r = Trash<>::list(io_ctx, entries, false);
+ if (r < 0) {
+ return r;
+ }
+
+ for (auto &entry : entries) {
+ if (entry.source == source && entry.name == image_name) {
+ *image_id = entry.id;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
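+// the caller may name either end of a migration: open the named image,
+// inspect its migration header to learn whether it is the source or the
+// destination, then open the paired image as well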
+template <typename I>
+int open_images(librados::IoCtx& io_ctx, const std::string &image_name,
+ I **src_image_ctx, I **dst_image_ctx,
+ cls::rbd::MigrationSpec* src_migration_spec,
+ cls::rbd::MigrationSpec* dst_migration_spec,
+ bool skip_open_dst_image) {
+ CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ *src_image_ctx = nullptr;
+ *dst_image_ctx = nullptr;
+
+ ldout(cct, 10) << "trying to open image by name " << io_ctx.get_pool_name()
+ << "/" << image_name << dendl;
+ auto image_ctx = I::create(image_name, "", nullptr, io_ctx, false);
+ int r = image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+ if (r == -ENOENT) {
+ // presume user passed the source image so we need to search the trash
+ ldout(cct, 10) << "Source image is not found. Trying trash" << dendl;
+
+ std::string src_image_id;
+ r = trash_search(io_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION, image_name,
+ &src_image_id);
+ if (r < 0) {
+ lderr(cct) << "failed to determine image id: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ ldout(cct, 10) << "source image id from trash: " << src_image_id << dendl;
+ image_ctx = I::create(image_name, src_image_id, nullptr, io_ctx, false);
+ r = image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+ }
+
+ if (r < 0) {
+ if (r != -ENOENT) {
+ lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ image_ctx = nullptr;
+ }
+
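+  // if we bail out with an error, close whichever image contexts were
+  // opened along the way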
+ BOOST_SCOPE_EXIT_TPL(&r, &image_ctx, src_image_ctx, dst_image_ctx) {
+ if (r != 0) {
+ if (*src_image_ctx != nullptr) {
+ (*src_image_ctx)->state->close();
+ }
+ if (*dst_image_ctx != nullptr) {
+ (*dst_image_ctx)->state->close();
+ }
+ if (image_ctx != nullptr) {
+ image_ctx->state->close();
+ }
+ }
+ } BOOST_SCOPE_EXIT_END;
+
+  // the opened image is either the source or the destination
+ cls::rbd::MigrationSpec migration_spec;
+ r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid,
+ &migration_spec);
+ if (r < 0) {
+ lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ ldout(cct, 10) << "migration spec: " << migration_spec << dendl;
+ if (migration_spec.header_type == cls::rbd::MIGRATION_HEADER_TYPE_SRC) {
+ ldout(cct, 10) << "the source image is opened" << dendl;
+ *src_image_ctx = image_ctx;
+ *src_migration_spec = migration_spec;
+ image_ctx = nullptr;
+ } else if (migration_spec.header_type ==
+ cls::rbd::MIGRATION_HEADER_TYPE_DST) {
+ ldout(cct, 10) << "the destination image is opened" << dendl;
+ std::string image_id = image_ctx->id;
+ image_ctx->state->close();
+ image_ctx = I::create(image_name, image_id, nullptr, io_ctx, false);
+
+ if (!skip_open_dst_image) {
+ ldout(cct, 10) << "re-opening the destination image" << dendl;
+ r = image_ctx->state->open(0);
+ if (r < 0) {
+ image_ctx = nullptr;
+ lderr(cct) << "failed to re-open destination image: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ }
+
+ *dst_image_ctx = image_ctx;
+ *dst_migration_spec = migration_spec;
+ image_ctx = nullptr;
+ } else {
+ lderr(cct) << "unexpected migration header type: "
+ << migration_spec.header_type << dendl;
+ r = -EINVAL;
+ return r;
+ }
+
+ // attempt to open the other (paired) image
+ I** other_image_ctx = nullptr;
+ std::string other_image_type;
+ std::string other_image_name;
+ std::string other_image_id;
+ cls::rbd::MigrationSpec* other_migration_spec = nullptr;
+ librados::IoCtx other_io_ctx;
+
+ int flags = OPEN_FLAG_IGNORE_MIGRATING;
+ if (*src_image_ctx == nullptr &&
+ dst_migration_spec->source_spec.empty()) {
+ r = util::create_ioctx(io_ctx, "source image", migration_spec.pool_id,
+ migration_spec.pool_namespace, &other_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ other_image_type = "source";
+ other_image_ctx = src_image_ctx;
+ other_migration_spec = src_migration_spec;
+ other_image_name = migration_spec.image_name;
+ other_image_id = migration_spec.image_id;
+
+ if (other_image_id.empty()) {
+ ldout(cct, 20) << "trying to open v1 image by name "
+ << other_io_ctx.get_pool_name() << "/"
+ << other_image_name << dendl;
+ flags |= OPEN_FLAG_OLD_FORMAT;
+ } else {
+ ldout(cct, 20) << "trying to open v2 image by id "
+ << other_io_ctx.get_pool_name() << "/"
+ << other_image_id << dendl;
+ }
+
+ *src_image_ctx = I::create(other_image_name, other_image_id, nullptr,
+ other_io_ctx, false);
+ } else if (*dst_image_ctx == nullptr) {
+ r = util::create_ioctx(io_ctx, "destination image", migration_spec.pool_id,
+ migration_spec.pool_namespace, &other_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ other_image_name = migration_spec.image_name;
+ if (skip_open_dst_image) {
+ other_image_id = migration_spec.image_id;
+ } else {
+ other_image_type = "destination";
+ other_image_ctx = dst_image_ctx;
+ other_migration_spec = dst_migration_spec;
+ other_image_id = migration_spec.image_id;
+ }
+
+ *dst_image_ctx = I::create(other_image_name, other_image_id, nullptr,
+ other_io_ctx, false);
+ }
+
+ if (other_image_ctx != nullptr) {
+ r = (*other_image_ctx)->state->open(flags);
+ if (r < 0) {
+ lderr(cct) << "failed to open " << other_image_type << " image "
+ << other_io_ctx.get_pool_name()
+ << "/" << (other_image_id.empty() ?
+ other_image_name : other_image_id)
+ << ": " << cpp_strerror(r) << dendl;
+ *other_image_ctx = nullptr;
+ return r;
+ }
+
+ r = cls_client::migration_get(&(*other_image_ctx)->md_ctx,
+ (*other_image_ctx)->header_oid,
+ other_migration_spec);
+ if (r < 0) {
+ lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ ldout(cct, 20) << other_image_type << " migration spec: "
+ << *other_migration_spec << dendl;
+ }
+
+ if (!skip_open_dst_image) {
+    // legacy clients only store the status in the source image
+ if (dst_migration_spec->source_spec.empty()) {
+ dst_migration_spec->state = migration_spec.state;
+ dst_migration_spec->state_description =
+ migration_spec.state_description;
+ }
+ }
+
+ return 0;
+}
+
+class SteppedProgressContext : public ProgressContext {
+public:
+ SteppedProgressContext(ProgressContext* progress_ctx, size_t total_steps)
+ : m_progress_ctx(progress_ctx), m_total_steps(total_steps) {
+ }
+
+ void next_step() {
+ ceph_assert(m_current_step < m_total_steps);
+ ++m_current_step;
+ }
+
+ int update_progress(uint64_t object_number,
+ uint64_t object_count) override {
+ return m_progress_ctx->update_progress(
+ object_number + (object_count * (m_current_step - 1)),
+ object_count * m_total_steps);
+ }
+
+private:
+ ProgressContext* m_progress_ctx;
+ size_t m_total_steps;
+ size_t m_current_step = 1;
+};
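+
+// Worked example (illustrative): with two steps and object_count == 100, a
+// call to update_progress(50, 100) during step 1 is reported upstream as
+// 50/200 (25% overall), while the same call during step 2 is reported as
+// 150/200 (75% overall); each step occupies an equal share of the range.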
+
+} // anonymous namespace
+
+template <typename I>
+int Migration<I>::prepare(librados::IoCtx& io_ctx,
+ const std::string &image_name,
+ librados::IoCtx& dest_io_ctx,
+ const std::string &dest_image_name_,
+ ImageOptions& opts) {
+ CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ std::string dest_image_name = dest_image_name_.empty() ? image_name :
+ dest_image_name_;
+
+ ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << " -> "
+ << dest_io_ctx.get_pool_name() << "/" << dest_image_name
+ << ", opts=" << opts << dendl;
+
+ auto src_image_ctx = I::create(image_name, "", nullptr, io_ctx, false);
+ int r = src_image_ctx->state->open(0);
+ if (r < 0) {
+ lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ BOOST_SCOPE_EXIT_TPL(src_image_ctx) {
+ src_image_ctx->state->close();
+ } BOOST_SCOPE_EXIT_END;
+
+ std::list<obj_watch_t> watchers;
+ int flags = librbd::image::LIST_WATCHERS_FILTER_OUT_MY_INSTANCE |
+ librbd::image::LIST_WATCHERS_FILTER_OUT_MIRROR_INSTANCES;
+ C_SaferCond on_list_watchers;
+ auto list_watchers_request = librbd::image::ListWatchersRequest<I>::create(
+ *src_image_ctx, flags, &watchers, &on_list_watchers);
+ list_watchers_request->send();
+ r = on_list_watchers.wait();
+ if (r < 0) {
+ lderr(cct) << "failed listing watchers:" << cpp_strerror(r) << dendl;
+ return r;
+ }
+ if (!watchers.empty()) {
+ lderr(cct) << "image has watchers - not migrating" << dendl;
+ return -EBUSY;
+ }
+
+ uint64_t format = 2;
+ if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) {
+ opts.set(RBD_IMAGE_OPTION_FORMAT, format);
+ }
+ if (format != 2) {
+ lderr(cct) << "unsupported destination image format: " << format << dendl;
+ return -EINVAL;
+ }
+
+ uint64_t features;
+ {
+ std::shared_lock image_locker{src_image_ctx->image_lock};
+ features = src_image_ctx->features;
+ }
+ opts.get(RBD_IMAGE_OPTION_FEATURES, &features);
+ if ((features & ~RBD_FEATURES_ALL) != 0) {
+ lderr(cct) << "librbd does not support requested features" << dendl;
+ return -ENOSYS;
+ }
+ opts.set(RBD_IMAGE_OPTION_FEATURES, features);
+
+ uint64_t order = src_image_ctx->order;
+ if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) {
+ opts.set(RBD_IMAGE_OPTION_ORDER, order);
+ }
+ r = image::CreateRequest<I>::validate_order(cct, order);
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t stripe_unit = src_image_ctx->stripe_unit;
+ if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) {
+ opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
+ }
+ uint64_t stripe_count = src_image_ctx->stripe_count;
+ if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) {
+ opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
+ }
+
+ uint64_t flatten = 0;
+ if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) {
+ opts.unset(RBD_IMAGE_OPTION_FLATTEN);
+ }
+
+ ldout(cct, 20) << "updated opts=" << opts << dendl;
+
+ auto dst_image_ctx = I::create(
+ dest_image_name, util::generate_image_id(dest_io_ctx), nullptr,
+ dest_io_ctx, false);
+ src_image_ctx->image_lock.lock_shared();
+ cls::rbd::MigrationSpec dst_migration_spec{
+ cls::rbd::MIGRATION_HEADER_TYPE_DST,
+ src_image_ctx->md_ctx.get_id(), src_image_ctx->md_ctx.get_namespace(),
+ src_image_ctx->name, src_image_ctx->id, "", {}, 0, false,
+ cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, flatten > 0,
+ cls::rbd::MIGRATION_STATE_PREPARING, ""};
+ src_image_ctx->image_lock.unlock_shared();
+
+ Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec,
+ opts, nullptr);
+ r = migration.prepare();
+
+ return r;
+}
+
+template <typename I>
+int Migration<I>::prepare_import(
+ const std::string& source_spec, librados::IoCtx& dest_io_ctx,
+ const std::string &dest_image_name, ImageOptions& opts) {
+ if (source_spec.empty() || !dest_io_ctx.is_valid() ||
+ dest_image_name.empty()) {
+ return -EINVAL;
+ }
+
+ auto cct = reinterpret_cast<CephContext *>(dest_io_ctx.cct());
+ ldout(cct, 10) << source_spec << " -> "
+ << dest_io_ctx.get_pool_name() << "/"
+ << dest_image_name << ", opts=" << opts << dendl;
+
+ I* src_image_ctx = nullptr;
+ C_SaferCond open_ctx;
+ auto req = migration::OpenSourceImageRequest<I>::create(
+ dest_io_ctx, nullptr, CEPH_NOSNAP,
+ {-1, "", "", "", source_spec, {}, 0, false}, &src_image_ctx, &open_ctx);
+ req->send();
+
+ int r = open_ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to open source image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ auto asio_engine = src_image_ctx->asio_engine;
+ BOOST_SCOPE_EXIT_TPL(src_image_ctx) {
+ src_image_ctx->state->close();
+ } BOOST_SCOPE_EXIT_END;
+
+ uint64_t image_format = 2;
+ if (opts.get(RBD_IMAGE_OPTION_FORMAT, &image_format) != 0) {
+ opts.set(RBD_IMAGE_OPTION_FORMAT, image_format);
+ }
+ if (image_format != 2) {
+ lderr(cct) << "unsupported destination image format: " << image_format
+ << dendl;
+ return -EINVAL;
+ }
+
+ ldout(cct, 20) << "updated opts=" << opts << dendl;
+
+  // use json-spirit to clean up the JSON formatting
+ json_spirit::mObject source_spec_object;
+ json_spirit::mValue json_root;
+  if (json_spirit::read(source_spec, json_root)) {
+ try {
+ source_spec_object = json_root.get_obj();
+ } catch (std::runtime_error&) {
+ lderr(cct) << "failed to clean source spec" << dendl;
+ return -EINVAL;
+ }
+ }
+
+ auto dst_image_ctx = I::create(
+ dest_image_name, util::generate_image_id(dest_io_ctx), nullptr,
+ dest_io_ctx, false);
+ cls::rbd::MigrationSpec dst_migration_spec{
+ cls::rbd::MIGRATION_HEADER_TYPE_DST, -1, "", "", "",
+ json_spirit::write(source_spec_object), {},
+ 0, false, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, true,
+ cls::rbd::MIGRATION_STATE_PREPARING, ""};
+
+ Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec,
+ opts, nullptr);
+  r = migration.prepare_import();
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::execute(librados::IoCtx& io_ctx,
+ const std::string &image_name,
+ ProgressContext &prog_ctx) {
+ CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+ I *src_image_ctx;
+ I *dst_image_ctx;
+ cls::rbd::MigrationSpec src_migration_spec;
+ cls::rbd::MigrationSpec dst_migration_spec;
+ int r = open_images(io_ctx, image_name, &src_image_ctx, &dst_image_ctx,
+ &src_migration_spec, &dst_migration_spec, false);
+ if (r < 0) {
+ return r;
+ }
+
+ // ensure the destination loads the migration info
+ dst_image_ctx->ignore_migrating = false;
+ r = dst_image_ctx->state->refresh();
+ if (r < 0) {
+ lderr(cct) << "failed to refresh destination image: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ BOOST_SCOPE_EXIT_TPL(src_image_ctx, dst_image_ctx) {
+ dst_image_ctx->state->close();
+ if (src_image_ctx != nullptr) {
+ src_image_ctx->state->close();
+ }
+ } BOOST_SCOPE_EXIT_END;
+
+ if (dst_migration_spec.state != cls::rbd::MIGRATION_STATE_PREPARED &&
+ dst_migration_spec.state != cls::rbd::MIGRATION_STATE_EXECUTING) {
+ lderr(cct) << "current migration state is '" << dst_migration_spec.state
+ << "' (should be 'prepared')" << dendl;
+ return -EINVAL;
+ }
+
+ ldout(cct, 5) << "migrating ";
+ if (!dst_migration_spec.source_spec.empty()) {
+ *_dout << dst_migration_spec.source_spec;
+ } else {
+ *_dout << src_image_ctx->md_ctx.get_pool_name() << "/"
+ << src_image_ctx->name;
+ }
+ *_dout << " -> " << dst_image_ctx->md_ctx.get_pool_name() << "/"
+ << dst_image_ctx->name << dendl;
+
+ ImageOptions opts;
+ Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec,
+ opts, &prog_ctx);
+ r = migration.execute();
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::abort(librados::IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext &prog_ctx) {
+ CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+ I *src_image_ctx;
+ I *dst_image_ctx;
+ cls::rbd::MigrationSpec src_migration_spec;
+ cls::rbd::MigrationSpec dst_migration_spec;
+ int r = open_images(io_ctx, image_name, &src_image_ctx, &dst_image_ctx,
+ &src_migration_spec, &dst_migration_spec, true);
+ if (r < 0) {
+ return r;
+ }
+
+ ldout(cct, 5) << "canceling incomplete migration ";
+ if (!dst_migration_spec.source_spec.empty()) {
+ *_dout << dst_migration_spec.source_spec;
+ } else {
+ *_dout << src_image_ctx->md_ctx.get_pool_name() << "/"
+ << src_image_ctx->name;
+ }
+ *_dout << " -> " << dst_image_ctx->md_ctx.get_pool_name() << "/"
+ << dst_image_ctx->name << dendl;
+
+ ImageOptions opts;
+ Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec,
+ opts, &prog_ctx);
+ r = migration.abort();
+
+ if (src_image_ctx != nullptr) {
+ src_image_ctx->state->close();
+ }
+
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::commit(librados::IoCtx& io_ctx,
+ const std::string &image_name,
+ ProgressContext &prog_ctx) {
+ CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+ I *src_image_ctx;
+ I *dst_image_ctx;
+ cls::rbd::MigrationSpec src_migration_spec;
+ cls::rbd::MigrationSpec dst_migration_spec;
+ int r = open_images(io_ctx, image_name, &src_image_ctx, &dst_image_ctx,
+ &src_migration_spec, &dst_migration_spec, false);
+ if (r < 0) {
+ return r;
+ }
+
+ if (dst_migration_spec.state != cls::rbd::MIGRATION_STATE_EXECUTED) {
+ lderr(cct) << "current migration state is '" << dst_migration_spec.state
+ << "' (should be 'executed')" << dendl;
+ dst_image_ctx->state->close();
+ if (src_image_ctx != nullptr) {
+ src_image_ctx->state->close();
+ }
+ return -EINVAL;
+ }
+
+ // ensure the destination loads the migration info
+ dst_image_ctx->ignore_migrating = false;
+ r = dst_image_ctx->state->refresh();
+ if (r < 0) {
+ lderr(cct) << "failed to refresh destination image: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ ldout(cct, 5) << "migrating ";
+ if (!dst_migration_spec.source_spec.empty()) {
+ *_dout << dst_migration_spec.source_spec;
+ } else {
+ *_dout << src_image_ctx->md_ctx.get_pool_name() << "/"
+ << src_image_ctx->name;
+ }
+ *_dout << " -> " << dst_image_ctx->md_ctx.get_pool_name() << "/"
+ << dst_image_ctx->name << dendl;
+
+ ImageOptions opts;
+ Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec,
+ opts, &prog_ctx);
+ r = migration.commit();
+
+  // the image contexts are closed inside migration.commit(); the source
+  // image context is released when the source image is removed
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::status(librados::IoCtx& io_ctx,
+ const std::string &image_name,
+ image_migration_status_t *status) {
+ CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+ I *src_image_ctx;
+ I *dst_image_ctx;
+ cls::rbd::MigrationSpec src_migration_spec;
+ cls::rbd::MigrationSpec dst_migration_spec;
+ int r = open_images(io_ctx, image_name, &src_image_ctx, &dst_image_ctx,
+ &src_migration_spec, &dst_migration_spec, false);
+ if (r < 0) {
+ return r;
+ }
+
+ ldout(cct, 5) << "migrating ";
+ if (!dst_migration_spec.source_spec.empty()) {
+ *_dout << dst_migration_spec.source_spec;
+ } else {
+ *_dout << src_image_ctx->md_ctx.get_pool_name() << "/"
+ << src_image_ctx->name;
+ }
+ *_dout << " -> " << dst_image_ctx->md_ctx.get_pool_name() << "/"
+ << dst_image_ctx->name << dendl;
+
+ ImageOptions opts;
+ Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec,
+ opts, nullptr);
+ r = migration.status(status);
+
+ dst_image_ctx->state->close();
+ if (src_image_ctx != nullptr) {
+ src_image_ctx->state->close();
+ }
+
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::get_source_spec(I* image_ctx, std::string* source_spec) {
+ auto cct = image_ctx->cct;
+ ldout(cct, 10) << dendl;
+
+ image_ctx->image_lock.lock_shared();
+ auto migration_info = image_ctx->migration_info;
+ image_ctx->image_lock.unlock_shared();
+
+ if (migration_info.empty()) {
+ // attempt to directly read the spec in case the state is EXECUTED
+ cls::rbd::MigrationSpec migration_spec;
+ int r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid,
+ &migration_spec);
+ if (r == -ENOENT) {
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ migration_info = {
+ migration_spec.pool_id, migration_spec.pool_namespace,
+ migration_spec.image_name, migration_spec.image_id,
+ migration_spec.source_spec, {}, 0, false};
+ }
+
+ if (!migration_info.source_spec.empty()) {
+ *source_spec = migration_info.source_spec;
+ } else {
+ // legacy migration source
+ *source_spec = migration::NativeFormat<I>::build_source_spec(
+ migration_info.pool_id,
+ migration_info.pool_namespace,
+ migration_info.image_name,
+ migration_info.image_id);
+ }
+
+ return 0;
+}
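+
+// A typical live migration drives the static entry points above in order;
+// a minimal usage sketch (error handling elided, variable names
+// illustrative):
+//
+//   librbd::ImageOptions opts;
+//   librbd::NoOpProgressContext prog_ctx;
+//   librbd::api::Migration<>::prepare(src_io_ctx, "image", dst_io_ctx,
+//                                     "image", opts);
+//   librbd::api::Migration<>::execute(dst_io_ctx, "image", prog_ctx);
+//   librbd::api::Migration<>::commit(dst_io_ctx, "image", prog_ctx);
+//
+// or, to roll back before commit:
+//
+//   librbd::api::Migration<>::abort(dst_io_ctx, "image", prog_ctx);
+//
+// execute/abort/commit/status resolve both images via open_images(), so
+// the supplied IoCtx may reference either the source or the destination
+// pool, provided the image name resolves there.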
+
+template <typename I>
+Migration<I>::Migration(ImageCtx* src_image_ctx,
+ ImageCtx* dst_image_ctx,
+ const cls::rbd::MigrationSpec& dst_migration_spec,
+ ImageOptions& opts, ProgressContext *prog_ctx)
+ : m_cct(dst_image_ctx->cct),
+ m_src_image_ctx(src_image_ctx), m_dst_image_ctx(dst_image_ctx),
+ m_dst_io_ctx(dst_image_ctx->md_ctx), m_dst_image_name(dst_image_ctx->name),
+ m_dst_image_id(dst_image_ctx->id),
+ m_dst_header_oid(util::header_name(m_dst_image_id)),
+ m_image_options(opts), m_flatten(dst_migration_spec.flatten),
+ m_mirroring(dst_migration_spec.mirroring),
+ m_mirror_image_mode(dst_migration_spec.mirror_image_mode),
+ m_prog_ctx(prog_ctx),
+ m_src_migration_spec(cls::rbd::MIGRATION_HEADER_TYPE_SRC,
+ m_dst_io_ctx.get_id(), m_dst_io_ctx.get_namespace(),
+ m_dst_image_name, m_dst_image_id, "", {}, 0,
+ m_mirroring, m_mirror_image_mode, m_flatten,
+ dst_migration_spec.state,
+ dst_migration_spec.state_description),
+ m_dst_migration_spec(dst_migration_spec) {
+ m_dst_io_ctx.dup(dst_image_ctx->md_ctx);
+}
+
+template <typename I>
+int Migration<I>::prepare() {
+ ldout(m_cct, 10) << dendl;
+
+ BOOST_SCOPE_EXIT_TPL(&m_dst_image_ctx) {
+ if (m_dst_image_ctx != nullptr) {
+ m_dst_image_ctx->state->close();
+ }
+ } BOOST_SCOPE_EXIT_END;
+
+ int r = validate_src_snaps(m_src_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = disable_mirroring(m_src_image_ctx, &m_mirroring, &m_mirror_image_mode);
+ if (r < 0) {
+ return r;
+ }
+
+ r = unlink_src_image(m_src_image_ctx);
+ if (r < 0) {
+ enable_mirroring(m_src_image_ctx, m_mirroring, m_mirror_image_mode);
+ return r;
+ }
+
+ r = set_src_migration(m_src_image_ctx);
+ if (r < 0) {
+ relink_src_image(m_src_image_ctx);
+ enable_mirroring(m_src_image_ctx, m_mirroring, m_mirror_image_mode);
+ return r;
+ }
+
+ r = create_dst_image(&m_dst_image_ctx);
+ if (r < 0) {
+ abort();
+ return r;
+ }
+
+ ldout(m_cct, 10) << "succeeded" << dendl;
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::prepare_import() {
+ ldout(m_cct, 10) << dendl;
+
+ BOOST_SCOPE_EXIT_TPL(&m_dst_image_ctx) {
+ if (m_dst_image_ctx != nullptr) {
+ m_dst_image_ctx->state->close();
+ }
+ } BOOST_SCOPE_EXIT_END;
+
+ int r = create_dst_image(&m_dst_image_ctx);
+ if (r < 0) {
+ abort();
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::execute() {
+ ldout(m_cct, 10) << dendl;
+
+ int r = set_state(cls::rbd::MIGRATION_STATE_EXECUTING, "");
+ if (r < 0) {
+ return r;
+ }
+
+ {
+ MigrationProgressContext dst_prog_ctx(
+ m_dst_image_ctx->md_ctx, m_dst_image_ctx->header_oid,
+ cls::rbd::MIGRATION_STATE_EXECUTING, m_prog_ctx);
+ std::optional<MigrationProgressContext> src_prog_ctx;
+ if (m_src_image_ctx != nullptr) {
+ src_prog_ctx.emplace(m_src_image_ctx->md_ctx, m_src_image_ctx->header_oid,
+ cls::rbd::MIGRATION_STATE_EXECUTING, &dst_prog_ctx);
+ }
+
+ while (true) {
+ r = m_dst_image_ctx->operations->migrate(
+ *(src_prog_ctx ? &src_prog_ctx.value() : &dst_prog_ctx));
+ if (r == -EROFS) {
+ std::shared_lock owner_locker{m_dst_image_ctx->owner_lock};
+ if (m_dst_image_ctx->exclusive_lock != nullptr &&
+ !m_dst_image_ctx->exclusive_lock->accept_ops()) {
+ ldout(m_cct, 5) << "lost exclusive lock, retrying remote" << dendl;
+ continue;
+ }
+ }
+ break;
+ }
+ }
+
+ if (r < 0) {
+ lderr(m_cct) << "migration failed: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = set_state(cls::rbd::MIGRATION_STATE_EXECUTED, "");
+ if (r < 0) {
+ return r;
+ }
+
+ m_dst_image_ctx->notify_update();
+
+ ldout(m_cct, 10) << "succeeded" << dendl;
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::abort() {
+ ldout(m_cct, 10) << dendl;
+
+ int r;
+ if (m_src_image_ctx != nullptr) {
+ m_src_image_ctx->owner_lock.lock_shared();
+ if (m_src_image_ctx->exclusive_lock != nullptr &&
+ !m_src_image_ctx->exclusive_lock->is_lock_owner()) {
+ C_SaferCond ctx;
+ m_src_image_ctx->exclusive_lock->acquire_lock(&ctx);
+ m_src_image_ctx->owner_lock.unlock_shared();
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(m_cct) << "error acquiring exclusive lock: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ } else {
+ m_src_image_ctx->owner_lock.unlock_shared();
+ }
+ }
+
+ group_info_t group_info;
+ group_info.pool = -1;
+
+ r = m_dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+ if (r < 0) {
+ ldout(m_cct, 1) << "failed to open destination image: " << cpp_strerror(r)
+ << dendl;
+ m_dst_image_ctx = nullptr;
+ } else {
+ BOOST_SCOPE_EXIT_TPL(&m_dst_image_ctx) {
+ if (m_dst_image_ctx != nullptr) {
+ m_dst_image_ctx->state->close();
+ }
+ } BOOST_SCOPE_EXIT_END;
+
+ std::list<obj_watch_t> watchers;
+ int flags = librbd::image::LIST_WATCHERS_FILTER_OUT_MY_INSTANCE |
+ librbd::image::LIST_WATCHERS_FILTER_OUT_MIRROR_INSTANCES;
+ C_SaferCond on_list_watchers;
+ auto list_watchers_request = librbd::image::ListWatchersRequest<I>::create(
+ *m_dst_image_ctx, flags, &watchers, &on_list_watchers);
+ list_watchers_request->send();
+ r = on_list_watchers.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed listing watchers:" << cpp_strerror(r) << dendl;
+ return r;
+ }
+ if (!watchers.empty()) {
+ lderr(m_cct) << "image has watchers - cannot abort migration" << dendl;
+ return -EBUSY;
+ }
+
+ // ensure destination image is now read-only
+ r = set_state(cls::rbd::MIGRATION_STATE_ABORTING, "");
+ if (r < 0) {
+ return r;
+ }
+
+ SteppedProgressContext progress_ctx(
+ m_prog_ctx, (m_src_image_ctx != nullptr ? 2 : 1));
+ if (m_src_image_ctx != nullptr) {
+ // copy dst HEAD -> src HEAD
+ revert_data(m_dst_image_ctx, m_src_image_ctx, &progress_ctx);
+ progress_ctx.next_step();
+
+ ldout(m_cct, 10) << "relinking children" << dendl;
+ r = relink_children(m_dst_image_ctx, m_src_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ ldout(m_cct, 10) << "removing dst image snapshots" << dendl;
+ std::vector<librbd::snap_info_t> snaps;
+ r = Snapshot<I>::list(m_dst_image_ctx, snaps);
+ if (r < 0) {
+ lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ for (auto &snap : snaps) {
+ librbd::NoOpProgressContext prog_ctx;
+ int r = Snapshot<I>::remove(m_dst_image_ctx, snap.name.c_str(),
+ RBD_SNAP_REMOVE_UNPROTECT, prog_ctx);
+ if (r < 0) {
+ lderr(m_cct) << "failed removing snapshot: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ }
+
+ ldout(m_cct, 10) << "removing group" << dendl;
+
+ r = remove_group(m_dst_image_ctx, &group_info);
+ if (r < 0 && r != -ENOENT) {
+ return r;
+ }
+
+ ldout(m_cct, 10) << "removing dst image" << dendl;
+
+ ceph_assert(m_dst_image_ctx->ignore_migrating);
+
+ auto asio_engine = m_dst_image_ctx->asio_engine;
+ librados::IoCtx dst_io_ctx(m_dst_image_ctx->md_ctx);
+
+ C_SaferCond on_remove;
+ auto req = librbd::image::RemoveRequest<>::create(
+ dst_io_ctx, m_dst_image_ctx, false, false, progress_ctx,
+ asio_engine->get_work_queue(), &on_remove);
+ req->send();
+ r = on_remove.wait();
+
+ m_dst_image_ctx = nullptr;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed removing destination image '"
+ << dst_io_ctx.get_pool_name() << "/" << m_dst_image_name
+ << " (" << m_dst_image_id << ")': " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ }
+
+ if (m_src_image_ctx != nullptr) {
+ r = relink_src_image(m_src_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = add_group(m_src_image_ctx, group_info);
+ if (r < 0) {
+ return r;
+ }
+
+ r = remove_migration(m_src_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = enable_mirroring(m_src_image_ctx, m_mirroring, m_mirror_image_mode);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ ldout(m_cct, 10) << "succeeded" << dendl;
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::commit() {
+ ldout(m_cct, 10) << dendl;
+
+ BOOST_SCOPE_EXIT_TPL(&m_dst_image_ctx, &m_src_image_ctx) {
+ m_dst_image_ctx->state->close();
+ if (m_src_image_ctx != nullptr) {
+ m_src_image_ctx->state->close();
+ }
+ } BOOST_SCOPE_EXIT_END;
+
+ int r = remove_migration(m_dst_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ if (m_src_image_ctx != nullptr) {
+ r = remove_src_image(&m_src_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ r = enable_mirroring(m_dst_image_ctx, m_mirroring, m_mirror_image_mode);
+ if (r < 0) {
+ return r;
+ }
+
+ ldout(m_cct, 10) << "succeeded" << dendl;
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::status(image_migration_status_t *status) {
+ ldout(m_cct, 10) << dendl;
+
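+  // note the cross-over: the spec stored in the destination header
+  // (m_dst_migration_spec) describes the source image, while the spec
+  // stored in the source header (m_src_migration_spec) describes the
+  // destination image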
+ status->source_pool_id = m_dst_migration_spec.pool_id;
+ status->source_pool_namespace = m_dst_migration_spec.pool_namespace;
+ status->source_image_name = m_dst_migration_spec.image_name;
+ status->source_image_id = m_dst_migration_spec.image_id;
+ status->dest_pool_id = m_src_migration_spec.pool_id;
+ status->dest_pool_namespace = m_src_migration_spec.pool_namespace;
+ status->dest_image_name = m_src_migration_spec.image_name;
+ status->dest_image_id = m_src_migration_spec.image_id;
+
+ switch (m_src_migration_spec.state) {
+ case cls::rbd::MIGRATION_STATE_ERROR:
+ status->state = RBD_IMAGE_MIGRATION_STATE_ERROR;
+ break;
+ case cls::rbd::MIGRATION_STATE_PREPARING:
+ status->state = RBD_IMAGE_MIGRATION_STATE_PREPARING;
+ break;
+ case cls::rbd::MIGRATION_STATE_PREPARED:
+ status->state = RBD_IMAGE_MIGRATION_STATE_PREPARED;
+ break;
+ case cls::rbd::MIGRATION_STATE_EXECUTING:
+ status->state = RBD_IMAGE_MIGRATION_STATE_EXECUTING;
+ break;
+ case cls::rbd::MIGRATION_STATE_EXECUTED:
+ status->state = RBD_IMAGE_MIGRATION_STATE_EXECUTED;
+ break;
+ default:
+ status->state = RBD_IMAGE_MIGRATION_STATE_UNKNOWN;
+ break;
+ }
+
+ status->state_description = m_src_migration_spec.state_description;
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::set_state(I* image_ctx, const std::string& image_description,
+ cls::rbd::MigrationState state,
+ const std::string &description) {
+ int r = cls_client::migration_set_state(&image_ctx->md_ctx,
+ image_ctx->header_oid,
+ state, description);
+ if (r < 0) {
+ lderr(m_cct) << "failed to set " << image_description << " "
+ << "migration header: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::set_state(cls::rbd::MigrationState state,
+ const std::string &description) {
+ int r;
+ if (m_src_image_ctx != nullptr) {
+ r = set_state(m_src_image_ctx, "source", state, description);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ r = set_state(m_dst_image_ctx, "destination", state, description);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::list_src_snaps(I* image_ctx,
+ std::vector<librbd::snap_info_t> *snaps) {
+ ldout(m_cct, 10) << dendl;
+
+ int r = Snapshot<I>::list(image_ctx, *snaps);
+ if (r < 0) {
+ lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (auto &snap : *snaps) {
+ librbd::snap_namespace_type_t namespace_type;
+ r = Snapshot<I>::get_namespace_type(image_ctx, snap.id,
+ &namespace_type);
+ if (r < 0) {
+ lderr(m_cct) << "error getting snap namespace type: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (namespace_type != RBD_SNAP_NAMESPACE_TYPE_USER) {
+ if (namespace_type == RBD_SNAP_NAMESPACE_TYPE_TRASH) {
+ lderr(m_cct) << "image has snapshots with linked clones that must be "
+ << "deleted or flattened before the image can be migrated"
+ << dendl;
+ } else {
+ lderr(m_cct) << "image has non-user type snapshots "
+ << "that are not supported by migration" << dendl;
+ }
+ return -EBUSY;
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::validate_src_snaps(I* image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ std::vector<librbd::snap_info_t> snaps;
+ int r = list_src_snaps(image_ctx, &snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t dst_features = 0;
+ r = m_image_options.get(RBD_IMAGE_OPTION_FEATURES, &dst_features);
+ ceph_assert(r == 0);
+
+ if (!image_ctx->test_features(RBD_FEATURE_LAYERING)) {
+ return 0;
+ }
+
+ for (auto &snap : snaps) {
+ std::shared_lock image_locker{image_ctx->image_lock};
+ cls::rbd::ParentImageSpec parent_spec{image_ctx->md_ctx.get_id(),
+ image_ctx->md_ctx.get_namespace(),
+ image_ctx->id, snap.id};
+ std::vector<librbd::linked_image_spec_t> child_images;
+ r = api::Image<I>::list_children(image_ctx, parent_spec,
+ &child_images);
+ if (r < 0) {
+ lderr(m_cct) << "failed listing children: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ if (!child_images.empty()) {
+ ldout(m_cct, 1) << image_ctx->name << "@" << snap.name
+ << " has children" << dendl;
+
+ if ((dst_features & RBD_FEATURE_LAYERING) == 0) {
+ lderr(m_cct) << "can't migrate to destination without layering feature: "
+ << "image has children" << dendl;
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::set_src_migration(I* image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ image_ctx->ignore_migrating = true;
+
+ int r = cls_client::migration_set(&image_ctx->md_ctx, image_ctx->header_oid,
+ m_src_migration_spec);
+ if (r < 0) {
+ lderr(m_cct) << "failed to set source migration header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ image_ctx->notify_update();
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::remove_migration(I *image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ int r;
+
+ r = cls_client::migration_remove(&image_ctx->md_ctx, image_ctx->header_oid);
+ if (r == -ENOENT) {
+ r = 0;
+ }
+ if (r < 0) {
+ lderr(m_cct) << "failed removing migration header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ image_ctx->notify_update();
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::unlink_src_image(I* image_ctx) {
+ if (image_ctx->old_format) {
+ return v1_unlink_src_image(image_ctx);
+ } else {
+ return v2_unlink_src_image(image_ctx);
+ }
+}
+
+template <typename I>
+int Migration<I>::v1_unlink_src_image(I* image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ std::shared_lock image_locker{image_ctx->image_lock};
+ int r = tmap_rm(image_ctx->md_ctx, image_ctx->name);
+ if (r < 0) {
+ lderr(m_cct) << "failed removing " << image_ctx->name << " from tmap: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::v2_unlink_src_image(I* image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ image_ctx->owner_lock.lock_shared();
+ if (image_ctx->exclusive_lock != nullptr &&
+ image_ctx->exclusive_lock->is_lock_owner()) {
+ C_SaferCond ctx;
+ image_ctx->exclusive_lock->release_lock(&ctx);
+ image_ctx->owner_lock.unlock_shared();
+ int r = ctx.wait();
+ if (r < 0) {
+ lderr(m_cct) << "error releasing exclusive lock: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ } else {
+ image_ctx->owner_lock.unlock_shared();
+ }
+
+ int r = Trash<I>::move(image_ctx->md_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION,
+ image_ctx->name, 0);
+ if (r < 0) {
+ lderr(m_cct) << "failed moving image to trash: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::relink_src_image(I* image_ctx) {
+ if (image_ctx->old_format) {
+ return v1_relink_src_image(image_ctx);
+ } else {
+ return v2_relink_src_image(image_ctx);
+ }
+}
+
+template <typename I>
+int Migration<I>::v1_relink_src_image(I* image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ std::shared_lock image_locker{image_ctx->image_lock};
+ int r = tmap_set(image_ctx->md_ctx, image_ctx->name);
+ if (r < 0) {
+ lderr(m_cct) << "failed adding " << image_ctx->name << " to tmap: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::v2_relink_src_image(I* image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ std::shared_lock image_locker{image_ctx->image_lock};
+ int r = Trash<I>::restore(image_ctx->md_ctx,
+ {cls::rbd::TRASH_IMAGE_SOURCE_MIGRATION},
+ image_ctx->id, image_ctx->name);
+ if (r < 0) {
+ lderr(m_cct) << "failed restoring image from trash: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::create_dst_image(I** image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ uint64_t size;
+ cls::rbd::ParentImageSpec parent_spec;
+ {
+ std::shared_lock image_locker{m_src_image_ctx->image_lock};
+ size = m_src_image_ctx->size;
+
+ // use oldest snapshot or HEAD for parent spec
+ if (!m_src_image_ctx->snap_info.empty()) {
+ parent_spec = m_src_image_ctx->snap_info.begin()->second.parent.spec;
+ } else {
+ parent_spec = m_src_image_ctx->parent_md.spec;
+ }
+ }
+
+ ConfigProxy config{m_cct->_conf};
+ api::Config<I>::apply_pool_overrides(m_dst_io_ctx, &config);
+
+ uint64_t mirror_image_mode;
+ if (m_image_options.get(RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE,
+ &mirror_image_mode) == 0) {
+ m_mirroring = true;
+ m_mirror_image_mode = static_cast<cls::rbd::MirrorImageMode>(
+ mirror_image_mode);
+ m_image_options.unset(RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE);
+ }
+
+ int r;
+ C_SaferCond on_create;
+ librados::IoCtx parent_io_ctx;
+ if (parent_spec.pool_id == -1) {
+ auto *req = image::CreateRequest<I>::create(
+ config, m_dst_io_ctx, m_dst_image_name, m_dst_image_id, size,
+ m_image_options, image::CREATE_FLAG_SKIP_MIRROR_ENABLE,
+ cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, "", "",
+ m_src_image_ctx->op_work_queue, &on_create);
+ req->send();
+ } else {
+ r = util::create_ioctx(m_src_image_ctx->md_ctx, "parent image",
+ parent_spec.pool_id, parent_spec.pool_namespace,
+ &parent_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ auto *req = image::CloneRequest<I>::create(
+ config, parent_io_ctx, parent_spec.image_id, "", {}, parent_spec.snap_id,
+ m_dst_io_ctx, m_dst_image_name, m_dst_image_id, m_image_options,
+ cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, "", "",
+ m_src_image_ctx->op_work_queue, &on_create);
+ req->send();
+ }
+
+ r = on_create.wait();
+ if (r < 0) {
+ lderr(m_cct) << "header creation failed: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ auto dst_image_ctx = *image_ctx;
+ dst_image_ctx->id = m_dst_image_id;
+ *image_ctx = nullptr; // prevent prepare from cleaning up the ImageCtx
+
+ r = dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+ if (r < 0) {
+ lderr(m_cct) << "failed to open newly created header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ BOOST_SCOPE_EXIT_TPL(dst_image_ctx) {
+ dst_image_ctx->state->close();
+ } BOOST_SCOPE_EXIT_END;
+
+ {
+ std::shared_lock owner_locker{dst_image_ctx->owner_lock};
+ r = dst_image_ctx->operations->prepare_image_update(
+ exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true);
+ if (r < 0) {
+ lderr(m_cct) << "cannot obtain exclusive lock" << dendl;
+ return r;
+ }
+ if (dst_image_ctx->exclusive_lock != nullptr) {
+ dst_image_ctx->exclusive_lock->block_requests(0);
+ }
+ }
+
+ SnapSeqs snap_seqs;
+
+ C_SaferCond on_snapshot_copy;
+ auto snapshot_copy_req = librbd::deep_copy::SnapshotCopyRequest<I>::create(
+ m_src_image_ctx, dst_image_ctx, 0, CEPH_NOSNAP, 0, m_flatten,
+ m_src_image_ctx->op_work_queue, &snap_seqs, &on_snapshot_copy);
+ snapshot_copy_req->send();
+ r = on_snapshot_copy.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to copy snapshots: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (!m_src_image_ctx->header_oid.empty()) {
+ C_SaferCond on_metadata_copy;
+ auto metadata_copy_req = librbd::deep_copy::MetadataCopyRequest<I>::create(
+ m_src_image_ctx, dst_image_ctx, &on_metadata_copy);
+ metadata_copy_req->send();
+ r = on_metadata_copy.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to copy metadata: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ m_dst_migration_spec.snap_seqs = snap_seqs;
+ m_dst_migration_spec.overlap = size;
+ m_dst_migration_spec.mirroring = m_mirroring;
+ m_dst_migration_spec.mirror_image_mode = m_mirror_image_mode;
+ m_dst_migration_spec.flatten = m_flatten;
+ r = cls_client::migration_set(&m_dst_io_ctx, m_dst_header_oid,
+ m_dst_migration_spec);
+ if (r < 0) {
+ lderr(m_cct) << "failed to set migration header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (m_dst_migration_spec.source_spec.empty()) {
+ r = update_group(m_src_image_ctx, dst_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = set_state(m_src_image_ctx, "source",
+ cls::rbd::MIGRATION_STATE_PREPARED, "");
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ r = set_state(dst_image_ctx, "destination",
+ cls::rbd::MIGRATION_STATE_PREPARED, "");
+ if (r < 0) {
+ return r;
+ }
+
+ if (m_dst_migration_spec.source_spec.empty()) {
+ r = dst_image_ctx->state->refresh();
+ if (r < 0) {
+ lderr(m_cct) << "failed to refresh destination image: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ r = relink_children(m_src_image_ctx, dst_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::remove_group(I *image_ctx, group_info_t *group_info) {
+ int r = librbd::api::Group<I>::image_get_group(image_ctx, group_info);
+ if (r < 0) {
+ lderr(m_cct) << "failed to get image group: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (group_info->pool == -1) {
+ return -ENOENT;
+ }
+
+ ceph_assert(!image_ctx->id.empty());
+
+ ldout(m_cct, 10) << dendl;
+
+ IoCtx group_ioctx;
+ r = util::create_ioctx(image_ctx->md_ctx, "group", group_info->pool, {},
+ &group_ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = librbd::api::Group<I>::image_remove_by_id(group_ioctx,
+ group_info->name.c_str(),
+ image_ctx->md_ctx,
+ image_ctx->id.c_str());
+ if (r < 0) {
+ lderr(m_cct) << "failed to remove image from group: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::add_group(I *image_ctx, group_info_t &group_info) {
+ if (group_info.pool == -1) {
+ return 0;
+ }
+
+ ldout(m_cct, 10) << dendl;
+
+ IoCtx group_ioctx;
+ int r = util::create_ioctx(image_ctx->md_ctx, "group", group_info.pool, {},
+ &group_ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = librbd::api::Group<I>::image_add(group_ioctx, group_info.name.c_str(),
+ image_ctx->md_ctx,
+ image_ctx->name.c_str());
+ if (r < 0) {
+ lderr(m_cct) << "failed to add image to group: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::update_group(I *from_image_ctx, I *to_image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ group_info_t group_info;
+
+ int r = remove_group(from_image_ctx, &group_info);
+ if (r < 0) {
+ return r == -ENOENT ? 0 : r;
+ }
+
+ r = add_group(to_image_ctx, group_info);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::disable_mirroring(
+ I *image_ctx, bool *was_enabled,
+ cls::rbd::MirrorImageMode *mirror_image_mode) {
+ *was_enabled = false;
+
+ cls::rbd::MirrorImage mirror_image;
+ int r = cls_client::mirror_image_get(&image_ctx->md_ctx, image_ctx->id,
+ &mirror_image);
+ if (r == -ENOENT) {
+ ldout(m_cct, 10) << "mirroring is not enabled for this image" << dendl;
+ return 0;
+ }
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to retrieve mirror image: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+ *was_enabled = true;
+ *mirror_image_mode = mirror_image.mode;
+ }
+
+ ldout(m_cct, 10) << dendl;
+
+ C_SaferCond ctx;
+ auto req = mirror::DisableRequest<I>::create(image_ctx, false, true, &ctx);
+ req->send();
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to disable mirroring: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ m_src_migration_spec.mirroring = true;
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::enable_mirroring(
+ I *image_ctx, bool was_enabled,
+ cls::rbd::MirrorImageMode mirror_image_mode) {
+ cls::rbd::MirrorMode mirror_mode;
+ int r = cls_client::mirror_mode_get(&image_ctx->md_ctx, &mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "failed to retrieve mirror mode: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ ldout(m_cct, 10) << "mirroring is not enabled for destination pool"
+ << dendl;
+ return 0;
+ }
+ if (mirror_mode == cls::rbd::MIRROR_MODE_IMAGE && !was_enabled) {
+ ldout(m_cct, 10) << "mirroring is not enabled for image" << dendl;
+ return 0;
+ }
+
+ ldout(m_cct, 10) << dendl;
+
+ C_SaferCond ctx;
+ auto req = mirror::EnableRequest<I>::create(
+ image_ctx, mirror_image_mode, "", false, &ctx);
+ req->send();
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to enable mirroring: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+// When relinking children we should be careful: the process may be
+// interrupted at any moment for any reason, leaving us in an inconsistent
+// state that we must be able to fix with "migration abort". Below are all
+// possible states during migration (P1 - source parent, P2 - destination
+// parent, C - child):
+//
+// P1 P2 P1 P2 P1 P2 P1 P2
+// ^\ \ ^ \ /^ /^
+// \v v/ v/ v/
+// C C C C
+//
+// 1 2 3 4
+//
+// (1) and (4) are the initial and final consistent states. (2) and (3)
+// are intermediate inconsistent states that have to be fixed by
+// relink_children running in "migration abort" mode. For this, it scans
+// P2 for all attached children and relinks (fixes) states (3) and (4)
+// back to state (1). Then it scans P1 for the remaining children and
+// fixes state (2).
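+//
+// For example, if the process dies after a child's parent link was moved
+// to P2 but before the child registration followed (state (2)), the child
+// will not show up in the P2 scan; the follow-up P1 scan still finds it
+// and restores state (1).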
+
+template <typename I>
+int Migration<I>::relink_children(I *from_image_ctx, I *to_image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ bool migration_abort = (to_image_ctx == m_src_image_ctx);
+
+ std::vector<librbd::snap_info_t> snaps;
+ int r = list_src_snaps(
+ migration_abort ? to_image_ctx : from_image_ctx, &snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ for (auto it = snaps.begin(); it != snaps.end(); it++) {
+ auto &snap = *it;
+ std::vector<librbd::linked_image_spec_t> src_child_images;
+
+ if (from_image_ctx != m_src_image_ctx) {
+ ceph_assert(migration_abort);
+
+      // We list snapshots against the src image to get only those that
+      // were migrated. If the "from" image is not the src image (the
+      // abort-migration case), we need to remap the snap ids. Also
+      // collect the list of children currently attached to the source, so
+      // we can make a proper relinking decision later.
+
+ std::shared_lock src_image_locker{to_image_ctx->image_lock};
+ cls::rbd::ParentImageSpec src_parent_spec{to_image_ctx->md_ctx.get_id(),
+ to_image_ctx->md_ctx.get_namespace(),
+ to_image_ctx->id, snap.id};
+ r = api::Image<I>::list_children(to_image_ctx, src_parent_spec,
+ &src_child_images);
+ if (r < 0) {
+ lderr(m_cct) << "failed listing children: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ std::shared_lock image_locker{from_image_ctx->image_lock};
+ snap.id = from_image_ctx->get_snap_id(cls::rbd::UserSnapshotNamespace(),
+ snap.name);
+ if (snap.id == CEPH_NOSNAP) {
+ ldout(m_cct, 5) << "skipping snapshot " << snap.name << dendl;
+ continue;
+ }
+ }
+
+ std::vector<librbd::linked_image_spec_t> child_images;
+ {
+ std::shared_lock image_locker{from_image_ctx->image_lock};
+ cls::rbd::ParentImageSpec parent_spec{from_image_ctx->md_ctx.get_id(),
+ from_image_ctx->md_ctx.get_namespace(),
+ from_image_ctx->id, snap.id};
+ r = api::Image<I>::list_children(from_image_ctx, parent_spec,
+ &child_images);
+ if (r < 0) {
+ lderr(m_cct) << "failed listing children: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ }
+
+ for (auto &child_image : child_images) {
+ r = relink_child(from_image_ctx, to_image_ctx, snap, child_image,
+ migration_abort, true);
+ if (r < 0) {
+ return r;
+ }
+
+ src_child_images.erase(std::remove(src_child_images.begin(),
+ src_child_images.end(), child_image),
+ src_child_images.end());
+ }
+
+ for (auto &child_image : src_child_images) {
+ r = relink_child(from_image_ctx, to_image_ctx, snap, child_image,
+ migration_abort, false);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::relink_child(I *from_image_ctx, I *to_image_ctx,
+ const librbd::snap_info_t &from_snap,
+ const librbd::linked_image_spec_t &child_image,
+ bool migration_abort, bool reattach_child) {
+ ldout(m_cct, 10) << from_snap.name << " " << child_image.pool_name << "/"
+ << child_image.pool_namespace << "/"
+ << child_image.image_name << " (migration_abort="
+ << migration_abort << ", reattach_child=" << reattach_child
+ << ")" << dendl;
+
+ librados::snap_t to_snap_id;
+ {
+ std::shared_lock image_locker{to_image_ctx->image_lock};
+ to_snap_id = to_image_ctx->get_snap_id(cls::rbd::UserSnapshotNamespace(),
+ from_snap.name);
+ if (to_snap_id == CEPH_NOSNAP) {
+ lderr(m_cct) << "no snapshot " << from_snap.name << " on destination image"
+ << dendl;
+ return -ENOENT;
+ }
+ }
+
+ librados::IoCtx child_io_ctx;
+ int r = util::create_ioctx(to_image_ctx->md_ctx,
+ "child image " + child_image.image_name,
+ child_image.pool_id, child_image.pool_namespace,
+ &child_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ I *child_image_ctx = I::create("", child_image.image_id, nullptr,
+ child_io_ctx, false);
+ r = child_image_ctx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
+ if (r < 0) {
+ lderr(m_cct) << "failed to open child image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ BOOST_SCOPE_EXIT_TPL(child_image_ctx) {
+ child_image_ctx->state->close();
+ } BOOST_SCOPE_EXIT_END;
+
+ uint32_t clone_format = 1;
+ if (child_image_ctx->test_op_features(RBD_OPERATION_FEATURE_CLONE_CHILD)) {
+ clone_format = 2;
+ }
+
+ cls::rbd::ParentImageSpec parent_spec;
+ uint64_t parent_overlap;
+ {
+ std::shared_lock image_locker{child_image_ctx->image_lock};
+
+ // use oldest snapshot or HEAD for parent spec
+ if (!child_image_ctx->snap_info.empty()) {
+ parent_spec = child_image_ctx->snap_info.begin()->second.parent.spec;
+ parent_overlap = child_image_ctx->snap_info.begin()->second.parent.overlap;
+ } else {
+ parent_spec = child_image_ctx->parent_md.spec;
+ parent_overlap = child_image_ctx->parent_md.overlap;
+ }
+ }
+
+ if (migration_abort &&
+ parent_spec.pool_id == to_image_ctx->md_ctx.get_id() &&
+ parent_spec.pool_namespace == to_image_ctx->md_ctx.get_namespace() &&
+ parent_spec.image_id == to_image_ctx->id &&
+ parent_spec.snap_id == to_snap_id) {
+ ldout(m_cct, 10) << "no need for parent re-attach" << dendl;
+ } else {
+ if (parent_spec.pool_id != from_image_ctx->md_ctx.get_id() ||
+ parent_spec.pool_namespace != from_image_ctx->md_ctx.get_namespace() ||
+ parent_spec.image_id != from_image_ctx->id ||
+ parent_spec.snap_id != from_snap.id) {
+ lderr(m_cct) << "parent is not source image: " << parent_spec.pool_id
+ << "/" << parent_spec.pool_namespace << "/"
+ << parent_spec.image_id << "@" << parent_spec.snap_id
+ << dendl;
+ return -ESTALE;
+ }
+
+ parent_spec.pool_id = to_image_ctx->md_ctx.get_id();
+ parent_spec.pool_namespace = to_image_ctx->md_ctx.get_namespace();
+ parent_spec.image_id = to_image_ctx->id;
+ parent_spec.snap_id = to_snap_id;
+
+ C_SaferCond on_reattach_parent;
+ auto reattach_parent_req = image::AttachParentRequest<I>::create(
+ *child_image_ctx, parent_spec, parent_overlap, true, &on_reattach_parent);
+ reattach_parent_req->send();
+ r = on_reattach_parent.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to re-attach parent: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ if (reattach_child) {
+ C_SaferCond on_reattach_child;
+ auto reattach_child_req = image::AttachChildRequest<I>::create(
+ child_image_ctx, to_image_ctx, to_snap_id, from_image_ctx, from_snap.id,
+ clone_format, &on_reattach_child);
+ reattach_child_req->send();
+ r = on_reattach_child.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to re-attach child: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ child_image_ctx->notify_update();
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::remove_src_image(I** image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ auto src_image_ctx = *image_ctx;
+
+ std::vector<librbd::snap_info_t> snaps;
+ int r = list_src_snaps(src_image_ctx, &snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ for (auto it = snaps.rbegin(); it != snaps.rend(); it++) {
+ auto &snap = *it;
+
+ librbd::NoOpProgressContext prog_ctx;
+ int r = Snapshot<I>::remove(src_image_ctx, snap.name.c_str(),
+ RBD_SNAP_REMOVE_UNPROTECT, prog_ctx);
+ if (r < 0) {
+ lderr(m_cct) << "failed removing source image snapshot '" << snap.name
+ << "': " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ ceph_assert(src_image_ctx->ignore_migrating);
+
+ auto asio_engine = src_image_ctx->asio_engine;
+ auto src_image_id = src_image_ctx->id;
+ librados::IoCtx src_io_ctx(src_image_ctx->md_ctx);
+
+ C_SaferCond on_remove;
+ auto req = librbd::image::RemoveRequest<I>::create(
+ src_io_ctx, src_image_ctx, false, true, *m_prog_ctx,
+ asio_engine->get_work_queue(), &on_remove);
+ req->send();
+ r = on_remove.wait();
+
+ *image_ctx = nullptr;
+
+  // For an old-format image this will return -ENOENT due to the expected
+  // tmap_rm failure at the end.
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "failed removing source image: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (!src_image_id.empty()) {
+ r = cls_client::trash_remove(&src_io_ctx, src_image_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "error removing image " << src_image_id
+ << " from rbd_trash object" << dendl;
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::revert_data(I* src_image_ctx, I* dst_image_ctx,
+ ProgressContext* prog_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ cls::rbd::MigrationSpec migration_spec;
+ int r = cls_client::migration_get(&src_image_ctx->md_ctx,
+ src_image_ctx->header_oid,
+ &migration_spec);
+
+ if (r < 0) {
+ lderr(m_cct) << "failed retrieving migration header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_DST) {
+ lderr(m_cct) << "unexpected migration header type: "
+ << migration_spec.header_type << dendl;
+ return -EINVAL;
+ }
+
+ uint64_t src_snap_id_start = 0;
+ uint64_t src_snap_id_end = CEPH_NOSNAP;
+ uint64_t dst_snap_id_start = 0;
+ if (!migration_spec.snap_seqs.empty()) {
+ src_snap_id_start = migration_spec.snap_seqs.rbegin()->second;
+ }
+
+  // we only care about the HEAD revision, so add a single mapping to
+  // represent the most recent state
+ SnapSeqs snap_seqs;
+ snap_seqs[CEPH_NOSNAP] = CEPH_NOSNAP;
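+  // snap_seqs normally maps each source snap id to its destination
+  // counterpart; the single {CEPH_NOSNAP -> CEPH_NOSNAP} entry collapses
+  // the copy to HEAD-to-HEAD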
+
+ ldout(m_cct, 20) << "src_snap_id_start=" << src_snap_id_start << ", "
+ << "src_snap_id_end=" << src_snap_id_end << ", "
+ << "dst_snap_id_start=" << dst_snap_id_start << ", "
+ << "snap_seqs=" << snap_seqs << dendl;
+
+ C_SaferCond ctx;
+ deep_copy::ProgressHandler progress_handler(prog_ctx);
+ auto request = deep_copy::ImageCopyRequest<I>::create(
+ src_image_ctx, dst_image_ctx, src_snap_id_start, src_snap_id_end,
+ dst_snap_id_start, false, {}, snap_seqs, &progress_handler, &ctx);
+ request->send();
+
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(m_cct) << "error reverting destination image data blocks back to "
+ << "source image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Migration<librbd::ImageCtx>;
diff --git a/src/librbd/api/Migration.h b/src/librbd/api/Migration.h
new file mode 100644
index 000000000..dd70dcc23
--- /dev/null
+++ b/src/librbd/api/Migration.h
@@ -0,0 +1,113 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_MIGRATION_H
+#define CEPH_LIBRBD_API_MIGRATION_H
+
+#include "include/int_types.h"
+#include "include/rados/librados_fwd.hpp"
+#include "include/rbd/librbd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+
+#include <vector>
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Migration {
+public:
+ static int prepare(librados::IoCtx& io_ctx, const std::string &image_name,
+ librados::IoCtx& dest_io_ctx,
+ const std::string &dest_image_name, ImageOptions& opts);
+ static int prepare_import(const std::string& source_spec,
+ librados::IoCtx& dest_io_ctx,
+ const std::string &dest_image_name,
+ ImageOptions& opts);
+ static int execute(librados::IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext &prog_ctx);
+ static int abort(librados::IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext &prog_ctx);
+ static int commit(librados::IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext &prog_ctx);
+ static int status(librados::IoCtx& io_ctx, const std::string &image_name,
+ image_migration_status_t *status);
+
+ static int get_source_spec(ImageCtxT* image_ctx, std::string* source_spec);
+
+private:
+ CephContext* m_cct;
+ ImageCtx* m_src_image_ctx;
+ ImageCtx* m_dst_image_ctx;
+ librados::IoCtx m_dst_io_ctx;
+ std::string m_dst_image_name;
+ std::string m_dst_image_id;
+ std::string m_dst_header_oid;
+ ImageOptions &m_image_options;
+ bool m_flatten;
+ bool m_mirroring;
+ cls::rbd::MirrorImageMode m_mirror_image_mode;
+ ProgressContext *m_prog_ctx;
+
+ cls::rbd::MigrationSpec m_src_migration_spec;
+ cls::rbd::MigrationSpec m_dst_migration_spec;
+
+ Migration(ImageCtx* src_image_ctx, ImageCtx* dst_image_ctx,
+ const cls::rbd::MigrationSpec& dst_migration_spec,
+ ImageOptions& opts, ProgressContext *prog_ctx);
+
+ int prepare();
+ int prepare_import();
+ int execute();
+ int abort();
+ int commit();
+ int status(image_migration_status_t *status);
+
+ int set_state(ImageCtxT* image_ctx, const std::string& image_description,
+ cls::rbd::MigrationState state,
+ const std::string &description);
+ int set_state(cls::rbd::MigrationState state, const std::string &description);
+
+ int list_src_snaps(ImageCtxT* image_ctx,
+ std::vector<librbd::snap_info_t> *snaps);
+ int validate_src_snaps(ImageCtxT* image_ctx);
+ int disable_mirroring(ImageCtxT* image_ctx, bool *was_enabled,
+ cls::rbd::MirrorImageMode *mirror_image_mode);
+ int enable_mirroring(ImageCtxT* image_ctx, bool was_enabled,
+ cls::rbd::MirrorImageMode mirror_image_mode);
+ int set_src_migration(ImageCtxT* image_ctx);
+ int unlink_src_image(ImageCtxT* image_ctx);
+ int relink_src_image(ImageCtxT* image_ctx);
+ int create_dst_image(ImageCtxT** image_ctx);
+ int remove_group(ImageCtxT* image_ctx, group_info_t *group_info);
+ int add_group(ImageCtxT* image_ctx, group_info_t &group_info);
+ int update_group(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx);
+ int remove_migration(ImageCtxT* image_ctx);
+ int relink_children(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx);
+ int remove_src_image(ImageCtxT** image_ctx);
+
+ int v1_set_src_migration(ImageCtxT* image_ctx);
+ int v2_set_src_migration(ImageCtxT* image_ctx);
+ int v1_unlink_src_image(ImageCtxT* image_ctx);
+ int v2_unlink_src_image(ImageCtxT* image_ctx);
+ int v1_relink_src_image(ImageCtxT* image_ctx);
+ int v2_relink_src_image(ImageCtxT* image_ctx);
+
+ int relink_child(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx,
+ const librbd::snap_info_t &src_snap,
+ const librbd::linked_image_spec_t &child_image,
+ bool migration_abort, bool reattach_child);
+
+ int revert_data(ImageCtxT* src_image_ctx, ImageCtxT* dst_image_ctx,
+ ProgressContext *prog_ctx);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Migration<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_MIGRATION_H
diff --git a/src/librbd/api/Mirror.cc b/src/librbd/api/Mirror.cc
new file mode 100644
index 000000000..2cfad0d32
--- /dev/null
+++ b/src/librbd/api/Mirror.cc
@@ -0,0 +1,2104 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Mirror.h"
+#include "include/rados/librados.hpp"
+#include "include/stringify.h"
+#include "common/ceph_json.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/MirroringWatcher.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Image.h"
+#include "librbd/api/Namespace.h"
+#include "librbd/mirror/DemoteRequest.h"
+#include "librbd/mirror/DisableRequest.h"
+#include "librbd/mirror/EnableRequest.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include "librbd/mirror/GetStatusRequest.h"
+#include "librbd/mirror/GetUuidRequest.h"
+#include "librbd/mirror/PromoteRequest.h"
+#include "librbd/mirror/Types.h"
+#include "librbd/MirroringWatcher.h"
+#include "librbd/mirror/snapshot/CreatePrimaryRequest.h"
+#include "librbd/mirror/snapshot/ImageMeta.h"
+#include "librbd/mirror/snapshot/UnlinkPeerRequest.h"
+#include "librbd/mirror/snapshot/Utils.h"
+#include <boost/algorithm/string/trim.hpp>
+#include <boost/algorithm/string/replace.hpp>
+#include <boost/scope_exit.hpp>
+#include "json_spirit/json_spirit.h"
+
+#include <algorithm>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Mirror: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+int get_config_key(librados::Rados& rados, const std::string& key,
+ std::string* value) {
+ std::string cmd =
+ "{"
+ "\"prefix\": \"config-key get\", "
+ "\"key\": \"" + key + "\""
+ "}";
+
+ bufferlist in_bl;
+ bufferlist out_bl;
+
+ int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r == -EINVAL) {
+ return -EOPNOTSUPP;
+ } else if (r < 0 && r != -ENOENT) {
+ return r;
+ }
+
+ *value = out_bl.to_str();
+ return 0;
+}
+
+int set_config_key(librados::Rados& rados, const std::string& key,
+ const std::string& value) {
+ std::string cmd;
+ if (value.empty()) {
+ cmd = "{"
+ "\"prefix\": \"config-key rm\", "
+ "\"key\": \"" + key + "\""
+ "}";
+ } else {
+ cmd = "{"
+ "\"prefix\": \"config-key set\", "
+ "\"key\": \"" + key + "\", "
+ "\"val\": \"" + value + "\""
+ "}";
+ }
+ bufferlist in_bl;
+ bufferlist out_bl;
+
+ int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r == -EINVAL) {
+ return -EOPNOTSUPP;
+ } else if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+std::string get_peer_config_key_name(int64_t pool_id,
+ const std::string& peer_uuid) {
+ return RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) + "/" +
+ peer_uuid;
+}
+
+int remove_peer_config_key(librados::IoCtx& io_ctx,
+ const std::string& peer_uuid) {
+ int64_t pool_id = io_ctx.get_id();
+ auto key = get_peer_config_key_name(pool_id, peer_uuid);
+
+ librados::Rados rados(io_ctx);
+ int r = set_config_key(rados, key, "");
+ if (r < 0 && r != -ENOENT && r != -EPERM) {
+ return r;
+ }
+ return 0;
+}
+
+std::string get_mon_host(CephContext* cct) {
+ std::string mon_host;
+ if (auto mon_addrs = cct->get_mon_addrs();
+ mon_addrs != nullptr && !mon_addrs->empty()) {
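+    // join the monitor addresses into a comma-separated mon_host string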
+ CachedStackStringStream css;
+ for (auto it = mon_addrs->begin(); it != mon_addrs->end(); ++it) {
+ if (it != mon_addrs->begin()) {
+ *css << ",";
+ }
+ *css << *it;
+ }
+ mon_host = css->str();
+ } else {
+ ldout(cct, 20) << "falling back to mon_host in conf" << dendl;
+ mon_host = cct->_conf.get_val<std::string>("mon_host");
+ }
+ ldout(cct, 20) << "mon_host=" << mon_host << dendl;
+ return mon_host;
+}
+
+int create_bootstrap_user(CephContext* cct, librados::Rados& rados,
+ std::string* peer_client_id, std::string* cephx_key) {
+ ldout(cct, 20) << dendl;
+
+ // retrieve peer CephX user from config-key
+ int r = get_config_key(rados, RBD_MIRROR_PEER_CLIENT_ID_CONFIG_KEY,
+ peer_client_id);
+ if (r == -EACCES) {
+ ldout(cct, 5) << "insufficient permissions to get peer-client-id "
+ << "config-key" << dendl;
+ return r;
+ } else if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve peer client id key: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (r == -ENOENT || peer_client_id->empty()) {
+ ldout(cct, 20) << "creating new peer-client-id config-key" << dendl;
+
+ *peer_client_id = "rbd-mirror-peer";
+ r = set_config_key(rados, RBD_MIRROR_PEER_CLIENT_ID_CONFIG_KEY,
+ *peer_client_id);
+ if (r == -EACCES) {
+ ldout(cct, 5) << "insufficient permissions to update peer-client-id "
+ << "config-key" << dendl;
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to update peer client id key: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+ ldout(cct, 20) << "peer_client_id=" << *peer_client_id << dendl;
+
+ // create peer client user
+ std::string cmd =
+ R"({)" \
+ R"( "prefix": "auth get-or-create",)" \
+ R"( "entity": "client.)" + *peer_client_id + R"(",)" \
+ R"( "caps": [)" \
+ R"( "mon", "profile rbd-mirror-peer",)" \
+ R"( "osd", "profile rbd"],)" \
+ R"( "format": "json")" \
+ R"(})";
+
+ bufferlist in_bl;
+ bufferlist out_bl;
+
+ r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r == -EINVAL) {
+ ldout(cct, 5) << "caps mismatch for existing user" << dendl;
+ return -EEXIST;
+ } else if (r == -EACCES) {
+ ldout(cct, 5) << "insufficient permissions to create user" << dendl;
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to create or update RBD mirroring bootstrap user: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ // extract key from response
+ bool json_valid = false;
+ json_spirit::mValue json_root;
+  if (json_spirit::read(out_bl.to_str(), json_root)) {
+ try {
+ auto& json_obj = json_root.get_array()[0].get_obj();
+ *cephx_key = json_obj["key"].get_str();
+ json_valid = true;
+ } catch (std::runtime_error&) {
+ }
+ }
+
+ if (!json_valid) {
+ lderr(cct) << "invalid auth keyring JSON received" << dendl;
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+int create_bootstrap_peer(CephContext* cct, librados::IoCtx& io_ctx,
+ mirror_peer_direction_t direction,
+ const std::string& site_name, const std::string& fsid,
+ const std::string& client_id, const std::string& key,
+ const std::string& mon_host,
+ const std::string& cluster1,
+ const std::string& cluster2) {
+ ldout(cct, 20) << dendl;
+
+ std::string peer_uuid;
+ std::vector<mirror_peer_site_t> peers;
+ int r = Mirror<>::peer_site_list(io_ctx, &peers);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to list mirror peers: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (peers.empty()) {
+ r = Mirror<>::peer_site_add(io_ctx, &peer_uuid, direction, site_name,
+ "client." + client_id);
+ if (r < 0) {
+ lderr(cct) << "failed to add " << cluster1 << " peer to "
+                 << cluster2 << " cluster: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ } else if (peers[0].site_name != site_name &&
+ peers[0].site_name != fsid) {
+ // only support a single peer
+ lderr(cct) << "multiple peers are not currently supported" << dendl;
+ return -EINVAL;
+ } else {
+ peer_uuid = peers[0].uuid;
+
+ if (peers[0].site_name != site_name) {
+ r = Mirror<>::peer_site_set_name(io_ctx, peer_uuid, site_name);
+ if (r < 0) {
+ // non-fatal attempt to update site name
+ lderr(cct) << "failed to update peer site name" << dendl;
+ }
+ }
+ }
+
+ Mirror<>::Attributes attributes {
+ {"mon_host", mon_host},
+ {"key", key}};
+ r = Mirror<>::peer_site_set_attributes(io_ctx, peer_uuid, attributes);
+ if (r < 0) {
+ lderr(cct) << "failed to update " << cluster1 << " cluster connection "
+ << "attributes in " << cluster2 << " cluster: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+int list_mirror_images(librados::IoCtx& io_ctx,
+ std::set<std::string>& mirror_image_ids) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ std::string last_read = "";
+ int max_read = 1024;
+ int r;
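+  // page through the pool's mirror image directory, max_read entries at a time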
+ do {
+ std::map<std::string, std::string> mirror_images;
+ r = cls_client::mirror_image_list(&io_ctx, last_read, max_read,
+ &mirror_images);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error listing mirrored image directory: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ for (auto it = mirror_images.begin(); it != mirror_images.end(); ++it) {
+ mirror_image_ids.insert(it->first);
+ }
+ if (!mirror_images.empty()) {
+ last_read = mirror_images.rbegin()->first;
+ }
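+    // reuse r as the page size: a full page means more entries may remain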
+ r = mirror_images.size();
+ } while (r == max_read);
+
+ return 0;
+}
+
+template <typename I>
+const char *pool_or_namespace(I *ictx) {
+ if (!ictx->md_ctx.get_namespace().empty()) {
+ return "namespace";
+ } else {
+ return "pool";
+ }
+}
+
+struct C_ImageGetInfo : public Context {
+ mirror_image_info_t *mirror_image_info;
+ mirror_image_mode_t *mirror_image_mode;
+ Context *on_finish;
+
+ cls::rbd::MirrorImage mirror_image;
+ mirror::PromotionState promotion_state = mirror::PROMOTION_STATE_PRIMARY;
+ std::string primary_mirror_uuid;
+
+ C_ImageGetInfo(mirror_image_info_t *mirror_image_info,
+ mirror_image_mode_t *mirror_image_mode, Context *on_finish)
+ : mirror_image_info(mirror_image_info),
+ mirror_image_mode(mirror_image_mode), on_finish(on_finish) {
+ }
+
+ void finish(int r) override {
+ if (r < 0 && r != -ENOENT) {
+ on_finish->complete(r);
+ return;
+ }
+
+ if (mirror_image_info != nullptr) {
+ mirror_image_info->global_id = mirror_image.global_image_id;
+ mirror_image_info->state = static_cast<rbd_mirror_image_state_t>(
+ mirror_image.state);
+ mirror_image_info->primary = (
+ promotion_state == mirror::PROMOTION_STATE_PRIMARY);
+ }
+
+ if (mirror_image_mode != nullptr) {
+ *mirror_image_mode =
+ static_cast<rbd_mirror_image_mode_t>(mirror_image.mode);
+ }
+
+ on_finish->complete(0);
+ }
+};
+
+struct C_ImageGetGlobalStatus : public C_ImageGetInfo {
+ std::string image_name;
+ mirror_image_global_status_t *mirror_image_global_status;
+
+ cls::rbd::MirrorImageStatus mirror_image_status_internal;
+
+ C_ImageGetGlobalStatus(
+ const std::string &image_name,
+ mirror_image_global_status_t *mirror_image_global_status,
+ Context *on_finish)
+ : C_ImageGetInfo(&mirror_image_global_status->info, nullptr, on_finish),
+ image_name(image_name),
+ mirror_image_global_status(mirror_image_global_status) {
+ }
+
+ void finish(int r) override {
+ if (r < 0 && r != -ENOENT) {
+ on_finish->complete(r);
+ return;
+ }
+
+ mirror_image_global_status->name = image_name;
+ mirror_image_global_status->site_statuses.clear();
+ mirror_image_global_status->site_statuses.reserve(
+ mirror_image_status_internal.mirror_image_site_statuses.size());
+ for (auto& site_status :
+ mirror_image_status_internal.mirror_image_site_statuses) {
+ mirror_image_global_status->site_statuses.push_back({
+ site_status.mirror_uuid,
+ static_cast<mirror_image_status_state_t>(site_status.state),
+ site_status.description, site_status.last_update.sec(),
+ site_status.up});
+ }
+ C_ImageGetInfo::finish(0);
+ }
+};
+
+template <typename I>
+struct C_ImageSnapshotCreate : public Context {
+ I *ictx;
+ uint64_t snap_create_flags;
+ uint64_t *snap_id;
+ Context *on_finish;
+
+ cls::rbd::MirrorImage mirror_image;
+ mirror::PromotionState promotion_state;
+ std::string primary_mirror_uuid;
+
+ C_ImageSnapshotCreate(I *ictx, uint64_t snap_create_flags, uint64_t *snap_id,
+ Context *on_finish)
+ : ictx(ictx), snap_create_flags(snap_create_flags), snap_id(snap_id),
+ on_finish(on_finish) {
+ }
+
+ void finish(int r) override {
+ if (r < 0 && r != -ENOENT) {
+ on_finish->complete(r);
+ return;
+ }
+
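+    // a primary mirror snapshot can only be created when snapshot-based
+    // mirroring is enabled on the image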
+ if (mirror_image.mode != cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT ||
+ mirror_image.state != cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+ lderr(ictx->cct) << "snapshot based mirroring is not enabled" << dendl;
+ on_finish->complete(-EINVAL);
+ return;
+ }
+
+ auto req = mirror::snapshot::CreatePrimaryRequest<I>::create(
+ ictx, mirror_image.global_image_id, CEPH_NOSNAP, snap_create_flags, 0U,
+ snap_id, on_finish);
+ req->send();
+ }
+};
+
+} // anonymous namespace
+
+template <typename I>
+int Mirror<I>::image_enable(I *ictx, mirror_image_mode_t mode,
+ bool relax_same_pool_parent_check) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << " mode=" << mode
+ << " relax_same_pool_parent_check="
+ << relax_same_pool_parent_check << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::MirrorMode mirror_mode;
+ r = cls_client::mirror_mode_get(&ictx->md_ctx, &mirror_mode);
+ if (r < 0) {
+ lderr(cct) << "cannot enable mirroring: failed to retrieve mirror mode: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ lderr(cct) << "cannot enable mirroring: mirroring is not enabled on a "
+ << pool_or_namespace(ictx) << dendl;
+ return -EINVAL;
+ }
+
+ if (mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) {
+ lderr(cct) << "cannot enable mirroring: " << pool_or_namespace(ictx)
+ << " is not in image mirror mode" << dendl;
+ return -EINVAL;
+ }
+
+ // is mirroring not enabled for the parent?
+ {
+ std::shared_lock image_locker{ictx->image_lock};
+ ImageCtx *parent = ictx->parent;
+ if (parent) {
+ if (parent->md_ctx.get_id() != ictx->md_ctx.get_id() ||
+ !relax_same_pool_parent_check) {
+ cls::rbd::MirrorImage mirror_image_internal;
+ r = cls_client::mirror_image_get(&(parent->md_ctx), parent->id,
+ &mirror_image_internal);
+ if (r == -ENOENT) {
+ lderr(cct) << "mirroring is not enabled for the parent" << dendl;
+ return -EINVAL;
+ }
+ }
+ }
+ }
+
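+  // journal-based mirroring requires the journaling feature (which in turn
+  // requires exclusive-lock); enable any missing features on demand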
+ if (mode == RBD_MIRROR_IMAGE_MODE_JOURNAL &&
+ !ictx->test_features(RBD_FEATURE_JOURNALING)) {
+ uint64_t features = RBD_FEATURE_JOURNALING;
+ if (!ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
+ features |= RBD_FEATURE_EXCLUSIVE_LOCK;
+ }
+ r = ictx->operations->update_features(features, true);
+ if (r < 0) {
+ lderr(cct) << "cannot enable journaling: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ C_SaferCond ctx;
+ auto req = mirror::EnableRequest<ImageCtx>::create(
+ ictx, static_cast<cls::rbd::MirrorImageMode>(mode), "", false, &ctx);
+ req->send();
+
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "cannot enable mirroring: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::image_disable(I *ictx, bool force) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::MirrorMode mirror_mode;
+ r = cls_client::mirror_mode_get(&ictx->md_ctx, &mirror_mode);
+ if (r < 0) {
+ lderr(cct) << "cannot disable mirroring: failed to retrieve pool "
+ "mirroring mode: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) {
+ lderr(cct) << "cannot disable mirroring in the current pool mirroring "
+ "mode" << dendl;
+ return -EINVAL;
+ }
+
+ // is mirroring enabled for the image?
+ cls::rbd::MirrorImage mirror_image_internal;
+ r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id,
+ &mirror_image_internal);
+ if (r == -ENOENT) {
+ // mirroring is not enabled for this image
+ ldout(cct, 20) << "ignoring disable command: mirroring is not enabled for "
+ << "this image" << dendl;
+ return 0;
+ } else if (r == -EOPNOTSUPP) {
+ ldout(cct, 5) << "mirroring not supported by OSD" << dendl;
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to retrieve mirror image metadata: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ mirror_image_internal.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING;
+ r = cls_client::mirror_image_set(&ictx->md_ctx, ictx->id,
+ mirror_image_internal);
+ if (r < 0) {
+ lderr(cct) << "cannot disable mirroring: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ bool rollback = false;
+ BOOST_SCOPE_EXIT_ALL(ictx, &mirror_image_internal, &rollback) {
+ if (rollback) {
+ // restore the mask bit for treating the non-primary feature as read-only
+ ictx->image_lock.lock();
+ ictx->read_only_mask |= IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+ ictx->image_lock.unlock();
+
+ ictx->state->handle_update_notification();
+
+ // attempt to restore the image state
+ CephContext *cct = ictx->cct;
+ mirror_image_internal.state = cls::rbd::MIRROR_IMAGE_STATE_ENABLED;
+ int r = cls_client::mirror_image_set(&ictx->md_ctx, ictx->id,
+ mirror_image_internal);
+ if (r < 0) {
+ lderr(cct) << "failed to re-enable image mirroring: "
+ << cpp_strerror(r) << dendl;
+ }
+ }
+ };
+
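+  // refuse to disable mirroring while any clone (child image) of one of our
+  // snapshots still has mirroring enabled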
+ std::unique_lock image_locker{ictx->image_lock};
+ std::map<librados::snap_t, SnapInfo> snap_info = ictx->snap_info;
+ for (auto &info : snap_info) {
+ cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(),
+ ictx->md_ctx.get_namespace(),
+ ictx->id, info.first};
+ std::vector<librbd::linked_image_spec_t> child_images;
+ r = Image<I>::list_children(ictx, parent_spec, &child_images);
+ if (r < 0) {
+ rollback = true;
+ return r;
+ }
+
+ if (child_images.empty()) {
+ continue;
+ }
+
+ librados::IoCtx child_io_ctx;
+ int64_t child_pool_id = -1;
+    for (auto &child_image : child_images) {
+ if (child_pool_id == -1 ||
+ child_pool_id != child_image.pool_id ||
+ child_io_ctx.get_namespace() != child_image.pool_namespace) {
+ r = util::create_ioctx(ictx->md_ctx, "child image",
+ child_image.pool_id,
+ child_image.pool_namespace,
+ &child_io_ctx);
+ if (r < 0) {
+ rollback = true;
+ return r;
+ }
+
+ child_pool_id = child_image.pool_id;
+ }
+
+ cls::rbd::MirrorImage child_mirror_image_internal;
+ r = cls_client::mirror_image_get(&child_io_ctx, child_image.image_id,
+ &child_mirror_image_internal);
+ if (r != -ENOENT) {
+ rollback = true;
+ lderr(cct) << "mirroring is enabled on one or more children "
+ << dendl;
+ return -EBUSY;
+ }
+ }
+ }
+ image_locker.unlock();
+
+ if (mirror_image_internal.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+ // don't let the non-primary feature bit prevent image updates
+ ictx->image_lock.lock();
+ ictx->read_only_mask &= ~IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+ ictx->image_lock.unlock();
+
+ r = ictx->state->refresh();
+ if (r < 0) {
+ rollback = true;
+ return r;
+ }
+
+ // remove any snapshot-based mirroring image-meta from image
+ std::string mirror_uuid;
+ r = uuid_get(ictx->md_ctx, &mirror_uuid);
+ if (r < 0) {
+ rollback = true;
+ return r;
+ }
+
+ r = ictx->operations->metadata_remove(
+ mirror::snapshot::util::get_image_meta_key(mirror_uuid));
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "cannot remove snapshot image-meta key: " << cpp_strerror(r)
+ << dendl;
+ rollback = true;
+ return r;
+ }
+ }
+
+ C_SaferCond ctx;
+ auto req = mirror::DisableRequest<ImageCtx>::create(ictx, force, true,
+ &ctx);
+ req->send();
+
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "cannot disable mirroring: " << cpp_strerror(r) << dendl;
+ rollback = true;
+ return r;
+ }
+
+ if (mirror_image_internal.mode == cls::rbd::MIRROR_IMAGE_MODE_JOURNAL) {
+ r = ictx->operations->update_features(RBD_FEATURE_JOURNALING, false);
+ if (r < 0) {
+ lderr(cct) << "cannot disable journaling: " << cpp_strerror(r) << dendl;
+ // not fatal
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::image_promote(I *ictx, bool force) {
+ CephContext *cct = ictx->cct;
+
+ C_SaferCond ctx;
+ Mirror<I>::image_promote(ictx, force, &ctx);
+ int r = ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to promote image" << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+void Mirror<I>::image_promote(I *ictx, bool force, Context *on_finish) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << ", "
+ << "force=" << force << dendl;
+
+ // don't let the non-primary feature bit prevent image updates
+ ictx->image_lock.lock();
+ ictx->read_only_mask &= ~IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+ ictx->image_lock.unlock();
+
+ auto on_promote = new LambdaContext([ictx, on_finish](int r) {
+ ictx->image_lock.lock();
+ ictx->read_only_mask |= IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+ ictx->image_lock.unlock();
+
+ ictx->state->handle_update_notification();
+ on_finish->complete(r);
+ });
+
+ auto on_refresh = new LambdaContext([ictx, force, on_promote](int r) {
+ if (r < 0) {
+ lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl;
+ on_promote->complete(r);
+ return;
+ }
+
+ auto req = mirror::PromoteRequest<>::create(*ictx, force, on_promote);
+ req->send();
+ });
+ ictx->state->refresh(on_refresh);
+}
+
+template <typename I>
+int Mirror<I>::image_demote(I *ictx) {
+ CephContext *cct = ictx->cct;
+
+ C_SaferCond ctx;
+ Mirror<I>::image_demote(ictx, &ctx);
+ int r = ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to demote image" << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+void Mirror<I>::image_demote(I *ictx, Context *on_finish) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ auto on_cleanup = new LambdaContext([ictx, on_finish](int r) {
+ ictx->image_lock.lock();
+ ictx->read_only_mask |= IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+ ictx->image_lock.unlock();
+
+ ictx->state->handle_update_notification();
+
+ on_finish->complete(r);
+ });
+ auto on_refresh = new LambdaContext([ictx, on_cleanup](int r) {
+ if (r < 0) {
+ lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl;
+ on_cleanup->complete(r);
+ return;
+ }
+
+ auto req = mirror::DemoteRequest<>::create(*ictx, on_cleanup);
+ req->send();
+ });
+
+ // ensure we can create a snapshot after setting the non-primary
+ // feature bit
+ ictx->image_lock.lock();
+ ictx->read_only_mask &= ~IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+ ictx->image_lock.unlock();
+
+ ictx->state->refresh(on_refresh);
+}
+
+template <typename I>
+int Mirror<I>::image_resync(I *ictx) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::MirrorImage mirror_image;
+ mirror::PromotionState promotion_state;
+ std::string primary_mirror_uuid;
+ C_SaferCond get_info_ctx;
+ auto req = mirror::GetInfoRequest<I>::create(*ictx, &mirror_image,
+ &promotion_state,
+ &primary_mirror_uuid,
+ &get_info_ctx);
+ req->send();
+
+ r = get_info_ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+
+ if (promotion_state == mirror::PROMOTION_STATE_PRIMARY) {
+ lderr(cct) << "image is primary, cannot resync to itself" << dendl;
+ return -EINVAL;
+ }
+
+ if (mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_JOURNAL) {
+ // flag the journal indicating that we want to rebuild the local image
+ r = Journal<I>::request_resync(ictx);
+ if (r < 0) {
+ lderr(cct) << "failed to request resync: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ } else if (mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
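+    // for snapshot-based mirroring, record the resync request in the
+    // mirror image-meta object keyed by the local mirror uuid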
+ std::string mirror_uuid;
+ r = uuid_get(ictx->md_ctx, &mirror_uuid);
+ if (r < 0) {
+ return r;
+ }
+
+ mirror::snapshot::ImageMeta image_meta(ictx, mirror_uuid);
+
+ C_SaferCond load_meta_ctx;
+ image_meta.load(&load_meta_ctx);
+ r = load_meta_ctx.wait();
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to load mirror image-meta: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ image_meta.resync_requested = true;
+
+ C_SaferCond save_meta_ctx;
+ image_meta.save(&save_meta_ctx);
+ r = save_meta_ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to request resync: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ } else {
+ lderr(cct) << "unknown mirror mode" << dendl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+template <typename I>
+void Mirror<I>::image_get_info(I *ictx, mirror_image_info_t *mirror_image_info,
+ Context *on_finish) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ auto on_refresh = new LambdaContext(
+ [ictx, mirror_image_info, on_finish](int r) {
+ if (r < 0) {
+ lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl;
+ on_finish->complete(r);
+ return;
+ }
+
+ auto ctx = new C_ImageGetInfo(mirror_image_info, nullptr, on_finish);
+ auto req = mirror::GetInfoRequest<I>::create(*ictx, &ctx->mirror_image,
+ &ctx->promotion_state,
+ &ctx->primary_mirror_uuid,
+ ctx);
+ req->send();
+ });
+
+ if (ictx->state->is_refresh_required()) {
+ ictx->state->refresh(on_refresh);
+ } else {
+ on_refresh->complete(0);
+ }
+}
+
+template <typename I>
+int Mirror<I>::image_get_info(I *ictx, mirror_image_info_t *mirror_image_info) {
+ C_SaferCond ctx;
+ image_get_info(ictx, mirror_image_info, &ctx);
+
+ int r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+void Mirror<I>::image_get_info(librados::IoCtx& io_ctx,
+ asio::ContextWQ *op_work_queue,
+ const std::string &image_id,
+ mirror_image_info_t *mirror_image_info,
+ Context *on_finish) {
+ auto cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "pool_id=" << io_ctx.get_id() << ", image_id=" << image_id
+ << dendl;
+
+ auto ctx = new C_ImageGetInfo(mirror_image_info, nullptr, on_finish);
+ auto req = mirror::GetInfoRequest<I>::create(io_ctx, op_work_queue, image_id,
+ &ctx->mirror_image,
+ &ctx->promotion_state,
+ &ctx->primary_mirror_uuid, ctx);
+ req->send();
+}
+
+template <typename I>
+int Mirror<I>::image_get_info(librados::IoCtx& io_ctx,
+ asio::ContextWQ *op_work_queue,
+ const std::string &image_id,
+ mirror_image_info_t *mirror_image_info) {
+ C_SaferCond ctx;
+ image_get_info(io_ctx, op_work_queue, image_id, mirror_image_info, &ctx);
+
+ int r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+void Mirror<I>::image_get_mode(I *ictx, mirror_image_mode_t *mode,
+ Context *on_finish) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ auto ctx = new C_ImageGetInfo(nullptr, mode, on_finish);
+ auto req = mirror::GetInfoRequest<I>::create(*ictx, &ctx->mirror_image,
+ &ctx->promotion_state,
+ &ctx->primary_mirror_uuid, ctx);
+ req->send();
+}
+
+template <typename I>
+int Mirror<I>::image_get_mode(I *ictx, mirror_image_mode_t *mode) {
+ C_SaferCond ctx;
+ image_get_mode(ictx, mode, &ctx);
+
+ int r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+void Mirror<I>::image_get_global_status(I *ictx,
+ mirror_image_global_status_t *status,
+ Context *on_finish) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ auto ctx = new C_ImageGetGlobalStatus(ictx->name, status, on_finish);
+ auto req = mirror::GetStatusRequest<I>::create(
+ *ictx, &ctx->mirror_image_status_internal, &ctx->mirror_image,
+ &ctx->promotion_state, ctx);
+ req->send();
+}
+
+template <typename I>
+int Mirror<I>::image_get_global_status(I *ictx,
+ mirror_image_global_status_t *status) {
+ C_SaferCond ctx;
+ image_get_global_status(ictx, status, &ctx);
+
+ int r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::image_get_instance_id(I *ictx, std::string *instance_id) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ cls::rbd::MirrorImage mirror_image;
+ int r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, &mirror_image);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve mirroring state: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (mirror_image.state != cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+ lderr(cct) << "mirroring is not currently enabled" << dendl;
+ return -EINVAL;
+ }
+
+ entity_inst_t instance;
+ r = cls_client::mirror_image_instance_get(&ictx->md_ctx,
+ mirror_image.global_image_id,
+ &instance);
+ if (r < 0) {
+ if (r != -ENOENT && r != -ESTALE) {
+ lderr(cct) << "failed to get mirror image instance: " << cpp_strerror(r)
+ << dendl;
+ }
+ return r;
+ }
+
+ *instance_id = stringify(instance.name.num());
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::site_name_get(librados::Rados& rados, std::string* name) {
+ CephContext *cct = reinterpret_cast<CephContext *>(rados.cct());
+ ldout(cct, 20) << dendl;
+
+ int r = get_config_key(rados, RBD_MIRROR_SITE_NAME_CONFIG_KEY, name);
+ if (r == -EOPNOTSUPP) {
+ return r;
+ } else if (r == -ENOENT || name->empty()) {
+ // default to the cluster fsid
+ r = rados.cluster_fsid(name);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r)
+ << dendl;
+ }
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to retrieve site name: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::site_name_set(librados::Rados& rados, const std::string& name) {
+ CephContext *cct = reinterpret_cast<CephContext *>(rados.cct());
+
+ std::string site_name{name};
+ boost::algorithm::trim(site_name);
+ ldout(cct, 20) << "site_name=" << site_name << dendl;
+
+  int r = set_config_key(rados, RBD_MIRROR_SITE_NAME_CONFIG_KEY, site_name);
+ if (r == -EOPNOTSUPP) {
+ return r;
+ } else if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to update site name: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::mode_get(librados::IoCtx& io_ctx,
+ rbd_mirror_mode_t *mirror_mode) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << dendl;
+
+ cls::rbd::MirrorMode mirror_mode_internal;
+ int r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode_internal);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ switch (mirror_mode_internal) {
+ case cls::rbd::MIRROR_MODE_DISABLED:
+ case cls::rbd::MIRROR_MODE_IMAGE:
+ case cls::rbd::MIRROR_MODE_POOL:
+ *mirror_mode = static_cast<rbd_mirror_mode_t>(mirror_mode_internal);
+ break;
+ default:
+ lderr(cct) << "unknown mirror mode ("
+ << static_cast<uint32_t>(mirror_mode_internal) << ")"
+ << dendl;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::mode_set(librados::IoCtx& io_ctx,
+ rbd_mirror_mode_t mirror_mode) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << dendl;
+
+ cls::rbd::MirrorMode next_mirror_mode;
+ switch (mirror_mode) {
+ case RBD_MIRROR_MODE_DISABLED:
+ case RBD_MIRROR_MODE_IMAGE:
+ case RBD_MIRROR_MODE_POOL:
+ next_mirror_mode = static_cast<cls::rbd::MirrorMode>(mirror_mode);
+ break;
+ default:
+ lderr(cct) << "unknown mirror mode ("
+ << static_cast<uint32_t>(mirror_mode) << ")" << dendl;
+ return -EINVAL;
+ }
+
+ int r;
+ if (next_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ // fail early if pool still has peers registered and attempting to disable
+ std::vector<cls::rbd::MirrorPeer> mirror_peers;
+ r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to list peers: " << cpp_strerror(r) << dendl;
+ return r;
+ } else if (!mirror_peers.empty()) {
+ lderr(cct) << "mirror peers still registered" << dendl;
+ return -EBUSY;
+ }
+ }
+
+ cls::rbd::MirrorMode current_mirror_mode;
+ r = cls_client::mirror_mode_get(&io_ctx, &current_mirror_mode);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (current_mirror_mode == next_mirror_mode) {
+ return 0;
+ } else if (current_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ uuid_d uuid_gen;
+ uuid_gen.generate_random();
+ r = cls_client::mirror_uuid_set(&io_ctx, uuid_gen.to_string());
+ if (r < 0) {
+ lderr(cct) << "failed to allocate mirroring uuid: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ }
+
+ if (current_mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) {
+ r = cls_client::mirror_mode_set(&io_ctx, cls::rbd::MIRROR_MODE_IMAGE);
+ if (r < 0) {
+ lderr(cct) << "failed to set mirror mode to image: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = MirroringWatcher<>::notify_mode_updated(io_ctx,
+ cls::rbd::MIRROR_MODE_IMAGE);
+ if (r < 0) {
+ lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+ << dendl;
+ }
+ }
+
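+  // image mode needs no per-image changes beyond the pool-level update above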
+ if (next_mirror_mode == cls::rbd::MIRROR_MODE_IMAGE) {
+ return 0;
+ }
+
+ if (next_mirror_mode == cls::rbd::MIRROR_MODE_POOL) {
+ std::map<std::string, std::string> images;
+ r = Image<I>::list_images_v2(io_ctx, &images);
+ if (r < 0) {
+ lderr(cct) << "failed listing images: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (const auto& img_pair : images) {
+ uint64_t features;
+ uint64_t incompatible_features;
+ r = cls_client::get_features(&io_ctx, util::header_name(img_pair.second),
+ true, &features, &incompatible_features);
+ if (r < 0) {
+ lderr(cct) << "error getting features for image " << img_pair.first
+ << ": " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ // Enable only journal based mirroring
+
+ if ((features & RBD_FEATURE_JOURNALING) != 0) {
+ I *img_ctx = I::create("", img_pair.second, nullptr, io_ctx, false);
+ r = img_ctx->state->open(0);
+ if (r < 0) {
+ lderr(cct) << "error opening image "<< img_pair.first << ": "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = image_enable(img_ctx, RBD_MIRROR_IMAGE_MODE_JOURNAL, true);
+ int close_r = img_ctx->state->close();
+ if (r < 0) {
+ lderr(cct) << "error enabling mirroring for image "
+ << img_pair.first << ": " << cpp_strerror(r) << dendl;
+ return r;
+ } else if (close_r < 0) {
+ lderr(cct) << "failed to close image " << img_pair.first << ": "
+ << cpp_strerror(close_r) << dendl;
+ return close_r;
+ }
+ }
+ }
+ } else if (next_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ while (true) {
+ bool retry_busy = false;
+ bool pending_busy = false;
+
+ std::set<std::string> image_ids;
+ r = list_mirror_images(io_ctx, image_ids);
+ if (r < 0) {
+ lderr(cct) << "failed listing images: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (const auto& img_id : image_ids) {
+ if (current_mirror_mode == cls::rbd::MIRROR_MODE_IMAGE) {
+ cls::rbd::MirrorImage mirror_image;
+ r = cls_client::mirror_image_get(&io_ctx, img_id, &mirror_image);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve mirroring state for image id "
+ << img_id << ": " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+ lderr(cct) << "failed to disable mirror mode: there are still "
+ << "images with mirroring enabled" << dendl;
+ return -EINVAL;
+ }
+ } else {
+ I *img_ctx = I::create("", img_id, nullptr, io_ctx, false);
+ r = img_ctx->state->open(0);
+ if (r < 0) {
+ lderr(cct) << "error opening image id "<< img_id << ": "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = image_disable(img_ctx, false);
+ int close_r = img_ctx->state->close();
+ if (r == -EBUSY) {
+ pending_busy = true;
+ } else if (r < 0) {
+ lderr(cct) << "error disabling mirroring for image id " << img_id
+                     << ": " << cpp_strerror(r) << dendl;
+ return r;
+ } else if (close_r < 0) {
+ lderr(cct) << "failed to close image id " << img_id << ": "
+ << cpp_strerror(close_r) << dendl;
+ return close_r;
+ } else if (pending_busy) {
+ // at least one mirrored image was successfully disabled, so we can
+ // retry any failures caused by busy parent/child relationships
+ retry_busy = true;
+ }
+ }
+ }
+
+ if (!retry_busy && pending_busy) {
+ lderr(cct) << "error disabling mirroring for one or more images"
+ << dendl;
+ return -EBUSY;
+ } else if (!retry_busy) {
+ break;
+ }
+ }
+ }
+
+ r = cls_client::mirror_mode_set(&io_ctx, next_mirror_mode);
+ if (r < 0) {
+ lderr(cct) << "failed to set mirror mode: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = MirroringWatcher<>::notify_mode_updated(io_ctx, next_mirror_mode);
+ if (r < 0) {
+ lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+ << dendl;
+ }
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::uuid_get(librados::IoCtx& io_ctx, std::string* mirror_uuid) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << dendl;
+
+ C_SaferCond ctx;
+ uuid_get(io_ctx, mirror_uuid, &ctx);
+ int r = ctx.wait();
+ if (r < 0) {
+ if (r != -ENOENT) {
+ lderr(cct) << "failed to retrieve mirroring uuid: " << cpp_strerror(r)
+ << dendl;
+ }
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+void Mirror<I>::uuid_get(librados::IoCtx& io_ctx, std::string* mirror_uuid,
+ Context* on_finish) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << dendl;
+
+ auto req = mirror::GetUuidRequest<I>::create(io_ctx, mirror_uuid, on_finish);
+ req->send();
+}
+
+template <typename I>
+int Mirror<I>::peer_bootstrap_create(librados::IoCtx& io_ctx,
+ std::string* token) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << dendl;
+
+ auto mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+ int r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve mirroring mode: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ return -EINVAL;
+ }
+
+ // retrieve the cluster fsid
+ std::string fsid;
+ librados::Rados rados(io_ctx);
+ r = rados.cluster_fsid(&fsid);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ std::string peer_client_id;
+ std::string cephx_key;
+ r = create_bootstrap_user(cct, rados, &peer_client_id, &cephx_key);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string mon_host = get_mon_host(cct);
+
+ // format the token response
+ bufferlist token_bl;
+ token_bl.append(
+ R"({)" \
+ R"("fsid":")" + fsid + R"(",)" + \
+ R"("client_id":")" + peer_client_id + R"(",)" + \
+ R"("key":")" + cephx_key + R"(",)" + \
+ R"("mon_host":")" + \
+ boost::replace_all_copy(mon_host, "\"", "\\\"") + R"(")" + \
+ R"(})");
+ ldout(cct, 20) << "token=" << token_bl.to_str() << dendl;
+
+ bufferlist base64_bl;
+ token_bl.encode_base64(base64_bl);
+ *token = base64_bl.to_str();
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::peer_bootstrap_import(librados::IoCtx& io_ctx,
+ rbd_mirror_peer_direction_t direction,
+ const std::string& token) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << dendl;
+
+ if (direction != RBD_MIRROR_PEER_DIRECTION_RX &&
+ direction != RBD_MIRROR_PEER_DIRECTION_RX_TX) {
+ lderr(cct) << "invalid mirror peer direction" << dendl;
+ return -EINVAL;
+ }
+
+ bufferlist token_bl;
+ try {
+ bufferlist base64_bl;
+ base64_bl.append(token);
+ token_bl.decode_base64(base64_bl);
+ } catch (buffer::error& err) {
+ lderr(cct) << "failed to decode base64" << dendl;
+ return -EINVAL;
+ }
+
+ ldout(cct, 20) << "token=" << token_bl.to_str() << dendl;
+
+ bool json_valid = false;
+ std::string expected_remote_fsid;
+ std::string remote_client_id;
+ std::string remote_key;
+ std::string remote_mon_host;
+
+ json_spirit::mValue json_root;
+  if (json_spirit::read(token_bl.to_str(), json_root)) {
+ try {
+ auto& json_obj = json_root.get_obj();
+ expected_remote_fsid = json_obj["fsid"].get_str();
+ remote_client_id = json_obj["client_id"].get_str();
+ remote_key = json_obj["key"].get_str();
+ remote_mon_host = json_obj["mon_host"].get_str();
+ json_valid = true;
+ } catch (std::runtime_error&) {
+ }
+ }
+
+ if (!json_valid) {
+ lderr(cct) << "invalid bootstrap token JSON received" << dendl;
+ return -EINVAL;
+ }
+
+ // sanity check import process
+ std::string local_fsid;
+ librados::Rados rados(io_ctx);
+ int r = rados.cluster_fsid(&local_fsid);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ std::string local_site_name;
+ r = site_name_get(rados, &local_site_name);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve cluster site name: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ // attempt to connect to remote cluster
+ librados::Rados remote_rados;
+ remote_rados.init(remote_client_id.c_str());
+
+ auto remote_cct = reinterpret_cast<CephContext*>(remote_rados.cct());
+ remote_cct->_conf.set_val("mon_host", remote_mon_host);
+ remote_cct->_conf.set_val("key", remote_key);
+
+ r = remote_rados.connect();
+ if (r < 0) {
+ lderr(cct) << "failed to connect to peer cluster: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ std::string remote_fsid;
+ r = remote_rados.cluster_fsid(&remote_fsid);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve remote cluster fsid: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (local_fsid == remote_fsid) {
+ lderr(cct) << "cannot import token for local cluster" << dendl;
+ return -EINVAL;
+ } else if (expected_remote_fsid != remote_fsid) {
+ lderr(cct) << "unexpected remote cluster fsid" << dendl;
+ return -EINVAL;
+ }
+
+ std::string remote_site_name;
+ r = site_name_get(remote_rados, &remote_site_name);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve remote cluster site name: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (local_site_name == remote_site_name) {
+ lderr(cct) << "cannot import token for duplicate site name" << dendl;
+ return -EINVAL;
+ }
+
+ librados::IoCtx remote_io_ctx;
+ r = remote_rados.ioctx_create(io_ctx.get_pool_name().c_str(), remote_io_ctx);
+ if (r == -ENOENT) {
+ ldout(cct, 10) << "remote pool does not exist" << dendl;
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to open remote pool '" << io_ctx.get_pool_name()
+ << "': " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ auto remote_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+ r = cls_client::mirror_mode_get(&remote_io_ctx, &remote_mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve remote mirroring mode: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (remote_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ return -ENOSYS;
+ }
+
+ auto local_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+ r = cls_client::mirror_mode_get(&io_ctx, &local_mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve local mirroring mode: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (local_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ // copy mirror mode from remote peer
+ r = mode_set(io_ctx, static_cast<rbd_mirror_mode_t>(remote_mirror_mode));
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (direction == RBD_MIRROR_PEER_DIRECTION_RX_TX) {
+ // create a local mirror peer user and export it to the remote cluster
+ std::string local_client_id;
+ std::string local_key;
+ r = create_bootstrap_user(cct, rados, &local_client_id, &local_key);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string local_mon_host = get_mon_host(cct);
+
+ // create local cluster peer in remote cluster
+ r = create_bootstrap_peer(cct, remote_io_ctx,
+ RBD_MIRROR_PEER_DIRECTION_RX_TX, local_site_name,
+ local_fsid, local_client_id, local_key,
+ local_mon_host, "local", "remote");
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ // create remote cluster peer in local cluster
+ r = create_bootstrap_peer(cct, io_ctx, direction, remote_site_name,
+ remote_fsid, remote_client_id, remote_key,
+ remote_mon_host, "remote", "local");
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::peer_site_add(librados::IoCtx& io_ctx, std::string *uuid,
+ mirror_peer_direction_t direction,
+ const std::string &site_name,
+ const std::string &client_name) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "name=" << site_name << ", "
+ << "client=" << client_name << dendl;
+
+ if (cct->_conf->cluster == site_name) {
+ lderr(cct) << "cannot add self as remote peer" << dendl;
+ return -EINVAL;
+ }
+
+ if (direction == RBD_MIRROR_PEER_DIRECTION_TX) {
+ return -EINVAL;
+ }
+
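+  // generate a UUID for the peer; retry on the unlikely collision (-ESTALE)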
+ int r;
+ do {
+ uuid_d uuid_gen;
+ uuid_gen.generate_random();
+
+ *uuid = uuid_gen.to_string();
+ r = cls_client::mirror_peer_add(
+ &io_ctx, {*uuid, static_cast<cls::rbd::MirrorPeerDirection>(direction),
+ site_name, client_name, ""});
+ if (r == -ESTALE) {
+ ldout(cct, 5) << "duplicate UUID detected, retrying" << dendl;
+ } else if (r < 0) {
+ lderr(cct) << "failed to add mirror peer '" << site_name << "': "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ } while (r == -ESTALE);
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::peer_site_remove(librados::IoCtx& io_ctx,
+ const std::string &uuid) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "uuid=" << uuid << dendl;
+
+ int r = remove_peer_config_key(io_ctx, uuid);
+ if (r < 0) {
+ lderr(cct) << "failed to remove peer attributes '" << uuid << "': "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = cls_client::mirror_peer_remove(&io_ctx, uuid);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to remove peer '" << uuid << "': "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ std::vector<std::string> names;
+ r = Namespace<I>::list(io_ctx, &names);
+ if (r < 0) {
+ return r;
+ }
+
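+  // the empty name denotes the pool's default namespace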
+ names.push_back("");
+
+ librados::IoCtx ns_io_ctx;
+ ns_io_ctx.dup(io_ctx);
+
+ for (auto &name : names) {
+ ns_io_ctx.set_namespace(name);
+
+ std::set<std::string> image_ids;
+ r = list_mirror_images(ns_io_ctx, image_ids);
+ if (r < 0) {
+ lderr(cct) << "failed listing images in "
+                 << (name.empty() ? "default" : name) << " namespace: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (const auto& image_id : image_ids) {
+ cls::rbd::MirrorImage mirror_image;
+ r = cls_client::mirror_image_get(&ns_io_ctx, image_id, &mirror_image);
+ if (r == -ENOENT) {
+ continue;
+ }
+ if (r < 0) {
+ lderr(cct) << "error getting mirror info for image " << image_id
+ << ": " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ if (mirror_image.mode != cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+ continue;
+ }
+
+ // Snapshot based mirroring. Unlink the peer from mirroring snapshots.
+ // TODO: optimize.
+
+ I *img_ctx = I::create("", image_id, nullptr, ns_io_ctx, false);
+ img_ctx->read_only_mask &= ~IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+
+ r = img_ctx->state->open(0);
+ if (r == -ENOENT) {
+ continue;
+ }
+ if (r < 0) {
+ lderr(cct) << "error opening image " << image_id << ": "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ std::list<uint64_t> snap_ids;
+ {
+ std::shared_lock image_locker{img_ctx->image_lock};
+ for (auto &it : img_ctx->snap_info) {
+ auto info = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
+ &it.second.snap_namespace);
+ if (info && info->mirror_peer_uuids.count(uuid)) {
+ snap_ids.push_back(it.first);
+ }
+ }
+ }
+ for (auto snap_id : snap_ids) {
+ C_SaferCond cond;
+ auto req = mirror::snapshot::UnlinkPeerRequest<I>::create(
+ img_ctx, snap_id, uuid, true, &cond);
+ req->send();
+ r = cond.wait();
+ if (r == -ENOENT) {
+ r = 0;
+ }
+ if (r < 0) {
+ break;
+ }
+ }
+
+ int close_r = img_ctx->state->close();
+ if (r < 0) {
+ lderr(cct) << "error unlinking peer for image " << image_id << ": "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (close_r < 0) {
+ lderr(cct) << "failed to close image " << image_id << ": "
+ << cpp_strerror(close_r) << dendl;
+ return close_r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::peer_site_list(librados::IoCtx& io_ctx,
+ std::vector<mirror_peer_site_t> *peers) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << dendl;
+
+ std::vector<cls::rbd::MirrorPeer> mirror_peers;
+ int r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to list peers: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ peers->clear();
+ peers->reserve(mirror_peers.size());
+ for (auto &mirror_peer : mirror_peers) {
+ mirror_peer_site_t peer;
+ peer.uuid = mirror_peer.uuid;
+ peer.direction = static_cast<mirror_peer_direction_t>(
+ mirror_peer.mirror_peer_direction);
+ peer.site_name = mirror_peer.site_name;
+ peer.mirror_uuid = mirror_peer.mirror_uuid;
+ peer.client_name = mirror_peer.client_name;
+ peer.last_seen = mirror_peer.last_seen.sec();
+ peers->push_back(peer);
+ }
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::peer_site_set_client(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ const std::string &client_name) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "uuid=" << uuid << ", "
+ << "client=" << client_name << dendl;
+
+ int r = cls_client::mirror_peer_set_client(&io_ctx, uuid, client_name);
+ if (r < 0) {
+ lderr(cct) << "failed to update client '" << uuid << "': "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::peer_site_set_name(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ const std::string &site_name) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "uuid=" << uuid << ", "
+ << "name=" << site_name << dendl;
+
+ if (cct->_conf->cluster == site_name) {
+ lderr(cct) << "cannot set self as remote peer" << dendl;
+ return -EINVAL;
+ }
+
+ int r = cls_client::mirror_peer_set_cluster(&io_ctx, uuid, site_name);
+ if (r < 0) {
+ lderr(cct) << "failed to update site '" << uuid << "': "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::peer_site_set_direction(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ mirror_peer_direction_t direction) {
+ cls::rbd::MirrorPeerDirection mirror_peer_direction = static_cast<
+ cls::rbd::MirrorPeerDirection>(direction);
+
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "uuid=" << uuid << ", "
+ << "direction=" << mirror_peer_direction << dendl;
+
+ int r = cls_client::mirror_peer_set_direction(&io_ctx, uuid,
+ mirror_peer_direction);
+ if (r < 0) {
+ lderr(cct) << "failed to update direction '" << uuid << "': "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::peer_site_get_attributes(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ Attributes* attributes) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "uuid=" << uuid << dendl;
+
+ attributes->clear();
+
+ librados::Rados rados(io_ctx);
+ std::string value;
+ int r = get_config_key(rados, get_peer_config_key_name(io_ctx.get_id(), uuid),
+ &value);
+ if (r == -ENOENT || value.empty()) {
+ return -ENOENT;
+ } else if (r < 0) {
+ lderr(cct) << "failed to retrieve peer attributes: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ bool json_valid = false;
+ json_spirit::mValue json_root;
+  if (json_spirit::read(value, json_root)) {
+ try {
+ auto& json_obj = json_root.get_obj();
+ for (auto& pairs : json_obj) {
+ (*attributes)[pairs.first] = pairs.second.get_str();
+ }
+ json_valid = true;
+ } catch (std::runtime_error&) {
+ }
+ }
+
+ if (!json_valid) {
+ lderr(cct) << "invalid peer attributes JSON received" << dendl;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::peer_site_set_attributes(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ const Attributes& attributes) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "uuid=" << uuid << ", "
+ << "attributes=" << attributes << dendl;
+
+ std::vector<mirror_peer_site_t> mirror_peers;
+ int r = peer_site_list(io_ctx, &mirror_peers);
+ if (r < 0) {
+ return r;
+ }
+
+ if (std::find_if(mirror_peers.begin(), mirror_peers.end(),
+ [&uuid](const librbd::mirror_peer_site_t& peer) {
+ return uuid == peer.uuid;
+ }) == mirror_peers.end()) {
+ ldout(cct, 5) << "mirror peer uuid " << uuid << " does not exist" << dendl;
+ return -ENOENT;
+ }
+
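+  // build the attributes JSON with escaped quotes, since set_config_key
+  // embeds the value inside the mon command's own JSON string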
+ std::stringstream ss;
+ ss << "{";
+ for (auto& pair : attributes) {
+ ss << "\\\"" << pair.first << "\\\": "
+ << "\\\"" << pair.second << "\\\"";
+ if (&pair != &(*attributes.rbegin())) {
+ ss << ", ";
+ }
+ }
+ ss << "}";
+
+ librados::Rados rados(io_ctx);
+ r = set_config_key(rados, get_peer_config_key_name(io_ctx.get_id(), uuid),
+ ss.str());
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to update peer attributes: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::image_global_status_list(
+ librados::IoCtx& io_ctx, const std::string &start_id, size_t max,
+ IdToMirrorImageGlobalStatus *images) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ int r;
+
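+  // build an image id -> name map so statuses can be reported by image name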
+ std::map<std::string, std::string> id_to_name;
+ {
+ std::map<std::string, std::string> name_to_id;
+ r = Image<I>::list_images_v2(io_ctx, &name_to_id);
+ if (r < 0) {
+ return r;
+ }
+    for (const auto& it : name_to_id) {
+ id_to_name[it.second] = it.first;
+ }
+ }
+
+ std::map<std::string, cls::rbd::MirrorImage> images_;
+ std::map<std::string, cls::rbd::MirrorImageStatus> statuses_;
+
+ r = librbd::cls_client::mirror_image_status_list(&io_ctx, start_id, max,
+ &images_, &statuses_);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to list mirror image statuses: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ const std::string STATUS_NOT_FOUND("status not found");
+ for (auto it = images_.begin(); it != images_.end(); ++it) {
+ auto &image_id = it->first;
+ auto &info = it->second;
+ if (info.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLED) {
+ continue;
+ }
+
+ auto &image_name = id_to_name[image_id];
+ if (image_name.empty()) {
+ lderr(cct) << "failed to find image name for image " << image_id << ", "
+ << "using image id as name" << dendl;
+ image_name = image_id;
+ }
+
+ mirror_image_global_status_t& global_status = (*images)[image_id];
+ global_status.name = image_name;
+ global_status.info = mirror_image_info_t{
+ info.global_image_id,
+ static_cast<mirror_image_state_t>(info.state),
+ false}; // XXX: To set "primary" right would require an additional call.
+
+ bool found_local_site_status = false;
+ auto s_it = statuses_.find(image_id);
+ if (s_it != statuses_.end()) {
+ auto& status = s_it->second;
+
+ global_status.site_statuses.reserve(
+ status.mirror_image_site_statuses.size());
+ for (auto& site_status : status.mirror_image_site_statuses) {
+ if (site_status.mirror_uuid ==
+ cls::rbd::MirrorImageSiteStatus::LOCAL_MIRROR_UUID) {
+ found_local_site_status = true;
+ }
+
+ global_status.site_statuses.push_back(mirror_image_site_status_t{
+ site_status.mirror_uuid,
+ static_cast<mirror_image_status_state_t>(site_status.state),
+ site_status.state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN ?
+ STATUS_NOT_FOUND : site_status.description,
+ site_status.last_update.sec(), site_status.up});
+ }
+ }
+
+ if (!found_local_site_status) {
+ global_status.site_statuses.push_back(mirror_image_site_status_t{
+ cls::rbd::MirrorImageSiteStatus::LOCAL_MIRROR_UUID,
+ MIRROR_IMAGE_STATUS_STATE_UNKNOWN, STATUS_NOT_FOUND, 0, false});
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::image_status_summary(librados::IoCtx& io_ctx,
+ MirrorImageStatusStates *states) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ std::vector<cls::rbd::MirrorPeer> mirror_peers;
+ int r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to list mirror peers: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ std::map<cls::rbd::MirrorImageStatusState, int32_t> states_;
+ r = cls_client::mirror_image_status_get_summary(&io_ctx, mirror_peers,
+ &states_);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to get mirror status summary: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ for (auto &s : states_) {
+ (*states)[static_cast<mirror_image_status_state_t>(s.first)] = s.second;
+ }
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::image_instance_id_list(
+ librados::IoCtx& io_ctx, const std::string &start_image_id, size_t max,
+ std::map<std::string, std::string> *instance_ids) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ std::map<std::string, entity_inst_t> instances;
+
+ int r = librbd::cls_client::mirror_image_instance_list(
+ &io_ctx, start_image_id, max, &instances);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to list mirror image instances: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+  for (const auto& it : instances) {
+ (*instance_ids)[it.first] = stringify(it.second.name.num());
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::image_info_list(
+ librados::IoCtx& io_ctx, mirror_image_mode_t *mode_filter,
+ const std::string &start_id, size_t max,
+ std::map<std::string, std::pair<mirror_image_mode_t,
+ mirror_image_info_t>> *entries) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "pool=" << io_ctx.get_pool_name() << ", mode_filter="
+ << (mode_filter ? stringify(*mode_filter) : "null")
+ << ", start_id=" << start_id << ", max=" << max << dendl;
+
+ std::string last_read = start_id;
+ entries->clear();
+
+ while (entries->size() < max) {
+ std::map<std::string, cls::rbd::MirrorImage> images;
+ std::map<std::string, cls::rbd::MirrorImageStatus> statuses;
+
+ int r = librbd::cls_client::mirror_image_status_list(&io_ctx, last_read,
+ max, &images,
+ &statuses);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to list mirror image statuses: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (images.empty()) {
+ break;
+ }
+
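+    // image_get_info() below needs a work queue; AsioEngine supplies one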
+ AsioEngine asio_engine(io_ctx);
+
+ for (auto &it : images) {
+ auto &image_id = it.first;
+ auto &image = it.second;
+ auto mode = static_cast<mirror_image_mode_t>(image.mode);
+
+ if ((mode_filter && mode != *mode_filter) ||
+ image.state != cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+ continue;
+ }
+
+ // need to call get_info for every image to retrieve promotion state
+
+ mirror_image_info_t info;
+ r = image_get_info(io_ctx, asio_engine.get_work_queue(), image_id, &info);
+ if (r < 0) {
+ continue;
+ }
+
+ (*entries)[image_id] = std::make_pair(mode, info);
+ if (entries->size() == max) {
+ break;
+ }
+ }
+
+ last_read = images.rbegin()->first;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Mirror<I>::image_snapshot_create(I *ictx, uint32_t flags,
+ uint64_t *snap_id) {
+ C_SaferCond ctx;
+ Mirror<I>::image_snapshot_create(ictx, flags, snap_id, &ctx);
+
+ return ctx.wait();
+}
+
+template <typename I>
+void Mirror<I>::image_snapshot_create(I *ictx, uint32_t flags,
+ uint64_t *snap_id, Context *on_finish) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ uint64_t snap_create_flags = 0;
+ int r = util::snap_create_flags_api_to_internal(cct, flags,
+ &snap_create_flags);
+ if (r < 0) {
+ on_finish->complete(r);
+ return;
+ }
+
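+  // chain: optional refresh -> mirror GetInfoRequest -> snapshot create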
+ auto on_refresh = new LambdaContext(
+ [ictx, snap_create_flags, snap_id, on_finish](int r) {
+ if (r < 0) {
+ lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl;
+ on_finish->complete(r);
+ return;
+ }
+
+ auto ctx = new C_ImageSnapshotCreate<I>(ictx, snap_create_flags, snap_id,
+ on_finish);
+ auto req = mirror::GetInfoRequest<I>::create(*ictx, &ctx->mirror_image,
+ &ctx->promotion_state,
+ &ctx->primary_mirror_uuid,
+ ctx);
+ req->send();
+ });
+
+ if (ictx->state->is_refresh_required()) {
+ ictx->state->refresh(on_refresh);
+ } else {
+ on_refresh->complete(0);
+ }
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Mirror<librbd::ImageCtx>;
diff --git a/src/librbd/api/Mirror.h b/src/librbd/api/Mirror.h
new file mode 100644
index 000000000..b3a552b13
--- /dev/null
+++ b/src/librbd/api/Mirror.h
@@ -0,0 +1,126 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef LIBRBD_API_MIRROR_H
+#define LIBRBD_API_MIRROR_H
+
+#include "include/rbd/librbd.hpp"
+#include <map>
+#include <string>
+#include <vector>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Mirror {
+ typedef std::map<std::string, std::string> Attributes;
+ typedef std::map<std::string, mirror_image_global_status_t>
+ IdToMirrorImageGlobalStatus;
+ typedef std::map<mirror_image_status_state_t, int> MirrorImageStatusStates;
+
+ static int site_name_get(librados::Rados& rados, std::string* name);
+ static int site_name_set(librados::Rados& rados, const std::string& name);
+
+ static int mode_get(librados::IoCtx& io_ctx, rbd_mirror_mode_t *mirror_mode);
+ static int mode_set(librados::IoCtx& io_ctx, rbd_mirror_mode_t mirror_mode);
+
+ static int uuid_get(librados::IoCtx& io_ctx, std::string* mirror_uuid);
+ static void uuid_get(librados::IoCtx& io_ctx, std::string* mirror_uuid,
+ Context* on_finish);
+
+ static int peer_bootstrap_create(librados::IoCtx& io_ctx, std::string* token);
+ static int peer_bootstrap_import(librados::IoCtx& io_ctx,
+ rbd_mirror_peer_direction_t direction,
+ const std::string& token);
+
+ static int peer_site_add(librados::IoCtx& io_ctx, std::string *uuid,
+ mirror_peer_direction_t direction,
+ const std::string &site_name,
+ const std::string &client_name);
+ static int peer_site_remove(librados::IoCtx& io_ctx, const std::string &uuid);
+ static int peer_site_list(librados::IoCtx& io_ctx,
+ std::vector<mirror_peer_site_t> *peers);
+ static int peer_site_set_client(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ const std::string &client_name);
+ static int peer_site_set_name(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ const std::string &site_name);
+ static int peer_site_set_direction(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ mirror_peer_direction_t direction);
+ static int peer_site_get_attributes(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ Attributes* attributes);
+ static int peer_site_set_attributes(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ const Attributes& attributes);
+
+ static int image_global_status_list(librados::IoCtx& io_ctx,
+ const std::string &start_id, size_t max,
+ IdToMirrorImageGlobalStatus *images);
+
+ static int image_status_summary(librados::IoCtx& io_ctx,
+ MirrorImageStatusStates *states);
+ static int image_instance_id_list(librados::IoCtx& io_ctx,
+ const std::string &start_image_id,
+ size_t max,
+ std::map<std::string, std::string> *ids);
+
+ static int image_info_list(
+ librados::IoCtx& io_ctx, mirror_image_mode_t *mode_filter,
+ const std::string &start_id, size_t max,
+ std::map<std::string, std::pair<mirror_image_mode_t,
+ mirror_image_info_t>> *entries);
+
+ static int image_enable(ImageCtxT *ictx, mirror_image_mode_t mode,
+ bool relax_same_pool_parent_check);
+ static int image_disable(ImageCtxT *ictx, bool force);
+ static int image_promote(ImageCtxT *ictx, bool force);
+ static void image_promote(ImageCtxT *ictx, bool force, Context *on_finish);
+ static int image_demote(ImageCtxT *ictx);
+ static void image_demote(ImageCtxT *ictx, Context *on_finish);
+ static int image_resync(ImageCtxT *ictx);
+ static int image_get_info(ImageCtxT *ictx,
+ mirror_image_info_t *mirror_image_info);
+ static void image_get_info(ImageCtxT *ictx,
+ mirror_image_info_t *mirror_image_info,
+ Context *on_finish);
+ static int image_get_info(librados::IoCtx& io_ctx,
+ asio::ContextWQ *op_work_queue,
+ const std::string &image_id,
+ mirror_image_info_t *mirror_image_info);
+ static void image_get_info(librados::IoCtx& io_ctx,
+ asio::ContextWQ *op_work_queue,
+ const std::string &image_id,
+ mirror_image_info_t *mirror_image_info,
+ Context *on_finish);
+ static int image_get_mode(ImageCtxT *ictx, mirror_image_mode_t *mode);
+ static void image_get_mode(ImageCtxT *ictx, mirror_image_mode_t *mode,
+ Context *on_finish);
+ static int image_get_global_status(ImageCtxT *ictx,
+ mirror_image_global_status_t *status);
+ static void image_get_global_status(ImageCtxT *ictx,
+ mirror_image_global_status_t *status,
+ Context *on_finish);
+ static int image_get_instance_id(ImageCtxT *ictx, std::string *instance_id);
+
+ static int image_snapshot_create(ImageCtxT *ictx, uint32_t flags,
+ uint64_t *snap_id);
+ static void image_snapshot_create(ImageCtxT *ictx, uint32_t flags,
+ uint64_t *snap_id, Context *on_finish);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Mirror<librbd::ImageCtx>;
+
+#endif // LIBRBD_API_MIRROR_H
diff --git a/src/librbd/api/Namespace.cc b/src/librbd/api/Namespace.cc
new file mode 100644
index 000000000..86ed70c06
--- /dev/null
+++ b/src/librbd/api/Namespace.cc
@@ -0,0 +1,235 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/api/Mirror.h"
+#include "librbd/api/Namespace.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Namespace: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+const std::list<std::string> POOL_OBJECTS {
+ RBD_CHILDREN,
+ RBD_GROUP_DIRECTORY,
+ RBD_INFO,
+ RBD_MIRRORING,
+ RBD_TASK,
+ RBD_TRASH,
+ RBD_DIRECTORY
+};
+
+} // anonymous namespace
+
+template <typename I>
+int Namespace<I>::create(librados::IoCtx& io_ctx, const std::string& name)
+{
+ CephContext *cct = (CephContext *)io_ctx.cct();
+ ldout(cct, 5) << "name=" << name << dendl;
+
+ if (name.empty()) {
+ return -EINVAL;
+ }
+
+ librados::Rados rados(io_ctx);
+ int8_t require_osd_release;
+ int r = rados.get_min_compatible_osd(&require_osd_release);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve min OSD release: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (require_osd_release < CEPH_RELEASE_NAUTILUS) {
+ ldout(cct, 1) << "namespace support requires nautilus or later OSD"
+ << dendl;
+ return -ENOSYS;
+ }
+
+ librados::IoCtx default_ns_ctx;
+ default_ns_ctx.dup(io_ctx);
+ default_ns_ctx.set_namespace("");
+
+ r = cls_client::namespace_add(&default_ns_ctx, name);
+ if (r < 0) {
+ lderr(cct) << "failed to add namespace: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
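+  // mark the image directory in the new namespace as ready for use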
+ librados::IoCtx ns_ctx;
+ ns_ctx.dup(io_ctx);
+ ns_ctx.set_namespace(name);
+
+ r = cls_client::dir_state_set(&ns_ctx, RBD_DIRECTORY,
+ cls::rbd::DIRECTORY_STATE_READY);
+ if (r < 0) {
+ lderr(cct) << "failed to initialize image directory: " << cpp_strerror(r)
+ << dendl;
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ int ret_val = cls_client::namespace_remove(&default_ns_ctx, name);
+ if (ret_val < 0) {
+    lderr(cct) << "failed to remove namespace: " << cpp_strerror(ret_val)
+               << dendl;
+ }
+
+ return r;
+}
+
+template <typename I>
+int Namespace<I>::remove(librados::IoCtx& io_ctx, const std::string& name)
+{
+ CephContext *cct = (CephContext *)io_ctx.cct();
+ ldout(cct, 5) << "name=" << name << dendl;
+
+ if (name.empty()) {
+ return -EINVAL;
+ }
+
+ librados::IoCtx default_ns_ctx;
+ default_ns_ctx.dup(io_ctx);
+ default_ns_ctx.set_namespace("");
+
+ librados::IoCtx ns_ctx;
+ ns_ctx.dup(io_ctx);
+ ns_ctx.set_namespace(name);
+
+ std::map<std::string, cls::rbd::TrashImageSpec> trash_entries;
+
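+  // atomically block new images and remove the directory; -EBUSY means
+  // the namespace still contains images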
+ librados::ObjectWriteOperation dir_op;
+ librbd::cls_client::dir_state_set(
+ &dir_op, cls::rbd::DIRECTORY_STATE_ADD_DISABLED);
+ dir_op.remove();
+
+ int r = ns_ctx.operate(RBD_DIRECTORY, &dir_op);
+ if (r == -EBUSY) {
+ ldout(cct, 5) << "image directory not empty" << dendl;
+ goto rollback;
+ } else if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to disable the namespace: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ r = cls_client::trash_list(&ns_ctx, "", 1, &trash_entries);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to list trash directory: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (!trash_entries.empty()) {
+ ldout(cct, 5) << "image trash not empty" << dendl;
+ goto rollback;
+ }
+
+ r = Mirror<I>::mode_set(ns_ctx, RBD_MIRROR_MODE_DISABLED);
+ if (r < 0) {
+ lderr(cct) << "failed to disable mirroring: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ for (auto& oid : POOL_OBJECTS) {
+ r = ns_ctx.remove(oid);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to remove object '" << oid << "': "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ r = cls_client::namespace_remove(&default_ns_ctx, name);
+ if (r < 0) {
+ lderr(cct) << "failed to remove namespace: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+
+rollback:
+
+ r = librbd::cls_client::dir_state_set(
+ &ns_ctx, RBD_DIRECTORY, cls::rbd::DIRECTORY_STATE_READY);
+ if (r < 0) {
+ lderr(cct) << "failed to restore directory state: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ return -EBUSY;
+}
+
+template <typename I>
+int Namespace<I>::list(IoCtx& io_ctx, std::vector<std::string> *names)
+{
+ CephContext *cct = (CephContext *)io_ctx.cct();
+ ldout(cct, 5) << dendl;
+
+ librados::IoCtx default_ns_ctx;
+ default_ns_ctx.dup(io_ctx);
+ default_ns_ctx.set_namespace("");
+
+ int r;
+ int max_read = 1024;
+ std::string last_read = "";
+ do {
+ std::list<std::string> name_list;
+ r = cls_client::namespace_list(&default_ns_ctx, last_read, max_read,
+ &name_list);
+ if (r == -ENOENT) {
+ return 0;
+ } else if (r < 0) {
+ lderr(cct) << "error listing namespaces: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ names->insert(names->end(), name_list.begin(), name_list.end());
+ if (!name_list.empty()) {
+ last_read = name_list.back();
+ }
+ r = name_list.size();
+ } while (r == max_read);
+
+ return 0;
+}
+
+template <typename I>
+int Namespace<I>::exists(librados::IoCtx& io_ctx, const std::string& name, bool *exists)
+{
+ CephContext *cct = (CephContext *)io_ctx.cct();
+ ldout(cct, 5) << "name=" << name << dendl;
+
+ *exists = false;
+ if (name.empty()) {
+ return -EINVAL;
+ }
+
+ librados::IoCtx ns_ctx;
+ ns_ctx.dup(io_ctx);
+ ns_ctx.set_namespace(name);
+
+ int r = librbd::cls_client::dir_state_assert(&ns_ctx, RBD_DIRECTORY,
+ cls::rbd::DIRECTORY_STATE_READY);
+ if (r == 0) {
+ *exists = true;
+ } else if (r != -ENOENT) {
+ lderr(cct) << "error asserting namespace: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Namespace<librbd::ImageCtx>;
diff --git a/src/librbd/api/Namespace.h b/src/librbd/api/Namespace.h
new file mode 100644
index 000000000..220eb28f3
--- /dev/null
+++ b/src/librbd/api/Namespace.h
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_NAMESPACE_H
+#define CEPH_LIBRBD_API_NAMESPACE_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "include/rbd/librbd.hpp"
+#include <string>
+#include <vector>
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Namespace {
+
+ static int create(librados::IoCtx& io_ctx, const std::string& name);
+ static int remove(librados::IoCtx& io_ctx, const std::string& name);
+ static int list(librados::IoCtx& io_ctx, std::vector<std::string>* names);
+ static int exists(librados::IoCtx& io_ctx, const std::string& name, bool *exists);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Namespace<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_NAMESPACE_H
diff --git a/src/librbd/api/Pool.cc b/src/librbd/api/Pool.cc
new file mode 100644
index 000000000..65d55328f
--- /dev/null
+++ b/src/librbd/api/Pool.cc
@@ -0,0 +1,375 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Pool.h"
+#include "include/rados/librados.hpp"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "common/Throttle.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "osd/osd_types.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Image.h"
+#include "librbd/api/Trash.h"
+#include "librbd/image/ValidatePoolRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Pool::ImageStatRequest: " \
+ << __func__ << " " << this << ": " \
+ << "(id=" << m_image_id << "): "
+
+template <typename I>
+class ImageStatRequest {
+public:
+ ImageStatRequest(librados::IoCtx& io_ctx, SimpleThrottle& throttle,
+ const std::string& image_id, bool scan_snaps,
+ std::atomic<uint64_t>* bytes,
+ std::atomic<uint64_t>* max_bytes,
+ std::atomic<uint64_t>* snaps)
+ : m_cct(reinterpret_cast<CephContext*>(io_ctx.cct())),
+ m_io_ctx(io_ctx), m_throttle(throttle), m_image_id(image_id),
+ m_scan_snaps(scan_snaps), m_bytes(bytes), m_max_bytes(max_bytes),
+ m_snaps(snaps) {
+ m_throttle.start_op();
+ }
+
+ void send() {
+ get_head();
+ }
+
+protected:
+ void finish(int r) {
+ (*m_max_bytes) += m_max_size;
+ m_throttle.end_op(r);
+
+ delete this;
+ }
+
+private:
+ CephContext* m_cct;
+ librados::IoCtx& m_io_ctx;
+ SimpleThrottle& m_throttle;
+ const std::string& m_image_id;
+ bool m_scan_snaps;
+ std::atomic<uint64_t>* m_bytes;
+ std::atomic<uint64_t>* m_max_bytes;
+ std::atomic<uint64_t>* m_snaps;
+ bufferlist m_out_bl;
+
+ uint64_t m_max_size = 0;
+ ::SnapContext m_snapc;
+
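+  // reads the HEAD object size (and snap context when scanning snaps)
+  // in a single compound operation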
+ void get_head() {
+ ldout(m_cct, 15) << dendl;
+
+ librados::ObjectReadOperation op;
+ cls_client::get_size_start(&op, CEPH_NOSNAP);
+ if (m_scan_snaps) {
+ cls_client::get_snapcontext_start(&op);
+ }
+
+ m_out_bl.clear();
+ auto aio_comp = util::create_rados_callback<
+ ImageStatRequest<I>, &ImageStatRequest<I>::handle_get_head>(this);
+ int r = m_io_ctx.aio_operate(util::header_name(m_image_id), aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+ }
+
+ void handle_get_head(int r) {
+ ldout(m_cct, 15) << "r=" << r << dendl;
+
+ auto it = m_out_bl.cbegin();
+ if (r == 0) {
+ uint8_t order;
+ r = cls_client::get_size_finish(&it, &m_max_size, &order);
+ if (r == 0) {
+ (*m_bytes) += m_max_size;
+ }
+ }
+ if (m_scan_snaps && r == 0) {
+ r = cls_client::get_snapcontext_finish(&it, &m_snapc);
+ if (r == 0) {
+ (*m_snaps) += m_snapc.snaps.size();
+ }
+ }
+
+ if (r == -ENOENT) {
+ finish(r);
+ return;
+ } else if (r < 0) {
+ lderr(m_cct) << "failed to stat image: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (!m_snapc.is_valid()) {
+ lderr(m_cct) << "snap context is invalid" << dendl;
+ finish(-EIO);
+ return;
+ }
+
+ get_snaps();
+ }
+
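+  // issue one get_size call per snapshot to find the largest provisioned size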
+ void get_snaps() {
+ if (!m_scan_snaps || m_snapc.snaps.empty()) {
+ finish(0);
+ return;
+ }
+
+ ldout(m_cct, 15) << dendl;
+ librados::ObjectReadOperation op;
+ for (auto snap_seq : m_snapc.snaps) {
+ cls_client::get_size_start(&op, snap_seq);
+ }
+
+ m_out_bl.clear();
+ auto aio_comp = util::create_rados_callback<
+ ImageStatRequest<I>, &ImageStatRequest<I>::handle_get_snaps>(this);
+ int r = m_io_ctx.aio_operate(util::header_name(m_image_id), aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+ }
+
+ void handle_get_snaps(int r) {
+ ldout(m_cct, 15) << "r=" << r << dendl;
+
+ auto it = m_out_bl.cbegin();
+ for ([[maybe_unused]] auto snap_seq : m_snapc.snaps) {
+ uint64_t size;
+ if (r == 0) {
+ uint8_t order;
+ r = cls_client::get_size_finish(&it, &size, &order);
+ }
+ if (r == 0 && m_max_size < size) {
+ m_max_size = size;
+ }
+ }
+
+ if (r == -ENOENT) {
+ ldout(m_cct, 15) << "out-of-sync metadata" << dendl;
+ get_head();
+ } else if (r < 0) {
+ lderr(m_cct) << "failed to retrieve snap size: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ } else {
+ finish(0);
+ }
+ }
+
+};
+
+template <typename I>
+void get_pool_stat_option_value(typename Pool<I>::StatOptions* stat_options,
+ rbd_pool_stat_option_t option,
+ uint64_t** value) {
+ auto it = stat_options->find(option);
+ if (it == stat_options->end()) {
+ *value = nullptr;
+ } else {
+ *value = it->second;
+ }
+}
+
+template <typename I>
+int get_pool_stats(librados::IoCtx& io_ctx, const ConfigProxy& config,
+ const std::vector<std::string>& image_ids, uint64_t* image_count,
+ uint64_t* provisioned_bytes, uint64_t* max_provisioned_bytes,
+ uint64_t* snapshot_count) {
+
+ bool scan_snaps = ((max_provisioned_bytes != nullptr) ||
+ (snapshot_count != nullptr));
+
+ SimpleThrottle throttle(
+ config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true);
+ std::atomic<uint64_t> bytes{0};
+ std::atomic<uint64_t> max_bytes{0};
+ std::atomic<uint64_t> snaps{0};
+ for (auto& image_id : image_ids) {
+ if (throttle.pending_error()) {
+ break;
+ }
+
+ auto req = new ImageStatRequest<I>(io_ctx, throttle, image_id,
+ scan_snaps, &bytes, &max_bytes, &snaps);
+ req->send();
+ }
+
+ int r = throttle.wait_for_ret();
+ if (r < 0) {
+ return r;
+ }
+
+ if (image_count != nullptr) {
+ *image_count = image_ids.size();
+ }
+ if (provisioned_bytes != nullptr) {
+ *provisioned_bytes = bytes.load();
+ }
+ if (max_provisioned_bytes != nullptr) {
+ *max_provisioned_bytes = max_bytes.load();
+ }
+ if (snapshot_count != nullptr) {
+ *snapshot_count = snaps.load();
+ }
+
+ return 0;
+}
+
+} // anonymous namespace
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Pool: " << __func__ << ": "
+
+template <typename I>
+int Pool<I>::init(librados::IoCtx& io_ctx, bool force) {
+ auto cct = reinterpret_cast<CephContext*>(io_ctx.cct());
+ ldout(cct, 10) << dendl;
+
+ int r = io_ctx.application_enable(pg_pool_t::APPLICATION_NAME_RBD, force);
+ if (r < 0) {
+ return r;
+ }
+
+ ConfigProxy config{cct->_conf};
+ api::Config<I>::apply_pool_overrides(io_ctx, &config);
+ if (!config.get_val<bool>("rbd_validate_pool")) {
+ return 0;
+ }
+
+ C_SaferCond ctx;
+ auto req = image::ValidatePoolRequest<I>::create(io_ctx, &ctx);
+ req->send();
+
+ return ctx.wait();
+}
+
+template <typename I>
+int Pool<I>::add_stat_option(StatOptions* stat_options,
+ rbd_pool_stat_option_t option,
+ uint64_t* value) {
+ switch (option) {
+ case RBD_POOL_STAT_OPTION_IMAGES:
+ case RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES:
+ case RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES:
+ case RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS:
+ case RBD_POOL_STAT_OPTION_TRASH_IMAGES:
+ case RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES:
+ case RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES:
+ case RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS:
+ stat_options->emplace(option, value);
+ return 0;
+ default:
+ break;
+ }
+ return -ENOENT;
+}
+
+template <typename I>
+int Pool<I>::get_stats(librados::IoCtx& io_ctx, StatOptions* stat_options) {
+ auto cct = reinterpret_cast<CephContext*>(io_ctx.cct());
+ ldout(cct, 10) << dendl;
+
+ ConfigProxy config{cct->_conf};
+ api::Config<I>::apply_pool_overrides(io_ctx, &config);
+
+ uint64_t* image_count;
+ uint64_t* provisioned_bytes;
+ uint64_t* max_provisioned_bytes;
+ uint64_t* snapshot_count;
+
+ std::vector<trash_image_info_t> trash_entries;
+ int r = Trash<I>::list(io_ctx, trash_entries, false);
+ if (r < 0 && r != -EOPNOTSUPP) {
+ return r;
+ }
+
+ get_pool_stat_option_value<I>(
+ stat_options, RBD_POOL_STAT_OPTION_IMAGES, &image_count);
+ get_pool_stat_option_value<I>(
+ stat_options, RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES,
+ &provisioned_bytes);
+ get_pool_stat_option_value<I>(
+ stat_options, RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES,
+ &max_provisioned_bytes);
+ get_pool_stat_option_value<I>(
+ stat_options, RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS, &snapshot_count);
+ if (image_count != nullptr || provisioned_bytes != nullptr ||
+ max_provisioned_bytes != nullptr || snapshot_count != nullptr) {
+ typename Image<I>::ImageNameToIds images;
+    r = Image<I>::list_images_v2(io_ctx, &images);
+ if (r < 0) {
+ return r;
+ }
+
+ std::vector<std::string> image_ids;
+ image_ids.reserve(images.size() + trash_entries.size());
+ for (auto& it : images) {
+ image_ids.push_back(std::move(it.second));
+ }
+ for (auto& it : trash_entries) {
+ if (it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+ image_ids.push_back(std::move(it.id));
+ }
+ }
+
+ r = get_pool_stats<I>(io_ctx, config, image_ids, image_count,
+ provisioned_bytes, max_provisioned_bytes,
+ snapshot_count);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ get_pool_stat_option_value<I>(
+ stat_options, RBD_POOL_STAT_OPTION_TRASH_IMAGES, &image_count);
+ get_pool_stat_option_value<I>(
+ stat_options, RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES,
+ &provisioned_bytes);
+ get_pool_stat_option_value<I>(
+ stat_options, RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES,
+ &max_provisioned_bytes);
+ get_pool_stat_option_value<I>(
+ stat_options, RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS, &snapshot_count);
+ if (image_count != nullptr || provisioned_bytes != nullptr ||
+ max_provisioned_bytes != nullptr || snapshot_count != nullptr) {
+
+ std::vector<std::string> image_ids;
+ image_ids.reserve(trash_entries.size());
+ for (auto& it : trash_entries) {
+ if (it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+ continue;
+ }
+ image_ids.push_back(std::move(it.id));
+ }
+
+ r = get_pool_stats<I>(io_ctx, config, image_ids, image_count,
+ provisioned_bytes, max_provisioned_bytes,
+ snapshot_count);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Pool<librbd::ImageCtx>;
diff --git a/src/librbd/api/Pool.h b/src/librbd/api/Pool.h
new file mode 100644
index 000000000..7b607ab6e
--- /dev/null
+++ b/src/librbd/api/Pool.h
@@ -0,0 +1,38 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_POOL_H
+#define CEPH_LIBRBD_API_POOL_H
+
+#include "include/int_types.h"
+#include "include/rados/librados_fwd.hpp"
+#include "include/rbd/librbd.h"
+#include <map>
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Pool {
+public:
+ typedef std::map<rbd_pool_stat_option_t, uint64_t*> StatOptions;
+
+ static int init(librados::IoCtx& io_ctx, bool force);
+
+ static int add_stat_option(StatOptions* stat_options,
+ rbd_pool_stat_option_t option,
+ uint64_t* value);
+
+ static int get_stats(librados::IoCtx& io_ctx, StatOptions* stat_options);
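+
+  // Hypothetical usage sketch (local names are illustrative only):
+  //   Pool<>::StatOptions opts;
+  //   uint64_t image_count = 0;
+  //   Pool<>::add_stat_option(&opts, RBD_POOL_STAT_OPTION_IMAGES,
+  //                           &image_count);
+  //   Pool<>::get_stats(io_ctx, &opts);  // fills *image_count on success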
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Pool<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_POOL_H
diff --git a/src/librbd/api/PoolMetadata.cc b/src/librbd/api/PoolMetadata.cc
new file mode 100644
index 000000000..33e3fb648
--- /dev/null
+++ b/src/librbd/api/PoolMetadata.cc
@@ -0,0 +1,156 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/PoolMetadata.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+#include "librbd/image/GetMetadataRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::PoolMetadata: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+void update_pool_timestamp(librados::IoCtx& io_ctx) {
+ CephContext *cct = (CephContext *)io_ctx.cct();
+
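+  // bump a cluster-wide config timestamp so clients re-read pool overrides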
+ auto now = ceph_clock_now();
+ std::string cmd =
+ R"({)"
+ R"("prefix": "config set", )"
+ R"("who": "global", )"
+ R"("name": "rbd_config_pool_override_update_timestamp", )"
+ R"("value": ")" + stringify(now.sec()) + R"(")"
+ R"(})";
+
+ librados::Rados rados(io_ctx);
+ bufferlist in_bl;
+ std::string ss;
+ int r = rados.mon_command(cmd, in_bl, nullptr, &ss);
+ if (r < 0) {
+ lderr(cct) << "failed to notify clients of pool config update: "
+ << cpp_strerror(r) << dendl;
+ }
+}
+
+} // anonymous namespace
+
+template <typename I>
+int PoolMetadata<I>::get(librados::IoCtx& io_ctx,
+ const std::string &key, std::string *value) {
+ CephContext *cct = (CephContext *)io_ctx.cct();
+
+ int r = cls_client::metadata_get(&io_ctx, RBD_INFO, key, value);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed reading metadata " << key << ": " << cpp_strerror(r)
+ << dendl;
+ }
+
+ return r;
+}
+
+template <typename I>
+int PoolMetadata<I>::set(librados::IoCtx& io_ctx, const std::string &key,
+ const std::string &value) {
+ CephContext *cct = (CephContext *)io_ctx.cct();
+
+ bool need_update_pool_timestamp = false;
+
+ std::string config_key;
+ if (util::is_metadata_config_override(key, &config_key)) {
+ if (!librbd::api::Config<I>::is_option_name(io_ctx, config_key)) {
+ lderr(cct) << "validation for " << key
+ << " failed: not allowed pool level override" << dendl;
+ return -EINVAL;
+ }
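+    // validate the value against a throwaway config proxy before storing it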
+ int r = ConfigProxy{false}.set_val(config_key.c_str(), value);
+ if (r < 0) {
+ lderr(cct) << "validation for " << key << " failed: " << cpp_strerror(r)
+ << dendl;
+ return -EINVAL;
+ }
+
+ need_update_pool_timestamp = true;
+ }
+
+ ceph::bufferlist bl;
+ bl.append(value);
+
+ int r = cls_client::metadata_set(&io_ctx, RBD_INFO, {{key, bl}});
+ if (r < 0) {
+ lderr(cct) << "failed setting metadata " << key << ": " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (need_update_pool_timestamp) {
+ update_pool_timestamp(io_ctx);
+ }
+
+ return 0;
+}
+
+template <typename I>
+int PoolMetadata<I>::remove(librados::IoCtx& io_ctx, const std::string &key) {
+ CephContext *cct = (CephContext *)io_ctx.cct();
+
+ std::string value;
+ int r = cls_client::metadata_get(&io_ctx, RBD_INFO, key, &value);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ ldout(cct, 1) << "metadata " << key << " does not exist" << dendl;
+ } else {
+ lderr(cct) << "failed reading metadata " << key << ": " << cpp_strerror(r)
+ << dendl;
+ }
+ return r;
+ }
+
+ r = cls_client::metadata_remove(&io_ctx, RBD_INFO, key);
+ if (r < 0) {
+ lderr(cct) << "failed removing metadata " << key << ": " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ std::string config_key;
+ if (util::is_metadata_config_override(key, &config_key)) {
+ update_pool_timestamp(io_ctx);
+ }
+
+ return 0;
+}
+
+template <typename I>
+int PoolMetadata<I>::list(librados::IoCtx& io_ctx, const std::string &start,
+ uint64_t max,
+ std::map<std::string, ceph::bufferlist> *pairs) {
+ CephContext *cct = (CephContext *)io_ctx.cct();
+
+ pairs->clear();
+ C_SaferCond ctx;
+ auto req = image::GetMetadataRequest<I>::create(
+ io_ctx, RBD_INFO, false, "", start, max, pairs, &ctx);
+ req->send();
+
+ int r = ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed listing metadata: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::PoolMetadata<librbd::ImageCtx>;
diff --git a/src/librbd/api/PoolMetadata.h b/src/librbd/api/PoolMetadata.h
new file mode 100644
index 000000000..69ab574ac
--- /dev/null
+++ b/src/librbd/api/PoolMetadata.h
@@ -0,0 +1,37 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_POOL_METADATA_H
+#define CEPH_LIBRBD_API_POOL_METADATA_H
+
+#include "include/buffer_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+
+#include <cstdint>
+#include <map>
+#include <string>
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class PoolMetadata {
+public:
+ static int get(librados::IoCtx& io_ctx, const std::string &key,
+ std::string *value);
+ static int set(librados::IoCtx& io_ctx, const std::string &key,
+ const std::string &value);
+ static int remove(librados::IoCtx& io_ctx, const std::string &key);
+ static int list(librados::IoCtx& io_ctx, const std::string &start,
+ uint64_t max, std::map<std::string, ceph::bufferlist> *pairs);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::PoolMetadata<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_POOL_METADATA_H
diff --git a/src/librbd/api/Snapshot.cc b/src/librbd/api/Snapshot.cc
new file mode 100644
index 000000000..03cefbd1c
--- /dev/null
+++ b/src/librbd/api/Snapshot.cc
@@ -0,0 +1,444 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Snapshot.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "common/errno.h"
+#include "librbd/internal.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Image.h"
+#include "include/Context.h"
+#include "common/Cond.h"
+
+#include <boost/variant.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Snapshot: " << __func__ << ": "
+
+using librados::snap_t;
+
+namespace librbd {
+namespace api {
+
+namespace {
+
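+// variant visitors: extract namespace-specific details from a snapshot's
+// SnapshotNamespace; non-matching alternatives return -EINVAL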
+class GetGroupVisitor {
+public:
+ CephContext* cct;
+ librados::IoCtx *image_ioctx;
+ snap_group_namespace_t *group_snap;
+
+ explicit GetGroupVisitor(CephContext* cct, librados::IoCtx *_image_ioctx,
+ snap_group_namespace_t *group_snap)
+    : cct(cct), image_ioctx(_image_ioctx), group_snap(group_snap) {}
+
+ template <typename T>
+ inline int operator()(const T&) const {
+ // ignore other than GroupSnapshotNamespace types.
+ return -EINVAL;
+ }
+
+ inline int operator()(
+ const cls::rbd::GroupSnapshotNamespace& snap_namespace) {
+ IoCtx group_ioctx;
+ int r = util::create_ioctx(*image_ioctx, "group", snap_namespace.group_pool,
+ {}, &group_ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::GroupSnapshot group_snapshot;
+
+ std::string group_name;
+ r = cls_client::dir_get_name(&group_ioctx, RBD_GROUP_DIRECTORY,
+ snap_namespace.group_id, &group_name);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve group name: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ std::string group_header_oid = util::group_header_name(snap_namespace.group_id);
+ r = cls_client::group_snap_get_by_id(&group_ioctx,
+ group_header_oid,
+ snap_namespace.group_snapshot_id,
+ &group_snapshot);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve group snapshot: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ group_snap->group_pool = group_ioctx.get_id();
+ group_snap->group_name = group_name;
+ group_snap->group_snap_name = group_snapshot.name;
+ return 0;
+ }
+};
+
+class GetTrashVisitor {
+public:
+ std::string* original_name;
+
+ explicit GetTrashVisitor(std::string* original_name)
+ : original_name(original_name) {
+ }
+
+ template <typename T>
+ inline int operator()(const T&) const {
+ return -EINVAL;
+ }
+
+ inline int operator()(
+ const cls::rbd::TrashSnapshotNamespace& snap_namespace) {
+ *original_name = snap_namespace.original_name;
+ return 0;
+ }
+};
+
+class GetMirrorVisitor {
+public:
+ snap_mirror_namespace_t *mirror_snap;
+
+ explicit GetMirrorVisitor(snap_mirror_namespace_t *mirror_snap)
+ : mirror_snap(mirror_snap) {
+ }
+
+ template <typename T>
+ inline int operator()(const T&) const {
+ return -EINVAL;
+ }
+
+ inline int operator()(
+ const cls::rbd::MirrorSnapshotNamespace& snap_namespace) {
+ mirror_snap->state = static_cast<snap_mirror_state_t>(snap_namespace.state);
+ mirror_snap->complete = snap_namespace.complete;
+ mirror_snap->mirror_peer_uuids = snap_namespace.mirror_peer_uuids;
+ mirror_snap->primary_mirror_uuid = snap_namespace.primary_mirror_uuid;
+ mirror_snap->primary_snap_id = snap_namespace.primary_snap_id;
+ mirror_snap->last_copied_object_number =
+ snap_namespace.last_copied_object_number;
+ return 0;
+ }
+};
+
+} // anonymous namespace
+
+template <typename I>
+int Snapshot<I>::get_group_namespace(I *ictx, uint64_t snap_id,
+ snap_group_namespace_t *group_snap) {
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ std::shared_lock image_locker{ictx->image_lock};
+ auto snap_info = ictx->get_snap_info(snap_id);
+ if (snap_info == nullptr) {
+ return -ENOENT;
+ }
+
+ GetGroupVisitor ggv = GetGroupVisitor(ictx->cct, &ictx->md_ctx, group_snap);
+ r = snap_info->snap_namespace.visit(ggv);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Snapshot<I>::get_trash_namespace(I *ictx, uint64_t snap_id,
+ std::string* original_name) {
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ std::shared_lock image_locker{ictx->image_lock};
+ auto snap_info = ictx->get_snap_info(snap_id);
+ if (snap_info == nullptr) {
+ return -ENOENT;
+ }
+
+ auto visitor = GetTrashVisitor(original_name);
+ r = snap_info->snap_namespace.visit(visitor);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Snapshot<I>::get_mirror_namespace(
+ I *ictx, uint64_t snap_id, snap_mirror_namespace_t *mirror_snap) {
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ std::shared_lock image_locker{ictx->image_lock};
+ auto snap_info = ictx->get_snap_info(snap_id);
+ if (snap_info == nullptr) {
+ return -ENOENT;
+ }
+
+ auto gmv = GetMirrorVisitor(mirror_snap);
+ r = snap_info->snap_namespace.visit(gmv);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Snapshot<I>::get_namespace_type(I *ictx, uint64_t snap_id,
+ snap_namespace_type_t *namespace_type) {
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ std::shared_lock l{ictx->image_lock};
+ auto snap_info = ictx->get_snap_info(snap_id);
+ if (snap_info == nullptr) {
+ return -ENOENT;
+ }
+
+ *namespace_type = static_cast<snap_namespace_type_t>(
+ cls::rbd::get_snap_namespace_type(snap_info->snap_namespace));
+ return 0;
+}
+
+template <typename I>
+int Snapshot<I>::remove(I *ictx, uint64_t snap_id) {
+ ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_id << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::SnapshotNamespace snapshot_namespace;
+ std::string snapshot_name;
+ {
+ std::shared_lock image_locker{ictx->image_lock};
+ auto it = ictx->snap_info.find(snap_id);
+ if (it == ictx->snap_info.end()) {
+ return -ENOENT;
+ }
+
+ snapshot_namespace = it->second.snap_namespace;
+ snapshot_name = it->second.name;
+ }
+
+ C_SaferCond ctx;
+ ictx->operations->snap_remove(snapshot_namespace, snapshot_name, &ctx);
+ r = ctx.wait();
+ return r;
+}
+
+template <typename I>
+int Snapshot<I>::get_name(I *ictx, uint64_t snap_id, std::string *snap_name) {
+  ldout(ictx->cct, 20) << "snap_get_name " << ictx << " " << snap_id << dendl;
+
+  int r = ictx->state->refresh_if_required();
+  if (r < 0)
+    return r;
+
+  std::shared_lock image_locker{ictx->image_lock};
+  return ictx->get_snap_name(snap_id, snap_name);
+}
+
+template <typename I>
+int Snapshot<I>::get_id(I *ictx, const std::string& snap_name,
+                        uint64_t *snap_id) {
+  ldout(ictx->cct, 20) << "snap_get_id " << ictx << " " << snap_name << dendl;
+
+  int r = ictx->state->refresh_if_required();
+  if (r < 0)
+    return r;
+
+  std::shared_lock image_locker{ictx->image_lock};
+  *snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name);
+  if (*snap_id == CEPH_NOSNAP)
+    return -ENOENT;
+
+  return 0;
+}
+
+template <typename I>
+int Snapshot<I>::list(I *ictx, std::vector<snap_info_t>& snaps) {
+ ldout(ictx->cct, 20) << "snap_list " << ictx << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0)
+ return r;
+
+ std::shared_lock l{ictx->image_lock};
+ for (auto &it : ictx->snap_info) {
+ snap_info_t info;
+ info.name = it.second.name;
+ info.id = it.first;
+ info.size = it.second.size;
+ snaps.push_back(info);
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Snapshot<I>::exists(I *ictx, const cls::rbd::SnapshotNamespace& snap_namespace,
+ const char *snap_name, bool *exists) {
+ ldout(ictx->cct, 20) << "snap_exists " << ictx << " " << snap_name << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0)
+ return r;
+
+ std::shared_lock l{ictx->image_lock};
+ *exists = ictx->get_snap_id(snap_namespace, snap_name) != CEPH_NOSNAP;
+ return 0;
+}
+
+template <typename I>
+int Snapshot<I>::create(I *ictx, const char *snap_name, uint32_t flags,
+ ProgressContext& pctx) {
+ ldout(ictx->cct, 20) << "snap_create " << ictx << " " << snap_name
+ << " flags: " << flags << dendl;
+
+ uint64_t internal_flags = 0;
+ int r = util::snap_create_flags_api_to_internal(ictx->cct, flags,
+ &internal_flags);
+ if (r < 0) {
+ return r;
+ }
+
+ return ictx->operations->snap_create(cls::rbd::UserSnapshotNamespace(),
+ snap_name, internal_flags, pctx);
+}
+
+template <typename I>
+int Snapshot<I>::remove(I *ictx, const char *snap_name, uint32_t flags,
+ ProgressContext& pctx) {
+  ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_name
+                       << " flags: " << flags << dendl;
+
+  int r = ictx->state->refresh_if_required();
+ if (r < 0)
+ return r;
+
+ if (flags & RBD_SNAP_REMOVE_FLATTEN) {
+ r = Image<I>::flatten_children(ictx, snap_name, pctx);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ bool protect;
+ r = is_protected(ictx, snap_name, &protect);
+ if (r < 0) {
+ return r;
+ }
+
+  if (protect && (flags & RBD_SNAP_REMOVE_UNPROTECT)) {
+    r = ictx->operations->snap_unprotect(cls::rbd::UserSnapshotNamespace(),
+                                         snap_name);
+    if (r < 0) {
+      lderr(ictx->cct) << "failed to unprotect snapshot: " << snap_name
+                       << dendl;
+ return r;
+ }
+
+ r = is_protected(ictx, snap_name, &protect);
+ if (r < 0) {
+ return r;
+ }
+ if (protect) {
+      lderr(ictx->cct) << "snapshot is still protected after unprotection"
+                       << dendl;
+ ceph_abort();
+ }
+ }
+
+ C_SaferCond ctx;
+ ictx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), snap_name, &ctx);
+
+ r = ctx.wait();
+ return r;
+}
+
+template <typename I>
+int Snapshot<I>::get_timestamp(I *ictx, uint64_t snap_id, struct timespec *timestamp) {
+ auto snap_it = ictx->snap_info.find(snap_id);
+ ceph_assert(snap_it != ictx->snap_info.end());
+ utime_t time = snap_it->second.timestamp;
+ time.to_timespec(timestamp);
+ return 0;
+}
+
+template <typename I>
+int Snapshot<I>::get_limit(I *ictx, uint64_t *limit) {
+ int r = cls_client::snapshot_get_limit(&ictx->md_ctx, ictx->header_oid,
+ limit);
+ if (r == -EOPNOTSUPP) {
+ *limit = UINT64_MAX;
+ r = 0;
+ }
+ return r;
+}
+
+template <typename I>
+int Snapshot<I>::set_limit(I *ictx, uint64_t limit) {
+ return ictx->operations->snap_set_limit(limit);
+}
+
+template <typename I>
+int Snapshot<I>::is_protected(I *ictx, const char *snap_name, bool *protect) {
+ ldout(ictx->cct, 20) << "snap_is_protected " << ictx << " " << snap_name
+ << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0)
+ return r;
+
+ std::shared_lock l{ictx->image_lock};
+ snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name);
+ if (snap_id == CEPH_NOSNAP)
+ return -ENOENT;
+ bool is_unprotected;
+ r = ictx->is_snap_unprotected(snap_id, &is_unprotected);
+ // consider both PROTECTED or UNPROTECTING to be 'protected',
+ // since in either state they can't be deleted
+ *protect = !is_unprotected;
+ return r;
+}
+
+template <typename I>
+int Snapshot<I>::get_namespace(I *ictx, const char *snap_name,
+ cls::rbd::SnapshotNamespace *snap_namespace) {
+ ldout(ictx->cct, 20) << "get_snap_namespace " << ictx << " " << snap_name
+ << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0)
+ return r;
+ std::shared_lock l{ictx->image_lock};
+ snap_t snap_id = ictx->get_snap_id(*snap_namespace, snap_name);
+ if (snap_id == CEPH_NOSNAP)
+ return -ENOENT;
+ r = ictx->get_snap_namespace(snap_id, snap_namespace);
+ return r;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Snapshot<librbd::ImageCtx>;
diff --git a/src/librbd/api/Snapshot.h b/src/librbd/api/Snapshot.h
new file mode 100644
index 000000000..7e06a5a8d
--- /dev/null
+++ b/src/librbd/api/Snapshot.h
@@ -0,0 +1,67 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_SNAPSHOT_H
+#define CEPH_LIBRBD_API_SNAPSHOT_H
+
+#include "include/rbd/librbd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include <string>
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Snapshot {
+
+ static int get_group_namespace(ImageCtxT *ictx, uint64_t snap_id,
+ snap_group_namespace_t *group_snap);
+
+ static int get_trash_namespace(ImageCtxT *ictx, uint64_t snap_id,
+ std::string *original_name);
+
+ static int get_mirror_namespace(
+ ImageCtxT *ictx, uint64_t snap_id,
+ snap_mirror_namespace_t *mirror_snap);
+
+ static int get_namespace_type(ImageCtxT *ictx, uint64_t snap_id,
+ snap_namespace_type_t *namespace_type);
+
+ static int remove(ImageCtxT *ictx, uint64_t snap_id);
+
+ static int get_name(ImageCtxT *ictx, uint64_t snap_id, std::string *snap_name);
+
+ static int get_id(ImageCtxT *ictx, const std::string& snap_name, uint64_t *snap_id);
+
+ static int list(ImageCtxT *ictx, std::vector<snap_info_t>& snaps);
+
+ static int exists(ImageCtxT *ictx, const cls::rbd::SnapshotNamespace& snap_namespace,
+ const char *snap_name, bool *exists);
+
+ static int create(ImageCtxT *ictx, const char *snap_name, uint32_t flags,
+ ProgressContext& pctx);
+
+ static int remove(ImageCtxT *ictx, const char *snap_name, uint32_t flags, ProgressContext& pctx);
+
+ static int get_limit(ImageCtxT *ictx, uint64_t *limit);
+
+ static int set_limit(ImageCtxT *ictx, uint64_t limit);
+
+ static int get_timestamp(ImageCtxT *ictx, uint64_t snap_id, struct timespec *timestamp);
+
+ static int is_protected(ImageCtxT *ictx, const char *snap_name, bool *protect);
+
+ static int get_namespace(ImageCtxT *ictx, const char *snap_name,
+ cls::rbd::SnapshotNamespace *snap_namespace);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Snapshot<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_SNAPSHOT_H
diff --git a/src/librbd/api/Trash.cc b/src/librbd/api/Trash.cc
new file mode 100644
index 000000000..d8189e8a7
--- /dev/null
+++ b/src/librbd/api/Trash.cc
@@ -0,0 +1,759 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Trash.h"
+#include "include/rados/librados.hpp"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Operations.h"
+#include "librbd/TrashWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/api/DiffIterate.h"
+#include "librbd/exclusive_lock/Policy.h"
+#include "librbd/image/RemoveRequest.h"
+#include "librbd/mirror/DisableRequest.h"
+#include "librbd/mirror/EnableRequest.h"
+#include "librbd/trash/MoveRequest.h"
+#include "librbd/trash/RemoveRequest.h"
+#include <json_spirit/json_spirit.h>
+#include "librbd/journal/DisabledPolicy.h"
+#include "librbd/image/ListWatchersRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Trash: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+template <typename I>
+const typename Trash<I>::TrashImageSources Trash<I>::ALLOWED_RESTORE_SOURCES {
+ cls::rbd::TRASH_IMAGE_SOURCE_USER,
+ cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING,
+ cls::rbd::TRASH_IMAGE_SOURCE_USER_PARENT
+ };
+
+namespace {
+
+template <typename I>
+int disable_mirroring(I *ictx) {
+ ldout(ictx->cct, 10) << dendl;
+
+ C_SaferCond ctx;
+ auto req = mirror::DisableRequest<I>::create(ictx, false, true, &ctx);
+ req->send();
+ int r = ctx.wait();
+ if (r < 0) {
+ lderr(ictx->cct) << "failed to disable mirroring: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int enable_mirroring(IoCtx &io_ctx, const std::string &image_id) {
+ auto cct = reinterpret_cast<CephContext*>(io_ctx.cct());
+
+ uint64_t features;
+ uint64_t incompatible_features;
+ int r = cls_client::get_features(&io_ctx, util::header_name(image_id), true,
+ &features, &incompatible_features);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve features: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if ((features & RBD_FEATURE_JOURNALING) == 0) {
+ return 0;
+ }
+
+ cls::rbd::MirrorMode mirror_mode;
+ r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (mirror_mode != cls::rbd::MIRROR_MODE_POOL) {
+ ldout(cct, 10) << "not pool mirroring mode" << dendl;
+ return 0;
+ }
+
+ ldout(cct, 10) << dendl;
+
+ AsioEngine asio_engine(io_ctx);
+
+ C_SaferCond ctx;
+ auto req = mirror::EnableRequest<I>::create(
+ io_ctx, image_id, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, "", false,
+ asio_engine.get_work_queue(), &ctx);
+ req->send();
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to enable mirroring: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+int list_trash_image_specs(
+ librados::IoCtx &io_ctx,
+ std::map<std::string, cls::rbd::TrashImageSpec>* trash_image_specs,
+ bool exclude_user_remove_source) {
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << "list_trash_image_specs " << &io_ctx << dendl;
+
+ bool more_entries;
+ uint32_t max_read = 1024;
+ std::string last_read;
+ do {
+ std::map<std::string, cls::rbd::TrashImageSpec> trash_entries;
+ int r = cls_client::trash_list(&io_ctx, last_read, max_read,
+ &trash_entries);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error listing rbd trash entries: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (r == -ENOENT) {
+ break;
+ }
+
+ if (trash_entries.empty()) {
+ break;
+ }
+
+ for (const auto &entry : trash_entries) {
+ if (exclude_user_remove_source &&
+ entry.second.source == cls::rbd::TRASH_IMAGE_SOURCE_REMOVING) {
+ continue;
+ }
+
+ trash_image_specs->insert({entry.first, entry.second});
+ }
+
+ last_read = trash_entries.rbegin()->first;
+ more_entries = (trash_entries.size() >= max_read);
+ } while (more_entries);
+
+ return 0;
+}
+
+} // anonymous namespace
+
+template <typename I>
+int Trash<I>::move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+ const std::string &image_name, const std::string &image_id,
+ uint64_t delay) {
+ ceph_assert(!image_name.empty() && !image_id.empty());
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << &io_ctx << " name=" << image_name << ", id=" << image_id
+ << dendl;
+
+ auto ictx = new I("", image_id, nullptr, io_ctx, false);
+ int r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (r == 0) {
+ cls::rbd::MirrorImage mirror_image;
+ int mirror_r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id,
+ &mirror_image);
+ if (mirror_r == -ENOENT) {
+ ldout(ictx->cct, 10) << "mirroring is not enabled for this image"
+ << dendl;
+ } else if (mirror_r < 0) {
+ lderr(ictx->cct) << "failed to retrieve mirror image: "
+ << cpp_strerror(mirror_r) << dendl;
+ return mirror_r;
+ } else if (mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+ // a remote rbd-mirror might own the exclusive-lock on this image
+ // and therefore we need to disable mirroring so that it closes the image
+ r = disable_mirroring<I>(ictx);
+ if (r < 0) {
+ ictx->state->close();
+ return r;
+ }
+ }
+
+ if (ictx->test_features(RBD_FEATURE_JOURNALING)) {
+ std::unique_lock image_locker{ictx->image_lock};
+ ictx->set_journal_policy(new journal::DisabledPolicy());
+ }
+
+ ictx->owner_lock.lock_shared();
+ if (ictx->exclusive_lock != nullptr) {
+ ictx->exclusive_lock->block_requests(0);
+
+ r = ictx->operations->prepare_image_update(
+ exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true);
+ if (r < 0) {
+ lderr(cct) << "cannot obtain exclusive lock - not removing" << dendl;
+ ictx->owner_lock.unlock_shared();
+ ictx->state->close();
+ return -EBUSY;
+ }
+ }
+ ictx->owner_lock.unlock_shared();
+
+ ictx->image_lock.lock_shared();
+ if (!ictx->migration_info.empty()) {
+ lderr(cct) << "cannot move migrating image to trash" << dendl;
+ ictx->image_lock.unlock_shared();
+ ictx->state->close();
+ return -EBUSY;
+ }
+ ictx->image_lock.unlock_shared();
+
+ if (mirror_r >= 0 &&
+ mirror_image.mode != cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+ r = disable_mirroring<I>(ictx);
+ if (r < 0) {
+ ictx->state->close();
+ return r;
+ }
+ }
+
+ ictx->state->close();
+ }
+
+ utime_t delete_time{ceph_clock_now()};
+ utime_t deferment_end_time{delete_time};
+ deferment_end_time += delay;
+ cls::rbd::TrashImageSpec trash_image_spec{
+ static_cast<cls::rbd::TrashImageSource>(source), image_name,
+ delete_time, deferment_end_time};
+
+ trash_image_spec.state = cls::rbd::TRASH_IMAGE_STATE_MOVING;
+ C_SaferCond ctx;
+ auto req = trash::MoveRequest<I>::create(io_ctx, image_id, trash_image_spec,
+ &ctx);
+ req->send();
+
+ r = ctx.wait();
+ trash_image_spec.state = cls::rbd::TRASH_IMAGE_STATE_NORMAL;
+ int ret = cls_client::trash_state_set(&io_ctx, image_id,
+ trash_image_spec.state,
+ cls::rbd::TRASH_IMAGE_STATE_MOVING);
+ if (ret < 0 && ret != -EOPNOTSUPP) {
+ lderr(cct) << "error setting trash image state: "
+ << cpp_strerror(ret) << dendl;
+ return ret;
+ }
+ if (r < 0) {
+ return r;
+ }
+
+ C_SaferCond notify_ctx;
+ TrashWatcher<I>::notify_image_added(io_ctx, image_id, trash_image_spec,
+ &notify_ctx);
+ r = notify_ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Trash<I>::move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+ const std::string &image_name, uint64_t delay) {
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << &io_ctx << " name=" << image_name << dendl;
+
+ // try to get image id from the directory
+ std::string image_id;
+ int r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name,
+ &image_id);
+ if (r == -ENOENT) {
+ r = io_ctx.stat(util::old_header_name(image_name), nullptr, nullptr);
+ if (r == 0) {
+ // cannot move V1 image to trash
+ ldout(cct, 10) << "cannot move v1 image to trash" << dendl;
+ return -EOPNOTSUPP;
+ }
+
+ // search for an interrupted trash move request
+ std::map<std::string, cls::rbd::TrashImageSpec> trash_image_specs;
+    r = list_trash_image_specs(io_ctx, &trash_image_specs, true);
+ if (r < 0) {
+ return r;
+ }
+ if (auto found_image =
+ std::find_if(
+ trash_image_specs.begin(), trash_image_specs.end(),
+ [&](const auto& pair) {
+ const auto& spec = pair.second;
+ return (spec.source == cls::rbd::TRASH_IMAGE_SOURCE_USER &&
+ spec.state == cls::rbd::TRASH_IMAGE_STATE_MOVING &&
+ spec.name == image_name);
+ });
+ found_image != trash_image_specs.end()) {
+ image_id = found_image->first;
+ } else {
+ return -ENOENT;
+ }
+ ldout(cct, 15) << "derived image id " << image_id << " from existing "
+ << "trash entry" << dendl;
+ } else if (r < 0) {
+ lderr(cct) << "failed to retrieve image id: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (image_name.empty() || image_id.empty()) {
+ lderr(cct) << "invalid image name/id" << dendl;
+ return -EINVAL;
+ }
+
+ return Trash<I>::move(io_ctx, source, image_name, image_id, delay);
+}
+
+template <typename I>
+int Trash<I>::get(IoCtx &io_ctx, const std::string &id,
+ trash_image_info_t *info) {
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << __func__ << " " << &io_ctx << dendl;
+
+ cls::rbd::TrashImageSpec spec;
+ int r = cls_client::trash_get(&io_ctx, id, &spec);
+ if (r == -ENOENT) {
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "error retrieving trash entry: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ rbd_trash_image_source_t source = static_cast<rbd_trash_image_source_t>(
+ spec.source);
+ *info = trash_image_info_t{id, spec.name, source, spec.deletion_time.sec(),
+ spec.deferment_end_time.sec()};
+ return 0;
+}
+
+template <typename I>
+int Trash<I>::list(IoCtx &io_ctx, std::vector<trash_image_info_t> &entries,
+ bool exclude_user_remove_source) {
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << __func__ << " " << &io_ctx << dendl;
+
+ std::map<std::string, cls::rbd::TrashImageSpec> trash_image_specs;
+ int r = list_trash_image_specs(io_ctx, &trash_image_specs,
+ exclude_user_remove_source);
+ if (r < 0) {
+ return r;
+ }
+
+ entries.reserve(trash_image_specs.size());
+ for (const auto& [image_id, spec] : trash_image_specs) {
+ rbd_trash_image_source_t source =
+ static_cast<rbd_trash_image_source_t>(spec.source);
+ entries.push_back({image_id, spec.name, source,
+ spec.deletion_time.sec(),
+ spec.deferment_end_time.sec()});
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Trash<I>::purge(IoCtx& io_ctx, time_t expire_ts,
+ float threshold, ProgressContext& pctx) {
+  CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << &io_ctx << dendl;
+
+ std::vector<librbd::trash_image_info_t> trash_entries;
+ int r = librbd::api::Trash<I>::list(io_ctx, trash_entries, true);
+ if (r < 0) {
+ return r;
+ }
+
+  trash_entries.erase(
+    std::remove_if(trash_entries.begin(), trash_entries.end(),
+        [](const librbd::trash_image_info_t& info) {
+          return info.source != RBD_TRASH_IMAGE_SOURCE_USER &&
+                 info.source != RBD_TRASH_IMAGE_SOURCE_USER_PARENT;
+        }),
+    trash_entries.end());
+
+ std::set<std::string> to_be_removed;
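+  // a threshold of -1 disables capacity-based purging; otherwise it is the
+  // pool usage fraction (0..1) that the purge tries to bring the pool back
+  // under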
+ if (threshold != -1) {
+    if (threshold < 0 || threshold > 1) {
+      lderr(cct) << "argument 'threshold' is out of valid range [0, 1]"
+                 << dendl;
+      return -EINVAL;
+    }
+
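+    // pool usage statistics are taken from the "ceph df" mon command's
+    // JSON output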
+ librados::bufferlist inbl;
+ librados::bufferlist outbl;
+ std::string pool_name = io_ctx.get_pool_name();
+
+    librados::Rados rados(io_ctx);
+    r = rados.mon_command(R"({"prefix": "df", "format": "json"})", inbl,
+                          &outbl, nullptr);
+    if (r < 0) {
+      lderr(cct) << "error retrieving pool stats: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+
+ json_spirit::mValue json;
+ if (!json_spirit::read(outbl.to_str(), json)) {
+ lderr(cct) << "ceph df json output could not be parsed"
+ << dendl;
+ return -EBADMSG;
+ }
+
+ json_spirit::mArray arr = json.get_obj()["pools"].get_array();
+
+ double pool_percent_used = 0;
+ uint64_t pool_total_bytes = 0;
+
+ std::map<std::string, std::vector<std::string>> datapools;
+
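+    // consider the oldest entries first: images whose deferment period
+    // ends earliest are reclaimed before newer ones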
+    std::sort(trash_entries.begin(), trash_entries.end(),
+        [](const librbd::trash_image_info_t& a,
+           const librbd::trash_image_info_t& b) {
+          return a.deferment_end_time < b.deferment_end_time;
+        });
+
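+    // group trash entries by the pool that actually stores their data so
+    // that freed bytes are attributed to the pool being drained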
+ for (const auto &entry : trash_entries) {
+ int64_t data_pool_id = -1;
+ r = cls_client::get_data_pool(&io_ctx, util::header_name(entry.id),
+ &data_pool_id);
+ if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) {
+ lderr(cct) << "failed to query data pool: " << cpp_strerror(r) << dendl;
+ return r;
+ } else if (data_pool_id == -1) {
+ data_pool_id = io_ctx.get_id();
+ }
+
+ if (data_pool_id != io_ctx.get_id()) {
+ librados::IoCtx data_io_ctx;
+ r = util::create_ioctx(io_ctx, "image", data_pool_id,
+ {}, &data_io_ctx);
+ if (r < 0) {
+ lderr(cct) << "error accessing data pool" << dendl;
+ continue;
+ }
+ auto data_pool = data_io_ctx.get_pool_name();
+ datapools[data_pool].push_back(entry.id);
+ } else {
+ datapools[pool_name].push_back(entry.id);
+ }
+ }
+
+ uint64_t bytes_to_free = 0;
+
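+    // for each pool above the threshold, accumulate candidate images until
+    // enough bytes would be freed to drop back under the threshold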
+    for (size_t i = 0; i < arr.size(); ++i) {
+ json_spirit::mObject obj = arr[i].get_obj();
+ std::string name = obj.find("name")->second.get_str();
+ auto img = datapools.find(name);
+ if (img != datapools.end()) {
+ json_spirit::mObject stats = arr[i].get_obj()["stats"].get_obj();
+ pool_percent_used = stats["percent_used"].get_real();
+ if (pool_percent_used <= threshold) continue;
+
+ bytes_to_free = 0;
+
+ pool_total_bytes = stats["max_avail"].get_uint64() +
+ stats["bytes_used"].get_uint64();
+
+ auto bytes_threshold = (uint64_t) (pool_total_bytes *
+ (pool_percent_used - threshold));
+
+ for (const auto &it : img->second) {
+ auto ictx = new I("", it, nullptr, io_ctx, false);
+ r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
+ if (r == -ENOENT) {
+ continue;
+        } else if (r < 0) {
+          lderr(cct) << "failed to open image " << it << ": "
+                     << cpp_strerror(r) << dendl;
+          // the image ctx is already cleaned up on a failed open -- skip
+          // this image rather than dereferencing it below
+          continue;
+        }
+
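+        // estimate reclaimable space by summing the image's allocated
+        // extents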
+ r = librbd::api::DiffIterate<I>::diff_iterate(
+ ictx, cls::rbd::UserSnapshotNamespace(), nullptr, 0, ictx->size,
+ false, true,
+ [](uint64_t offset, size_t len, int exists, void *arg) {
+ auto *to_free = reinterpret_cast<uint64_t *>(arg);
+ if (exists)
+ (*to_free) += len;
+ return 0;
+ }, &bytes_to_free);
+
+ ictx->state->close();
+ if (r < 0) {
+ lderr(cct) << "failed to calculate disk usage for image " << it
+ << ": " << cpp_strerror(r) << dendl;
+ continue;
+ }
+
+ to_be_removed.insert(it);
+ if (bytes_to_free >= bytes_threshold) {
+ break;
+ }
+ }
+ }
+ }
+
+ if (bytes_to_free == 0) {
+ ldout(cct, 10) << "pool usage is lower than or equal to "
+ << (threshold * 100)
+ << "%" << dendl;
+ return 0;
+ }
+ }
+
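+  // an expire_ts of 0 means "now"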
+ if (expire_ts == 0) {
+ struct timespec now;
+ clock_gettime(CLOCK_REALTIME, &now);
+ expire_ts = now.tv_sec;
+ }
+
+ for (const auto &entry : trash_entries) {
+ if (expire_ts >= entry.deferment_end_time) {
+ to_be_removed.insert(entry.id);
+ }
+ }
+
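+  // remove in passes: remove_err == 1 records that at least one removal
+  // succeeded during the pass, so images that were blocked (e.g. parents
+  // waiting on a child removal) are retried; a negative value means the
+  // last pass made no progress and the remaining images stay blocked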
+ NoOpProgressContext remove_pctx;
+ uint64_t list_size = to_be_removed.size(), i = 0;
+ int remove_err = 1;
+ while (!to_be_removed.empty() && remove_err == 1) {
+ remove_err = 0;
+ for (auto it = to_be_removed.begin(); it != to_be_removed.end(); ) {
+ trash_image_info_t trash_info;
+ r = Trash<I>::get(io_ctx, *it, &trash_info);
+ if (r == -ENOENT) {
+ // likely RBD_TRASH_IMAGE_SOURCE_USER_PARENT image removed as a side
+      // effect of a preceding remove (last child detach)
+ pctx.update_progress(++i, list_size);
+ it = to_be_removed.erase(it);
+ continue;
+ } else if (r < 0) {
+ lderr(cct) << "error getting image id " << *it
+ << " info: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = Trash<I>::remove(io_ctx, *it, true, remove_pctx);
+ if (r == -ENOTEMPTY || r == -EBUSY || r == -EMLINK || r == -EUCLEAN) {
+ if (!remove_err) {
+ remove_err = r;
+ }
+ ++it;
+ continue;
+ } else if (r < 0) {
+ lderr(cct) << "error removing image id " << *it
+ << ": " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ pctx.update_progress(++i, list_size);
+ it = to_be_removed.erase(it);
+ remove_err = 1;
+ }
+ ldout(cct, 20) << "remove_err=" << remove_err << dendl;
+ }
+
+ if (!to_be_removed.empty()) {
+ ceph_assert(remove_err < 0);
+ ldout(cct, 10) << "couldn't remove " << to_be_removed.size()
+ << " expired images" << dendl;
+ return remove_err;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Trash<I>::remove(IoCtx &io_ctx, const std::string &image_id, bool force,
+ ProgressContext& prog_ctx) {
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << "trash_remove " << &io_ctx << " " << image_id
+ << " " << force << dendl;
+
+ cls::rbd::TrashImageSpec trash_spec;
+ int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec);
+ if (r < 0) {
+ lderr(cct) << "error getting image id " << image_id
+ << " info from trash: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ utime_t now = ceph_clock_now();
+ if (now < trash_spec.deferment_end_time && !force) {
+ lderr(cct) << "error: deferment time has not expired." << dendl;
+ return -EPERM;
+ }
+ if (trash_spec.state == cls::rbd::TRASH_IMAGE_STATE_MOVING) {
+ lderr(cct) << "error: image is pending moving to the trash."
+ << dendl;
+ return -EUCLEAN;
+ } else if (trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL &&
+ trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) {
+ lderr(cct) << "error: image is pending restoration." << dendl;
+ return -EBUSY;
+ }
+
+ AsioEngine asio_engine(io_ctx);
+
+ C_SaferCond cond;
+ auto req = librbd::trash::RemoveRequest<I>::create(
+ io_ctx, image_id, asio_engine.get_work_queue(), force, prog_ctx, &cond);
+ req->send();
+
+ r = cond.wait();
+ if (r < 0) {
+ return r;
+ }
+
+ C_SaferCond notify_ctx;
+ TrashWatcher<I>::notify_image_removed(io_ctx, image_id, &notify_ctx);
+ r = notify_ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Trash<I>::restore(librados::IoCtx &io_ctx,
+ const TrashImageSources& trash_image_sources,
+ const std::string &image_id,
+ const std::string &image_new_name) {
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << "trash_restore " << &io_ctx << " " << image_id << " "
+ << image_new_name << dendl;
+
+ cls::rbd::TrashImageSpec trash_spec;
+ int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec);
+ if (r < 0) {
+ lderr(cct) << "error getting image id " << image_id
+ << " info from trash: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (trash_image_sources.count(trash_spec.source) == 0) {
+ lderr(cct) << "Current trash source '" << trash_spec.source << "' "
+ << "does not match expected: "
+ << trash_image_sources << dendl;
+ return -EINVAL;
+ }
+
+ std::string image_name = image_new_name;
+ if (trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL &&
+ trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_RESTORING) {
+ lderr(cct) << "error restoring image id " << image_id
+ << ", which is pending deletion" << dendl;
+ return -EBUSY;
+ }
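+  // transition NORMAL -> RESTORING so an interrupted restore can later be
+  // detected and resumed; -EOPNOTSUPP is tolerated since the cluster may
+  // lack the state-set op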
+ r = cls_client::trash_state_set(&io_ctx, image_id,
+ cls::rbd::TRASH_IMAGE_STATE_RESTORING,
+ cls::rbd::TRASH_IMAGE_STATE_NORMAL);
+ if (r < 0 && r != -EOPNOTSUPP) {
+ lderr(cct) << "error setting trash image state: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (image_name.empty()) {
+ // if user didn't specify a new name, let's try using the old name
+ image_name = trash_spec.name;
+ ldout(cct, 20) << "restoring image id " << image_id << " with name "
+ << image_name << dendl;
+ }
+
+ // check if no image exists with the same name
+ bool create_id_obj = true;
+ std::string existing_id;
+ r = cls_client::get_id(&io_ctx, util::id_obj_name(image_name), &existing_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error checking if image " << image_name << " exists: "
+ << cpp_strerror(r) << dendl;
+ int ret = cls_client::trash_state_set(&io_ctx, image_id,
+ cls::rbd::TRASH_IMAGE_STATE_NORMAL,
+ cls::rbd::TRASH_IMAGE_STATE_RESTORING);
+ if (ret < 0 && ret != -EOPNOTSUPP) {
+ lderr(cct) << "error setting trash image state: "
+ << cpp_strerror(ret) << dendl;
+ }
+ return r;
+  } else if (r != -ENOENT) {
+ // checking if we are recovering from an incomplete restore
+ if (existing_id != image_id) {
+ ldout(cct, 2) << "an image with the same name already exists" << dendl;
+ int r2 = cls_client::trash_state_set(&io_ctx, image_id,
+ cls::rbd::TRASH_IMAGE_STATE_NORMAL,
+ cls::rbd::TRASH_IMAGE_STATE_RESTORING);
+ if (r2 < 0 && r2 != -EOPNOTSUPP) {
+ lderr(cct) << "error setting trash image state: "
+ << cpp_strerror(r2) << dendl;
+ }
+ return -EEXIST;
+ }
+ create_id_obj = false;
+ }
+
+ if (create_id_obj) {
+ ldout(cct, 2) << "adding id object" << dendl;
+ librados::ObjectWriteOperation op;
+ op.create(true);
+ cls_client::set_id(&op, image_id);
+ r = io_ctx.operate(util::id_obj_name(image_name), &op);
+ if (r < 0) {
+ lderr(cct) << "error adding id object for image " << image_name
+ << ": " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ ldout(cct, 2) << "adding rbd image to v2 directory..." << dendl;
+ r = cls_client::dir_add_image(&io_ctx, RBD_DIRECTORY, image_name,
+ image_id);
+ if (r < 0 && r != -EEXIST) {
+ lderr(cct) << "error adding image to v2 directory: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = enable_mirroring<I>(io_ctx, image_id);
+ if (r < 0) {
+ // not fatal -- ignore
+ }
+
+ ldout(cct, 2) << "removing image from trash..." << dendl;
+ r = cls_client::trash_remove(&io_ctx, image_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error removing image id " << image_id << " from trash: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ C_SaferCond notify_ctx;
+ TrashWatcher<I>::notify_image_removed(io_ctx, image_id, &notify_ctx);
+ r = notify_ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Trash<librbd::ImageCtx>;
diff --git a/src/librbd/api/Trash.h b/src/librbd/api/Trash.h
new file mode 100644
index 000000000..66f819dfa
--- /dev/null
+++ b/src/librbd/api/Trash.h
@@ -0,0 +1,53 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef LIBRBD_API_TRASH_H
+#define LIBRBD_API_TRASH_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "include/rbd/librbd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include <set>
+#include <string>
+#include <vector>
+
+namespace librbd {
+
+class ProgressContext;
+
+struct ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Trash {
+ typedef std::set<cls::rbd::TrashImageSource> TrashImageSources;
+ static const TrashImageSources ALLOWED_RESTORE_SOURCES;
+
+ static int move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+ const std::string &image_name, uint64_t delay);
+ static int move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+ const std::string &image_name, const std::string &image_id,
+ uint64_t delay);
+ static int get(librados::IoCtx &io_ctx, const std::string &id,
+ trash_image_info_t *info);
+ static int list(librados::IoCtx &io_ctx,
+ std::vector<trash_image_info_t> &entries,
+ bool exclude_user_remove_source);
+  static int purge(librados::IoCtx& io_ctx, time_t expire_ts,
+ float threshold, ProgressContext& pctx);
+ static int remove(librados::IoCtx &io_ctx, const std::string &image_id,
+ bool force, ProgressContext& prog_ctx);
+ static int restore(librados::IoCtx &io_ctx,
+ const TrashImageSources& trash_image_sources,
+ const std::string &image_id,
+ const std::string &image_new_name);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Trash<librbd::ImageCtx>;
+
+#endif // LIBRBD_API_TRASH_H
diff --git a/src/librbd/api/Utils.cc b/src/librbd/api/Utils.cc
new file mode 100644
index 000000000..056b6b435
--- /dev/null
+++ b/src/librbd/api/Utils.cc
@@ -0,0 +1,102 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Utils.h"
+#include "common/dout.h"
+
+#if defined(HAVE_LIBCRYPTSETUP)
+#include "librbd/crypto/luks/LUKSEncryptionFormat.h"
+#endif
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::util: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+namespace util {
+
+template <typename I>
+int create_encryption_format(
+ CephContext* cct, encryption_format_t format,
+ encryption_options_t opts, size_t opts_size, bool c_api,
+ crypto::EncryptionFormat<I>** result_format) {
+ size_t expected_opts_size;
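+  // each format branch verifies that the caller's options blob matches the
+  // size of the struct for the requested format and API flavor (C vs C++)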
+ switch (format) {
+#if defined(HAVE_LIBCRYPTSETUP)
+ case RBD_ENCRYPTION_FORMAT_LUKS1: {
+ if (c_api) {
+ expected_opts_size = sizeof(rbd_encryption_luks1_format_options_t);
+ if (expected_opts_size == opts_size) {
+ auto c_opts = (rbd_encryption_luks1_format_options_t*)opts;
+ *result_format = new crypto::luks::LUKS1EncryptionFormat<I>(
+ c_opts->alg, {c_opts->passphrase, c_opts->passphrase_size});
+ }
+ } else {
+ expected_opts_size = sizeof(encryption_luks1_format_options_t);
+ if (expected_opts_size == opts_size) {
+ auto cpp_opts = (encryption_luks1_format_options_t*)opts;
+ *result_format = new crypto::luks::LUKS1EncryptionFormat<I>(
+ cpp_opts->alg, cpp_opts->passphrase);
+ }
+ }
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS2: {
+ if (c_api) {
+ expected_opts_size = sizeof(rbd_encryption_luks2_format_options_t);
+ if (expected_opts_size == opts_size) {
+ auto c_opts = (rbd_encryption_luks2_format_options_t*)opts;
+ *result_format = new crypto::luks::LUKS2EncryptionFormat<I>(
+ c_opts->alg, {c_opts->passphrase, c_opts->passphrase_size});
+ }
+ } else {
+ expected_opts_size = sizeof(encryption_luks2_format_options_t);
+ if (expected_opts_size == opts_size) {
+ auto cpp_opts = (encryption_luks2_format_options_t*)opts;
+ *result_format = new crypto::luks::LUKS2EncryptionFormat<I>(
+ cpp_opts->alg, cpp_opts->passphrase);
+ }
+ }
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS: {
+ if (c_api) {
+ expected_opts_size = sizeof(rbd_encryption_luks_format_options_t);
+ if (expected_opts_size == opts_size) {
+ auto c_opts = (rbd_encryption_luks_format_options_t*)opts;
+ *result_format = new crypto::luks::LUKSEncryptionFormat<I>(
+ {c_opts->passphrase, c_opts->passphrase_size});
+ }
+ } else {
+ expected_opts_size = sizeof(encryption_luks_format_options_t);
+ if (expected_opts_size == opts_size) {
+ auto cpp_opts = (encryption_luks_format_options_t*)opts;
+ *result_format = new crypto::luks::LUKSEncryptionFormat<I>(
+ cpp_opts->passphrase);
+ }
+ }
+ break;
+ }
+#endif
+ default:
+ lderr(cct) << "unsupported encryption format: " << format << dendl;
+ return -ENOTSUP;
+ }
+
+ if (expected_opts_size != opts_size) {
+ lderr(cct) << "expected opts_size: " << expected_opts_size << dendl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+} // namespace util
+} // namespace api
+} // namespace librbd
+
+template int librbd::api::util::create_encryption_format(
+ CephContext* cct, encryption_format_t format, encryption_options_t opts,
+ size_t opts_size, bool c_api,
+ crypto::EncryptionFormat<librbd::ImageCtx>** result_format);
diff --git a/src/librbd/api/Utils.h b/src/librbd/api/Utils.h
new file mode 100644
index 000000000..8f8c22290
--- /dev/null
+++ b/src/librbd/api/Utils.h
@@ -0,0 +1,28 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_UTILS_H
+#define CEPH_LIBRBD_API_UTILS_H
+
+#include "include/rbd/librbd.hpp"
+#include "librbd/ImageCtx.h"
+#include "librbd/crypto/EncryptionFormat.h"
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+namespace util {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+int create_encryption_format(
+ CephContext* cct, encryption_format_t format,
+ encryption_options_t opts, size_t opts_size, bool c_api,
+ crypto::EncryptionFormat<ImageCtxT>** result_format);
+
+} // namespace util
+} // namespace api
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_API_UTILS_H