summaryrefslogtreecommitdiffstats
path: root/src/librbd/api
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
commit483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
treee5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/librbd/api
parentInitial commit. (diff)
downloadceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz
ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/librbd/api/Config.cc240
-rw-r--r--src/librbd/api/Config.h37
-rw-r--r--src/librbd/api/DiffIterate.cc554
-rw-r--r--src/librbd/api/DiffIterate.h66
-rw-r--r--src/librbd/api/Group.cc1207
-rw-r--r--src/librbd/api/Group.h59
-rw-r--r--src/librbd/api/Image.cc836
-rw-r--r--src/librbd/api/Image.h78
-rw-r--r--src/librbd/api/Migration.cc1885
-rw-r--r--src/librbd/api/Migration.h105
-rw-r--r--src/librbd/api/Mirror.cc1541
-rw-r--r--src/librbd/api/Mirror.h87
-rw-r--r--src/librbd/api/Namespace.cc227
-rw-r--r--src/librbd/api/Namespace.h33
-rw-r--r--src/librbd/api/Pool.cc377
-rw-r--r--src/librbd/api/Pool.h38
-rw-r--r--src/librbd/api/PoolMetadata.cc151
-rw-r--r--src/librbd/api/PoolMetadata.h36
-rw-r--r--src/librbd/api/Snapshot.cc196
-rw-r--r--src/librbd/api/Snapshot.h37
-rw-r--r--src/librbd/api/Trash.cc681
-rw-r--r--src/librbd/api/Trash.h53
22 files changed, 8524 insertions, 0 deletions
diff --git a/src/librbd/api/Config.cc b/src/librbd/api/Config.cc
new file mode 100644
index 00000000..08be9d2c
--- /dev/null
+++ b/src/librbd/api/Config.cc
@@ -0,0 +1,240 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Config.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/api/PoolMetadata.h"
+#include <boost/algorithm/string/predicate.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::Config: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+const uint32_t MAX_KEYS = 64;
+
+typedef std::map<std::string, std::pair<std::string, config_source_t>> Parent;
+
+static std::set<std::string> EXCLUDE_OPTIONS {
+ "rbd_auto_exclusive_lock_until_manual_request",
+ "rbd_default_format",
+ "rbd_default_map_options",
+ "rbd_default_pool",
+ "rbd_discard_on_zeroed_write_same",
+ "rbd_op_thread_timeout",
+ "rbd_op_threads",
+ "rbd_tracing",
+ "rbd_validate_names",
+ "rbd_validate_pool",
+ "rbd_mirror_pool_replayers_refresh_interval",
+ "rbd_config_pool_override_update_timestamp"
+ };
+static std::set<std::string> EXCLUDE_IMAGE_OPTIONS {
+ "rbd_default_clone_format",
+ "rbd_default_data_pool",
+ "rbd_default_features",
+ "rbd_default_format",
+ "rbd_default_order",
+ "rbd_default_stripe_count",
+ "rbd_default_stripe_unit",
+ "rbd_journal_order",
+ "rbd_journal_pool",
+ "rbd_journal_splay_width"
+ };
+
+struct Options : Parent {
+ librados::IoCtx m_io_ctx;
+
+ Options(librados::IoCtx& io_ctx, bool image_apply_only_options) {
+ m_io_ctx.dup(io_ctx);
+ m_io_ctx.set_namespace("");
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_io_ctx.cct());
+
+ const std::string rbd_key_prefix("rbd_");
+ const std::string rbd_mirror_key_prefix("rbd_mirror_");
+ auto& schema = cct->_conf.get_schema();
+ for (auto& pair : schema) {
+ if (!boost::starts_with(pair.first, rbd_key_prefix)) {
+ continue;
+ } else if (EXCLUDE_OPTIONS.count(pair.first) != 0) {
+ continue;
+ } else if (image_apply_only_options &&
+ EXCLUDE_IMAGE_OPTIONS.count(pair.first) != 0) {
+ continue;
+ } else if (image_apply_only_options &&
+ boost::starts_with(pair.first, rbd_mirror_key_prefix)) {
+ continue;
+ }
+
+ insert({pair.first, {}});
+ }
+ }
+
+ int init() {
+ CephContext *cct = (CephContext *)m_io_ctx.cct();
+
+ for (auto &it : *this) {
+ int r = cct->_conf.get_val(it.first.c_str(), &it.second.first);
+ ceph_assert(r == 0);
+ it.second.second = RBD_CONFIG_SOURCE_CONFIG;
+ }
+
+ std::string last_key = ImageCtx::METADATA_CONF_PREFIX;
+ bool more_results = true;
+
+ while (more_results) {
+ std::map<std::string, bufferlist> pairs;
+
+ int r = librbd::api::PoolMetadata<>::list(m_io_ctx, last_key, MAX_KEYS,
+ &pairs);
+ if (r < 0) {
+ return r;
+ }
+
+ if (pairs.empty()) {
+ break;
+ }
+
+ more_results = (pairs.size() == MAX_KEYS);
+ last_key = pairs.rbegin()->first;
+
+ for (auto kv : pairs) {
+ std::string key;
+ if (!util::is_metadata_config_override(kv.first, &key)) {
+ more_results = false;
+ break;
+ }
+ auto it = find(key);
+ if (it != end()) {
+ it->second = {{kv.second.c_str(), kv.second.length()},
+ RBD_CONFIG_SOURCE_POOL};
+ }
+ }
+ }
+ return 0;
+ }
+};
+
+} // anonymous namespace
+
+template <typename I>
+bool Config<I>::is_option_name(librados::IoCtx& io_ctx,
+ const std::string &name) {
+ Options opts(io_ctx, false);
+
+ return (opts.find(name) != opts.end());
+}
+
+template <typename I>
+int Config<I>::list(librados::IoCtx& io_ctx,
+ std::vector<config_option_t> *options) {
+ Options opts(io_ctx, false);
+
+ int r = opts.init();
+ if (r < 0) {
+ return r;
+ }
+
+ for (auto &it : opts) {
+ options->push_back({it.first, it.second.first, it.second.second});
+ }
+
+ return 0;
+}
+
+template <typename I>
+bool Config<I>::is_option_name(I *image_ctx, const std::string &name) {
+ Options opts(image_ctx->md_ctx, true);
+
+ return (opts.find(name) != opts.end());
+}
+
+template <typename I>
+int Config<I>::list(I *image_ctx, std::vector<config_option_t> *options) {
+ CephContext *cct = image_ctx->cct;
+ Options opts(image_ctx->md_ctx, true);
+
+ int r = opts.init();
+ if (r < 0) {
+ return r;
+ }
+
+ std::string last_key = ImageCtx::METADATA_CONF_PREFIX;
+ bool more_results = true;
+
+ while (more_results) {
+ std::map<std::string, bufferlist> pairs;
+
+ r = cls_client::metadata_list(&image_ctx->md_ctx, image_ctx->header_oid,
+ last_key, MAX_KEYS, &pairs);
+ if (r < 0) {
+ lderr(cct) << "failed reading image metadata: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (pairs.empty()) {
+ break;
+ }
+
+ more_results = (pairs.size() == MAX_KEYS);
+ last_key = pairs.rbegin()->first;
+
+ for (auto kv : pairs) {
+ std::string key;
+ if (!util::is_metadata_config_override(kv.first, &key)) {
+ more_results = false;
+ break;
+ }
+ auto it = opts.find(key);
+ if (it != opts.end()) {
+ it->second = {{kv.second.c_str(), kv.second.length()},
+ RBD_CONFIG_SOURCE_IMAGE};
+ }
+ }
+ }
+
+ for (auto &it : opts) {
+ options->push_back({it.first, it.second.first, it.second.second});
+ }
+
+ return 0;
+}
+
+template <typename I>
+void Config<I>::apply_pool_overrides(librados::IoCtx& io_ctx,
+ ConfigProxy* config) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ Options opts(io_ctx, false);
+ int r = opts.init();
+ if (r < 0) {
+ lderr(cct) << "failed to read pool config overrides: " << cpp_strerror(r)
+ << dendl;
+ return;
+ }
+
+ for (auto& pair : opts) {
+ if (pair.second.second == RBD_CONFIG_SOURCE_POOL) {
+ r = config->set_val(pair.first, pair.second.first);
+ if (r < 0) {
+ lderr(cct) << "failed to override pool config " << pair.first << "="
+ << pair.second.first << ": " << cpp_strerror(r) << dendl;
+ }
+ }
+ }
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Config<librbd::ImageCtx>;
diff --git a/src/librbd/api/Config.h b/src/librbd/api/Config.h
new file mode 100644
index 00000000..daa718c9
--- /dev/null
+++ b/src/librbd/api/Config.h
@@ -0,0 +1,37 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_CONFIG_H
+#define CEPH_LIBRBD_API_CONFIG_H
+
+#include "include/rbd/librbd.hpp"
+#include "include/rados/librados_fwd.hpp"
+
+struct ConfigProxy;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Config {
+public:
+ static bool is_option_name(librados::IoCtx& io_ctx, const std::string &name);
+ static int list(librados::IoCtx& io_ctx,
+ std::vector<config_option_t> *options);
+
+ static bool is_option_name(ImageCtxT *image_ctx, const std::string &name);
+ static int list(ImageCtxT *image_ctx, std::vector<config_option_t> *options);
+
+ static void apply_pool_overrides(librados::IoCtx& io_ctx,
+ ConfigProxy* config);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Config<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_CONFIG_H
diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc
new file mode 100644
index 00000000..f96264e3
--- /dev/null
+++ b/src/librbd/api/DiffIterate.cc
@@ -0,0 +1,554 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/DiffIterate.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/internal.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+#include "librbd/io/ImageRequestWQ.h"
+#include "include/rados/librados.hpp"
+#include "include/interval_set.h"
+#include "common/errno.h"
+#include "common/Throttle.h"
+#include "osdc/Striper.h"
+#include "librados/snap_set_diff.h"
+#include <boost/tuple/tuple.hpp>
+#include <list>
+#include <map>
+#include <vector>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::DiffIterate: "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+enum ObjectDiffState {
+ OBJECT_DIFF_STATE_NONE = 0,
+ OBJECT_DIFF_STATE_UPDATED = 1,
+ OBJECT_DIFF_STATE_HOLE = 2
+};
+
+struct DiffContext {
+ DiffIterate<>::Callback callback;
+ void *callback_arg;
+ bool whole_object;
+ uint64_t from_snap_id;
+ uint64_t end_snap_id;
+ interval_set<uint64_t> parent_diff;
+ OrderedThrottle throttle;
+
+ template <typename I>
+ DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
+ void *callback_arg, bool _whole_object, uint64_t _from_snap_id,
+ uint64_t _end_snap_id)
+ : callback(callback), callback_arg(callback_arg),
+ whole_object(_whole_object), from_snap_id(_from_snap_id),
+ end_snap_id(_end_snap_id),
+ throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) {
+ }
+};
+
+class C_DiffObject : public Context {
+public:
+ template <typename I>
+ C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx,
+ DiffContext &diff_context, const std::string &oid,
+ uint64_t offset, const std::vector<ObjectExtent> &object_extents)
+ : m_cct(image_ctx.cct), m_head_ctx(head_ctx),
+ m_diff_context(diff_context), m_oid(oid), m_offset(offset),
+ m_object_extents(object_extents), m_snap_ret(0) {
+ }
+
+ void send() {
+ C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this);
+ librados::AioCompletion *rados_completion =
+ util::create_rados_callback(ctx);
+
+ librados::ObjectReadOperation op;
+ op.list_snaps(&m_snap_set, &m_snap_ret);
+
+ int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL);
+ ceph_assert(r == 0);
+ rados_completion->release();
+ }
+
+protected:
+ typedef boost::tuple<uint64_t, size_t, bool> Diff;
+ typedef std::list<Diff> Diffs;
+
+ void finish(int r) override {
+ CephContext *cct = m_cct;
+ if (r == 0 && m_snap_ret < 0) {
+ r = m_snap_ret;
+ }
+
+ Diffs diffs;
+ if (r == 0) {
+ ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl;
+ compute_diffs(&diffs);
+ } else if (r == -ENOENT) {
+ ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)"
+ << dendl;
+ r = 0;
+ compute_parent_overlap(&diffs);
+ } else {
+ ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ if (r == 0) {
+ for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
+ r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
+ m_diff_context.callback_arg);
+ if (r < 0) {
+ break;
+ }
+ }
+ }
+ m_diff_context.throttle.end_op(r);
+ }
+
+private:
+ CephContext *m_cct;
+ librados::IoCtx &m_head_ctx;
+ DiffContext &m_diff_context;
+ std::string m_oid;
+ uint64_t m_offset;
+ std::vector<ObjectExtent> m_object_extents;
+
+ librados::snap_set_t m_snap_set;
+ int m_snap_ret;
+
+ void compute_diffs(Diffs *diffs) {
+ CephContext *cct = m_cct;
+
+ // calc diff from from_snap_id -> to_snap_id
+ interval_set<uint64_t> diff;
+ uint64_t end_size;
+ bool end_exists;
+ librados::snap_t clone_end_snap_id;
+ bool whole_object;
+ calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id,
+ m_diff_context.end_snap_id, &diff, &end_size,
+ &end_exists, &clone_end_snap_id, &whole_object);
+ if (whole_object) {
+ ldout(cct, 1) << "object " << m_oid << ": need to provide full object"
+ << dendl;
+ }
+ ldout(cct, 20) << " diff " << diff << " end_exists=" << end_exists
+ << dendl;
+ if (diff.empty() && !whole_object) {
+ if (m_diff_context.from_snap_id == 0 && !end_exists) {
+ compute_parent_overlap(diffs);
+ }
+ return;
+ } else if (m_diff_context.whole_object || whole_object) {
+ // provide the full object extents to the callback
+ for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
+ q != m_object_extents.end(); ++q) {
+ diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length,
+ end_exists));
+ }
+ return;
+ }
+
+ for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
+ q != m_object_extents.end(); ++q) {
+ ldout(cct, 20) << "diff_iterate object " << m_oid << " extent "
+ << q->offset << "~" << q->length << " from "
+ << q->buffer_extents << dendl;
+ uint64_t opos = q->offset;
+ for (vector<pair<uint64_t,uint64_t> >::iterator r =
+ q->buffer_extents.begin();
+ r != q->buffer_extents.end(); ++r) {
+ interval_set<uint64_t> overlap; // object extents
+ overlap.insert(opos, r->second);
+ overlap.intersection_of(diff);
+ ldout(cct, 20) << " opos " << opos
+ << " buf " << r->first << "~" << r->second
+ << " overlap " << overlap << dendl;
+ for (interval_set<uint64_t>::iterator s = overlap.begin();
+ s != overlap.end(); ++s) {
+ uint64_t su_off = s.get_start() - opos;
+ uint64_t logical_off = m_offset + r->first + su_off;
+ ldout(cct, 20) << " overlap extent " << s.get_start() << "~"
+ << s.get_len() << " logical " << logical_off << "~"
+ << s.get_len() << dendl;
+ diffs->push_back(boost::make_tuple(logical_off, s.get_len(),
+ end_exists));
+ }
+ opos += r->second;
+ }
+ ceph_assert(opos == q->offset + q->length);
+ }
+ }
+
+ void compute_parent_overlap(Diffs *diffs) {
+ if (m_diff_context.from_snap_id == 0 &&
+ !m_diff_context.parent_diff.empty()) {
+ // report parent diff instead
+ for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
+ q != m_object_extents.end(); ++q) {
+ for (vector<pair<uint64_t,uint64_t> >::iterator r =
+ q->buffer_extents.begin();
+ r != q->buffer_extents.end(); ++r) {
+ interval_set<uint64_t> o;
+ o.insert(m_offset + r->first, r->second);
+ o.intersection_of(m_diff_context.parent_diff);
+ ldout(m_cct, 20) << " reporting parent overlap " << o << dendl;
+ for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
+ ++s) {
+ diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(),
+ true));
+ }
+ }
+ }
+ }
+ }
+};
+
+int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
+ // it's possible for a discard to create a hole in the parent image -- ignore
+ if (exists) {
+ interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
+ diff->insert(off, len);
+ }
+ return 0;
+}
+
+} // anonymous namespace
+
+template <typename I>
+int DiffIterate<I>::diff_iterate(I *ictx,
+ const cls::rbd::SnapshotNamespace& from_snap_namespace,
+ const char *fromsnapname,
+ uint64_t off, uint64_t len,
+ bool include_parent, bool whole_object,
+ int (*cb)(uint64_t, size_t, int, void *),
+ void *arg)
+{
+ ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
+ << " len = " << len << dendl;
+
+ if (!ictx->data_ctx.is_valid()) {
+ return -ENODEV;
+ }
+
+ // ensure previous writes are visible to listsnaps
+ C_SaferCond flush_ctx;
+ {
+ RWLock::RLocker owner_locker(ictx->owner_lock);
+ auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx,
+ io::AIO_TYPE_FLUSH);
+ auto req = io::ImageDispatchSpec<I>::create_flush_request(
+ *ictx, aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
+ req->send();
+ delete req;
+ }
+ int r = flush_ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+
+ r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ ictx->snap_lock.get_read();
+ r = clip_io(ictx, off, &len);
+ ictx->snap_lock.put_read();
+ if (r < 0) {
+ return r;
+ }
+
+ DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
+ include_parent, whole_object, cb, arg);
+ r = command.execute();
+ return r;
+}
+
+template <typename I>
+int DiffIterate<I>::execute() {
+ CephContext* cct = m_image_ctx.cct;
+
+ ceph_assert(m_image_ctx.data_ctx.is_valid());
+
+ librados::IoCtx head_ctx;
+ librados::snap_t from_snap_id = 0;
+ librados::snap_t end_snap_id;
+ uint64_t from_size = 0;
+ uint64_t end_size;
+ {
+ RWLock::RLocker md_locker(m_image_ctx.md_lock);
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ head_ctx.dup(m_image_ctx.data_ctx);
+ if (m_from_snap_name) {
+ from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name);
+ from_size = m_image_ctx.get_image_size(from_snap_id);
+ }
+ end_snap_id = m_image_ctx.snap_id;
+ end_size = m_image_ctx.get_image_size(end_snap_id);
+ }
+
+ if (from_snap_id == CEPH_NOSNAP) {
+ return -ENOENT;
+ }
+ if (from_snap_id == end_snap_id) {
+ // no diff.
+ return 0;
+ }
+ if (from_snap_id >= end_snap_id) {
+ return -EINVAL;
+ }
+
+ int r;
+ bool fast_diff_enabled = false;
+ BitVector<2> object_diff_state;
+ {
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ if (m_whole_object && (m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) {
+ r = diff_object_map(from_snap_id, end_snap_id, &object_diff_state);
+ if (r < 0) {
+ ldout(cct, 5) << "fast diff disabled" << dendl;
+ } else {
+ ldout(cct, 5) << "fast diff enabled" << dendl;
+ fast_diff_enabled = true;
+ }
+ }
+ }
+
+ // we must list snaps via the head, not end snap
+ head_ctx.snap_set_read(CEPH_SNAPDIR);
+
+ ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
+ << end_snap_id << " size from " << from_size
+ << " to " << end_size << dendl;
+
+ // check parent overlap only if we are comparing to the beginning of time
+ DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
+ m_whole_object, from_snap_id, end_snap_id);
+ if (m_include_parent && from_snap_id == 0) {
+ RWLock::RLocker l(m_image_ctx.snap_lock);
+ RWLock::RLocker l2(m_image_ctx.parent_lock);
+ uint64_t overlap = 0;
+ m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap);
+ r = 0;
+ if (m_image_ctx.parent && overlap > 0) {
+ ldout(cct, 10) << " first getting parent diff" << dendl;
+ DiffIterate diff_parent(*m_image_ctx.parent, {},
+ nullptr, 0, overlap,
+ m_include_parent, m_whole_object,
+ &simple_diff_cb,
+ &diff_context.parent_diff);
+ r = diff_parent.execute();
+ }
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ uint64_t period = m_image_ctx.get_stripe_period();
+ uint64_t off = m_offset;
+ uint64_t left = m_length;
+
+ while (left > 0) {
+ uint64_t period_off = off - (off % period);
+ uint64_t read_len = min(period_off + period - off, left);
+
+ // map to extents
+ map<object_t,vector<ObjectExtent> > object_extents;
+ Striper::file_to_extents(cct, m_image_ctx.format_string,
+ &m_image_ctx.layout, off, read_len, 0,
+ object_extents, 0);
+
+ // get snap info for each object
+ for (map<object_t,vector<ObjectExtent> >::iterator p =
+ object_extents.begin();
+ p != object_extents.end(); ++p) {
+ ldout(cct, 20) << "object " << p->first << dendl;
+
+ if (fast_diff_enabled) {
+ const uint64_t object_no = p->second.front().objectno;
+ if (object_diff_state[object_no] == OBJECT_DIFF_STATE_NONE &&
+ from_snap_id == 0 && !diff_context.parent_diff.empty()) {
+ // no data in child object -- report parent diff instead
+ for (auto& oe : p->second) {
+ for (auto& be : oe.buffer_extents) {
+ interval_set<uint64_t> o;
+ o.insert(off + be.first, be.second);
+ o.intersection_of(diff_context.parent_diff);
+ ldout(cct, 20) << " reporting parent overlap " << o << dendl;
+ for (auto e = o.begin(); e != o.end(); ++e) {
+ r = m_callback(e.get_start(), e.get_len(), true,
+ m_callback_arg);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+ }
+ } else if (object_diff_state[object_no] != OBJECT_DIFF_STATE_NONE) {
+ bool updated = (object_diff_state[object_no] ==
+ OBJECT_DIFF_STATE_UPDATED);
+ for (std::vector<ObjectExtent>::iterator q = p->second.begin();
+ q != p->second.end(); ++q) {
+ r = m_callback(off + q->offset, q->length, updated, m_callback_arg);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+ } else {
+ C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx,
+ diff_context,
+ p->first.name, off,
+ p->second);
+ diff_object->send();
+
+ if (diff_context.throttle.pending_error()) {
+ r = diff_context.throttle.wait_for_ret();
+ return r;
+ }
+ }
+ }
+
+ left -= read_len;
+ off += read_len;
+ }
+
+ r = diff_context.throttle.wait_for_ret();
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+int DiffIterate<I>::diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,
+ BitVector<2>* object_diff_state) {
+ ceph_assert(m_image_ctx.snap_lock.is_locked());
+ CephContext* cct = m_image_ctx.cct;
+
+ bool diff_from_start = (from_snap_id == 0);
+ if (from_snap_id == 0) {
+ if (!m_image_ctx.snaps.empty()) {
+ from_snap_id = m_image_ctx.snaps.back();
+ } else {
+ from_snap_id = CEPH_NOSNAP;
+ }
+ }
+
+ object_diff_state->clear();
+ uint64_t current_snap_id = from_snap_id;
+ uint64_t next_snap_id = to_snap_id;
+ BitVector<2> prev_object_map;
+ bool prev_object_map_valid = false;
+ while (true) {
+ uint64_t current_size = m_image_ctx.size;
+ if (current_snap_id != CEPH_NOSNAP) {
+ std::map<librados::snap_t, SnapInfo>::const_iterator snap_it =
+ m_image_ctx.snap_info.find(current_snap_id);
+ ceph_assert(snap_it != m_image_ctx.snap_info.end());
+ current_size = snap_it->second.size;
+
+ ++snap_it;
+ if (snap_it != m_image_ctx.snap_info.end()) {
+ next_snap_id = snap_it->first;
+ } else {
+ next_snap_id = CEPH_NOSNAP;
+ }
+ }
+
+ uint64_t flags;
+ int r = m_image_ctx.get_flags(from_snap_id, &flags);
+ if (r < 0) {
+ lderr(cct) << "diff_object_map: failed to retrieve image flags" << dendl;
+ return r;
+ }
+ if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
+ ldout(cct, 1) << "diff_object_map: cannot perform fast diff on invalid "
+ << "object map" << dendl;
+ return -EINVAL;
+ }
+
+ BitVector<2> object_map;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id,
+ current_snap_id));
+ r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, &object_map);
+ if (r < 0) {
+ lderr(cct) << "diff_object_map: failed to load object map " << oid
+ << dendl;
+ return r;
+ }
+ ldout(cct, 20) << "diff_object_map: loaded object map " << oid << dendl;
+
+ uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout,
+ current_size);
+ if (object_map.size() < num_objs) {
+ ldout(cct, 1) << "diff_object_map: object map too small: "
+ << object_map.size() << " < " << num_objs << dendl;
+ return -EINVAL;
+ }
+ object_map.resize(num_objs);
+ object_diff_state->resize(object_map.size());
+
+ uint64_t overlap = std::min(object_map.size(), prev_object_map.size());
+ auto it = object_map.begin();
+ auto overlap_end_it = it + overlap;
+ auto pre_it = prev_object_map.begin();
+ auto diff_it = object_diff_state->begin();
+ uint64_t i = 0;
+ for (; it != overlap_end_it; ++it, ++pre_it, ++diff_it, ++i) {
+ ldout(cct, 20) << __func__ << ": object state: " << i << " "
+ << static_cast<uint32_t>(*pre_it)
+ << "->" << static_cast<uint32_t>(*it) << dendl;
+ if (*it == OBJECT_NONEXISTENT) {
+ if (*pre_it != OBJECT_NONEXISTENT) {
+ *diff_it = OBJECT_DIFF_STATE_HOLE;
+ }
+ } else if (*it == OBJECT_EXISTS ||
+ (*pre_it != *it &&
+ !(*pre_it == OBJECT_EXISTS &&
+ *it == OBJECT_EXISTS_CLEAN))) {
+ *diff_it = OBJECT_DIFF_STATE_UPDATED;
+ }
+ }
+ ldout(cct, 20) << "diff_object_map: computed overlap diffs" << dendl;
+ auto end_it = object_map.end();
+ if (object_map.size() > prev_object_map.size() &&
+ (diff_from_start || prev_object_map_valid)) {
+ for (; it != end_it; ++it,++diff_it, ++i) {
+ ldout(cct, 20) << __func__ << ": object state: " << i << " "
+ << "->" << static_cast<uint32_t>(*it) << dendl;
+ if (*it == OBJECT_NONEXISTENT) {
+ *diff_it = OBJECT_DIFF_STATE_NONE;
+ } else {
+ *diff_it = OBJECT_DIFF_STATE_UPDATED;
+ }
+ }
+ }
+ ldout(cct, 20) << "diff_object_map: computed resize diffs" << dendl;
+
+ if (current_snap_id == next_snap_id || next_snap_id > to_snap_id) {
+ break;
+ }
+ current_snap_id = next_snap_id;
+ prev_object_map = object_map;
+ prev_object_map_valid = true;
+ }
+ return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::DiffIterate<librbd::ImageCtx>;
diff --git a/src/librbd/api/DiffIterate.h b/src/librbd/api/DiffIterate.h
new file mode 100644
index 00000000..e6074d9c
--- /dev/null
+++ b/src/librbd/api/DiffIterate.h
@@ -0,0 +1,66 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_DIFF_ITERATE_H
+#define CEPH_LIBRBD_API_DIFF_ITERATE_H
+
+#include "include/int_types.h"
+#include "common/bit_vector.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class DiffIterate {
+public:
+ typedef int (*Callback)(uint64_t, size_t, int, void *);
+
+ static int diff_iterate(ImageCtxT *ictx,
+ const cls::rbd::SnapshotNamespace& from_snap_namespace,
+ const char *fromsnapname,
+ uint64_t off, uint64_t len, bool include_parent,
+ bool whole_object,
+ int (*cb)(uint64_t, size_t, int, void *),
+ void *arg);
+
+private:
+ ImageCtxT &m_image_ctx;
+ cls::rbd::SnapshotNamespace m_from_snap_namespace;
+ const char* m_from_snap_name;
+ uint64_t m_offset;
+ uint64_t m_length;
+ bool m_include_parent;
+ bool m_whole_object;
+ Callback m_callback;
+ void *m_callback_arg;
+
+ DiffIterate(ImageCtxT &image_ctx,
+ const cls::rbd::SnapshotNamespace& from_snap_namespace,
+ const char *from_snap_name, uint64_t off, uint64_t len,
+ bool include_parent, bool whole_object, Callback callback,
+ void *callback_arg)
+ : m_image_ctx(image_ctx), m_from_snap_namespace(from_snap_namespace),
+ m_from_snap_name(from_snap_name), m_offset(off),
+ m_length(len), m_include_parent(include_parent),
+ m_whole_object(whole_object), m_callback(callback),
+ m_callback_arg(callback_arg)
+ {
+ }
+
+ int execute();
+
+ int diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,
+ BitVector<2>* object_diff_state);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::DiffIterate<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_DIFF_ITERATE_H
diff --git a/src/librbd/api/Group.cc b/src/librbd/api/Group.cc
new file mode 100644
index 00000000..8b75bd94
--- /dev/null
+++ b/src/librbd/api/Group.cc
@@ -0,0 +1,1207 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+
+#include "librbd/ExclusiveLock.h"
+#include "librbd/api/Group.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/io/AioCompletion.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Group: " << __func__ << ": "
+
+using std::map;
+using std::pair;
+using std::set;
+using std::string;
+using std::vector;
+// list binds to list() here, so std::list is explicitly used below
+
+using ceph::bufferlist;
+using librados::snap_t;
+using librados::IoCtx;
+using librados::Rados;
+
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+template <typename I>
+snap_t get_group_snap_id(I* ictx,
+ const cls::rbd::SnapshotNamespace& in_snap_namespace) {
+ ceph_assert(ictx->snap_lock.is_locked());
+ auto it = ictx->snap_ids.lower_bound({in_snap_namespace, ""});
+ if (it != ictx->snap_ids.end() && it->first.first == in_snap_namespace) {
+ return it->second;
+ }
+ return CEPH_NOSNAP;
+}
+
+string generate_uuid(librados::IoCtx& io_ctx)
+{
+ Rados rados(io_ctx);
+ uint64_t bid = rados.get_instance_id();
+
+ uint32_t extra = rand() % 0xFFFFFFFF;
+ ostringstream bid_ss;
+ bid_ss << std::hex << bid << std::hex << extra;
+ return bid_ss.str();
+}
+
+int group_snap_list(librados::IoCtx& group_ioctx, const char *group_name,
+ std::vector<cls::rbd::GroupSnapshot> *cls_snaps)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+
+ string group_id;
+ vector<string> ind_snap_names;
+
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
+ group_name, &group_id);
+ if (r < 0) {
+ lderr(cct) << "error reading group id object: "
+ << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ string group_header_oid = util::group_header_name(group_id);
+
+ const int max_read = 1024;
+ cls::rbd::GroupSnapshot snap_last;
+
+ for (;;) {
+ vector<cls::rbd::GroupSnapshot> snaps_page;
+
+ r = cls_client::group_snap_list(&group_ioctx, group_header_oid,
+ snap_last, max_read, &snaps_page);
+
+ if (r < 0) {
+ lderr(cct) << "error reading snap list from group: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ cls_snaps->insert(cls_snaps->end(), snaps_page.begin(), snaps_page.end());
+ if (snaps_page.size() < max_read) {
+ break;
+ }
+ snap_last = *snaps_page.rbegin();
+ }
+
+ return 0;
+}
+
// Compose the name used for an individual image snapshot that belongs to
// a group snapshot: ".group.<pool_id_hex>_<group_id>_<snap_id>".
std::string calc_ind_image_snap_name(uint64_t pool_id,
                                     const std::string &group_id,
                                     const std::string &snap_id)
{
  std::ostringstream name;
  name << ".group." << std::hex << pool_id << "_"
       << group_id << "_" << snap_id;
  return name.str();
}
+
// List all images that are members of the named group by paging through
// the group header object max_read entries at a time. image_ids is
// cleared first. Returns 0 on success or a negative errno.
int group_image_list(librados::IoCtx& group_ioctx, const char *group_name,
                     std::vector<cls::rbd::GroupImageStatus> *image_ids)
{
  CephContext *cct = (CephContext *)group_ioctx.cct();

  string group_id;

  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
                                 group_name, &group_id);
  if (r < 0) {
    lderr(cct) << "error reading group id object: "
               << cpp_strerror(r)
               << dendl;
    return r;
  }
  string group_header_oid = util::group_header_name(group_id);

  ldout(cct, 20) << "listing images in group name "
                 << group_name << " group id " << group_header_oid << dendl;
  image_ids->clear();

  const int max_read = 1024;
  cls::rbd::GroupImageSpec start_last;
  do {
    std::vector<cls::rbd::GroupImageStatus> image_ids_page;

    r = cls_client::group_image_list(&group_ioctx, group_header_oid,
                                     start_last, max_read, &image_ids_page);

    if (r < 0) {
      lderr(cct) << "error reading image list from group: "
                 << cpp_strerror(-r) << dendl;
      return r;
    }
    image_ids->insert(image_ids->end(),
                      image_ids_page.begin(), image_ids_page.end());

    if (image_ids_page.size() > 0)
      start_last = image_ids_page.rbegin()->spec;

    // r is reused as the page-size counter: a full page means more
    // entries may remain, so loop again from start_last.
    r = image_ids_page.size();
  } while (r == max_read);

  return 0;
}
+
// Unlink an image from a group using a two-phase protocol: (1) mark the
// membership record INCOMPLETE in the group header, (2) remove the group
// back-reference from the image header (ENOENT tolerated — a previous
// attempt may already have removed it), then (3) drop the membership
// record itself. Returns 0 on success or a negative errno.
int group_image_remove(librados::IoCtx& group_ioctx, string group_id,
                       librados::IoCtx& image_ioctx, string image_id)
{
  CephContext *cct = (CephContext *)group_ioctx.cct();

  string group_header_oid = util::group_header_name(group_id);

  string image_header_oid = util::header_name(image_id);

  ldout(cct, 20) << "removing image " << image_id
                 << " image id " << image_header_oid << dendl;

  cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id());

  cls::rbd::GroupImageStatus incomplete_st(image_id, image_ioctx.get_id(),
                                           cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE);

  cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id());

  // Phase 1: flag the link as INCOMPLETE so a crash mid-removal leaves a
  // record that can be cleaned up on a later retry.
  int r = cls_client::group_image_set(&group_ioctx, group_header_oid,
                                      incomplete_st);

  if (r < 0) {
    lderr(cct) << "couldn't put image into removing state: "
               << cpp_strerror(-r) << dendl;
    return r;
  }

  // Phase 2: drop the back-reference stored in the image header.
  r = cls_client::image_group_remove(&image_ioctx, image_header_oid,
                                     group_spec);
  if ((r < 0) && (r != -ENOENT)) {
    lderr(cct) << "couldn't remove group reference from image"
               << cpp_strerror(-r) << dendl;
    return r;
  } else if (r >= 0) {
    // Tell clients with the image open to refresh their cached header.
    ImageWatcher<>::notify_header_update(image_ioctx, image_header_oid);
  }

  // Phase 3: remove the membership record from the group header.
  r = cls_client::group_image_remove(&group_ioctx, group_header_oid, spec);
  if (r < 0) {
    lderr(cct) << "couldn't remove image from group"
               << cpp_strerror(-r) << dendl;
    return r;
  }

  return 0;
}
+
// Delete every per-image snapshot referenced by a group snapshot record,
// then remove the record itself from the group header object. Missing
// per-image snapshots are tolerated (the record may be partially cleaned
// up from an earlier failed attempt). Returns 0 on success or the first
// negative errno encountered.
int group_snap_remove_by_record(librados::IoCtx& group_ioctx,
                                const cls::rbd::GroupSnapshot& group_snap,
                                const std::string& group_id,
                                const std::string& group_header_oid) {

  CephContext *cct = (CephContext *)group_ioctx.cct();
  std::vector<C_SaferCond*> on_finishes;
  int r, ret_code;

  std::vector<librbd::ImageCtx*> ictxs;

  // Namespace identifying the individual image snapshots that belong to
  // this group snapshot.
  cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id,
                                      group_snap.id};

  ldout(cct, 20) << "Removing snapshots" << dendl;
  int snap_count = group_snap.snaps.size();

  // Open all participating images asynchronously.
  for (int i = 0; i < snap_count; ++i) {
    librbd::IoCtx image_io_ctx;
    r = util::create_ioctx(group_ioctx, "image", group_snap.snaps[i].pool, {},
                           &image_io_ctx);
    if (r < 0) {
      // NOTE(review): returning here leaks any ImageCtx already pushed to
      // ictxs (their async opens are still in flight) — confirm whether
      // this path should wait for and close them first.
      return r;
    }

    librbd::ImageCtx* image_ctx = new ImageCtx("", group_snap.snaps[i].image_id,
                                               nullptr, image_io_ctx, false);

    C_SaferCond* on_finish = new C_SaferCond;

    image_ctx->state->open(0, on_finish);

    ictxs.push_back(image_ctx);
    on_finishes.push_back(on_finish);
  }

  // Wait for all opens; a failed open frees its ImageCtx slot so the
  // cleanup at `finish` can skip it.
  ret_code = 0;
  for (int i = 0; i < snap_count; ++i) {
    r = on_finishes[i]->wait();
    delete on_finishes[i];
    if (r < 0) {
      delete ictxs[i];
      ictxs[i] = nullptr;
      ret_code = r;
    }
  }
  if (ret_code != 0) {
    goto finish;
  }

  ldout(cct, 20) << "Opened participating images. " <<
                    "Deleting snapshots themselves." << dendl;

  // Issue the per-image snapshot removals in parallel.
  for (int i = 0; i < snap_count; ++i) {
    ImageCtx *ictx = ictxs[i];
    on_finishes[i] = new C_SaferCond;

    std::string snap_name;
    ictx->snap_lock.get_read();
    snap_t snap_id = get_group_snap_id(ictx, ne);
    r = ictx->get_snap_name(snap_id, &snap_name);
    ictx->snap_lock.put_read();

    if (r >= 0) {
      ldout(cct, 20) << "removing individual snapshot from image " << ictx->name
                     << dendl;
      ictx->operations->snap_remove(ne, snap_name, on_finishes[i]);
    } else {
      // We are ok to ignore missing image snapshots. The snapshot could have
      // been inconsistent in the first place.
      on_finishes[i]->complete(0);
    }
  }

  for (int i = 0; i < snap_count; ++i) {
    r = on_finishes[i]->wait();
    delete on_finishes[i];
    if (r < 0 && r != -ENOENT) {
      // if previous attempts to remove this snapshot failed then the image's
      // snapshot may not exist
      lderr(cct) << "Failed deleting image snapshot. Ret code: " << r << dendl;
      ret_code = r;
    }
  }

  if (ret_code != 0) {
    goto finish;
  }

  ldout(cct, 20) << "Removed images snapshots removing snapshot record."
                 << dendl;

  // All per-image snapshots are gone: drop the group snapshot record.
  r = cls_client::group_snap_remove(&group_ioctx, group_header_oid,
                                    group_snap.id);
  if (r < 0) {
    ret_code = r;
    goto finish;
  }

finish:
  // Close whichever images were successfully opened.
  for (int i = 0; i < snap_count; ++i) {
    if (ictxs[i] != nullptr) {
      ictxs[i]->state->close();
    }
  }
  return ret_code;
}
+
// Roll every member image back to its per-image snapshot recorded in
// group_snap. All images are opened and their exclusive locks acquired
// (blocking competing lock requests) before any rollback starts; pctx
// receives per-image rollback progress. Returns 0 on success or the
// first negative errno encountered.
int group_snap_rollback_by_record(librados::IoCtx& group_ioctx,
                                  const cls::rbd::GroupSnapshot& group_snap,
                                  const std::string& group_id,
                                  const std::string& group_header_oid,
                                  ProgressContext& pctx) {
  CephContext *cct = (CephContext *)group_ioctx.cct();
  std::vector<C_SaferCond*> on_finishes;
  int r, ret_code;

  std::vector<librbd::ImageCtx*> ictxs;

  // Namespace identifying the individual image snapshots that belong to
  // this group snapshot.
  cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id,
                                      group_snap.id};

  ldout(cct, 20) << "Rolling back snapshots" << dendl;
  int snap_count = group_snap.snaps.size();

  // Open all participating images asynchronously.
  for (int i = 0; i < snap_count; ++i) {
    librados::IoCtx image_io_ctx;
    r = util::create_ioctx(group_ioctx, "image", group_snap.snaps[i].pool, {},
                           &image_io_ctx);
    if (r < 0) {
      // NOTE(review): returning here leaks any ImageCtx already pushed to
      // ictxs (their async opens are still in flight) — confirm whether
      // this path should wait for and close them first.
      return r;
    }

    librbd::ImageCtx* image_ctx = new ImageCtx("", group_snap.snaps[i].image_id,
                                               nullptr, image_io_ctx, false);

    C_SaferCond* on_finish = new C_SaferCond;

    image_ctx->state->open(0, on_finish);

    ictxs.push_back(image_ctx);
    on_finishes.push_back(on_finish);
  }

  // Wait for all opens; a failed open frees its ImageCtx slot.
  ret_code = 0;
  for (int i = 0; i < snap_count; ++i) {
    r = on_finishes[i]->wait();
    delete on_finishes[i];
    if (r < 0) {
      delete ictxs[i];
      ictxs[i] = nullptr;
      ret_code = r;
    }
  }
  if (ret_code != 0) {
    goto finish;
  }

  // Quiesce: refuse new lock requests, then take each image's exclusive
  // lock so no client can write during the rollback.
  ldout(cct, 20) << "Requesting exclusive locks for images" << dendl;
  for (auto ictx: ictxs) {
    RWLock::RLocker owner_lock(ictx->owner_lock);
    if (ictx->exclusive_lock != nullptr) {
      ictx->exclusive_lock->block_requests(-EBUSY);
    }
  }
  for (int i = 0; i < snap_count; ++i) {
    ImageCtx *ictx = ictxs[i];
    RWLock::RLocker owner_lock(ictx->owner_lock);

    on_finishes[i] = new C_SaferCond;
    if (ictx->exclusive_lock != nullptr) {
      ictx->exclusive_lock->acquire_lock(on_finishes[i]);
    }
  }

  ret_code = 0;
  for (int i = 0; i < snap_count; ++i) {
    r = 0;
    ImageCtx *ictx = ictxs[i];
    if (ictx->exclusive_lock != nullptr) {
      r = on_finishes[i]->wait();
    }
    delete on_finishes[i];
    if (r < 0) {
      ret_code = r;
    }
  }
  if (ret_code != 0) {
    goto finish;
  }

  // Roll each image back to its recorded snapshot, in parallel. Images
  // whose snapshot cannot be resolved fail with the lookup error.
  for (int i = 0; i < snap_count; ++i) {
    ImageCtx *ictx = ictxs[i];
    on_finishes[i] = new C_SaferCond;

    RWLock::RLocker owner_locker(ictx->owner_lock);
    std::string snap_name;
    ictx->snap_lock.get_read();
    snap_t snap_id = get_group_snap_id(ictx, ne);
    r = ictx->get_snap_name(snap_id, &snap_name);
    ictx->snap_lock.put_read();

    if (r >= 0) {
      ldout(cct, 20) << "rolling back to individual snapshot for image " << ictx->name
                     << dendl;
      ictx->operations->execute_snap_rollback(ne, snap_name, pctx, on_finishes[i]);
    } else {
      on_finishes[i]->complete(r);
    }
  }

  for (int i = 0; i < snap_count; ++i) {
    r = on_finishes[i]->wait();
    delete on_finishes[i];
    if (r < 0 && r != -ENOENT) {
      lderr(cct) << "Failed rolling back group to snapshot. Ret code: " << r << dendl;
      ret_code = r;
    }
  }

finish:
  // Close whichever images were successfully opened.
  for (int i = 0; i < snap_count; ++i) {
    if (ictxs[i] != nullptr) {
      ictxs[i]->state->close();
    }
  }
  return ret_code;
}
+
+} // anonymous namespace
+
// Remove an image from a group, addressing the image by id rather than
// by name (used when the image itself may no longer be resolvable by
// name). Returns 0 on success or a negative errno.
template <typename I>
int Group<I>::image_remove_by_id(librados::IoCtx& group_ioctx,
                                 const char *group_name,
                                 librados::IoCtx& image_ioctx,
                                 const char *image_id)
{
  CephContext *cct = (CephContext *)group_ioctx.cct();
  ldout(cct, 20) << "io_ctx=" << &group_ioctx
                 << " group name " << group_name << " image "
                 << &image_ioctx << " id " << image_id << dendl;

  string group_id;

  // Resolve the group name to its id via the group directory.
  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name,
                                 &group_id);
  if (r < 0) {
    lderr(cct) << "error reading group id object: "
               << cpp_strerror(r)
               << dendl;
    return r;
  }

  ldout(cct, 20) << "removing image from group name " << group_name
                 << " group id " << group_id << dendl;

  // Delegate the actual two-phase unlink to the shared helper.
  return group_image_remove(group_ioctx, group_id, image_ioctx, string(image_id));
}
+
// Create a new, empty group: generate an id, register the name->id
// mapping in the group directory, then create the group header object.
// The directory entry is rolled back if header creation fails.
// Returns 0 on success or a negative errno.
template <typename I>
int Group<I>::create(librados::IoCtx& io_ctx, const char *group_name)
{
  CephContext *cct = (CephContext *)io_ctx.cct();

  string id = generate_uuid(io_ctx);

  ldout(cct, 2) << "adding group to directory..." << dendl;

  int r = cls_client::group_dir_add(&io_ctx, RBD_GROUP_DIRECTORY, group_name,
                                    id);
  if (r < 0) {
    lderr(cct) << "error adding group to directory: "
               << cpp_strerror(r)
               << dendl;
    return r;
  }
  string header_oid = util::group_header_name(id);

  // Exclusive create (second arg true) so a stale header is never adopted.
  r = io_ctx.create(header_oid, true);
  if (r < 0) {
    lderr(cct) << "error creating group header: " << cpp_strerror(r) << dendl;
    goto err_remove_from_dir;
  }

  return 0;

err_remove_from_dir:
  // Best-effort rollback of the directory entry; the original failure
  // code (r) is still what gets returned.
  int remove_r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY,
                                              group_name, id);
  if (remove_r < 0) {
    lderr(cct) << "error cleaning up group from rbd_directory "
               << "object after creation failed: " << cpp_strerror(remove_r)
               << dendl;
  }

  return r;
}
+
+template <typename I>
+int Group<I>::remove(librados::IoCtx& io_ctx, const char *group_name)
+{
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << "group_remove " << &io_ctx << " " << group_name << dendl;
+
+ std::string group_id;
+ int r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY,
+ std::string(group_name), &group_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error getting id of group" << dendl;
+ return r;
+ }
+ string group_header_oid = util::group_header_name(group_id);
+
+ std::vector<cls::rbd::GroupSnapshot> snaps;
+ r = group_snap_list(io_ctx, group_name, &snaps);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error listing group snapshots" << dendl;
+ return r;
+ }
+
+ for (auto &snap : snaps) {
+ r = group_snap_remove_by_record(io_ctx, snap, group_id, group_header_oid);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ std::vector<cls::rbd::GroupImageStatus> images;
+ r = group_image_list(io_ctx, group_name, &images);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error listing group images" << dendl;
+ return r;
+ }
+
+ for (auto image : images) {
+ IoCtx image_ioctx;
+ r = util::create_ioctx(io_ctx, "image", image.spec.pool_id, {},
+ &image_ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = group_image_remove(io_ctx, group_id, image_ioctx, image.spec.image_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error removing image from a group" << dendl;
+ return r;
+ }
+ }
+
+ string header_oid = util::group_header_name(group_id);
+
+ r = io_ctx.remove(header_oid);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error removing header: " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+
+ r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY,
+ group_name, group_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error removing group from directory" << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Group<I>::list(IoCtx& io_ctx, vector<string> *names)
+{
+ CephContext *cct = (CephContext *)io_ctx.cct();
+ ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl;
+
+ int max_read = 1024;
+ string last_read = "";
+ int r;
+ do {
+ map<string, string> groups;
+ r = cls_client::group_dir_list(&io_ctx, RBD_GROUP_DIRECTORY, last_read,
+ max_read, &groups);
+ if (r == -ENOENT) {
+ return 0; // Ignore missing rbd group directory. It means we don't have any groups yet.
+ }
+ if (r < 0) {
+ if (r != -ENOENT) {
+ lderr(cct) << "error listing group in directory: "
+ << cpp_strerror(r) << dendl;
+ } else {
+ r = 0;
+ }
+ return r;
+ }
+ for (pair<string, string> group : groups) {
+ names->push_back(group.first);
+ }
+ if (!groups.empty()) {
+ last_read = groups.rbegin()->first;
+ }
+ r = groups.size();
+ } while (r == max_read);
+
+ return 0;
+}
+
// Attach an image to a group. Both sides are updated with a two-phase
// protocol mirroring group_image_remove(): (1) record the link as
// INCOMPLETE in the group header, (2) add the group back-reference to
// the image header (rolling back the group-side record on failure), and
// (3) flip the link to ATTACHED. Group and image must live in the same
// rados namespace. Returns 0 on success or a negative errno.
template <typename I>
int Group<I>::image_add(librados::IoCtx& group_ioctx, const char *group_name,
                        librados::IoCtx& image_ioctx, const char *image_name)
{
  CephContext *cct = (CephContext *)group_ioctx.cct();
  ldout(cct, 20) << "io_ctx=" << &group_ioctx
                 << " group name " << group_name << " image "
                 << &image_ioctx << " name " << image_name << dendl;

  if (group_ioctx.get_namespace() != image_ioctx.get_namespace()) {
    lderr(cct) << "group and image cannot be in different namespaces" << dendl;
    return -EINVAL;
  }

  string group_id;

  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name,
                                 &group_id);
  if (r < 0) {
    lderr(cct) << "error reading group id object: "
               << cpp_strerror(r)
               << dendl;
    return r;
  }
  string group_header_oid = util::group_header_name(group_id);


  ldout(cct, 20) << "adding image to group name " << group_name
                 << " group id " << group_header_oid << dendl;

  string image_id;

  r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name,
                             &image_id);
  if (r < 0) {
    lderr(cct) << "error reading image id object: "
               << cpp_strerror(-r) << dendl;
    return r;
  }

  string image_header_oid = util::header_name(image_id);

  ldout(cct, 20) << "adding image " << image_name
                 << " image id " << image_header_oid << dendl;

  cls::rbd::GroupImageStatus incomplete_st(
    image_id, image_ioctx.get_id(),
    cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE);
  cls::rbd::GroupImageStatus attached_st(
    image_id, image_ioctx.get_id(), cls::rbd::GROUP_IMAGE_LINK_STATE_ATTACHED);

  // Phase 1: record the link as INCOMPLETE first, so a crash mid-attach
  // leaves a discoverable record that can be cleaned up.
  r = cls_client::group_image_set(&group_ioctx, group_header_oid,
                                  incomplete_st);

  cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id());

  if (r < 0) {
    lderr(cct) << "error adding image reference to group: "
               << cpp_strerror(-r) << dendl;
    return r;
  }

  // Phase 2: add the back-reference to the image header; undo the
  // group-side record if this fails.
  r = cls_client::image_group_add(&image_ioctx, image_header_oid, group_spec);
  if (r < 0) {
    lderr(cct) << "error adding group reference to image: "
               << cpp_strerror(-r) << dendl;
    cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id());
    cls_client::group_image_remove(&group_ioctx, group_header_oid, spec);
    // Ignore errors in the clean up procedure.
    return r;
  }
  // Tell clients with the image open to refresh their cached header.
  ImageWatcher<>::notify_header_update(image_ioctx, image_header_oid);

  // Phase 3: both sides agree now — mark the link ATTACHED.
  r = cls_client::group_image_set(&group_ioctx, group_header_oid,
                                  attached_st);

  return r;
}
+
// Remove an image (addressed by name) from a group: resolve both the
// group id and the image id from their directories, then delegate the
// two-phase unlink to group_image_remove(). Group and image must live in
// the same rados namespace. Returns 0 on success or a negative errno.
template <typename I>
int Group<I>::image_remove(librados::IoCtx& group_ioctx, const char *group_name,
                           librados::IoCtx& image_ioctx, const char *image_name)
{
  CephContext *cct = (CephContext *)group_ioctx.cct();
  ldout(cct, 20) << "io_ctx=" << &group_ioctx
                 << " group name " << group_name << " image "
                 << &image_ioctx << " name " << image_name << dendl;

  if (group_ioctx.get_namespace() != image_ioctx.get_namespace()) {
    lderr(cct) << "group and image cannot be in different namespaces" << dendl;
    return -EINVAL;
  }

  string group_id;

  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name,
                                 &group_id);
  if (r < 0) {
    lderr(cct) << "error reading group id object: "
               << cpp_strerror(r)
               << dendl;
    return r;
  }

  ldout(cct, 20) << "removing image from group name " << group_name
                 << " group id " << group_id << dendl;

  string image_id;
  r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name,
                             &image_id);
  if (r < 0) {
    lderr(cct) << "error reading image id object: "
               << cpp_strerror(-r) << dendl;
    return r;
  }

  r = group_image_remove(group_ioctx, group_id, image_ioctx, image_id);

  return r;
}
+
+template <typename I>
+int Group<I>::image_list(librados::IoCtx& group_ioctx,
+ const char *group_name,
+ std::vector<group_image_info_t>* images)
+{
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+ ldout(cct, 20) << "io_ctx=" << &group_ioctx
+ << " group name " << group_name << dendl;
+
+ std::vector<cls::rbd::GroupImageStatus> image_ids;
+
+ group_image_list(group_ioctx, group_name, &image_ids);
+
+ for (auto image_id : image_ids) {
+ IoCtx ioctx;
+ int r = util::create_ioctx(group_ioctx, "image", image_id.spec.pool_id, {},
+ &ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string image_name;
+ r = cls_client::dir_get_name(&ioctx, RBD_DIRECTORY,
+ image_id.spec.image_id, &image_name);
+ if (r < 0) {
+ return r;
+ }
+
+ images->push_back(
+ group_image_info_t {
+ image_name,
+ ioctx.get_id(),
+ static_cast<group_image_state_t>(image_id.state)});
+ }
+
+ return 0;
+}
+
// Rename a group in the group directory. The group id (and therefore the
// header object name derived from it) is unchanged. Returns 0 on success
// or a negative errno; ENOENT from the rename itself is tolerated.
template <typename I>
int Group<I>::rename(librados::IoCtx& io_ctx, const char *src_name,
                     const char *dest_name)
{
  CephContext *cct((CephContext *)io_ctx.cct());
  ldout(cct, 20) << "group_rename " << &io_ctx << " " << src_name
                 << " -> " << dest_name << dendl;

  std::string group_id;
  int r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY,
                                 std::string(src_name), &group_id);
  if (r < 0) {
    if (r != -ENOENT)
      lderr(cct) << "error getting id of group" << dendl;
    return r;
  }

  r = cls_client::group_dir_rename(&io_ctx, RBD_GROUP_DIRECTORY,
                                   src_name, dest_name, group_id);
  if (r < 0 && r != -ENOENT) {
    lderr(cct) << "error renaming group from directory" << dendl;
    return r;
  }

  return 0;
}
+
+
// Report which group an image belongs to. The image state is refreshed
// first; an image not in any group yields RBD_GROUP_INVALID_POOL and an
// empty name. Returns 0 on success or a negative errno.
template <typename I>
int Group<I>::image_get_group(I *ictx, group_info_t *group_info)
{
  int r = ictx->state->refresh_if_required();
  if (r < 0)
    return r;

  if (RBD_GROUP_INVALID_POOL != ictx->group_spec.pool_id) {
    IoCtx ioctx;
    r = util::create_ioctx(ictx->md_ctx, "group", ictx->group_spec.pool_id, {},
                           &ioctx);
    if (r < 0) {
      return r;
    }

    // Resolve the stored group id back to the user-visible group name.
    std::string group_name;
    r = cls_client::dir_get_name(&ioctx, RBD_GROUP_DIRECTORY,
                                 ictx->group_spec.group_id, &group_name);
    if (r < 0)
      return r;
    group_info->pool = ioctx.get_id();
    group_info->name = group_name;
  } else {
    // Image is not a member of any group.
    group_info->pool = RBD_GROUP_INVALID_POOL;
    group_info->name = "";
  }

  return 0;
}
+
// Create a consistent group snapshot: persist an INCOMPLETE group
// snapshot record, open every member image, quiesce them by blocking and
// acquiring their exclusive locks, take an individual snapshot of each
// image, then flip the group record to COMPLETE. On failure the taken
// per-image snapshots and/or the group record are unwound (best effort).
// Returns 0 on success or a negative errno.
template <typename I>
int Group<I>::snap_create(librados::IoCtx& group_ioctx,
                          const char *group_name, const char *snap_name)
{
  CephContext *cct = (CephContext *)group_ioctx.cct();

  string group_id;
  cls::rbd::GroupSnapshot group_snap;
  vector<cls::rbd::ImageSnapshotSpec> image_snaps;
  std::string ind_snap_name;

  std::vector<librbd::ImageCtx*> ictxs;
  std::vector<C_SaferCond*> on_finishes;

  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
                                 group_name, &group_id);
  if (r < 0) {
    lderr(cct) << "error reading group id object: "
               << cpp_strerror(r)
               << dendl;
    return r;
  }

  std::vector<cls::rbd::GroupImageStatus> images;
  r = group_image_list(group_ioctx, group_name, &images);
  if (r < 0) {
    return r;
  }
  int image_count = images.size();

  ldout(cct, 20) << "Found " << image_count << " images in group" << dendl;

  // Pre-fill the per-image spec list with pool/image ids; the snap ids
  // are filled in after the individual snapshots are created below.
  image_snaps = vector<cls::rbd::ImageSnapshotSpec>(image_count,
                                                    cls::rbd::ImageSnapshotSpec());

  for (int i = 0; i < image_count; ++i) {
    image_snaps[i].pool = images[i].spec.pool_id;
    image_snaps[i].image_id = images[i].spec.image_id;
  }

  string group_header_oid = util::group_header_name(group_id);

  group_snap.id = generate_uuid(group_ioctx);
  group_snap.name = string(snap_name);
  group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE;
  group_snap.snaps = image_snaps;

  // Namespace the individual image snapshots will be created under.
  cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id,
                                      group_snap.id};

  // Persist the INCOMPLETE record first so a crash leaves evidence that
  // can be cleaned up later.
  r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap);
  if (r == -EEXIST) {
    lderr(cct) << "snapshot with this name already exists: "
               << cpp_strerror(r)
               << dendl;
  }
  int ret_code = 0;
  if (r < 0) {
    ret_code = r;
    goto finish;
  }

  // Open all member images asynchronously.
  for (auto image: images) {
    librbd::IoCtx image_io_ctx;
    r = util::create_ioctx(group_ioctx, "image", image.spec.pool_id, {},
                           &image_io_ctx);
    if (r < 0) {
      ret_code = r;
      goto finish;
    }

    ldout(cct, 20) << "Opening image with id " << image.spec.image_id << dendl;

    librbd::ImageCtx* image_ctx = new ImageCtx("", image.spec.image_id.c_str(),
                                               nullptr, image_io_ctx, false);

    C_SaferCond* on_finish = new C_SaferCond;

    image_ctx->state->open(0, on_finish);

    ictxs.push_back(image_ctx);
    on_finishes.push_back(on_finish);
  }
  ldout(cct, 20) << "Issued open request waiting for the completion" << dendl;
  ret_code = 0;
  for (int i = 0; i < image_count; ++i) {

    ldout(cct, 20) << "Waiting for completion on on_finish: " <<
                      on_finishes[i] << dendl;

    r = on_finishes[i]->wait();
    delete on_finishes[i];
    if (r < 0) {
      // Failed open: free the slot so cleanup at `finish` can skip it.
      delete ictxs[i];
      ictxs[i] = nullptr;
      ret_code = r;
    }
  }
  if (ret_code != 0) {
    goto remove_record;
  }
  ldout(cct, 20) << "Requesting exclusive locks for images" << dendl;

  // Quiesce: refuse new lock requests, then take each image's exclusive
  // lock so no client can write while the snapshots are taken.
  for (auto ictx: ictxs) {
    RWLock::RLocker owner_lock(ictx->owner_lock);
    if (ictx->exclusive_lock != nullptr) {
      ictx->exclusive_lock->block_requests(-EBUSY);
    }
  }
  for (int i = 0; i < image_count; ++i) {
    ImageCtx *ictx = ictxs[i];
    RWLock::RLocker owner_lock(ictx->owner_lock);

    on_finishes[i] = new C_SaferCond;
    if (ictx->exclusive_lock != nullptr) {
      ictx->exclusive_lock->acquire_lock(on_finishes[i]);
    }
  }

  ret_code = 0;
  for (int i = 0; i < image_count; ++i) {
    r = 0;
    ImageCtx *ictx = ictxs[i];
    if (ictx->exclusive_lock != nullptr) {
      r = on_finishes[i]->wait();
    }
    delete on_finishes[i];
    if (r < 0) {
      ret_code = r;
    }
  }
  if (ret_code != 0) {
    goto remove_record;
  }

  ind_snap_name = calc_ind_image_snap_name(group_ioctx.get_id(), group_id,
                                           group_snap.id);

  // Take the individual snapshots on all images in parallel.
  for (int i = 0; i < image_count; ++i) {
    ImageCtx *ictx = ictxs[i];

    C_SaferCond* on_finish = new C_SaferCond;

    ictx->operations->snap_create(ne, ind_snap_name.c_str(), on_finish);

    on_finishes[i] = on_finish;
  }

  ret_code = 0;
  for (int i = 0; i < image_count; ++i) {
    r = on_finishes[i]->wait();
    delete on_finishes[i];
    if (r < 0) {
      ret_code = r;
    } else {
      // Resolve the id of the snapshot that was just created so it can
      // be recorded in the group snapshot.
      ImageCtx *ictx = ictxs[i];
      ictx->snap_lock.get_read();
      snap_t snap_id = get_group_snap_id(ictx, ne);
      ictx->snap_lock.put_read();
      if (snap_id == CEPH_NOSNAP) {
        ldout(cct, 20) << "Couldn't find created snapshot with namespace: "
                       << ne << dendl;
        ret_code = -ENOENT;
      } else {
        image_snaps[i].snap_id = snapid_t(snap_id);
        image_snaps[i].pool = ictx->md_ctx.get_id();
        image_snaps[i].image_id = ictx->id;
      }
    }
  }
  if (ret_code != 0) {
    goto remove_image_snaps;
  }

  // Commit: record the resolved snap ids and flip the state to COMPLETE.
  group_snap.snaps = image_snaps;
  group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE;

  r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap);
  if (r < 0) {
    ret_code = r;
    goto remove_image_snaps;
  }

  goto finish;

remove_image_snaps:
  // Unwind: best-effort removal of any per-image snapshots taken above.
  for (int i = 0; i < image_count; ++i) {
    ImageCtx *ictx = ictxs[i];
    ldout(cct, 20) << "Removing individual snapshot with name: " <<
                      ind_snap_name << dendl;

    on_finishes[i] = new C_SaferCond;
    std::string snap_name;
    ictx->snap_lock.get_read();
    snap_t snap_id = get_group_snap_id(ictx, ne);
    r = ictx->get_snap_name(snap_id, &snap_name);
    ictx->snap_lock.put_read();
    if (r >= 0) {
      ictx->operations->snap_remove(ne, snap_name.c_str(), on_finishes[i]);
    } else {
      // Ignore missing image snapshots. The whole snapshot could have been
      // inconsistent.
      on_finishes[i]->complete(0);
    }
  }

  for (int i = 0, n = on_finishes.size(); i < n; ++i) {
    r = on_finishes[i]->wait();
    delete on_finishes[i];
    if (r < 0 && r != -ENOENT) { // if previous attempts to remove this snapshot failed then the image's snapshot may not exist
      lderr(cct) << "Failed cleaning up image snapshot. Ret code: " << r << dendl;
      // just report error, but don't abort the process
    }
  }

remove_record:
  // Unwind: drop the (still INCOMPLETE) group snapshot record.
  r = cls_client::group_snap_remove(&group_ioctx, group_header_oid,
                                    group_snap.id);
  if (r < 0) {
    lderr(cct) << "error while cleaning up group snapshot" << dendl;
    // we ignore return value in clean up
  }

finish:
  // Close whichever images were successfully opened.
  for (int i = 0, n = ictxs.size(); i < n; ++i) {
    if (ictxs[i] != nullptr) {
      ictxs[i]->state->close();
    }
  }
  return ret_code;
}
+
// Remove the named group snapshot: locate its record in the group
// header, then delete the per-image snapshots and the record via
// group_snap_remove_by_record(). Returns 0 on success, -ENOENT when no
// snapshot by that name exists, or another negative errno.
template <typename I>
int Group<I>::snap_remove(librados::IoCtx& group_ioctx, const char *group_name,
                          const char *snap_name)
{
  CephContext *cct = (CephContext *)group_ioctx.cct();

  string group_id;
  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
                                 group_name, &group_id);
  if (r < 0) {
    lderr(cct) << "error reading group id object: "
               << cpp_strerror(r)
               << dendl;
    return r;
  }

  std::vector<cls::rbd::GroupSnapshot> snaps;
  r = group_snap_list(group_ioctx, group_name, &snaps);
  if (r < 0) {
    return r;
  }

  // Linear search for the record matching the requested snapshot name.
  cls::rbd::GroupSnapshot *group_snap = nullptr;
  for (auto &snap : snaps) {
    if (snap.name == string(snap_name)) {
      group_snap = &snap;
      break;
    }
  }
  if (group_snap == nullptr) {
    return -ENOENT;
  }

  string group_header_oid = util::group_header_name(group_id);
  r = group_snap_remove_by_record(group_ioctx, *group_snap, group_id,
                                  group_header_oid);
  return r;
}
+
+template <typename I>
+int Group<I>::snap_rename(librados::IoCtx& group_ioctx, const char *group_name,
+ const char *old_snap_name,
+ const char *new_snap_name) {
+ CephContext *cct = (CephContext *)group_ioctx.cct();
+ if (0 == strcmp(old_snap_name, new_snap_name))
+ return -EEXIST;
+
+ std::string group_id;
+ int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
+ group_name, &group_id);
+ if (r == -ENOENT) {
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "error reading group id object: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ std::vector<cls::rbd::GroupSnapshot> group_snaps;
+ r = group_snap_list(group_ioctx, group_name, &group_snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::GroupSnapshot group_snap;
+ for (auto &snap : group_snaps) {
+ if (snap.name == old_snap_name) {
+ group_snap = snap;
+ break;
+ }
+ }
+
+ if (group_snap.id.empty()) {
+ return -ENOENT;
+ }
+
+ std::string group_header_oid = util::group_header_name(group_id);
+ group_snap.name = new_snap_name;
+ r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Group<I>::snap_list(librados::IoCtx& group_ioctx, const char *group_name,
+ std::vector<group_snap_info_t> *snaps)
+{
+ std::vector<cls::rbd::GroupSnapshot> cls_snaps;
+
+ int r = group_snap_list(group_ioctx, group_name, &cls_snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ for (auto snap : cls_snaps) {
+ snaps->push_back(
+ group_snap_info_t {
+ snap.name,
+ static_cast<group_snap_state_t>(snap.state)});
+
+ }
+ return 0;
+}
+
// Roll the whole group back to the named group snapshot: locate its
// record, then delegate to group_snap_rollback_by_record(), which rolls
// every member image back under its exclusive lock. pctx receives
// per-image progress. Returns 0 on success, -ENOENT when no snapshot by
// that name exists, or another negative errno.
template <typename I>
int Group<I>::snap_rollback(librados::IoCtx& group_ioctx,
                            const char *group_name, const char *snap_name,
                            ProgressContext& pctx)
{
  CephContext *cct = (CephContext *)group_ioctx.cct();

  string group_id;
  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
                                 group_name, &group_id);
  if (r < 0) {
    lderr(cct) << "error reading group id object: "
               << cpp_strerror(r) << dendl;
    return r;
  }

  std::vector<cls::rbd::GroupSnapshot> snaps;
  r = group_snap_list(group_ioctx, group_name, &snaps);
  if (r < 0) {
    return r;
  }

  // Linear search for the record matching the requested snapshot name.
  cls::rbd::GroupSnapshot *group_snap = nullptr;
  for (auto &snap : snaps) {
    if (snap.name == string(snap_name)) {
      group_snap = &snap;
      break;
    }
  }
  if (group_snap == nullptr) {
    return -ENOENT;
  }

  string group_header_oid = util::group_header_name(group_id);
  r = group_snap_rollback_by_record(group_ioctx, *group_snap, group_id,
                                    group_header_oid, pctx);
  return r;
}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Group<librbd::ImageCtx>;
diff --git a/src/librbd/api/Group.h b/src/librbd/api/Group.h
new file mode 100644
index 00000000..59b978a8
--- /dev/null
+++ b/src/librbd/api/Group.h
@@ -0,0 +1,59 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_GROUP_H
+#define CEPH_LIBRBD_API_GROUP_H
+
+#include "include/rbd/librbd.hpp"
+#include "include/rados/librados_fwd.hpp"
+#include <string>
+#include <vector>
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+
// Static-only facade implementing the consistency-group API: group
// directory management, image membership, and group snapshots. All calls
// are synchronous and return 0 or a negative errno.
template <typename ImageCtxT = librbd::ImageCtx>
struct Group {

  // group directory management within a pool/namespace
  static int create(librados::IoCtx& io_ctx, const char *group_name);
  static int remove(librados::IoCtx& io_ctx, const char *group_name);
  static int list(librados::IoCtx& io_ctx, std::vector<std::string> *names);
  static int rename(librados::IoCtx& io_ctx, const char *src_group_name,
                    const char *dest_group_name);

  // image membership; image and group may live in different pools,
  // hence the separate IoCtx parameters
  static int image_add(librados::IoCtx& group_ioctx, const char *group_name,
                       librados::IoCtx& image_ioctx, const char *image_name);
  static int image_remove(librados::IoCtx& group_ioctx, const char *group_name,
                          librados::IoCtx& image_ioctx, const char *image_name);
  static int image_remove_by_id(librados::IoCtx& group_ioctx,
                                const char *group_name,
                                librados::IoCtx& image_ioctx,
                                const char *image_id);
  static int image_list(librados::IoCtx& group_ioctx, const char *group_name,
                        std::vector<group_image_info_t> *images);

  // reverse lookup: which group (if any) an open image belongs to
  static int image_get_group(ImageCtxT *ictx, group_info_t *group_info);

  // point-in-time snapshots spanning all member images
  static int snap_create(librados::IoCtx& group_ioctx,
                         const char *group_name, const char *snap_name);
  static int snap_remove(librados::IoCtx& group_ioctx,
                         const char *group_name, const char *snap_name);
  static int snap_rename(librados::IoCtx& group_ioctx, const char *group_name,
                         const char *old_snap_name, const char *new_snap_name);
  static int snap_list(librados::IoCtx& group_ioctx, const char *group_name,
                       std::vector<group_snap_info_t> *snaps);
  // rolls all member images back to the group snapshot; progress is
  // reported through pctx
  static int snap_rollback(librados::IoCtx& group_ioctx,
                           const char *group_name, const char *snap_name,
                           ProgressContext& pctx);

};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Group<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_GROUP_H
diff --git a/src/librbd/api/Image.cc b/src/librbd/api/Image.cc
new file mode 100644
index 00000000..aa427535
--- /dev/null
+++ b/src/librbd/api/Image.cc
@@ -0,0 +1,836 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Image.h"
+#include "include/rados/librados.hpp"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/DeepCopyRequest.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Trash.h"
+#include "librbd/image/CloneRequest.h"
+#include "librbd/image/RemoveRequest.h"
+#include "librbd/image/PreRemoveRequest.h"
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Image: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+bool compare_by_pool(const librbd::linked_image_spec_t& lhs,
+ const librbd::linked_image_spec_t& rhs)
+{
+ if (lhs.pool_id != rhs.pool_id) {
+ return lhs.pool_id < rhs.pool_id;
+ } else if (lhs.pool_namespace != rhs.pool_namespace) {
+ return lhs.pool_namespace < rhs.pool_namespace;
+ }
+ return false;
+}
+
+bool compare(const librbd::linked_image_spec_t& lhs,
+ const librbd::linked_image_spec_t& rhs)
+{
+ if (lhs.pool_name != rhs.pool_name) {
+ return lhs.pool_name < rhs.pool_name;
+ } else if (lhs.pool_id != rhs.pool_id) {
+ return lhs.pool_id < rhs.pool_id;
+ } else if (lhs.pool_namespace != rhs.pool_namespace) {
+ return lhs.pool_namespace < rhs.pool_namespace;
+ } else if (lhs.image_name != rhs.image_name) {
+ return lhs.image_name < rhs.image_name;
+ } else if (lhs.image_id != rhs.image_id) {
+ return lhs.image_id < rhs.image_id;
+ }
+ return false;
+}
+
// Opens the image (skipping the parent) and runs the PreRemoveRequest state
// machine to validate the image can actually be deleted (e.g. no snapshots,
// not in use) before any destructive step is taken.
template <typename I>
int pre_remove_image(librados::IoCtx& io_ctx, const std::string& image_id) {
  I *image_ctx = I::create("", image_id, nullptr, io_ctx, false);
  // NOTE(review): image_ctx is not freed here on open failure — presumably
  // ImageState::open() destroys the context when it fails; confirm against
  // librbd/ImageState.cc before touching this path.
  int r = image_ctx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
  if (r < 0) {
    return r;
  }

  C_SaferCond ctx;
  auto req = image::PreRemoveRequest<I>::create(image_ctx, false, &ctx);
  req->send();

  // close unconditionally; the pre-remove result (not the close result)
  // decides whether removal proceeds
  r = ctx.wait();
  image_ctx->state->close();
  return r;
}
+
+} // anonymous namespace
+
+template <typename I>
+int64_t Image<I>::get_data_pool_id(I *ictx) {
+ if (ictx->data_ctx.is_valid()) {
+ return ictx->data_ctx.get_id();
+ }
+
+ int64_t pool_id;
+ int r = cls_client::get_data_pool(&ictx->md_ctx, ictx->header_oid, &pool_id);
+ if (r < 0) {
+ CephContext *cct = ictx->cct;
+ lderr(cct) << "error getting data pool ID: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return pool_id;
+}
+
+template <typename I>
+int Image<I>::get_op_features(I *ictx, uint64_t *op_features) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "image_ctx=" << ictx << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ RWLock::RLocker snap_locker(ictx->snap_lock);
+ *op_features = ictx->op_features;
+ return 0;
+}
+
// Lists every image in the pool/namespace: V1 (tmap-based) images, V2
// directory images, and V2 images stuck in a partially-removed state in
// the trash. V1 images have an empty id.
template <typename I>
int Image<I>::list_images(librados::IoCtx& io_ctx,
                          std::vector<image_spec_t> *images) {
  CephContext *cct = (CephContext *)io_ctx.cct();
  ldout(cct, 20) << "list " << &io_ctx << dendl;

  int r;
  images->clear();

  // V1 images only ever exist in the default namespace
  if (io_ctx.get_namespace().empty()) {
    bufferlist bl;
    r = io_ctx.read(RBD_DIRECTORY, bl, 0, 0);
    if (r == -ENOENT) {
      // no directory object at all -> pool has no images
      return 0;
    } else if (r < 0) {
      lderr(cct) << "error listing v1 images: " << cpp_strerror(r) << dendl;
      return r;
    }

    // V1 format images are in a tmap
    if (bl.length()) {
      auto p = bl.cbegin();
      bufferlist header;
      std::map<std::string, bufferlist> m;
      decode(header, p);
      decode(m, p);
      for (auto& it : m) {
        // V1 images have no id; only the name is known
        images->push_back({.id ="", .name = it.first});
      }
    }
  }

  // V2 format images
  std::map<std::string, std::string> image_names_to_ids;
  r = list_images_v2(io_ctx, &image_names_to_ids);
  if (r < 0) {
    lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl;
    return r;
  }

  for (const auto& img_pair : image_names_to_ids) {
    images->push_back({.id = img_pair.second,
                       .name = img_pair.first});
  }

  // include V2 images in a partially removed state
  // (-EOPNOTSUPP is tolerated: the OSDs may predate the trash feature)
  std::vector<librbd::trash_image_info_t> trash_images;
  r = Trash<I>::list(io_ctx, trash_images, false);
  if (r < 0 && r != -EOPNOTSUPP) {
    lderr(cct) << "error listing trash images: " << cpp_strerror(r) << dendl;
    return r;
  }

  for (const auto& trash_image : trash_images) {
    if (trash_image.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
      images->push_back({.id = trash_image.id,
                         .name = trash_image.name});

    }
  }

  return 0;
}
+
+template <typename I>
+int Image<I>::list_images_v2(librados::IoCtx& io_ctx, ImageNameToIds *images) {
+ CephContext *cct = (CephContext *)io_ctx.cct();
+ ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl;
+
+ // new format images are accessed by class methods
+ int r;
+ int max_read = 1024;
+ string last_read = "";
+ do {
+ map<string, string> images_page;
+ r = cls_client::dir_list(&io_ctx, RBD_DIRECTORY, last_read, max_read,
+ &images_page);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error listing image in directory: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (r == -ENOENT) {
+ break;
+ }
+ for (map<string, string>::const_iterator it = images_page.begin();
+ it != images_page.end(); ++it) {
+ images->insert(*it);
+ }
+ if (!images_page.empty()) {
+ last_read = images_page.rbegin()->first;
+ }
+ r = images_page.size();
+ } while (r == max_read);
+
+ return 0;
+}
+
+template <typename I>
+int Image<I>::get_parent(I *ictx,
+ librbd::linked_image_spec_t *parent_image,
+ librbd::snap_spec_t *parent_snap) {
+ auto cct = ictx->cct;
+ ldout(cct, 20) << "image_ctx=" << ictx << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ RWLock::RLocker snap_locker(ictx->snap_lock);
+ RWLock::RLocker parent_locker(ictx->parent_lock);
+
+ bool release_parent_locks = false;
+ BOOST_SCOPE_EXIT_ALL(ictx, &release_parent_locks) {
+ if (release_parent_locks) {
+ ictx->parent->parent_lock.put_read();
+ ictx->parent->snap_lock.put_read();
+ }
+ };
+
+ // if a migration is in-progress, the true parent is the parent
+ // of the migration source image
+ auto parent = ictx->parent;
+ if (!ictx->migration_info.empty() && ictx->parent != nullptr) {
+ release_parent_locks = true;
+ ictx->parent->snap_lock.get_read();
+ ictx->parent->parent_lock.get_read();
+
+ parent = ictx->parent->parent;
+ }
+
+ if (parent == nullptr) {
+ return -ENOENT;
+ }
+
+ parent_image->pool_id = parent->md_ctx.get_id();
+ parent_image->pool_name = parent->md_ctx.get_pool_name();
+ parent_image->pool_namespace = parent->md_ctx.get_namespace();
+
+ RWLock::RLocker parent_snap_locker(parent->snap_lock);
+ parent_snap->id = parent->snap_id;
+ parent_snap->namespace_type = RBD_SNAP_NAMESPACE_TYPE_USER;
+ if (parent->snap_id != CEPH_NOSNAP) {
+ auto snap_info = parent->get_snap_info(parent->snap_id);
+ if (snap_info == nullptr) {
+ lderr(cct) << "error finding parent snap name: " << cpp_strerror(r)
+ << dendl;
+ return -ENOENT;
+ }
+
+ parent_snap->namespace_type = static_cast<snap_namespace_type_t>(
+ cls::rbd::get_snap_namespace_type(snap_info->snap_namespace));
+ parent_snap->name = snap_info->name;
+ }
+
+ parent_image->image_id = parent->id;
+ parent_image->image_name = parent->name;
+ parent_image->trash = true;
+
+ librbd::trash_image_info_t trash_info;
+ r = Trash<I>::get(parent->md_ctx, parent->id, &trash_info);
+ if (r == -ENOENT || r == -EOPNOTSUPP) {
+ parent_image->trash = false;
+ } else if (r < 0) {
+ lderr(cct) << "error looking up trash status: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
// Lists the direct (depth 1) children of the image across all of its
// snapshots; thin wrapper over list_descendants.
template <typename I>
int Image<I>::list_children(I *ictx,
                            std::vector<librbd::linked_image_spec_t> *images) {
  images->clear();
  return list_descendants(ictx, 1, images);
}
+
// Lists the direct (depth 1) children attached to one specific parent
// snapshot; thin wrapper over list_descendants.
template <typename I>
int Image<I>::list_children(I *ictx,
                            const cls::rbd::ParentImageSpec &parent_spec,
                            std::vector<librbd::linked_image_spec_t> *images) {
  images->clear();
  return list_descendants(ictx, parent_spec, 1, images);
}
+
+template <typename I>
+int Image<I>::list_descendants(
+ librados::IoCtx& io_ctx, const std::string &image_id,
+ const std::optional<size_t> &max_level,
+ std::vector<librbd::linked_image_spec_t> *images) {
+ ImageCtx *ictx = new librbd::ImageCtx("", image_id, nullptr,
+ io_ctx, true);
+ int r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ return 0;
+ }
+ lderr(ictx->cct) << "failed to open descendant " << image_id
+ << " from pool " << io_ctx.get_pool_name() << ":"
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ r = list_descendants(ictx, max_level, images);
+
+ int r1 = ictx->state->close();
+ if (r1 < 0) {
+ lderr(ictx->cct) << "error when closing descendant " << image_id
+ << " from pool " << io_ctx.get_pool_name() << ":"
+ << cpp_strerror(r) << dendl;
+ }
+
+ return r;
+}
+
+template <typename I>
+int Image<I>::list_descendants(
+ I *ictx, const std::optional<size_t> &max_level,
+ std::vector<librbd::linked_image_spec_t> *images) {
+ RWLock::RLocker l(ictx->snap_lock);
+ std::vector<librados::snap_t> snap_ids;
+ if (ictx->snap_id != CEPH_NOSNAP) {
+ snap_ids.push_back(ictx->snap_id);
+ } else {
+ snap_ids = ictx->snaps;
+ }
+ for (auto snap_id : snap_ids) {
+ cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(),
+ ictx->md_ctx.get_namespace(),
+ ictx->id, snap_id};
+ int r = list_descendants(ictx, parent_spec, max_level, images);
+ if (r < 0) {
+ return r;
+ }
+ }
+ return 0;
+}
+
// Core descendant walk for one parent snapshot. Gathers clone v1 children
// (scanning every pool's RBD_CHILDREN object), clone v2 children (from the
// parent header's child list), recurses with a decremented depth budget,
// then resolves names/trash state for every collected id in per-pool
// batches.
template <typename I>
int Image<I>::list_descendants(
    I *ictx, const cls::rbd::ParentImageSpec &parent_spec,
    const std::optional<size_t> &max_level,
    std::vector<librbd::linked_image_spec_t> *images) {
  // depth budget: nullopt = unlimited; otherwise stop at 0 and pass
  // (level - 1) down to recursive calls
  auto child_max_level = max_level;
  if (child_max_level) {
    if (child_max_level == 0) {
      return 0;
    }
    (*child_max_level)--;
  }
  CephContext *cct = ictx->cct;
  ldout(cct, 20) << "ictx=" << ictx << dendl;

  // no children for non-layered or old format image
  if (!ictx->test_features(RBD_FEATURE_LAYERING, ictx->snap_lock)) {
    return 0;
  }

  librados::Rados rados(ictx->md_ctx);

  // search all pools for clone v1 children dependent on this snapshot
  std::list<std::pair<int64_t, std::string> > pools;
  int r = rados.pool_list2(pools);
  if (r < 0) {
    lderr(cct) << "error listing pools: " << cpp_strerror(r) << dendl;
    return r;
  }

  for (auto& it : pools) {
    int64_t base_tier;
    r = rados.pool_get_base_tier(it.first, &base_tier);
    if (r == -ENOENT) {
      ldout(cct, 1) << "pool " << it.second << " no longer exists" << dendl;
      continue;
    } else if (r < 0) {
      lderr(cct) << "error retrieving base tier for pool " << it.second
                 << dendl;
      return r;
    }
    if (it.first != base_tier) {
      // pool is a cache; skip it
      continue;
    }

    IoCtx ioctx;
    r = util::create_ioctx(ictx->md_ctx, "child image", it.first, {}, &ioctx);
    if (r == -ENOENT) {
      continue;
    } else if (r < 0) {
      return r;
    }

    // v1 children are registered in the pool's RBD_CHILDREN object
    std::set<std::string> image_ids;
    r = cls_client::get_children(&ioctx, RBD_CHILDREN, parent_spec,
                                 image_ids);
    if (r < 0 && r != -ENOENT) {
      lderr(cct) << "error reading list of children from pool " << it.second
                 << dendl;
      return r;
    }

    for (auto& image_id : image_ids) {
      // names are resolved later in the batch pass below
      images->push_back({
        it.first, "", ictx->md_ctx.get_namespace(), image_id, "", false});
      r = list_descendants(ioctx, image_id, child_max_level, images);
      if (r < 0) {
        return r;
      }
    }
  }

  // retrieve clone v2 children attached to this snapshot
  IoCtx parent_io_ctx;
  r = util::create_ioctx(ictx->md_ctx, "parent image", parent_spec.pool_id,
                         parent_spec.pool_namespace, &parent_io_ctx);
  if (r < 0) {
    return r;
  }

  cls::rbd::ChildImageSpecs child_images;
  r = cls_client::children_list(&parent_io_ctx,
                                util::header_name(parent_spec.image_id),
                                parent_spec.snap_id, &child_images);
  // -EOPNOTSUPP: OSDs predate clone v2 support
  if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) {
    lderr(cct) << "error retrieving children: " << cpp_strerror(r) << dendl;
    return r;
  }

  for (auto& child_image : child_images) {
    images->push_back({
      child_image.pool_id, "", child_image.pool_namespace,
      child_image.image_id, "", false});
    if (!child_max_level || *child_max_level > 0) {
      IoCtx ioctx;
      r = util::create_ioctx(ictx->md_ctx, "child image", child_image.pool_id,
                             child_image.pool_namespace, &ioctx);
      if (r == -ENOENT) {
        continue;
      } else if (r < 0) {
        return r;
      }
      r = list_descendants(ioctx, child_image.image_id, child_max_level,
                           images);
      if (r < 0) {
        return r;
      }
    }
  }

  // batch lookups by pool + namespace
  std::sort(images->begin(), images->end(), compare_by_pool);

  int64_t child_pool_id = -1;
  librados::IoCtx child_io_ctx;
  // image id -> (image name, in-trash flag), rebuilt per pool/namespace
  std::map<std::string, std::pair<std::string, bool>> child_image_id_to_info;
  for (auto& image : *images) {
    if (child_pool_id == -1 || child_pool_id != image.pool_id ||
        child_io_ctx.get_namespace() != image.pool_namespace) {
      r = util::create_ioctx(ictx->md_ctx, "child image", image.pool_id,
                             image.pool_namespace, &child_io_ctx);
      if (r == -ENOENT) {
        // pool vanished mid-walk; leave this entry unresolved
        image.pool_name = "";
        image.image_name = "";
        continue;
      } else if (r < 0) {
        return r;
      }
      child_pool_id = image.pool_id;

      child_image_id_to_info.clear();

      std::map<std::string, std::string> image_names_to_ids;
      r = list_images_v2(child_io_ctx, &image_names_to_ids);
      if (r < 0) {
        lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl;
        return r;
      }

      for (auto& [name, id] : image_names_to_ids) {
        child_image_id_to_info.insert({id, {name, false}});
      }

      // trashed children are still children; mark them unless they are
      // mid-removal
      std::vector<librbd::trash_image_info_t> trash_images;
      r = Trash<I>::list(child_io_ctx, trash_images, false);
      if (r < 0 && r != -EOPNOTSUPP) {
        lderr(cct) << "error listing trash images: " << cpp_strerror(r)
                   << dendl;
        return r;
      }

      for (auto& it : trash_images) {
        child_image_id_to_info.insert({
          it.id,
          {it.name,
           it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING ? false : true}});
      }
    }

    auto it = child_image_id_to_info.find(image.image_id);
    if (it == child_image_id_to_info.end()) {
      lderr(cct) << "error looking up name for image id "
                 << image.image_id << " in pool "
                 << child_io_ctx.get_pool_name()
                 << (image.pool_namespace.empty() ?
                      "" : "/" + image.pool_namespace) << dendl;
      return -ENOENT;
    }

    image.pool_name = child_io_ctx.get_pool_name();
    image.image_name = it->second.first;
    image.trash = it->second.second;
  }

  // final sort by pool + image names
  std::sort(images->begin(), images->end(), compare);
  return 0;
}
+
// Deep-copies src into a newly created image named destname in dest_md_ctx:
// negotiates creation options from src, creates the destination (as a clone
// when src has a parent and flattening was not requested), acquires its
// exclusive lock, then delegates the data copy to the two-image overload.
template <typename I>
int Image<I>::deep_copy(I *src, librados::IoCtx& dest_md_ctx,
                        const char *destname, ImageOptions& opts,
                        ProgressContext &prog_ctx) {
  CephContext *cct = (CephContext *)dest_md_ctx.cct();
  ldout(cct, 20) << src->name
                 << (src->snap_name.length() ? "@" + src->snap_name : "")
                 << " -> " << destname << " opts = " << opts << dendl;

  uint64_t features;
  uint64_t src_size;
  {
    RWLock::RLocker snap_locker(src->snap_lock);

    if (!src->migration_info.empty()) {
      lderr(cct) << "cannot deep copy migrating image" << dendl;
      return -EBUSY;
    }

    features = src->features;
    src_size = src->get_image_size(src->snap_id);
  }
  // every option the caller did not set explicitly is inherited from src
  uint64_t format = 2;
  if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) {
    opts.set(RBD_IMAGE_OPTION_FORMAT, format);
  }
  if (format == 1) {
    lderr(cct) << "old format not supported for destination image" << dendl;
    return -EINVAL;
  }
  uint64_t stripe_unit = src->stripe_unit;
  if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) {
    opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
  }
  uint64_t stripe_count = src->stripe_count;
  if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) {
    opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
  }
  uint64_t order = src->order;
  if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) {
    opts.set(RBD_IMAGE_OPTION_ORDER, order);
  }
  if (opts.get(RBD_IMAGE_OPTION_FEATURES, &features) != 0) {
    opts.set(RBD_IMAGE_OPTION_FEATURES, features);
  }
  if (features & ~RBD_FEATURES_ALL) {
    lderr(cct) << "librbd does not support requested features" << dendl;
    return -ENOSYS;
  }

  uint64_t flatten = 0;
  if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) {
    opts.unset(RBD_IMAGE_OPTION_FLATTEN);
  }

  // decide whether the destination should be a clone: flattening forces a
  // standalone image (pool_id == -1); otherwise inherit the parent of the
  // oldest snapshot (or HEAD when there are no snapshots)
  cls::rbd::ParentImageSpec parent_spec;
  if (flatten > 0) {
    parent_spec.pool_id = -1;
  } else {
    RWLock::RLocker snap_locker(src->snap_lock);
    RWLock::RLocker parent_locker(src->parent_lock);

    // use oldest snapshot or HEAD for parent spec
    if (!src->snap_info.empty()) {
      parent_spec = src->snap_info.begin()->second.parent.spec;
    } else {
      parent_spec = src->parent_md.spec;
    }
  }

  int r;
  if (parent_spec.pool_id == -1) {
    r = create(dest_md_ctx, destname, "", src_size, opts, "", "", false);
  } else {
    librados::IoCtx parent_io_ctx;
    r = util::create_ioctx(src->md_ctx, "parent image", parent_spec.pool_id,
                           parent_spec.pool_namespace, &parent_io_ctx);
    if (r < 0) {
      return r;
    }

    ConfigProxy config{cct->_conf};
    api::Config<I>::apply_pool_overrides(dest_md_ctx, &config);

    C_SaferCond ctx;
    std::string dest_id = util::generate_image_id(dest_md_ctx);
    auto *req = image::CloneRequest<I>::create(
      config, parent_io_ctx, parent_spec.image_id, "", parent_spec.snap_id,
      dest_md_ctx, destname, dest_id, opts, "", "", src->op_work_queue, &ctx);
    req->send();
    r = ctx.wait();
  }
  if (r < 0) {
    lderr(cct) << "header creation failed" << dendl;
    return r;
  }
  // reflect the order actually used back into the caller's options
  opts.set(RBD_IMAGE_OPTION_ORDER, static_cast<uint64_t>(order));

  // NOTE(review): dest is not freed here on open failure — presumably
  // ImageState::open() destroys the context when it fails; confirm against
  // librbd/ImageState.cc.
  auto dest = new I(destname, "", nullptr, dest_md_ctx, false);
  r = dest->state->open(0);
  if (r < 0) {
    lderr(cct) << "failed to read newly created header" << dendl;
    return r;
  }

  // the copy requires ownership of the destination's exclusive lock
  // (no-op completion when the feature is disabled or already owned)
  C_SaferCond lock_ctx;
  {
    RWLock::WLocker locker(dest->owner_lock);

    if (dest->exclusive_lock == nullptr ||
        dest->exclusive_lock->is_lock_owner()) {
      lock_ctx.complete(0);
    } else {
      dest->exclusive_lock->acquire_lock(&lock_ctx);
    }
  }

  r = lock_ctx.wait();
  if (r < 0) {
    lderr(cct) << "failed to request exclusive lock: " << cpp_strerror(r)
               << dendl;
    dest->state->close();
    return r;
  }

  r = deep_copy(src, dest, flatten > 0, prog_ctx);

  // a close failure is only reported if the copy itself succeeded
  int close_r = dest->state->close();
  if (r == 0 && close_r < 0) {
    r = close_r;
  }
  return r;
}
+
+template <typename I>
+int Image<I>::deep_copy(I *src, I *dest, bool flatten,
+ ProgressContext &prog_ctx) {
+ CephContext *cct = src->cct;
+ librados::snap_t snap_id_start = 0;
+ librados::snap_t snap_id_end;
+ {
+ RWLock::RLocker snap_locker(src->snap_lock);
+ snap_id_end = src->snap_id;
+ }
+
+ ThreadPool *thread_pool;
+ ContextWQ *op_work_queue;
+ ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
+
+ C_SaferCond cond;
+ SnapSeqs snap_seqs;
+ auto req = DeepCopyRequest<I>::create(src, dest, snap_id_start, snap_id_end,
+ 0U, flatten, boost::none, op_work_queue,
+ &snap_seqs, &prog_ctx, &cond);
+ req->send();
+ int r = cond.wait();
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
// Switches the open image to the named snapshot (or back to HEAD when
// snap_name is null/empty) by resolving the name to a snap id.
template <typename I>
int Image<I>::snap_set(I *ictx,
                       const cls::rbd::SnapshotNamespace &snap_namespace,
                       const char *snap_name) {
  ldout(ictx->cct, 20) << "snap_set " << ictx << " snap = "
                       << (snap_name ? snap_name : "NULL") << dendl;

  // ignore return value, since we may be set to a non-existent
  // snapshot and the user is trying to fix that
  ictx->state->refresh_if_required();

  uint64_t snap_id = CEPH_NOSNAP;
  std::string name(snap_name == nullptr ? "" : snap_name);
  if (!name.empty()) {
    RWLock::RLocker snap_locker(ictx->snap_lock);
    // NOTE(review): the snap_namespace parameter is ignored — the lookup is
    // always performed in the user snapshot namespace. Confirm whether this
    // entry point is intentionally user-snap-only before relying on it.
    snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace{},
                                snap_name);
    if (snap_id == CEPH_NOSNAP) {
      return -ENOENT;
    }
  }

  return snap_set(ictx, snap_id);
}
+
+template <typename I>
+int Image<I>::snap_set(I *ictx, uint64_t snap_id) {
+ ldout(ictx->cct, 20) << "snap_set " << ictx << " "
+ << "snap_id=" << snap_id << dendl;
+
+ // ignore return value, since we may be set to a non-existent
+ // snapshot and the user is trying to fix that
+ ictx->state->refresh_if_required();
+
+ C_SaferCond ctx;
+ ictx->state->snap_set(snap_id, &ctx);
+ int r = ctx.wait();
+ if (r < 0) {
+ if (r != -ENOENT) {
+ lderr(ictx->cct) << "failed to " << (snap_id == CEPH_NOSNAP ? "un" : "")
+ << "set snapshot: " << cpp_strerror(r) << dendl;
+ }
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Image<I>::remove(IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext& prog_ctx)
+{
+ CephContext *cct((CephContext *)io_ctx.cct());
+ ldout(cct, 20) << "name=" << image_name << dendl;
+
+ // look up the V2 image id based on the image name
+ std::string image_id;
+ int r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name,
+ &image_id);
+ if (r == -ENOENT) {
+ // check if it already exists in trash from an aborted trash remove attempt
+ std::vector<trash_image_info_t> trash_entries;
+ r = Trash<I>::list(io_ctx, trash_entries, false);
+ if (r < 0) {
+ return r;
+ } else if (r >= 0) {
+ for (auto& entry : trash_entries) {
+ if (entry.name == image_name &&
+ entry.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+ return Trash<I>::remove(io_ctx, entry.id, true, prog_ctx);
+ }
+ }
+ }
+
+ // fall-through if we failed to locate the image in the V2 directory and
+ // trash
+ } else if (r < 0) {
+ lderr(cct) << "failed to retrieve image id: " << cpp_strerror(r) << dendl;
+ return r;
+ } else {
+ // attempt to move the image to the trash (and optionally immediately
+ // delete the image)
+ ConfigProxy config(cct->_conf);
+ Config<I>::apply_pool_overrides(io_ctx, &config);
+
+ rbd_trash_image_source_t trash_image_source =
+ RBD_TRASH_IMAGE_SOURCE_REMOVING;
+ uint64_t expire_seconds = 0;
+ if (config.get_val<bool>("rbd_move_to_trash_on_remove")) {
+ // keep the image in the trash upon remove requests
+ trash_image_source = RBD_TRASH_IMAGE_SOURCE_USER;
+ expire_seconds = config.get_val<uint64_t>(
+ "rbd_move_to_trash_on_remove_expire_seconds");
+ } else {
+ // attempt to pre-validate the removal before moving to trash and
+ // removing
+ r = pre_remove_image<I>(io_ctx, image_id);
+ if (r < 0 && r != -ENOENT) {
+ return r;
+ }
+ }
+
+ r = Trash<I>::move(io_ctx, trash_image_source, image_name, image_id,
+ expire_seconds);
+ if (r >= 0) {
+ if (trash_image_source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+ // proceed with attempting to immediately remove the image
+ r = Trash<I>::remove(io_ctx, image_id, true, prog_ctx);
+
+ if (r == -ENOTEMPTY || r == -EBUSY || r == -EMLINK) {
+ // best-effort try to restore the image if the removal
+ // failed for possible expected reasons
+ Trash<I>::restore(io_ctx, {cls::rbd::TRASH_IMAGE_SOURCE_REMOVING},
+ image_id, image_name);
+ }
+ }
+ return r;
+ } else if (r < 0 && r != -EOPNOTSUPP) {
+ return r;
+ }
+
+ // fall-through if trash isn't supported
+ }
+
+ ThreadPool *thread_pool;
+ ContextWQ *op_work_queue;
+ ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
+
+ // might be a V1 image format that cannot be moved to the trash
+ // and would not have been listed in the V2 directory -- or the OSDs
+ // are too old and don't support the trash feature
+ C_SaferCond cond;
+ auto req = librbd::image::RemoveRequest<I>::create(
+ io_ctx, image_name, "", false, false, prog_ctx, op_work_queue, &cond);
+ req->send();
+
+ return cond.wait();
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Image<librbd::ImageCtx>;
diff --git a/src/librbd/api/Image.h b/src/librbd/api/Image.h
new file mode 100644
index 00000000..af928c84
--- /dev/null
+++ b/src/librbd/api/Image.h
@@ -0,0 +1,78 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef LIBRBD_API_IMAGE_H
+#define LIBRBD_API_IMAGE_H
+
+#include "include/rbd/librbd.hpp"
+#include "include/rados/librados_fwd.hpp"
+#include "librbd/Types.h"
+#include <map>
+#include <set>
+#include <string>
+
+namespace librbd {
+
+class ImageOptions;
+class ProgressContext;
+
+struct ImageCtx;
+
+namespace api {
+
// Static-only facade implementing image-level API operations: listing,
// parent/child topology queries, deep copy, snapshot selection, and
// removal. All calls are synchronous and return 0 or a negative errno.
template <typename ImageCtxT = librbd::ImageCtx>
struct Image {
  // image name -> image id, as stored in the pool's RBD directory
  typedef std::map<std::string, std::string> ImageNameToIds;

  // id of the pool holding the image's data objects
  static int64_t get_data_pool_id(ImageCtxT *ictx);

  static int get_op_features(ImageCtxT *ictx, uint64_t *op_features);

  // listing: V1 + V2 + partially-removed trash images, or V2 only
  static int list_images(librados::IoCtx& io_ctx,
                         std::vector<image_spec_t> *images);
  static int list_images_v2(librados::IoCtx& io_ctx,
                            ImageNameToIds *images);

  // parent of a clone (resolved through the migration source when a
  // migration is in progress)
  static int get_parent(ImageCtxT *ictx,
                        librbd::linked_image_spec_t *parent_image,
                        librbd::snap_spec_t *parent_snap);

  // direct (depth 1) children
  static int list_children(ImageCtxT *ictx,
                           std::vector<librbd::linked_image_spec_t> *images);
  static int list_children(ImageCtxT *ictx,
                           const cls::rbd::ParentImageSpec &parent_spec,
                           std::vector<librbd::linked_image_spec_t> *images);

  // recursive descendant walk; max_level limits depth (nullopt = unlimited)
  static int list_descendants(IoCtx& io_ctx, const std::string &image_id,
                              const std::optional<size_t> &max_level,
                              std::vector<librbd::linked_image_spec_t> *images);
  static int list_descendants(ImageCtxT *ictx,
                              const std::optional<size_t> &max_level,
                              std::vector<librbd::linked_image_spec_t> *images);
  static int list_descendants(ImageCtxT *ictx,
                              const cls::rbd::ParentImageSpec &parent_spec,
                              const std::optional<size_t> &max_level,
                              std::vector<librbd::linked_image_spec_t> *images);

  // full copy into a new image (first overload) or an already-open one
  static int deep_copy(ImageCtxT *ictx, librados::IoCtx& dest_md_ctx,
                       const char *destname, ImageOptions& opts,
                       ProgressContext &prog_ctx);
  static int deep_copy(ImageCtxT *src, ImageCtxT *dest, bool flatten,
                       ProgressContext &prog_ctx);

  // switch the open image to a snapshot (by name or id); CEPH_NOSNAP /
  // null name selects HEAD
  static int snap_set(ImageCtxT *ictx,
                      const cls::rbd::SnapshotNamespace &snap_namespace,
                      const char *snap_name);
  static int snap_set(ImageCtxT *ictx, uint64_t snap_id);

  static int remove(librados::IoCtx& io_ctx, const std::string &image_name,
                    ProgressContext& prog_ctx);

};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Image<librbd::ImageCtx>;
+
+#endif // LIBRBD_API_IMAGE_H
diff --git a/src/librbd/api/Migration.cc b/src/librbd/api/Migration.cc
new file mode 100644
index 00000000..ae09d407
--- /dev/null
+++ b/src/librbd/api/Migration.cc
@@ -0,0 +1,1885 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Migration.h"
+#include "include/rados/librados.hpp"
+#include "include/stringify.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Group.h"
+#include "librbd/api/Image.h"
+#include "librbd/api/Snapshot.h"
+#include "librbd/api/Trash.h"
+#include "librbd/deep_copy/ImageCopyRequest.h"
+#include "librbd/deep_copy/MetadataCopyRequest.h"
+#include "librbd/deep_copy/SnapshotCopyRequest.h"
+#include "librbd/exclusive_lock/Policy.h"
+#include "librbd/image/AttachChildRequest.h"
+#include "librbd/image/AttachParentRequest.h"
+#include "librbd/image/CloneRequest.h"
+#include "librbd/image/CreateRequest.h"
+#include "librbd/image/DetachChildRequest.h"
+#include "librbd/image/DetachParentRequest.h"
+#include "librbd/image/ListWatchersRequest.h"
+#include "librbd/image/RemoveRequest.h"
+#include "librbd/internal.h"
+#include "librbd/io/ImageRequestWQ.h"
+#include "librbd/mirror/DisableRequest.h"
+#include "librbd/mirror/EnableRequest.h"
+
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::Migration: " << __func__ << ": "
+
+namespace librbd {
+
+// Two linked image specs denote the same image when pool id, pool
+// namespace, and image id all match; remaining fields are ignored.
+inline bool operator==(const linked_image_spec_t& lhs,
+                       const linked_image_spec_t& rhs) {
+  if (lhs.pool_id != rhs.pool_id) {
+    return false;
+  }
+  if (lhs.pool_namespace != rhs.pool_namespace) {
+    return false;
+  }
+  return lhs.image_id == rhs.image_id;
+}
+
+namespace api {
+
+using util::create_rados_callback;
+
+namespace {
+
+// Forwards deep-copy progress to the caller's ProgressContext and, in
+// addition, asynchronously records a "<NN>% complete" description in the
+// migration header so external observers can track the operation.  At
+// most one header update is in flight at a time; newer descriptions are
+// coalesced while an update is pending, and the destructor blocks until
+// all in-flight updates have completed.
+class MigrationProgressContext : public ProgressContext {
+public:
+  MigrationProgressContext(librados::IoCtx& io_ctx,
+                           const std::string &header_oid,
+                           cls::rbd::MigrationState state,
+                           ProgressContext *prog_ctx)
+    : m_io_ctx(io_ctx), m_header_oid(header_oid), m_state(state),
+      m_prog_ctx(prog_ctx), m_cct(reinterpret_cast<CephContext*>(io_ctx.cct())),
+      m_lock(util::unique_lock_name("librbd::api::MigrationProgressContext",
+                                    this)) {
+    ceph_assert(m_prog_ctx != nullptr);
+  }
+
+  ~MigrationProgressContext() {
+    // rados callbacks reference this object -- drain them before dying
+    wait_for_in_flight_updates();
+  }
+
+  int update_progress(uint64_t offset, uint64_t total) override {
+    ldout(m_cct, 20) << "offset=" << offset << ", total=" << total << dendl;
+
+    m_prog_ctx->update_progress(offset, total);
+
+    // guard against division by zero when migrating an empty image
+    uint64_t percent = (total > 0 ? offset * 100 / total : 100);
+    std::string description = stringify(percent) + "% complete";
+
+    send_state_description_update(description);
+
+    return 0;
+  }
+
+private:
+  librados::IoCtx& m_io_ctx;
+  std::string m_header_oid;
+  cls::rbd::MigrationState m_state;
+  ProgressContext *m_prog_ctx;
+
+  CephContext* m_cct;
+  mutable Mutex m_lock;
+  Cond m_cond;
+  std::string m_state_description;  // latest description sent or queued
+  bool m_pending_update = false;    // newer description waiting to be sent
+  int m_in_flight_state_updates = 0;
+
+  void send_state_description_update(const std::string &description) {
+    Mutex::Locker locker(m_lock);
+
+    if (description == m_state_description) {
+      return;
+    }
+
+    m_state_description = description;
+
+    // coalesce: only one header update may be in flight at a time
+    if (m_in_flight_state_updates > 0) {
+      m_pending_update = true;
+      return;
+    }
+
+    set_state_description();
+  }
+
+  void set_state_description() {
+    ldout(m_cct, 20) << "state_description=" << m_state_description << dendl;
+
+    ceph_assert(m_lock.is_locked());
+
+    librados::ObjectWriteOperation op;
+    cls_client::migration_set_state(&op, m_state, m_state_description);
+
+    using klass = MigrationProgressContext;
+    librados::AioCompletion *comp =
+      create_rados_callback<klass, &klass::handle_set_state_description>(this);
+    int r = m_io_ctx.aio_operate(m_header_oid, comp, &op);
+    ceph_assert(r == 0);
+    comp->release();
+
+    m_in_flight_state_updates++;
+  }
+
+  void handle_set_state_description(int r) {
+    ldout(m_cct, 20) << "r=" << r << dendl;
+
+    Mutex::Locker locker(m_lock);
+
+    m_in_flight_state_updates--;
+
+    if (r < 0) {
+      lderr(m_cct) << "failed to update migration state: " << cpp_strerror(r)
+                   << dendl;
+    } else if (m_pending_update) {
+      // flush the most recent coalesced description
+      set_state_description();
+      m_pending_update = false;
+    } else {
+      m_cond.Signal();
+    }
+  }
+
+  void wait_for_in_flight_updates() {
+    Mutex::Locker locker(m_lock);
+
+    ldout(m_cct, 20) << "m_in_flight_state_updates="
+                     << m_in_flight_state_updates << dendl;
+
+    m_pending_update = false;
+    while (m_in_flight_state_updates > 0) {
+      m_cond.Wait(m_lock);
+    }
+  }
+};
+
+// Look up the id of a trashed image matching the given trash source and
+// original image name.  Returns 0 and fills *image_id on success,
+// -ENOENT if no entry matches, or the error from listing the trash.
+int trash_search(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+                 const std::string &image_name, std::string *image_id) {
+  std::vector<trash_image_info_t> entries;
+
+  int r = Trash<>::list(io_ctx, entries, false);
+  if (r < 0) {
+    return r;
+  }
+
+  for (const auto &entry : entries) {
+    if (entry.source != source || entry.name != image_name) {
+      continue;
+    }
+    *image_id = entry.id;
+    return 0;
+  }
+
+  return -ENOENT;
+}
+
+// Resolve and open the *source* image of a migration given a name that
+// may refer to either the source or the destination image.  On success
+// returns the opened source image ctx plus the destination identifiers
+// and migration state read from the migration header.  Handles three
+// cases: the name resolves to the source image directly, it resolves to
+// the destination image (whose header points back at the source), or
+// the source was moved to the trash (v2 images during migration).
+template <typename I>
+int open_source_image(librados::IoCtx& io_ctx, const std::string &image_name,
+                      I **src_image_ctx, librados::IoCtx *dst_io_ctx,
+                      std::string *dst_image_name, std::string *dst_image_id,
+                      bool *flatten, bool *mirroring,
+                      cls::rbd::MigrationState *state,
+                      std::string *state_description) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  librados::IoCtx src_io_ctx;
+  std::string src_image_name;
+  std::string src_image_id;
+  cls::rbd::MigrationSpec migration_spec;
+  I *image_ctx = I::create(image_name, "", nullptr, io_ctx, false);
+
+  ldout(cct, 10) << "trying to open image by name " << io_ctx.get_pool_name()
+                 << "/" << image_name << dendl;
+
+  int r = image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+  if (r < 0) {
+    if (r != -ENOENT) {
+      lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl;
+      return r;
+    }
+    image_ctx = nullptr;
+  }
+
+  // close the image on any error exit below
+  BOOST_SCOPE_EXIT_TPL(&r, &image_ctx) {
+    if (r != 0 && image_ctx != nullptr) {
+      image_ctx->state->close();
+    }
+  } BOOST_SCOPE_EXIT_END;
+
+  if (r == 0) {
+    // The opened image is either a source (then just proceed) or a
+    // destination (then look for the source image id in the migration
+    // header).
+
+    r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid,
+                                  &migration_spec);
+
+    if (r < 0) {
+      lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+
+    ldout(cct, 10) << "migration spec: " << migration_spec << dendl;
+
+    if (migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_SRC &&
+        migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_DST) {
+      lderr(cct) << "unexpected migration header type: "
+                 << migration_spec.header_type << dendl;
+      r = -EINVAL;
+      return r;
+    }
+
+    if (migration_spec.header_type == cls::rbd::MIGRATION_HEADER_TYPE_DST) {
+      ldout(cct, 10) << "the destination image is opened" << dendl;
+
+      // Close and look for the source image.
+      r = image_ctx->state->close();
+      image_ctx = nullptr;
+      if (r < 0) {
+        lderr(cct) << "failed closing image: " << cpp_strerror(r)
+                   << dendl;
+        return r;
+      }
+
+      // the destination's migration header identifies the source pool
+      r = util::create_ioctx(io_ctx, "source image", migration_spec.pool_id,
+                             migration_spec.pool_namespace, &src_io_ctx);
+      if (r < 0) {
+        return r;
+      }
+
+      src_image_name = migration_spec.image_name;
+      src_image_id = migration_spec.image_id;
+    } else {
+      ldout(cct, 10) << "the source image is opened" << dendl;
+    }
+  } else {
+    // use ceph_assert for consistency with the rest of this file
+    ceph_assert(r == -ENOENT);
+
+    ldout(cct, 10) << "source image is not found. Trying trash" << dendl;
+
+    r = trash_search(io_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION, image_name,
+                     &src_image_id);
+    if (r < 0) {
+      lderr(cct) << "failed to determine image id: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+
+    ldout(cct, 10) << "source image id from trash: " << src_image_id << dendl;
+
+    src_io_ctx.dup(io_ctx);
+  }
+
+  if (image_ctx == nullptr) {
+    int flags = OPEN_FLAG_IGNORE_MIGRATING;
+
+    // v1 images have no id -- they can only be opened by name
+    if (src_image_id.empty()) {
+      ldout(cct, 20) << "trying to open v1 image by name "
+                     << src_io_ctx.get_pool_name() << "/" << src_image_name
+                     << dendl;
+
+      flags |= OPEN_FLAG_OLD_FORMAT;
+    } else {
+      ldout(cct, 20) << "trying to open v2 image by id "
+                     << src_io_ctx.get_pool_name() << "/" << src_image_id
+                     << dendl;
+    }
+
+    image_ctx = I::create(src_image_name, src_image_id, nullptr, src_io_ctx,
+                          false);
+    r = image_ctx->state->open(flags);
+    if (r < 0) {
+      lderr(cct) << "failed to open source image " << src_io_ctx.get_pool_name()
+                 << "/" << (src_image_id.empty() ? src_image_name : src_image_id)
+                 << ": " << cpp_strerror(r) << dendl;
+      image_ctx = nullptr;
+      return r;
+    }
+
+    r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid,
+                                  &migration_spec);
+    if (r < 0) {
+      lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+
+    ldout(cct, 20) << "migration spec: " << migration_spec << dendl;
+  }
+
+  // the source's migration header identifies the destination pool
+  r = util::create_ioctx(image_ctx->md_ctx, "source image",
+                         migration_spec.pool_id, migration_spec.pool_namespace,
+                         dst_io_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  *src_image_ctx = image_ctx;
+  *dst_image_name = migration_spec.image_name;
+  *dst_image_id = migration_spec.image_id;
+  *flatten = migration_spec.flatten;
+  *mirroring = migration_spec.mirroring;
+  *state = migration_spec.state;
+  *state_description = migration_spec.state_description;
+
+  return 0;
+}
+
+// Presents a multi-phase operation as one linear progress stream: phase
+// k of n maps its local [0, count) range onto the [(k-1)/n, k/n) slice
+// of the combined total.  Call next_step() between phases.
+class SteppedProgressContext : public ProgressContext {
+public:
+  SteppedProgressContext(ProgressContext* progress_ctx, size_t total_steps)
+    : m_progress_ctx(progress_ctx), m_total_steps(total_steps) {
+  }
+
+  void next_step() {
+    ceph_assert(m_current_step < m_total_steps);
+    ++m_current_step;
+  }
+
+  int update_progress(uint64_t object_number,
+                      uint64_t object_count) override {
+    // offset the per-step progress by the steps already completed
+    uint64_t completed = object_count * (m_current_step - 1);
+    uint64_t grand_total = object_count * m_total_steps;
+    return m_progress_ctx->update_progress(object_number + completed,
+                                           grand_total);
+  }
+
+private:
+  ProgressContext* m_progress_ctx;
+  size_t m_total_steps;
+  size_t m_current_step = 1;
+};
+
+} // anonymous namespace
+
+// Prepare a live migration: open the source image, refuse to proceed if
+// any other client has it open (foreign watchers), normalize the
+// destination image options from the source image's properties, and run
+// the instance-level prepare state machine (unlink source, write
+// migration headers, create the destination image).  Returns 0 on
+// success or a negative errno.
+template <typename I>
+int Migration<I>::prepare(librados::IoCtx& io_ctx,
+                          const std::string &image_name,
+                          librados::IoCtx& dest_io_ctx,
+                          const std::string &dest_image_name_,
+                          ImageOptions& opts) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  // default the destination name to the source name
+  std::string dest_image_name = dest_image_name_.empty() ? image_name :
+    dest_image_name_;
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << " -> "
+                 << dest_io_ctx.get_pool_name() << "/" << dest_image_name
+                 << ", opts=" << opts << dendl;
+
+  auto image_ctx = I::create(image_name, "", nullptr, io_ctx, false);
+  int r = image_ctx->state->open(0);
+  if (r < 0) {
+    lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+  BOOST_SCOPE_EXIT_TPL(image_ctx) {
+    image_ctx->state->close();
+  } BOOST_SCOPE_EXIT_END;
+
+  // migration requires exclusive access: refuse if any other client
+  // (excluding this instance and mirror daemons) has the image open
+  std::list<obj_watch_t> watchers;
+  int flags = librbd::image::LIST_WATCHERS_FILTER_OUT_MY_INSTANCE |
+              librbd::image::LIST_WATCHERS_FILTER_OUT_MIRROR_INSTANCES;
+  C_SaferCond on_list_watchers;
+  auto list_watchers_request = librbd::image::ListWatchersRequest<I>::create(
+      *image_ctx, flags, &watchers, &on_list_watchers);
+  list_watchers_request->send();
+  r = on_list_watchers.wait();
+  if (r < 0) {
+    lderr(cct) << "failed listing watchers:" << cpp_strerror(r) << dendl;
+    return r;
+  }
+  if (!watchers.empty()) {
+    lderr(cct) << "image has watchers - not migrating" << dendl;
+    return -EBUSY;
+  }
+
+  // the destination must be a v2 image
+  uint64_t format = 2;
+  if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) {
+    opts.set(RBD_IMAGE_OPTION_FORMAT, format);
+  }
+  if (format != 2) {
+    lderr(cct) << "unsupported destination image format: " << format << dendl;
+    return -EINVAL;
+  }
+
+  // default destination features to the source image's features,
+  // stripping internal-only bits
+  uint64_t features;
+  {
+    RWLock::RLocker snap_locker(image_ctx->snap_lock);
+    features = image_ctx->features;
+  }
+  opts.get(RBD_IMAGE_OPTION_FEATURES, &features);
+  if ((features & ~RBD_FEATURES_ALL) != 0) {
+    lderr(cct) << "librbd does not support requested features" << dendl;
+    return -ENOSYS;
+  }
+  features &= ~RBD_FEATURES_INTERNAL;
+  opts.set(RBD_IMAGE_OPTION_FEATURES, features);
+
+  // default order/striping parameters from the source image
+  uint64_t order = image_ctx->order;
+  if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) {
+    opts.set(RBD_IMAGE_OPTION_ORDER, order);
+  }
+  r = image::CreateRequest<I>::validate_order(cct, order);
+  if (r < 0) {
+    return r;
+  }
+
+  uint64_t stripe_unit = image_ctx->stripe_unit;
+  if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) {
+    opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
+  }
+  uint64_t stripe_count = image_ctx->stripe_count;
+  if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) {
+    opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
+  }
+
+  // FLATTEN is consumed here -- it must not leak into image creation
+  uint64_t flatten = 0;
+  if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) {
+    opts.unset(RBD_IMAGE_OPTION_FLATTEN);
+  }
+
+  ldout(cct, 20) << "updated opts=" << opts << dendl;
+
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, "", opts, flatten > 0,
+                      false, cls::rbd::MIGRATION_STATE_PREPARING, "", nullptr);
+  r = migration.prepare();
+
+  return r;
+}
+
+// Run the data-copy phase of a previously prepared migration.  Resolves
+// and opens the source image (directly or via the trash), verifies the
+// migration is in the 'prepared' state, and delegates to the
+// instance-level execute().
+template <typename I>
+int Migration<I>::execute(librados::IoCtx& io_ctx,
+                          const std::string &image_name,
+                          ProgressContext &prog_ctx) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+  I *image_ctx;
+  librados::IoCtx dest_io_ctx;
+  std::string dest_image_name;
+  std::string dest_image_id;
+  bool flatten;
+  bool mirroring;
+  cls::rbd::MigrationState state;
+  std::string state_description;
+
+  int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx,
+                            &dest_image_name, &dest_image_id, &flatten,
+                            &mirroring, &state, &state_description);
+  if (r < 0) {
+    return r;
+  }
+
+  // the source image stays open for the duration of the copy
+  BOOST_SCOPE_EXIT_TPL(image_ctx) {
+    image_ctx->state->close();
+  } BOOST_SCOPE_EXIT_END;
+
+  if (state != cls::rbd::MIGRATION_STATE_PREPARED) {
+    lderr(cct) << "current migration state is '" << state << "'"
+               << " (should be 'prepared')" << dendl;
+    return -EINVAL;
+  }
+
+  ldout(cct, 5) << "migrating " << image_ctx->md_ctx.get_pool_name() << "/"
+                << image_ctx->name << " -> " << dest_io_ctx.get_pool_name()
+                << "/" << dest_image_name << dendl;
+
+  ImageOptions opts;
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id,
+                      opts, flatten, mirroring, state, state_description,
+                      &prog_ctx);
+  r = migration.execute();
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+// Cancel an incomplete migration: resolves the source image, then runs
+// the instance-level abort() which reverts data back to the source
+// image and removes the destination.  The source image is closed
+// explicitly after abort() completes (success or failure).
+template <typename I>
+int Migration<I>::abort(librados::IoCtx& io_ctx, const std::string &image_name,
+                        ProgressContext &prog_ctx) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+  I *image_ctx;
+  librados::IoCtx dest_io_ctx;
+  std::string dest_image_name;
+  std::string dest_image_id;
+  bool flatten;
+  bool mirroring;
+  cls::rbd::MigrationState state;
+  std::string state_description;
+
+  int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx,
+                            &dest_image_name, &dest_image_id, &flatten,
+                            &mirroring, &state, &state_description);
+  if (r < 0) {
+    return r;
+  }
+
+  ldout(cct, 5) << "canceling incomplete migration "
+                << image_ctx->md_ctx.get_pool_name() << "/" << image_ctx->name
+                << " -> " << dest_io_ctx.get_pool_name() << "/" << dest_image_name
+                << dendl;
+
+  ImageOptions opts;
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id,
+                      opts, flatten, mirroring, state, state_description,
+                      &prog_ctx);
+  r = migration.abort();
+
+  image_ctx->state->close();
+
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+// Finalize a migration whose data copy already completed ('executed'
+// state): removes the migration headers, deletes the source image and
+// re-enables mirroring on the destination, via the instance-level
+// commit().
+template <typename I>
+int Migration<I>::commit(librados::IoCtx& io_ctx,
+                         const std::string &image_name,
+                         ProgressContext &prog_ctx) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+  I *image_ctx;
+  librados::IoCtx dest_io_ctx;
+  std::string dest_image_name;
+  std::string dest_image_id;
+  bool flatten;
+  bool mirroring;
+  cls::rbd::MigrationState state;
+  std::string state_description;
+
+  int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx,
+                            &dest_image_name, &dest_image_id, &flatten,
+                            &mirroring, &state, &state_description);
+  if (r < 0) {
+    return r;
+  }
+
+  if (state != cls::rbd::MIGRATION_STATE_EXECUTED) {
+    lderr(cct) << "current migration state is '" << state << "'"
+               << " (should be 'executed')" << dendl;
+    // no scope guard here -- close manually on this early-exit path
+    image_ctx->state->close();
+    return -EINVAL;
+  }
+
+  ldout(cct, 5) << "migrating " << image_ctx->md_ctx.get_pool_name() << "/"
+                << image_ctx->name << " -> " << dest_io_ctx.get_pool_name()
+                << "/" << dest_image_name << dendl;
+
+  ImageOptions opts;
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id,
+                      opts, flatten, mirroring, state, state_description,
+                      &prog_ctx);
+  r = migration.commit();
+
+  // image_ctx is closed in commit when removing src image
+
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+// Report the status of an in-progress migration.  Resolving the source
+// image also loads both migration specs, from which the instance-level
+// status() fills in *status.  The source image is closed before
+// returning.
+template <typename I>
+int Migration<I>::status(librados::IoCtx& io_ctx,
+                         const std::string &image_name,
+                         image_migration_status_t *status) {
+  CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl;
+
+  I *image_ctx;
+  librados::IoCtx dest_io_ctx;
+  std::string dest_image_name;
+  std::string dest_image_id;
+  bool flatten;
+  bool mirroring;
+  cls::rbd::MigrationState state;
+  std::string state_description;
+
+  int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx,
+                            &dest_image_name, &dest_image_id, &flatten,
+                            &mirroring, &state, &state_description);
+  if (r < 0) {
+    return r;
+  }
+
+  ldout(cct, 5) << "migrating " << image_ctx->md_ctx.get_pool_name() << "/"
+                << image_ctx->name << " -> " << dest_io_ctx.get_pool_name()
+                << "/" << dest_image_name << dendl;
+
+  ImageOptions opts;
+  Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id,
+                      opts, flatten, mirroring, state, state_description,
+                      nullptr);
+  r = migration.status(status);
+
+  image_ctx->state->close();
+
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+// Capture everything needed to drive one migration.  Notes:
+// - m_src_image_name is recorded only for v1 (old format) sources;
+//   v2 sources are tracked by id (they sit in the trash while
+//   migrating).
+// - A fresh destination image id is generated when none was supplied.
+// - m_src_migration_spec describes the destination from the source's
+//   point of view (header type SRC) and m_dst_migration_spec the
+//   source from the destination's point of view (header type DST).
+template <typename I>
+Migration<I>::Migration(I *src_image_ctx, librados::IoCtx& dst_io_ctx,
+                        const std::string &dstname,
+                        const std::string &dst_image_id,
+                        ImageOptions& opts, bool flatten, bool mirroring,
+                        cls::rbd::MigrationState state,
+                        const std::string &state_description,
+                        ProgressContext *prog_ctx)
+  : m_cct(static_cast<CephContext *>(dst_io_ctx.cct())),
+    m_src_image_ctx(src_image_ctx), m_dst_io_ctx(dst_io_ctx),
+    m_src_old_format(m_src_image_ctx->old_format),
+    m_src_image_name(m_src_image_ctx->old_format ? m_src_image_ctx->name : ""),
+    m_src_image_id(m_src_image_ctx->id),
+    m_src_header_oid(m_src_image_ctx->header_oid), m_dst_image_name(dstname),
+    m_dst_image_id(dst_image_id.empty() ?
+                   util::generate_image_id(m_dst_io_ctx) : dst_image_id),
+    m_dst_header_oid(util::header_name(m_dst_image_id)), m_image_options(opts),
+    m_flatten(flatten), m_mirroring(mirroring), m_prog_ctx(prog_ctx),
+    m_src_migration_spec(cls::rbd::MIGRATION_HEADER_TYPE_SRC,
+                         m_dst_io_ctx.get_id(), m_dst_io_ctx.get_namespace(),
+                         m_dst_image_name, m_dst_image_id, {}, 0, flatten,
+                         mirroring, state, state_description),
+    m_dst_migration_spec(cls::rbd::MIGRATION_HEADER_TYPE_DST,
+                         src_image_ctx->md_ctx.get_id(),
+                         src_image_ctx->md_ctx.get_namespace(),
+                         m_src_image_ctx->name, m_src_image_ctx->id, {}, 0,
+                         flatten, mirroring, state, state_description) {
+  // keep a private IoCtx to the source pool independent of the image ctx
+  m_src_io_ctx.dup(src_image_ctx->md_ctx);
+}
+
+// Prepare state machine.  Each failing step rolls back the steps that
+// already succeeded; rollback calls are best effort (their return
+// values are deliberately ignored).
+template <typename I>
+int Migration<I>::prepare() {
+  ldout(m_cct, 10) << dendl;
+
+  // only user snapshots are migratable; clones also require layering
+  // support on the destination
+  int r = validate_src_snaps();
+  if (r < 0) {
+    return r;
+  }
+
+  // mirroring is suspended for the duration; re-enabled on failure
+  r = disable_mirroring(m_src_image_ctx, &m_mirroring);
+  if (r < 0) {
+    return r;
+  }
+
+  r = unlink_src_image();
+  if (r < 0) {
+    enable_mirroring(m_src_image_ctx, m_mirroring);
+    return r;
+  }
+
+  r = set_migration();
+  if (r < 0) {
+    relink_src_image();
+    enable_mirroring(m_src_image_ctx, m_mirroring);
+    return r;
+  }
+
+  r = create_dst_image();
+  if (r < 0) {
+    // abort() undoes all of the preceding steps
+    abort();
+    return r;
+  }
+
+  ldout(m_cct, 10) << "succeeded" << dendl;
+
+  return 0;
+}
+
+// Copy the image data by running the destination image's 'migrate'
+// operation, advancing the header state EXECUTING -> EXECUTED.  A
+// -EROFS result while the exclusive lock is no longer accepting ops
+// indicates the lock was lost mid-operation and the operation is
+// retried; any other error is fatal.
+template <typename I>
+int Migration<I>::execute() {
+  ldout(m_cct, 10) << dendl;
+
+  auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr,
+                                 m_dst_io_ctx, false);
+  int r = dst_image_ctx->state->open(0);
+  if (r < 0) {
+    lderr(m_cct) << "failed to open destination image: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  BOOST_SCOPE_EXIT_TPL(dst_image_ctx) {
+    dst_image_ctx->state->close();
+  } BOOST_SCOPE_EXIT_END;
+
+  r = set_state(cls::rbd::MIGRATION_STATE_EXECUTING, "");
+  if (r < 0) {
+    return r;
+  }
+
+  while (true) {
+    // progress is mirrored into the source migration header
+    MigrationProgressContext prog_ctx(m_src_io_ctx, m_src_header_oid,
+                                      cls::rbd::MIGRATION_STATE_EXECUTING,
+                                      m_prog_ctx);
+    r = dst_image_ctx->operations->migrate(prog_ctx);
+    if (r == -EROFS) {
+      RWLock::RLocker owner_locker(dst_image_ctx->owner_lock);
+      if (dst_image_ctx->exclusive_lock != nullptr &&
+          !dst_image_ctx->exclusive_lock->accept_ops()) {
+        ldout(m_cct, 5) << "lost exclusive lock, retrying remote" << dendl;
+        continue;
+      }
+    }
+    break;
+  }
+  if (r < 0) {
+    lderr(m_cct) << "migration failed: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  r = set_state(cls::rbd::MIGRATION_STATE_EXECUTED, "");
+  if (r < 0) {
+    return r;
+  }
+
+  dst_image_ctx->notify_update();
+
+  ldout(m_cct, 10) << "succeeded" << dendl;
+
+  return 0;
+}
+
+// Abort the migration: revert any data already copied to the
+// destination back to the source image, relink the destination's
+// children and group membership to the source, remove the destination
+// image, and finally restore the source image's directory entry,
+// migration header and mirroring state.
+template <typename I>
+int Migration<I>::abort() {
+  ldout(m_cct, 10) << dendl;
+
+  int r;
+
+  // ensure this client owns the source image's exclusive lock
+  m_src_image_ctx->owner_lock.get_read();
+  if (m_src_image_ctx->exclusive_lock != nullptr &&
+      !m_src_image_ctx->exclusive_lock->is_lock_owner()) {
+    C_SaferCond ctx;
+    m_src_image_ctx->exclusive_lock->acquire_lock(&ctx);
+    m_src_image_ctx->owner_lock.put_read();
+    r = ctx.wait();
+    if (r < 0) {
+      lderr(m_cct) << "error acquiring exclusive lock: " << cpp_strerror(r)
+                   << dendl;
+      return r;
+    }
+  } else {
+    m_src_image_ctx->owner_lock.put_read();
+  }
+
+  group_info_t group_info;
+  group_info.pool = -1;
+
+  auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr,
+                                 m_dst_io_ctx, false);
+  r = dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+  if (r < 0) {
+    // a missing/unopenable destination is not fatal -- skip straight to
+    // relinking the source image
+    ldout(m_cct, 1) << "failed to open destination image: " << cpp_strerror(r)
+                    << dendl;
+  } else {
+    BOOST_SCOPE_EXIT_TPL(&dst_image_ctx) {
+      if (dst_image_ctx != nullptr) {
+        dst_image_ctx->state->close();
+      }
+    } BOOST_SCOPE_EXIT_END;
+
+    // refuse to abort while another client has the destination open
+    std::list<obj_watch_t> watchers;
+    int flags = librbd::image::LIST_WATCHERS_FILTER_OUT_MY_INSTANCE |
+                librbd::image::LIST_WATCHERS_FILTER_OUT_MIRROR_INSTANCES;
+    C_SaferCond on_list_watchers;
+    auto list_watchers_request = librbd::image::ListWatchersRequest<I>::create(
+        *dst_image_ctx, flags, &watchers, &on_list_watchers);
+    list_watchers_request->send();
+    r = on_list_watchers.wait();
+    if (r < 0) {
+      lderr(m_cct) << "failed listing watchers:" << cpp_strerror(r) << dendl;
+      return r;
+    }
+    if (!watchers.empty()) {
+      lderr(m_cct) << "image has watchers - cannot abort migration" << dendl;
+      return -EBUSY;
+    }
+
+    // ensure destination image is now read-only
+    r = set_state(cls::rbd::MIGRATION_STATE_ABORTING, "");
+    if (r < 0) {
+      return r;
+    }
+
+    // copy dst HEAD -> src HEAD; propagate failures instead of silently
+    // discarding data that was written to the destination
+    SteppedProgressContext progress_ctx(m_prog_ctx, 2);
+    r = revert_data(dst_image_ctx, m_src_image_ctx, &progress_ctx);
+    if (r < 0) {
+      return r;
+    }
+    progress_ctx.next_step();
+
+    ldout(m_cct, 10) << "relinking children" << dendl;
+    r = relink_children(dst_image_ctx, m_src_image_ctx);
+    if (r < 0) {
+      return r;
+    }
+
+    ldout(m_cct, 10) << "removing dst image snapshots" << dendl;
+
+    std::vector<librbd::snap_info_t> snaps;
+    r = snap_list(dst_image_ctx, snaps);
+    if (r < 0) {
+      lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r)
+                   << dendl;
+      return r;
+    }
+
+    for (auto &snap : snaps) {
+      librbd::NoOpProgressContext prog_ctx;
+      int r = snap_remove(dst_image_ctx, snap.name.c_str(),
+                          RBD_SNAP_REMOVE_UNPROTECT, prog_ctx);
+      if (r < 0) {
+        lderr(m_cct) << "failed removing snapshot: " << cpp_strerror(r)
+                     << dendl;
+        return r;
+      }
+    }
+
+    ldout(m_cct, 10) << "removing group" << dendl;
+
+    // remember the group so the source image can be re-added below
+    r = remove_group(dst_image_ctx, &group_info);
+    if (r < 0 && r != -ENOENT) {
+      return r;
+    }
+
+    ldout(m_cct, 10) << "removing dst image" << dendl;
+
+    ceph_assert(dst_image_ctx->ignore_migrating);
+
+    ThreadPool *thread_pool;
+    ContextWQ *op_work_queue;
+    ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &op_work_queue);
+    C_SaferCond on_remove;
+    auto req = librbd::image::RemoveRequest<>::create(
+      m_dst_io_ctx, dst_image_ctx, false, false, progress_ctx, op_work_queue,
+      &on_remove);
+    req->send();
+    r = on_remove.wait();
+
+    // the remove request consumed the image ctx; clear the pointer so
+    // the scope guard does not close it again
+    dst_image_ctx = nullptr;
+
+    if (r < 0) {
+      lderr(m_cct) << "failed removing destination image '"
+                   << m_dst_io_ctx.get_pool_name() << "/" << m_dst_image_name
+                   << " (" << m_dst_image_id << ")': " << cpp_strerror(r)
+                   << dendl;
+      return r;
+    }
+  }
+
+  r = relink_src_image();
+  if (r < 0) {
+    return r;
+  }
+
+  r = add_group(m_src_image_ctx, group_info);
+  if (r < 0) {
+    return r;
+  }
+
+  r = remove_migration(m_src_image_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  r = enable_mirroring(m_src_image_ctx, m_mirroring);
+  if (r < 0) {
+    return r;
+  }
+
+  ldout(m_cct, 10) << "succeeded" << dendl;
+
+  return 0;
+}
+
+// Finalize the migration: remove the destination's migration header,
+// delete the source image and re-enable mirroring on the destination.
+// The scope guard closes the source image ctx if it is still open on
+// exit (remove_src_image() clears it on success -- see the comment in
+// the static commit() wrapper).
+template <typename I>
+int Migration<I>::commit() {
+  ldout(m_cct, 10) << dendl;
+
+  BOOST_SCOPE_EXIT_TPL(&m_src_image_ctx) {
+    if (m_src_image_ctx != nullptr) {
+      m_src_image_ctx->state->close();
+    }
+  } BOOST_SCOPE_EXIT_END;
+
+  auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr,
+                                 m_dst_io_ctx, false);
+  int r = dst_image_ctx->state->open(0);
+  if (r < 0) {
+    lderr(m_cct) << "failed to open destination image: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  BOOST_SCOPE_EXIT_TPL(dst_image_ctx) {
+    dst_image_ctx->state->close();
+  } BOOST_SCOPE_EXIT_END;
+
+  r = remove_migration(dst_image_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  r = remove_src_image();
+  if (r < 0) {
+    return r;
+  }
+
+  r = enable_mirroring(dst_image_ctx, m_mirroring);
+  if (r < 0) {
+    return r;
+  }
+
+  ldout(m_cct, 10) << "succeeded" << dendl;
+
+  return 0;
+}
+
+// Fill in the user-visible migration status from the cached migration
+// specs.  The destination spec identifies the source image and the
+// source spec identifies the destination image (each header points at
+// its peer).
+template <typename I>
+int Migration<I>::status(image_migration_status_t *status) {
+  ldout(m_cct, 10) << dendl;
+
+  status->source_pool_id = m_dst_migration_spec.pool_id;
+  status->source_pool_namespace = m_dst_migration_spec.pool_namespace;
+  status->source_image_name = m_dst_migration_spec.image_name;
+  status->source_image_id = m_dst_migration_spec.image_id;
+
+  status->dest_pool_id = m_src_migration_spec.pool_id;
+  status->dest_pool_namespace = m_src_migration_spec.pool_namespace;
+  status->dest_image_name = m_src_migration_spec.image_name;
+  status->dest_image_id = m_src_migration_spec.image_id;
+
+  // translate the on-disk state into the public API enumeration
+  auto to_api_state = [](cls::rbd::MigrationState state) {
+    switch (state) {
+    case cls::rbd::MIGRATION_STATE_ERROR:
+      return RBD_IMAGE_MIGRATION_STATE_ERROR;
+    case cls::rbd::MIGRATION_STATE_PREPARING:
+      return RBD_IMAGE_MIGRATION_STATE_PREPARING;
+    case cls::rbd::MIGRATION_STATE_PREPARED:
+      return RBD_IMAGE_MIGRATION_STATE_PREPARED;
+    case cls::rbd::MIGRATION_STATE_EXECUTING:
+      return RBD_IMAGE_MIGRATION_STATE_EXECUTING;
+    case cls::rbd::MIGRATION_STATE_EXECUTED:
+      return RBD_IMAGE_MIGRATION_STATE_EXECUTED;
+    default:
+      return RBD_IMAGE_MIGRATION_STATE_UNKNOWN;
+    }
+  };
+  status->state = to_api_state(m_src_migration_spec.state);
+  status->state_description = m_src_migration_spec.state_description;
+
+  return 0;
+}
+
+// Record the given state/description in both migration headers, source
+// first, failing on the first error.
+template <typename I>
+int Migration<I>::set_state(cls::rbd::MigrationState state,
+                            const std::string &description) {
+  int r = cls_client::migration_set_state(&m_src_io_ctx, m_src_header_oid,
+                                          state, description);
+  if (r < 0) {
+    lderr(m_cct) << "failed to set source migration header: "
+                 << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  r = cls_client::migration_set_state(&m_dst_io_ctx, m_dst_header_oid,
+                                      state, description);
+  if (r < 0) {
+    lderr(m_cct) << "failed to set destination migration header: "
+                 << cpp_strerror(r) << dendl;
+  }
+  return r;
+}
+
+// List the source image's snapshots, verifying that every snapshot is
+// in the user namespace (other namespace types cannot be migrated).
+// Returns -EBUSY when a non-user snapshot is found.
+template <typename I>
+int Migration<I>::list_src_snaps(std::vector<librbd::snap_info_t> *snaps) {
+  ldout(m_cct, 10) << dendl;
+
+  int r = snap_list(m_src_image_ctx, *snaps);
+  if (r < 0) {
+    lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  for (auto &snap : *snaps) {
+    librbd::snap_namespace_type_t namespace_type;
+    r = Snapshot<I>::get_namespace_type(m_src_image_ctx, snap.id,
+                                        &namespace_type);
+    if (r < 0) {
+      lderr(m_cct) << "error getting snap namespace type: " << cpp_strerror(r)
+                   << dendl;
+      return r;
+    }
+
+    if (namespace_type == RBD_SNAP_NAMESPACE_TYPE_USER) {
+      continue;
+    }
+
+    // trash snapshots imply linked clones still depend on this image
+    if (namespace_type == RBD_SNAP_NAMESPACE_TYPE_TRASH) {
+      lderr(m_cct) << "image has snapshots with linked clones that must be "
+                   << "deleted or flattened before the image can be migrated"
+                   << dendl;
+    } else {
+      lderr(m_cct) << "image has non-user type snapshots "
+                   << "that are not supported by migration" << dendl;
+    }
+    return -EBUSY;
+  }
+
+  return 0;
+}
+
+// Verify the source image's snapshots are compatible with migration:
+// all must be user snapshots (enforced by list_src_snaps()), and if any
+// snapshot has child clones the destination must have the layering
+// feature enabled.
+template <typename I>
+int Migration<I>::validate_src_snaps() {
+  ldout(m_cct, 10) << dendl;
+
+  std::vector<librbd::snap_info_t> snaps;
+  int r = list_src_snaps(&snaps);
+  if (r < 0) {
+    return r;
+  }
+
+  // the FEATURES option is always populated by prepare() by this point
+  uint64_t dst_features = 0;
+  r = m_image_options.get(RBD_IMAGE_OPTION_FEATURES, &dst_features);
+  ceph_assert(r == 0);
+
+  // without layering on the source there can be no clones to check
+  if (!m_src_image_ctx->test_features(RBD_FEATURE_LAYERING)) {
+    return 0;
+  }
+
+  for (auto &snap : snaps) {
+    // snap_lock is re-taken per iteration while children are listed
+    RWLock::RLocker snap_locker(m_src_image_ctx->snap_lock);
+    cls::rbd::ParentImageSpec parent_spec{m_src_image_ctx->md_ctx.get_id(),
+                                          m_src_image_ctx->md_ctx.get_namespace(),
+                                          m_src_image_ctx->id, snap.id};
+    std::vector<librbd::linked_image_spec_t> child_images;
+    r = api::Image<I>::list_children(m_src_image_ctx, parent_spec,
+                                     &child_images);
+    if (r < 0) {
+      lderr(m_cct) << "failed listing children: " << cpp_strerror(r)
+                   << dendl;
+      return r;
+    }
+    if (!child_images.empty()) {
+      ldout(m_cct, 1) << m_src_image_ctx->name << "@" << snap.name
+                      << " has children" << dendl;
+
+      if ((dst_features & RBD_FEATURE_LAYERING) == 0) {
+        lderr(m_cct) << "can't migrate to destination without layering feature: "
+                     << "image has children" << dendl;
+        return -EINVAL;
+      }
+    }
+  }
+
+  return 0;
+}
+
+
+// Write the source migration header and notify watchers of the change.
+template <typename I>
+int Migration<I>::set_migration() {
+  ldout(m_cct, 10) << dendl;
+
+  // flag the in-memory context before the on-disk header changes
+  m_src_image_ctx->ignore_migrating = true;
+
+  int r = cls_client::migration_set(&m_src_io_ctx, m_src_header_oid,
+                                    m_src_migration_spec);
+  if (r == 0) {
+    m_src_image_ctx->notify_update();
+    return 0;
+  }
+
+  lderr(m_cct) << "failed to set migration header: " << cpp_strerror(r)
+               << dendl;
+  return r;
+}
+
+// Remove the migration header from the given image and notify watchers.
+// A missing header (-ENOENT) is treated as success.
+template <typename I>
+int Migration<I>::remove_migration(I *image_ctx) {
+  ldout(m_cct, 10) << dendl;
+
+  int r = cls_client::migration_remove(&image_ctx->md_ctx,
+                                       image_ctx->header_oid);
+  if (r < 0 && r != -ENOENT) {
+    lderr(m_cct) << "failed removing migration header: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  image_ctx->notify_update();
+
+  return 0;
+}
+
+// Hide the source image from users: v1 (old format) images are delisted
+// from the tmap directory, v2 images are moved to the trash.
+template <typename I>
+int Migration<I>::unlink_src_image() {
+  return m_src_old_format ? v1_unlink_src_image() : v2_unlink_src_image();
+}
+
+// Remove the source image's entry from the v1 directory tmap.
+template <typename I>
+int Migration<I>::v1_unlink_src_image() {
+  ldout(m_cct, 10) << dendl;
+
+  int r = tmap_rm(m_src_io_ctx, m_src_image_name);
+  if (r == 0) {
+    return 0;
+  }
+
+  lderr(m_cct) << "failed removing " << m_src_image_name << " from tmap: "
+               << cpp_strerror(r) << dendl;
+  return r;
+}
+
+// Unlink a v2 source image by moving it to the trash (with a zero
+// deferment delay).  This client's exclusive lock, if held, is released
+// first -- NOTE(review): presumably so Trash<I>::move can operate on
+// the image; confirm against its implementation.
+template <typename I>
+int Migration<I>::v2_unlink_src_image() {
+  ldout(m_cct, 10) << dendl;
+
+  // owner_lock is dropped manually on every path below -- keep the
+  // get_read()/put_read() calls paired when modifying this code
+  m_src_image_ctx->owner_lock.get_read();
+  if (m_src_image_ctx->exclusive_lock != nullptr &&
+      m_src_image_ctx->exclusive_lock->is_lock_owner()) {
+    C_SaferCond ctx;
+    m_src_image_ctx->exclusive_lock->release_lock(&ctx);
+    m_src_image_ctx->owner_lock.put_read();
+    int r = ctx.wait();
+    if (r < 0) {
+      lderr(m_cct) << "error releasing exclusive lock: " << cpp_strerror(r)
+                   << dendl;
+      return r;
+    }
+  } else {
+    m_src_image_ctx->owner_lock.put_read();
+  }
+
+  int r = Trash<I>::move(m_src_io_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION,
+                         m_src_image_ctx->name, 0);
+  if (r < 0) {
+    lderr(m_cct) << "failed moving image to trash: " << cpp_strerror(r)
+                 << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+int Migration<I>::relink_src_image() {
+ if (m_src_old_format) {
+ return v1_relink_src_image();
+ } else {
+ return v2_relink_src_image();
+ }
+}
+
+template <typename I>
+int Migration<I>::v1_relink_src_image() {
+ ldout(m_cct, 10) << dendl;
+
+ int r = tmap_set(m_src_io_ctx, m_src_image_name);
+ if (r < 0) {
+ lderr(m_cct) << "failed adding " << m_src_image_name << " to tmap: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::v2_relink_src_image() {
+ ldout(m_cct, 10) << dendl;
+
+ int r = Trash<I>::restore(m_src_io_ctx,
+ {cls::rbd::TRASH_IMAGE_SOURCE_MIGRATION},
+ m_src_image_ctx->id, m_src_image_ctx->name);
+ if (r < 0) {
+ lderr(m_cct) << "failed restoring image from trash: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::create_dst_image() {
+ ldout(m_cct, 10) << dendl;
+
+ uint64_t size;
+ cls::rbd::ParentImageSpec parent_spec;
+ {
+ RWLock::RLocker snap_locker(m_src_image_ctx->snap_lock);
+ RWLock::RLocker parent_locker(m_src_image_ctx->parent_lock);
+ size = m_src_image_ctx->size;
+
+ // use oldest snapshot or HEAD for parent spec
+ if (!m_src_image_ctx->snap_info.empty()) {
+ parent_spec = m_src_image_ctx->snap_info.begin()->second.parent.spec;
+ } else {
+ parent_spec = m_src_image_ctx->parent_md.spec;
+ }
+ }
+
+ ThreadPool *thread_pool;
+ ContextWQ *op_work_queue;
+ ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &op_work_queue);
+
+ ConfigProxy config{m_cct->_conf};
+ api::Config<I>::apply_pool_overrides(m_dst_io_ctx, &config);
+
+ int r;
+ C_SaferCond on_create;
+ librados::IoCtx parent_io_ctx;
+ if (parent_spec.pool_id == -1) {
+ auto *req = image::CreateRequest<I>::create(
+ config, m_dst_io_ctx, m_dst_image_name, m_dst_image_id, size,
+ m_image_options, "", "", true /* skip_mirror_enable */, op_work_queue,
+ &on_create);
+ req->send();
+ } else {
+ r = util::create_ioctx(m_src_image_ctx->md_ctx, "destination image",
+ parent_spec.pool_id, parent_spec.pool_namespace,
+ &parent_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ auto *req = image::CloneRequest<I>::create(
+ config, parent_io_ctx, parent_spec.image_id, "", parent_spec.snap_id,
+ m_dst_io_ctx, m_dst_image_name, m_dst_image_id, m_image_options, "", "",
+ op_work_queue, &on_create);
+ req->send();
+ }
+
+ r = on_create.wait();
+ if (r < 0) {
+ lderr(m_cct) << "header creation failed: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr,
+ m_dst_io_ctx, false);
+
+ r = dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING);
+ if (r < 0) {
+ lderr(m_cct) << "failed to open newly created header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ BOOST_SCOPE_EXIT_TPL(dst_image_ctx) {
+ dst_image_ctx->state->close();
+ } BOOST_SCOPE_EXIT_END;
+
+ {
+ RWLock::RLocker owner_locker(dst_image_ctx->owner_lock);
+ r = dst_image_ctx->operations->prepare_image_update(
+ exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true);
+ if (r < 0) {
+ lderr(m_cct) << "cannot obtain exclusive lock" << dendl;
+ return r;
+ }
+ if (dst_image_ctx->exclusive_lock != nullptr) {
+ dst_image_ctx->exclusive_lock->block_requests(0);
+ }
+ }
+
+ SnapSeqs snap_seqs;
+
+ C_SaferCond on_snapshot_copy;
+ auto snapshot_copy_req = librbd::deep_copy::SnapshotCopyRequest<I>::create(
+ m_src_image_ctx, dst_image_ctx, 0, CEPH_NOSNAP, 0, m_flatten,
+ m_src_image_ctx->op_work_queue, &snap_seqs, &on_snapshot_copy);
+ snapshot_copy_req->send();
+ r = on_snapshot_copy.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to copy snapshots: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ C_SaferCond on_metadata_copy;
+ auto metadata_copy_req = librbd::deep_copy::MetadataCopyRequest<I>::create(
+ m_src_image_ctx, dst_image_ctx, &on_metadata_copy);
+ metadata_copy_req->send();
+ r = on_metadata_copy.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to copy metadata: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ m_dst_migration_spec = {cls::rbd::MIGRATION_HEADER_TYPE_DST,
+ m_src_io_ctx.get_id(), m_src_io_ctx.get_namespace(),
+ m_src_image_name, m_src_image_id, snap_seqs, size,
+ m_flatten, m_mirroring,
+ cls::rbd::MIGRATION_STATE_PREPARING, ""};
+
+ r = cls_client::migration_set(&m_dst_io_ctx, m_dst_header_oid,
+ m_dst_migration_spec);
+ if (r < 0) {
+ lderr(m_cct) << "failed to set migration header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ r = update_group(m_src_image_ctx, dst_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = set_state(cls::rbd::MIGRATION_STATE_PREPARED, "");
+ if (r < 0) {
+ return r;
+ }
+
+ r = dst_image_ctx->state->refresh();
+ if (r < 0) {
+ lderr(m_cct) << "failed to refresh destination image: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ r = relink_children(m_src_image_ctx, dst_image_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::remove_group(I *image_ctx, group_info_t *group_info) {
+ int r = librbd::api::Group<I>::image_get_group(image_ctx, group_info);
+ if (r < 0) {
+ lderr(m_cct) << "failed to get image group: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (group_info->pool == -1) {
+ return -ENOENT;
+ }
+
+ ceph_assert(!image_ctx->id.empty());
+
+ ldout(m_cct, 10) << dendl;
+
+ IoCtx group_ioctx;
+ r = util::create_ioctx(image_ctx->md_ctx, "group", group_info->pool, {},
+ &group_ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = librbd::api::Group<I>::image_remove_by_id(group_ioctx,
+ group_info->name.c_str(),
+ image_ctx->md_ctx,
+ image_ctx->id.c_str());
+ if (r < 0) {
+ lderr(m_cct) << "failed to remove image from group: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::add_group(I *image_ctx, group_info_t &group_info) {
+ if (group_info.pool == -1) {
+ return 0;
+ }
+
+ ldout(m_cct, 10) << dendl;
+
+ IoCtx group_ioctx;
+ int r = util::create_ioctx(image_ctx->md_ctx, "group", group_info.pool, {},
+ &group_ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = librbd::api::Group<I>::image_add(group_ioctx, group_info.name.c_str(),
+ image_ctx->md_ctx,
+ image_ctx->name.c_str());
+ if (r < 0) {
+ lderr(m_cct) << "failed to add image to group: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::update_group(I *from_image_ctx, I *to_image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ group_info_t group_info;
+
+ int r = remove_group(from_image_ctx, &group_info);
+ if (r < 0) {
+ return r == -ENOENT ? 0 : r;
+ }
+
+ r = add_group(to_image_ctx, group_info);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::disable_mirroring(I *image_ctx, bool *was_enabled) {
+ *was_enabled = false;
+
+ if (!image_ctx->test_features(RBD_FEATURE_JOURNALING)) {
+ return 0;
+ }
+
+ cls::rbd::MirrorImage mirror_image;
+ int r = cls_client::mirror_image_get(&image_ctx->md_ctx, image_ctx->id,
+ &mirror_image);
+ if (r == -ENOENT) {
+ ldout(m_cct, 10) << "mirroring is not enabled for this image" << dendl;
+ return 0;
+ }
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to retrieve mirror image: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+ *was_enabled = true;
+ }
+
+ ldout(m_cct, 10) << dendl;
+
+ C_SaferCond ctx;
+ auto req = mirror::DisableRequest<I>::create(image_ctx, false, true, &ctx);
+ req->send();
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to disable mirroring: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ m_src_migration_spec.mirroring = true;
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::enable_mirroring(I *image_ctx, bool was_enabled) {
+
+ if (!image_ctx->test_features(RBD_FEATURE_JOURNALING)) {
+ return 0;
+ }
+
+ cls::rbd::MirrorMode mirror_mode;
+ int r = cls_client::mirror_mode_get(&image_ctx->md_ctx, &mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "failed to retrieve mirror mode: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ ldout(m_cct, 10) << "mirroring is not enabled for destination pool"
+ << dendl;
+ return 0;
+ }
+ if (mirror_mode == cls::rbd::MIRROR_MODE_IMAGE && !was_enabled) {
+ ldout(m_cct, 10) << "mirroring is not enabled for image" << dendl;
+ return 0;
+ }
+
+ ldout(m_cct, 10) << dendl;
+
+ C_SaferCond ctx;
+ auto req = mirror::EnableRequest<I>::create(image_ctx->md_ctx, image_ctx->id,
+ "", image_ctx->op_work_queue,
+ &ctx);
+ req->send();
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to enable mirroring: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+// When relinking children we should be careful as it may be interrupted
+// at any moment for some reason and we may end up in an inconsistent
+// state, which we have to be able to fix with "migration abort". Below
+// are all possible states during migration (P1 - source parent, P2 -
+// destination parent, C - child):
+//
+// P1 P2 P1 P2 P1 P2 P1 P2
+// ^\ \ ^ \ /^ /^
+// \v v/ v/ v/
+// C C C C
+//
+// 1 2 3 4
+//
+// (1) and (4) are the initial and the final consistent states. (2)
+// and (3) are intermediate inconsistent states that have to be fixed
+// by relink_children running in "migration abort" mode. For this, it
+// scans P2 for all children attached and relinks (fixes) states (3)
+// and (4) to state (1). Then it scans P1 for remaining children and
+// fixes the states (2).
+
+template <typename I>
+int Migration<I>::relink_children(I *from_image_ctx, I *to_image_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ std::vector<librbd::snap_info_t> snaps;
+ int r = list_src_snaps(&snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ bool migration_abort = (to_image_ctx == m_src_image_ctx);
+
+ for (auto it = snaps.begin(); it != snaps.end(); it++) {
+ auto &snap = *it;
+ std::vector<librbd::linked_image_spec_t> src_child_images;
+
+ if (from_image_ctx != m_src_image_ctx) {
+ ceph_assert(migration_abort);
+
+ // We run list snaps against the src image to get only those snapshots
+ // that are migrated. If the "from" image is not the src image
+ // (abort migration case), we need to remap snap ids.
+ // Also collect the list of the children currently attached to the
+ // source, so we could make a proper decision later about relinking.
+
+ RWLock::RLocker src_snap_locker(to_image_ctx->snap_lock);
+ cls::rbd::ParentImageSpec src_parent_spec{to_image_ctx->md_ctx.get_id(),
+ to_image_ctx->md_ctx.get_namespace(),
+ to_image_ctx->id, snap.id};
+ r = api::Image<I>::list_children(to_image_ctx, src_parent_spec,
+ &src_child_images);
+ if (r < 0) {
+ lderr(m_cct) << "failed listing children: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ RWLock::RLocker snap_locker(from_image_ctx->snap_lock);
+ snap.id = from_image_ctx->get_snap_id(cls::rbd::UserSnapshotNamespace(),
+ snap.name);
+ if (snap.id == CEPH_NOSNAP) {
+ ldout(m_cct, 5) << "skipping snapshot " << snap.name << dendl;
+ continue;
+ }
+ }
+
+ std::vector<librbd::linked_image_spec_t> child_images;
+ {
+ RWLock::RLocker snap_locker(from_image_ctx->snap_lock);
+ cls::rbd::ParentImageSpec parent_spec{from_image_ctx->md_ctx.get_id(),
+ from_image_ctx->md_ctx.get_namespace(),
+ from_image_ctx->id, snap.id};
+ r = api::Image<I>::list_children(from_image_ctx, parent_spec,
+ &child_images);
+ if (r < 0) {
+ lderr(m_cct) << "failed listing children: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ }
+
+ for (auto &child_image : child_images) {
+ r = relink_child(from_image_ctx, to_image_ctx, snap, child_image,
+ migration_abort, true);
+ if (r < 0) {
+ return r;
+ }
+
+ src_child_images.erase(std::remove(src_child_images.begin(),
+ src_child_images.end(), child_image),
+ src_child_images.end());
+ }
+
+ for (auto &child_image : src_child_images) {
+ r = relink_child(from_image_ctx, to_image_ctx, snap, child_image,
+ migration_abort, false);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::relink_child(I *from_image_ctx, I *to_image_ctx,
+ const librbd::snap_info_t &from_snap,
+ const librbd::linked_image_spec_t &child_image,
+ bool migration_abort, bool reattach_child) {
+ ldout(m_cct, 10) << from_snap.name << " " << child_image.pool_name << "/"
+ << child_image.pool_namespace << "/"
+ << child_image.image_name << " (migration_abort="
+ << migration_abort << ", reattach_child=" << reattach_child
+ << ")" << dendl;
+
+ librados::snap_t to_snap_id;
+ {
+ RWLock::RLocker snap_locker(to_image_ctx->snap_lock);
+ to_snap_id = to_image_ctx->get_snap_id(cls::rbd::UserSnapshotNamespace(),
+ from_snap.name);
+ if (to_snap_id == CEPH_NOSNAP) {
+ lderr(m_cct) << "no snapshot " << from_snap.name << " on destination image"
+ << dendl;
+ return -ENOENT;
+ }
+ }
+
+ librados::IoCtx child_io_ctx;
+ int r = util::create_ioctx(to_image_ctx->md_ctx,
+ "child image " + child_image.image_name,
+ child_image.pool_id, child_image.pool_namespace,
+ &child_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ I *child_image_ctx = I::create("", child_image.image_id, nullptr,
+ child_io_ctx, false);
+ r = child_image_ctx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
+ if (r < 0) {
+ lderr(m_cct) << "failed to open child image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ BOOST_SCOPE_EXIT_TPL(child_image_ctx) {
+ child_image_ctx->state->close();
+ } BOOST_SCOPE_EXIT_END;
+
+ uint32_t clone_format = 1;
+ if (child_image_ctx->test_op_features(RBD_OPERATION_FEATURE_CLONE_CHILD)) {
+ clone_format = 2;
+ }
+
+ cls::rbd::ParentImageSpec parent_spec;
+ uint64_t parent_overlap;
+ {
+ RWLock::RLocker snap_locker(child_image_ctx->snap_lock);
+ RWLock::RLocker parent_locker(child_image_ctx->parent_lock);
+
+ // use oldest snapshot or HEAD for parent spec
+ if (!child_image_ctx->snap_info.empty()) {
+ parent_spec = child_image_ctx->snap_info.begin()->second.parent.spec;
+ parent_overlap = child_image_ctx->snap_info.begin()->second.parent.overlap;
+ } else {
+ parent_spec = child_image_ctx->parent_md.spec;
+ parent_overlap = child_image_ctx->parent_md.overlap;
+ }
+ }
+
+ if (migration_abort &&
+ parent_spec.pool_id == to_image_ctx->md_ctx.get_id() &&
+ parent_spec.pool_namespace == to_image_ctx->md_ctx.get_namespace() &&
+ parent_spec.image_id == to_image_ctx->id &&
+ parent_spec.snap_id == to_snap_id) {
+ ldout(m_cct, 10) << "no need for parent re-attach" << dendl;
+ } else {
+ if (parent_spec.pool_id != from_image_ctx->md_ctx.get_id() ||
+ parent_spec.pool_namespace != from_image_ctx->md_ctx.get_namespace() ||
+ parent_spec.image_id != from_image_ctx->id ||
+ parent_spec.snap_id != from_snap.id) {
+ lderr(m_cct) << "parent is not source image: " << parent_spec.pool_id
+ << "/" << parent_spec.pool_namespace << "/"
+ << parent_spec.image_id << "@" << parent_spec.snap_id
+ << dendl;
+ return -ESTALE;
+ }
+
+ parent_spec.pool_id = to_image_ctx->md_ctx.get_id();
+ parent_spec.pool_namespace = to_image_ctx->md_ctx.get_namespace();
+ parent_spec.image_id = to_image_ctx->id;
+ parent_spec.snap_id = to_snap_id;
+
+ C_SaferCond on_reattach_parent;
+ auto reattach_parent_req = image::AttachParentRequest<I>::create(
+ *child_image_ctx, parent_spec, parent_overlap, true, &on_reattach_parent);
+ reattach_parent_req->send();
+ r = on_reattach_parent.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to re-attach parent: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ if (reattach_child) {
+ C_SaferCond on_reattach_child;
+ auto reattach_child_req = image::AttachChildRequest<I>::create(
+ child_image_ctx, to_image_ctx, to_snap_id, from_image_ctx, from_snap.id,
+ clone_format, &on_reattach_child);
+ reattach_child_req->send();
+ r = on_reattach_child.wait();
+ if (r < 0) {
+ lderr(m_cct) << "failed to re-attach child: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ child_image_ctx->notify_update();
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::remove_src_image() {
+ ldout(m_cct, 10) << dendl;
+
+ std::vector<librbd::snap_info_t> snaps;
+ int r = list_src_snaps(&snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ for (auto it = snaps.rbegin(); it != snaps.rend(); it++) {
+ auto &snap = *it;
+
+ librbd::NoOpProgressContext prog_ctx;
+ int r = snap_remove(m_src_image_ctx, snap.name.c_str(),
+ RBD_SNAP_REMOVE_UNPROTECT, prog_ctx);
+ if (r < 0) {
+ lderr(m_cct) << "failed removing source image snapshot '" << snap.name
+ << "': " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+
+ ceph_assert(m_src_image_ctx->ignore_migrating);
+
+ ThreadPool *thread_pool;
+ ContextWQ *op_work_queue;
+ ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &op_work_queue);
+ C_SaferCond on_remove;
+ auto req = librbd::image::RemoveRequest<I>::create(
+ m_src_io_ctx, m_src_image_ctx, false, true, *m_prog_ctx, op_work_queue,
+ &on_remove);
+ req->send();
+ r = on_remove.wait();
+
+ m_src_image_ctx = nullptr;
+
+  // For an old format image it will return -ENOENT due to the expected
+  // tmap_rm failure at the end.
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "failed removing source image: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (!m_src_image_id.empty()) {
+ r = cls_client::trash_remove(&m_src_io_ctx, m_src_image_id);
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "error removing image " << m_src_image_id
+ << " from rbd_trash object" << dendl;
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+int Migration<I>::revert_data(I* src_image_ctx, I* dst_image_ctx,
+ ProgressContext* prog_ctx) {
+ ldout(m_cct, 10) << dendl;
+
+ cls::rbd::MigrationSpec migration_spec;
+ int r = cls_client::migration_get(&src_image_ctx->md_ctx,
+ src_image_ctx->header_oid,
+ &migration_spec);
+
+ if (r < 0) {
+ lderr(m_cct) << "failed retrieving migration header: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ if (migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_DST) {
+ lderr(m_cct) << "unexpected migration header type: "
+ << migration_spec.header_type << dendl;
+ return -EINVAL;
+ }
+
+ uint64_t src_snap_id_start = 0;
+ uint64_t src_snap_id_end = CEPH_NOSNAP;
+ uint64_t dst_snap_id_start = 0;
+ if (!migration_spec.snap_seqs.empty()) {
+ src_snap_id_start = migration_spec.snap_seqs.rbegin()->second;
+ }
+
+ // we only care about the HEAD revision so only add a single mapping to
+ // represent the most recent state
+ SnapSeqs snap_seqs;
+ snap_seqs[CEPH_NOSNAP] = CEPH_NOSNAP;
+
+ ldout(m_cct, 20) << "src_snap_id_start=" << src_snap_id_start << ", "
+ << "src_snap_id_end=" << src_snap_id_end << ", "
+ << "snap_seqs=" << snap_seqs << dendl;
+
+ C_SaferCond ctx;
+ auto request = deep_copy::ImageCopyRequest<I>::create(
+ src_image_ctx, dst_image_ctx, src_snap_id_start, src_snap_id_end,
+ dst_snap_id_start, false, {}, snap_seqs, prog_ctx, &ctx);
+ request->send();
+
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(m_cct) << "error reverting destination image data blocks back to "
+ << "source image: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Migration<librbd::ImageCtx>;
diff --git a/src/librbd/api/Migration.h b/src/librbd/api/Migration.h
new file mode 100644
index 00000000..52a4517b
--- /dev/null
+++ b/src/librbd/api/Migration.h
@@ -0,0 +1,105 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_MIGRATION_H
+#define CEPH_LIBRBD_API_MIGRATION_H
+
+#include "include/int_types.h"
+#include "include/rados/librados_fwd.hpp"
+#include "include/rbd/librbd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+
+#include <vector>
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace api {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Migration {
+public:
+ static int prepare(librados::IoCtx& io_ctx, const std::string &image_name,
+ librados::IoCtx& dest_io_ctx,
+ const std::string &dest_image_name, ImageOptions& opts);
+ static int execute(librados::IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext &prog_ctx);
+ static int abort(librados::IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext &prog_ctx);
+ static int commit(librados::IoCtx& io_ctx, const std::string &image_name,
+ ProgressContext &prog_ctx);
+ static int status(librados::IoCtx& io_ctx, const std::string &image_name,
+ image_migration_status_t *status);
+
+private:
+ CephContext* m_cct;
+ ImageCtxT *m_src_image_ctx;
+ librados::IoCtx m_src_io_ctx;
+ librados::IoCtx &m_dst_io_ctx;
+ bool m_src_old_format;
+ std::string m_src_image_name;
+ std::string m_src_image_id;
+ std::string m_src_header_oid;
+ std::string m_dst_image_name;
+ std::string m_dst_image_id;
+ std::string m_dst_header_oid;
+ ImageOptions &m_image_options;
+ bool m_flatten;
+ bool m_mirroring;
+ ProgressContext *m_prog_ctx;
+
+ cls::rbd::MigrationSpec m_src_migration_spec;
+ cls::rbd::MigrationSpec m_dst_migration_spec;
+
+ Migration(ImageCtxT *image_ctx, librados::IoCtx& dest_io_ctx,
+ const std::string &dest_image_name, const std::string &dst_image_id,
+ ImageOptions& opts, bool flatten, bool mirroring,
+ cls::rbd::MigrationState state, const std::string &state_desc,
+ ProgressContext *prog_ctx);
+
+ int prepare();
+ int execute();
+ int abort();
+ int commit();
+ int status(image_migration_status_t *status);
+
+ int set_state(cls::rbd::MigrationState state, const std::string &description);
+
+ int list_src_snaps(std::vector<librbd::snap_info_t> *snaps);
+ int validate_src_snaps();
+ int disable_mirroring(ImageCtxT *image_ctx, bool *was_enabled);
+ int enable_mirroring(ImageCtxT *image_ctx, bool was_enabled);
+ int set_migration();
+ int unlink_src_image();
+ int relink_src_image();
+ int create_dst_image();
+ int remove_group(ImageCtxT *image_ctx, group_info_t *group_info);
+ int add_group(ImageCtxT *image_ctx, group_info_t &group_info);
+ int update_group(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx);
+ int remove_migration(ImageCtxT *image_ctx);
+ int relink_children(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx);
+ int remove_src_image();
+
+ int v1_set_migration();
+ int v2_set_migration();
+ int v1_unlink_src_image();
+ int v2_unlink_src_image();
+ int v1_relink_src_image();
+ int v2_relink_src_image();
+
+ int relink_child(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx,
+ const librbd::snap_info_t &src_snap,
+ const librbd::linked_image_spec_t &child_image,
+ bool migration_abort, bool reattach_child);
+
+ int revert_data(ImageCtxT* src_image_ctx, ImageCtxT* dst_image_ctx,
+ ProgressContext *prog_ctx);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Migration<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_MIGRATION_H
diff --git a/src/librbd/api/Mirror.cc b/src/librbd/api/Mirror.cc
new file mode 100644
index 00000000..4e7b7dc8
--- /dev/null
+++ b/src/librbd/api/Mirror.cc
@@ -0,0 +1,1541 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Mirror.h"
+#include "include/rados/librados.hpp"
+#include "include/stringify.h"
+#include "common/ceph_json.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Image.h"
+#include "librbd/mirror/DemoteRequest.h"
+#include "librbd/mirror/DisableRequest.h"
+#include "librbd/mirror/EnableRequest.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include "librbd/mirror/GetStatusRequest.h"
+#include "librbd/mirror/PromoteRequest.h"
+#include "librbd/mirror/Types.h"
+#include "librbd/MirroringWatcher.h"
+#include <boost/algorithm/string/trim.hpp>
+#include <boost/algorithm/string/replace.hpp>
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Mirror: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+int get_config_key(librados::Rados& rados, const std::string& key,
+ std::string* value) {
+ std::string cmd =
+ "{"
+ "\"prefix\": \"config-key get\", "
+ "\"key\": \"" + key + "\""
+ "}";
+
+ bufferlist in_bl;
+ bufferlist out_bl;
+
+ int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r == -EINVAL) {
+ return -EOPNOTSUPP;
+ } else if (r < 0 && r != -ENOENT) {
+ return r;
+ }
+
+ *value = out_bl.to_str();
+ return 0;
+}
+
+int set_config_key(librados::Rados& rados, const std::string& key,
+ const std::string& value) {
+ std::string cmd;
+ if (value.empty()) {
+ cmd = "{"
+ "\"prefix\": \"config-key rm\", "
+ "\"key\": \"" + key + "\""
+ "}";
+ } else {
+ cmd = "{"
+ "\"prefix\": \"config-key set\", "
+ "\"key\": \"" + key + "\", "
+ "\"val\": \"" + value + "\""
+ "}";
+ }
+ bufferlist in_bl;
+ bufferlist out_bl;
+
+ int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r == -EINVAL) {
+ return -EOPNOTSUPP;
+ } else if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+std::string get_peer_config_key_name(int64_t pool_id,
+ const std::string& peer_uuid) {
+ return RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) + "/" +
+ peer_uuid;
+}
+
+int remove_peer_config_key(librados::IoCtx& io_ctx,
+ const std::string& peer_uuid) {
+ int64_t pool_id = io_ctx.get_id();
+ auto key = get_peer_config_key_name(pool_id, peer_uuid);
+
+ librados::Rados rados(io_ctx);
+ int r = set_config_key(rados, key, "");
+ if (r < 0 && r != -ENOENT && r != -EPERM) {
+ return r;
+ }
+ return 0;
+}
+
+int create_bootstrap_user(CephContext* cct, librados::Rados& rados,
+ std::string* peer_client_id, std::string* cephx_key) {
+ ldout(cct, 20) << dendl;
+
+ // retrieve peer CephX user from config-key
+ int r = get_config_key(rados, RBD_MIRROR_PEER_CLIENT_ID_CONFIG_KEY,
+ peer_client_id);
+ if (r == -EACCES) {
+ ldout(cct, 5) << "insufficient permissions to get peer-client-id "
+ << "config-key" << dendl;
+ return r;
+ } else if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve peer client id key: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (r == -ENOENT || peer_client_id->empty()) {
+ ldout(cct, 20) << "creating new peer-client-id config-key" << dendl;
+
+ *peer_client_id = "rbd-mirror-peer";
+ r = set_config_key(rados, RBD_MIRROR_PEER_CLIENT_ID_CONFIG_KEY,
+ *peer_client_id);
+ if (r == -EACCES) {
+ ldout(cct, 5) << "insufficient permissions to update peer-client-id "
+ << "config-key" << dendl;
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to update peer client id key: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ }
+ ldout(cct, 20) << "peer_client_id=" << *peer_client_id << dendl;
+
+ // create peer client user
+ std::string cmd =
+ R"({)" \
+ R"( "prefix": "auth get-or-create",)" \
+ R"( "entity": "client.)" + *peer_client_id + R"(",)" \
+ R"( "caps": [)" \
+ R"( "mon", "profile rbd-mirror-peer",)" \
+ R"( "osd", "profile rbd"],)" \
+ R"( "format": "json")" \
+ R"(})";
+
+ bufferlist in_bl;
+ bufferlist out_bl;
+
+ r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r == -EINVAL) {
+ ldout(cct, 5) << "caps mismatch for existing user" << dendl;
+ return -EEXIST;
+ } else if (r == -EACCES) {
+ ldout(cct, 5) << "insufficient permissions to create user" << dendl;
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to create or update RBD mirroring bootstrap user: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ // extract key from response
+ bool json_valid = false;
+ json_spirit::mValue json_root;
+ if(json_spirit::read(out_bl.to_str(), json_root)) {
+ try {
+ auto& json_obj = json_root.get_array()[0].get_obj();
+ *cephx_key = json_obj["key"].get_str();
+ json_valid = true;
+ } catch (std::runtime_error&) {
+ }
+ }
+
+ if (!json_valid) {
+ lderr(cct) << "invalid auth keyring JSON received" << dendl;
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+int create_bootstrap_peer(CephContext* cct, librados::IoCtx& io_ctx,
+ const std::string& site_name, const std::string& fsid,
+ const std::string& client_id, const std::string& key,
+ const std::string& mon_host,
+ const std::string& cluster1,
+ const std::string& cluster2) {
+ ldout(cct, 20) << dendl;
+
+ std::string peer_uuid;
+ std::vector<mirror_peer_t> peers;
+ int r = Mirror<>::peer_list(io_ctx, &peers);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to list mirror peers: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (peers.empty()) {
+ r = Mirror<>::peer_add(io_ctx, &peer_uuid, site_name,
+ "client." + client_id);
+ if (r < 0) {
+ lderr(cct) << "failed to add " << cluster1 << " peer to "
+ << cluster2 << " " << "cluster: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ } else if (peers[0].cluster_name != site_name &&
+ peers[0].cluster_name != fsid) {
+ // only support a single peer
+ lderr(cct) << "multiple peers are not currently supported" << dendl;
+ return -EINVAL;
+ } else {
+ peer_uuid = peers[0].uuid;
+
+ if (peers[0].cluster_name != site_name) {
+ r = Mirror<>::peer_set_cluster(io_ctx, peer_uuid, site_name);
+ if (r < 0) {
+ // non-fatal attempt to update site name
+ lderr(cct) << "failed to update peer site name" << dendl;
+ }
+ }
+ }
+
+ Mirror<>::Attributes attributes {
+ {"mon_host", mon_host},
+ {"key", key}};
+ r = Mirror<>::peer_set_attributes(io_ctx, peer_uuid, attributes);
+ if (r < 0) {
+ lderr(cct) << "failed to update " << cluster1 << " cluster connection "
+ << "attributes in " << cluster2 << " cluster: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int validate_mirroring_enabled(I *ictx) {
+ CephContext *cct = ictx->cct;
+ cls::rbd::MirrorImage mirror_image_internal;
+ int r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id,
+ &mirror_image_internal);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve mirroring state: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (mirror_image_internal.state !=
+ cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+ lderr(cct) << "mirroring is not currently enabled" << dendl;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int list_mirror_images(librados::IoCtx& io_ctx,
+ std::set<std::string>& mirror_image_ids) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+ std::string last_read = "";
+ int max_read = 1024;
+ int r;
+ do {
+ std::map<std::string, std::string> mirror_images;
+ r = cls_client::mirror_image_list(&io_ctx, last_read, max_read,
+ &mirror_images);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "error listing mirrored image directory: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ for (auto it = mirror_images.begin(); it != mirror_images.end(); ++it) {
+ mirror_image_ids.insert(it->first);
+ }
+ if (!mirror_images.empty()) {
+ last_read = mirror_images.rbegin()->first;
+ }
+ r = mirror_images.size();
+ } while (r == max_read);
+
+ return 0;
+}
+
+// Completion context that translates the collected cls-level mirror image
+// record and promotion state into the user-facing mirror_image_info_t
+// before chaining to the caller's completion context.
+struct C_ImageGetInfo : public Context {
+ mirror_image_info_t *mirror_image_info;
+ Context *on_finish;
+
+ // filled in by mirror::GetInfoRequest before finish() runs
+ cls::rbd::MirrorImage mirror_image;
+ mirror::PromotionState promotion_state = mirror::PROMOTION_STATE_PRIMARY;
+
+ C_ImageGetInfo(mirror_image_info_t *mirror_image_info, Context *on_finish)
+ : mirror_image_info(mirror_image_info), on_finish(on_finish) {
+ }
+
+ void finish(int r) override {
+ if (r < 0) {
+ // propagate the error without touching the output struct
+ on_finish->complete(r);
+ return;
+ }
+
+ mirror_image_info->global_id = mirror_image.global_image_id;
+ mirror_image_info->state = static_cast<rbd_mirror_image_state_t>(
+ mirror_image.state);
+ mirror_image_info->primary = (
+ promotion_state == mirror::PROMOTION_STATE_PRIMARY);
+ on_finish->complete(0);
+ }
+};
+
+// Completion context for image_get_status(): fills in the status portion
+// of mirror_image_status_t and then delegates to C_ImageGetInfo::finish()
+// to populate the embedded mirror_image_info_t.
+struct C_ImageGetStatus : public C_ImageGetInfo {
+ std::string image_name;
+ mirror_image_status_t *mirror_image_status;
+
+ // filled in by mirror::GetStatusRequest before finish() runs
+ cls::rbd::MirrorImageStatus mirror_image_status_internal;
+
+ C_ImageGetStatus(const std::string &image_name,
+ mirror_image_status_t *mirror_image_status,
+ Context *on_finish)
+ : C_ImageGetInfo(&mirror_image_status->info, on_finish),
+ image_name(image_name), mirror_image_status(mirror_image_status) {
+ }
+
+ void finish(int r) override {
+ if (r < 0) {
+ on_finish->complete(r);
+ return;
+ }
+
+ mirror_image_status->name = image_name;
+ mirror_image_status->state = static_cast<mirror_image_status_state_t>(
+ mirror_image_status_internal.state);
+ mirror_image_status->description = mirror_image_status_internal.description;
+ mirror_image_status->last_update =
+ mirror_image_status_internal.last_update.sec();
+ mirror_image_status->up = mirror_image_status_internal.up;
+ // base class copies the info fields and completes on_finish
+ C_ImageGetInfo::finish(0);
+ }
+};
+
+} // anonymous namespace
+
+// Enable mirroring on a single image.  The pool must be in per-image
+// mirroring mode and the image must have journaling enabled.  When
+// relax_same_pool_parent_check is true, a clone whose parent lives in the
+// same pool only needs the parent to have journaling (used by mode_set()
+// when sweeping a whole pool); otherwise the parent itself must already
+// be mirrored.  Blocks until the EnableRequest completes.
+template <typename I>
+int Mirror<I>::image_enable(I *ictx, bool relax_same_pool_parent_check) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ // TODO
+ if (!ictx->md_ctx.get_namespace().empty()) {
+ lderr(cct) << "namespaces are not supported" << dendl;
+ return -EINVAL;
+ }
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::MirrorMode mirror_mode;
+ r = cls_client::mirror_mode_get(&ictx->md_ctx, &mirror_mode);
+ if (r < 0) {
+ lderr(cct) << "cannot enable mirroring: failed to retrieve mirror mode: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) {
+ lderr(cct) << "cannot enable mirroring in the current pool mirroring mode"
+ << dendl;
+ return -EINVAL;
+ }
+
+ // is mirroring not enabled for the parent?
+ {
+ RWLock::RLocker l(ictx->parent_lock);
+ ImageCtx *parent = ictx->parent;
+ if (parent) {
+ if (relax_same_pool_parent_check &&
+ parent->md_ctx.get_id() == ictx->md_ctx.get_id()) {
+ // same-pool parent: journaling alone is sufficient
+ if (!parent->test_features(RBD_FEATURE_JOURNALING)) {
+ lderr(cct) << "journaling is not enabled for the parent" << dendl;
+ return -EINVAL;
+ }
+ } else {
+ // cross-pool (or strict) check: parent must itself be mirrored
+ cls::rbd::MirrorImage mirror_image_internal;
+ r = cls_client::mirror_image_get(&(parent->md_ctx), parent->id,
+ &mirror_image_internal);
+ if (r == -ENOENT) {
+ lderr(cct) << "mirroring is not enabled for the parent" << dendl;
+ return -EINVAL;
+ }
+ }
+ }
+ }
+
+ if ((ictx->features & RBD_FEATURE_JOURNALING) == 0) {
+ lderr(cct) << "cannot enable mirroring: journaling is not enabled" << dendl;
+ return -EINVAL;
+ }
+
+ // synchronously run the state-machine that registers the image in the
+ // mirroring directory and allocates its global image id
+ C_SaferCond ctx;
+ auto req = mirror::EnableRequest<ImageCtx>::create(ictx, &ctx);
+ req->send();
+
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "cannot enable mirroring: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+// Disable mirroring on a single image.  The image is first flipped to the
+// DISABLING state; if any subsequent step fails (or a clone of this image
+// is still mirrored), the BOOST_SCOPE_EXIT rollback restores the ENABLED
+// state before returning the error.  With force=false, disabling is
+// refused while mirrored children exist.
+template <typename I>
+int Mirror<I>::image_disable(I *ictx, bool force) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ cls::rbd::MirrorMode mirror_mode;
+ r = cls_client::mirror_mode_get(&ictx->md_ctx, &mirror_mode);
+ if (r < 0) {
+ lderr(cct) << "cannot disable mirroring: failed to retrieve pool "
+ "mirroring mode: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ if (mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) {
+ lderr(cct) << "cannot disable mirroring in the current pool mirroring "
+ "mode" << dendl;
+ return -EINVAL;
+ }
+
+ // is mirroring enabled for the child?
+ cls::rbd::MirrorImage mirror_image_internal;
+ r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id,
+ &mirror_image_internal);
+ if (r == -ENOENT) {
+ // mirroring is not enabled for this image
+ ldout(cct, 20) << "ignoring disable command: mirroring is not enabled for "
+ << "this image" << dendl;
+ return 0;
+ } else if (r == -EOPNOTSUPP) {
+ ldout(cct, 5) << "mirroring not supported by OSD" << dendl;
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to retrieve mirror image metadata: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ // mark DISABLING first so rbd-mirror daemons stop replaying the image
+ mirror_image_internal.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING;
+ r = cls_client::mirror_image_set(&ictx->md_ctx, ictx->id,
+ mirror_image_internal);
+ if (r < 0) {
+ lderr(cct) << "cannot disable mirroring: " << cpp_strerror(r) << dendl;
+ return r;
+ } else {
+ bool rollback = false;
+ // on any failure below, restore the ENABLED state before returning
+ BOOST_SCOPE_EXIT_ALL(ictx, &mirror_image_internal, &rollback) {
+ if (rollback) {
+ CephContext *cct = ictx->cct;
+ mirror_image_internal.state = cls::rbd::MIRROR_IMAGE_STATE_ENABLED;
+ int r = cls_client::mirror_image_set(&ictx->md_ctx, ictx->id,
+ mirror_image_internal);
+ if (r < 0) {
+ lderr(cct) << "failed to re-enable image mirroring: "
+ << cpp_strerror(r) << dendl;
+ }
+ }
+ };
+
+ {
+ // scan every snapshot for cloned children that are still mirrored
+ RWLock::RLocker l(ictx->snap_lock);
+ map<librados::snap_t, SnapInfo> snap_info = ictx->snap_info;
+ for (auto &info : snap_info) {
+ cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(),
+ ictx->md_ctx.get_namespace(),
+ ictx->id, info.first};
+ std::vector<librbd::linked_image_spec_t> child_images;
+ r = Image<I>::list_children(ictx, parent_spec, &child_images);
+ if (r < 0) {
+ rollback = true;
+ return r;
+ }
+
+ if (child_images.empty()) {
+ continue;
+ }
+
+ librados::IoCtx child_io_ctx;
+ int64_t child_pool_id = -1;
+ for (auto &child_image : child_images){
+ std::string pool = child_image.pool_name;
+ // only re-open the IoCtx when the child's pool/namespace changes
+ if (child_pool_id == -1 ||
+ child_pool_id != child_image.pool_id ||
+ child_io_ctx.get_namespace() != child_image.pool_namespace) {
+ r = util::create_ioctx(ictx->md_ctx, "child image",
+ child_image.pool_id,
+ child_image.pool_namespace,
+ &child_io_ctx);
+ if (r < 0) {
+ rollback = true;
+ return r;
+ }
+
+ child_pool_id = child_image.pool_id;
+ }
+
+ cls::rbd::MirrorImage mirror_image_internal;
+ r = cls_client::mirror_image_get(&child_io_ctx, child_image.image_id,
+ &mirror_image_internal);
+ // NOTE(review): any result other than -ENOENT (including real
+ // errors) is treated as "child is mirrored" and aborts with
+ // -EBUSY -- confirm this conservative behavior is intended
+ if (r != -ENOENT) {
+ rollback = true;
+ lderr(cct) << "mirroring is enabled on one or more children "
+ << dendl;
+ return -EBUSY;
+ }
+ }
+ }
+ }
+
+ // synchronously run the disable state machine (removes the image from
+ // the mirroring directory and cleans up journal clients)
+ C_SaferCond ctx;
+ auto req = mirror::DisableRequest<ImageCtx>::create(ictx, force, true,
+ &ctx);
+ req->send();
+
+ r = ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "cannot disable mirroring: " << cpp_strerror(r) << dendl;
+ rollback = true;
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+// Synchronous wrapper: run the asynchronous promote operation and block
+// until it completes.  Returns 0 on success, negative errno on failure.
+template <typename I>
+int Mirror<I>::image_promote(I *ictx, bool force) {
+  CephContext *cct = ictx->cct;
+
+  C_SaferCond cond;
+  Mirror<I>::image_promote(ictx, force, &cond);
+  int ret = cond.wait();
+  if (ret < 0) {
+    lderr(cct) << "failed to promote image" << dendl;
+    return ret;
+  }
+  return 0;
+}
+
+// Asynchronously promote the image to primary.  With force=true the
+// promotion proceeds even if the previous primary is unreachable.
+template <typename I>
+void Mirror<I>::image_promote(I *ictx, bool force, Context *on_finish) {
+  CephContext *cct = ictx->cct;
+  ldout(cct, 20) << "ictx=" << ictx << ", "
+                 << "force=" << force << dendl;
+
+  // the request completes on_finish and destroys itself
+  mirror::PromoteRequest<>::create(*ictx, force, on_finish)->send();
+}
+
+// Synchronous wrapper: run the asynchronous demote operation and block
+// until it completes.  Returns 0 on success, negative errno on failure.
+template <typename I>
+int Mirror<I>::image_demote(I *ictx) {
+  CephContext *cct = ictx->cct;
+
+  C_SaferCond cond;
+  Mirror<I>::image_demote(ictx, &cond);
+  int ret = cond.wait();
+  if (ret < 0) {
+    lderr(cct) << "failed to demote image" << dendl;
+    return ret;
+  }
+  return 0;
+}
+
+// Asynchronously demote the (primary) image to non-primary.
+template <typename I>
+void Mirror<I>::image_demote(I *ictx, Context *on_finish) {
+  CephContext *cct = ictx->cct;
+  ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+  // the request completes on_finish and destroys itself
+  mirror::DemoteRequest<>::create(*ictx, on_finish)->send();
+}
+
+// Request a full resync of a non-primary image from its primary peer.
+// Refuses to resync a primary image (that would destroy the source of
+// truth); otherwise flags the journal so the rbd-mirror daemon rebuilds
+// the local copy.
+template <typename I>
+int Mirror<I>::image_resync(I *ictx) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ int r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ // primary-ness is determined by journal tag ownership
+ C_SaferCond tag_owner_ctx;
+ bool is_tag_owner;
+ Journal<I>::is_tag_owner(ictx, &is_tag_owner, &tag_owner_ctx);
+ r = tag_owner_ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to determine tag ownership: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (is_tag_owner) {
+ lderr(cct) << "image is primary, cannot resync to itself" << dendl;
+ return -EINVAL;
+ }
+
+ // flag the journal indicating that we want to rebuild the local image
+ r = Journal<I>::request_resync(ictx);
+ if (r < 0) {
+ lderr(cct) << "failed to request resync: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+// Asynchronously retrieve the mirroring info (global id, state, primary
+// flag) for an image, refreshing the image first if required.  on_finish
+// is completed with 0 on success or a negative errno.
+template <typename I>
+void Mirror<I>::image_get_info(I *ictx, mirror_image_info_t *mirror_image_info,
+ Context *on_finish) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ // captures ictx/mirror_image_info by value (pointers); the caller must
+ // keep both alive until on_finish is completed
+ auto on_refresh = new FunctionContext(
+ [ictx, mirror_image_info, on_finish](int r) {
+ if (r < 0) {
+ lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl;
+ on_finish->complete(r);
+ return;
+ }
+
+ // C_ImageGetInfo copies the request's results into mirror_image_info
+ auto ctx = new C_ImageGetInfo(mirror_image_info, on_finish);
+ auto req = mirror::GetInfoRequest<I>::create(*ictx, &ctx->mirror_image,
+ &ctx->promotion_state,
+ ctx);
+ req->send();
+ });
+
+ if (ictx->state->is_refresh_required()) {
+ ictx->state->refresh(on_refresh);
+ } else {
+ on_refresh->complete(0);
+ }
+}
+
+// Synchronous wrapper: block until the asynchronous image_get_info()
+// completes.  Returns 0 on success, negative errno on failure.
+template <typename I>
+int Mirror<I>::image_get_info(I *ictx, mirror_image_info_t *mirror_image_info) {
+  C_SaferCond cond;
+  image_get_info(ictx, mirror_image_info, &cond);
+
+  int ret = cond.wait();
+  return (ret < 0) ? ret : 0;
+}
+
+// Asynchronously retrieve the mirroring status (state, description,
+// last update, up flag) plus info for an image.  C_ImageGetStatus copies
+// the raw results into *status before completing on_finish.
+template <typename I>
+void Mirror<I>::image_get_status(I *ictx, mirror_image_status_t *status,
+ Context *on_finish) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ auto ctx = new C_ImageGetStatus(ictx->name, status, on_finish);
+ auto req = mirror::GetStatusRequest<I>::create(
+ *ictx, &ctx->mirror_image_status_internal, &ctx->mirror_image,
+ &ctx->promotion_state, ctx);
+ req->send();
+}
+
+// Synchronous wrapper: block until the asynchronous image_get_status()
+// completes.  Returns 0 on success, negative errno on failure.
+template <typename I>
+int Mirror<I>::image_get_status(I *ictx, mirror_image_status_t *status) {
+  C_SaferCond cond;
+  image_get_status(ictx, status, &cond);
+
+  int ret = cond.wait();
+  return (ret < 0) ? ret : 0;
+}
+
+// Look up the id of the rbd-mirror daemon instance currently servicing
+// this image.  Requires mirroring to be enabled on the image; returns
+// -ENOENT/-ESTALE quietly when no (live) instance has the image.
+template <typename I>
+int Mirror<I>::image_get_instance_id(I *ictx, std::string *instance_id) {
+ CephContext *cct = ictx->cct;
+ ldout(cct, 20) << "ictx=" << ictx << dendl;
+
+ cls::rbd::MirrorImage mirror_image;
+ int r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, &mirror_image);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve mirroring state: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (mirror_image.state != cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+ // -ENOENT also lands here: a default-constructed MirrorImage is not
+ // ENABLED, so a non-mirrored image yields -EINVAL
+ lderr(cct) << "mirroring is not currently enabled" << dendl;
+ return -EINVAL;
+ }
+
+ entity_inst_t instance;
+ r = cls_client::mirror_image_instance_get(&ictx->md_ctx,
+ mirror_image.global_image_id,
+ &instance);
+ if (r < 0) {
+ // -ENOENT/-ESTALE simply mean no live daemon owns the image
+ if (r != -ENOENT && r != -ESTALE) {
+ lderr(cct) << "failed to get mirror image instance: " << cpp_strerror(r)
+ << dendl;
+ }
+ return r;
+ }
+
+ *instance_id = stringify(instance.name.num());
+ return 0;
+}
+
+// Retrieve the mirroring site name from the mon config-key store.  When
+// the key is missing or empty, fall back to the cluster fsid so callers
+// always get a usable site identifier.  -EOPNOTSUPP (old mons) is passed
+// through untouched.
+template <typename I>
+int Mirror<I>::site_name_get(librados::Rados& rados, std::string* name) {
+ CephContext *cct = reinterpret_cast<CephContext *>(rados.cct());
+ ldout(cct, 20) << dendl;
+
+ int r = get_config_key(rados, RBD_MIRROR_SITE_NAME_CONFIG_KEY, name);
+ if (r == -EOPNOTSUPP) {
+ return r;
+ } else if (r == -ENOENT || name->empty()) {
+ // default to the cluster fsid
+ r = rados.cluster_fsid(name);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r)
+ << dendl;
+ }
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to retrieve site name: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+// Persist the mirroring site name in the mon config-key store.  The
+// supplied name is trimmed of surrounding whitespace before being stored
+// (and logged).  -EOPNOTSUPP (old mons) is passed through untouched.
+template <typename I>
+int Mirror<I>::site_name_set(librados::Rados& rados, const std::string& name) {
+  CephContext *cct = reinterpret_cast<CephContext *>(rados.cct());
+
+  std::string site_name{name};
+  boost::algorithm::trim(site_name);
+  ldout(cct, 20) << "site_name=" << site_name << dendl;
+
+  // Fix: store the trimmed 'site_name' -- the original passed the
+  // untrimmed 'name', contradicting the trim above and the logged value.
+  int r = set_config_key(rados, RBD_MIRROR_SITE_NAME_CONFIG_KEY, site_name);
+  if (r == -EOPNOTSUPP) {
+    return r;
+  } else if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to update site name: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+// Read the pool-level mirroring mode and translate the on-disk (cls)
+// enum into the librbd API enum.  Returns -EINVAL for an unrecognized
+// on-disk value.
+template <typename I>
+int Mirror<I>::mode_get(librados::IoCtx& io_ctx,
+                        rbd_mirror_mode_t *mirror_mode) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+  ldout(cct, 20) << dendl;
+
+  cls::rbd::MirrorMode mirror_mode_internal;
+  int r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode_internal);
+  if (r < 0) {
+    lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  // reject anything other than the three known modes
+  if (mirror_mode_internal != cls::rbd::MIRROR_MODE_DISABLED &&
+      mirror_mode_internal != cls::rbd::MIRROR_MODE_IMAGE &&
+      mirror_mode_internal != cls::rbd::MIRROR_MODE_POOL) {
+    lderr(cct) << "unknown mirror mode ("
+               << static_cast<uint32_t>(mirror_mode_internal) << ")"
+               << dendl;
+    return -EINVAL;
+  }
+
+  *mirror_mode = static_cast<rbd_mirror_mode_t>(mirror_mode_internal);
+  return 0;
+}
+
+// Set the pool-level mirroring mode.  All transitions pass through
+// MIRROR_MODE_IMAGE: enabling POOL mode then enables mirroring on every
+// journaling image, while disabling first requires/performs per-image
+// disable.  Disabling also fails early while peers remain registered.
+// Returns 0 on success or a negative errno.
+template <typename I>
+int Mirror<I>::mode_set(librados::IoCtx& io_ctx,
+                        rbd_mirror_mode_t mirror_mode) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+  ldout(cct, 20) << dendl;
+
+  // TODO
+  if (!io_ctx.get_namespace().empty()) {
+    lderr(cct) << "namespaces are not supported" << dendl;
+    return -EINVAL;
+  }
+
+  cls::rbd::MirrorMode next_mirror_mode;
+  switch (mirror_mode) {
+  case RBD_MIRROR_MODE_DISABLED:
+  case RBD_MIRROR_MODE_IMAGE:
+  case RBD_MIRROR_MODE_POOL:
+    next_mirror_mode = static_cast<cls::rbd::MirrorMode>(mirror_mode);
+    break;
+  default:
+    lderr(cct) << "unknown mirror mode ("
+               << static_cast<uint32_t>(mirror_mode) << ")" << dendl;
+    return -EINVAL;
+  }
+
+  int r;
+  if (next_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+    // fail early if pool still has peers registered and attempting to disable
+    std::vector<cls::rbd::MirrorPeer> mirror_peers;
+    r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers);
+    if (r < 0 && r != -ENOENT) {
+      lderr(cct) << "failed to list peers: " << cpp_strerror(r) << dendl;
+      return r;
+    } else if (!mirror_peers.empty()) {
+      lderr(cct) << "mirror peers still registered" << dendl;
+      return -EBUSY;
+    }
+  }
+
+  cls::rbd::MirrorMode current_mirror_mode;
+  r = cls_client::mirror_mode_get(&io_ctx, &current_mirror_mode);
+  if (r < 0) {
+    lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  if (current_mirror_mode == next_mirror_mode) {
+    return 0;
+  } else if (current_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+    // allocate a pool-unique mirroring uuid on first enable
+    uuid_d uuid_gen;
+    uuid_gen.generate_random();
+    r = cls_client::mirror_uuid_set(&io_ctx, uuid_gen.to_string());
+    if (r < 0) {
+      lderr(cct) << "failed to allocate mirroring uuid: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+  }
+
+  if (current_mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) {
+    // intermediate step: every transition passes through IMAGE mode
+    r = cls_client::mirror_mode_set(&io_ctx, cls::rbd::MIRROR_MODE_IMAGE);
+    if (r < 0) {
+      lderr(cct) << "failed to set mirror mode to image: "
+                 << cpp_strerror(r) << dendl;
+      return r;
+    }
+
+    // notification failures are logged but not fatal
+    r = MirroringWatcher<>::notify_mode_updated(io_ctx,
+                                                cls::rbd::MIRROR_MODE_IMAGE);
+    if (r < 0) {
+      lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+                 << dendl;
+    }
+  }
+
+  if (next_mirror_mode == cls::rbd::MIRROR_MODE_IMAGE) {
+    return 0;
+  }
+
+  if (next_mirror_mode == cls::rbd::MIRROR_MODE_POOL) {
+    // enable mirroring on every journaling-capable image in the pool
+    map<string, string> images;
+    r = Image<I>::list_images_v2(io_ctx, &images);
+    if (r < 0) {
+      lderr(cct) << "failed listing images: " << cpp_strerror(r) << dendl;
+      return r;
+    }
+
+    for (const auto& img_pair : images) {
+      uint64_t features;
+      uint64_t incompatible_features;
+      r = cls_client::get_features(&io_ctx, util::header_name(img_pair.second),
+                                   true, &features, &incompatible_features);
+      if (r < 0) {
+        lderr(cct) << "error getting features for image " << img_pair.first
+                   << ": " << cpp_strerror(r) << dendl;
+        return r;
+      }
+
+      if ((features & RBD_FEATURE_JOURNALING) != 0) {
+        I *img_ctx = I::create("", img_pair.second, nullptr, io_ctx, false);
+        r = img_ctx->state->open(0);
+        if (r < 0) {
+          lderr(cct) << "error opening image "<< img_pair.first << ": "
+                     << cpp_strerror(r) << dendl;
+          return r;
+        }
+
+        // relax the parent check: the parent lives in this same pool and
+        // is being enabled by this very sweep
+        r = image_enable(img_ctx, true);
+        int close_r = img_ctx->state->close();
+        if (r < 0) {
+          lderr(cct) << "error enabling mirroring for image "
+                     << img_pair.first << ": " << cpp_strerror(r) << dendl;
+          return r;
+        } else if (close_r < 0) {
+          lderr(cct) << "failed to close image " << img_pair.first << ": "
+                     << cpp_strerror(close_r) << dendl;
+          return close_r;
+        }
+      }
+    }
+  } else if (next_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+    // repeatedly sweep the mirror directory: a clone can only be disabled
+    // after its mirrored children are, so -EBUSY failures are retried as
+    // long as each pass makes progress
+    while (true) {
+      bool retry_busy = false;
+      bool pending_busy = false;
+
+      std::set<std::string> image_ids;
+      r = list_mirror_images(io_ctx, image_ids);
+      if (r < 0) {
+        lderr(cct) << "failed listing images: " << cpp_strerror(r) << dendl;
+        return r;
+      }
+
+      for (const auto& img_id : image_ids) {
+        if (current_mirror_mode == cls::rbd::MIRROR_MODE_IMAGE) {
+          // per-image mode: the user must disable each image explicitly
+          cls::rbd::MirrorImage mirror_image;
+          r = cls_client::mirror_image_get(&io_ctx, img_id, &mirror_image);
+          if (r < 0 && r != -ENOENT) {
+            lderr(cct) << "failed to retrieve mirroring state for image id "
+                       << img_id << ": " << cpp_strerror(r) << dendl;
+            return r;
+          }
+          if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+            lderr(cct) << "failed to disable mirror mode: there are still "
+                       << "images with mirroring enabled" << dendl;
+            return -EINVAL;
+          }
+        } else {
+          I *img_ctx = I::create("", img_id, nullptr, io_ctx, false);
+          r = img_ctx->state->open(0);
+          if (r < 0) {
+            lderr(cct) << "error opening image id "<< img_id << ": "
+                       << cpp_strerror(r) << dendl;
+            return r;
+          }
+
+          r = image_disable(img_ctx, false);
+          int close_r = img_ctx->state->close();
+          if (r == -EBUSY) {
+            pending_busy = true;
+          } else if (r < 0) {
+            // Fix: insert ": " between the image id and the error text --
+            // the original log message ran them together.
+            lderr(cct) << "error disabling mirroring for image id " << img_id
+                       << ": " << cpp_strerror(r) << dendl;
+            return r;
+          } else if (close_r < 0) {
+            lderr(cct) << "failed to close image id " << img_id << ": "
+                       << cpp_strerror(close_r) << dendl;
+            return close_r;
+          } else if (pending_busy) {
+            // at least one mirrored image was successfully disabled, so we can
+            // retry any failures caused by busy parent/child relationships
+            retry_busy = true;
+          }
+        }
+      }
+
+      if (!retry_busy && pending_busy) {
+        lderr(cct) << "error disabling mirroring for one or more images"
+                   << dendl;
+        return -EBUSY;
+      } else if (!retry_busy) {
+        break;
+      }
+    }
+  }
+
+  r = cls_client::mirror_mode_set(&io_ctx, next_mirror_mode);
+  if (r < 0) {
+    lderr(cct) << "failed to set mirror mode: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  // notification failures are logged but not fatal
+  r = MirroringWatcher<>::notify_mode_updated(io_ctx, next_mirror_mode);
+  if (r < 0) {
+    lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+               << dendl;
+  }
+  return 0;
+}
+
+// Create a one-shot bootstrap token for this cluster: a base64-encoded
+// JSON blob containing the cluster fsid, a freshly created peer cephx
+// user, its key and the mon addresses.  The remote cluster imports it
+// via peer_bootstrap_import().  Requires mirroring to be enabled on the
+// pool.
+template <typename I>
+int Mirror<I>::peer_bootstrap_create(librados::IoCtx& io_ctx,
+ std::string* token) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << dendl;
+
+ auto mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+ int r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve mirroring mode: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ return -EINVAL;
+ }
+
+ // retrieve the cluster fsid
+ std::string fsid;
+ librados::Rados rados(io_ctx);
+ r = rados.cluster_fsid(&fsid);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ // create (or fetch) the dedicated rbd-mirror peer cephx user
+ std::string peer_client_id;
+ std::string cephx_key;
+ r = create_bootstrap_user(cct, rados, &peer_client_id, &cephx_key);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string mon_host = cct->_conf.get_val<std::string>("mon_host");
+ ldout(cct, 20) << "mon_host=" << mon_host << dendl;
+
+ // format the token response
+ bufferlist token_bl;
+ token_bl.append(
+ R"({)" \
+ R"("fsid":")" + fsid + R"(",)" + \
+ R"("client_id":")" + peer_client_id + R"(",)" + \
+ R"("key":")" + cephx_key + R"(",)" + \
+ R"("mon_host":")" + \
+ boost::replace_all_copy(mon_host, "\"", "\\\"") + R"(")" + \
+ R"(})");
+ // NOTE(review): this logs the cephx secret at debug level 20 --
+ // confirm that is acceptable for debug logs
+ ldout(cct, 20) << "token=" << token_bl.to_str() << dendl;
+
+ bufferlist base64_bl;
+ token_bl.encode_base64(base64_bl);
+ *token = base64_bl.to_str();
+
+ return 0;
+}
+
+// Import a bootstrap token generated by a remote cluster's
+// peer_bootstrap_create().  Decodes and validates the token, connects to
+// the remote cluster with the embedded credentials, sanity-checks both
+// sides (distinct fsids/site names, mirroring enabled remotely), copies
+// the mirror mode locally if needed, then registers the peer(s):
+// remote->local always, and local->remote as well for RX_TX.
+template <typename I>
+int Mirror<I>::peer_bootstrap_import(librados::IoCtx& io_ctx,
+ rbd_mirror_peer_direction_t direction,
+ const std::string& token) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << dendl;
+
+ if (direction != RBD_MIRROR_PEER_DIRECTION_RX &&
+ direction != RBD_MIRROR_PEER_DIRECTION_RX_TX) {
+ lderr(cct) << "invalid mirror peer direction" << dendl;
+ return -EINVAL;
+ }
+
+ // token is base64-encoded JSON
+ bufferlist token_bl;
+ try {
+ bufferlist base64_bl;
+ base64_bl.append(token);
+ token_bl.decode_base64(base64_bl);
+ } catch (buffer::error& err) {
+ lderr(cct) << "failed to decode base64" << dendl;
+ return -EINVAL;
+ }
+
+ ldout(cct, 20) << "token=" << token_bl.to_str() << dendl;
+
+ // all four fields are required for a valid token
+ bool json_valid = false;
+ std::string expected_remote_fsid;
+ std::string remote_client_id;
+ std::string remote_key;
+ std::string remote_mon_host;
+
+ json_spirit::mValue json_root;
+ if(json_spirit::read(token_bl.to_str(), json_root)) {
+ try {
+ auto& json_obj = json_root.get_obj();
+ expected_remote_fsid = json_obj["fsid"].get_str();
+ remote_client_id = json_obj["client_id"].get_str();
+ remote_key = json_obj["key"].get_str();
+ remote_mon_host = json_obj["mon_host"].get_str();
+ json_valid = true;
+ } catch (std::runtime_error&) {
+ }
+ }
+
+ if (!json_valid) {
+ lderr(cct) << "invalid bootstrap token JSON received" << dendl;
+ return -EINVAL;
+ }
+
+ // sanity check import process
+ std::string local_fsid;
+ librados::Rados rados(io_ctx);
+ int r = rados.cluster_fsid(&local_fsid);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ std::string local_site_name;
+ r = site_name_get(rados, &local_site_name);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve cluster site name: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ // attempt to connect to remote cluster
+ librados::Rados remote_rados;
+ remote_rados.init(remote_client_id.c_str());
+
+ // inject the token's mon addresses and cephx key into the remote handle
+ auto remote_cct = reinterpret_cast<CephContext*>(remote_rados.cct());
+ remote_cct->_conf.set_val("mon_host", remote_mon_host);
+ remote_cct->_conf.set_val("key", remote_key);
+
+ r = remote_rados.connect();
+ if (r < 0) {
+ lderr(cct) << "failed to connect to peer cluster: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ // refuse self-import and mismatched tokens
+ std::string remote_fsid;
+ r = remote_rados.cluster_fsid(&remote_fsid);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve remote cluster fsid: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (local_fsid == remote_fsid) {
+ lderr(cct) << "cannot import token for local cluster" << dendl;
+ return -EINVAL;
+ } else if (expected_remote_fsid != remote_fsid) {
+ lderr(cct) << "unexpected remote cluster fsid" << dendl;
+ return -EINVAL;
+ }
+
+ std::string remote_site_name;
+ r = site_name_get(remote_rados, &remote_site_name);
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve remote cluster site name: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (local_site_name == remote_site_name) {
+ lderr(cct) << "cannot import token for duplicate site name" << dendl;
+ return -EINVAL;
+ }
+
+ // the same-named pool must exist on the remote cluster
+ librados::IoCtx remote_io_ctx;
+ r = remote_rados.ioctx_create(io_ctx.get_pool_name().c_str(), remote_io_ctx);
+ if (r == -ENOENT) {
+ ldout(cct, 10) << "remote pool does not exist" << dendl;
+ return r;
+ } else if (r < 0) {
+ lderr(cct) << "failed to open remote pool '" << io_ctx.get_pool_name()
+ << "': " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ auto remote_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+ r = cls_client::mirror_mode_get(&remote_io_ctx, &remote_mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve remote mirroring mode: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else if (remote_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ return -ENOSYS;
+ }
+
+ auto local_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+ r = cls_client::mirror_mode_get(&io_ctx, &local_mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve local mirroring mode: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ } else if (local_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ // copy mirror mode from remote peer
+ r = mode_set(io_ctx, static_cast<rbd_mirror_mode_t>(remote_mirror_mode));
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (direction == RBD_MIRROR_PEER_DIRECTION_RX_TX) {
+ // create a local mirror peer user and export it to the remote cluster
+ std::string local_client_id;
+ std::string local_key;
+ r = create_bootstrap_user(cct, rados, &local_client_id, &local_key);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string local_mon_host = cct->_conf.get_val<std::string>("mon_host");
+
+ // create local cluster peer in remote cluster
+ r = create_bootstrap_peer(cct, remote_io_ctx, local_site_name, local_fsid,
+ local_client_id, local_key, local_mon_host,
+ "local", "remote");
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ // create remote cluster peer in local cluster
+ r = create_bootstrap_peer(cct, io_ctx, remote_site_name, remote_fsid,
+ remote_client_id, remote_key, remote_mon_host,
+ "remote", "local");
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+// Register a remote cluster as a mirroring peer of this pool, generating
+// a unique peer uuid.  Retries on -ESTALE, which the OSD class returns
+// when the generated uuid collides with an existing peer.  Refuses to
+// add the local cluster as its own peer.
+template <typename I>
+int Mirror<I>::peer_add(librados::IoCtx& io_ctx, std::string *uuid,
+                        const std::string &cluster_name,
+                        const std::string &client_name) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+  ldout(cct, 20) << "name=" << cluster_name << ", "
+                 << "client=" << client_name << dendl;
+
+  // TODO
+  if (!io_ctx.get_namespace().empty()) {
+    lderr(cct) << "namespaces are not supported" << dendl;
+    return -EINVAL;
+  }
+
+  if (cct->_conf->cluster == cluster_name) {
+    lderr(cct) << "cannot add self as remote peer" << dendl;
+    return -EINVAL;
+  }
+
+  int r;
+  do {
+    uuid_d uuid_gen;
+    uuid_gen.generate_random();
+
+    *uuid = uuid_gen.to_string();
+    r = cls_client::mirror_peer_add(&io_ctx, *uuid, cluster_name,
+                                    client_name);
+    if (r == -ESTALE) {
+      ldout(cct, 5) << "duplicate UUID detected, retrying" << dendl;
+    } else if (r < 0) {
+      // Fix: dereference 'uuid' -- streaming the std::string* printed the
+      // pointer's address instead of the generated uuid.
+      lderr(cct) << "failed to add mirror peer '" << *uuid << "': "
+                 << cpp_strerror(r) << dendl;
+      return r;
+    }
+  } while (r == -ESTALE);
+  return 0;
+}
+
+// Unregister a mirroring peer: first drop its stored attributes from the
+// mon config-key store, then delete the peer record itself.  A missing
+// peer record is not treated as an error.
+template <typename I>
+int Mirror<I>::peer_remove(librados::IoCtx& io_ctx, const std::string &uuid) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+  ldout(cct, 20) << "uuid=" << uuid << dendl;
+
+  int r = remove_peer_config_key(io_ctx, uuid);
+  if (r < 0) {
+    lderr(cct) << "failed to remove peer attributes '" << uuid << "': "
+               << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  r = cls_client::mirror_peer_remove(&io_ctx, uuid);
+  if (r == -ENOENT) {
+    return 0;
+  } else if (r < 0) {
+    lderr(cct) << "failed to remove peer '" << uuid << "': "
+               << cpp_strerror(r) << dendl;
+    return r;
+  }
+  return 0;
+}
+
+// List the pool's registered mirroring peers, converting each cls-level
+// record into the public mirror_peer_t.  An empty peer list is not an
+// error.
+template <typename I>
+int Mirror<I>::peer_list(librados::IoCtx& io_ctx,
+                         std::vector<mirror_peer_t> *peers) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+  ldout(cct, 20) << dendl;
+
+  std::vector<cls::rbd::MirrorPeer> mirror_peers;
+  int r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to list peers: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  peers->clear();
+  peers->reserve(mirror_peers.size());
+  for (const auto &in : mirror_peers) {
+    mirror_peer_t out;
+    out.uuid = in.uuid;
+    out.cluster_name = in.cluster_name;
+    out.client_name = in.client_name;
+    peers->push_back(out);
+  }
+  return 0;
+}
+
+// Update the cephx client name used when connecting to an existing peer.
+template <typename I>
+int Mirror<I>::peer_set_client(librados::IoCtx& io_ctx, const std::string &uuid,
+                               const std::string &client_name) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+  ldout(cct, 20) << "uuid=" << uuid << ", "
+                 << "client=" << client_name << dendl;
+
+  int ret = cls_client::mirror_peer_set_client(&io_ctx, uuid, client_name);
+  if (ret >= 0) {
+    return 0;
+  }
+  lderr(cct) << "failed to update client '" << uuid << "': "
+             << cpp_strerror(ret) << dendl;
+  return ret;
+}
+
+// Update the remote cluster name of an existing peer.  Refuses to point
+// the peer at the local cluster itself.
+template <typename I>
+int Mirror<I>::peer_set_cluster(librados::IoCtx& io_ctx,
+                                const std::string &uuid,
+                                const std::string &cluster_name) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+  ldout(cct, 20) << "uuid=" << uuid << ", "
+                 << "cluster=" << cluster_name << dendl;
+
+  if (cct->_conf->cluster == cluster_name) {
+    lderr(cct) << "cannot set self as remote peer" << dendl;
+    return -EINVAL;
+  }
+
+  int ret = cls_client::mirror_peer_set_cluster(&io_ctx, uuid, cluster_name);
+  if (ret >= 0) {
+    return 0;
+  }
+  lderr(cct) << "failed to update cluster '" << uuid << "': "
+             << cpp_strerror(ret) << dendl;
+  return ret;
+}
+
+// Fetch the free-form attribute map stored for a peer (as JSON in the
+// mon config-key store).  Returns -ENOENT when no attributes were ever
+// stored and -EINVAL when the stored payload cannot be parsed.
+template <typename I>
+int Mirror<I>::peer_get_attributes(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ Attributes* attributes) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "uuid=" << uuid << dendl;
+
+ attributes->clear();
+
+ librados::Rados rados(io_ctx);
+ std::string value;
+ int r = get_config_key(rados, get_peer_config_key_name(io_ctx.get_id(), uuid),
+ &value);
+ if (r == -ENOENT || value.empty()) {
+ return -ENOENT;
+ } else if (r < 0) {
+ lderr(cct) << "failed to retrieve peer attributes: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ // parse the stored JSON object into a flat string->string map
+ bool json_valid = false;
+ json_spirit::mValue json_root;
+ if(json_spirit::read(value, json_root)) {
+ try {
+ auto& json_obj = json_root.get_obj();
+ for (auto& pairs : json_obj) {
+ (*attributes)[pairs.first] = pairs.second.get_str();
+ }
+ json_valid = true;
+ } catch (std::runtime_error&) {
+ // non-string value encountered -- treated as invalid payload below
+ }
+ }
+
+ if (!json_valid) {
+ lderr(cct) << "invalid peer attributes JSON received" << dendl;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+// Store a free-form attribute map for an existing peer as a JSON object
+// in the mon config-key store.  Fails with -ENOENT if the peer uuid is
+// not registered.
+template <typename I>
+int Mirror<I>::peer_set_attributes(librados::IoCtx& io_ctx,
+ const std::string &uuid,
+ const Attributes& attributes) {
+ CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+ ldout(cct, 20) << "uuid=" << uuid << ", "
+ << "attributes=" << attributes << dendl;
+
+ // ensure the peer actually exists before storing attributes for it
+ std::vector<mirror_peer_t> mirror_peers;
+ int r = peer_list(io_ctx, &mirror_peers);
+ if (r < 0) {
+ return r;
+ }
+
+ if (std::find_if(mirror_peers.begin(), mirror_peers.end(),
+ [&uuid](const librbd::mirror_peer_t& peer) {
+ return uuid == peer.uuid;
+ }) == mirror_peers.end()) {
+ ldout(cct, 5) << "mirror peer uuid " << uuid << " does not exist" << dendl;
+ return -ENOENT;
+ }
+
+ // NOTE(review): the quotes are emitted pre-escaped (literal \" in the
+ // payload), apparently for the mon command that set_config_key issues;
+ // confirm the escaping is stripped before peer_get_attributes parses
+ // the raw value with json_spirit
+ std::stringstream ss;
+ ss << "{";
+ for (auto& pair : attributes) {
+ ss << "\\\"" << pair.first << "\\\": "
+ << "\\\"" << pair.second << "\\\"";
+ if (&pair != &(*attributes.rbegin())) {
+ ss << ", ";
+ }
+ }
+ ss << "}";
+
+ librados::Rados rados(io_ctx);
+ r = set_config_key(rados, get_peer_config_key_name(io_ctx.get_id(), uuid),
+ ss.str());
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to update peer attributes: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+// List the mirroring status of up to 'max' images starting after
+// 'start_id', keyed by image id.  Images whose mirroring is disabled are
+// skipped; images without a stored status get a synthetic UNKNOWN entry.
+template <typename I>
+int Mirror<I>::image_status_list(librados::IoCtx& io_ctx,
+                                 const std::string &start_id, size_t max,
+                                 IdToMirrorImageStatus *images) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+  int r;
+
+  // build an id -> name index so statuses can be reported by image name
+  map<string, string> id_to_name;
+  {
+    map<string, string> name_to_id;
+    r = Image<I>::list_images_v2(io_ctx, &name_to_id);
+    if (r < 0) {
+      return r;
+    }
+    // Fix: iterate by const reference -- the original copied each
+    // std::pair<std::string, std::string> on every iteration.
+    for (const auto& it : name_to_id) {
+      id_to_name[it.second] = it.first;
+    }
+  }
+
+  map<std::string, cls::rbd::MirrorImage> images_;
+  map<std::string, cls::rbd::MirrorImageStatus> statuses_;
+
+  r = librbd::cls_client::mirror_image_status_list(&io_ctx, start_id, max,
+                                                   &images_, &statuses_);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to list mirror image statuses: "
+               << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  // placeholder for images with no recorded status
+  cls::rbd::MirrorImageStatus unknown_status(
+    cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN, "status not found");
+
+  for (auto it = images_.begin(); it != images_.end(); ++it) {
+    auto &image_id = it->first;
+    auto &info = it->second;
+    if (info.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLED) {
+      continue;
+    }
+
+    auto &image_name = id_to_name[image_id];
+    if (image_name.empty()) {
+      lderr(cct) << "failed to find image name for image " << image_id << ", "
+                 << "using image id as name" << dendl;
+      image_name = image_id;
+    }
+    auto s_it = statuses_.find(image_id);
+    auto &s = s_it != statuses_.end() ? s_it->second : unknown_status;
+    (*images)[image_id] = mirror_image_status_t{
+      image_name,
+      mirror_image_info_t{
+        info.global_image_id,
+        static_cast<mirror_image_state_t>(info.state),
+        false}, // XXX: To set "primary" right would require an additional call.
+      static_cast<mirror_image_status_state_t>(s.state),
+      s.description,
+      s.last_update.sec(),
+      s.up};
+  }
+
+  return 0;
+}
+
+// Aggregate per-state counts of mirror image statuses for the pool.  An
+// empty pool (-ENOENT) yields an empty summary.
+template <typename I>
+int Mirror<I>::image_status_summary(librados::IoCtx& io_ctx,
+                                    MirrorImageStatusStates *states) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+
+  std::map<cls::rbd::MirrorImageStatusState, int> summary;
+  int r = cls_client::mirror_image_status_get_summary(&io_ctx, &summary);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to get mirror status summary: "
+               << cpp_strerror(r) << dendl;
+    return r;
+  }
+  // translate cls-level states into the public API enum
+  for (const auto &entry : summary) {
+    (*states)[static_cast<mirror_image_status_state_t>(entry.first)] =
+      entry.second;
+  }
+  return 0;
+}
+
+// List the rbd-mirror daemon instance ids servicing mirrored images,
+// keyed by image id, returning up to 'max' entries starting after
+// 'start_image_id'.  An empty listing (-ENOENT) is not an error.
+template <typename I>
+int Mirror<I>::image_instance_id_list(
+    librados::IoCtx& io_ctx, const std::string &start_image_id, size_t max,
+    std::map<std::string, std::string> *instance_ids) {
+  CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
+  std::map<std::string, entity_inst_t> instances;
+
+  int r = librbd::cls_client::mirror_image_instance_list(
+      &io_ctx, start_image_id, max, &instances);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to list mirror image instances: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  // Fix: iterate by const reference -- the original copied each
+  // (image id, entity_inst_t) pair on every iteration.
+  for (const auto& it : instances) {
+    (*instance_ids)[it.first] = stringify(it.second.name.num());
+  }
+
+  return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Mirror<librbd::ImageCtx>;
diff --git a/src/librbd/api/Mirror.h b/src/librbd/api/Mirror.h
new file mode 100644
index 00000000..94768acd
--- /dev/null
+++ b/src/librbd/api/Mirror.h
@@ -0,0 +1,87 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef LIBRBD_API_MIRROR_H
+#define LIBRBD_API_MIRROR_H
+
+#include "include/rbd/librbd.hpp"
+#include <map>
+#include <string>
+#include <vector>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+
+// Static-only entry points implementing the RBD mirroring API: site
+// name, pool mirror mode, peer management, and per-image mirroring
+// operations. Templated on the image context type so unit tests can
+// substitute a mock context.
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Mirror {
+  typedef std::map<std::string, std::string> Attributes;
+  typedef std::map<std::string, mirror_image_status_t> IdToMirrorImageStatus;
+  typedef std::map<mirror_image_status_state_t, int> MirrorImageStatusStates;
+
+  // cluster-wide mirroring site name
+  static int site_name_get(librados::Rados& rados, std::string* name);
+  static int site_name_set(librados::Rados& rados, const std::string& name);
+
+  // pool-level mirroring mode
+  static int mode_get(librados::IoCtx& io_ctx, rbd_mirror_mode_t *mirror_mode);
+  static int mode_set(librados::IoCtx& io_ctx, rbd_mirror_mode_t mirror_mode);
+
+  // one-time tokens used to pair two clusters for mirroring
+  static int peer_bootstrap_create(librados::IoCtx& io_ctx, std::string* token);
+  static int peer_bootstrap_import(librados::IoCtx& io_ctx,
+                                   rbd_mirror_peer_direction_t direction,
+                                   const std::string& token);
+
+  // remote peer registration and configuration
+  static int peer_add(librados::IoCtx& io_ctx, std::string *uuid,
+                      const std::string &cluster_name,
+                      const std::string &client_name);
+  static int peer_remove(librados::IoCtx& io_ctx, const std::string &uuid);
+  static int peer_list(librados::IoCtx& io_ctx,
+                       std::vector<mirror_peer_t> *peers);
+  static int peer_set_client(librados::IoCtx& io_ctx, const std::string &uuid,
+                             const std::string &client_name);
+  static int peer_set_cluster(librados::IoCtx& io_ctx, const std::string &uuid,
+                              const std::string &cluster_name);
+  static int peer_get_attributes(librados::IoCtx& io_ctx,
+                                 const std::string &uuid,
+                                 Attributes* attributes);
+  static int peer_set_attributes(librados::IoCtx& io_ctx,
+                                 const std::string &uuid,
+                                 const Attributes& attributes);
+
+  // pool-wide mirroring status queries (paginated via start_id/max)
+  static int image_status_list(librados::IoCtx& io_ctx,
+                               const std::string &start_id, size_t max,
+                               IdToMirrorImageStatus *images);
+  static int image_status_summary(librados::IoCtx& io_ctx,
+                                  MirrorImageStatusStates *states);
+  static int image_instance_id_list(librados::IoCtx& io_ctx,
+                                    const std::string &start_image_id,
+                                    size_t max,
+                                    std::map<std::string, std::string> *ids);
+
+  // per-image mirroring operations; overloads taking a Context complete
+  // asynchronously via on_finish, the int-returning forms block
+  static int image_enable(ImageCtxT *ictx, bool relax_same_pool_parent_check);
+  static int image_disable(ImageCtxT *ictx, bool force);
+  static int image_promote(ImageCtxT *ictx, bool force);
+  static void image_promote(ImageCtxT *ictx, bool force, Context *on_finish);
+  static int image_demote(ImageCtxT *ictx);
+  static void image_demote(ImageCtxT *ictx, Context *on_finish);
+  static int image_resync(ImageCtxT *ictx);
+  static int image_get_info(ImageCtxT *ictx,
+                            mirror_image_info_t *mirror_image_info);
+  static void image_get_info(ImageCtxT *ictx,
+                             mirror_image_info_t *mirror_image_info,
+                             Context *on_finish);
+  static int image_get_status(ImageCtxT *ictx, mirror_image_status_t *status);
+  static void image_get_status(ImageCtxT *ictx, mirror_image_status_t *status,
+                               Context *on_finish);
+  static int image_get_instance_id(ImageCtxT *ictx, std::string *instance_id);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Mirror<librbd::ImageCtx>;
+
+#endif // LIBRBD_API_MIRROR_H
diff --git a/src/librbd/api/Namespace.cc b/src/librbd/api/Namespace.cc
new file mode 100644
index 00000000..ed7f8e28
--- /dev/null
+++ b/src/librbd/api/Namespace.cc
@@ -0,0 +1,227 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/api/Namespace.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Namespace: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+const std::list<std::string> POOL_OBJECTS {
+ RBD_CHILDREN,
+ RBD_GROUP_DIRECTORY,
+ RBD_INFO,
+ RBD_MIRRORING,
+ RBD_TASK,
+ RBD_TRASH,
+ RBD_DIRECTORY
+};
+
+} // anonymous namespace
+
+// Create an RBD namespace inside the pool: register the name in the
+// registry kept in the pool's *default* namespace, then initialize the
+// new namespace's image directory. The registration is rolled back if
+// directory initialization fails.
+template <typename I>
+int Namespace<I>::create(librados::IoCtx& io_ctx, const std::string& name)
+{
+  CephContext *cct = (CephContext *)io_ctx.cct();
+  ldout(cct, 5) << "name=" << name << dendl;
+
+  if (name.empty()) {
+    return -EINVAL;
+  }
+
+  // namespaces require every OSD to be running nautilus or later
+  librados::Rados rados(io_ctx);
+  int8_t require_osd_release;
+  int r = rados.get_min_compatible_osd(&require_osd_release);
+  if (r < 0) {
+    lderr(cct) << "failed to retrieve min OSD release: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  if (require_osd_release < CEPH_RELEASE_NAUTILUS) {
+    ldout(cct, 1) << "namespace support requires nautilus or later OSD"
+                  << dendl;
+    return -ENOSYS;
+  }
+
+
+  // the namespace registry lives in the pool's default ("") namespace
+  librados::IoCtx default_ns_ctx;
+  default_ns_ctx.dup(io_ctx);
+  default_ns_ctx.set_namespace("");
+
+  r = cls_client::namespace_add(&default_ns_ctx, name);
+  if (r < 0) {
+    lderr(cct) << "failed to add namespace: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  librados::IoCtx ns_ctx;
+  ns_ctx.dup(io_ctx);
+  ns_ctx.set_namespace(name);
+
+  // mark the new namespace's image directory ready for use
+  r = cls_client::dir_state_set(&ns_ctx, RBD_DIRECTORY,
+                                cls::rbd::DIRECTORY_STATE_READY);
+  if (r < 0) {
+    lderr(cct) << "failed to initialize image directory: " << cpp_strerror(r)
+               << dendl;
+    goto rollback;
+  }
+
+  return 0;
+
+rollback:
+  // undo the registration; the original error (r) is still returned
+  int ret_val = cls_client::namespace_remove(&default_ns_ctx, name);
+  if (ret_val < 0) {
+    lderr(cct) << "failed to remove namespace: " << cpp_strerror(ret_val) << dendl;
+  }
+
+  return r;
+}
+
+// Remove an (empty) RBD namespace. The image directory is atomically
+// disabled-and-removed; if it still holds images (-EBUSY) or the trash
+// is non-empty, the directory state is restored and -EBUSY is returned.
+// Otherwise all well-known pool objects inside the namespace are
+// deleted and the name is unregistered from the default namespace.
+template <typename I>
+int Namespace<I>::remove(librados::IoCtx& io_ctx, const std::string& name)
+{
+  CephContext *cct = (CephContext *)io_ctx.cct();
+  ldout(cct, 5) << "name=" << name << dendl;
+
+  if (name.empty()) {
+    return -EINVAL;
+  }
+
+  librados::IoCtx default_ns_ctx;
+  default_ns_ctx.dup(io_ctx);
+  default_ns_ctx.set_namespace("");
+
+  librados::IoCtx ns_ctx;
+  ns_ctx.dup(io_ctx);
+  ns_ctx.set_namespace(name);
+
+  std::map<std::string, cls::rbd::TrashImageSpec> trash_entries;
+
+  // single write op: block new image creation, then remove the
+  // directory -- the remove fails with -EBUSY if images remain
+  librados::ObjectWriteOperation dir_op;
+  librbd::cls_client::dir_state_set(
+    &dir_op, cls::rbd::DIRECTORY_STATE_ADD_DISABLED);
+  dir_op.remove();
+
+  int r = ns_ctx.operate(RBD_DIRECTORY, &dir_op);
+  if (r == -EBUSY) {
+    ldout(cct, 5) << "image directory not empty" << dendl;
+    goto rollback;
+  } else if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to disable the namespace: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  // a single trash entry is enough to prove the namespace is in use
+  r = cls_client::trash_list(&ns_ctx, "", 1, &trash_entries);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to list trash directory: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  } else if (!trash_entries.empty()) {
+    ldout(cct, 5) << "image trash not empty" << dendl;
+    goto rollback;
+  }
+
+  // purge the namespace's well-known RBD objects (missing ones are OK)
+  for (auto& oid : POOL_OBJECTS) {
+    r = ns_ctx.remove(oid);
+    if (r < 0 && r != -ENOENT) {
+      lderr(cct) << "failed to remove object '" << oid << "': "
+                 << cpp_strerror(r) << dendl;
+      return r;
+    }
+  }
+
+  r = cls_client::namespace_remove(&default_ns_ctx, name);
+  if (r < 0) {
+    lderr(cct) << "failed to remove namespace: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  return 0;
+
+rollback:
+
+  // namespace still in use: re-enable its image directory
+  r = librbd::cls_client::dir_state_set(
+    &ns_ctx, RBD_DIRECTORY, cls::rbd::DIRECTORY_STATE_READY);
+  if (r < 0) {
+    lderr(cct) << "failed to restore directory state: " << cpp_strerror(r)
+               << dendl;
+  }
+
+  return -EBUSY;
+}
+
+// Enumerate all namespace names in the pool, paging through the
+// registry in the pool's default namespace in batches of 1024.
+template <typename I>
+int Namespace<I>::list(IoCtx& io_ctx, vector<string> *names)
+{
+  CephContext *cct = (CephContext *)io_ctx.cct();
+  ldout(cct, 5) << dendl;
+
+  librados::IoCtx default_ns_ctx;
+  default_ns_ctx.dup(io_ctx);
+  default_ns_ctx.set_namespace("");
+
+  int r;
+  int max_read = 1024;
+  std::string last_read = "";
+  do {
+    std::list<std::string> name_list;
+    r = cls_client::namespace_list(&default_ns_ctx, last_read, max_read,
+                                   &name_list);
+    if (r == -ENOENT) {
+      // registry object missing: no namespaces were ever created
+      return 0;
+    } else if (r < 0) {
+      lderr(cct) << "error listing namespaces: " << cpp_strerror(r) << dendl;
+      return r;
+    }
+
+    names->insert(names->end(), name_list.begin(), name_list.end());
+    if (!name_list.empty()) {
+      last_read = name_list.back();
+    }
+    // a full batch implies more entries may remain; reuse r as the count
+    r = name_list.size();
+  } while (r == max_read);
+
+  return 0;
+}
+
+// Probe for a namespace by asserting that its image directory has been
+// fully initialized (DIRECTORY_STATE_READY); *exists reflects the
+// outcome. An empty name is rejected with -EINVAL.
+template <typename I>
+int Namespace<I>::exists(librados::IoCtx& io_ctx, const std::string& name, bool *exists)
+{
+  CephContext *cct = (CephContext *)io_ctx.cct();
+  ldout(cct, 5) << "name=" << name << dendl;
+
+  *exists = false;
+  if (name.empty()) {
+    return -EINVAL;
+  }
+
+  librados::IoCtx scoped_io_ctx;
+  scoped_io_ctx.dup(io_ctx);
+  scoped_io_ctx.set_namespace(name);
+
+  int r = librbd::cls_client::dir_state_assert(&scoped_io_ctx, RBD_DIRECTORY,
+                                               cls::rbd::DIRECTORY_STATE_READY);
+  if (r == -ENOENT) {
+    // directory object missing or not ready: namespace does not exist
+    return 0;
+  } else if (r < 0) {
+    lderr(cct) << "error asserting namespace: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  *exists = true;
+  return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Namespace<librbd::ImageCtx>;
diff --git a/src/librbd/api/Namespace.h b/src/librbd/api/Namespace.h
new file mode 100644
index 00000000..220eb28f
--- /dev/null
+++ b/src/librbd/api/Namespace.h
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_NAMESPACE_H
+#define CEPH_LIBRBD_API_NAMESPACE_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "include/rbd/librbd.hpp"
+#include <string>
+#include <vector>
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+
+// Static helpers managing RBD namespaces (logical partitions of a
+// pool); see Namespace.cc for the semantics of each call.
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Namespace {
+
+  // register a new namespace and initialize its image directory
+  static int create(librados::IoCtx& io_ctx, const std::string& name);
+  // unregister an empty namespace (-EBUSY if images/trash remain)
+  static int remove(librados::IoCtx& io_ctx, const std::string& name);
+  // enumerate all namespace names in the pool
+  static int list(librados::IoCtx& io_ctx, std::vector<std::string>* names);
+  // test whether the named namespace has been fully created
+  static int exists(librados::IoCtx& io_ctx, const std::string& name, bool *exists);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Namespace<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_NAMESPACE_H
diff --git a/src/librbd/api/Pool.cc b/src/librbd/api/Pool.cc
new file mode 100644
index 00000000..88adf561
--- /dev/null
+++ b/src/librbd/api/Pool.cc
@@ -0,0 +1,377 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Pool.h"
+#include "include/rados/librados.hpp"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Throttle.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "osd/osd_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Image.h"
+#include "librbd/api/Trash.h"
+#include "librbd/image/ValidatePoolRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Pool::ImageStatRequest: " \
+ << __func__ << " " << this << ": " \
+ << "(id=" << m_image_id << "): "
+
+// Self-deleting async request that stats one image without opening it:
+// reads the HEAD size (plus snap context and per-snapshot sizes when
+// scan_snaps is set) directly from the image header object, folding
+// results into the caller's shared atomic accumulators.
+// NOTE(review): m_image_id is a reference member -- the caller's string
+// must outlive the request (the image_ids vector in get_pool_stats
+// satisfies this since it is kept alive until the throttle drains).
+template <typename I>
+class ImageStatRequest {
+public:
+  ImageStatRequest(librados::IoCtx& io_ctx, SimpleThrottle& throttle,
+                   const std::string& image_id, bool scan_snaps,
+                   std::atomic<uint64_t>* bytes,
+                   std::atomic<uint64_t>* max_bytes,
+                   std::atomic<uint64_t>* snaps)
+    : m_cct(reinterpret_cast<CephContext*>(io_ctx.cct())),
+      m_io_ctx(io_ctx), m_throttle(throttle), m_image_id(image_id),
+      m_scan_snaps(scan_snaps), m_bytes(bytes), m_max_bytes(max_bytes),
+      m_snaps(snaps) {
+    // claim a throttle slot; matched by end_op() in finish()
+    m_throttle.start_op();
+  }
+
+  void send() {
+    get_head();
+  }
+
+protected:
+  // terminal state: publish the per-image max size, release the
+  // throttle slot, and self-destruct
+  void finish(int r) {
+    (*m_max_bytes) += m_max_size;
+    m_throttle.end_op(r);
+
+    delete this;
+  }
+
+private:
+  CephContext* m_cct;
+  librados::IoCtx& m_io_ctx;
+  SimpleThrottle& m_throttle;
+  const std::string& m_image_id;
+  bool m_scan_snaps;
+  std::atomic<uint64_t>* m_bytes;      // accumulates HEAD sizes
+  std::atomic<uint64_t>* m_max_bytes;  // accumulates per-image max sizes
+  std::atomic<uint64_t>* m_snaps;      // accumulates snapshot counts
+  bufferlist m_out_bl;
+
+  uint64_t m_max_size = 0;
+  ::SnapContext m_snapc;
+
+  // issue one combined read: HEAD size (+ snap context when scanning)
+  void get_head() {
+    ldout(m_cct, 15) << dendl;
+
+    librados::ObjectReadOperation op;
+    cls_client::get_size_start(&op, CEPH_NOSNAP);
+    if (m_scan_snaps) {
+      cls_client::get_snapcontext_start(&op);
+    }
+
+    m_out_bl.clear();
+    auto aio_comp = util::create_rados_callback<
+      ImageStatRequest<I>, &ImageStatRequest<I>::handle_get_head>(this);
+    int r = m_io_ctx.aio_operate(util::header_name(m_image_id), aio_comp, &op,
+                                 &m_out_bl);
+    ceph_assert(r == 0);
+    aio_comp->release();
+  }
+
+  void handle_get_head(int r) {
+    ldout(m_cct, 15) << "r=" << r << dendl;
+
+    // decode the batched results in issue order
+    auto it = m_out_bl.cbegin();
+    if (r == 0) {
+      uint8_t order;
+      r = cls_client::get_size_finish(&it, &m_max_size, &order);
+      if (r == 0) {
+        (*m_bytes) += m_max_size;
+      }
+    }
+    if (m_scan_snaps && r == 0) {
+      r = cls_client::get_snapcontext_finish(&it, &m_snapc);
+      if (r == 0) {
+        (*m_snaps) += m_snapc.snaps.size();
+      }
+    }
+
+    if (r == -ENOENT) {
+      // header object gone (image presumably removed mid-scan)
+      finish(r);
+      return;
+    } else if (r < 0) {
+      lderr(m_cct) << "failed to stat image: " << cpp_strerror(r) << dendl;
+      finish(r);
+      return;
+    }
+
+    if (!m_snapc.is_valid()) {
+      lderr(m_cct) << "snap context is invalid" << dendl;
+      finish(-EIO);
+      return;
+    }
+
+    get_snaps();
+  }
+
+  // read every snapshot's size in one batched operation
+  void get_snaps() {
+    if (!m_scan_snaps || m_snapc.snaps.empty()) {
+      finish(0);
+      return;
+    }
+
+    ldout(m_cct, 15) << dendl;
+    librados::ObjectReadOperation op;
+    for (auto snap_seq : m_snapc.snaps) {
+      cls_client::get_size_start(&op, snap_seq);
+    }
+
+    m_out_bl.clear();
+    auto aio_comp = util::create_rados_callback<
+      ImageStatRequest<I>, &ImageStatRequest<I>::handle_get_snaps>(this);
+    int r = m_io_ctx.aio_operate(util::header_name(m_image_id), aio_comp, &op,
+                                 &m_out_bl);
+    ceph_assert(r == 0);
+    aio_comp->release();
+  }
+
+  void handle_get_snaps(int r) {
+    ldout(m_cct, 15) << "r=" << r << dendl;
+
+    auto it = m_out_bl.cbegin();
+    for ([[maybe_unused]] auto snap_seq : m_snapc.snaps) {
+      uint64_t size;
+      if (r == 0) {
+        uint8_t order;
+        r = cls_client::get_size_finish(&it, &size, &order);
+      }
+      // track the largest size across HEAD and all snapshots
+      if (r == 0 && m_max_size < size) {
+        m_max_size = size;
+      }
+    }
+
+    if (r == -ENOENT) {
+      // a snapshot vanished after the snap context was read: restart
+      ldout(m_cct, 15) << "out-of-sync metadata" << dendl;
+      get_head();
+    } else if (r < 0) {
+      lderr(m_cct) << "failed to retrieve snap size: " << cpp_strerror(r)
+                   << dendl;
+      finish(r);
+    } else {
+      finish(0);
+    }
+  }
+
+};
+
+// Look up the caller-provided output pointer for a pool stat option;
+// *value is set to nullptr when the option was not requested.
+template <typename I>
+void get_pool_stat_option_value(typename Pool<I>::StatOptions* stat_options,
+                                rbd_pool_stat_option_t option,
+                                uint64_t** value) {
+  auto pos = stat_options->find(option);
+  *value = (pos == stat_options->end() ? nullptr : pos->second);
+}
+
+// Fan out one self-deleting ImageStatRequest per image id, bounded by
+// rbd_concurrent_management_ops, and reduce the results into whichever
+// output counters are non-null. Snapshots are scanned only when a
+// snapshot-count or max-provisioned stat was requested.
+template <typename I>
+int get_pool_stats(librados::IoCtx& io_ctx, const ConfigProxy& config,
+                   const std::vector<std::string>& image_ids, uint64_t* image_count,
+                   uint64_t* provisioned_bytes, uint64_t* max_provisioned_bytes,
+                   uint64_t* snapshot_count) {
+
+  bool scan_snaps = ((max_provisioned_bytes != nullptr) ||
+                     (snapshot_count != nullptr));
+
+  SimpleThrottle throttle(
+    config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true);
+  std::atomic<uint64_t> bytes{0};
+  std::atomic<uint64_t> max_bytes{0};
+  std::atomic<uint64_t> snaps{0};
+  for (auto& image_id : image_ids) {
+    // stop issuing new requests once any in-flight request failed
+    if (throttle.pending_error()) {
+      break;
+    }
+
+    // req deletes itself; completion is reported through the throttle
+    auto req = new ImageStatRequest<I>(io_ctx, throttle, image_id,
+                                       scan_snaps, &bytes, &max_bytes, &snaps);
+    req->send();
+  }
+
+  int r = throttle.wait_for_ret();
+  if (r < 0) {
+    return r;
+  }
+
+  if (image_count != nullptr) {
+    *image_count = image_ids.size();
+  }
+  if (provisioned_bytes != nullptr) {
+    *provisioned_bytes = bytes.load();
+  }
+  if (max_provisioned_bytes != nullptr) {
+    *max_provisioned_bytes = max_bytes.load();
+  }
+  if (snapshot_count != nullptr) {
+    *snapshot_count = snaps.load();
+  }
+
+  return 0;
+}
+
+} // anonymous namespace
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Pool: " << __func__ << ": "
+
+// Prepare a pool for RBD use: tag it with the "rbd" application and,
+// unless rbd_validate_pool is disabled (per pool-level overrides), run
+// ValidatePoolRequest to verify the pool is usable.
+template <typename I>
+int Pool<I>::init(librados::IoCtx& io_ctx, bool force) {
+  auto cct = reinterpret_cast<CephContext*>(io_ctx.cct());
+  ldout(cct, 10) << dendl;
+
+  int r = io_ctx.application_enable(pg_pool_t::APPLICATION_NAME_RBD, force);
+  if (r < 0) {
+    return r;
+  }
+
+  ConfigProxy config{cct->_conf};
+  api::Config<I>::apply_pool_overrides(io_ctx, &config);
+  if (!config.get_val<bool>("rbd_validate_pool")) {
+    return 0;
+  }
+
+  ThreadPool *thread_pool;
+  ContextWQ *op_work_queue;
+  ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
+
+  // run the validation state machine and block until it completes
+  C_SaferCond ctx;
+  auto req = image::ValidatePoolRequest<I>::create(io_ctx, op_work_queue, &ctx);
+  req->send();
+
+  return ctx.wait();
+}
+
+// Register an output pointer for a recognized pool stat option; the
+// value is written when get_stats() runs. Unknown options -> -ENOENT.
+template <typename I>
+int Pool<I>::add_stat_option(StatOptions* stat_options,
+                             rbd_pool_stat_option_t option,
+                             uint64_t* value) {
+  switch (option) {
+  case RBD_POOL_STAT_OPTION_IMAGES:
+  case RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES:
+  case RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES:
+  case RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS:
+  case RBD_POOL_STAT_OPTION_TRASH_IMAGES:
+  case RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES:
+  case RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES:
+  case RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS:
+    stat_options->emplace(option, value);
+    return 0;
+  default:
+    return -ENOENT;
+  }
+}
+
+// Compute pool-level statistics for every option registered in
+// *stat_options. Image stats cover directory images plus trash entries
+// that are actively being removed; trash stats cover the remaining
+// trash entries. Stat groups whose options were not requested are
+// skipped entirely.
+template <typename I>
+int Pool<I>::get_stats(librados::IoCtx& io_ctx, StatOptions* stat_options) {
+  auto cct = reinterpret_cast<CephContext*>(io_ctx.cct());
+  ldout(cct, 10) << dendl;
+
+  ConfigProxy config{cct->_conf};
+  api::Config<I>::apply_pool_overrides(io_ctx, &config);
+
+  // per-group output pointers; nullptr when the option wasn't requested
+  uint64_t* image_count;
+  uint64_t* provisioned_bytes;
+  uint64_t* max_provisioned_bytes;
+  uint64_t* snapshot_count;
+
+  // trash entries feed both the image and the trash stat groups
+  std::vector<trash_image_info_t> trash_entries;
+  int r = Trash<I>::list(io_ctx, trash_entries, false);
+  if (r < 0 && r != -EOPNOTSUPP) {
+    return r;
+  }
+
+  get_pool_stat_option_value<I>(
+    stat_options, RBD_POOL_STAT_OPTION_IMAGES, &image_count);
+  get_pool_stat_option_value<I>(
+    stat_options, RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES,
+    &provisioned_bytes);
+  get_pool_stat_option_value<I>(
+    stat_options, RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES,
+    &max_provisioned_bytes);
+  get_pool_stat_option_value<I>(
+    stat_options, RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS, &snapshot_count);
+  if (image_count != nullptr || provisioned_bytes != nullptr ||
+      max_provisioned_bytes != nullptr || snapshot_count != nullptr) {
+    typename Image<I>::ImageNameToIds images;
+    // reuse the outer r (the original declared a shadowing local here)
+    r = Image<I>::list_images_v2(io_ctx, &images);
+    if (r < 0) {
+      return r;
+    }
+
+    std::vector<std::string> image_ids;
+    image_ids.reserve(images.size() + trash_entries.size());
+    for (auto& it : images) {
+      image_ids.push_back(std::move(it.second));
+    }
+    // images mid-removal still count against the image stats
+    for (auto& it : trash_entries) {
+      if (it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+        image_ids.push_back(std::move(it.id));
+      }
+    }
+
+    r = get_pool_stats<I>(io_ctx, config, image_ids, image_count,
+                          provisioned_bytes, max_provisioned_bytes,
+                          snapshot_count);
+    if (r < 0) {
+      return r;
+    }
+  }
+
+  get_pool_stat_option_value<I>(
+    stat_options, RBD_POOL_STAT_OPTION_TRASH_IMAGES, &image_count);
+  get_pool_stat_option_value<I>(
+    stat_options, RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES,
+    &provisioned_bytes);
+  get_pool_stat_option_value<I>(
+    stat_options, RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES,
+    &max_provisioned_bytes);
+  get_pool_stat_option_value<I>(
+    stat_options, RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS, &snapshot_count);
+  if (image_count != nullptr || provisioned_bytes != nullptr ||
+      max_provisioned_bytes != nullptr || snapshot_count != nullptr) {
+
+    // trash stats exclude entries already counted as removing images
+    std::vector<std::string> image_ids;
+    image_ids.reserve(trash_entries.size());
+    for (auto& it : trash_entries) {
+      if (it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+        continue;
+      }
+      image_ids.push_back(std::move(it.id));
+    }
+
+    r = get_pool_stats<I>(io_ctx, config, image_ids, image_count,
+                          provisioned_bytes, max_provisioned_bytes,
+                          snapshot_count);
+    if (r < 0) {
+      return r;
+    }
+  }
+
+  return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Pool<librbd::ImageCtx>;
diff --git a/src/librbd/api/Pool.h b/src/librbd/api/Pool.h
new file mode 100644
index 00000000..7b607ab6
--- /dev/null
+++ b/src/librbd/api/Pool.h
@@ -0,0 +1,38 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_POOL_H
+#define CEPH_LIBRBD_API_POOL_H
+
+#include "include/int_types.h"
+#include "include/rados/librados_fwd.hpp"
+#include "include/rbd/librbd.h"
+#include <map>
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+
+// Static pool-scoped operations: initialization for RBD use and
+// aggregate pool statistics.
+template <typename ImageCtxT = librbd::ImageCtx>
+class Pool {
+public:
+  // maps a stat option to the caller-owned output variable for it
+  typedef std::map<rbd_pool_stat_option_t, uint64_t*> StatOptions;
+
+  // tag the pool with the "rbd" application and validate it for use
+  static int init(librados::IoCtx& io_ctx, bool force);
+
+  // register interest in one stat option; -ENOENT if unknown
+  static int add_stat_option(StatOptions* stat_options,
+                             rbd_pool_stat_option_t option,
+                             uint64_t* value);
+
+  // compute all registered stats for the pool
+  static int get_stats(librados::IoCtx& io_ctx, StatOptions* stat_options);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Pool<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_POOL_H
diff --git a/src/librbd/api/PoolMetadata.cc b/src/librbd/api/PoolMetadata.cc
new file mode 100644
index 00000000..f3d53944
--- /dev/null
+++ b/src/librbd/api/PoolMetadata.cc
@@ -0,0 +1,151 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/PoolMetadata.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::PoolMetadata: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+// Notify running clients that pool-level config overrides changed by
+// setting the global rbd_config_pool_override_update_timestamp option
+// through a mon "config set" command. Best effort: a failure is logged
+// but not propagated to the caller.
+void update_pool_timestamp(librados::IoCtx& io_ctx) {
+  CephContext *cct = (CephContext *)io_ctx.cct();
+
+  auto now = ceph_clock_now();
+  // hand-built JSON payload for the mon command interface
+  std::string cmd =
+    R"({)"
+      R"("prefix": "config set", )"
+      R"("who": "global", )"
+      R"("name": "rbd_config_pool_override_update_timestamp", )"
+      R"("value": ")" + stringify(now.sec()) + R"(")"
+    R"(})";
+
+  librados::Rados rados(io_ctx);
+  bufferlist in_bl;
+  std::string ss;
+  int r = rados.mon_command(cmd, in_bl, nullptr, &ss);
+  if (r < 0) {
+    lderr(cct) << "failed to notify clients of pool config update: "
+               << cpp_strerror(r) << dendl;
+  }
+}
+
+} // anonymous namespace
+
+// Fetch a single pool-level metadata value by key from the RBD_INFO
+// object. -ENOENT (key absent) is propagated to the caller but not
+// logged as an error.
+template <typename I>
+int PoolMetadata<I>::get(librados::IoCtx& io_ctx,
+                         const std::string &key, std::string *value) {
+  CephContext *cct = (CephContext *)io_ctx.cct();
+
+  int r = cls_client::metadata_get(&io_ctx, RBD_INFO, key, value);
+  if (r >= 0 || r == -ENOENT) {
+    return r;
+  }
+
+  lderr(cct) << "failed reading metadata " << key << ": " << cpp_strerror(r)
+             << dendl;
+  return r;
+}
+
+// Store a pool-level metadata key/value pair on the RBD_INFO object.
+// Keys recognized by util::is_metadata_config_override as rbd config
+// overrides are validated against the known option names and value
+// types first; after a successful override update all clients are
+// notified via the pool config timestamp.
+template <typename I>
+int PoolMetadata<I>::set(librados::IoCtx& io_ctx, const std::string &key,
+                         const std::string &value) {
+  CephContext *cct = (CephContext *)io_ctx.cct();
+
+  bool need_update_pool_timestamp = false;
+
+  std::string config_key;
+  if (util::is_metadata_config_override(key, &config_key)) {
+    if (!librbd::api::Config<I>::is_option_name(io_ctx, config_key)) {
+      lderr(cct) << "validation for " << key
+                 << " failed: not allowed pool level override" << dendl;
+      return -EINVAL;
+    }
+    // dry-run the value through a throw-away config to type-check it
+    int r = ConfigProxy{false}.set_val(config_key.c_str(), value);
+    if (r < 0) {
+      lderr(cct) << "validation for " << key << " failed: " << cpp_strerror(r)
+                 << dendl;
+      return -EINVAL;
+    }
+
+    need_update_pool_timestamp = true;
+  }
+
+  ceph::bufferlist bl;
+  bl.append(value);
+
+  int r = cls_client::metadata_set(&io_ctx, RBD_INFO, {{key, bl}});
+  if (r < 0) {
+    lderr(cct) << "failed setting metadata " << key << ": " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  if (need_update_pool_timestamp) {
+    update_pool_timestamp(io_ctx);
+  }
+
+  return 0;
+}
+
+// Remove a pool-level metadata key from the RBD_INFO object. A
+// preliminary get distinguishes a missing key (-ENOENT, logged quietly)
+// from read errors. Removing a config override also bumps the pool
+// config timestamp so clients drop the override.
+template <typename I>
+int PoolMetadata<I>::remove(librados::IoCtx& io_ctx, const std::string &key) {
+  CephContext *cct = (CephContext *)io_ctx.cct();
+
+  std::string value;
+  int r = cls_client::metadata_get(&io_ctx, RBD_INFO, key, &value);
+  if (r < 0) {
+    if (r == -ENOENT) {
+      ldout(cct, 1) << "metadata " << key << " does not exist" << dendl;
+    } else {
+      lderr(cct) << "failed reading metadata " << key << ": " << cpp_strerror(r)
+                 << dendl;
+    }
+    return r;
+  }
+
+  r = cls_client::metadata_remove(&io_ctx, RBD_INFO, key);
+  if (r < 0) {
+    lderr(cct) << "failed removing metadata " << key << ": " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  std::string config_key;
+  if (util::is_metadata_config_override(key, &config_key)) {
+    update_pool_timestamp(io_ctx);
+  }
+
+  return 0;
+}
+
+// Enumerate pool-level metadata key/value pairs from the RBD_INFO
+// object, starting after `start`, up to `max` entries. A missing
+// RBD_INFO object (-ENOENT) is reported as an empty listing.
+template <typename I>
+int PoolMetadata<I>::list(librados::IoCtx& io_ctx, const std::string &start,
+                          uint64_t max,
+                          std::map<std::string, ceph::bufferlist> *pairs) {
+  CephContext *cct = (CephContext *)io_ctx.cct();
+
+  int r = cls_client::metadata_list(&io_ctx, RBD_INFO, start, max, pairs);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed listing metadata: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::PoolMetadata<librbd::ImageCtx>;
diff --git a/src/librbd/api/PoolMetadata.h b/src/librbd/api/PoolMetadata.h
new file mode 100644
index 00000000..c0a81735
--- /dev/null
+++ b/src/librbd/api/PoolMetadata.h
@@ -0,0 +1,36 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_POOL_METADATA_H
+#define CEPH_LIBRBD_API_POOL_METADATA_H
+
+#include "include/buffer_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+
+#include <map>
+#include <string>
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace api {
+
+// CRUD helpers for pool-level metadata stored on the pool's RBD_INFO
+// object (including rbd config overrides).
+template <typename ImageCtxT = librbd::ImageCtx>
+class PoolMetadata {
+public:
+  static int get(librados::IoCtx& io_ctx, const std::string &key,
+                 std::string *value);
+  static int set(librados::IoCtx& io_ctx, const std::string &key,
+                 const std::string &value);
+  static int remove(librados::IoCtx& io_ctx, const std::string &key);
+  // paginated listing: entries after `start`, at most `max` of them
+  static int list(librados::IoCtx& io_ctx, const std::string &start,
+                  uint64_t max, std::map<std::string, ceph::bufferlist> *pairs);
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::PoolMetadata<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_POOL_METADATA_H
diff --git a/src/librbd/api/Snapshot.cc b/src/librbd/api/Snapshot.cc
new file mode 100644
index 00000000..3d29cfb2
--- /dev/null
+++ b/src/librbd/api/Snapshot.cc
@@ -0,0 +1,196 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Snapshot.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include <boost/variant.hpp>
+#include "include/Context.h"
+#include "common/Cond.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Snapshot: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+// boost::static_visitor that resolves the group snapshot details (pool
+// id, group name, group snap name) from a snapshot's namespace variant.
+// Any namespace type other than GroupSnapshotNamespace yields -EINVAL.
+class GetGroupVisitor : public boost::static_visitor<int> {
+public:
+  CephContext* cct;
+  librados::IoCtx *image_ioctx;
+  snap_group_namespace_t *group_snap;  // out: filled on success
+
+  explicit GetGroupVisitor(CephContext* cct, librados::IoCtx *_image_ioctx,
+                           snap_group_namespace_t *group_snap)
+    : cct(cct), image_ioctx(_image_ioctx), group_snap(group_snap) {};
+
+  template <typename T>
+  inline int operator()(const T&) const {
+    // ignore other than GroupSnapshotNamespace types.
+    return -EINVAL;
+  }
+
+  inline int operator()(
+      const cls::rbd::GroupSnapshotNamespace& snap_namespace) {
+    // open an io context in the pool where the group lives
+    IoCtx group_ioctx;
+    int r = util::create_ioctx(*image_ioctx, "group", snap_namespace.group_pool,
+                               {}, &group_ioctx);
+    if (r < 0) {
+      return r;
+    }
+
+    cls::rbd::GroupSnapshot group_snapshot;
+
+    // map the group id back to its user-visible name
+    std::string group_name;
+    r = cls_client::dir_get_name(&group_ioctx, RBD_GROUP_DIRECTORY,
+                                 snap_namespace.group_id, &group_name);
+    if (r < 0) {
+      lderr(cct) << "failed to retrieve group name: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+
+    // fetch the group snapshot record to learn its name
+    string group_header_oid = util::group_header_name(snap_namespace.group_id);
+    r = cls_client::group_snap_get_by_id(&group_ioctx,
+                                         group_header_oid,
+                                         snap_namespace.group_snapshot_id,
+                                         &group_snapshot);
+    if (r < 0) {
+      lderr(cct) << "failed to retrieve group snapshot: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    }
+
+    group_snap->group_pool = group_ioctx.get_id();
+    group_snap->group_name = group_name;
+    group_snap->group_snap_name = group_snapshot.name;
+    return 0;
+  }
+};
+
+// boost::static_visitor that extracts the pre-trash snapshot name from
+// a TrashSnapshotNamespace; any other namespace type yields -EINVAL.
+class GetTrashVisitor : public boost::static_visitor<int> {
+public:
+  explicit GetTrashVisitor(std::string* original_name)
+    : m_original_name(original_name) {
+  }
+
+  template <typename T>
+  inline int operator()(const T&) const {
+    // not a trash snapshot namespace
+    return -EINVAL;
+  }
+
+  inline int operator()(
+      const cls::rbd::TrashSnapshotNamespace& snap_namespace) {
+    *m_original_name = snap_namespace.original_name;
+    return 0;
+  }
+
+private:
+  std::string* m_original_name;
+};
+
+} // anonymous namespace
+
+// Resolve group snapshot details (pool, group name, group snap name)
+// for the given snap id. Returns -ENOENT for an unknown snap id and
+// -EINVAL when the snapshot is not in the group namespace.
+template <typename I>
+int Snapshot<I>::get_group_namespace(I *ictx, uint64_t snap_id,
+                                     snap_group_namespace_t *group_snap) {
+  int r = ictx->state->refresh_if_required();
+  if (r < 0) {
+    return r;
+  }
+
+  RWLock::RLocker snap_locker(ictx->snap_lock);
+  auto snap_info = ictx->get_snap_info(snap_id);
+  if (snap_info == nullptr) {
+    return -ENOENT;
+  }
+
+  // dispatch on the namespace variant; only group namespaces succeed
+  GetGroupVisitor ggv = GetGroupVisitor(ictx->cct, &ictx->md_ctx, group_snap);
+  r = boost::apply_visitor(ggv, snap_info->snap_namespace);
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+// Fetch the original (pre-trash) snapshot name for a snapshot residing
+// in the trash namespace. Returns -ENOENT for an unknown snap id and
+// -EINVAL when the snapshot is not in the trash namespace.
+template <typename I>
+int Snapshot<I>::get_trash_namespace(I *ictx, uint64_t snap_id,
+                                     std::string* original_name) {
+  int r = ictx->state->refresh_if_required();
+  if (r < 0) {
+    return r;
+  }
+
+  RWLock::RLocker snap_locker(ictx->snap_lock);
+  auto snap_info = ictx->get_snap_info(snap_id);
+  if (snap_info == nullptr) {
+    return -ENOENT;
+  }
+
+  // dispatch on the namespace variant; only trash namespaces succeed
+  GetTrashVisitor visitor(original_name);
+  r = boost::apply_visitor(visitor, snap_info->snap_namespace);
+  return (r < 0 ? r : 0);
+}
+
+// Report the namespace type (e.g. user/group/trash) of the given
+// snapshot. Returns -ENOENT if the snap id is unknown.
+template <typename I>
+int Snapshot<I>::get_namespace_type(I *ictx, uint64_t snap_id,
+                                    snap_namespace_type_t *namespace_type) {
+  int r = ictx->state->refresh_if_required();
+  if (r < 0) {
+    return r;
+  }
+
+  RWLock::RLocker l(ictx->snap_lock);
+  auto snap_info = ictx->get_snap_info(snap_id);
+  if (snap_info == nullptr) {
+    return -ENOENT;
+  }
+
+  // cast the cls enum to the public enum (assumed value-compatible)
+  *namespace_type = static_cast<snap_namespace_type_t>(
+    cls::rbd::get_snap_namespace_type(snap_info->snap_namespace));
+  return 0;
+}
+
+// Synchronously remove a snapshot by id: capture its namespace and name
+// under the snap lock, then delegate to the operations state machine
+// and block until it completes. Returns -ENOENT if the id is unknown.
+template <typename I>
+int Snapshot<I>::remove(I *ictx, uint64_t snap_id) {
+  ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_id << dendl;
+
+  int r = ictx->state->refresh_if_required();
+  if (r < 0) {
+    return r;
+  }
+
+  cls::rbd::SnapshotNamespace snapshot_namespace;
+  std::string snapshot_name;
+  {
+    // hold the lock only long enough to copy out the snap metadata
+    RWLock::RLocker snap_locker(ictx->snap_lock);
+    auto it = ictx->snap_info.find(snap_id);
+    if (it == ictx->snap_info.end()) {
+      return -ENOENT;
+    }
+
+    snapshot_namespace = it->second.snap_namespace;
+    snapshot_name = it->second.name;
+  }
+
+  C_SaferCond ctx;
+  ictx->operations->snap_remove(snapshot_namespace, snapshot_name, &ctx);
+  r = ctx.wait();
+  return r;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Snapshot<librbd::ImageCtx>;
diff --git a/src/librbd/api/Snapshot.h b/src/librbd/api/Snapshot.h
new file mode 100644
index 00000000..453861c4
--- /dev/null
+++ b/src/librbd/api/Snapshot.h
@@ -0,0 +1,37 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_API_SNAPSHOT_H
+#define CEPH_LIBRBD_API_SNAPSHOT_H
+
+#include "include/rbd/librbd.hpp"
+#include <string>
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace api {
+
+// Snapshot-level API operations keyed by snapshot id.
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Snapshot {
+
+  // retrieve the group spec for a group-namespace snapshot
+  static int get_group_namespace(ImageCtxT *ictx, uint64_t snap_id,
+                                 snap_group_namespace_t *group_snap);
+
+  // retrieve the pre-trash name for a trash-namespace snapshot
+  static int get_trash_namespace(ImageCtxT *ictx, uint64_t snap_id,
+                                 std::string *original_name);
+
+  // map a snapshot id to its namespace type (user/group/trash)
+  static int get_namespace_type(ImageCtxT *ictx, uint64_t snap_id,
+                                snap_namespace_type_t *namespace_type);
+
+  // remove the snapshot with the given id from the image
+  static int remove(ImageCtxT *ictx, uint64_t snap_id);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Snapshot<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_API_SNAPSHOT_H
diff --git a/src/librbd/api/Trash.cc b/src/librbd/api/Trash.cc
new file mode 100644
index 00000000..0664461e
--- /dev/null
+++ b/src/librbd/api/Trash.cc
@@ -0,0 +1,681 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/Trash.h"
+#include "include/rados/librados.hpp"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/Cond.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Operations.h"
+#include "librbd/TrashWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/api/DiffIterate.h"
+#include "librbd/exclusive_lock/Policy.h"
+#include "librbd/image/RemoveRequest.h"
+#include "librbd/mirror/DisableRequest.h"
+#include "librbd/mirror/EnableRequest.h"
+#include "librbd/trash/MoveRequest.h"
+#include "librbd/trash/RemoveRequest.h"
+#include <json_spirit/json_spirit.h>
+#include "librbd/journal/DisabledPolicy.h"
+#include "librbd/image/ListWatchersRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::api::Trash: " << __func__ << ": "
+
+namespace librbd {
+namespace api {
+
+// Trash entries may only be restored when they originated from one of
+// these sources; entries created by an in-progress remove are excluded.
+template <typename I>
+const typename Trash<I>::TrashImageSources Trash<I>::RESTORE_SOURCE_WHITELIST {
+    cls::rbd::TRASH_IMAGE_SOURCE_USER,
+    cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING
+  };
+
+namespace {
+
+// Disable journal-based mirroring on the image (if enabled) before it is
+// moved to the trash.
+//
+// @returns 0 when mirroring is not applicable / not enabled / was
+//          disabled; otherwise the error from querying or disabling it
+template <typename I>
+int disable_mirroring(I *ictx) {
+  // mirroring requires the journaling feature
+  if (!ictx->test_features(RBD_FEATURE_JOURNALING)) {
+    return 0;
+  }
+
+  cls::rbd::MirrorImage mirror_image;
+  int r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, &mirror_image);
+  if (r == -ENOENT) {
+    ldout(ictx->cct, 10) << "mirroring is not enabled for this image" << dendl;
+    return 0;
+  }
+
+  if (r < 0) {
+    lderr(ictx->cct) << "failed to retrieve mirror image: " << cpp_strerror(r)
+                     << dendl;
+    return r;
+  }
+
+  ldout(ictx->cct, 10) << dendl;
+
+  // NOTE(review): the bool args appear to be force/remove flags --
+  // confirm against mirror::DisableRequest's constructor
+  C_SaferCond ctx;
+  auto req = mirror::DisableRequest<I>::create(ictx, false, true, &ctx);
+  req->send();
+  r = ctx.wait();
+  if (r < 0) {
+    lderr(ictx->cct) << "failed to disable mirroring: " << cpp_strerror(r)
+                     << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+// Re-enable journal-based mirroring for an image being restored from the
+// trash.
+//
+// Mirroring is only re-enabled when the image has the journaling feature
+// and the pool is configured for pool-level mirroring; in every other case
+// this is a no-op.
+//
+// @returns 0 on success or when mirroring is not applicable, otherwise
+//          the first error encountered
+template <typename I>
+int enable_mirroring(IoCtx &io_ctx, const std::string &image_id) {
+  auto cct = reinterpret_cast<CephContext*>(io_ctx.cct());
+
+  uint64_t features;
+  uint64_t incompatible_features;
+  int r = cls_client::get_features(&io_ctx, util::header_name(image_id), true,
+                                   &features, &incompatible_features);
+  if (r < 0) {
+    lderr(cct) << "failed to retrieve features: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  if ((features & RBD_FEATURE_JOURNALING) == 0) {
+    return 0;
+  }
+
+  // initialize to DISABLED: mirror_mode_get() may return -ENOENT when
+  // mirroring was never configured on the pool (tolerated below), in which
+  // case the output parameter could otherwise be compared uninitialized
+  cls::rbd::MirrorMode mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+  r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  // only pool-level mirroring auto-enables restored images
+  if (mirror_mode != cls::rbd::MIRROR_MODE_POOL) {
+    ldout(cct, 10) << "not pool mirroring mode" << dendl;
+    return 0;
+  }
+
+  ldout(cct, 10) << dendl;
+
+  ThreadPool *thread_pool;
+  ContextWQ *op_work_queue;
+  ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
+  C_SaferCond ctx;
+  auto req = mirror::EnableRequest<I>::create(io_ctx, image_id, "",
+                                              op_work_queue, &ctx);
+  req->send();
+  r = ctx.wait();
+  if (r < 0) {
+    lderr(cct) << "failed to enable mirroring: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+} // anonymous namespace
+
+// Move an image into the trash.
+//
+// If the image can be opened, other clients are fenced off (exclusive-lock
+// requests blocked), images with an in-flight migration are rejected, and
+// mirroring is disabled before the move. The trash entry transitions
+// through the MOVING state while the move request runs and is reset to
+// NORMAL afterwards.
+//
+// @param source who initiated the move (user, mirroring, ...)
+// @param delay seconds the entry stays deferred before it may be purged
+// @returns 0 on success (a failed watcher notification is not fatal)
+template <typename I>
+int Trash<I>::move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+                   const std::string &image_name, const std::string &image_id,
+                   uint64_t delay) {
+  ceph_assert(!image_name.empty() && !image_id.empty());
+  CephContext *cct((CephContext *)io_ctx.cct());
+  ldout(cct, 20) << &io_ctx << " name=" << image_name << ", id=" << image_id
+                 << dendl;
+
+  auto ictx = new I("", image_id, nullptr, io_ctx, false);
+  int r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
+
+  // -ENOENT is tolerated: the header may already be gone but the trash
+  // bookkeeping below still needs to run
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  if (r == 0) {
+    // don't bring up a journal for a soon-to-be-trashed image
+    if (ictx->test_features(RBD_FEATURE_JOURNALING)) {
+      RWLock::WLocker snap_locker(ictx->snap_lock);
+      ictx->set_journal_policy(new journal::DisabledPolicy());
+    }
+
+    // fence other clients: stop granting the exclusive lock, then verify
+    // we can perform the update ourselves
+    ictx->owner_lock.get_read();
+    if (ictx->exclusive_lock != nullptr) {
+      ictx->exclusive_lock->block_requests(0);
+
+      r = ictx->operations->prepare_image_update(
+        exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, false);
+      if (r < 0) {
+        lderr(cct) << "cannot obtain exclusive lock - not removing" << dendl;
+        ictx->owner_lock.put_read();
+        ictx->state->close();
+        return -EBUSY;
+      }
+    }
+    ictx->owner_lock.put_read();
+
+    // a live migration must be completed or aborted before trashing
+    ictx->snap_lock.get_read();
+    if (!ictx->migration_info.empty()) {
+      lderr(cct) << "cannot move migrating image to trash" << dendl;
+      ictx->snap_lock.put_read();
+      ictx->state->close();
+      return -EBUSY;
+    }
+    ictx->snap_lock.put_read();
+
+    r = disable_mirroring<I>(ictx);
+    if (r < 0) {
+      ictx->state->close();
+      return r;
+    }
+
+    ictx->state->close();
+  }
+
+  utime_t delete_time{ceph_clock_now()};
+  utime_t deferment_end_time{delete_time};
+  deferment_end_time += delay;
+  cls::rbd::TrashImageSpec trash_image_spec{
+    static_cast<cls::rbd::TrashImageSource>(source), image_name,
+    delete_time, deferment_end_time};
+
+  // mark the entry MOVING while the request is in flight; reset below
+  trash_image_spec.state = cls::rbd::TRASH_IMAGE_STATE_MOVING;
+  C_SaferCond ctx;
+  auto req = trash::MoveRequest<I>::create(io_ctx, image_id, trash_image_spec,
+                                           &ctx);
+  req->send();
+
+  r = ctx.wait();
+  trash_image_spec.state = cls::rbd::TRASH_IMAGE_STATE_NORMAL;
+  int ret = cls_client::trash_state_set(&io_ctx, image_id,
+                                        trash_image_spec.state,
+                                        cls::rbd::TRASH_IMAGE_STATE_MOVING);
+  // -EOPNOTSUPP is tolerated -- presumably older OSDs without trash-state
+  // support; confirm against cls_rbd
+  if (ret < 0 && ret != -EOPNOTSUPP) {
+    lderr(cct) << "error setting trash image state: "
+               << cpp_strerror(ret) << dendl;
+    return ret;
+  }
+  if (r < 0) {
+    return r;
+  }
+
+  // best effort: notify trash watchers of the new entry; a notification
+  // failure does not fail the move
+  C_SaferCond notify_ctx;
+  TrashWatcher<I>::notify_image_added(io_ctx, image_id, trash_image_spec,
+                                      &notify_ctx);
+  r = notify_ctx.wait();
+  if (r < 0) {
+    lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+               << dendl;
+  }
+
+  return 0;
+}
+
+// Move an image to the trash by name: resolve its id through the v2
+// directory and delegate to the id-based overload.
+//
+// @returns -EOPNOTSUPP for v1 (old-format) images, -ENOENT when the name
+//          is unknown, or the result of the id-based move
+template <typename I>
+int Trash<I>::move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+                   const std::string &image_name, uint64_t delay) {
+  CephContext *cct((CephContext *)io_ctx.cct());
+  ldout(cct, 20) << &io_ctx << " name=" << image_name << dendl;
+
+  // try to get image id from the directory
+  std::string image_id;
+  int r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name,
+                                 &image_id);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to retrieve image id: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  if (r == -ENOENT) {
+    // not in the v2 directory -- a v1 (old-format) image cannot be trashed
+    if (io_ctx.stat(util::old_header_name(image_name), nullptr, nullptr) == 0) {
+      ldout(cct, 10) << "cannot move v1 image to trash" << dendl;
+      return -EOPNOTSUPP;
+    }
+
+    // image doesn't exist -- perhaps already in the trash since removing
+    // from the directory is the last step
+    return -ENOENT;
+  }
+
+  ceph_assert(!image_name.empty() && !image_id.empty());
+  return Trash<I>::move(io_ctx, source, image_name, image_id, delay);
+}
+
+// Fetch the trash metadata for a single entry and convert it to the
+// public trash_image_info_t representation.
+//
+// @returns 0 on success, -ENOENT if the id is not in the trash
+template <typename I>
+int Trash<I>::get(IoCtx &io_ctx, const std::string &id,
+                  trash_image_info_t *info) {
+  CephContext *cct((CephContext *)io_ctx.cct());
+  ldout(cct, 20) << __func__ << " " << &io_ctx << dendl;
+
+  cls::rbd::TrashImageSpec spec;
+  int r = cls_client::trash_get(&io_ctx, id, &spec);
+  if (r < 0) {
+    // a missing entry is an expected condition -- don't log it as an error
+    if (r != -ENOENT) {
+      lderr(cct) << "error retrieving trash entry: " << cpp_strerror(r)
+                 << dendl;
+    }
+    return r;
+  }
+
+  *info = trash_image_info_t{
+      id, spec.name, static_cast<rbd_trash_image_source_t>(spec.source),
+      spec.deletion_time.sec(), spec.deferment_end_time.sec()};
+  return 0;
+}
+
+// Append all trash entries in the pool to 'entries', paging through the
+// trash directory in chunks of 1024 keys.
+//
+// @param exclude_user_remove_source skip entries created by an in-progress
+//        'rbd remove' (RBD_TRASH_IMAGE_SOURCE_REMOVING)
+// @returns 0 on success (a missing/empty trash directory is not an error)
+template <typename I>
+int Trash<I>::list(IoCtx &io_ctx, vector<trash_image_info_t> &entries,
+                   bool exclude_user_remove_source) {
+  CephContext *cct((CephContext *)io_ctx.cct());
+  ldout(cct, 20) << "trash_list " << &io_ctx << dendl;
+
+  bool more_entries;
+  uint32_t max_read = 1024;
+  std::string last_read = "";
+  do {
+    map<string, cls::rbd::TrashImageSpec> trash_entries;
+    int r = cls_client::trash_list(&io_ctx, last_read, max_read,
+                                   &trash_entries);
+    if (r < 0 && r != -ENOENT) {
+      lderr(cct) << "error listing rbd trash entries: " << cpp_strerror(r)
+                 << dendl;
+      return r;
+    } else if (r == -ENOENT) {
+      // trash directory object doesn't exist -- nothing trashed yet
+      break;
+    }
+
+    if (trash_entries.empty()) {
+      break;
+    }
+
+    for (const auto &entry : trash_entries) {
+      rbd_trash_image_source_t source =
+          static_cast<rbd_trash_image_source_t>(entry.second.source);
+      if (exclude_user_remove_source &&
+          source == RBD_TRASH_IMAGE_SOURCE_REMOVING) {
+        continue;
+      }
+      entries.push_back({entry.first, entry.second.name, source,
+                         entry.second.deletion_time.sec(),
+                         entry.second.deferment_end_time.sec()});
+    }
+    // resume the next page after the last key seen
+    last_read = trash_entries.rbegin()->first;
+    more_entries = (trash_entries.size() >= max_read);
+  } while (more_entries);
+
+  return 0;
+}
+
+// Purge user-trashed images from the pool.
+//
+// Two independent criteria select images for removal:
+//  * capacity: when 'threshold' is set (!= -1), enough of the
+//    oldest-deferred images are selected, per data pool, to bring pool
+//    usage back below the threshold (sizes estimated via diff_iterate)
+//  * time: every entry whose deferment period ended at or before
+//    'expire_ts' (0 means "now")
+//
+// @param io_ctx pool to purge
+// @param expire_ts deferment-expiry cutoff (0 -> current time)
+// @param threshold fractional usage target in [0, 1], or -1 to disable
+// @param pctx progress is reported once per removed image
+// @returns 0 on success, or the first fatal error
+template <typename I>
+int Trash<I>::purge(IoCtx& io_ctx, time_t expire_ts,
+                    float threshold, ProgressContext& pctx) {
+  auto *cct((CephContext *) io_ctx.cct());
+  ldout(cct, 20) << &io_ctx << dendl;
+
+  std::vector<librbd::trash_image_info_t> trash_entries;
+  int r = librbd::api::Trash<I>::list(io_ctx, trash_entries, true);
+  if (r < 0) {
+    return r;
+  }
+
+  // only user-trashed images are eligible for purging
+  trash_entries.erase(
+      std::remove_if(trash_entries.begin(), trash_entries.end(),
+                     [](librbd::trash_image_info_t info) {
+                       return info.source != RBD_TRASH_IMAGE_SOURCE_USER;
+                     }),
+      trash_entries.end());
+
+  std::set<std::string> to_be_removed;
+  if (threshold != -1) {
+    if (threshold < 0 || threshold > 1) {
+      lderr(cct) << "argument 'threshold' is out of valid range"
+                 << dendl;
+      return -EINVAL;
+    }
+
+    librados::bufferlist inbl;
+    librados::bufferlist outbl;
+    std::string pool_name = io_ctx.get_pool_name();
+
+    // query cluster-wide pool usage via the 'ceph df' mon command
+    librados::Rados rados(io_ctx);
+    rados.mon_command(R"({"prefix": "df", "format": "json"})", inbl,
+                      &outbl, nullptr);
+
+    json_spirit::mValue json;
+    if (!json_spirit::read(outbl.to_str(), json)) {
+      lderr(cct) << "ceph df json output could not be parsed"
+                 << dendl;
+      return -EBADMSG;
+    }
+
+    json_spirit::mArray arr = json.get_obj()["pools"].get_array();
+
+    double pool_percent_used = 0;
+    uint64_t pool_total_bytes = 0;
+
+    std::map<std::string, std::vector<std::string>> datapools;
+
+    // oldest deferment first so the longest-trashed images are freed first
+    std::sort(trash_entries.begin(), trash_entries.end(),
+        [](librbd::trash_image_info_t a, librbd::trash_image_info_t b) {
+          return a.deferment_end_time < b.deferment_end_time;
+        }
+    );
+
+    // bucket the trash entries by the pool holding their data objects
+    for (const auto &entry : trash_entries) {
+      int64_t data_pool_id = -1;
+      r = cls_client::get_data_pool(&io_ctx, util::header_name(entry.id),
+                                    &data_pool_id);
+      if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) {
+        lderr(cct) << "failed to query data pool: " << cpp_strerror(r) << dendl;
+        return r;
+      } else if (data_pool_id == -1) {
+        data_pool_id = io_ctx.get_id();
+      }
+
+      if (data_pool_id != io_ctx.get_id()) {
+        librados::IoCtx data_io_ctx;
+        r = util::create_ioctx(io_ctx, "image", data_pool_id,
+                               {}, &data_io_ctx);
+        if (r < 0) {
+          lderr(cct) << "error accessing data pool" << dendl;
+          continue;
+        }
+        auto data_pool = data_io_ctx.get_pool_name();
+        datapools[data_pool].push_back(entry.id);
+      } else {
+        datapools[pool_name].push_back(entry.id);
+      }
+    }
+
+    uint64_t bytes_to_free = 0;
+
+    // FIX: was 'uint8_t i' -- that truncates once 'ceph df' reports more
+    // than 255 pools and the comparison against arr.size() never
+    // terminates (i wraps to 0)
+    for (size_t i = 0; i < arr.size(); ++i) {
+      json_spirit::mObject obj = arr[i].get_obj();
+      std::string name = obj.find("name")->second.get_str();
+      auto img = datapools.find(name);
+      if (img != datapools.end()) {
+        json_spirit::mObject stats = arr[i].get_obj()["stats"].get_obj();
+        pool_percent_used = stats["percent_used"].get_real();
+        if (pool_percent_used <= threshold) continue;
+
+        bytes_to_free = 0;
+
+        pool_total_bytes = stats["max_avail"].get_uint64() +
+                           stats["bytes_used"].get_uint64();
+
+        // bytes that must be freed to fall back below the threshold
+        auto bytes_threshold = (uint64_t) (pool_total_bytes *
+                                           (pool_percent_used - threshold));
+
+        for (const auto &it : img->second) {
+          auto ictx = new I("", it, nullptr, io_ctx, false);
+          r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT);
+          if (r == -ENOENT) {
+            continue;
+          } else if (r < 0) {
+            lderr(cct) << "failed to open image " << it << ": "
+                       << cpp_strerror(r) << dendl;
+            // FIX: the image context is not usable after a failed open --
+            // skip this entry instead of falling through to diff_iterate
+            continue;
+          }
+
+          // estimate the image's disk usage by summing existing extents
+          r = librbd::api::DiffIterate<I>::diff_iterate(
+              ictx, cls::rbd::UserSnapshotNamespace(), nullptr, 0, ictx->size,
+              false, true,
+              [](uint64_t offset, size_t len, int exists, void *arg) {
+                auto *to_free = reinterpret_cast<uint64_t *>(arg);
+                if (exists)
+                  (*to_free) += len;
+                return 0;
+              }, &bytes_to_free);
+
+          ictx->state->close();
+          if (r < 0) {
+            lderr(cct) << "failed to calculate disk usage for image " << it
+                       << ": " << cpp_strerror(r) << dendl;
+            continue;
+          }
+
+          to_be_removed.insert(it);
+          if (bytes_to_free >= bytes_threshold) {
+            break;
+          }
+        }
+      }
+    }
+
+    if (bytes_to_free == 0) {
+      // nothing selected by the capacity criterion; the time-based pass
+      // is skipped as well when a threshold was requested
+      ldout(cct, 10) << "pool usage is lower than or equal to "
+                     << (threshold * 100)
+                     << "%" << dendl;
+      return 0;
+    }
+  }
+
+  if (expire_ts == 0) {
+    struct timespec now;
+    clock_gettime(CLOCK_REALTIME, &now);
+    expire_ts = now.tv_sec;
+  }
+
+  // time-based selection: deferment period has elapsed
+  for (const auto &entry : trash_entries) {
+    if (expire_ts >= entry.deferment_end_time) {
+      to_be_removed.insert(entry.id);
+    }
+  }
+
+  NoOpProgressContext remove_pctx;
+  uint64_t list_size = to_be_removed.size(), i = 0;
+  for (const auto &entry_id : to_be_removed) {
+    r = librbd::api::Trash<I>::remove(io_ctx, entry_id, true, remove_pctx);
+    if (r < 0) {
+      if (r == -ENOTEMPTY) {
+        ldout(cct, 5) << "image has snapshots - these must be deleted "
+                      << "with 'rbd snap purge' before the image can be "
+                      << "removed." << dendl;
+      } else if (r == -EBUSY) {
+        ldout(cct, 5) << "error: image still has watchers" << std::endl
+                      << "This means the image is still open or the client "
+                      << "using it crashed. Try again after closing/unmapping "
+                      << "it or waiting 30s for the crashed client to timeout."
+                      << dendl;
+      } else if (r == -EMLINK) {
+        ldout(cct, 5) << "Remove the image from the group and try again."
+                      << dendl;
+      } else {
+        lderr(cct) << "remove error: " << cpp_strerror(r) << dendl;
+      }
+      return r;
+    }
+    pctx.update_progress(++i, list_size);
+  }
+
+  return 0;
+}
+
+// Permanently delete a trashed image.
+//
+// Fails with -EPERM when the deferment period has not expired (unless
+// 'force' is set) and with -EBUSY when the entry is in a restore-related
+// state. Trash watchers are notified on success (best effort).
+template <typename I>
+int Trash<I>::remove(IoCtx &io_ctx, const std::string &image_id, bool force,
+                     ProgressContext& prog_ctx) {
+  CephContext *cct((CephContext *)io_ctx.cct());
+  ldout(cct, 20) << "trash_remove " << &io_ctx << " " << image_id
+                 << " " << force << dendl;
+
+  cls::rbd::TrashImageSpec trash_spec;
+  int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec);
+  if (r < 0) {
+    lderr(cct) << "error getting image id " << image_id
+               << " info from trash: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  utime_t now = ceph_clock_now();
+  if (now < trash_spec.deferment_end_time && !force) {
+    lderr(cct) << "error: deferment time has not expired." << dendl;
+    return -EPERM;
+  }
+  // only NORMAL entries (or a resumed REMOVING) may be deleted; any other
+  // state means another client owns the entry (e.g. a restore in progress)
+  if (trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL &&
+      trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) {
+    lderr(cct) << "error: image is pending restoration." << dendl;
+    return -EBUSY;
+  }
+
+  ThreadPool *thread_pool;
+  ContextWQ *op_work_queue;
+  ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
+
+  C_SaferCond cond;
+  auto req = librbd::trash::RemoveRequest<I>::create(
+    io_ctx, image_id, op_work_queue, force, prog_ctx, &cond);
+  req->send();
+
+  r = cond.wait();
+  if (r < 0) {
+    return r;
+  }
+
+  // best effort: a failed watcher notification does not fail the removal
+  C_SaferCond notify_ctx;
+  TrashWatcher<I>::notify_image_removed(io_ctx, image_id, &notify_ctx);
+  r = notify_ctx.wait();
+  if (r < 0) {
+    lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+               << dendl;
+  }
+
+  return 0;
+}
+
+// Restore a trashed image back into the rbd directory, optionally under a
+// new name.
+//
+// The entry's source must appear in 'trash_image_sources'. The restore is
+// staged through the RESTORING trash state so that a concurrent purge
+// refuses to delete the entry; on failure the state is rolled back to
+// NORMAL. The sequence (id object -> v2 directory -> trash removal) makes
+// an interrupted restore resumable.
+//
+// @param image_new_name new name, or empty to reuse the pre-trash name
+// @returns 0 on success, -EINVAL for a disallowed source, -EBUSY when the
+//          entry is pending deletion, -EEXIST when the name is taken
+template <typename I>
+int Trash<I>::restore(librados::IoCtx &io_ctx,
+                      const TrashImageSources& trash_image_sources,
+                      const std::string &image_id,
+                      const std::string &image_new_name) {
+  CephContext *cct((CephContext *)io_ctx.cct());
+  ldout(cct, 20) << "trash_restore " << &io_ctx << " " << image_id << " "
+                 << image_new_name << dendl;
+
+  cls::rbd::TrashImageSpec trash_spec;
+  int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec);
+  if (r < 0) {
+    lderr(cct) << "error getting image id " << image_id
+               << " info from trash: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  if (trash_image_sources.count(trash_spec.source) == 0) {
+    lderr(cct) << "Current trash source '" << trash_spec.source << "' "
+               << "does not match expected: "
+               << trash_image_sources << dendl;
+    return -EINVAL;
+  }
+
+  std::string image_name = image_new_name;
+  // an entry in any other state (e.g. MOVING/REMOVING) is owned by a
+  // concurrent operation
+  if (trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL &&
+      trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_RESTORING) {
+    lderr(cct) << "error restoring image id " << image_id
+               << ", which is pending deletion" << dendl;
+    return -EBUSY;
+  }
+  // claim the entry: NORMAL -> RESTORING (-EOPNOTSUPP tolerated,
+  // presumably clusters without trash-state support -- confirm)
+  r = cls_client::trash_state_set(&io_ctx, image_id,
+                                  cls::rbd::TRASH_IMAGE_STATE_RESTORING,
+                                  cls::rbd::TRASH_IMAGE_STATE_NORMAL);
+  if (r < 0 && r != -EOPNOTSUPP) {
+    lderr(cct) << "error setting trash image state: "
+               << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  if (image_name.empty()) {
+    // if user didn't specify a new name, let's try using the old name
+    image_name = trash_spec.name;
+    ldout(cct, 20) << "restoring image id " << image_id << " with name "
+                   << image_name << dendl;
+  }
+
+  // check if no image exists with the same name
+  bool create_id_obj = true;
+  std::string existing_id;
+  r = cls_client::get_id(&io_ctx, util::id_obj_name(image_name), &existing_id);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "error checking if image " << image_name << " exists: "
+               << cpp_strerror(r) << dendl;
+    // roll back to NORMAL so the entry can be purged/restored later
+    int ret = cls_client::trash_state_set(&io_ctx, image_id,
+                                          cls::rbd::TRASH_IMAGE_STATE_NORMAL,
+                                          cls::rbd::TRASH_IMAGE_STATE_RESTORING);
+    if (ret < 0 && ret != -EOPNOTSUPP) {
+      lderr(cct) << "error setting trash image state: "
+                 << cpp_strerror(ret) << dendl;
+    }
+    return r;
+  } else if (r != -ENOENT){
+    // checking if we are recovering from an incomplete restore
+    if (existing_id != image_id) {
+      ldout(cct, 2) << "an image with the same name already exists" << dendl;
+      int r2 = cls_client::trash_state_set(&io_ctx, image_id,
+                                           cls::rbd::TRASH_IMAGE_STATE_NORMAL,
+                                           cls::rbd::TRASH_IMAGE_STATE_RESTORING);
+      if (r2 < 0 && r2 != -EOPNOTSUPP) {
+        lderr(cct) << "error setting trash image state: "
+                   << cpp_strerror(r2) << dendl;
+      }
+      return -EEXIST;
+    }
+    // the id object already points at us: resume the incomplete restore
+    create_id_obj = false;
+  }
+
+  if (create_id_obj) {
+    ldout(cct, 2) << "adding id object" << dendl;
+    librados::ObjectWriteOperation op;
+    op.create(true);
+    cls_client::set_id(&op, image_id);
+    r = io_ctx.operate(util::id_obj_name(image_name), &op);
+    if (r < 0) {
+      lderr(cct) << "error adding id object for image " << image_name
+                 << ": " << cpp_strerror(r) << dendl;
+      return r;
+    }
+  }
+
+  ldout(cct, 2) << "adding rbd image to v2 directory..." << dendl;
+  r = cls_client::dir_add_image(&io_ctx, RBD_DIRECTORY, image_name,
+                                image_id);
+  // -EEXIST is tolerated: a resumed restore may already have added it
+  if (r < 0 && r != -EEXIST) {
+    lderr(cct) << "error adding image to v2 directory: "
+               << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  r = enable_mirroring<I>(io_ctx, image_id);
+  if (r < 0) {
+    // not fatal -- ignore
+  }
+
+  ldout(cct, 2) << "removing image from trash..." << dendl;
+  r = cls_client::trash_remove(&io_ctx, image_id);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "error removing image id " << image_id << " from trash: "
+               << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  // best effort: notify trash watchers that the entry is gone
+  C_SaferCond notify_ctx;
+  TrashWatcher<I>::notify_image_removed(io_ctx, image_id, &notify_ctx);
+  r = notify_ctx.wait();
+  if (r < 0) {
+    lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
+               << dendl;
+  }
+
+  return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::Trash<librbd::ImageCtx>;
diff --git a/src/librbd/api/Trash.h b/src/librbd/api/Trash.h
new file mode 100644
index 00000000..65b6b8bc
--- /dev/null
+++ b/src/librbd/api/Trash.h
@@ -0,0 +1,53 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef LIBRBD_API_TRASH_H
+#define LIBRBD_API_TRASH_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "include/rbd/librbd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include <set>
+#include <string>
+#include <vector>
+
+namespace librbd {
+
+class ProgressContext;
+
+struct ImageCtx;
+
+namespace api {
+
+// Pool-level trash operations: moving images in and out of the trash,
+// listing, purging and permanently deleting trashed images.
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Trash {
+  typedef std::set<cls::rbd::TrashImageSource> TrashImageSources;
+  // sources from which a trashed image may be restored
+  static const TrashImageSources RESTORE_SOURCE_WHITELIST;
+
+  // move an image to the trash, resolving its id by name
+  static int move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+                  const std::string &image_name, uint64_t delay);
+  // move an image with a known id to the trash; 'delay' defers purging
+  static int move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
+                  const std::string &image_name, const std::string &image_id,
+                  uint64_t delay);
+  // fetch the trash metadata for a single entry
+  static int get(librados::IoCtx &io_ctx, const std::string &id,
+                 trash_image_info_t *info);
+  // append all trash entries in the pool to 'entries'
+  static int list(librados::IoCtx &io_ctx,
+                  std::vector<trash_image_info_t> &entries,
+                  bool exclude_user_remove_source);
+  // delete entries selected by deferment expiry and/or capacity threshold
+  static int purge(IoCtx& io_ctx, time_t expire_ts,
+                   float threshold, ProgressContext& pctx);
+  // permanently delete a trashed image
+  static int remove(librados::IoCtx &io_ctx, const std::string &image_id,
+                    bool force, ProgressContext& prog_ctx);
+  // move a trashed image back into the rbd directory
+  static int restore(librados::IoCtx &io_ctx,
+                     const TrashImageSources& trash_image_sources,
+                     const std::string &image_id,
+                     const std::string &image_new_name);
+
+};
+
+} // namespace api
+} // namespace librbd
+
+extern template class librbd::api::Trash<librbd::ImageCtx>;
+
+#endif // LIBRBD_API_TRASH_H