diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/librbd/api | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/librbd/api/Config.cc | 240 | ||||
-rw-r--r-- | src/librbd/api/Config.h | 37 | ||||
-rw-r--r-- | src/librbd/api/DiffIterate.cc | 554 | ||||
-rw-r--r-- | src/librbd/api/DiffIterate.h | 66 | ||||
-rw-r--r-- | src/librbd/api/Group.cc | 1207 | ||||
-rw-r--r-- | src/librbd/api/Group.h | 59 | ||||
-rw-r--r-- | src/librbd/api/Image.cc | 836 | ||||
-rw-r--r-- | src/librbd/api/Image.h | 78 | ||||
-rw-r--r-- | src/librbd/api/Migration.cc | 1885 | ||||
-rw-r--r-- | src/librbd/api/Migration.h | 105 | ||||
-rw-r--r-- | src/librbd/api/Mirror.cc | 1541 | ||||
-rw-r--r-- | src/librbd/api/Mirror.h | 87 | ||||
-rw-r--r-- | src/librbd/api/Namespace.cc | 227 | ||||
-rw-r--r-- | src/librbd/api/Namespace.h | 33 | ||||
-rw-r--r-- | src/librbd/api/Pool.cc | 377 | ||||
-rw-r--r-- | src/librbd/api/Pool.h | 38 | ||||
-rw-r--r-- | src/librbd/api/PoolMetadata.cc | 151 | ||||
-rw-r--r-- | src/librbd/api/PoolMetadata.h | 36 | ||||
-rw-r--r-- | src/librbd/api/Snapshot.cc | 196 | ||||
-rw-r--r-- | src/librbd/api/Snapshot.h | 37 | ||||
-rw-r--r-- | src/librbd/api/Trash.cc | 681 | ||||
-rw-r--r-- | src/librbd/api/Trash.h | 53 |
22 files changed, 8524 insertions, 0 deletions
diff --git a/src/librbd/api/Config.cc b/src/librbd/api/Config.cc new file mode 100644 index 00000000..08be9d2c --- /dev/null +++ b/src/librbd/api/Config.cc @@ -0,0 +1,240 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Config.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/api/PoolMetadata.h" +#include <boost/algorithm/string/predicate.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::Config: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +const uint32_t MAX_KEYS = 64; + +typedef std::map<std::string, std::pair<std::string, config_source_t>> Parent; + +static std::set<std::string> EXCLUDE_OPTIONS { + "rbd_auto_exclusive_lock_until_manual_request", + "rbd_default_format", + "rbd_default_map_options", + "rbd_default_pool", + "rbd_discard_on_zeroed_write_same", + "rbd_op_thread_timeout", + "rbd_op_threads", + "rbd_tracing", + "rbd_validate_names", + "rbd_validate_pool", + "rbd_mirror_pool_replayers_refresh_interval", + "rbd_config_pool_override_update_timestamp" + }; +static std::set<std::string> EXCLUDE_IMAGE_OPTIONS { + "rbd_default_clone_format", + "rbd_default_data_pool", + "rbd_default_features", + "rbd_default_format", + "rbd_default_order", + "rbd_default_stripe_count", + "rbd_default_stripe_unit", + "rbd_journal_order", + "rbd_journal_pool", + "rbd_journal_splay_width" + }; + +struct Options : Parent { + librados::IoCtx m_io_ctx; + + Options(librados::IoCtx& io_ctx, bool image_apply_only_options) { + m_io_ctx.dup(io_ctx); + m_io_ctx.set_namespace(""); + + CephContext *cct = reinterpret_cast<CephContext *>(m_io_ctx.cct()); + + const std::string rbd_key_prefix("rbd_"); + const std::string rbd_mirror_key_prefix("rbd_mirror_"); + auto& schema = cct->_conf.get_schema(); + for (auto& pair : schema) { + if (!boost::starts_with(pair.first, rbd_key_prefix)) { + continue; + } else if (EXCLUDE_OPTIONS.count(pair.first) != 0) { + continue; + } else if (image_apply_only_options && + EXCLUDE_IMAGE_OPTIONS.count(pair.first) != 0) { + continue; + } else if (image_apply_only_options && + boost::starts_with(pair.first, rbd_mirror_key_prefix)) { + continue; + } + + insert({pair.first, {}}); + } + } + + int init() { + CephContext *cct = (CephContext *)m_io_ctx.cct(); + + for (auto &it : *this) { + int r = cct->_conf.get_val(it.first.c_str(), &it.second.first); + ceph_assert(r == 0); + it.second.second = RBD_CONFIG_SOURCE_CONFIG; + } + + std::string last_key = ImageCtx::METADATA_CONF_PREFIX; + bool more_results = true; + + while (more_results) { + std::map<std::string, bufferlist> pairs; + + int r = librbd::api::PoolMetadata<>::list(m_io_ctx, last_key, MAX_KEYS, + &pairs); + if (r < 0) { + return r; + } + + if (pairs.empty()) { + break; + } + + more_results = (pairs.size() == MAX_KEYS); + last_key = pairs.rbegin()->first; + + for (auto kv : pairs) { + std::string key; + if (!util::is_metadata_config_override(kv.first, &key)) { + more_results = false; + break; + } + auto it = find(key); + if (it != end()) { + it->second = {{kv.second.c_str(), kv.second.length()}, + RBD_CONFIG_SOURCE_POOL}; + } + } + } + return 0; + } +}; + +} // anonymous namespace + +template <typename I> +bool Config<I>::is_option_name(librados::IoCtx& io_ctx, + const std::string &name) { + Options opts(io_ctx, false); + + return (opts.find(name) != opts.end()); +} + +template <typename I> +int Config<I>::list(librados::IoCtx& io_ctx, + std::vector<config_option_t> *options) { + Options opts(io_ctx, false); + + int r = opts.init(); + if (r < 0) { + return r; + } + + for (auto &it : opts) { + options->push_back({it.first, it.second.first, it.second.second}); + } + + return 0; +} + +template <typename I> +bool Config<I>::is_option_name(I *image_ctx, const std::string &name) { + Options opts(image_ctx->md_ctx, true); + + return (opts.find(name) != opts.end()); +} + +template <typename I> +int Config<I>::list(I *image_ctx, std::vector<config_option_t> *options) { + CephContext *cct = image_ctx->cct; + Options opts(image_ctx->md_ctx, true); + + int r = opts.init(); + if (r < 0) { + return r; + } + + std::string last_key = ImageCtx::METADATA_CONF_PREFIX; + bool more_results = true; + + while (more_results) { + std::map<std::string, bufferlist> pairs; + + r = cls_client::metadata_list(&image_ctx->md_ctx, image_ctx->header_oid, + last_key, MAX_KEYS, &pairs); + if (r < 0) { + lderr(cct) << "failed reading image metadata: " << cpp_strerror(r) + << dendl; + return r; + } + + if (pairs.empty()) { + break; + } + + more_results = (pairs.size() == MAX_KEYS); + last_key = pairs.rbegin()->first; + + for (auto kv : pairs) { + std::string key; + if (!util::is_metadata_config_override(kv.first, &key)) { + more_results = false; + break; + } + auto it = opts.find(key); + if (it != opts.end()) { + it->second = {{kv.second.c_str(), kv.second.length()}, + RBD_CONFIG_SOURCE_IMAGE}; + } + } + } + + for (auto &it : opts) { + options->push_back({it.first, it.second.first, it.second.second}); + } + + return 0; +} + +template <typename I> +void Config<I>::apply_pool_overrides(librados::IoCtx& io_ctx, + ConfigProxy* config) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + + Options opts(io_ctx, false); + int r = opts.init(); + if (r < 0) { + lderr(cct) << "failed to read pool config overrides: " << cpp_strerror(r) + << dendl; + return; + } + + for (auto& pair : opts) { + if (pair.second.second == RBD_CONFIG_SOURCE_POOL) { + r = config->set_val(pair.first, pair.second.first); + if (r < 0) { + lderr(cct) << "failed to override pool config " << pair.first << "=" + << pair.second.first << ": " << cpp_strerror(r) << dendl; + } + } + } +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Config<librbd::ImageCtx>; diff --git a/src/librbd/api/Config.h b/src/librbd/api/Config.h new file mode 100644 index 00000000..daa718c9 --- /dev/null +++ b/src/librbd/api/Config.h @@ -0,0 +1,37 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_CONFIG_H +#define CEPH_LIBRBD_API_CONFIG_H + +#include "include/rbd/librbd.hpp" +#include "include/rados/librados_fwd.hpp" + +struct ConfigProxy; + +namespace librbd { + +class ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +class Config { +public: + static bool is_option_name(librados::IoCtx& io_ctx, const std::string &name); + static int list(librados::IoCtx& io_ctx, + std::vector<config_option_t> *options); + + static bool is_option_name(ImageCtxT *image_ctx, const std::string &name); + static int list(ImageCtxT *image_ctx, std::vector<config_option_t> *options); + + static void apply_pool_overrides(librados::IoCtx& io_ctx, + ConfigProxy* config); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Config<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_API_CONFIG_H diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc new file mode 100644 index 00000000..f96264e3 --- /dev/null +++ b/src/librbd/api/DiffIterate.cc @@ -0,0 +1,554 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/DiffIterate.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "librbd/internal.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ImageDispatchSpec.h" +#include "librbd/io/ImageRequestWQ.h" +#include "include/rados/librados.hpp" +#include "include/interval_set.h" +#include "common/errno.h" +#include "common/Throttle.h" +#include "osdc/Striper.h" +#include "librados/snap_set_diff.h" +#include <boost/tuple/tuple.hpp> +#include <list> +#include <map> +#include <vector> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::DiffIterate: " + +namespace librbd { +namespace api { + +namespace { + +enum ObjectDiffState { + OBJECT_DIFF_STATE_NONE = 0, + OBJECT_DIFF_STATE_UPDATED = 1, + OBJECT_DIFF_STATE_HOLE = 2 +}; + +struct DiffContext { + DiffIterate<>::Callback callback; + void *callback_arg; + bool whole_object; + uint64_t from_snap_id; + uint64_t end_snap_id; + interval_set<uint64_t> parent_diff; + OrderedThrottle throttle; + + template <typename I> + DiffContext(I &image_ctx, DiffIterate<>::Callback callback, + void *callback_arg, bool _whole_object, uint64_t _from_snap_id, + uint64_t _end_snap_id) + : callback(callback), callback_arg(callback_arg), + whole_object(_whole_object), from_snap_id(_from_snap_id), + end_snap_id(_end_snap_id), + throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) { + } +}; + +class C_DiffObject : public Context { +public: + template <typename I> + C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx, + DiffContext &diff_context, const std::string &oid, + uint64_t offset, const std::vector<ObjectExtent> &object_extents) + : m_cct(image_ctx.cct), m_head_ctx(head_ctx), + m_diff_context(diff_context), m_oid(oid), m_offset(offset), + m_object_extents(object_extents), m_snap_ret(0) { + } + + void send() { + C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this); + librados::AioCompletion *rados_completion = + util::create_rados_callback(ctx); + + librados::ObjectReadOperation op; + op.list_snaps(&m_snap_set, &m_snap_ret); + + int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL); + ceph_assert(r == 0); + rados_completion->release(); + } + +protected: + typedef boost::tuple<uint64_t, size_t, bool> Diff; + typedef std::list<Diff> Diffs; + + void finish(int r) override { + CephContext *cct = m_cct; + if (r == 0 && m_snap_ret < 0) { + r = m_snap_ret; + } + + Diffs diffs; + if (r == 0) { + ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl; + compute_diffs(&diffs); + } else if (r == -ENOENT) { + ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)" + << dendl; + r = 0; + compute_parent_overlap(&diffs); + } else { + ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: " + << cpp_strerror(r) << dendl; + } + + if (r == 0) { + for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) { + r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(), + m_diff_context.callback_arg); + if (r < 0) { + break; + } + } + } + m_diff_context.throttle.end_op(r); + } + +private: + CephContext *m_cct; + librados::IoCtx &m_head_ctx; + DiffContext &m_diff_context; + std::string m_oid; + uint64_t m_offset; + std::vector<ObjectExtent> m_object_extents; + + librados::snap_set_t m_snap_set; + int m_snap_ret; + + void compute_diffs(Diffs *diffs) { + CephContext *cct = m_cct; + + // calc diff from from_snap_id -> to_snap_id + interval_set<uint64_t> diff; + uint64_t end_size; + bool end_exists; + librados::snap_t clone_end_snap_id; + bool whole_object; + calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id, + m_diff_context.end_snap_id, &diff, &end_size, + &end_exists, &clone_end_snap_id, &whole_object); + if (whole_object) { + ldout(cct, 1) << "object " << m_oid << ": need to provide full object" + << dendl; + } + ldout(cct, 20) << " diff " << diff << " end_exists=" << end_exists + << dendl; + if (diff.empty() && !whole_object) { + if (m_diff_context.from_snap_id == 0 && !end_exists) { + compute_parent_overlap(diffs); + } + return; + } else if (m_diff_context.whole_object || whole_object) { + // provide the full object extents to the callback + for (vector<ObjectExtent>::iterator q = m_object_extents.begin(); + q != m_object_extents.end(); ++q) { + diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length, + end_exists)); + } + return; + } + + for (vector<ObjectExtent>::iterator q = m_object_extents.begin(); + q != m_object_extents.end(); ++q) { + ldout(cct, 20) << "diff_iterate object " << m_oid << " extent " + << q->offset << "~" << q->length << " from " + << q->buffer_extents << dendl; + uint64_t opos = q->offset; + for (vector<pair<uint64_t,uint64_t> >::iterator r = + q->buffer_extents.begin(); + r != q->buffer_extents.end(); ++r) { + interval_set<uint64_t> overlap; // object extents + overlap.insert(opos, r->second); + overlap.intersection_of(diff); + ldout(cct, 20) << " opos " << opos + << " buf " << r->first << "~" << r->second + << " overlap " << overlap << dendl; + for (interval_set<uint64_t>::iterator s = overlap.begin(); + s != overlap.end(); ++s) { + uint64_t su_off = s.get_start() - opos; + uint64_t logical_off = m_offset + r->first + su_off; + ldout(cct, 20) << " overlap extent " << s.get_start() << "~" + << s.get_len() << " logical " << logical_off << "~" + << s.get_len() << dendl; + diffs->push_back(boost::make_tuple(logical_off, s.get_len(), + end_exists)); + } + opos += r->second; + } + ceph_assert(opos == q->offset + q->length); + } + } + + void compute_parent_overlap(Diffs *diffs) { + if (m_diff_context.from_snap_id == 0 && + !m_diff_context.parent_diff.empty()) { + // report parent diff instead + for (vector<ObjectExtent>::iterator q = m_object_extents.begin(); + q != m_object_extents.end(); ++q) { + for (vector<pair<uint64_t,uint64_t> >::iterator r = + q->buffer_extents.begin(); + r != q->buffer_extents.end(); ++r) { + interval_set<uint64_t> o; + o.insert(m_offset + r->first, r->second); + o.intersection_of(m_diff_context.parent_diff); + ldout(m_cct, 20) << " reporting parent overlap " << o << dendl; + for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end(); + ++s) { + diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(), + true)); + } + } + } + } + } +}; + +int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) { + // it's possible for a discard to create a hole in the parent image -- ignore + if (exists) { + interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg); + diff->insert(off, len); + } + return 0; +} + +} // anonymous namespace + +template <typename I> +int DiffIterate<I>::diff_iterate(I *ictx, + const cls::rbd::SnapshotNamespace& from_snap_namespace, + const char *fromsnapname, + uint64_t off, uint64_t len, + bool include_parent, bool whole_object, + int (*cb)(uint64_t, size_t, int, void *), + void *arg) +{ + ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off + << " len = " << len << dendl; + + if (!ictx->data_ctx.is_valid()) { + return -ENODEV; + } + + // ensure previous writes are visible to listsnaps + C_SaferCond flush_ctx; + { + RWLock::RLocker owner_locker(ictx->owner_lock); + auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx, + io::AIO_TYPE_FLUSH); + auto req = io::ImageDispatchSpec<I>::create_flush_request( + *ictx, aio_comp, io::FLUSH_SOURCE_INTERNAL, {}); + req->send(); + delete req; + } + int r = flush_ctx.wait(); + if (r < 0) { + return r; + } + + r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + ictx->snap_lock.get_read(); + r = clip_io(ictx, off, &len); + ictx->snap_lock.put_read(); + if (r < 0) { + return r; + } + + DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len, + include_parent, whole_object, cb, arg); + r = command.execute(); + return r; +} + +template <typename I> +int DiffIterate<I>::execute() { + CephContext* cct = m_image_ctx.cct; + + ceph_assert(m_image_ctx.data_ctx.is_valid()); + + librados::IoCtx head_ctx; + librados::snap_t from_snap_id = 0; + librados::snap_t end_snap_id; + uint64_t from_size = 0; + uint64_t end_size; + { + RWLock::RLocker md_locker(m_image_ctx.md_lock); + RWLock::RLocker snap_locker(m_image_ctx.snap_lock); + head_ctx.dup(m_image_ctx.data_ctx); + if (m_from_snap_name) { + from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name); + from_size = m_image_ctx.get_image_size(from_snap_id); + } + end_snap_id = m_image_ctx.snap_id; + end_size = m_image_ctx.get_image_size(end_snap_id); + } + + if (from_snap_id == CEPH_NOSNAP) { + return -ENOENT; + } + if (from_snap_id == end_snap_id) { + // no diff. + return 0; + } + if (from_snap_id >= end_snap_id) { + return -EINVAL; + } + + int r; + bool fast_diff_enabled = false; + BitVector<2> object_diff_state; + { + RWLock::RLocker snap_locker(m_image_ctx.snap_lock); + if (m_whole_object && (m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) { + r = diff_object_map(from_snap_id, end_snap_id, &object_diff_state); + if (r < 0) { + ldout(cct, 5) << "fast diff disabled" << dendl; + } else { + ldout(cct, 5) << "fast diff enabled" << dendl; + fast_diff_enabled = true; + } + } + } + + // we must list snaps via the head, not end snap + head_ctx.snap_set_read(CEPH_SNAPDIR); + + ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to " + << end_snap_id << " size from " << from_size + << " to " << end_size << dendl; + + // check parent overlap only if we are comparing to the beginning of time + DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg, + m_whole_object, from_snap_id, end_snap_id); + if (m_include_parent && from_snap_id == 0) { + RWLock::RLocker l(m_image_ctx.snap_lock); + RWLock::RLocker l2(m_image_ctx.parent_lock); + uint64_t overlap = 0; + m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap); + r = 0; + if (m_image_ctx.parent && overlap > 0) { + ldout(cct, 10) << " first getting parent diff" << dendl; + DiffIterate diff_parent(*m_image_ctx.parent, {}, + nullptr, 0, overlap, + m_include_parent, m_whole_object, + &simple_diff_cb, + &diff_context.parent_diff); + r = diff_parent.execute(); + } + if (r < 0) { + return r; + } + } + + uint64_t period = m_image_ctx.get_stripe_period(); + uint64_t off = m_offset; + uint64_t left = m_length; + + while (left > 0) { + uint64_t period_off = off - (off % period); + uint64_t read_len = min(period_off + period - off, left); + + // map to extents + map<object_t,vector<ObjectExtent> > object_extents; + Striper::file_to_extents(cct, m_image_ctx.format_string, + &m_image_ctx.layout, off, read_len, 0, + object_extents, 0); + + // get snap info for each object + for (map<object_t,vector<ObjectExtent> >::iterator p = + object_extents.begin(); + p != object_extents.end(); ++p) { + ldout(cct, 20) << "object " << p->first << dendl; + + if (fast_diff_enabled) { + const uint64_t object_no = p->second.front().objectno; + if (object_diff_state[object_no] == OBJECT_DIFF_STATE_NONE && + from_snap_id == 0 && !diff_context.parent_diff.empty()) { + // no data in child object -- report parent diff instead + for (auto& oe : p->second) { + for (auto& be : oe.buffer_extents) { + interval_set<uint64_t> o; + o.insert(off + be.first, be.second); + o.intersection_of(diff_context.parent_diff); + ldout(cct, 20) << " reporting parent overlap " << o << dendl; + for (auto e = o.begin(); e != o.end(); ++e) { + r = m_callback(e.get_start(), e.get_len(), true, + m_callback_arg); + if (r < 0) { + return r; + } + } + } + } + } else if (object_diff_state[object_no] != OBJECT_DIFF_STATE_NONE) { + bool updated = (object_diff_state[object_no] == + OBJECT_DIFF_STATE_UPDATED); + for (std::vector<ObjectExtent>::iterator q = p->second.begin(); + q != p->second.end(); ++q) { + r = m_callback(off + q->offset, q->length, updated, m_callback_arg); + if (r < 0) { + return r; + } + } + } + } else { + C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx, + diff_context, + p->first.name, off, + p->second); + diff_object->send(); + + if (diff_context.throttle.pending_error()) { + r = diff_context.throttle.wait_for_ret(); + return r; + } + } + } + + left -= read_len; + off += read_len; + } + + r = diff_context.throttle.wait_for_ret(); + if (r < 0) { + return r; + } + return 0; +} + +template <typename I> +int DiffIterate<I>::diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id, + BitVector<2>* object_diff_state) { + ceph_assert(m_image_ctx.snap_lock.is_locked()); + CephContext* cct = m_image_ctx.cct; + + bool diff_from_start = (from_snap_id == 0); + if (from_snap_id == 0) { + if (!m_image_ctx.snaps.empty()) { + from_snap_id = m_image_ctx.snaps.back(); + } else { + from_snap_id = CEPH_NOSNAP; + } + } + + object_diff_state->clear(); + uint64_t current_snap_id = from_snap_id; + uint64_t next_snap_id = to_snap_id; + BitVector<2> prev_object_map; + bool prev_object_map_valid = false; + while (true) { + uint64_t current_size = m_image_ctx.size; + if (current_snap_id != CEPH_NOSNAP) { + std::map<librados::snap_t, SnapInfo>::const_iterator snap_it = + m_image_ctx.snap_info.find(current_snap_id); + ceph_assert(snap_it != m_image_ctx.snap_info.end()); + current_size = snap_it->second.size; + + ++snap_it; + if (snap_it != m_image_ctx.snap_info.end()) { + next_snap_id = snap_it->first; + } else { + next_snap_id = CEPH_NOSNAP; + } + } + + uint64_t flags; + int r = m_image_ctx.get_flags(from_snap_id, &flags); + if (r < 0) { + lderr(cct) << "diff_object_map: failed to retrieve image flags" << dendl; + return r; + } + if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) { + ldout(cct, 1) << "diff_object_map: cannot perform fast diff on invalid " + << "object map" << dendl; + return -EINVAL; + } + + BitVector<2> object_map; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, + current_snap_id)); + r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, &object_map); + if (r < 0) { + lderr(cct) << "diff_object_map: failed to load object map " << oid + << dendl; + return r; + } + ldout(cct, 20) << "diff_object_map: loaded object map " << oid << dendl; + + uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout, + current_size); + if (object_map.size() < num_objs) { + ldout(cct, 1) << "diff_object_map: object map too small: " + << object_map.size() << " < " << num_objs << dendl; + return -EINVAL; + } + object_map.resize(num_objs); + object_diff_state->resize(object_map.size()); + + uint64_t overlap = std::min(object_map.size(), prev_object_map.size()); + auto it = object_map.begin(); + auto overlap_end_it = it + overlap; + auto pre_it = prev_object_map.begin(); + auto diff_it = object_diff_state->begin(); + uint64_t i = 0; + for (; it != overlap_end_it; ++it, ++pre_it, ++diff_it, ++i) { + ldout(cct, 20) << __func__ << ": object state: " << i << " " + << static_cast<uint32_t>(*pre_it) + << "->" << static_cast<uint32_t>(*it) << dendl; + if (*it == OBJECT_NONEXISTENT) { + if (*pre_it != OBJECT_NONEXISTENT) { + *diff_it = OBJECT_DIFF_STATE_HOLE; + } + } else if (*it == OBJECT_EXISTS || + (*pre_it != *it && + !(*pre_it == OBJECT_EXISTS && + *it == OBJECT_EXISTS_CLEAN))) { + *diff_it = OBJECT_DIFF_STATE_UPDATED; + } + } + ldout(cct, 20) << "diff_object_map: computed overlap diffs" << dendl; + auto end_it = object_map.end(); + if (object_map.size() > prev_object_map.size() && + (diff_from_start || prev_object_map_valid)) { + for (; it != end_it; ++it,++diff_it, ++i) { + ldout(cct, 20) << __func__ << ": object state: " << i << " " + << "->" << static_cast<uint32_t>(*it) << dendl; + if (*it == OBJECT_NONEXISTENT) { + *diff_it = OBJECT_DIFF_STATE_NONE; + } else { + *diff_it = OBJECT_DIFF_STATE_UPDATED; + } + } + } + ldout(cct, 20) << "diff_object_map: computed resize diffs" << dendl; + + if (current_snap_id == next_snap_id || next_snap_id > to_snap_id) { + break; + } + current_snap_id = next_snap_id; + prev_object_map = object_map; + prev_object_map_valid = true; + } + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::DiffIterate<librbd::ImageCtx>; diff --git a/src/librbd/api/DiffIterate.h b/src/librbd/api/DiffIterate.h new file mode 100644 index 00000000..e6074d9c --- /dev/null +++ b/src/librbd/api/DiffIterate.h @@ -0,0 +1,66 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_DIFF_ITERATE_H +#define CEPH_LIBRBD_API_DIFF_ITERATE_H + +#include "include/int_types.h" +#include "common/bit_vector.hpp" +#include "cls/rbd/cls_rbd_types.h" + +namespace librbd { + +class ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +class DiffIterate { +public: + typedef int (*Callback)(uint64_t, size_t, int, void *); + + static int diff_iterate(ImageCtxT *ictx, + const cls::rbd::SnapshotNamespace& from_snap_namespace, + const char *fromsnapname, + uint64_t off, uint64_t len, bool include_parent, + bool whole_object, + int (*cb)(uint64_t, size_t, int, void *), + void *arg); + +private: + ImageCtxT &m_image_ctx; + cls::rbd::SnapshotNamespace m_from_snap_namespace; + const char* m_from_snap_name; + uint64_t m_offset; + uint64_t m_length; + bool m_include_parent; + bool m_whole_object; + Callback m_callback; + void *m_callback_arg; + + DiffIterate(ImageCtxT &image_ctx, + const cls::rbd::SnapshotNamespace& from_snap_namespace, + const char *from_snap_name, uint64_t off, uint64_t len, + bool include_parent, bool whole_object, Callback callback, + void *callback_arg) + : m_image_ctx(image_ctx), m_from_snap_namespace(from_snap_namespace), + m_from_snap_name(from_snap_name), m_offset(off), + m_length(len), m_include_parent(include_parent), + m_whole_object(whole_object), m_callback(callback), + m_callback_arg(callback_arg) + { + } + + int execute(); + + int diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id, + BitVector<2>* object_diff_state); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::DiffIterate<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_API_DIFF_ITERATE_H diff --git a/src/librbd/api/Group.cc b/src/librbd/api/Group.cc new file mode 100644 index 00000000..8b75bd94 --- /dev/null +++ b/src/librbd/api/Group.cc @@ -0,0 +1,1207 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/errno.h" + +#include "librbd/ExclusiveLock.h" +#include "librbd/api/Group.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/ImageWatcher.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/io/AioCompletion.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Group: " << __func__ << ": " + +using std::map; +using std::pair; +using std::set; +using std::string; +using std::vector; +// list binds to list() here, so std::list is explicitly used below + +using ceph::bufferlist; +using librados::snap_t; +using librados::IoCtx; +using librados::Rados; + + +namespace librbd { +namespace api { + +namespace { + +template <typename I> +snap_t get_group_snap_id(I* ictx, + const cls::rbd::SnapshotNamespace& in_snap_namespace) { + ceph_assert(ictx->snap_lock.is_locked()); + auto it = ictx->snap_ids.lower_bound({in_snap_namespace, ""}); + if (it != ictx->snap_ids.end() && it->first.first == in_snap_namespace) { + return it->second; + } + return CEPH_NOSNAP; +} + +string generate_uuid(librados::IoCtx& io_ctx) +{ + Rados rados(io_ctx); + uint64_t bid = rados.get_instance_id(); + + uint32_t extra = rand() % 0xFFFFFFFF; + ostringstream bid_ss; + bid_ss << std::hex << bid << std::hex << extra; + return bid_ss.str(); +} + +int group_snap_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector<cls::rbd::GroupSnapshot> *cls_snaps) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + vector<string> ind_snap_names; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + string group_header_oid = util::group_header_name(group_id); + + const int max_read = 1024; + cls::rbd::GroupSnapshot snap_last; + + for (;;) { + vector<cls::rbd::GroupSnapshot> snaps_page; + + r = cls_client::group_snap_list(&group_ioctx, group_header_oid, + snap_last, max_read, &snaps_page); + + if (r < 0) { + lderr(cct) << "error reading snap list from group: " + << cpp_strerror(-r) << dendl; + return r; + } + cls_snaps->insert(cls_snaps->end(), snaps_page.begin(), snaps_page.end()); + if (snaps_page.size() < max_read) { + break; + } + snap_last = *snaps_page.rbegin(); + } + + return 0; +} + +std::string calc_ind_image_snap_name(uint64_t pool_id, + const std::string &group_id, + const std::string &snap_id) +{ + std::stringstream ind_snap_name_stream; + ind_snap_name_stream << ".group." << std::hex << pool_id << "_" + << group_id << "_" << snap_id; + return ind_snap_name_stream.str(); +} + +int group_image_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector<cls::rbd::GroupImageStatus> *image_ids) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + string group_header_oid = util::group_header_name(group_id); + + ldout(cct, 20) << "listing images in group name " + << group_name << " group id " << group_header_oid << dendl; + image_ids->clear(); + + const int max_read = 1024; + cls::rbd::GroupImageSpec start_last; + do { + std::vector<cls::rbd::GroupImageStatus> image_ids_page; + + r = cls_client::group_image_list(&group_ioctx, group_header_oid, + start_last, max_read, &image_ids_page); + + if (r < 0) { + lderr(cct) << "error reading image list from group: " + << cpp_strerror(-r) << dendl; + return r; + } + image_ids->insert(image_ids->end(), + image_ids_page.begin(), image_ids_page.end()); + + if (image_ids_page.size() > 0) + start_last = image_ids_page.rbegin()->spec; + + r = image_ids_page.size(); + } while (r == max_read); + + return 0; +} + +int group_image_remove(librados::IoCtx& group_ioctx, string group_id, + librados::IoCtx& image_ioctx, string image_id) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_header_oid = util::group_header_name(group_id); + + string image_header_oid = util::header_name(image_id); + + ldout(cct, 20) << "removing image " << image_id + << " image id " << image_header_oid << dendl; + + cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id()); + + cls::rbd::GroupImageStatus incomplete_st(image_id, image_ioctx.get_id(), + cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE); + + cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id()); + + int r = cls_client::group_image_set(&group_ioctx, group_header_oid, + incomplete_st); + + if (r < 0) { + lderr(cct) << "couldn't put image into removing state: " + << cpp_strerror(-r) << dendl; + return r; + } + + r = cls_client::image_group_remove(&image_ioctx, image_header_oid, + group_spec); + if ((r < 0) && (r != -ENOENT)) { + lderr(cct) << "couldn't remove group reference from image" + << cpp_strerror(-r) << dendl; + return r; + } else if (r >= 0) { + ImageWatcher<>::notify_header_update(image_ioctx, image_header_oid); + } + + r = cls_client::group_image_remove(&group_ioctx, group_header_oid, spec); + if (r < 0) { + lderr(cct) << "couldn't remove image from group" + << cpp_strerror(-r) << dendl; + return r; + } + + return 0; +} + +int group_snap_remove_by_record(librados::IoCtx& group_ioctx, + const cls::rbd::GroupSnapshot& group_snap, + const std::string& group_id, + const std::string& group_header_oid) { + + CephContext *cct = (CephContext *)group_ioctx.cct(); + std::vector<C_SaferCond*> on_finishes; + int r, ret_code; + + std::vector<librbd::ImageCtx*> ictxs; + + cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id, + group_snap.id}; + + ldout(cct, 20) << "Removing snapshots" << dendl; + int snap_count = group_snap.snaps.size(); + + for (int i = 0; i < snap_count; ++i) { + librbd::IoCtx image_io_ctx; + r = util::create_ioctx(group_ioctx, "image", group_snap.snaps[i].pool, {}, + &image_io_ctx); + if (r < 0) { + return r; + } + + librbd::ImageCtx* image_ctx = new ImageCtx("", group_snap.snaps[i].image_id, + nullptr, image_io_ctx, false); + + C_SaferCond* on_finish = new C_SaferCond; + + image_ctx->state->open(0, on_finish); + + ictxs.push_back(image_ctx); + on_finishes.push_back(on_finish); + } + + ret_code = 0; + for (int i = 0; i < snap_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0) { + delete ictxs[i]; + ictxs[i] = nullptr; + ret_code = r; + } + } + if (ret_code != 0) { + goto finish; + } + + ldout(cct, 20) << "Opened participating images. " << + "Deleting snapshots themselves." << dendl; + + for (int i = 0; i < snap_count; ++i) { + ImageCtx *ictx = ictxs[i]; + on_finishes[i] = new C_SaferCond; + + std::string snap_name; + ictx->snap_lock.get_read(); + snap_t snap_id = get_group_snap_id(ictx, ne); + r = ictx->get_snap_name(snap_id, &snap_name); + ictx->snap_lock.put_read(); + + if (r >= 0) { + ldout(cct, 20) << "removing individual snapshot from image " << ictx->name + << dendl; + ictx->operations->snap_remove(ne, snap_name, on_finishes[i]); + } else { + // We are ok to ignore missing image snapshots. The snapshot could have + // been inconsistent in the first place. + on_finishes[i]->complete(0); + } + } + + for (int i = 0; i < snap_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0 && r != -ENOENT) { + // if previous attempts to remove this snapshot failed then the image's + // snapshot may not exist + lderr(cct) << "Failed deleting image snapshot. Ret code: " << r << dendl; + ret_code = r; + } + } + + if (ret_code != 0) { + goto finish; + } + + ldout(cct, 20) << "Removed images snapshots removing snapshot record." + << dendl; + + r = cls_client::group_snap_remove(&group_ioctx, group_header_oid, + group_snap.id); + if (r < 0) { + ret_code = r; + goto finish; + } + +finish: + for (int i = 0; i < snap_count; ++i) { + if (ictxs[i] != nullptr) { + ictxs[i]->state->close(); + } + } + return ret_code; +} + +int group_snap_rollback_by_record(librados::IoCtx& group_ioctx, + const cls::rbd::GroupSnapshot& group_snap, + const std::string& group_id, + const std::string& group_header_oid, + ProgressContext& pctx) { + CephContext *cct = (CephContext *)group_ioctx.cct(); + std::vector<C_SaferCond*> on_finishes; + int r, ret_code; + + std::vector<librbd::ImageCtx*> ictxs; + + cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id, + group_snap.id}; + + ldout(cct, 20) << "Rolling back snapshots" << dendl; + int snap_count = group_snap.snaps.size(); + + for (int i = 0; i < snap_count; ++i) { + librados::IoCtx image_io_ctx; + r = util::create_ioctx(group_ioctx, "image", group_snap.snaps[i].pool, {}, + &image_io_ctx); + if (r < 0) { + return r; + } + + librbd::ImageCtx* image_ctx = new ImageCtx("", group_snap.snaps[i].image_id, + nullptr, image_io_ctx, false); + + C_SaferCond* on_finish = new C_SaferCond; + + image_ctx->state->open(0, on_finish); + + ictxs.push_back(image_ctx); + on_finishes.push_back(on_finish); + } + + ret_code = 0; + for (int i = 0; i < snap_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0) { + delete ictxs[i]; + ictxs[i] = nullptr; + ret_code = r; + } + } + if (ret_code != 0) { + goto finish; + } + + ldout(cct, 20) << "Requesting exclusive locks for images" << dendl; + for (auto ictx: ictxs) { + RWLock::RLocker owner_lock(ictx->owner_lock); + if (ictx->exclusive_lock != nullptr) { + ictx->exclusive_lock->block_requests(-EBUSY); + } + } + for (int i = 0; i < snap_count; ++i) { + ImageCtx *ictx = ictxs[i]; + RWLock::RLocker owner_lock(ictx->owner_lock); + + on_finishes[i] = new C_SaferCond; + if (ictx->exclusive_lock != nullptr) { + ictx->exclusive_lock->acquire_lock(on_finishes[i]); + } + } + + ret_code = 0; + for (int i = 0; i < snap_count; ++i) { + r = 0; + ImageCtx *ictx = ictxs[i]; + if (ictx->exclusive_lock != nullptr) { + r = on_finishes[i]->wait(); + } + delete on_finishes[i]; + if (r < 0) { + ret_code = r; + } + } + if (ret_code != 0) { + goto finish; + } + + for (int i = 0; i < snap_count; ++i) { + ImageCtx *ictx = ictxs[i]; + on_finishes[i] = new C_SaferCond; + + RWLock::RLocker owner_locker(ictx->owner_lock); + std::string snap_name; + ictx->snap_lock.get_read(); + snap_t snap_id = get_group_snap_id(ictx, ne); + r = ictx->get_snap_name(snap_id, &snap_name); + ictx->snap_lock.put_read(); + + if (r >= 0) { + ldout(cct, 20) << "rolling back to individual snapshot for image " << ictx->name + << dendl; + ictx->operations->execute_snap_rollback(ne, snap_name, pctx, on_finishes[i]); + } else { + on_finishes[i]->complete(r); + } + } + + for (int i = 0; i < snap_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0 && r != -ENOENT) { + lderr(cct) << "Failed rolling back group to snapshot. Ret code: " << r << dendl; + ret_code = r; + } + } + +finish: + for (int i = 0; i < snap_count; ++i) { + if (ictxs[i] != nullptr) { + ictxs[i]->state->close(); + } + } + return ret_code; +} + +} // anonymous namespace + +template <typename I> +int Group<I>::image_remove_by_id(librados::IoCtx& group_ioctx, + const char *group_name, + librados::IoCtx& image_ioctx, + const char *image_id) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + ldout(cct, 20) << "io_ctx=" << &group_ioctx + << " group name " << group_name << " image " + << &image_ioctx << " id " << image_id << dendl; + + string group_id; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name, + &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 20) << "removing image from group name " << group_name + << " group id " << group_id << dendl; + + return group_image_remove(group_ioctx, group_id, image_ioctx, string(image_id)); +} + +template <typename I> +int Group<I>::create(librados::IoCtx& io_ctx, const char *group_name) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + + string id = generate_uuid(io_ctx); + + ldout(cct, 2) << "adding group to directory..." << dendl; + + int r = cls_client::group_dir_add(&io_ctx, RBD_GROUP_DIRECTORY, group_name, + id); + if (r < 0) { + lderr(cct) << "error adding group to directory: " + << cpp_strerror(r) + << dendl; + return r; + } + string header_oid = util::group_header_name(id); + + r = io_ctx.create(header_oid, true); + if (r < 0) { + lderr(cct) << "error creating group header: " << cpp_strerror(r) << dendl; + goto err_remove_from_dir; + } + + return 0; + +err_remove_from_dir: + int remove_r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY, + group_name, id); + if (remove_r < 0) { + lderr(cct) << "error cleaning up group from rbd_directory " + << "object after creation failed: " << cpp_strerror(remove_r) + << dendl; + } + + return r; +} + +template <typename I> +int Group<I>::remove(librados::IoCtx& io_ctx, const char *group_name) +{ + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "group_remove " << &io_ctx << " " << group_name << dendl; + + std::string group_id; + int r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY, + std::string(group_name), &group_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error getting id of group" << dendl; + return r; + } + string group_header_oid = util::group_header_name(group_id); + + std::vector<cls::rbd::GroupSnapshot> snaps; + r = group_snap_list(io_ctx, group_name, &snaps); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error listing group snapshots" << dendl; + return r; + } + + for (auto &snap : snaps) { + r = group_snap_remove_by_record(io_ctx, snap, group_id, group_header_oid); + if (r < 0) { + return r; + } + } + + std::vector<cls::rbd::GroupImageStatus> images; + r = group_image_list(io_ctx, group_name, &images); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error listing group images" << dendl; + return r; + } + + for (auto image : images) { + IoCtx image_ioctx; + r = util::create_ioctx(io_ctx, "image", image.spec.pool_id, {}, + &image_ioctx); + if (r < 0) { + return r; + } + + r = group_image_remove(io_ctx, group_id, image_ioctx, image.spec.image_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error removing image from a group" << dendl; + return r; + } + } + + string header_oid = util::group_header_name(group_id); + + r = io_ctx.remove(header_oid); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error removing header: " << cpp_strerror(-r) << dendl; + return r; + } + + r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY, + group_name, group_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error removing group from directory" << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Group<I>::list(IoCtx& io_ctx, vector<string> *names) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl; + + int max_read = 1024; + string last_read = ""; + int r; + do { + map<string, string> groups; + r = cls_client::group_dir_list(&io_ctx, RBD_GROUP_DIRECTORY, last_read, + max_read, &groups); + if (r == -ENOENT) { + return 0; // Ignore missing rbd group directory. It means we don't have any groups yet. + } + if (r < 0) { + if (r != -ENOENT) { + lderr(cct) << "error listing group in directory: " + << cpp_strerror(r) << dendl; + } else { + r = 0; + } + return r; + } + for (pair<string, string> group : groups) { + names->push_back(group.first); + } + if (!groups.empty()) { + last_read = groups.rbegin()->first; + } + r = groups.size(); + } while (r == max_read); + + return 0; +} + +template <typename I> +int Group<I>::image_add(librados::IoCtx& group_ioctx, const char *group_name, + librados::IoCtx& image_ioctx, const char *image_name) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + ldout(cct, 20) << "io_ctx=" << &group_ioctx + << " group name " << group_name << " image " + << &image_ioctx << " name " << image_name << dendl; + + if (group_ioctx.get_namespace() != image_ioctx.get_namespace()) { + lderr(cct) << "group and image cannot be in different namespaces" << dendl; + return -EINVAL; + } + + string group_id; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name, + &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + string group_header_oid = util::group_header_name(group_id); + + + ldout(cct, 20) << "adding image to group name " << group_name + << " group id " << group_header_oid << dendl; + + string image_id; + + r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name, + &image_id); + if (r < 0) { + lderr(cct) << "error reading image id object: " + << cpp_strerror(-r) << dendl; + return r; + } + + string image_header_oid = util::header_name(image_id); + + ldout(cct, 20) << "adding image " << image_name + << " image id " << image_header_oid << dendl; + + cls::rbd::GroupImageStatus incomplete_st( + image_id, image_ioctx.get_id(), + cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE); + cls::rbd::GroupImageStatus attached_st( + image_id, image_ioctx.get_id(), cls::rbd::GROUP_IMAGE_LINK_STATE_ATTACHED); + + r = cls_client::group_image_set(&group_ioctx, group_header_oid, + incomplete_st); + + cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id()); + + if (r < 0) { + lderr(cct) << "error adding image reference to group: " + << cpp_strerror(-r) << dendl; + return r; + } + + r = cls_client::image_group_add(&image_ioctx, image_header_oid, group_spec); + if (r < 0) { + lderr(cct) << "error adding group reference to image: " + << cpp_strerror(-r) << dendl; + cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id()); + cls_client::group_image_remove(&group_ioctx, group_header_oid, spec); + // Ignore errors in the clean up procedure. + return r; + } + ImageWatcher<>::notify_header_update(image_ioctx, image_header_oid); + + r = cls_client::group_image_set(&group_ioctx, group_header_oid, + attached_st); + + return r; +} + +template <typename I> +int Group<I>::image_remove(librados::IoCtx& group_ioctx, const char *group_name, + librados::IoCtx& image_ioctx, const char *image_name) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + ldout(cct, 20) << "io_ctx=" << &group_ioctx + << " group name " << group_name << " image " + << &image_ioctx << " name " << image_name << dendl; + + if (group_ioctx.get_namespace() != image_ioctx.get_namespace()) { + lderr(cct) << "group and image cannot be in different namespaces" << dendl; + return -EINVAL; + } + + string group_id; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name, + &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 20) << "removing image from group name " << group_name + << " group id " << group_id << dendl; + + string image_id; + r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name, + &image_id); + if (r < 0) { + lderr(cct) << "error reading image id object: " + << cpp_strerror(-r) << dendl; + return r; + } + + r = group_image_remove(group_ioctx, group_id, image_ioctx, image_id); + + return r; +} + +template <typename I> +int Group<I>::image_list(librados::IoCtx& group_ioctx, + const char *group_name, + std::vector<group_image_info_t>* images) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + ldout(cct, 20) << "io_ctx=" << &group_ioctx + << " group name " << group_name << dendl; + + std::vector<cls::rbd::GroupImageStatus> image_ids; + + group_image_list(group_ioctx, group_name, &image_ids); + + for (auto image_id : image_ids) { + IoCtx ioctx; + int r = util::create_ioctx(group_ioctx, "image", image_id.spec.pool_id, {}, + &ioctx); + if (r < 0) { + return r; + } + + std::string image_name; + r = cls_client::dir_get_name(&ioctx, RBD_DIRECTORY, + image_id.spec.image_id, &image_name); + if (r < 0) { + return r; + } + + images->push_back( + group_image_info_t { + image_name, + ioctx.get_id(), + static_cast<group_image_state_t>(image_id.state)}); + } + + return 0; +} + +template <typename I> +int Group<I>::rename(librados::IoCtx& io_ctx, const char *src_name, + const char *dest_name) +{ + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "group_rename " << &io_ctx << " " << src_name + << " -> " << dest_name << dendl; + + std::string group_id; + int r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY, + std::string(src_name), &group_id); + if (r < 0) { + if (r != -ENOENT) + lderr(cct) << "error getting id of group" << dendl; + return r; + } + + r = cls_client::group_dir_rename(&io_ctx, RBD_GROUP_DIRECTORY, + src_name, dest_name, group_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error renaming group from directory" << dendl; + return r; + } + + return 0; +} + + +template <typename I> +int Group<I>::image_get_group(I *ictx, group_info_t *group_info) +{ + int r = ictx->state->refresh_if_required(); + if (r < 0) + return r; + + if (RBD_GROUP_INVALID_POOL != ictx->group_spec.pool_id) { + IoCtx ioctx; + r = util::create_ioctx(ictx->md_ctx, "group", ictx->group_spec.pool_id, {}, + &ioctx); + if (r < 0) { + return r; + } + + std::string group_name; + r = cls_client::dir_get_name(&ioctx, RBD_GROUP_DIRECTORY, + ictx->group_spec.group_id, &group_name); + if (r < 0) + return r; + group_info->pool = ioctx.get_id(); + group_info->name = group_name; + } else { + group_info->pool = RBD_GROUP_INVALID_POOL; + group_info->name = ""; + } + + return 0; +} + +template <typename I> +int Group<I>::snap_create(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + cls::rbd::GroupSnapshot group_snap; + vector<cls::rbd::ImageSnapshotSpec> image_snaps; + std::string ind_snap_name; + + std::vector<librbd::ImageCtx*> ictxs; + std::vector<C_SaferCond*> on_finishes; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + + std::vector<cls::rbd::GroupImageStatus> images; + r = group_image_list(group_ioctx, group_name, &images); + if (r < 0) { + return r; + } + int image_count = images.size(); + + ldout(cct, 20) << "Found " << image_count << " images in group" << dendl; + + image_snaps = vector<cls::rbd::ImageSnapshotSpec>(image_count, + cls::rbd::ImageSnapshotSpec()); + + for (int i = 0; i < image_count; ++i) { + image_snaps[i].pool = images[i].spec.pool_id; + image_snaps[i].image_id = images[i].spec.image_id; + } + + string group_header_oid = util::group_header_name(group_id); + + group_snap.id = generate_uuid(group_ioctx); + group_snap.name = string(snap_name); + group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE; + group_snap.snaps = image_snaps; + + cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id, + group_snap.id}; + + r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap); + if (r == -EEXIST) { + lderr(cct) << "snapshot with this name already exists: " + << cpp_strerror(r) + << dendl; + } + int ret_code = 0; + if (r < 0) { + ret_code = r; + goto finish; + } + + for (auto image: images) { + librbd::IoCtx image_io_ctx; + r = util::create_ioctx(group_ioctx, "image", image.spec.pool_id, {}, + &image_io_ctx); + if (r < 0) { + ret_code = r; + goto finish; + } + + ldout(cct, 20) << "Opening image with id " << image.spec.image_id << dendl; + + librbd::ImageCtx* image_ctx = new ImageCtx("", image.spec.image_id.c_str(), + nullptr, image_io_ctx, false); + + C_SaferCond* on_finish = new C_SaferCond; + + image_ctx->state->open(0, on_finish); + + ictxs.push_back(image_ctx); + on_finishes.push_back(on_finish); + } + ldout(cct, 20) << "Issued open request waiting for the completion" << dendl; + ret_code = 0; + for (int i = 0; i < image_count; ++i) { + + ldout(cct, 20) << "Waiting for completion on on_finish: " << + on_finishes[i] << dendl; + + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0) { + delete ictxs[i]; + ictxs[i] = nullptr; + ret_code = r; + } + } + if (ret_code != 0) { + goto remove_record; + } + ldout(cct, 20) << "Requesting exclusive locks for images" << dendl; + + for (auto ictx: ictxs) { + RWLock::RLocker owner_lock(ictx->owner_lock); + if (ictx->exclusive_lock != nullptr) { + ictx->exclusive_lock->block_requests(-EBUSY); + } + } + for (int i = 0; i < image_count; ++i) { + ImageCtx *ictx = ictxs[i]; + RWLock::RLocker owner_lock(ictx->owner_lock); + + on_finishes[i] = new C_SaferCond; + if (ictx->exclusive_lock != nullptr) { + ictx->exclusive_lock->acquire_lock(on_finishes[i]); + } + } + + ret_code = 0; + for (int i = 0; i < image_count; ++i) { + r = 0; + ImageCtx *ictx = ictxs[i]; + if (ictx->exclusive_lock != nullptr) { + r = on_finishes[i]->wait(); + } + delete on_finishes[i]; + if (r < 0) { + ret_code = r; + } + } + if (ret_code != 0) { + goto remove_record; + } + + ind_snap_name = calc_ind_image_snap_name(group_ioctx.get_id(), group_id, + group_snap.id); + + for (int i = 0; i < image_count; ++i) { + ImageCtx *ictx = ictxs[i]; + + C_SaferCond* on_finish = new C_SaferCond; + + ictx->operations->snap_create(ne, ind_snap_name.c_str(), on_finish); + + on_finishes[i] = on_finish; + } + + ret_code = 0; + for (int i = 0; i < image_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0) { + ret_code = r; + } else { + ImageCtx *ictx = ictxs[i]; + ictx->snap_lock.get_read(); + snap_t snap_id = get_group_snap_id(ictx, ne); + ictx->snap_lock.put_read(); + if (snap_id == CEPH_NOSNAP) { + ldout(cct, 20) << "Couldn't find created snapshot with namespace: " + << ne << dendl; + ret_code = -ENOENT; + } else { + image_snaps[i].snap_id = snapid_t(snap_id); + image_snaps[i].pool = ictx->md_ctx.get_id(); + image_snaps[i].image_id = ictx->id; + } + } + } + if (ret_code != 0) { + goto remove_image_snaps; + } + + group_snap.snaps = image_snaps; + group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE; + + r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap); + if (r < 0) { + ret_code = r; + goto remove_image_snaps; + } + + goto finish; + +remove_image_snaps: + + for (int i = 0; i < image_count; ++i) { + ImageCtx *ictx = ictxs[i]; + ldout(cct, 20) << "Removing individual snapshot with name: " << + ind_snap_name << dendl; + + on_finishes[i] = new C_SaferCond; + std::string snap_name; + ictx->snap_lock.get_read(); + snap_t snap_id = get_group_snap_id(ictx, ne); + r = ictx->get_snap_name(snap_id, &snap_name); + ictx->snap_lock.put_read(); + if (r >= 0) { + ictx->operations->snap_remove(ne, snap_name.c_str(), on_finishes[i]); + } else { + // Ignore missing image snapshots. The whole snapshot could have been + // inconsistent. + on_finishes[i]->complete(0); + } + } + + for (int i = 0, n = on_finishes.size(); i < n; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0 && r != -ENOENT) { // if previous attempts to remove this snapshot failed then the image's snapshot may not exist + lderr(cct) << "Failed cleaning up image snapshot. Ret code: " << r << dendl; + // just report error, but don't abort the process + } + } + +remove_record: + r = cls_client::group_snap_remove(&group_ioctx, group_header_oid, + group_snap.id); + if (r < 0) { + lderr(cct) << "error while cleaning up group snapshot" << dendl; + // we ignore return value in clean up + } + +finish: + for (int i = 0, n = ictxs.size(); i < n; ++i) { + if (ictxs[i] != nullptr) { + ictxs[i]->state->close(); + } + } + return ret_code; +} + +template <typename I> +int Group<I>::snap_remove(librados::IoCtx& group_ioctx, const char *group_name, + const char *snap_name) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + + std::vector<cls::rbd::GroupSnapshot> snaps; + r = group_snap_list(group_ioctx, group_name, &snaps); + if (r < 0) { + return r; + } + + cls::rbd::GroupSnapshot *group_snap = nullptr; + for (auto &snap : snaps) { + if (snap.name == string(snap_name)) { + group_snap = &snap; + break; + } + } + if (group_snap == nullptr) { + return -ENOENT; + } + + string group_header_oid = util::group_header_name(group_id); + r = group_snap_remove_by_record(group_ioctx, *group_snap, group_id, + group_header_oid); + return r; +} + +template <typename I> +int Group<I>::snap_rename(librados::IoCtx& group_ioctx, const char *group_name, + const char *old_snap_name, + const char *new_snap_name) { + CephContext *cct = (CephContext *)group_ioctx.cct(); + if (0 == strcmp(old_snap_name, new_snap_name)) + return -EEXIST; + + std::string group_id; + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r == -ENOENT) { + return r; + } else if (r < 0) { + lderr(cct) << "error reading group id object: " << cpp_strerror(r) << dendl; + return r; + } + + std::vector<cls::rbd::GroupSnapshot> group_snaps; + r = group_snap_list(group_ioctx, group_name, &group_snaps); + if (r < 0) { + return r; + } + + cls::rbd::GroupSnapshot group_snap; + for (auto &snap : group_snaps) { + if (snap.name == old_snap_name) { + group_snap = snap; + break; + } + } + + if (group_snap.id.empty()) { + return -ENOENT; + } + + std::string group_header_oid = util::group_header_name(group_id); + group_snap.name = new_snap_name; + r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Group<I>::snap_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector<group_snap_info_t> *snaps) +{ + std::vector<cls::rbd::GroupSnapshot> cls_snaps; + + int r = group_snap_list(group_ioctx, group_name, &cls_snaps); + if (r < 0) { + return r; + } + + for (auto snap : cls_snaps) { + snaps->push_back( + group_snap_info_t { + snap.name, + static_cast<group_snap_state_t>(snap.state)}); + + } + return 0; +} + +template <typename I> +int Group<I>::snap_rollback(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name, + ProgressContext& pctx) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) << dendl; + return r; + } + + std::vector<cls::rbd::GroupSnapshot> snaps; + r = group_snap_list(group_ioctx, group_name, &snaps); + if (r < 0) { + return r; + } + + cls::rbd::GroupSnapshot *group_snap = nullptr; + for (auto &snap : snaps) { + if (snap.name == string(snap_name)) { + group_snap = &snap; + break; + } + } + if (group_snap == nullptr) { + return -ENOENT; + } + + string group_header_oid = util::group_header_name(group_id); + r = group_snap_rollback_by_record(group_ioctx, *group_snap, group_id, + group_header_oid, pctx); + return r; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Group<librbd::ImageCtx>; diff --git a/src/librbd/api/Group.h b/src/librbd/api/Group.h new file mode 100644 index 00000000..59b978a8 --- /dev/null +++ b/src/librbd/api/Group.h @@ -0,0 +1,59 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_GROUP_H +#define CEPH_LIBRBD_API_GROUP_H + +#include "include/rbd/librbd.hpp" +#include "include/rados/librados_fwd.hpp" +#include <string> +#include <vector> + +namespace librbd { + +struct ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +struct Group { + + static int create(librados::IoCtx& io_ctx, const char *group_name); + static int remove(librados::IoCtx& io_ctx, const char *group_name); + static int list(librados::IoCtx& io_ctx, std::vector<std::string> *names); + static int rename(librados::IoCtx& io_ctx, const char *src_group_name, + const char *dest_group_name); + + static int image_add(librados::IoCtx& group_ioctx, const char *group_name, + librados::IoCtx& image_ioctx, const char *image_name); + static int image_remove(librados::IoCtx& group_ioctx, const char *group_name, + librados::IoCtx& image_ioctx, const char *image_name); + static int image_remove_by_id(librados::IoCtx& group_ioctx, + const char *group_name, + librados::IoCtx& image_ioctx, + const char *image_id); + static int image_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector<group_image_info_t> *images); + + static int image_get_group(ImageCtxT *ictx, group_info_t *group_info); + + static int snap_create(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name); + static int snap_remove(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name); + static int snap_rename(librados::IoCtx& group_ioctx, const char *group_name, + const char *old_snap_name, const char *new_snap_name); + static int snap_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector<group_snap_info_t> *snaps); + static int snap_rollback(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name, + ProgressContext& pctx); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Group<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_API_GROUP_H diff --git a/src/librbd/api/Image.cc b/src/librbd/api/Image.cc new file mode 100644 index 00000000..aa427535 --- /dev/null +++ b/src/librbd/api/Image.cc @@ -0,0 +1,836 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Image.h" +#include "include/rados/librados.hpp" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/DeepCopyRequest.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/internal.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" +#include "librbd/api/Trash.h" +#include "librbd/image/CloneRequest.h" +#include "librbd/image/RemoveRequest.h" +#include "librbd/image/PreRemoveRequest.h" +#include <boost/scope_exit.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Image: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +bool compare_by_pool(const librbd::linked_image_spec_t& lhs, + const librbd::linked_image_spec_t& rhs) +{ + if (lhs.pool_id != rhs.pool_id) { + return lhs.pool_id < rhs.pool_id; + } else if (lhs.pool_namespace != rhs.pool_namespace) { + return lhs.pool_namespace < rhs.pool_namespace; + } + return false; +} + +bool compare(const librbd::linked_image_spec_t& lhs, + const librbd::linked_image_spec_t& rhs) +{ + if (lhs.pool_name != rhs.pool_name) { + return lhs.pool_name < rhs.pool_name; + } else if (lhs.pool_id != rhs.pool_id) { + return lhs.pool_id < rhs.pool_id; + } else if (lhs.pool_namespace != rhs.pool_namespace) { + return lhs.pool_namespace < rhs.pool_namespace; + } else if (lhs.image_name != rhs.image_name) { + return lhs.image_name < rhs.image_name; + } else if (lhs.image_id != rhs.image_id) { + return lhs.image_id < rhs.image_id; + } + return false; +} + +template <typename I> +int pre_remove_image(librados::IoCtx& io_ctx, const std::string& image_id) { + I *image_ctx = I::create("", image_id, nullptr, io_ctx, false); + int r = image_ctx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + if (r < 0) { + return r; + } + + C_SaferCond ctx; + auto req = image::PreRemoveRequest<I>::create(image_ctx, false, &ctx); + req->send(); + + r = ctx.wait(); + image_ctx->state->close(); + return r; +} + +} // anonymous namespace + +template <typename I> +int64_t Image<I>::get_data_pool_id(I *ictx) { + if (ictx->data_ctx.is_valid()) { + return ictx->data_ctx.get_id(); + } + + int64_t pool_id; + int r = cls_client::get_data_pool(&ictx->md_ctx, ictx->header_oid, &pool_id); + if (r < 0) { + CephContext *cct = ictx->cct; + lderr(cct) << "error getting data pool ID: " << cpp_strerror(r) << dendl; + return r; + } + + return pool_id; +} + +template <typename I> +int Image<I>::get_op_features(I *ictx, uint64_t *op_features) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "image_ctx=" << ictx << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + RWLock::RLocker snap_locker(ictx->snap_lock); + *op_features = ictx->op_features; + return 0; +} + +template <typename I> +int Image<I>::list_images(librados::IoCtx& io_ctx, + std::vector<image_spec_t> *images) { + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 20) << "list " << &io_ctx << dendl; + + int r; + images->clear(); + + if (io_ctx.get_namespace().empty()) { + bufferlist bl; + r = io_ctx.read(RBD_DIRECTORY, bl, 0, 0); + if (r == -ENOENT) { + return 0; + } else if (r < 0) { + lderr(cct) << "error listing v1 images: " << cpp_strerror(r) << dendl; + return r; + } + + // V1 format images are in a tmap + if (bl.length()) { + auto p = bl.cbegin(); + bufferlist header; + std::map<std::string, bufferlist> m; + decode(header, p); + decode(m, p); + for (auto& it : m) { + images->push_back({.id ="", .name = it.first}); + } + } + } + + // V2 format images + std::map<std::string, std::string> image_names_to_ids; + r = list_images_v2(io_ctx, &image_names_to_ids); + if (r < 0) { + lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl; + return r; + } + + for (const auto& img_pair : image_names_to_ids) { + images->push_back({.id = img_pair.second, + .name = img_pair.first}); + } + + // include V2 images in a partially removed state + std::vector<librbd::trash_image_info_t> trash_images; + r = Trash<I>::list(io_ctx, trash_images, false); + if (r < 0 && r != -EOPNOTSUPP) { + lderr(cct) << "error listing trash images: " << cpp_strerror(r) << dendl; + return r; + } + + for (const auto& trash_image : trash_images) { + if (trash_image.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + images->push_back({.id = trash_image.id, + .name = trash_image.name}); + + } + } + + return 0; +} + +template <typename I> +int Image<I>::list_images_v2(librados::IoCtx& io_ctx, ImageNameToIds *images) { + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl; + + // new format images are accessed by class methods + int r; + int max_read = 1024; + string last_read = ""; + do { + map<string, string> images_page; + r = cls_client::dir_list(&io_ctx, RBD_DIRECTORY, last_read, max_read, + &images_page); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error listing image in directory: " + << cpp_strerror(r) << dendl; + return r; + } else if (r == -ENOENT) { + break; + } + for (map<string, string>::const_iterator it = images_page.begin(); + it != images_page.end(); ++it) { + images->insert(*it); + } + if (!images_page.empty()) { + last_read = images_page.rbegin()->first; + } + r = images_page.size(); + } while (r == max_read); + + return 0; +} + +template <typename I> +int Image<I>::get_parent(I *ictx, + librbd::linked_image_spec_t *parent_image, + librbd::snap_spec_t *parent_snap) { + auto cct = ictx->cct; + ldout(cct, 20) << "image_ctx=" << ictx << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + RWLock::RLocker snap_locker(ictx->snap_lock); + RWLock::RLocker parent_locker(ictx->parent_lock); + + bool release_parent_locks = false; + BOOST_SCOPE_EXIT_ALL(ictx, &release_parent_locks) { + if (release_parent_locks) { + ictx->parent->parent_lock.put_read(); + ictx->parent->snap_lock.put_read(); + } + }; + + // if a migration is in-progress, the true parent is the parent + // of the migration source image + auto parent = ictx->parent; + if (!ictx->migration_info.empty() && ictx->parent != nullptr) { + release_parent_locks = true; + ictx->parent->snap_lock.get_read(); + ictx->parent->parent_lock.get_read(); + + parent = ictx->parent->parent; + } + + if (parent == nullptr) { + return -ENOENT; + } + + parent_image->pool_id = parent->md_ctx.get_id(); + parent_image->pool_name = parent->md_ctx.get_pool_name(); + parent_image->pool_namespace = parent->md_ctx.get_namespace(); + + RWLock::RLocker parent_snap_locker(parent->snap_lock); + parent_snap->id = parent->snap_id; + parent_snap->namespace_type = RBD_SNAP_NAMESPACE_TYPE_USER; + if (parent->snap_id != CEPH_NOSNAP) { + auto snap_info = parent->get_snap_info(parent->snap_id); + if (snap_info == nullptr) { + lderr(cct) << "error finding parent snap name: " << cpp_strerror(r) + << dendl; + return -ENOENT; + } + + parent_snap->namespace_type = static_cast<snap_namespace_type_t>( + cls::rbd::get_snap_namespace_type(snap_info->snap_namespace)); + parent_snap->name = snap_info->name; + } + + parent_image->image_id = parent->id; + parent_image->image_name = parent->name; + parent_image->trash = true; + + librbd::trash_image_info_t trash_info; + r = Trash<I>::get(parent->md_ctx, parent->id, &trash_info); + if (r == -ENOENT || r == -EOPNOTSUPP) { + parent_image->trash = false; + } else if (r < 0) { + lderr(cct) << "error looking up trash status: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Image<I>::list_children(I *ictx, + std::vector<librbd::linked_image_spec_t> *images) { + images->clear(); + return list_descendants(ictx, 1, images); +} + +template <typename I> +int Image<I>::list_children(I *ictx, + const cls::rbd::ParentImageSpec &parent_spec, + std::vector<librbd::linked_image_spec_t> *images) { + images->clear(); + return list_descendants(ictx, parent_spec, 1, images); +} + +template <typename I> +int Image<I>::list_descendants( + librados::IoCtx& io_ctx, const std::string &image_id, + const std::optional<size_t> &max_level, + std::vector<librbd::linked_image_spec_t> *images) { + ImageCtx *ictx = new librbd::ImageCtx("", image_id, nullptr, + io_ctx, true); + int r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + if (r < 0) { + if (r == -ENOENT) { + return 0; + } + lderr(ictx->cct) << "failed to open descendant " << image_id + << " from pool " << io_ctx.get_pool_name() << ":" + << cpp_strerror(r) << dendl; + return r; + } + + r = list_descendants(ictx, max_level, images); + + int r1 = ictx->state->close(); + if (r1 < 0) { + lderr(ictx->cct) << "error when closing descendant " << image_id + << " from pool " << io_ctx.get_pool_name() << ":" + << cpp_strerror(r) << dendl; + } + + return r; +} + +template <typename I> +int Image<I>::list_descendants( + I *ictx, const std::optional<size_t> &max_level, + std::vector<librbd::linked_image_spec_t> *images) { + RWLock::RLocker l(ictx->snap_lock); + std::vector<librados::snap_t> snap_ids; + if (ictx->snap_id != CEPH_NOSNAP) { + snap_ids.push_back(ictx->snap_id); + } else { + snap_ids = ictx->snaps; + } + for (auto snap_id : snap_ids) { + cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(), + ictx->md_ctx.get_namespace(), + ictx->id, snap_id}; + int r = list_descendants(ictx, parent_spec, max_level, images); + if (r < 0) { + return r; + } + } + return 0; +} + +template <typename I> +int Image<I>::list_descendants( + I *ictx, const cls::rbd::ParentImageSpec &parent_spec, + const std::optional<size_t> &max_level, + std::vector<librbd::linked_image_spec_t> *images) { + auto child_max_level = max_level; + if (child_max_level) { + if (child_max_level == 0) { + return 0; + } + (*child_max_level)--; + } + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + // no children for non-layered or old format image + if (!ictx->test_features(RBD_FEATURE_LAYERING, ictx->snap_lock)) { + return 0; + } + + librados::Rados rados(ictx->md_ctx); + + // search all pools for clone v1 children dependent on this snapshot + std::list<std::pair<int64_t, std::string> > pools; + int r = rados.pool_list2(pools); + if (r < 0) { + lderr(cct) << "error listing pools: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto& it : pools) { + int64_t base_tier; + r = rados.pool_get_base_tier(it.first, &base_tier); + if (r == -ENOENT) { + ldout(cct, 1) << "pool " << it.second << " no longer exists" << dendl; + continue; + } else if (r < 0) { + lderr(cct) << "error retrieving base tier for pool " << it.second + << dendl; + return r; + } + if (it.first != base_tier) { + // pool is a cache; skip it + continue; + } + + IoCtx ioctx; + r = util::create_ioctx(ictx->md_ctx, "child image", it.first, {}, &ioctx); + if (r == -ENOENT) { + continue; + } else if (r < 0) { + return r; + } + + std::set<std::string> image_ids; + r = cls_client::get_children(&ioctx, RBD_CHILDREN, parent_spec, + image_ids); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error reading list of children from pool " << it.second + << dendl; + return r; + } + + for (auto& image_id : image_ids) { + images->push_back({ + it.first, "", ictx->md_ctx.get_namespace(), image_id, "", false}); + r = list_descendants(ioctx, image_id, child_max_level, images); + if (r < 0) { + return r; + } + } + } + + // retrieve clone v2 children attached to this snapshot + IoCtx parent_io_ctx; + r = util::create_ioctx(ictx->md_ctx, "parent image", parent_spec.pool_id, + parent_spec.pool_namespace, &parent_io_ctx); + if (r < 0) { + return r; + } + + cls::rbd::ChildImageSpecs child_images; + r = cls_client::children_list(&parent_io_ctx, + util::header_name(parent_spec.image_id), + parent_spec.snap_id, &child_images); + if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) { + lderr(cct) << "error retrieving children: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto& child_image : child_images) { + images->push_back({ + child_image.pool_id, "", child_image.pool_namespace, + child_image.image_id, "", false}); + if (!child_max_level || *child_max_level > 0) { + IoCtx ioctx; + r = util::create_ioctx(ictx->md_ctx, "child image", child_image.pool_id, + child_image.pool_namespace, &ioctx); + if (r == -ENOENT) { + continue; + } else if (r < 0) { + return r; + } + r = list_descendants(ioctx, child_image.image_id, child_max_level, + images); + if (r < 0) { + return r; + } + } + } + + // batch lookups by pool + namespace + std::sort(images->begin(), images->end(), compare_by_pool); + + int64_t child_pool_id = -1; + librados::IoCtx child_io_ctx; + std::map<std::string, std::pair<std::string, bool>> child_image_id_to_info; + for (auto& image : *images) { + if (child_pool_id == -1 || child_pool_id != image.pool_id || + child_io_ctx.get_namespace() != image.pool_namespace) { + r = util::create_ioctx(ictx->md_ctx, "child image", image.pool_id, + image.pool_namespace, &child_io_ctx); + if (r == -ENOENT) { + image.pool_name = ""; + image.image_name = ""; + continue; + } else if (r < 0) { + return r; + } + child_pool_id = image.pool_id; + + child_image_id_to_info.clear(); + + std::map<std::string, std::string> image_names_to_ids; + r = list_images_v2(child_io_ctx, &image_names_to_ids); + if (r < 0) { + lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto& [name, id] : image_names_to_ids) { + child_image_id_to_info.insert({id, {name, false}}); + } + + std::vector<librbd::trash_image_info_t> trash_images; + r = Trash<I>::list(child_io_ctx, trash_images, false); + if (r < 0 && r != -EOPNOTSUPP) { + lderr(cct) << "error listing trash images: " << cpp_strerror(r) + << dendl; + return r; + } + + for (auto& it : trash_images) { + child_image_id_to_info.insert({ + it.id, + {it.name, + it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING ? false : true}}); + } + } + + auto it = child_image_id_to_info.find(image.image_id); + if (it == child_image_id_to_info.end()) { + lderr(cct) << "error looking up name for image id " + << image.image_id << " in pool " + << child_io_ctx.get_pool_name() + << (image.pool_namespace.empty() ? + "" : "/" + image.pool_namespace) << dendl; + return -ENOENT; + } + + image.pool_name = child_io_ctx.get_pool_name(); + image.image_name = it->second.first; + image.trash = it->second.second; + } + + // final sort by pool + image names + std::sort(images->begin(), images->end(), compare); + return 0; +} + +template <typename I> +int Image<I>::deep_copy(I *src, librados::IoCtx& dest_md_ctx, + const char *destname, ImageOptions& opts, + ProgressContext &prog_ctx) { + CephContext *cct = (CephContext *)dest_md_ctx.cct(); + ldout(cct, 20) << src->name + << (src->snap_name.length() ? "@" + src->snap_name : "") + << " -> " << destname << " opts = " << opts << dendl; + + uint64_t features; + uint64_t src_size; + { + RWLock::RLocker snap_locker(src->snap_lock); + + if (!src->migration_info.empty()) { + lderr(cct) << "cannot deep copy migrating image" << dendl; + return -EBUSY; + } + + features = src->features; + src_size = src->get_image_size(src->snap_id); + } + uint64_t format = 2; + if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) { + opts.set(RBD_IMAGE_OPTION_FORMAT, format); + } + if (format == 1) { + lderr(cct) << "old format not supported for destination image" << dendl; + return -EINVAL; + } + uint64_t stripe_unit = src->stripe_unit; + if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) { + opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); + } + uint64_t stripe_count = src->stripe_count; + if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) { + opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); + } + uint64_t order = src->order; + if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) { + opts.set(RBD_IMAGE_OPTION_ORDER, order); + } + if (opts.get(RBD_IMAGE_OPTION_FEATURES, &features) != 0) { + opts.set(RBD_IMAGE_OPTION_FEATURES, features); + } + if (features & ~RBD_FEATURES_ALL) { + lderr(cct) << "librbd does not support requested features" << dendl; + return -ENOSYS; + } + + uint64_t flatten = 0; + if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) { + opts.unset(RBD_IMAGE_OPTION_FLATTEN); + } + + cls::rbd::ParentImageSpec parent_spec; + if (flatten > 0) { + parent_spec.pool_id = -1; + } else { + RWLock::RLocker snap_locker(src->snap_lock); + RWLock::RLocker parent_locker(src->parent_lock); + + // use oldest snapshot or HEAD for parent spec + if (!src->snap_info.empty()) { + parent_spec = src->snap_info.begin()->second.parent.spec; + } else { + parent_spec = src->parent_md.spec; + } + } + + int r; + if (parent_spec.pool_id == -1) { + r = create(dest_md_ctx, destname, "", src_size, opts, "", "", false); + } else { + librados::IoCtx parent_io_ctx; + r = util::create_ioctx(src->md_ctx, "parent image", parent_spec.pool_id, + parent_spec.pool_namespace, &parent_io_ctx); + if (r < 0) { + return r; + } + + ConfigProxy config{cct->_conf}; + api::Config<I>::apply_pool_overrides(dest_md_ctx, &config); + + C_SaferCond ctx; + std::string dest_id = util::generate_image_id(dest_md_ctx); + auto *req = image::CloneRequest<I>::create( + config, parent_io_ctx, parent_spec.image_id, "", parent_spec.snap_id, + dest_md_ctx, destname, dest_id, opts, "", "", src->op_work_queue, &ctx); + req->send(); + r = ctx.wait(); + } + if (r < 0) { + lderr(cct) << "header creation failed" << dendl; + return r; + } + opts.set(RBD_IMAGE_OPTION_ORDER, static_cast<uint64_t>(order)); + + auto dest = new I(destname, "", nullptr, dest_md_ctx, false); + r = dest->state->open(0); + if (r < 0) { + lderr(cct) << "failed to read newly created header" << dendl; + return r; + } + + C_SaferCond lock_ctx; + { + RWLock::WLocker locker(dest->owner_lock); + + if (dest->exclusive_lock == nullptr || + dest->exclusive_lock->is_lock_owner()) { + lock_ctx.complete(0); + } else { + dest->exclusive_lock->acquire_lock(&lock_ctx); + } + } + + r = lock_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to request exclusive lock: " << cpp_strerror(r) + << dendl; + dest->state->close(); + return r; + } + + r = deep_copy(src, dest, flatten > 0, prog_ctx); + + int close_r = dest->state->close(); + if (r == 0 && close_r < 0) { + r = close_r; + } + return r; +} + +template <typename I> +int Image<I>::deep_copy(I *src, I *dest, bool flatten, + ProgressContext &prog_ctx) { + CephContext *cct = src->cct; + librados::snap_t snap_id_start = 0; + librados::snap_t snap_id_end; + { + RWLock::RLocker snap_locker(src->snap_lock); + snap_id_end = src->snap_id; + } + + ThreadPool *thread_pool; + ContextWQ *op_work_queue; + ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue); + + C_SaferCond cond; + SnapSeqs snap_seqs; + auto req = DeepCopyRequest<I>::create(src, dest, snap_id_start, snap_id_end, + 0U, flatten, boost::none, op_work_queue, + &snap_seqs, &prog_ctx, &cond); + req->send(); + int r = cond.wait(); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Image<I>::snap_set(I *ictx, + const cls::rbd::SnapshotNamespace &snap_namespace, + const char *snap_name) { + ldout(ictx->cct, 20) << "snap_set " << ictx << " snap = " + << (snap_name ? snap_name : "NULL") << dendl; + + // ignore return value, since we may be set to a non-existent + // snapshot and the user is trying to fix that + ictx->state->refresh_if_required(); + + uint64_t snap_id = CEPH_NOSNAP; + std::string name(snap_name == nullptr ? "" : snap_name); + if (!name.empty()) { + RWLock::RLocker snap_locker(ictx->snap_lock); + snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace{}, + snap_name); + if (snap_id == CEPH_NOSNAP) { + return -ENOENT; + } + } + + return snap_set(ictx, snap_id); +} + +template <typename I> +int Image<I>::snap_set(I *ictx, uint64_t snap_id) { + ldout(ictx->cct, 20) << "snap_set " << ictx << " " + << "snap_id=" << snap_id << dendl; + + // ignore return value, since we may be set to a non-existent + // snapshot and the user is trying to fix that + ictx->state->refresh_if_required(); + + C_SaferCond ctx; + ictx->state->snap_set(snap_id, &ctx); + int r = ctx.wait(); + if (r < 0) { + if (r != -ENOENT) { + lderr(ictx->cct) << "failed to " << (snap_id == CEPH_NOSNAP ? "un" : "") + << "set snapshot: " << cpp_strerror(r) << dendl; + } + return r; + } + + return 0; +} + +template <typename I> +int Image<I>::remove(IoCtx& io_ctx, const std::string &image_name, + ProgressContext& prog_ctx) +{ + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "name=" << image_name << dendl; + + // look up the V2 image id based on the image name + std::string image_id; + int r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name, + &image_id); + if (r == -ENOENT) { + // check if it already exists in trash from an aborted trash remove attempt + std::vector<trash_image_info_t> trash_entries; + r = Trash<I>::list(io_ctx, trash_entries, false); + if (r < 0) { + return r; + } else if (r >= 0) { + for (auto& entry : trash_entries) { + if (entry.name == image_name && + entry.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + return Trash<I>::remove(io_ctx, entry.id, true, prog_ctx); + } + } + } + + // fall-through if we failed to locate the image in the V2 directory and + // trash + } else if (r < 0) { + lderr(cct) << "failed to retrieve image id: " << cpp_strerror(r) << dendl; + return r; + } else { + // attempt to move the image to the trash (and optionally immediately + // delete the image) + ConfigProxy config(cct->_conf); + Config<I>::apply_pool_overrides(io_ctx, &config); + + rbd_trash_image_source_t trash_image_source = + RBD_TRASH_IMAGE_SOURCE_REMOVING; + uint64_t expire_seconds = 0; + if (config.get_val<bool>("rbd_move_to_trash_on_remove")) { + // keep the image in the trash upon remove requests + trash_image_source = RBD_TRASH_IMAGE_SOURCE_USER; + expire_seconds = config.get_val<uint64_t>( + "rbd_move_to_trash_on_remove_expire_seconds"); + } else { + // attempt to pre-validate the removal before moving to trash and + // removing + r = pre_remove_image<I>(io_ctx, image_id); + if (r < 0 && r != -ENOENT) { + return r; + } + } + + r = Trash<I>::move(io_ctx, trash_image_source, image_name, image_id, + expire_seconds); + if (r >= 0) { + if (trash_image_source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + // proceed with attempting to immediately remove the image + r = Trash<I>::remove(io_ctx, image_id, true, prog_ctx); + + if (r == -ENOTEMPTY || r == -EBUSY || r == -EMLINK) { + // best-effort try to restore the image if the removal + // failed for possible expected reasons + Trash<I>::restore(io_ctx, {cls::rbd::TRASH_IMAGE_SOURCE_REMOVING}, + image_id, image_name); + } + } + return r; + } else if (r < 0 && r != -EOPNOTSUPP) { + return r; + } + + // fall-through if trash isn't supported + } + + ThreadPool *thread_pool; + ContextWQ *op_work_queue; + ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue); + + // might be a V1 image format that cannot be moved to the trash + // and would not have been listed in the V2 directory -- or the OSDs + // are too old and don't support the trash feature + C_SaferCond cond; + auto req = librbd::image::RemoveRequest<I>::create( + io_ctx, image_name, "", false, false, prog_ctx, op_work_queue, &cond); + req->send(); + + return cond.wait(); +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Image<librbd::ImageCtx>; diff --git a/src/librbd/api/Image.h b/src/librbd/api/Image.h new file mode 100644 index 00000000..af928c84 --- /dev/null +++ b/src/librbd/api/Image.h @@ -0,0 +1,78 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef LIBRBD_API_IMAGE_H +#define LIBRBD_API_IMAGE_H + +#include "include/rbd/librbd.hpp" +#include "include/rados/librados_fwd.hpp" +#include "librbd/Types.h" +#include <map> +#include <set> +#include <string> + +namespace librbd { + +class ImageOptions; +class ProgressContext; + +struct ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +struct Image { + typedef std::map<std::string, std::string> ImageNameToIds; + + static int64_t get_data_pool_id(ImageCtxT *ictx); + + static int get_op_features(ImageCtxT *ictx, uint64_t *op_features); + + static int list_images(librados::IoCtx& io_ctx, + std::vector<image_spec_t> *images); + static int list_images_v2(librados::IoCtx& io_ctx, + ImageNameToIds *images); + + static int get_parent(ImageCtxT *ictx, + librbd::linked_image_spec_t *parent_image, + librbd::snap_spec_t *parent_snap); + + static int list_children(ImageCtxT *ictx, + std::vector<librbd::linked_image_spec_t> *images); + static int list_children(ImageCtxT *ictx, + const cls::rbd::ParentImageSpec &parent_spec, + std::vector<librbd::linked_image_spec_t> *images); + + static int list_descendants(IoCtx& io_ctx, const std::string &image_id, + const std::optional<size_t> &max_level, + std::vector<librbd::linked_image_spec_t> *images); + static int list_descendants(ImageCtxT *ictx, + const std::optional<size_t> &max_level, + std::vector<librbd::linked_image_spec_t> *images); + static int list_descendants(ImageCtxT *ictx, + const cls::rbd::ParentImageSpec &parent_spec, + const std::optional<size_t> &max_level, + std::vector<librbd::linked_image_spec_t> *images); + + static int deep_copy(ImageCtxT *ictx, librados::IoCtx& dest_md_ctx, + const char *destname, ImageOptions& opts, + ProgressContext &prog_ctx); + static int deep_copy(ImageCtxT *src, ImageCtxT *dest, bool flatten, + ProgressContext &prog_ctx); + + static int snap_set(ImageCtxT *ictx, + const cls::rbd::SnapshotNamespace &snap_namespace, + const char *snap_name); + static int snap_set(ImageCtxT *ictx, uint64_t snap_id); + + static int remove(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext& prog_ctx); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Image<librbd::ImageCtx>; + +#endif // LIBRBD_API_IMAGE_H diff --git a/src/librbd/api/Migration.cc b/src/librbd/api/Migration.cc new file mode 100644 index 00000000..ae09d407 --- /dev/null +++ b/src/librbd/api/Migration.cc @@ -0,0 +1,1885 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Migration.h" +#include "include/rados/librados.hpp" +#include "include/stringify.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" +#include "librbd/api/Group.h" +#include "librbd/api/Image.h" +#include "librbd/api/Snapshot.h" +#include "librbd/api/Trash.h" +#include "librbd/deep_copy/ImageCopyRequest.h" +#include "librbd/deep_copy/MetadataCopyRequest.h" +#include "librbd/deep_copy/SnapshotCopyRequest.h" +#include "librbd/exclusive_lock/Policy.h" +#include "librbd/image/AttachChildRequest.h" +#include "librbd/image/AttachParentRequest.h" +#include "librbd/image/CloneRequest.h" +#include "librbd/image/CreateRequest.h" +#include "librbd/image/DetachChildRequest.h" +#include "librbd/image/DetachParentRequest.h" +#include "librbd/image/ListWatchersRequest.h" +#include "librbd/image/RemoveRequest.h" +#include "librbd/internal.h" +#include "librbd/io/ImageRequestWQ.h" +#include "librbd/mirror/DisableRequest.h" +#include "librbd/mirror/EnableRequest.h" + +#include <boost/scope_exit.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::Migration: " << __func__ << ": " + +namespace librbd { + +inline bool operator==(const linked_image_spec_t& rhs, + const linked_image_spec_t& lhs) { + bool result = (rhs.pool_id == lhs.pool_id && + rhs.pool_namespace == lhs.pool_namespace && + rhs.image_id == lhs.image_id); + return result; +} + +namespace api { + +using util::create_rados_callback; + +namespace { + +class MigrationProgressContext : public ProgressContext { +public: + MigrationProgressContext(librados::IoCtx& io_ctx, + const std::string &header_oid, + cls::rbd::MigrationState state, + ProgressContext *prog_ctx) + : m_io_ctx(io_ctx), m_header_oid(header_oid), m_state(state), + m_prog_ctx(prog_ctx), m_cct(reinterpret_cast<CephContext*>(io_ctx.cct())), + m_lock(util::unique_lock_name("librbd::api::MigrationProgressContext", + this)) { + ceph_assert(m_prog_ctx != nullptr); + } + + ~MigrationProgressContext() { + wait_for_in_flight_updates(); + } + + int update_progress(uint64_t offset, uint64_t total) override { + ldout(m_cct, 20) << "offset=" << offset << ", total=" << total << dendl; + + m_prog_ctx->update_progress(offset, total); + + std::string description = stringify(offset * 100 / total) + "% complete"; + + send_state_description_update(description); + + return 0; + } + +private: + librados::IoCtx& m_io_ctx; + std::string m_header_oid; + cls::rbd::MigrationState m_state; + ProgressContext *m_prog_ctx; + + CephContext* m_cct; + mutable Mutex m_lock; + Cond m_cond; + std::string m_state_description; + bool m_pending_update = false; + int m_in_flight_state_updates = 0; + + void send_state_description_update(const std::string &description) { + Mutex::Locker locker(m_lock); + + if (description == m_state_description) { + return; + } + + m_state_description = description; + + if (m_in_flight_state_updates > 0) { + m_pending_update = true; + return; + } + + set_state_description(); + } + + void set_state_description() { + ldout(m_cct, 20) << "state_description=" << m_state_description << dendl; + + ceph_assert(m_lock.is_locked()); + + librados::ObjectWriteOperation op; + cls_client::migration_set_state(&op, m_state, m_state_description); + + using klass = MigrationProgressContext; + librados::AioCompletion *comp = + create_rados_callback<klass, &klass::handle_set_state_description>(this); + int r = m_io_ctx.aio_operate(m_header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); + + m_in_flight_state_updates++; + } + + void handle_set_state_description(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + m_in_flight_state_updates--; + + if (r < 0) { + lderr(m_cct) << "failed to update migration state: " << cpp_strerror(r) + << dendl; + } else if (m_pending_update) { + set_state_description(); + m_pending_update = false; + } else { + m_cond.Signal(); + } + } + + void wait_for_in_flight_updates() { + Mutex::Locker locker(m_lock); + + ldout(m_cct, 20) << "m_in_flight_state_updates=" + << m_in_flight_state_updates << dendl; + + m_pending_update = false; + while (m_in_flight_state_updates > 0) { + m_cond.Wait(m_lock); + } + } +}; + +int trash_search(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, std::string *image_id) { + std::vector<trash_image_info_t> entries; + + int r = Trash<>::list(io_ctx, entries, false); + if (r < 0) { + return r; + } + + for (auto &entry : entries) { + if (entry.source == source && entry.name == image_name) { + *image_id = entry.id; + return 0; + } + } + + return -ENOENT; +} + +template <typename I> +int open_source_image(librados::IoCtx& io_ctx, const std::string &image_name, + I **src_image_ctx, librados::IoCtx *dst_io_ctx, + std::string *dst_image_name, std::string *dst_image_id, + bool *flatten, bool *mirroring, + cls::rbd::MigrationState *state, + std::string *state_description) { + CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + + librados::IoCtx src_io_ctx; + std::string src_image_name; + std::string src_image_id; + cls::rbd::MigrationSpec migration_spec; + I *image_ctx = I::create(image_name, "", nullptr, io_ctx, false); + + ldout(cct, 10) << "trying to open image by name " << io_ctx.get_pool_name() + << "/" << image_name << dendl; + + int r = image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING); + if (r < 0) { + if (r != -ENOENT) { + lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl; + return r; + } + image_ctx = nullptr; + } + + BOOST_SCOPE_EXIT_TPL(&r, &image_ctx) { + if (r != 0 && image_ctx != nullptr) { + image_ctx->state->close(); + } + } BOOST_SCOPE_EXIT_END; + + if (r == 0) { + // The opened image is either a source (then just proceed) or a + // destination (then look for the source image id in the migration + // header). + + r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid, + &migration_spec); + + if (r < 0) { + lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 10) << "migration spec: " << migration_spec << dendl; + + if (migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_SRC && + migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_DST) { + lderr(cct) << "unexpected migration header type: " + << migration_spec.header_type << dendl; + r = -EINVAL; + return r; + } + + if (migration_spec.header_type == cls::rbd::MIGRATION_HEADER_TYPE_DST) { + ldout(cct, 10) << "the destination image is opened" << dendl; + + // Close and look for the source image. + r = image_ctx->state->close(); + image_ctx = nullptr; + if (r < 0) { + lderr(cct) << "failed closing image: " << cpp_strerror(r) + << dendl; + return r; + } + + r = util::create_ioctx(io_ctx, "source image", migration_spec.pool_id, + migration_spec.pool_namespace, &src_io_ctx); + if (r < 0) { + return r; + } + + src_image_name = migration_spec.image_name; + src_image_id = migration_spec.image_id; + } else { + ldout(cct, 10) << "the source image is opened" << dendl; + } + } else { + assert (r == -ENOENT); + + ldout(cct, 10) << "source image is not found. Trying trash" << dendl; + + r = trash_search(io_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION, image_name, + &src_image_id); + if (r < 0) { + lderr(cct) << "failed to determine image id: " << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 10) << "source image id from trash: " << src_image_id << dendl; + + src_io_ctx.dup(io_ctx); + } + + if (image_ctx == nullptr) { + int flags = OPEN_FLAG_IGNORE_MIGRATING; + + if (src_image_id.empty()) { + ldout(cct, 20) << "trying to open v1 image by name " + << src_io_ctx.get_pool_name() << "/" << src_image_name + << dendl; + + flags |= OPEN_FLAG_OLD_FORMAT; + } else { + ldout(cct, 20) << "trying to open v2 image by id " + << src_io_ctx.get_pool_name() << "/" << src_image_id + << dendl; + } + + image_ctx = I::create(src_image_name, src_image_id, nullptr, src_io_ctx, + false); + r = image_ctx->state->open(flags); + if (r < 0) { + lderr(cct) << "failed to open source image " << src_io_ctx.get_pool_name() + << "/" << (src_image_id.empty() ? src_image_name : src_image_id) + << ": " << cpp_strerror(r) << dendl; + image_ctx = nullptr; + return r; + } + + r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid, + &migration_spec); + if (r < 0) { + lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 20) << "migration spec: " << migration_spec << dendl; + } + + r = util::create_ioctx(image_ctx->md_ctx, "source image", + migration_spec.pool_id, migration_spec.pool_namespace, + dst_io_ctx); + if (r < 0) { + return r; + } + + *src_image_ctx = image_ctx; + *dst_image_name = migration_spec.image_name; + *dst_image_id = migration_spec.image_id; + *flatten = migration_spec.flatten; + *mirroring = migration_spec.mirroring; + *state = migration_spec.state; + *state_description = migration_spec.state_description; + + return 0; +} + +class SteppedProgressContext : public ProgressContext { +public: + SteppedProgressContext(ProgressContext* progress_ctx, size_t total_steps) + : m_progress_ctx(progress_ctx), m_total_steps(total_steps) { + } + + void next_step() { + ceph_assert(m_current_step < m_total_steps); + ++m_current_step; + } + + int update_progress(uint64_t object_number, + uint64_t object_count) override { + return m_progress_ctx->update_progress( + object_number + (object_count * (m_current_step - 1)), + object_count * m_total_steps); + } + +private: + ProgressContext* m_progress_ctx; + size_t m_total_steps; + size_t m_current_step = 1; +}; + +} // anonymous namespace + +template <typename I> +int Migration<I>::prepare(librados::IoCtx& io_ctx, + const std::string &image_name, + librados::IoCtx& dest_io_ctx, + const std::string &dest_image_name_, + ImageOptions& opts) { + CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + + std::string dest_image_name = dest_image_name_.empty() ? image_name : + dest_image_name_; + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << " -> " + << dest_io_ctx.get_pool_name() << "/" << dest_image_name + << ", opts=" << opts << dendl; + + auto image_ctx = I::create(image_name, "", nullptr, io_ctx, false); + int r = image_ctx->state->open(0); + if (r < 0) { + lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl; + return r; + } + BOOST_SCOPE_EXIT_TPL(image_ctx) { + image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + std::list<obj_watch_t> watchers; + int flags = librbd::image::LIST_WATCHERS_FILTER_OUT_MY_INSTANCE | + librbd::image::LIST_WATCHERS_FILTER_OUT_MIRROR_INSTANCES; + C_SaferCond on_list_watchers; + auto list_watchers_request = librbd::image::ListWatchersRequest<I>::create( + *image_ctx, flags, &watchers, &on_list_watchers); + list_watchers_request->send(); + r = on_list_watchers.wait(); + if (r < 0) { + lderr(cct) << "failed listing watchers:" << cpp_strerror(r) << dendl; + return r; + } + if (!watchers.empty()) { + lderr(cct) << "image has watchers - not migrating" << dendl; + return -EBUSY; + } + + uint64_t format = 2; + if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) { + opts.set(RBD_IMAGE_OPTION_FORMAT, format); + } + if (format != 2) { + lderr(cct) << "unsupported destination image format: " << format << dendl; + return -EINVAL; + } + + uint64_t features; + { + RWLock::RLocker snap_locker(image_ctx->snap_lock); + features = image_ctx->features; + } + opts.get(RBD_IMAGE_OPTION_FEATURES, &features); + if ((features & ~RBD_FEATURES_ALL) != 0) { + lderr(cct) << "librbd does not support requested features" << dendl; + return -ENOSYS; + } + features &= ~RBD_FEATURES_INTERNAL; + opts.set(RBD_IMAGE_OPTION_FEATURES, features); + + uint64_t order = image_ctx->order; + if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) { + opts.set(RBD_IMAGE_OPTION_ORDER, order); + } + r = image::CreateRequest<I>::validate_order(cct, order); + if (r < 0) { + return r; + } + + uint64_t stripe_unit = image_ctx->stripe_unit; + if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) { + opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); + } + uint64_t stripe_count = image_ctx->stripe_count; + if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) { + opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); + } + + uint64_t flatten = 0; + if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) { + opts.unset(RBD_IMAGE_OPTION_FLATTEN); + } + + ldout(cct, 20) << "updated opts=" << opts << dendl; + + Migration migration(image_ctx, dest_io_ctx, dest_image_name, "", opts, flatten > 0, + false, cls::rbd::MIGRATION_STATE_PREPARING, "", nullptr); + r = migration.prepare(); + + return r; +} + +template <typename I> +int Migration<I>::execute(librados::IoCtx& io_ctx, + const std::string &image_name, + ProgressContext &prog_ctx) { + CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl; + + I *image_ctx; + librados::IoCtx dest_io_ctx; + std::string dest_image_name; + std::string dest_image_id; + bool flatten; + bool mirroring; + cls::rbd::MigrationState state; + std::string state_description; + + int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx, + &dest_image_name, &dest_image_id, &flatten, + &mirroring, &state, &state_description); + if (r < 0) { + return r; + } + + BOOST_SCOPE_EXIT_TPL(image_ctx) { + image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + if (state != cls::rbd::MIGRATION_STATE_PREPARED) { + lderr(cct) << "current migration state is '" << state << "'" + << " (should be 'prepared')" << dendl; + return -EINVAL; + } + + ldout(cct, 5) << "migrating " << image_ctx->md_ctx.get_pool_name() << "/" + << image_ctx->name << " -> " << dest_io_ctx.get_pool_name() + << "/" << dest_image_name << dendl; + + ImageOptions opts; + Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id, + opts, flatten, mirroring, state, state_description, + &prog_ctx); + r = migration.execute(); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::abort(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext &prog_ctx) { + CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl; + + I *image_ctx; + librados::IoCtx dest_io_ctx; + std::string dest_image_name; + std::string dest_image_id; + bool flatten; + bool mirroring; + cls::rbd::MigrationState state; + std::string state_description; + + int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx, + &dest_image_name, &dest_image_id, &flatten, + &mirroring, &state, &state_description); + if (r < 0) { + return r; + } + + ldout(cct, 5) << "canceling incomplete migration " + << image_ctx->md_ctx.get_pool_name() << "/" << image_ctx->name + << " -> " << dest_io_ctx.get_pool_name() << "/" << dest_image_name + << dendl; + + ImageOptions opts; + Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id, + opts, flatten, mirroring, state, state_description, + &prog_ctx); + r = migration.abort(); + + image_ctx->state->close(); + + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::commit(librados::IoCtx& io_ctx, + const std::string &image_name, + ProgressContext &prog_ctx) { + CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl; + + I *image_ctx; + librados::IoCtx dest_io_ctx; + std::string dest_image_name; + std::string dest_image_id; + bool flatten; + bool mirroring; + cls::rbd::MigrationState state; + std::string state_description; + + int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx, + &dest_image_name, &dest_image_id, &flatten, + &mirroring, &state, &state_description); + if (r < 0) { + return r; + } + + if (state != cls::rbd::MIGRATION_STATE_EXECUTED) { + lderr(cct) << "current migration state is '" << state << "'" + << " (should be 'executed')" << dendl; + image_ctx->state->close(); + return -EINVAL; + } + + ldout(cct, 5) << "migrating " << image_ctx->md_ctx.get_pool_name() << "/" + << image_ctx->name << " -> " << dest_io_ctx.get_pool_name() + << "/" << dest_image_name << dendl; + + ImageOptions opts; + Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id, + opts, flatten, mirroring, state, state_description, + &prog_ctx); + r = migration.commit(); + + // image_ctx is closed in commit when removing src image + + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::status(librados::IoCtx& io_ctx, + const std::string &image_name, + image_migration_status_t *status) { + CephContext* cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl; + + I *image_ctx; + librados::IoCtx dest_io_ctx; + std::string dest_image_name; + std::string dest_image_id; + bool flatten; + bool mirroring; + cls::rbd::MigrationState state; + std::string state_description; + + int r = open_source_image(io_ctx, image_name, &image_ctx, &dest_io_ctx, + &dest_image_name, &dest_image_id, &flatten, + &mirroring, &state, &state_description); + if (r < 0) { + return r; + } + + ldout(cct, 5) << "migrating " << image_ctx->md_ctx.get_pool_name() << "/" + << image_ctx->name << " -> " << dest_io_ctx.get_pool_name() + << "/" << dest_image_name << dendl; + + ImageOptions opts; + Migration migration(image_ctx, dest_io_ctx, dest_image_name, dest_image_id, + opts, flatten, mirroring, state, state_description, + nullptr); + r = migration.status(status); + + image_ctx->state->close(); + + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +Migration<I>::Migration(I *src_image_ctx, librados::IoCtx& dst_io_ctx, + const std::string &dstname, + const std::string &dst_image_id, + ImageOptions& opts, bool flatten, bool mirroring, + cls::rbd::MigrationState state, + const std::string &state_description, + ProgressContext *prog_ctx) + : m_cct(static_cast<CephContext *>(dst_io_ctx.cct())), + m_src_image_ctx(src_image_ctx), m_dst_io_ctx(dst_io_ctx), + m_src_old_format(m_src_image_ctx->old_format), + m_src_image_name(m_src_image_ctx->old_format ? m_src_image_ctx->name : ""), + m_src_image_id(m_src_image_ctx->id), + m_src_header_oid(m_src_image_ctx->header_oid), m_dst_image_name(dstname), + m_dst_image_id(dst_image_id.empty() ? + util::generate_image_id(m_dst_io_ctx) : dst_image_id), + m_dst_header_oid(util::header_name(m_dst_image_id)), m_image_options(opts), + m_flatten(flatten), m_mirroring(mirroring), m_prog_ctx(prog_ctx), + m_src_migration_spec(cls::rbd::MIGRATION_HEADER_TYPE_SRC, + m_dst_io_ctx.get_id(), m_dst_io_ctx.get_namespace(), + m_dst_image_name, m_dst_image_id, {}, 0, flatten, + mirroring, state, state_description), + m_dst_migration_spec(cls::rbd::MIGRATION_HEADER_TYPE_DST, + src_image_ctx->md_ctx.get_id(), + src_image_ctx->md_ctx.get_namespace(), + m_src_image_ctx->name, m_src_image_ctx->id, {}, 0, + flatten, mirroring, state, state_description) { + m_src_io_ctx.dup(src_image_ctx->md_ctx); +} + +template <typename I> +int Migration<I>::prepare() { + ldout(m_cct, 10) << dendl; + + int r = validate_src_snaps(); + if (r < 0) { + return r; + } + + r = disable_mirroring(m_src_image_ctx, &m_mirroring); + if (r < 0) { + return r; + } + + r = unlink_src_image(); + if (r < 0) { + enable_mirroring(m_src_image_ctx, m_mirroring); + return r; + } + + r = set_migration(); + if (r < 0) { + relink_src_image(); + enable_mirroring(m_src_image_ctx, m_mirroring); + return r; + } + + r = create_dst_image(); + if (r < 0) { + abort(); + return r; + } + + ldout(m_cct, 10) << "succeeded" << dendl; + + return 0; +} + +template <typename I> +int Migration<I>::execute() { + ldout(m_cct, 10) << dendl; + + auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr, + m_dst_io_ctx, false); + int r = dst_image_ctx->state->open(0); + if (r < 0) { + lderr(m_cct) << "failed to open destination image: " << cpp_strerror(r) + << dendl; + return r; + } + + BOOST_SCOPE_EXIT_TPL(dst_image_ctx) { + dst_image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + r = set_state(cls::rbd::MIGRATION_STATE_EXECUTING, ""); + if (r < 0) { + return r; + } + + while (true) { + MigrationProgressContext prog_ctx(m_src_io_ctx, m_src_header_oid, + cls::rbd::MIGRATION_STATE_EXECUTING, + m_prog_ctx); + r = dst_image_ctx->operations->migrate(prog_ctx); + if (r == -EROFS) { + RWLock::RLocker owner_locker(dst_image_ctx->owner_lock); + if (dst_image_ctx->exclusive_lock != nullptr && + !dst_image_ctx->exclusive_lock->accept_ops()) { + ldout(m_cct, 5) << "lost exclusive lock, retrying remote" << dendl; + continue; + } + } + break; + } + if (r < 0) { + lderr(m_cct) << "migration failed: " << cpp_strerror(r) << dendl; + return r; + } + + r = set_state(cls::rbd::MIGRATION_STATE_EXECUTED, ""); + if (r < 0) { + return r; + } + + dst_image_ctx->notify_update(); + + ldout(m_cct, 10) << "succeeded" << dendl; + + return 0; +} + +template <typename I> +int Migration<I>::abort() { + ldout(m_cct, 10) << dendl; + + int r; + + m_src_image_ctx->owner_lock.get_read(); + if (m_src_image_ctx->exclusive_lock != nullptr && + !m_src_image_ctx->exclusive_lock->is_lock_owner()) { + C_SaferCond ctx; + m_src_image_ctx->exclusive_lock->acquire_lock(&ctx); + m_src_image_ctx->owner_lock.put_read(); + r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "error acquiring exclusive lock: " << cpp_strerror(r) + << dendl; + return r; + } + } else { + m_src_image_ctx->owner_lock.put_read(); + } + + group_info_t group_info; + group_info.pool = -1; + + auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr, + m_dst_io_ctx, false); + r = dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING); + if (r < 0) { + ldout(m_cct, 1) << "failed to open destination image: " << cpp_strerror(r) + << dendl; + } else { + BOOST_SCOPE_EXIT_TPL(&dst_image_ctx) { + if (dst_image_ctx != nullptr) { + dst_image_ctx->state->close(); + } + } BOOST_SCOPE_EXIT_END; + + std::list<obj_watch_t> watchers; + int flags = librbd::image::LIST_WATCHERS_FILTER_OUT_MY_INSTANCE | + librbd::image::LIST_WATCHERS_FILTER_OUT_MIRROR_INSTANCES; + C_SaferCond on_list_watchers; + auto list_watchers_request = librbd::image::ListWatchersRequest<I>::create( + *dst_image_ctx, flags, &watchers, &on_list_watchers); + list_watchers_request->send(); + r = on_list_watchers.wait(); + if (r < 0) { + lderr(m_cct) << "failed listing watchers:" << cpp_strerror(r) << dendl; + return r; + } + if (!watchers.empty()) { + lderr(m_cct) << "image has watchers - cannot abort migration" << dendl; + return -EBUSY; + } + + // ensure destination image is now read-only + r = set_state(cls::rbd::MIGRATION_STATE_ABORTING, ""); + if (r < 0) { + return r; + } + + // copy dst HEAD -> src HEAD + SteppedProgressContext progress_ctx(m_prog_ctx, 2); + revert_data(dst_image_ctx, m_src_image_ctx, &progress_ctx); + progress_ctx.next_step(); + + ldout(m_cct, 10) << "relinking children" << dendl; + r = relink_children(dst_image_ctx, m_src_image_ctx); + if (r < 0) { + return r; + } + + ldout(m_cct, 10) << "removing dst image snapshots" << dendl; + + std::vector<librbd::snap_info_t> snaps; + r = snap_list(dst_image_ctx, snaps); + if (r < 0) { + lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r) + << dendl; + return r; + } + + for (auto &snap : snaps) { + librbd::NoOpProgressContext prog_ctx; + int r = snap_remove(dst_image_ctx, snap.name.c_str(), + RBD_SNAP_REMOVE_UNPROTECT, prog_ctx); + if (r < 0) { + lderr(m_cct) << "failed removing snapshot: " << cpp_strerror(r) + << dendl; + return r; + } + } + + ldout(m_cct, 10) << "removing group" << dendl; + + r = remove_group(dst_image_ctx, &group_info); + if (r < 0 && r != -ENOENT) { + return r; + } + + ldout(m_cct, 10) << "removing dst image" << dendl; + + ceph_assert(dst_image_ctx->ignore_migrating); + + ThreadPool *thread_pool; + ContextWQ *op_work_queue; + ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &op_work_queue); + C_SaferCond on_remove; + auto req = librbd::image::RemoveRequest<>::create( + m_dst_io_ctx, dst_image_ctx, false, false, progress_ctx, op_work_queue, + &on_remove); + req->send(); + r = on_remove.wait(); + + dst_image_ctx = nullptr; + + if (r < 0) { + lderr(m_cct) << "failed removing destination image '" + << m_dst_io_ctx.get_pool_name() << "/" << m_dst_image_name + << " (" << m_dst_image_id << ")': " << cpp_strerror(r) + << dendl; + return r; + } + } + + r = relink_src_image(); + if (r < 0) { + return r; + } + + r = add_group(m_src_image_ctx, group_info); + if (r < 0) { + return r; + } + + r = remove_migration(m_src_image_ctx); + if (r < 0) { + return r; + } + + r = enable_mirroring(m_src_image_ctx, m_mirroring); + if (r < 0) { + return r; + } + + ldout(m_cct, 10) << "succeeded" << dendl; + + return 0; +} + +template <typename I> +int Migration<I>::commit() { + ldout(m_cct, 10) << dendl; + + BOOST_SCOPE_EXIT_TPL(&m_src_image_ctx) { + if (m_src_image_ctx != nullptr) { + m_src_image_ctx->state->close(); + } + } BOOST_SCOPE_EXIT_END; + + auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr, + m_dst_io_ctx, false); + int r = dst_image_ctx->state->open(0); + if (r < 0) { + lderr(m_cct) << "failed to open destination image: " << cpp_strerror(r) + << dendl; + return r; + } + + BOOST_SCOPE_EXIT_TPL(dst_image_ctx) { + dst_image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + r = remove_migration(dst_image_ctx); + if (r < 0) { + return r; + } + + r = remove_src_image(); + if (r < 0) { + return r; + } + + r = enable_mirroring(dst_image_ctx, m_mirroring); + if (r < 0) { + return r; + } + + ldout(m_cct, 10) << "succeeded" << dendl; + + return 0; +} + +template <typename I> +int Migration<I>::status(image_migration_status_t *status) { + ldout(m_cct, 10) << dendl; + + status->source_pool_id = m_dst_migration_spec.pool_id; + status->source_pool_namespace = m_dst_migration_spec.pool_namespace; + status->source_image_name = m_dst_migration_spec.image_name; + status->source_image_id = m_dst_migration_spec.image_id; + status->dest_pool_id = m_src_migration_spec.pool_id; + status->dest_pool_namespace = m_src_migration_spec.pool_namespace; + status->dest_image_name = m_src_migration_spec.image_name; + status->dest_image_id = m_src_migration_spec.image_id; + + switch (m_src_migration_spec.state) { + case cls::rbd::MIGRATION_STATE_ERROR: + status->state = RBD_IMAGE_MIGRATION_STATE_ERROR; + break; + case cls::rbd::MIGRATION_STATE_PREPARING: + status->state = RBD_IMAGE_MIGRATION_STATE_PREPARING; + break; + case cls::rbd::MIGRATION_STATE_PREPARED: + status->state = RBD_IMAGE_MIGRATION_STATE_PREPARED; + break; + case cls::rbd::MIGRATION_STATE_EXECUTING: + status->state = RBD_IMAGE_MIGRATION_STATE_EXECUTING; + break; + case cls::rbd::MIGRATION_STATE_EXECUTED: + status->state = RBD_IMAGE_MIGRATION_STATE_EXECUTED; + break; + default: + status->state = RBD_IMAGE_MIGRATION_STATE_UNKNOWN; + break; + } + + status->state_description = m_src_migration_spec.state_description; + + return 0; +} + +template <typename I> +int Migration<I>::set_state(cls::rbd::MigrationState state, + const std::string &description) { + int r = cls_client::migration_set_state(&m_src_io_ctx, m_src_header_oid, + state, description); + if (r < 0) { + lderr(m_cct) << "failed to set source migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + r = cls_client::migration_set_state(&m_dst_io_ctx, m_dst_header_oid, state, + description); + if (r < 0) { + lderr(m_cct) << "failed to set destination migration header: " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::list_src_snaps(std::vector<librbd::snap_info_t> *snaps) { + ldout(m_cct, 10) << dendl; + + int r = snap_list(m_src_image_ctx, *snaps); + if (r < 0) { + lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto &snap : *snaps) { + librbd::snap_namespace_type_t namespace_type; + r = Snapshot<I>::get_namespace_type(m_src_image_ctx, snap.id, + &namespace_type); + if (r < 0) { + lderr(m_cct) << "error getting snap namespace type: " << cpp_strerror(r) + << dendl; + return r; + } + + if (namespace_type != RBD_SNAP_NAMESPACE_TYPE_USER) { + if (namespace_type == RBD_SNAP_NAMESPACE_TYPE_TRASH) { + lderr(m_cct) << "image has snapshots with linked clones that must be " + << "deleted or flattened before the image can be migrated" + << dendl; + } else { + lderr(m_cct) << "image has non-user type snapshots " + << "that are not supported by migration" << dendl; + } + return -EBUSY; + } + } + + return 0; +} + +template <typename I> +int Migration<I>::validate_src_snaps() { + ldout(m_cct, 10) << dendl; + + std::vector<librbd::snap_info_t> snaps; + int r = list_src_snaps(&snaps); + if (r < 0) { + return r; + } + + uint64_t dst_features = 0; + r = m_image_options.get(RBD_IMAGE_OPTION_FEATURES, &dst_features); + ceph_assert(r == 0); + + if (!m_src_image_ctx->test_features(RBD_FEATURE_LAYERING)) { + return 0; + } + + for (auto &snap : snaps) { + RWLock::RLocker snap_locker(m_src_image_ctx->snap_lock); + cls::rbd::ParentImageSpec parent_spec{m_src_image_ctx->md_ctx.get_id(), + m_src_image_ctx->md_ctx.get_namespace(), + m_src_image_ctx->id, snap.id}; + std::vector<librbd::linked_image_spec_t> child_images; + r = api::Image<I>::list_children(m_src_image_ctx, parent_spec, + &child_images); + if (r < 0) { + lderr(m_cct) << "failed listing children: " << cpp_strerror(r) + << dendl; + return r; + } + if (!child_images.empty()) { + ldout(m_cct, 1) << m_src_image_ctx->name << "@" << snap.name + << " has children" << dendl; + + if ((dst_features & RBD_FEATURE_LAYERING) == 0) { + lderr(m_cct) << "can't migrate to destination without layering feature: " + << "image has children" << dendl; + return -EINVAL; + } + } + } + + return 0; +} + + +template <typename I> +int Migration<I>::set_migration() { + ldout(m_cct, 10) << dendl; + + m_src_image_ctx->ignore_migrating = true; + + int r = cls_client::migration_set(&m_src_io_ctx, m_src_header_oid, + m_src_migration_spec); + if (r < 0) { + lderr(m_cct) << "failed to set migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + m_src_image_ctx->notify_update(); + + return 0; +} + +template <typename I> +int Migration<I>::remove_migration(I *image_ctx) { + ldout(m_cct, 10) << dendl; + + int r; + + r = cls_client::migration_remove(&image_ctx->md_ctx, image_ctx->header_oid); + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + lderr(m_cct) << "failed removing migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + image_ctx->notify_update(); + + return 0; +} + +template <typename I> +int Migration<I>::unlink_src_image() { + if (m_src_old_format) { + return v1_unlink_src_image(); + } else { + return v2_unlink_src_image(); + } +} + +template <typename I> +int Migration<I>::v1_unlink_src_image() { + ldout(m_cct, 10) << dendl; + + int r = tmap_rm(m_src_io_ctx, m_src_image_name); + if (r < 0) { + lderr(m_cct) << "failed removing " << m_src_image_name << " from tmap: " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::v2_unlink_src_image() { + ldout(m_cct, 10) << dendl; + + m_src_image_ctx->owner_lock.get_read(); + if (m_src_image_ctx->exclusive_lock != nullptr && + m_src_image_ctx->exclusive_lock->is_lock_owner()) { + C_SaferCond ctx; + m_src_image_ctx->exclusive_lock->release_lock(&ctx); + m_src_image_ctx->owner_lock.put_read(); + int r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "error releasing exclusive lock: " << cpp_strerror(r) + << dendl; + return r; + } + } else { + m_src_image_ctx->owner_lock.put_read(); + } + + int r = Trash<I>::move(m_src_io_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION, + m_src_image_ctx->name, 0); + if (r < 0) { + lderr(m_cct) << "failed moving image to trash: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::relink_src_image() { + if (m_src_old_format) { + return v1_relink_src_image(); + } else { + return v2_relink_src_image(); + } +} + +template <typename I> +int Migration<I>::v1_relink_src_image() { + ldout(m_cct, 10) << dendl; + + int r = tmap_set(m_src_io_ctx, m_src_image_name); + if (r < 0) { + lderr(m_cct) << "failed adding " << m_src_image_name << " to tmap: " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::v2_relink_src_image() { + ldout(m_cct, 10) << dendl; + + int r = Trash<I>::restore(m_src_io_ctx, + {cls::rbd::TRASH_IMAGE_SOURCE_MIGRATION}, + m_src_image_ctx->id, m_src_image_ctx->name); + if (r < 0) { + lderr(m_cct) << "failed restoring image from trash: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::create_dst_image() { + ldout(m_cct, 10) << dendl; + + uint64_t size; + cls::rbd::ParentImageSpec parent_spec; + { + RWLock::RLocker snap_locker(m_src_image_ctx->snap_lock); + RWLock::RLocker parent_locker(m_src_image_ctx->parent_lock); + size = m_src_image_ctx->size; + + // use oldest snapshot or HEAD for parent spec + if (!m_src_image_ctx->snap_info.empty()) { + parent_spec = m_src_image_ctx->snap_info.begin()->second.parent.spec; + } else { + parent_spec = m_src_image_ctx->parent_md.spec; + } + } + + ThreadPool *thread_pool; + ContextWQ *op_work_queue; + ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &op_work_queue); + + ConfigProxy config{m_cct->_conf}; + api::Config<I>::apply_pool_overrides(m_dst_io_ctx, &config); + + int r; + C_SaferCond on_create; + librados::IoCtx parent_io_ctx; + if (parent_spec.pool_id == -1) { + auto *req = image::CreateRequest<I>::create( + config, m_dst_io_ctx, m_dst_image_name, m_dst_image_id, size, + m_image_options, "", "", true /* skip_mirror_enable */, op_work_queue, + &on_create); + req->send(); + } else { + r = util::create_ioctx(m_src_image_ctx->md_ctx, "destination image", + parent_spec.pool_id, parent_spec.pool_namespace, + &parent_io_ctx); + if (r < 0) { + return r; + } + + auto *req = image::CloneRequest<I>::create( + config, parent_io_ctx, parent_spec.image_id, "", parent_spec.snap_id, + m_dst_io_ctx, m_dst_image_name, m_dst_image_id, m_image_options, "", "", + op_work_queue, &on_create); + req->send(); + } + + r = on_create.wait(); + if (r < 0) { + lderr(m_cct) << "header creation failed: " << cpp_strerror(r) << dendl; + return r; + } + + auto dst_image_ctx = I::create(m_dst_image_name, m_dst_image_id, nullptr, + m_dst_io_ctx, false); + + r = dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING); + if (r < 0) { + lderr(m_cct) << "failed to open newly created header: " << cpp_strerror(r) + << dendl; + return r; + } + + BOOST_SCOPE_EXIT_TPL(dst_image_ctx) { + dst_image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + { + RWLock::RLocker owner_locker(dst_image_ctx->owner_lock); + r = dst_image_ctx->operations->prepare_image_update( + exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true); + if (r < 0) { + lderr(m_cct) << "cannot obtain exclusive lock" << dendl; + return r; + } + if (dst_image_ctx->exclusive_lock != nullptr) { + dst_image_ctx->exclusive_lock->block_requests(0); + } + } + + SnapSeqs snap_seqs; + + C_SaferCond on_snapshot_copy; + auto snapshot_copy_req = librbd::deep_copy::SnapshotCopyRequest<I>::create( + m_src_image_ctx, dst_image_ctx, 0, CEPH_NOSNAP, 0, m_flatten, + m_src_image_ctx->op_work_queue, &snap_seqs, &on_snapshot_copy); + snapshot_copy_req->send(); + r = on_snapshot_copy.wait(); + if (r < 0) { + lderr(m_cct) << "failed to copy snapshots: " << cpp_strerror(r) << dendl; + return r; + } + + C_SaferCond on_metadata_copy; + auto metadata_copy_req = librbd::deep_copy::MetadataCopyRequest<I>::create( + m_src_image_ctx, dst_image_ctx, &on_metadata_copy); + metadata_copy_req->send(); + r = on_metadata_copy.wait(); + if (r < 0) { + lderr(m_cct) << "failed to copy metadata: " << cpp_strerror(r) << dendl; + return r; + } + + m_dst_migration_spec = {cls::rbd::MIGRATION_HEADER_TYPE_DST, + m_src_io_ctx.get_id(), m_src_io_ctx.get_namespace(), + m_src_image_name, m_src_image_id, snap_seqs, size, + m_flatten, m_mirroring, + cls::rbd::MIGRATION_STATE_PREPARING, ""}; + + r = cls_client::migration_set(&m_dst_io_ctx, m_dst_header_oid, + m_dst_migration_spec); + if (r < 0) { + lderr(m_cct) << "failed to set migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + r = update_group(m_src_image_ctx, dst_image_ctx); + if (r < 0) { + return r; + } + + r = set_state(cls::rbd::MIGRATION_STATE_PREPARED, ""); + if (r < 0) { + return r; + } + + r = dst_image_ctx->state->refresh(); + if (r < 0) { + lderr(m_cct) << "failed to refresh destination image: " << cpp_strerror(r) + << dendl; + return r; + } + + r = relink_children(m_src_image_ctx, dst_image_ctx); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::remove_group(I *image_ctx, group_info_t *group_info) { + int r = librbd::api::Group<I>::image_get_group(image_ctx, group_info); + if (r < 0) { + lderr(m_cct) << "failed to get image group: " << cpp_strerror(r) << dendl; + return r; + } + + if (group_info->pool == -1) { + return -ENOENT; + } + + ceph_assert(!image_ctx->id.empty()); + + ldout(m_cct, 10) << dendl; + + IoCtx group_ioctx; + r = util::create_ioctx(image_ctx->md_ctx, "group", group_info->pool, {}, + &group_ioctx); + if (r < 0) { + return r; + } + + r = librbd::api::Group<I>::image_remove_by_id(group_ioctx, + group_info->name.c_str(), + image_ctx->md_ctx, + image_ctx->id.c_str()); + if (r < 0) { + lderr(m_cct) << "failed to remove image from group: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::add_group(I *image_ctx, group_info_t &group_info) { + if (group_info.pool == -1) { + return 0; + } + + ldout(m_cct, 10) << dendl; + + IoCtx group_ioctx; + int r = util::create_ioctx(image_ctx->md_ctx, "group", group_info.pool, {}, + &group_ioctx); + if (r < 0) { + return r; + } + + r = librbd::api::Group<I>::image_add(group_ioctx, group_info.name.c_str(), + image_ctx->md_ctx, + image_ctx->name.c_str()); + if (r < 0) { + lderr(m_cct) << "failed to add image to group: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::update_group(I *from_image_ctx, I *to_image_ctx) { + ldout(m_cct, 10) << dendl; + + group_info_t group_info; + + int r = remove_group(from_image_ctx, &group_info); + if (r < 0) { + return r == -ENOENT ? 0 : r; + } + + r = add_group(to_image_ctx, group_info); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Migration<I>::disable_mirroring(I *image_ctx, bool *was_enabled) { + *was_enabled = false; + + if (!image_ctx->test_features(RBD_FEATURE_JOURNALING)) { + return 0; + } + + cls::rbd::MirrorImage mirror_image; + int r = cls_client::mirror_image_get(&image_ctx->md_ctx, image_ctx->id, + &mirror_image); + if (r == -ENOENT) { + ldout(m_cct, 10) << "mirroring is not enabled for this image" << dendl; + return 0; + } + + if (r < 0) { + lderr(m_cct) << "failed to retrieve mirror image: " << cpp_strerror(r) + << dendl; + return r; + } + + if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + *was_enabled = true; + } + + ldout(m_cct, 10) << dendl; + + C_SaferCond ctx; + auto req = mirror::DisableRequest<I>::create(image_ctx, false, true, &ctx); + req->send(); + r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "failed to disable mirroring: " << cpp_strerror(r) + << dendl; + return r; + } + + m_src_migration_spec.mirroring = true; + + return 0; +} + +template <typename I> +int Migration<I>::enable_mirroring(I *image_ctx, bool was_enabled) { + + if (!image_ctx->test_features(RBD_FEATURE_JOURNALING)) { + return 0; + } + + cls::rbd::MirrorMode mirror_mode; + int r = cls_client::mirror_mode_get(&image_ctx->md_ctx, &mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "failed to retrieve mirror mode: " << cpp_strerror(r) + << dendl; + return r; + } + + if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + ldout(m_cct, 10) << "mirroring is not enabled for destination pool" + << dendl; + return 0; + } + if (mirror_mode == cls::rbd::MIRROR_MODE_IMAGE && !was_enabled) { + ldout(m_cct, 10) << "mirroring is not enabled for image" << dendl; + return 0; + } + + ldout(m_cct, 10) << dendl; + + C_SaferCond ctx; + auto req = mirror::EnableRequest<I>::create(image_ctx->md_ctx, image_ctx->id, + "", image_ctx->op_work_queue, + &ctx); + req->send(); + r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "failed to enable mirroring: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +// When relinking children we should be careful as it my be interrupted +// at any moment by some reason and we may end up in an inconsistent +// state, which we have to be able to fix with "migration abort". Below +// are all possible states during migration (P1 - sourse parent, P2 - +// destination parent, C - child): +// +// P1 P2 P1 P2 P1 P2 P1 P2 +// ^\ \ ^ \ /^ /^ +// \v v/ v/ v/ +// C C C C +// +// 1 2 3 4 +// +// (1) and (4) are the initial and the final consistent states. (2) +// and (3) are intermediate inconsistent states that have to be fixed +// by relink_children running in "migration abort" mode. For this, it +// scans P2 for all children attached and relinks (fixes) states (3) +// and (4) to state (1). Then it scans P1 for remaining children and +// fixes the states (2). + +template <typename I> +int Migration<I>::relink_children(I *from_image_ctx, I *to_image_ctx) { + ldout(m_cct, 10) << dendl; + + std::vector<librbd::snap_info_t> snaps; + int r = list_src_snaps(&snaps); + if (r < 0) { + return r; + } + + bool migration_abort = (to_image_ctx == m_src_image_ctx); + + for (auto it = snaps.begin(); it != snaps.end(); it++) { + auto &snap = *it; + std::vector<librbd::linked_image_spec_t> src_child_images; + + if (from_image_ctx != m_src_image_ctx) { + ceph_assert(migration_abort); + + // We run list snaps against the src image to get only those snapshots + // that are migrated. If the "from" image is not the src image + // (abort migration case), we need to remap snap ids. + // Also collect the list of the children currently attached to the + // source, so we could make a proper decision later about relinking. + + RWLock::RLocker src_snap_locker(to_image_ctx->snap_lock); + cls::rbd::ParentImageSpec src_parent_spec{to_image_ctx->md_ctx.get_id(), + to_image_ctx->md_ctx.get_namespace(), + to_image_ctx->id, snap.id}; + r = api::Image<I>::list_children(to_image_ctx, src_parent_spec, + &src_child_images); + if (r < 0) { + lderr(m_cct) << "failed listing children: " << cpp_strerror(r) + << dendl; + return r; + } + + RWLock::RLocker snap_locker(from_image_ctx->snap_lock); + snap.id = from_image_ctx->get_snap_id(cls::rbd::UserSnapshotNamespace(), + snap.name); + if (snap.id == CEPH_NOSNAP) { + ldout(m_cct, 5) << "skipping snapshot " << snap.name << dendl; + continue; + } + } + + std::vector<librbd::linked_image_spec_t> child_images; + { + RWLock::RLocker snap_locker(from_image_ctx->snap_lock); + cls::rbd::ParentImageSpec parent_spec{from_image_ctx->md_ctx.get_id(), + from_image_ctx->md_ctx.get_namespace(), + from_image_ctx->id, snap.id}; + r = api::Image<I>::list_children(from_image_ctx, parent_spec, + &child_images); + if (r < 0) { + lderr(m_cct) << "failed listing children: " << cpp_strerror(r) + << dendl; + return r; + } + } + + for (auto &child_image : child_images) { + r = relink_child(from_image_ctx, to_image_ctx, snap, child_image, + migration_abort, true); + if (r < 0) { + return r; + } + + src_child_images.erase(std::remove(src_child_images.begin(), + src_child_images.end(), child_image), + src_child_images.end()); + } + + for (auto &child_image : src_child_images) { + r = relink_child(from_image_ctx, to_image_ctx, snap, child_image, + migration_abort, false); + if (r < 0) { + return r; + } + } + } + + return 0; +} + +template <typename I> +int Migration<I>::relink_child(I *from_image_ctx, I *to_image_ctx, + const librbd::snap_info_t &from_snap, + const librbd::linked_image_spec_t &child_image, + bool migration_abort, bool reattach_child) { + ldout(m_cct, 10) << from_snap.name << " " << child_image.pool_name << "/" + << child_image.pool_namespace << "/" + << child_image.image_name << " (migration_abort=" + << migration_abort << ", reattach_child=" << reattach_child + << ")" << dendl; + + librados::snap_t to_snap_id; + { + RWLock::RLocker snap_locker(to_image_ctx->snap_lock); + to_snap_id = to_image_ctx->get_snap_id(cls::rbd::UserSnapshotNamespace(), + from_snap.name); + if (to_snap_id == CEPH_NOSNAP) { + lderr(m_cct) << "no snapshot " << from_snap.name << " on destination image" + << dendl; + return -ENOENT; + } + } + + librados::IoCtx child_io_ctx; + int r = util::create_ioctx(to_image_ctx->md_ctx, + "child image " + child_image.image_name, + child_image.pool_id, child_image.pool_namespace, + &child_io_ctx); + if (r < 0) { + return r; + } + + I *child_image_ctx = I::create("", child_image.image_id, nullptr, + child_io_ctx, false); + r = child_image_ctx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + if (r < 0) { + lderr(m_cct) << "failed to open child image: " << cpp_strerror(r) << dendl; + return r; + } + BOOST_SCOPE_EXIT_TPL(child_image_ctx) { + child_image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + uint32_t clone_format = 1; + if (child_image_ctx->test_op_features(RBD_OPERATION_FEATURE_CLONE_CHILD)) { + clone_format = 2; + } + + cls::rbd::ParentImageSpec parent_spec; + uint64_t parent_overlap; + { + RWLock::RLocker snap_locker(child_image_ctx->snap_lock); + RWLock::RLocker parent_locker(child_image_ctx->parent_lock); + + // use oldest snapshot or HEAD for parent spec + if (!child_image_ctx->snap_info.empty()) { + parent_spec = child_image_ctx->snap_info.begin()->second.parent.spec; + parent_overlap = child_image_ctx->snap_info.begin()->second.parent.overlap; + } else { + parent_spec = child_image_ctx->parent_md.spec; + parent_overlap = child_image_ctx->parent_md.overlap; + } + } + + if (migration_abort && + parent_spec.pool_id == to_image_ctx->md_ctx.get_id() && + parent_spec.pool_namespace == to_image_ctx->md_ctx.get_namespace() && + parent_spec.image_id == to_image_ctx->id && + parent_spec.snap_id == to_snap_id) { + ldout(m_cct, 10) << "no need for parent re-attach" << dendl; + } else { + if (parent_spec.pool_id != from_image_ctx->md_ctx.get_id() || + parent_spec.pool_namespace != from_image_ctx->md_ctx.get_namespace() || + parent_spec.image_id != from_image_ctx->id || + parent_spec.snap_id != from_snap.id) { + lderr(m_cct) << "parent is not source image: " << parent_spec.pool_id + << "/" << parent_spec.pool_namespace << "/" + << parent_spec.image_id << "@" << parent_spec.snap_id + << dendl; + return -ESTALE; + } + + parent_spec.pool_id = to_image_ctx->md_ctx.get_id(); + parent_spec.pool_namespace = to_image_ctx->md_ctx.get_namespace(); + parent_spec.image_id = to_image_ctx->id; + parent_spec.snap_id = to_snap_id; + + C_SaferCond on_reattach_parent; + auto reattach_parent_req = image::AttachParentRequest<I>::create( + *child_image_ctx, parent_spec, parent_overlap, true, &on_reattach_parent); + reattach_parent_req->send(); + r = on_reattach_parent.wait(); + if (r < 0) { + lderr(m_cct) << "failed to re-attach parent: " << cpp_strerror(r) << dendl; + return r; + } + } + + if (reattach_child) { + C_SaferCond on_reattach_child; + auto reattach_child_req = image::AttachChildRequest<I>::create( + child_image_ctx, to_image_ctx, to_snap_id, from_image_ctx, from_snap.id, + clone_format, &on_reattach_child); + reattach_child_req->send(); + r = on_reattach_child.wait(); + if (r < 0) { + lderr(m_cct) << "failed to re-attach child: " << cpp_strerror(r) << dendl; + return r; + } + } + + child_image_ctx->notify_update(); + + return 0; +} + +template <typename I> +int Migration<I>::remove_src_image() { + ldout(m_cct, 10) << dendl; + + std::vector<librbd::snap_info_t> snaps; + int r = list_src_snaps(&snaps); + if (r < 0) { + return r; + } + + for (auto it = snaps.rbegin(); it != snaps.rend(); it++) { + auto &snap = *it; + + librbd::NoOpProgressContext prog_ctx; + int r = snap_remove(m_src_image_ctx, snap.name.c_str(), + RBD_SNAP_REMOVE_UNPROTECT, prog_ctx); + if (r < 0) { + lderr(m_cct) << "failed removing source image snapshot '" << snap.name + << "': " << cpp_strerror(r) << dendl; + return r; + } + } + + ceph_assert(m_src_image_ctx->ignore_migrating); + + ThreadPool *thread_pool; + ContextWQ *op_work_queue; + ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &op_work_queue); + C_SaferCond on_remove; + auto req = librbd::image::RemoveRequest<I>::create( + m_src_io_ctx, m_src_image_ctx, false, true, *m_prog_ctx, op_work_queue, + &on_remove); + req->send(); + r = on_remove.wait(); + + m_src_image_ctx = nullptr; + + // For old format image it will return -ENOENT due to expected + // tmap_rm failure at the end. + if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "failed removing source image: " << cpp_strerror(r) + << dendl; + return r; + } + + if (!m_src_image_id.empty()) { + r = cls_client::trash_remove(&m_src_io_ctx, m_src_image_id); + if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "error removing image " << m_src_image_id + << " from rbd_trash object" << dendl; + } + } + + return 0; +} + +template <typename I> +int Migration<I>::revert_data(I* src_image_ctx, I* dst_image_ctx, + ProgressContext* prog_ctx) { + ldout(m_cct, 10) << dendl; + + cls::rbd::MigrationSpec migration_spec; + int r = cls_client::migration_get(&src_image_ctx->md_ctx, + src_image_ctx->header_oid, + &migration_spec); + + if (r < 0) { + lderr(m_cct) << "failed retrieving migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + if (migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_DST) { + lderr(m_cct) << "unexpected migration header type: " + << migration_spec.header_type << dendl; + return -EINVAL; + } + + uint64_t src_snap_id_start = 0; + uint64_t src_snap_id_end = CEPH_NOSNAP; + uint64_t dst_snap_id_start = 0; + if (!migration_spec.snap_seqs.empty()) { + src_snap_id_start = migration_spec.snap_seqs.rbegin()->second; + } + + // we only care about the HEAD revision so only add a single mapping to + // represent the most recent state + SnapSeqs snap_seqs; + snap_seqs[CEPH_NOSNAP] = CEPH_NOSNAP; + + ldout(m_cct, 20) << "src_snap_id_start=" << src_snap_id_start << ", " + << "src_snap_id_end=" << src_snap_id_end << ", " + << "snap_seqs=" << snap_seqs << dendl; + + C_SaferCond ctx; + auto request = deep_copy::ImageCopyRequest<I>::create( + src_image_ctx, dst_image_ctx, src_snap_id_start, src_snap_id_end, + dst_snap_id_start, false, {}, snap_seqs, prog_ctx, &ctx); + request->send(); + + r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "error reverting destination image data blocks back to " + << "source image: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Migration<librbd::ImageCtx>; diff --git a/src/librbd/api/Migration.h b/src/librbd/api/Migration.h new file mode 100644 index 00000000..52a4517b --- /dev/null +++ b/src/librbd/api/Migration.h @@ -0,0 +1,105 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_MIGRATION_H +#define CEPH_LIBRBD_API_MIGRATION_H + +#include "include/int_types.h" +#include "include/rados/librados_fwd.hpp" +#include "include/rbd/librbd.hpp" +#include "cls/rbd/cls_rbd_types.h" + +#include <vector> + +namespace librbd { + +class ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +class Migration { +public: + static int prepare(librados::IoCtx& io_ctx, const std::string &image_name, + librados::IoCtx& dest_io_ctx, + const std::string &dest_image_name, ImageOptions& opts); + static int execute(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext &prog_ctx); + static int abort(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext &prog_ctx); + static int commit(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext &prog_ctx); + static int status(librados::IoCtx& io_ctx, const std::string &image_name, + image_migration_status_t *status); + +private: + CephContext* m_cct; + ImageCtxT *m_src_image_ctx; + librados::IoCtx m_src_io_ctx; + librados::IoCtx &m_dst_io_ctx; + bool m_src_old_format; + std::string m_src_image_name; + std::string m_src_image_id; + std::string m_src_header_oid; + std::string m_dst_image_name; + std::string m_dst_image_id; + std::string m_dst_header_oid; + ImageOptions &m_image_options; + bool m_flatten; + bool m_mirroring; + ProgressContext *m_prog_ctx; + + cls::rbd::MigrationSpec m_src_migration_spec; + cls::rbd::MigrationSpec m_dst_migration_spec; + + Migration(ImageCtxT *image_ctx, librados::IoCtx& dest_io_ctx, + const std::string &dest_image_name, const std::string &dst_image_id, + ImageOptions& opts, bool flatten, bool mirroring, + cls::rbd::MigrationState state, const std::string &state_desc, + ProgressContext *prog_ctx); + + int prepare(); + int execute(); + int abort(); + int commit(); + int status(image_migration_status_t *status); + + int set_state(cls::rbd::MigrationState state, const std::string &description); + + int list_src_snaps(std::vector<librbd::snap_info_t> *snaps); + int validate_src_snaps(); + int disable_mirroring(ImageCtxT *image_ctx, bool *was_enabled); + int enable_mirroring(ImageCtxT *image_ctx, bool was_enabled); + int set_migration(); + int unlink_src_image(); + int relink_src_image(); + int create_dst_image(); + int remove_group(ImageCtxT *image_ctx, group_info_t *group_info); + int add_group(ImageCtxT *image_ctx, group_info_t &group_info); + int update_group(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx); + int remove_migration(ImageCtxT *image_ctx); + int relink_children(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx); + int remove_src_image(); + + int v1_set_migration(); + int v2_set_migration(); + int v1_unlink_src_image(); + int v2_unlink_src_image(); + int v1_relink_src_image(); + int v2_relink_src_image(); + + int relink_child(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx, + const librbd::snap_info_t &src_snap, + const librbd::linked_image_spec_t &child_image, + bool migration_abort, bool reattach_child); + + int revert_data(ImageCtxT* src_image_ctx, ImageCtxT* dst_image_ctx, + ProgressContext *prog_ctx); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Migration<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_API_MIGRATION_H diff --git a/src/librbd/api/Mirror.cc b/src/librbd/api/Mirror.cc new file mode 100644 index 00000000..4e7b7dc8 --- /dev/null +++ b/src/librbd/api/Mirror.cc @@ -0,0 +1,1541 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Mirror.h" +#include "include/rados/librados.hpp" +#include "include/stringify.h" +#include "common/ceph_json.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/api/Image.h" +#include "librbd/mirror/DemoteRequest.h" +#include "librbd/mirror/DisableRequest.h" +#include "librbd/mirror/EnableRequest.h" +#include "librbd/mirror/GetInfoRequest.h" +#include "librbd/mirror/GetStatusRequest.h" +#include "librbd/mirror/PromoteRequest.h" +#include "librbd/mirror/Types.h" +#include "librbd/MirroringWatcher.h" +#include <boost/algorithm/string/trim.hpp> +#include <boost/algorithm/string/replace.hpp> +#include <boost/scope_exit.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Mirror: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +int get_config_key(librados::Rados& rados, const std::string& key, + std::string* value) { + std::string cmd = + "{" + "\"prefix\": \"config-key get\", " + "\"key\": \"" + key + "\"" + "}"; + + bufferlist in_bl; + bufferlist out_bl; + + int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr); + if (r == -EINVAL) { + return -EOPNOTSUPP; + } else if (r < 0 && r != -ENOENT) { + return r; + } + + *value = out_bl.to_str(); + return 0; +} + +int set_config_key(librados::Rados& rados, const std::string& key, + const std::string& value) { + std::string cmd; + if (value.empty()) { + cmd = "{" + "\"prefix\": \"config-key rm\", " + "\"key\": \"" + key + "\"" + "}"; + } else { + cmd = "{" + "\"prefix\": \"config-key set\", " + "\"key\": \"" + key + "\", " + "\"val\": \"" + value + "\"" + "}"; + } + bufferlist in_bl; + bufferlist out_bl; + + int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr); + if (r == -EINVAL) { + return -EOPNOTSUPP; + } else if (r < 0) { + return r; + } + + return 0; +} + +std::string get_peer_config_key_name(int64_t pool_id, + const std::string& peer_uuid) { + return RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) + "/" + + peer_uuid; +} + +int remove_peer_config_key(librados::IoCtx& io_ctx, + const std::string& peer_uuid) { + int64_t pool_id = io_ctx.get_id(); + auto key = get_peer_config_key_name(pool_id, peer_uuid); + + librados::Rados rados(io_ctx); + int r = set_config_key(rados, key, ""); + if (r < 0 && r != -ENOENT && r != -EPERM) { + return r; + } + return 0; +} + +int create_bootstrap_user(CephContext* cct, librados::Rados& rados, + std::string* peer_client_id, std::string* cephx_key) { + ldout(cct, 20) << dendl; + + // retrieve peer CephX user from config-key + int r = get_config_key(rados, RBD_MIRROR_PEER_CLIENT_ID_CONFIG_KEY, + peer_client_id); + if (r == -EACCES) { + ldout(cct, 5) << "insufficient permissions to get peer-client-id " + << "config-key" << dendl; + return r; + } else if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve peer client id key: " + << cpp_strerror(r) << dendl; + return r; + } else if (r == -ENOENT || peer_client_id->empty()) { + ldout(cct, 20) << "creating new peer-client-id config-key" << dendl; + + *peer_client_id = "rbd-mirror-peer"; + r = set_config_key(rados, RBD_MIRROR_PEER_CLIENT_ID_CONFIG_KEY, + *peer_client_id); + if (r == -EACCES) { + ldout(cct, 5) << "insufficient permissions to update peer-client-id " + << "config-key" << dendl; + return r; + } else if (r < 0) { + lderr(cct) << "failed to update peer client id key: " + << cpp_strerror(r) << dendl; + return r; + } + } + ldout(cct, 20) << "peer_client_id=" << *peer_client_id << dendl; + + // create peer client user + std::string cmd = + R"({)" \ + R"( "prefix": "auth get-or-create",)" \ + R"( "entity": "client.)" + *peer_client_id + R"(",)" \ + R"( "caps": [)" \ + R"( "mon", "profile rbd-mirror-peer",)" \ + R"( "osd", "profile rbd"],)" \ + R"( "format": "json")" \ + R"(})"; + + bufferlist in_bl; + bufferlist out_bl; + + r = rados.mon_command(cmd, in_bl, &out_bl, nullptr); + if (r == -EINVAL) { + ldout(cct, 5) << "caps mismatch for existing user" << dendl; + return -EEXIST; + } else if (r == -EACCES) { + ldout(cct, 5) << "insufficient permissions to create user" << dendl; + return r; + } else if (r < 0) { + lderr(cct) << "failed to create or update RBD mirroring bootstrap user: " + << cpp_strerror(r) << dendl; + return r; + } + + // extract key from response + bool json_valid = false; + json_spirit::mValue json_root; + if(json_spirit::read(out_bl.to_str(), json_root)) { + try { + auto& json_obj = json_root.get_array()[0].get_obj(); + *cephx_key = json_obj["key"].get_str(); + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + lderr(cct) << "invalid auth keyring JSON received" << dendl; + return -EBADMSG; + } + + return 0; +} + +int create_bootstrap_peer(CephContext* cct, librados::IoCtx& io_ctx, + const std::string& site_name, const std::string& fsid, + const std::string& client_id, const std::string& key, + const std::string& mon_host, + const std::string& cluster1, + const std::string& cluster2) { + ldout(cct, 20) << dendl; + + std::string peer_uuid; + std::vector<mirror_peer_t> peers; + int r = Mirror<>::peer_list(io_ctx, &peers); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list mirror peers: " << cpp_strerror(r) << dendl; + return r; + } + + if (peers.empty()) { + r = Mirror<>::peer_add(io_ctx, &peer_uuid, site_name, + "client." + client_id); + if (r < 0) { + lderr(cct) << "failed to add " << cluster1 << " peer to " + << cluster2 << " " << "cluster: " << cpp_strerror(r) << dendl; + return r; + } + } else if (peers[0].cluster_name != site_name && + peers[0].cluster_name != fsid) { + // only support a single peer + lderr(cct) << "multiple peers are not currently supported" << dendl; + return -EINVAL; + } else { + peer_uuid = peers[0].uuid; + + if (peers[0].cluster_name != site_name) { + r = Mirror<>::peer_set_cluster(io_ctx, peer_uuid, site_name); + if (r < 0) { + // non-fatal attempt to update site name + lderr(cct) << "failed to update peer site name" << dendl; + } + } + } + + Mirror<>::Attributes attributes { + {"mon_host", mon_host}, + {"key", key}}; + r = Mirror<>::peer_set_attributes(io_ctx, peer_uuid, attributes); + if (r < 0) { + lderr(cct) << "failed to update " << cluster1 << " cluster connection " + << "attributes in " << cluster2 << " cluster: " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +int validate_mirroring_enabled(I *ictx) { + CephContext *cct = ictx->cct; + cls::rbd::MirrorImage mirror_image_internal; + int r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, + &mirror_image_internal); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve mirroring state: " << cpp_strerror(r) + << dendl; + return r; + } else if (mirror_image_internal.state != + cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + lderr(cct) << "mirroring is not currently enabled" << dendl; + return -EINVAL; + } + return 0; +} + +int list_mirror_images(librados::IoCtx& io_ctx, + std::set<std::string>& mirror_image_ids) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + + std::string last_read = ""; + int max_read = 1024; + int r; + do { + std::map<std::string, std::string> mirror_images; + r = cls_client::mirror_image_list(&io_ctx, last_read, max_read, + &mirror_images); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error listing mirrored image directory: " + << cpp_strerror(r) << dendl; + return r; + } + for (auto it = mirror_images.begin(); it != mirror_images.end(); ++it) { + mirror_image_ids.insert(it->first); + } + if (!mirror_images.empty()) { + last_read = mirror_images.rbegin()->first; + } + r = mirror_images.size(); + } while (r == max_read); + + return 0; +} + +struct C_ImageGetInfo : public Context { + mirror_image_info_t *mirror_image_info; + Context *on_finish; + + cls::rbd::MirrorImage mirror_image; + mirror::PromotionState promotion_state = mirror::PROMOTION_STATE_PRIMARY; + + C_ImageGetInfo(mirror_image_info_t *mirror_image_info, Context *on_finish) + : mirror_image_info(mirror_image_info), on_finish(on_finish) { + } + + void finish(int r) override { + if (r < 0) { + on_finish->complete(r); + return; + } + + mirror_image_info->global_id = mirror_image.global_image_id; + mirror_image_info->state = static_cast<rbd_mirror_image_state_t>( + mirror_image.state); + mirror_image_info->primary = ( + promotion_state == mirror::PROMOTION_STATE_PRIMARY); + on_finish->complete(0); + } +}; + +struct C_ImageGetStatus : public C_ImageGetInfo { + std::string image_name; + mirror_image_status_t *mirror_image_status; + + cls::rbd::MirrorImageStatus mirror_image_status_internal; + + C_ImageGetStatus(const std::string &image_name, + mirror_image_status_t *mirror_image_status, + Context *on_finish) + : C_ImageGetInfo(&mirror_image_status->info, on_finish), + image_name(image_name), mirror_image_status(mirror_image_status) { + } + + void finish(int r) override { + if (r < 0) { + on_finish->complete(r); + return; + } + + mirror_image_status->name = image_name; + mirror_image_status->state = static_cast<mirror_image_status_state_t>( + mirror_image_status_internal.state); + mirror_image_status->description = mirror_image_status_internal.description; + mirror_image_status->last_update = + mirror_image_status_internal.last_update.sec(); + mirror_image_status->up = mirror_image_status_internal.up; + C_ImageGetInfo::finish(0); + } +}; + +} // anonymous namespace + +template <typename I> +int Mirror<I>::image_enable(I *ictx, bool relax_same_pool_parent_check) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + // TODO + if (!ictx->md_ctx.get_namespace().empty()) { + lderr(cct) << "namespaces are not supported" << dendl; + return -EINVAL; + } + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + cls::rbd::MirrorMode mirror_mode; + r = cls_client::mirror_mode_get(&ictx->md_ctx, &mirror_mode); + if (r < 0) { + lderr(cct) << "cannot enable mirroring: failed to retrieve mirror mode: " + << cpp_strerror(r) << dendl; + return r; + } + + if (mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) { + lderr(cct) << "cannot enable mirroring in the current pool mirroring mode" + << dendl; + return -EINVAL; + } + + // is mirroring not enabled for the parent? + { + RWLock::RLocker l(ictx->parent_lock); + ImageCtx *parent = ictx->parent; + if (parent) { + if (relax_same_pool_parent_check && + parent->md_ctx.get_id() == ictx->md_ctx.get_id()) { + if (!parent->test_features(RBD_FEATURE_JOURNALING)) { + lderr(cct) << "journaling is not enabled for the parent" << dendl; + return -EINVAL; + } + } else { + cls::rbd::MirrorImage mirror_image_internal; + r = cls_client::mirror_image_get(&(parent->md_ctx), parent->id, + &mirror_image_internal); + if (r == -ENOENT) { + lderr(cct) << "mirroring is not enabled for the parent" << dendl; + return -EINVAL; + } + } + } + } + + if ((ictx->features & RBD_FEATURE_JOURNALING) == 0) { + lderr(cct) << "cannot enable mirroring: journaling is not enabled" << dendl; + return -EINVAL; + } + + C_SaferCond ctx; + auto req = mirror::EnableRequest<ImageCtx>::create(ictx, &ctx); + req->send(); + + r = ctx.wait(); + if (r < 0) { + lderr(cct) << "cannot enable mirroring: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Mirror<I>::image_disable(I *ictx, bool force) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + cls::rbd::MirrorMode mirror_mode; + r = cls_client::mirror_mode_get(&ictx->md_ctx, &mirror_mode); + if (r < 0) { + lderr(cct) << "cannot disable mirroring: failed to retrieve pool " + "mirroring mode: " << cpp_strerror(r) << dendl; + return r; + } + + if (mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) { + lderr(cct) << "cannot disable mirroring in the current pool mirroring " + "mode" << dendl; + return -EINVAL; + } + + // is mirroring enabled for the child? + cls::rbd::MirrorImage mirror_image_internal; + r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, + &mirror_image_internal); + if (r == -ENOENT) { + // mirroring is not enabled for this image + ldout(cct, 20) << "ignoring disable command: mirroring is not enabled for " + << "this image" << dendl; + return 0; + } else if (r == -EOPNOTSUPP) { + ldout(cct, 5) << "mirroring not supported by OSD" << dendl; + return r; + } else if (r < 0) { + lderr(cct) << "failed to retrieve mirror image metadata: " + << cpp_strerror(r) << dendl; + return r; + } + + mirror_image_internal.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING; + r = cls_client::mirror_image_set(&ictx->md_ctx, ictx->id, + mirror_image_internal); + if (r < 0) { + lderr(cct) << "cannot disable mirroring: " << cpp_strerror(r) << dendl; + return r; + } else { + bool rollback = false; + BOOST_SCOPE_EXIT_ALL(ictx, &mirror_image_internal, &rollback) { + if (rollback) { + CephContext *cct = ictx->cct; + mirror_image_internal.state = cls::rbd::MIRROR_IMAGE_STATE_ENABLED; + int r = cls_client::mirror_image_set(&ictx->md_ctx, ictx->id, + mirror_image_internal); + if (r < 0) { + lderr(cct) << "failed to re-enable image mirroring: " + << cpp_strerror(r) << dendl; + } + } + }; + + { + RWLock::RLocker l(ictx->snap_lock); + map<librados::snap_t, SnapInfo> snap_info = ictx->snap_info; + for (auto &info : snap_info) { + cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(), + ictx->md_ctx.get_namespace(), + ictx->id, info.first}; + std::vector<librbd::linked_image_spec_t> child_images; + r = Image<I>::list_children(ictx, parent_spec, &child_images); + if (r < 0) { + rollback = true; + return r; + } + + if (child_images.empty()) { + continue; + } + + librados::IoCtx child_io_ctx; + int64_t child_pool_id = -1; + for (auto &child_image : child_images){ + std::string pool = child_image.pool_name; + if (child_pool_id == -1 || + child_pool_id != child_image.pool_id || + child_io_ctx.get_namespace() != child_image.pool_namespace) { + r = util::create_ioctx(ictx->md_ctx, "child image", + child_image.pool_id, + child_image.pool_namespace, + &child_io_ctx); + if (r < 0) { + rollback = true; + return r; + } + + child_pool_id = child_image.pool_id; + } + + cls::rbd::MirrorImage mirror_image_internal; + r = cls_client::mirror_image_get(&child_io_ctx, child_image.image_id, + &mirror_image_internal); + if (r != -ENOENT) { + rollback = true; + lderr(cct) << "mirroring is enabled on one or more children " + << dendl; + return -EBUSY; + } + } + } + } + + C_SaferCond ctx; + auto req = mirror::DisableRequest<ImageCtx>::create(ictx, force, true, + &ctx); + req->send(); + + r = ctx.wait(); + if (r < 0) { + lderr(cct) << "cannot disable mirroring: " << cpp_strerror(r) << dendl; + rollback = true; + return r; + } + } + + return 0; +} + +template <typename I> +int Mirror<I>::image_promote(I *ictx, bool force) { + CephContext *cct = ictx->cct; + + C_SaferCond ctx; + Mirror<I>::image_promote(ictx, force, &ctx); + int r = ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to promote image" << dendl; + return r; + } + + return 0; +} + +template <typename I> +void Mirror<I>::image_promote(I *ictx, bool force, Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << ", " + << "force=" << force << dendl; + + auto req = mirror::PromoteRequest<>::create(*ictx, force, on_finish); + req->send(); +} + +template <typename I> +int Mirror<I>::image_demote(I *ictx) { + CephContext *cct = ictx->cct; + + C_SaferCond ctx; + Mirror<I>::image_demote(ictx, &ctx); + int r = ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to demote image" << dendl; + return r; + } + + return 0; +} + +template <typename I> +void Mirror<I>::image_demote(I *ictx, Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + auto req = mirror::DemoteRequest<>::create(*ictx, on_finish); + req->send(); +} + +template <typename I> +int Mirror<I>::image_resync(I *ictx) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + C_SaferCond tag_owner_ctx; + bool is_tag_owner; + Journal<I>::is_tag_owner(ictx, &is_tag_owner, &tag_owner_ctx); + r = tag_owner_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to determine tag ownership: " << cpp_strerror(r) + << dendl; + return r; + } else if (is_tag_owner) { + lderr(cct) << "image is primary, cannot resync to itself" << dendl; + return -EINVAL; + } + + // flag the journal indicating that we want to rebuild the local image + r = Journal<I>::request_resync(ictx); + if (r < 0) { + lderr(cct) << "failed to request resync: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +void Mirror<I>::image_get_info(I *ictx, mirror_image_info_t *mirror_image_info, + Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + auto on_refresh = new FunctionContext( + [ictx, mirror_image_info, on_finish](int r) { + if (r < 0) { + lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + + auto ctx = new C_ImageGetInfo(mirror_image_info, on_finish); + auto req = mirror::GetInfoRequest<I>::create(*ictx, &ctx->mirror_image, + &ctx->promotion_state, + ctx); + req->send(); + }); + + if (ictx->state->is_refresh_required()) { + ictx->state->refresh(on_refresh); + } else { + on_refresh->complete(0); + } +} + +template <typename I> +int Mirror<I>::image_get_info(I *ictx, mirror_image_info_t *mirror_image_info) { + C_SaferCond ctx; + image_get_info(ictx, mirror_image_info, &ctx); + + int r = ctx.wait(); + if (r < 0) { + return r; + } + return 0; +} + +template <typename I> +void Mirror<I>::image_get_status(I *ictx, mirror_image_status_t *status, + Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + auto ctx = new C_ImageGetStatus(ictx->name, status, on_finish); + auto req = mirror::GetStatusRequest<I>::create( + *ictx, &ctx->mirror_image_status_internal, &ctx->mirror_image, + &ctx->promotion_state, ctx); + req->send(); +} + +template <typename I> +int Mirror<I>::image_get_status(I *ictx, mirror_image_status_t *status) { + C_SaferCond ctx; + image_get_status(ictx, status, &ctx); + + int r = ctx.wait(); + if (r < 0) { + return r; + } + return 0; +} + +template <typename I> +int Mirror<I>::image_get_instance_id(I *ictx, std::string *instance_id) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + cls::rbd::MirrorImage mirror_image; + int r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, &mirror_image); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve mirroring state: " << cpp_strerror(r) + << dendl; + return r; + } else if (mirror_image.state != cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + lderr(cct) << "mirroring is not currently enabled" << dendl; + return -EINVAL; + } + + entity_inst_t instance; + r = cls_client::mirror_image_instance_get(&ictx->md_ctx, + mirror_image.global_image_id, + &instance); + if (r < 0) { + if (r != -ENOENT && r != -ESTALE) { + lderr(cct) << "failed to get mirror image instance: " << cpp_strerror(r) + << dendl; + } + return r; + } + + *instance_id = stringify(instance.name.num()); + return 0; +} + +template <typename I> +int Mirror<I>::site_name_get(librados::Rados& rados, std::string* name) { + CephContext *cct = reinterpret_cast<CephContext *>(rados.cct()); + ldout(cct, 20) << dendl; + + int r = get_config_key(rados, RBD_MIRROR_SITE_NAME_CONFIG_KEY, name); + if (r == -EOPNOTSUPP) { + return r; + } else if (r == -ENOENT || name->empty()) { + // default to the cluster fsid + r = rados.cluster_fsid(name); + if (r < 0) { + lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r) + << dendl; + } + return r; + } else if (r < 0) { + lderr(cct) << "failed to retrieve site name: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Mirror<I>::site_name_set(librados::Rados& rados, const std::string& name) { + CephContext *cct = reinterpret_cast<CephContext *>(rados.cct()); + + std::string site_name{name}; + boost::algorithm::trim(site_name); + ldout(cct, 20) << "site_name=" << site_name << dendl; + + int r = set_config_key(rados, RBD_MIRROR_SITE_NAME_CONFIG_KEY, name); + if (r == -EOPNOTSUPP) { + return r; + } else if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to update site name: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Mirror<I>::mode_get(librados::IoCtx& io_ctx, + rbd_mirror_mode_t *mirror_mode) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << dendl; + + cls::rbd::MirrorMode mirror_mode_internal; + int r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode_internal); + if (r < 0) { + lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r) + << dendl; + return r; + } + + switch (mirror_mode_internal) { + case cls::rbd::MIRROR_MODE_DISABLED: + case cls::rbd::MIRROR_MODE_IMAGE: + case cls::rbd::MIRROR_MODE_POOL: + *mirror_mode = static_cast<rbd_mirror_mode_t>(mirror_mode_internal); + break; + default: + lderr(cct) << "unknown mirror mode (" + << static_cast<uint32_t>(mirror_mode_internal) << ")" + << dendl; + return -EINVAL; + } + return 0; +} + +template <typename I> +int Mirror<I>::mode_set(librados::IoCtx& io_ctx, + rbd_mirror_mode_t mirror_mode) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << dendl; + + // TODO + if (!io_ctx.get_namespace().empty()) { + lderr(cct) << "namespaces are not supported" << dendl; + return -EINVAL; + } + + cls::rbd::MirrorMode next_mirror_mode; + switch (mirror_mode) { + case RBD_MIRROR_MODE_DISABLED: + case RBD_MIRROR_MODE_IMAGE: + case RBD_MIRROR_MODE_POOL: + next_mirror_mode = static_cast<cls::rbd::MirrorMode>(mirror_mode); + break; + default: + lderr(cct) << "unknown mirror mode (" + << static_cast<uint32_t>(mirror_mode) << ")" << dendl; + return -EINVAL; + } + + int r; + if (next_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + // fail early if pool still has peers registered and attempting to disable + std::vector<cls::rbd::MirrorPeer> mirror_peers; + r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list peers: " << cpp_strerror(r) << dendl; + return r; + } else if (!mirror_peers.empty()) { + lderr(cct) << "mirror peers still registered" << dendl; + return -EBUSY; + } + } + + cls::rbd::MirrorMode current_mirror_mode; + r = cls_client::mirror_mode_get(&io_ctx, ¤t_mirror_mode); + if (r < 0) { + lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r) + << dendl; + return r; + } + + if (current_mirror_mode == next_mirror_mode) { + return 0; + } else if (current_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + uuid_d uuid_gen; + uuid_gen.generate_random(); + r = cls_client::mirror_uuid_set(&io_ctx, uuid_gen.to_string()); + if (r < 0) { + lderr(cct) << "failed to allocate mirroring uuid: " << cpp_strerror(r) + << dendl; + return r; + } + } + + if (current_mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) { + r = cls_client::mirror_mode_set(&io_ctx, cls::rbd::MIRROR_MODE_IMAGE); + if (r < 0) { + lderr(cct) << "failed to set mirror mode to image: " + << cpp_strerror(r) << dendl; + return r; + } + + r = MirroringWatcher<>::notify_mode_updated(io_ctx, + cls::rbd::MIRROR_MODE_IMAGE); + if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + } + + if (next_mirror_mode == cls::rbd::MIRROR_MODE_IMAGE) { + return 0; + } + + if (next_mirror_mode == cls::rbd::MIRROR_MODE_POOL) { + map<string, string> images; + r = Image<I>::list_images_v2(io_ctx, &images); + if (r < 0) { + lderr(cct) << "failed listing images: " << cpp_strerror(r) << dendl; + return r; + } + + for (const auto& img_pair : images) { + uint64_t features; + uint64_t incompatible_features; + r = cls_client::get_features(&io_ctx, util::header_name(img_pair.second), + true, &features, &incompatible_features); + if (r < 0) { + lderr(cct) << "error getting features for image " << img_pair.first + << ": " << cpp_strerror(r) << dendl; + return r; + } + + if ((features & RBD_FEATURE_JOURNALING) != 0) { + I *img_ctx = I::create("", img_pair.second, nullptr, io_ctx, false); + r = img_ctx->state->open(0); + if (r < 0) { + lderr(cct) << "error opening image "<< img_pair.first << ": " + << cpp_strerror(r) << dendl; + return r; + } + + r = image_enable(img_ctx, true); + int close_r = img_ctx->state->close(); + if (r < 0) { + lderr(cct) << "error enabling mirroring for image " + << img_pair.first << ": " << cpp_strerror(r) << dendl; + return r; + } else if (close_r < 0) { + lderr(cct) << "failed to close image " << img_pair.first << ": " + << cpp_strerror(close_r) << dendl; + return close_r; + } + } + } + } else if (next_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + while (true) { + bool retry_busy = false; + bool pending_busy = false; + + std::set<std::string> image_ids; + r = list_mirror_images(io_ctx, image_ids); + if (r < 0) { + lderr(cct) << "failed listing images: " << cpp_strerror(r) << dendl; + return r; + } + + for (const auto& img_id : image_ids) { + if (current_mirror_mode == cls::rbd::MIRROR_MODE_IMAGE) { + cls::rbd::MirrorImage mirror_image; + r = cls_client::mirror_image_get(&io_ctx, img_id, &mirror_image); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve mirroring state for image id " + << img_id << ": " << cpp_strerror(r) << dendl; + return r; + } + if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + lderr(cct) << "failed to disable mirror mode: there are still " + << "images with mirroring enabled" << dendl; + return -EINVAL; + } + } else { + I *img_ctx = I::create("", img_id, nullptr, io_ctx, false); + r = img_ctx->state->open(0); + if (r < 0) { + lderr(cct) << "error opening image id "<< img_id << ": " + << cpp_strerror(r) << dendl; + return r; + } + + r = image_disable(img_ctx, false); + int close_r = img_ctx->state->close(); + if (r == -EBUSY) { + pending_busy = true; + } else if (r < 0) { + lderr(cct) << "error disabling mirroring for image id " << img_id + << cpp_strerror(r) << dendl; + return r; + } else if (close_r < 0) { + lderr(cct) << "failed to close image id " << img_id << ": " + << cpp_strerror(close_r) << dendl; + return close_r; + } else if (pending_busy) { + // at least one mirrored image was successfully disabled, so we can + // retry any failures caused by busy parent/child relationships + retry_busy = true; + } + } + } + + if (!retry_busy && pending_busy) { + lderr(cct) << "error disabling mirroring for one or more images" + << dendl; + return -EBUSY; + } else if (!retry_busy) { + break; + } + } + } + + r = cls_client::mirror_mode_set(&io_ctx, next_mirror_mode); + if (r < 0) { + lderr(cct) << "failed to set mirror mode: " << cpp_strerror(r) << dendl; + return r; + } + + r = MirroringWatcher<>::notify_mode_updated(io_ctx, next_mirror_mode); + if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + return 0; +} + +template <typename I> +int Mirror<I>::peer_bootstrap_create(librados::IoCtx& io_ctx, + std::string* token) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << dendl; + + auto mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + int r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve mirroring mode: " << cpp_strerror(r) + << dendl; + return r; + } else if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + return -EINVAL; + } + + // retrieve the cluster fsid + std::string fsid; + librados::Rados rados(io_ctx); + r = rados.cluster_fsid(&fsid); + if (r < 0) { + lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r) + << dendl; + return r; + } + + std::string peer_client_id; + std::string cephx_key; + r = create_bootstrap_user(cct, rados, &peer_client_id, &cephx_key); + if (r < 0) { + return r; + } + + std::string mon_host = cct->_conf.get_val<std::string>("mon_host"); + ldout(cct, 20) << "mon_host=" << mon_host << dendl; + + // format the token response + bufferlist token_bl; + token_bl.append( + R"({)" \ + R"("fsid":")" + fsid + R"(",)" + \ + R"("client_id":")" + peer_client_id + R"(",)" + \ + R"("key":")" + cephx_key + R"(",)" + \ + R"("mon_host":")" + \ + boost::replace_all_copy(mon_host, "\"", "\\\"") + R"(")" + \ + R"(})"); + ldout(cct, 20) << "token=" << token_bl.to_str() << dendl; + + bufferlist base64_bl; + token_bl.encode_base64(base64_bl); + *token = base64_bl.to_str(); + + return 0; +} + +template <typename I> +int Mirror<I>::peer_bootstrap_import(librados::IoCtx& io_ctx, + rbd_mirror_peer_direction_t direction, + const std::string& token) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << dendl; + + if (direction != RBD_MIRROR_PEER_DIRECTION_RX && + direction != RBD_MIRROR_PEER_DIRECTION_RX_TX) { + lderr(cct) << "invalid mirror peer direction" << dendl; + return -EINVAL; + } + + bufferlist token_bl; + try { + bufferlist base64_bl; + base64_bl.append(token); + token_bl.decode_base64(base64_bl); + } catch (buffer::error& err) { + lderr(cct) << "failed to decode base64" << dendl; + return -EINVAL; + } + + ldout(cct, 20) << "token=" << token_bl.to_str() << dendl; + + bool json_valid = false; + std::string expected_remote_fsid; + std::string remote_client_id; + std::string remote_key; + std::string remote_mon_host; + + json_spirit::mValue json_root; + if(json_spirit::read(token_bl.to_str(), json_root)) { + try { + auto& json_obj = json_root.get_obj(); + expected_remote_fsid = json_obj["fsid"].get_str(); + remote_client_id = json_obj["client_id"].get_str(); + remote_key = json_obj["key"].get_str(); + remote_mon_host = json_obj["mon_host"].get_str(); + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + lderr(cct) << "invalid bootstrap token JSON received" << dendl; + return -EINVAL; + } + + // sanity check import process + std::string local_fsid; + librados::Rados rados(io_ctx); + int r = rados.cluster_fsid(&local_fsid); + if (r < 0) { + lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r) + << dendl; + return r; + } + + std::string local_site_name; + r = site_name_get(rados, &local_site_name); + if (r < 0) { + lderr(cct) << "failed to retrieve cluster site name: " << cpp_strerror(r) + << dendl; + return r; + } + + // attempt to connect to remote cluster + librados::Rados remote_rados; + remote_rados.init(remote_client_id.c_str()); + + auto remote_cct = reinterpret_cast<CephContext*>(remote_rados.cct()); + remote_cct->_conf.set_val("mon_host", remote_mon_host); + remote_cct->_conf.set_val("key", remote_key); + + r = remote_rados.connect(); + if (r < 0) { + lderr(cct) << "failed to connect to peer cluster: " << cpp_strerror(r) + << dendl; + return r; + } + + std::string remote_fsid; + r = remote_rados.cluster_fsid(&remote_fsid); + if (r < 0) { + lderr(cct) << "failed to retrieve remote cluster fsid: " + << cpp_strerror(r) << dendl; + return r; + } else if (local_fsid == remote_fsid) { + lderr(cct) << "cannot import token for local cluster" << dendl; + return -EINVAL; + } else if (expected_remote_fsid != remote_fsid) { + lderr(cct) << "unexpected remote cluster fsid" << dendl; + return -EINVAL; + } + + std::string remote_site_name; + r = site_name_get(remote_rados, &remote_site_name); + if (r < 0) { + lderr(cct) << "failed to retrieve remote cluster site name: " + << cpp_strerror(r) << dendl; + return r; + } else if (local_site_name == remote_site_name) { + lderr(cct) << "cannot import token for duplicate site name" << dendl; + return -EINVAL; + } + + librados::IoCtx remote_io_ctx; + r = remote_rados.ioctx_create(io_ctx.get_pool_name().c_str(), remote_io_ctx); + if (r == -ENOENT) { + ldout(cct, 10) << "remote pool does not exist" << dendl; + return r; + } else if (r < 0) { + lderr(cct) << "failed to open remote pool '" << io_ctx.get_pool_name() + << "': " << cpp_strerror(r) << dendl; + return r; + } + + auto remote_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + r = cls_client::mirror_mode_get(&remote_io_ctx, &remote_mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve remote mirroring mode: " + << cpp_strerror(r) << dendl; + return r; + } else if (remote_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + return -ENOSYS; + } + + auto local_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + r = cls_client::mirror_mode_get(&io_ctx, &local_mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve local mirroring mode: " << cpp_strerror(r) + << dendl; + return r; + } else if (local_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + // copy mirror mode from remote peer + r = mode_set(io_ctx, static_cast<rbd_mirror_mode_t>(remote_mirror_mode)); + if (r < 0) { + return r; + } + } + + if (direction == RBD_MIRROR_PEER_DIRECTION_RX_TX) { + // create a local mirror peer user and export it to the remote cluster + std::string local_client_id; + std::string local_key; + r = create_bootstrap_user(cct, rados, &local_client_id, &local_key); + if (r < 0) { + return r; + } + + std::string local_mon_host = cct->_conf.get_val<std::string>("mon_host"); + + // create local cluster peer in remote cluster + r = create_bootstrap_peer(cct, remote_io_ctx, local_site_name, local_fsid, + local_client_id, local_key, local_mon_host, + "local", "remote"); + if (r < 0) { + return r; + } + } + + // create remote cluster peer in local cluster + r = create_bootstrap_peer(cct, io_ctx, remote_site_name, remote_fsid, + remote_client_id, remote_key, remote_mon_host, + "remote", "local"); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Mirror<I>::peer_add(librados::IoCtx& io_ctx, std::string *uuid, + const std::string &cluster_name, + const std::string &client_name) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << "name=" << cluster_name << ", " + << "client=" << client_name << dendl; + + // TODO + if (!io_ctx.get_namespace().empty()) { + lderr(cct) << "namespaces are not supported" << dendl; + return -EINVAL; + } + + if (cct->_conf->cluster == cluster_name) { + lderr(cct) << "cannot add self as remote peer" << dendl; + return -EINVAL; + } + + int r; + do { + uuid_d uuid_gen; + uuid_gen.generate_random(); + + *uuid = uuid_gen.to_string(); + r = cls_client::mirror_peer_add(&io_ctx, *uuid, cluster_name, + client_name); + if (r == -ESTALE) { + ldout(cct, 5) << "duplicate UUID detected, retrying" << dendl; + } else if (r < 0) { + lderr(cct) << "failed to add mirror peer '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + } while (r == -ESTALE); + return 0; +} + +template <typename I> +int Mirror<I>::peer_remove(librados::IoCtx& io_ctx, const std::string &uuid) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << dendl; + + int r = remove_peer_config_key(io_ctx, uuid); + if (r < 0) { + lderr(cct) << "failed to remove peer attributes '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + + r = cls_client::mirror_peer_remove(&io_ctx, uuid); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to remove peer '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + return 0; +} + +template <typename I> +int Mirror<I>::peer_list(librados::IoCtx& io_ctx, + std::vector<mirror_peer_t> *peers) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << dendl; + + std::vector<cls::rbd::MirrorPeer> mirror_peers; + int r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list peers: " << cpp_strerror(r) << dendl; + return r; + } + + peers->clear(); + peers->reserve(mirror_peers.size()); + for (auto &mirror_peer : mirror_peers) { + mirror_peer_t peer; + peer.uuid = mirror_peer.uuid; + peer.cluster_name = mirror_peer.cluster_name; + peer.client_name = mirror_peer.client_name; + peers->push_back(peer); + } + return 0; +} + +template <typename I> +int Mirror<I>::peer_set_client(librados::IoCtx& io_ctx, const std::string &uuid, + const std::string &client_name) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << ", " + << "client=" << client_name << dendl; + + int r = cls_client::mirror_peer_set_client(&io_ctx, uuid, client_name); + if (r < 0) { + lderr(cct) << "failed to update client '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + return 0; +} + +template <typename I> +int Mirror<I>::peer_set_cluster(librados::IoCtx& io_ctx, + const std::string &uuid, + const std::string &cluster_name) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << ", " + << "cluster=" << cluster_name << dendl; + + if (cct->_conf->cluster == cluster_name) { + lderr(cct) << "cannot set self as remote peer" << dendl; + return -EINVAL; + } + + int r = cls_client::mirror_peer_set_cluster(&io_ctx, uuid, cluster_name); + if (r < 0) { + lderr(cct) << "failed to update cluster '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + return 0; +} + +template <typename I> +int Mirror<I>::peer_get_attributes(librados::IoCtx& io_ctx, + const std::string &uuid, + Attributes* attributes) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << dendl; + + attributes->clear(); + + librados::Rados rados(io_ctx); + std::string value; + int r = get_config_key(rados, get_peer_config_key_name(io_ctx.get_id(), uuid), + &value); + if (r == -ENOENT || value.empty()) { + return -ENOENT; + } else if (r < 0) { + lderr(cct) << "failed to retrieve peer attributes: " << cpp_strerror(r) + << dendl; + return r; + } + + bool json_valid = false; + json_spirit::mValue json_root; + if(json_spirit::read(value, json_root)) { + try { + auto& json_obj = json_root.get_obj(); + for (auto& pairs : json_obj) { + (*attributes)[pairs.first] = pairs.second.get_str(); + } + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + lderr(cct) << "invalid peer attributes JSON received" << dendl; + return -EINVAL; + } + return 0; +} + +template <typename I> +int Mirror<I>::peer_set_attributes(librados::IoCtx& io_ctx, + const std::string &uuid, + const Attributes& attributes) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << ", " + << "attributes=" << attributes << dendl; + + std::vector<mirror_peer_t> mirror_peers; + int r = peer_list(io_ctx, &mirror_peers); + if (r < 0) { + return r; + } + + if (std::find_if(mirror_peers.begin(), mirror_peers.end(), + [&uuid](const librbd::mirror_peer_t& peer) { + return uuid == peer.uuid; + }) == mirror_peers.end()) { + ldout(cct, 5) << "mirror peer uuid " << uuid << " does not exist" << dendl; + return -ENOENT; + } + + std::stringstream ss; + ss << "{"; + for (auto& pair : attributes) { + ss << "\\\"" << pair.first << "\\\": " + << "\\\"" << pair.second << "\\\""; + if (&pair != &(*attributes.rbegin())) { + ss << ", "; + } + } + ss << "}"; + + librados::Rados rados(io_ctx); + r = set_config_key(rados, get_peer_config_key_name(io_ctx.get_id(), uuid), + ss.str()); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to update peer attributes: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template <typename I> +int Mirror<I>::image_status_list(librados::IoCtx& io_ctx, + const std::string &start_id, size_t max, + IdToMirrorImageStatus *images) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + int r; + + map<string, string> id_to_name; + { + map<string, string> name_to_id; + r = Image<I>::list_images_v2(io_ctx, &name_to_id); + if (r < 0) { + return r; + } + for (auto it : name_to_id) { + id_to_name[it.second] = it.first; + } + } + + map<std::string, cls::rbd::MirrorImage> images_; + map<std::string, cls::rbd::MirrorImageStatus> statuses_; + + r = librbd::cls_client::mirror_image_status_list(&io_ctx, start_id, max, + &images_, &statuses_); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list mirror image statuses: " + << cpp_strerror(r) << dendl; + return r; + } + + cls::rbd::MirrorImageStatus unknown_status( + cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN, "status not found"); + + for (auto it = images_.begin(); it != images_.end(); ++it) { + auto &image_id = it->first; + auto &info = it->second; + if (info.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLED) { + continue; + } + + auto &image_name = id_to_name[image_id]; + if (image_name.empty()) { + lderr(cct) << "failed to find image name for image " << image_id << ", " + << "using image id as name" << dendl; + image_name = image_id; + } + auto s_it = statuses_.find(image_id); + auto &s = s_it != statuses_.end() ? s_it->second : unknown_status; + (*images)[image_id] = mirror_image_status_t{ + image_name, + mirror_image_info_t{ + info.global_image_id, + static_cast<mirror_image_state_t>(info.state), + false}, // XXX: To set "primary" right would require an additional call. + static_cast<mirror_image_status_state_t>(s.state), + s.description, + s.last_update.sec(), + s.up}; + } + + return 0; +} + +template <typename I> +int Mirror<I>::image_status_summary(librados::IoCtx& io_ctx, + MirrorImageStatusStates *states) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + + std::map<cls::rbd::MirrorImageStatusState, int> states_; + int r = cls_client::mirror_image_status_get_summary(&io_ctx, &states_); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to get mirror status summary: " + << cpp_strerror(r) << dendl; + return r; + } + for (auto &s : states_) { + (*states)[static_cast<mirror_image_status_state_t>(s.first)] = s.second; + } + return 0; +} + +template <typename I> +int Mirror<I>::image_instance_id_list( + librados::IoCtx& io_ctx, const std::string &start_image_id, size_t max, + std::map<std::string, std::string> *instance_ids) { + CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct()); + std::map<std::string, entity_inst_t> instances; + + int r = librbd::cls_client::mirror_image_instance_list( + &io_ctx, start_image_id, max, &instances); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list mirror image instances: " << cpp_strerror(r) + << dendl; + return r; + } + + for (auto it : instances) { + (*instance_ids)[it.first] = stringify(it.second.name.num()); + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Mirror<librbd::ImageCtx>; diff --git a/src/librbd/api/Mirror.h b/src/librbd/api/Mirror.h new file mode 100644 index 00000000..94768acd --- /dev/null +++ b/src/librbd/api/Mirror.h @@ -0,0 +1,87 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef LIBRBD_API_MIRROR_H +#define LIBRBD_API_MIRROR_H + +#include "include/rbd/librbd.hpp" +#include <map> +#include <string> +#include <vector> + +struct Context; + +namespace librbd { + +struct ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +struct Mirror { + typedef std::map<std::string, std::string> Attributes; + typedef std::map<std::string, mirror_image_status_t> IdToMirrorImageStatus; + typedef std::map<mirror_image_status_state_t, int> MirrorImageStatusStates; + + static int site_name_get(librados::Rados& rados, std::string* name); + static int site_name_set(librados::Rados& rados, const std::string& name); + + static int mode_get(librados::IoCtx& io_ctx, rbd_mirror_mode_t *mirror_mode); + static int mode_set(librados::IoCtx& io_ctx, rbd_mirror_mode_t mirror_mode); + + static int peer_bootstrap_create(librados::IoCtx& io_ctx, std::string* token); + static int peer_bootstrap_import(librados::IoCtx& io_ctx, + rbd_mirror_peer_direction_t direction, + const std::string& token); + + static int peer_add(librados::IoCtx& io_ctx, std::string *uuid, + const std::string &cluster_name, + const std::string &client_name); + static int peer_remove(librados::IoCtx& io_ctx, const std::string &uuid); + static int peer_list(librados::IoCtx& io_ctx, + std::vector<mirror_peer_t> *peers); + static int peer_set_client(librados::IoCtx& io_ctx, const std::string &uuid, + const std::string &client_name); + static int peer_set_cluster(librados::IoCtx& io_ctx, const std::string &uuid, + const std::string &cluster_name); + static int peer_get_attributes(librados::IoCtx& io_ctx, + const std::string &uuid, + Attributes* attributes); + static int peer_set_attributes(librados::IoCtx& io_ctx, + const std::string &uuid, + const Attributes& attributes); + + static int image_status_list(librados::IoCtx& io_ctx, + const std::string &start_id, size_t max, + IdToMirrorImageStatus *images); + static int image_status_summary(librados::IoCtx& io_ctx, + MirrorImageStatusStates *states); + static int image_instance_id_list(librados::IoCtx& io_ctx, + const std::string &start_image_id, + size_t max, + std::map<std::string, std::string> *ids); + + static int image_enable(ImageCtxT *ictx, bool relax_same_pool_parent_check); + static int image_disable(ImageCtxT *ictx, bool force); + static int image_promote(ImageCtxT *ictx, bool force); + static void image_promote(ImageCtxT *ictx, bool force, Context *on_finish); + static int image_demote(ImageCtxT *ictx); + static void image_demote(ImageCtxT *ictx, Context *on_finish); + static int image_resync(ImageCtxT *ictx); + static int image_get_info(ImageCtxT *ictx, + mirror_image_info_t *mirror_image_info); + static void image_get_info(ImageCtxT *ictx, + mirror_image_info_t *mirror_image_info, + Context *on_finish); + static int image_get_status(ImageCtxT *ictx, mirror_image_status_t *status); + static void image_get_status(ImageCtxT *ictx, mirror_image_status_t *status, + Context *on_finish); + static int image_get_instance_id(ImageCtxT *ictx, std::string *instance_id); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Mirror<librbd::ImageCtx>; + +#endif // LIBRBD_API_MIRROR_H diff --git a/src/librbd/api/Namespace.cc b/src/librbd/api/Namespace.cc new file mode 100644 index 00000000..ed7f8e28 --- /dev/null +++ b/src/librbd/api/Namespace.cc @@ -0,0 +1,227 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/api/Namespace.h" +#include "librbd/ImageCtx.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Namespace: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +const std::list<std::string> POOL_OBJECTS { + RBD_CHILDREN, + RBD_GROUP_DIRECTORY, + RBD_INFO, + RBD_MIRRORING, + RBD_TASK, + RBD_TRASH, + RBD_DIRECTORY +}; + +} // anonymous namespace + +template <typename I> +int Namespace<I>::create(librados::IoCtx& io_ctx, const std::string& name) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 5) << "name=" << name << dendl; + + if (name.empty()) { + return -EINVAL; + } + + librados::Rados rados(io_ctx); + int8_t require_osd_release; + int r = rados.get_min_compatible_osd(&require_osd_release); + if (r < 0) { + lderr(cct) << "failed to retrieve min OSD release: " << cpp_strerror(r) + << dendl; + return r; + } + + if (require_osd_release < CEPH_RELEASE_NAUTILUS) { + ldout(cct, 1) << "namespace support requires nautilus or later OSD" + << dendl; + return -ENOSYS; + } + + + librados::IoCtx default_ns_ctx; + default_ns_ctx.dup(io_ctx); + default_ns_ctx.set_namespace(""); + + r = cls_client::namespace_add(&default_ns_ctx, name); + if (r < 0) { + lderr(cct) << "failed to add namespace: " << cpp_strerror(r) << dendl; + return r; + } + + librados::IoCtx ns_ctx; + ns_ctx.dup(io_ctx); + ns_ctx.set_namespace(name); + + r = cls_client::dir_state_set(&ns_ctx, RBD_DIRECTORY, + cls::rbd::DIRECTORY_STATE_READY); + if (r < 0) { + lderr(cct) << "failed to initialize image directory: " << cpp_strerror(r) + << dendl; + goto rollback; + } + + return 0; + +rollback: + int ret_val = cls_client::namespace_remove(&default_ns_ctx, name); + if (ret_val < 0) { + lderr(cct) << "failed to remove namespace: " << cpp_strerror(ret_val) << dendl; + } + + return r; +} + +template <typename I> +int Namespace<I>::remove(librados::IoCtx& io_ctx, const std::string& name) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 5) << "name=" << name << dendl; + + if (name.empty()) { + return -EINVAL; + } + + librados::IoCtx default_ns_ctx; + default_ns_ctx.dup(io_ctx); + default_ns_ctx.set_namespace(""); + + librados::IoCtx ns_ctx; + ns_ctx.dup(io_ctx); + ns_ctx.set_namespace(name); + + std::map<std::string, cls::rbd::TrashImageSpec> trash_entries; + + librados::ObjectWriteOperation dir_op; + librbd::cls_client::dir_state_set( + &dir_op, cls::rbd::DIRECTORY_STATE_ADD_DISABLED); + dir_op.remove(); + + int r = ns_ctx.operate(RBD_DIRECTORY, &dir_op); + if (r == -EBUSY) { + ldout(cct, 5) << "image directory not empty" << dendl; + goto rollback; + } else if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to disable the namespace: " << cpp_strerror(r) + << dendl; + return r; + } + + r = cls_client::trash_list(&ns_ctx, "", 1, &trash_entries); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list trash directory: " << cpp_strerror(r) + << dendl; + return r; + } else if (!trash_entries.empty()) { + ldout(cct, 5) << "image trash not empty" << dendl; + goto rollback; + } + + for (auto& oid : POOL_OBJECTS) { + r = ns_ctx.remove(oid); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to remove object '" << oid << "': " + << cpp_strerror(r) << dendl; + return r; + } + } + + r = cls_client::namespace_remove(&default_ns_ctx, name); + if (r < 0) { + lderr(cct) << "failed to remove namespace: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; + +rollback: + + r = librbd::cls_client::dir_state_set( + &ns_ctx, RBD_DIRECTORY, cls::rbd::DIRECTORY_STATE_READY); + if (r < 0) { + lderr(cct) << "failed to restore directory state: " << cpp_strerror(r) + << dendl; + } + + return -EBUSY; +} + +template <typename I> +int Namespace<I>::list(IoCtx& io_ctx, vector<string> *names) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 5) << dendl; + + librados::IoCtx default_ns_ctx; + default_ns_ctx.dup(io_ctx); + default_ns_ctx.set_namespace(""); + + int r; + int max_read = 1024; + std::string last_read = ""; + do { + std::list<std::string> name_list; + r = cls_client::namespace_list(&default_ns_ctx, last_read, max_read, + &name_list); + if (r == -ENOENT) { + return 0; + } else if (r < 0) { + lderr(cct) << "error listing namespaces: " << cpp_strerror(r) << dendl; + return r; + } + + names->insert(names->end(), name_list.begin(), name_list.end()); + if (!name_list.empty()) { + last_read = name_list.back(); + } + r = name_list.size(); + } while (r == max_read); + + return 0; +} + +template <typename I> +int Namespace<I>::exists(librados::IoCtx& io_ctx, const std::string& name, bool *exists) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 5) << "name=" << name << dendl; + + *exists = false; + if (name.empty()) { + return -EINVAL; + } + + librados::IoCtx ns_ctx; + ns_ctx.dup(io_ctx); + ns_ctx.set_namespace(name); + + int r = librbd::cls_client::dir_state_assert(&ns_ctx, RBD_DIRECTORY, + cls::rbd::DIRECTORY_STATE_READY); + if (r == 0) { + *exists = true; + } else if (r != -ENOENT) { + lderr(cct) << "error asserting namespace: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Namespace<librbd::ImageCtx>; diff --git a/src/librbd/api/Namespace.h b/src/librbd/api/Namespace.h new file mode 100644 index 00000000..220eb28f --- /dev/null +++ b/src/librbd/api/Namespace.h @@ -0,0 +1,33 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_NAMESPACE_H +#define CEPH_LIBRBD_API_NAMESPACE_H + +#include "include/rados/librados_fwd.hpp" +#include "include/rbd/librbd.hpp" +#include <string> +#include <vector> + +namespace librbd { + +struct ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +struct Namespace { + + static int create(librados::IoCtx& io_ctx, const std::string& name); + static int remove(librados::IoCtx& io_ctx, const std::string& name); + static int list(librados::IoCtx& io_ctx, std::vector<std::string>* names); + static int exists(librados::IoCtx& io_ctx, const std::string& name, bool *exists); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Namespace<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_API_NAMESPACE_H diff --git a/src/librbd/api/Pool.cc b/src/librbd/api/Pool.cc new file mode 100644 index 00000000..88adf561 --- /dev/null +++ b/src/librbd/api/Pool.cc @@ -0,0 +1,377 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Pool.h" +#include "include/rados/librados.hpp" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Throttle.h" +#include "cls/rbd/cls_rbd_client.h" +#include "osd/osd_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" +#include "librbd/api/Image.h" +#include "librbd/api/Trash.h" +#include "librbd/image/ValidatePoolRequest.h" + +#define dout_subsys ceph_subsys_rbd + +namespace librbd { +namespace api { + +namespace { + +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Pool::ImageStatRequest: " \ + << __func__ << " " << this << ": " \ + << "(id=" << m_image_id << "): " + +template <typename I> +class ImageStatRequest { +public: + ImageStatRequest(librados::IoCtx& io_ctx, SimpleThrottle& throttle, + const std::string& image_id, bool scan_snaps, + std::atomic<uint64_t>* bytes, + std::atomic<uint64_t>* max_bytes, + std::atomic<uint64_t>* snaps) + : m_cct(reinterpret_cast<CephContext*>(io_ctx.cct())), + m_io_ctx(io_ctx), m_throttle(throttle), m_image_id(image_id), + m_scan_snaps(scan_snaps), m_bytes(bytes), m_max_bytes(max_bytes), + m_snaps(snaps) { + m_throttle.start_op(); + } + + void send() { + get_head(); + } + +protected: + void finish(int r) { + (*m_max_bytes) += m_max_size; + m_throttle.end_op(r); + + delete this; + } + +private: + CephContext* m_cct; + librados::IoCtx& m_io_ctx; + SimpleThrottle& m_throttle; + const std::string& m_image_id; + bool m_scan_snaps; + std::atomic<uint64_t>* m_bytes; + std::atomic<uint64_t>* m_max_bytes; + std::atomic<uint64_t>* m_snaps; + bufferlist m_out_bl; + + uint64_t m_max_size = 0; + ::SnapContext m_snapc; + + void get_head() { + ldout(m_cct, 15) << dendl; + + librados::ObjectReadOperation op; + cls_client::get_size_start(&op, CEPH_NOSNAP); + if (m_scan_snaps) { + cls_client::get_snapcontext_start(&op); + } + + m_out_bl.clear(); + auto aio_comp = util::create_rados_callback< + ImageStatRequest<I>, &ImageStatRequest<I>::handle_get_head>(this); + int r = m_io_ctx.aio_operate(util::header_name(m_image_id), aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); + } + + void handle_get_head(int r) { + ldout(m_cct, 15) << "r=" << r << dendl; + + auto it = m_out_bl.cbegin(); + if (r == 0) { + uint8_t order; + r = cls_client::get_size_finish(&it, &m_max_size, &order); + if (r == 0) { + (*m_bytes) += m_max_size; + } + } + if (m_scan_snaps && r == 0) { + r = cls_client::get_snapcontext_finish(&it, &m_snapc); + if (r == 0) { + (*m_snaps) += m_snapc.snaps.size(); + } + } + + if (r == -ENOENT) { + finish(r); + return; + } else if (r < 0) { + lderr(m_cct) << "failed to stat image: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (!m_snapc.is_valid()) { + lderr(m_cct) << "snap context is invalid" << dendl; + finish(-EIO); + return; + } + + get_snaps(); + } + + void get_snaps() { + if (!m_scan_snaps || m_snapc.snaps.empty()) { + finish(0); + return; + } + + ldout(m_cct, 15) << dendl; + librados::ObjectReadOperation op; + for (auto snap_seq : m_snapc.snaps) { + cls_client::get_size_start(&op, snap_seq); + } + + m_out_bl.clear(); + auto aio_comp = util::create_rados_callback< + ImageStatRequest<I>, &ImageStatRequest<I>::handle_get_snaps>(this); + int r = m_io_ctx.aio_operate(util::header_name(m_image_id), aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); + } + + void handle_get_snaps(int r) { + ldout(m_cct, 15) << "r=" << r << dendl; + + auto it = m_out_bl.cbegin(); + for ([[maybe_unused]] auto snap_seq : m_snapc.snaps) { + uint64_t size; + if (r == 0) { + uint8_t order; + r = cls_client::get_size_finish(&it, &size, &order); + } + if (r == 0 && m_max_size < size) { + m_max_size = size; + } + } + + if (r == -ENOENT) { + ldout(m_cct, 15) << "out-of-sync metadata" << dendl; + get_head(); + } else if (r < 0) { + lderr(m_cct) << "failed to retrieve snap size: " << cpp_strerror(r) + << dendl; + finish(r); + } else { + finish(0); + } + } + +}; + +template <typename I> +void get_pool_stat_option_value(typename Pool<I>::StatOptions* stat_options, + rbd_pool_stat_option_t option, + uint64_t** value) { + auto it = stat_options->find(option); + if (it == stat_options->end()) { + *value = nullptr; + } else { + *value = it->second; + } +} + +template <typename I> +int get_pool_stats(librados::IoCtx& io_ctx, const ConfigProxy& config, + const std::vector<std::string>& image_ids, uint64_t* image_count, + uint64_t* provisioned_bytes, uint64_t* max_provisioned_bytes, + uint64_t* snapshot_count) { + + bool scan_snaps = ((max_provisioned_bytes != nullptr) || + (snapshot_count != nullptr)); + + SimpleThrottle throttle( + config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true); + std::atomic<uint64_t> bytes{0}; + std::atomic<uint64_t> max_bytes{0}; + std::atomic<uint64_t> snaps{0}; + for (auto& image_id : image_ids) { + if (throttle.pending_error()) { + break; + } + + auto req = new ImageStatRequest<I>(io_ctx, throttle, image_id, + scan_snaps, &bytes, &max_bytes, &snaps); + req->send(); + } + + int r = throttle.wait_for_ret(); + if (r < 0) { + return r; + } + + if (image_count != nullptr) { + *image_count = image_ids.size(); + } + if (provisioned_bytes != nullptr) { + *provisioned_bytes = bytes.load(); + } + if (max_provisioned_bytes != nullptr) { + *max_provisioned_bytes = max_bytes.load(); + } + if (snapshot_count != nullptr) { + *snapshot_count = snaps.load(); + } + + return 0; +} + +} // anonymous namespace + +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Pool: " << __func__ << ": " + +template <typename I> +int Pool<I>::init(librados::IoCtx& io_ctx, bool force) { + auto cct = reinterpret_cast<CephContext*>(io_ctx.cct()); + ldout(cct, 10) << dendl; + + int r = io_ctx.application_enable(pg_pool_t::APPLICATION_NAME_RBD, force); + if (r < 0) { + return r; + } + + ConfigProxy config{cct->_conf}; + api::Config<I>::apply_pool_overrides(io_ctx, &config); + if (!config.get_val<bool>("rbd_validate_pool")) { + return 0; + } + + ThreadPool *thread_pool; + ContextWQ *op_work_queue; + ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue); + + C_SaferCond ctx; + auto req = image::ValidatePoolRequest<I>::create(io_ctx, op_work_queue, &ctx); + req->send(); + + return ctx.wait(); +} + +template <typename I> +int Pool<I>::add_stat_option(StatOptions* stat_options, + rbd_pool_stat_option_t option, + uint64_t* value) { + switch (option) { + case RBD_POOL_STAT_OPTION_IMAGES: + case RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES: + case RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES: + case RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS: + case RBD_POOL_STAT_OPTION_TRASH_IMAGES: + case RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES: + case RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES: + case RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS: + stat_options->emplace(option, value); + return 0; + default: + break; + } + return -ENOENT; +} + +template <typename I> +int Pool<I>::get_stats(librados::IoCtx& io_ctx, StatOptions* stat_options) { + auto cct = reinterpret_cast<CephContext*>(io_ctx.cct()); + ldout(cct, 10) << dendl; + + ConfigProxy config{cct->_conf}; + api::Config<I>::apply_pool_overrides(io_ctx, &config); + + uint64_t* image_count; + uint64_t* provisioned_bytes; + uint64_t* max_provisioned_bytes; + uint64_t* snapshot_count; + + std::vector<trash_image_info_t> trash_entries; + int r = Trash<I>::list(io_ctx, trash_entries, false); + if (r < 0 && r != -EOPNOTSUPP) { + return r; + } + + get_pool_stat_option_value<I>( + stat_options, RBD_POOL_STAT_OPTION_IMAGES, &image_count); + get_pool_stat_option_value<I>( + stat_options, RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES, + &provisioned_bytes); + get_pool_stat_option_value<I>( + stat_options, RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES, + &max_provisioned_bytes); + get_pool_stat_option_value<I>( + stat_options, RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS, &snapshot_count); + if (image_count != nullptr || provisioned_bytes != nullptr || + max_provisioned_bytes != nullptr || snapshot_count != nullptr) { + typename Image<I>::ImageNameToIds images; + int r = Image<I>::list_images_v2(io_ctx, &images); + if (r < 0) { + return r; + } + + std::vector<std::string> image_ids; + image_ids.reserve(images.size() + trash_entries.size()); + for (auto& it : images) { + image_ids.push_back(std::move(it.second)); + } + for (auto& it : trash_entries) { + if (it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + image_ids.push_back(std::move(it.id)); + } + } + + r = get_pool_stats<I>(io_ctx, config, image_ids, image_count, + provisioned_bytes, max_provisioned_bytes, + snapshot_count); + if (r < 0) { + return r; + } + } + + get_pool_stat_option_value<I>( + stat_options, RBD_POOL_STAT_OPTION_TRASH_IMAGES, &image_count); + get_pool_stat_option_value<I>( + stat_options, RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES, + &provisioned_bytes); + get_pool_stat_option_value<I>( + stat_options, RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES, + &max_provisioned_bytes); + get_pool_stat_option_value<I>( + stat_options, RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS, &snapshot_count); + if (image_count != nullptr || provisioned_bytes != nullptr || + max_provisioned_bytes != nullptr || snapshot_count != nullptr) { + + std::vector<std::string> image_ids; + image_ids.reserve(trash_entries.size()); + for (auto& it : trash_entries) { + if (it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + continue; + } + image_ids.push_back(std::move(it.id)); + } + + r = get_pool_stats<I>(io_ctx, config, image_ids, image_count, + provisioned_bytes, max_provisioned_bytes, + snapshot_count); + if (r < 0) { + return r; + } + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Pool<librbd::ImageCtx>; diff --git a/src/librbd/api/Pool.h b/src/librbd/api/Pool.h new file mode 100644 index 00000000..7b607ab6 --- /dev/null +++ b/src/librbd/api/Pool.h @@ -0,0 +1,38 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_POOL_H +#define CEPH_LIBRBD_API_POOL_H + +#include "include/int_types.h" +#include "include/rados/librados_fwd.hpp" +#include "include/rbd/librbd.h" +#include <map> + +namespace librbd { + +struct ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +class Pool { +public: + typedef std::map<rbd_pool_stat_option_t, uint64_t*> StatOptions; + + static int init(librados::IoCtx& io_ctx, bool force); + + static int add_stat_option(StatOptions* stat_options, + rbd_pool_stat_option_t option, + uint64_t* value); + + static int get_stats(librados::IoCtx& io_ctx, StatOptions* stat_options); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Pool<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_API_POOL_H diff --git a/src/librbd/api/PoolMetadata.cc b/src/librbd/api/PoolMetadata.cc new file mode 100644 index 00000000..f3d53944 --- /dev/null +++ b/src/librbd/api/PoolMetadata.cc @@ -0,0 +1,151 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/PoolMetadata.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::PoolMetadata: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +void update_pool_timestamp(librados::IoCtx& io_ctx) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + auto now = ceph_clock_now(); + std::string cmd = + R"({)" + R"("prefix": "config set", )" + R"("who": "global", )" + R"("name": "rbd_config_pool_override_update_timestamp", )" + R"("value": ")" + stringify(now.sec()) + R"(")" + R"(})"; + + librados::Rados rados(io_ctx); + bufferlist in_bl; + std::string ss; + int r = rados.mon_command(cmd, in_bl, nullptr, &ss); + if (r < 0) { + lderr(cct) << "failed to notify clients of pool config update: " + << cpp_strerror(r) << dendl; + } +} + +} // anonymous namespace + +template <typename I> +int PoolMetadata<I>::get(librados::IoCtx& io_ctx, + const std::string &key, std::string *value) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + int r = cls_client::metadata_get(&io_ctx, RBD_INFO, key, value); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed reading metadata " << key << ": " << cpp_strerror(r) + << dendl; + } + + return r; +} + +template <typename I> +int PoolMetadata<I>::set(librados::IoCtx& io_ctx, const std::string &key, + const std::string &value) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + bool need_update_pool_timestamp = false; + + std::string config_key; + if (util::is_metadata_config_override(key, &config_key)) { + if (!librbd::api::Config<I>::is_option_name(io_ctx, config_key)) { + lderr(cct) << "validation for " << key + << " failed: not allowed pool level override" << dendl; + return -EINVAL; + } + int r = ConfigProxy{false}.set_val(config_key.c_str(), value); + if (r < 0) { + lderr(cct) << "validation for " << key << " failed: " << cpp_strerror(r) + << dendl; + return -EINVAL; + } + + need_update_pool_timestamp = true; + } + + ceph::bufferlist bl; + bl.append(value); + + int r = cls_client::metadata_set(&io_ctx, RBD_INFO, {{key, bl}}); + if (r < 0) { + lderr(cct) << "failed setting metadata " << key << ": " << cpp_strerror(r) + << dendl; + return r; + } + + if (need_update_pool_timestamp) { + update_pool_timestamp(io_ctx); + } + + return 0; +} + +template <typename I> +int PoolMetadata<I>::remove(librados::IoCtx& io_ctx, const std::string &key) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + std::string value; + int r = cls_client::metadata_get(&io_ctx, RBD_INFO, key, &value); + if (r < 0) { + if (r == -ENOENT) { + ldout(cct, 1) << "metadata " << key << " does not exist" << dendl; + } else { + lderr(cct) << "failed reading metadata " << key << ": " << cpp_strerror(r) + << dendl; + } + return r; + } + + r = cls_client::metadata_remove(&io_ctx, RBD_INFO, key); + if (r < 0) { + lderr(cct) << "failed removing metadata " << key << ": " << cpp_strerror(r) + << dendl; + return r; + } + + std::string config_key; + if (util::is_metadata_config_override(key, &config_key)) { + update_pool_timestamp(io_ctx); + } + + return 0; +} + +template <typename I> +int PoolMetadata<I>::list(librados::IoCtx& io_ctx, const std::string &start, + uint64_t max, + std::map<std::string, ceph::bufferlist> *pairs) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + int r = cls_client::metadata_list(&io_ctx, RBD_INFO, start, max, pairs); + if (r == -ENOENT) { + r = 0; + } else if (r < 0) { + lderr(cct) << "failed listing metadata: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::PoolMetadata<librbd::ImageCtx>; diff --git a/src/librbd/api/PoolMetadata.h b/src/librbd/api/PoolMetadata.h new file mode 100644 index 00000000..c0a81735 --- /dev/null +++ b/src/librbd/api/PoolMetadata.h @@ -0,0 +1,36 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_POOL_METADATA_H +#define CEPH_LIBRBD_API_POOL_METADATA_H + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" + +#include <map> +#include <string> + +namespace librbd { + +class ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +class PoolMetadata { +public: + static int get(librados::IoCtx& io_ctx, const std::string &key, + std::string *value); + static int set(librados::IoCtx& io_ctx, const std::string &key, + const std::string &value); + static int remove(librados::IoCtx& io_ctx, const std::string &key); + static int list(librados::IoCtx& io_ctx, const std::string &start, + uint64_t max, std::map<std::string, ceph::bufferlist> *pairs); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::PoolMetadata<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_API_POOL_METADATA_H diff --git a/src/librbd/api/Snapshot.cc b/src/librbd/api/Snapshot.cc new file mode 100644 index 00000000..3d29cfb2 --- /dev/null +++ b/src/librbd/api/Snapshot.cc @@ -0,0 +1,196 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Snapshot.h" +#include "cls/rbd/cls_rbd_types.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include <boost/variant.hpp> +#include "include/Context.h" +#include "common/Cond.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Snapshot: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +class GetGroupVisitor : public boost::static_visitor<int> { +public: + CephContext* cct; + librados::IoCtx *image_ioctx; + snap_group_namespace_t *group_snap; + + explicit GetGroupVisitor(CephContext* cct, librados::IoCtx *_image_ioctx, + snap_group_namespace_t *group_snap) + : cct(cct), image_ioctx(_image_ioctx), group_snap(group_snap) {}; + + template <typename T> + inline int operator()(const T&) const { + // ignore other than GroupSnapshotNamespace types. + return -EINVAL; + } + + inline int operator()( + const cls::rbd::GroupSnapshotNamespace& snap_namespace) { + IoCtx group_ioctx; + int r = util::create_ioctx(*image_ioctx, "group", snap_namespace.group_pool, + {}, &group_ioctx); + if (r < 0) { + return r; + } + + cls::rbd::GroupSnapshot group_snapshot; + + std::string group_name; + r = cls_client::dir_get_name(&group_ioctx, RBD_GROUP_DIRECTORY, + snap_namespace.group_id, &group_name); + if (r < 0) { + lderr(cct) << "failed to retrieve group name: " << cpp_strerror(r) + << dendl; + return r; + } + + string group_header_oid = util::group_header_name(snap_namespace.group_id); + r = cls_client::group_snap_get_by_id(&group_ioctx, + group_header_oid, + snap_namespace.group_snapshot_id, + &group_snapshot); + if (r < 0) { + lderr(cct) << "failed to retrieve group snapshot: " << cpp_strerror(r) + << dendl; + return r; + } + + group_snap->group_pool = group_ioctx.get_id(); + group_snap->group_name = group_name; + group_snap->group_snap_name = group_snapshot.name; + return 0; + } +}; + +class GetTrashVisitor : public boost::static_visitor<int> { +public: + std::string* original_name; + + explicit GetTrashVisitor(std::string* original_name) + : original_name(original_name) { + } + + template <typename T> + inline int operator()(const T&) const { + return -EINVAL; + } + + inline int operator()( + const cls::rbd::TrashSnapshotNamespace& snap_namespace) { + *original_name = snap_namespace.original_name; + return 0; + } +}; + +} // anonymous namespace + +template <typename I> +int Snapshot<I>::get_group_namespace(I *ictx, uint64_t snap_id, + snap_group_namespace_t *group_snap) { + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + RWLock::RLocker snap_locker(ictx->snap_lock); + auto snap_info = ictx->get_snap_info(snap_id); + if (snap_info == nullptr) { + return -ENOENT; + } + + GetGroupVisitor ggv = GetGroupVisitor(ictx->cct, &ictx->md_ctx, group_snap); + r = boost::apply_visitor(ggv, snap_info->snap_namespace); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Snapshot<I>::get_trash_namespace(I *ictx, uint64_t snap_id, + std::string* original_name) { + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + RWLock::RLocker snap_locker(ictx->snap_lock); + auto snap_info = ictx->get_snap_info(snap_id); + if (snap_info == nullptr) { + return -ENOENT; + } + + auto visitor = GetTrashVisitor(original_name); + r = boost::apply_visitor(visitor, snap_info->snap_namespace); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +int Snapshot<I>::get_namespace_type(I *ictx, uint64_t snap_id, + snap_namespace_type_t *namespace_type) { + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + RWLock::RLocker l(ictx->snap_lock); + auto snap_info = ictx->get_snap_info(snap_id); + if (snap_info == nullptr) { + return -ENOENT; + } + + *namespace_type = static_cast<snap_namespace_type_t>( + cls::rbd::get_snap_namespace_type(snap_info->snap_namespace)); + return 0; +} + +template <typename I> +int Snapshot<I>::remove(I *ictx, uint64_t snap_id) { + ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_id << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + cls::rbd::SnapshotNamespace snapshot_namespace; + std::string snapshot_name; + { + RWLock::RLocker snap_locker(ictx->snap_lock); + auto it = ictx->snap_info.find(snap_id); + if (it == ictx->snap_info.end()) { + return -ENOENT; + } + + snapshot_namespace = it->second.snap_namespace; + snapshot_name = it->second.name; + } + + C_SaferCond ctx; + ictx->operations->snap_remove(snapshot_namespace, snapshot_name, &ctx); + r = ctx.wait(); + return r; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Snapshot<librbd::ImageCtx>; diff --git a/src/librbd/api/Snapshot.h b/src/librbd/api/Snapshot.h new file mode 100644 index 00000000..453861c4 --- /dev/null +++ b/src/librbd/api/Snapshot.h @@ -0,0 +1,37 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_SNAPSHOT_H +#define CEPH_LIBRBD_API_SNAPSHOT_H + +#include "include/rbd/librbd.hpp" +#include <string> + +namespace librbd { + +struct ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +struct Snapshot { + + static int get_group_namespace(ImageCtxT *ictx, uint64_t snap_id, + snap_group_namespace_t *group_snap); + + static int get_trash_namespace(ImageCtxT *ictx, uint64_t snap_id, + std::string *original_name); + + static int get_namespace_type(ImageCtxT *ictx, uint64_t snap_id, + snap_namespace_type_t *namespace_type); + + static int remove(ImageCtxT *ictx, uint64_t snap_id); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Snapshot<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_API_SNAPSHOT_H diff --git a/src/librbd/api/Trash.cc b/src/librbd/api/Trash.cc new file mode 100644 index 00000000..0664461e --- /dev/null +++ b/src/librbd/api/Trash.cc @@ -0,0 +1,681 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Trash.h" +#include "include/rados/librados.hpp" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/internal.h" +#include "librbd/Operations.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/api/DiffIterate.h" +#include "librbd/exclusive_lock/Policy.h" +#include "librbd/image/RemoveRequest.h" +#include "librbd/mirror/DisableRequest.h" +#include "librbd/mirror/EnableRequest.h" +#include "librbd/trash/MoveRequest.h" +#include "librbd/trash/RemoveRequest.h" +#include <json_spirit/json_spirit.h> +#include "librbd/journal/DisabledPolicy.h" +#include "librbd/image/ListWatchersRequest.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Trash: " << __func__ << ": " + +namespace librbd { +namespace api { + +template <typename I> +const typename Trash<I>::TrashImageSources Trash<I>::RESTORE_SOURCE_WHITELIST { + cls::rbd::TRASH_IMAGE_SOURCE_USER, + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING + }; + +namespace { + +template <typename I> +int disable_mirroring(I *ictx) { + if (!ictx->test_features(RBD_FEATURE_JOURNALING)) { + return 0; + } + + cls::rbd::MirrorImage mirror_image; + int r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, &mirror_image); + if (r == -ENOENT) { + ldout(ictx->cct, 10) << "mirroring is not enabled for this image" << dendl; + return 0; + } + + if (r < 0) { + lderr(ictx->cct) << "failed to retrieve mirror image: " << cpp_strerror(r) + << dendl; + return r; + } + + ldout(ictx->cct, 10) << dendl; + + C_SaferCond ctx; + auto req = mirror::DisableRequest<I>::create(ictx, false, true, &ctx); + req->send(); + r = ctx.wait(); + if (r < 0) { + lderr(ictx->cct) << "failed to disable mirroring: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template <typename I> +int enable_mirroring(IoCtx &io_ctx, const std::string &image_id) { + auto cct = reinterpret_cast<CephContext*>(io_ctx.cct()); + + uint64_t features; + uint64_t incompatible_features; + int r = cls_client::get_features(&io_ctx, util::header_name(image_id), true, + &features, &incompatible_features); + if (r < 0) { + lderr(cct) << "failed to retrieve features: " << cpp_strerror(r) << dendl; + return r; + } + + if ((features & RBD_FEATURE_JOURNALING) == 0) { + return 0; + } + + cls::rbd::MirrorMode mirror_mode; + r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r) + << dendl; + return r; + } + + if (mirror_mode != cls::rbd::MIRROR_MODE_POOL) { + ldout(cct, 10) << "not pool mirroring mode" << dendl; + return 0; + } + + ldout(cct, 10) << dendl; + + ThreadPool *thread_pool; + ContextWQ *op_work_queue; + ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue); + C_SaferCond ctx; + auto req = mirror::EnableRequest<I>::create(io_ctx, image_id, "", + op_work_queue, &ctx); + req->send(); + r = ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to enable mirroring: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +} // anonymous namespace + +template <typename I> +int Trash<I>::move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, const std::string &image_id, + uint64_t delay) { + ceph_assert(!image_name.empty() && !image_id.empty()); + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << &io_ctx << " name=" << image_name << ", id=" << image_id + << dendl; + + auto ictx = new I("", image_id, nullptr, io_ctx, false); + int r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl; + return r; + } + + if (r == 0) { + if (ictx->test_features(RBD_FEATURE_JOURNALING)) { + RWLock::WLocker snap_locker(ictx->snap_lock); + ictx->set_journal_policy(new journal::DisabledPolicy()); + } + + ictx->owner_lock.get_read(); + if (ictx->exclusive_lock != nullptr) { + ictx->exclusive_lock->block_requests(0); + + r = ictx->operations->prepare_image_update( + exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, false); + if (r < 0) { + lderr(cct) << "cannot obtain exclusive lock - not removing" << dendl; + ictx->owner_lock.put_read(); + ictx->state->close(); + return -EBUSY; + } + } + ictx->owner_lock.put_read(); + + ictx->snap_lock.get_read(); + if (!ictx->migration_info.empty()) { + lderr(cct) << "cannot move migrating image to trash" << dendl; + ictx->snap_lock.put_read(); + ictx->state->close(); + return -EBUSY; + } + ictx->snap_lock.put_read(); + + r = disable_mirroring<I>(ictx); + if (r < 0) { + ictx->state->close(); + return r; + } + + ictx->state->close(); + } + + utime_t delete_time{ceph_clock_now()}; + utime_t deferment_end_time{delete_time}; + deferment_end_time += delay; + cls::rbd::TrashImageSpec trash_image_spec{ + static_cast<cls::rbd::TrashImageSource>(source), image_name, + delete_time, deferment_end_time}; + + trash_image_spec.state = cls::rbd::TRASH_IMAGE_STATE_MOVING; + C_SaferCond ctx; + auto req = trash::MoveRequest<I>::create(io_ctx, image_id, trash_image_spec, + &ctx); + req->send(); + + r = ctx.wait(); + trash_image_spec.state = cls::rbd::TRASH_IMAGE_STATE_NORMAL; + int ret = cls_client::trash_state_set(&io_ctx, image_id, + trash_image_spec.state, + cls::rbd::TRASH_IMAGE_STATE_MOVING); + if (ret < 0 && ret != -EOPNOTSUPP) { + lderr(cct) << "error setting trash image state: " + << cpp_strerror(ret) << dendl; + return ret; + } + if (r < 0) { + return r; + } + + C_SaferCond notify_ctx; + TrashWatcher<I>::notify_image_added(io_ctx, image_id, trash_image_spec, + ¬ify_ctx); + r = notify_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + + return 0; +} + +template <typename I> +int Trash<I>::move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, uint64_t delay) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << &io_ctx << " name=" << image_name << dendl; + + // try to get image id from the directory + std::string image_id; + int r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name, + &image_id); + if (r == -ENOENT) { + r = io_ctx.stat(util::old_header_name(image_name), nullptr, nullptr); + if (r == 0) { + // cannot move V1 image to trash + ldout(cct, 10) << "cannot move v1 image to trash" << dendl; + return -EOPNOTSUPP; + } + + // image doesn't exist -- perhaps already in the trash since removing + // from the directory is the last step + return -ENOENT; + } else if (r < 0) { + lderr(cct) << "failed to retrieve image id: " << cpp_strerror(r) << dendl; + return r; + } + + ceph_assert(!image_name.empty() && !image_id.empty()); + return Trash<I>::move(io_ctx, source, image_name, image_id, delay); +} + +template <typename I> +int Trash<I>::get(IoCtx &io_ctx, const std::string &id, + trash_image_info_t *info) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << __func__ << " " << &io_ctx << dendl; + + cls::rbd::TrashImageSpec spec; + int r = cls_client::trash_get(&io_ctx, id, &spec); + if (r == -ENOENT) { + return r; + } else if (r < 0) { + lderr(cct) << "error retrieving trash entry: " << cpp_strerror(r) + << dendl; + return r; + } + + rbd_trash_image_source_t source = static_cast<rbd_trash_image_source_t>( + spec.source); + *info = trash_image_info_t{id, spec.name, source, spec.deletion_time.sec(), + spec.deferment_end_time.sec()}; + return 0; +} + +template <typename I> +int Trash<I>::list(IoCtx &io_ctx, vector<trash_image_info_t> &entries, + bool exclude_user_remove_source) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "trash_list " << &io_ctx << dendl; + + bool more_entries; + uint32_t max_read = 1024; + std::string last_read = ""; + do { + map<string, cls::rbd::TrashImageSpec> trash_entries; + int r = cls_client::trash_list(&io_ctx, last_read, max_read, + &trash_entries); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error listing rbd trash entries: " << cpp_strerror(r) + << dendl; + return r; + } else if (r == -ENOENT) { + break; + } + + if (trash_entries.empty()) { + break; + } + + for (const auto &entry : trash_entries) { + rbd_trash_image_source_t source = + static_cast<rbd_trash_image_source_t>(entry.second.source); + if (exclude_user_remove_source && + source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + continue; + } + entries.push_back({entry.first, entry.second.name, source, + entry.second.deletion_time.sec(), + entry.second.deferment_end_time.sec()}); + } + last_read = trash_entries.rbegin()->first; + more_entries = (trash_entries.size() >= max_read); + } while (more_entries); + + return 0; +} + +template <typename I> +int Trash<I>::purge(IoCtx& io_ctx, time_t expire_ts, + float threshold, ProgressContext& pctx) { + auto *cct((CephContext *) io_ctx.cct()); + ldout(cct, 20) << &io_ctx << dendl; + + std::vector<librbd::trash_image_info_t> trash_entries; + int r = librbd::api::Trash<I>::list(io_ctx, trash_entries, true); + if (r < 0) { + return r; + } + + trash_entries.erase( + std::remove_if(trash_entries.begin(), trash_entries.end(), + [](librbd::trash_image_info_t info) { + return info.source != RBD_TRASH_IMAGE_SOURCE_USER; + }), + trash_entries.end()); + + std::set<std::string> to_be_removed; + if (threshold != -1) { + if (threshold < 0 || threshold > 1) { + lderr(cct) << "argument 'threshold' is out of valid range" + << dendl; + return -EINVAL; + } + + librados::bufferlist inbl; + librados::bufferlist outbl; + std::string pool_name = io_ctx.get_pool_name(); + + librados::Rados rados(io_ctx); + rados.mon_command(R"({"prefix": "df", "format": "json"})", inbl, + &outbl, nullptr); + + json_spirit::mValue json; + if (!json_spirit::read(outbl.to_str(), json)) { + lderr(cct) << "ceph df json output could not be parsed" + << dendl; + return -EBADMSG; + } + + json_spirit::mArray arr = json.get_obj()["pools"].get_array(); + + double pool_percent_used = 0; + uint64_t pool_total_bytes = 0; + + std::map<std::string, std::vector<std::string>> datapools; + + std::sort(trash_entries.begin(), trash_entries.end(), + [](librbd::trash_image_info_t a, librbd::trash_image_info_t b) { + return a.deferment_end_time < b.deferment_end_time; + } + ); + + for (const auto &entry : trash_entries) { + int64_t data_pool_id = -1; + r = cls_client::get_data_pool(&io_ctx, util::header_name(entry.id), + &data_pool_id); + if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) { + lderr(cct) << "failed to query data pool: " << cpp_strerror(r) << dendl; + return r; + } else if (data_pool_id == -1) { + data_pool_id = io_ctx.get_id(); + } + + if (data_pool_id != io_ctx.get_id()) { + librados::IoCtx data_io_ctx; + r = util::create_ioctx(io_ctx, "image", data_pool_id, + {}, &data_io_ctx); + if (r < 0) { + lderr(cct) << "error accessing data pool" << dendl; + continue; + } + auto data_pool = data_io_ctx.get_pool_name(); + datapools[data_pool].push_back(entry.id); + } else { + datapools[pool_name].push_back(entry.id); + } + } + + uint64_t bytes_to_free = 0; + + for (uint8_t i = 0; i < arr.size(); ++i) { + json_spirit::mObject obj = arr[i].get_obj(); + std::string name = obj.find("name")->second.get_str(); + auto img = datapools.find(name); + if (img != datapools.end()) { + json_spirit::mObject stats = arr[i].get_obj()["stats"].get_obj(); + pool_percent_used = stats["percent_used"].get_real(); + if (pool_percent_used <= threshold) continue; + + bytes_to_free = 0; + + pool_total_bytes = stats["max_avail"].get_uint64() + + stats["bytes_used"].get_uint64(); + + auto bytes_threshold = (uint64_t) (pool_total_bytes * + (pool_percent_used - threshold)); + + for (const auto &it : img->second) { + auto ictx = new I("", it, nullptr, io_ctx, false); + r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + if (r == -ENOENT) { + continue; + } else if (r < 0) { + lderr(cct) << "failed to open image " << it << ": " + << cpp_strerror(r) << dendl; + } + + r = librbd::api::DiffIterate<I>::diff_iterate( + ictx, cls::rbd::UserSnapshotNamespace(), nullptr, 0, ictx->size, + false, true, + [](uint64_t offset, size_t len, int exists, void *arg) { + auto *to_free = reinterpret_cast<uint64_t *>(arg); + if (exists) + (*to_free) += len; + return 0; + }, &bytes_to_free); + + ictx->state->close(); + if (r < 0) { + lderr(cct) << "failed to calculate disk usage for image " << it + << ": " << cpp_strerror(r) << dendl; + continue; + } + + to_be_removed.insert(it); + if (bytes_to_free >= bytes_threshold) { + break; + } + } + } + } + + if (bytes_to_free == 0) { + ldout(cct, 10) << "pool usage is lower than or equal to " + << (threshold * 100) + << "%" << dendl; + return 0; + } + } + + if (expire_ts == 0) { + struct timespec now; + clock_gettime(CLOCK_REALTIME, &now); + expire_ts = now.tv_sec; + } + + for (const auto &entry : trash_entries) { + if (expire_ts >= entry.deferment_end_time) { + to_be_removed.insert(entry.id); + } + } + + NoOpProgressContext remove_pctx; + uint64_t list_size = to_be_removed.size(), i = 0; + for (const auto &entry_id : to_be_removed) { + r = librbd::api::Trash<I>::remove(io_ctx, entry_id, true, remove_pctx); + if (r < 0) { + if (r == -ENOTEMPTY) { + ldout(cct, 5) << "image has snapshots - these must be deleted " + << "with 'rbd snap purge' before the image can be " + << "removed." << dendl; + } else if (r == -EBUSY) { + ldout(cct, 5) << "error: image still has watchers" << std::endl + << "This means the image is still open or the client " + << "using it crashed. Try again after closing/unmapping " + << "it or waiting 30s for the crashed client to timeout." + << dendl; + } else if (r == -EMLINK) { + ldout(cct, 5) << "Remove the image from the group and try again." + << dendl; + } else { + lderr(cct) << "remove error: " << cpp_strerror(r) << dendl; + } + return r; + } + pctx.update_progress(++i, list_size); + } + + return 0; +} + +template <typename I> +int Trash<I>::remove(IoCtx &io_ctx, const std::string &image_id, bool force, + ProgressContext& prog_ctx) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "trash_remove " << &io_ctx << " " << image_id + << " " << force << dendl; + + cls::rbd::TrashImageSpec trash_spec; + int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec); + if (r < 0) { + lderr(cct) << "error getting image id " << image_id + << " info from trash: " << cpp_strerror(r) << dendl; + return r; + } + + utime_t now = ceph_clock_now(); + if (now < trash_spec.deferment_end_time && !force) { + lderr(cct) << "error: deferment time has not expired." << dendl; + return -EPERM; + } + if (trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL && + trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + lderr(cct) << "error: image is pending restoration." << dendl; + return -EBUSY; + } + + ThreadPool *thread_pool; + ContextWQ *op_work_queue; + ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue); + + C_SaferCond cond; + auto req = librbd::trash::RemoveRequest<I>::create( + io_ctx, image_id, op_work_queue, force, prog_ctx, &cond); + req->send(); + + r = cond.wait(); + if (r < 0) { + return r; + } + + C_SaferCond notify_ctx; + TrashWatcher<I>::notify_image_removed(io_ctx, image_id, ¬ify_ctx); + r = notify_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + + return 0; +} + +template <typename I> +int Trash<I>::restore(librados::IoCtx &io_ctx, + const TrashImageSources& trash_image_sources, + const std::string &image_id, + const std::string &image_new_name) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "trash_restore " << &io_ctx << " " << image_id << " " + << image_new_name << dendl; + + cls::rbd::TrashImageSpec trash_spec; + int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec); + if (r < 0) { + lderr(cct) << "error getting image id " << image_id + << " info from trash: " << cpp_strerror(r) << dendl; + return r; + } + + if (trash_image_sources.count(trash_spec.source) == 0) { + lderr(cct) << "Current trash source '" << trash_spec.source << "' " + << "does not match expected: " + << trash_image_sources << dendl; + return -EINVAL; + } + + std::string image_name = image_new_name; + if (trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL && + trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_RESTORING) { + lderr(cct) << "error restoring image id " << image_id + << ", which is pending deletion" << dendl; + return -EBUSY; + } + r = cls_client::trash_state_set(&io_ctx, image_id, + cls::rbd::TRASH_IMAGE_STATE_RESTORING, + cls::rbd::TRASH_IMAGE_STATE_NORMAL); + if (r < 0 && r != -EOPNOTSUPP) { + lderr(cct) << "error setting trash image state: " + << cpp_strerror(r) << dendl; + return r; + } + + if (image_name.empty()) { + // if user didn't specify a new name, let's try using the old name + image_name = trash_spec.name; + ldout(cct, 20) << "restoring image id " << image_id << " with name " + << image_name << dendl; + } + + // check if no image exists with the same name + bool create_id_obj = true; + std::string existing_id; + r = cls_client::get_id(&io_ctx, util::id_obj_name(image_name), &existing_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error checking if image " << image_name << " exists: " + << cpp_strerror(r) << dendl; + int ret = cls_client::trash_state_set(&io_ctx, image_id, + cls::rbd::TRASH_IMAGE_STATE_NORMAL, + cls::rbd::TRASH_IMAGE_STATE_RESTORING); + if (ret < 0 && ret != -EOPNOTSUPP) { + lderr(cct) << "error setting trash image state: " + << cpp_strerror(ret) << dendl; + } + return r; + } else if (r != -ENOENT){ + // checking if we are recovering from an incomplete restore + if (existing_id != image_id) { + ldout(cct, 2) << "an image with the same name already exists" << dendl; + int r2 = cls_client::trash_state_set(&io_ctx, image_id, + cls::rbd::TRASH_IMAGE_STATE_NORMAL, + cls::rbd::TRASH_IMAGE_STATE_RESTORING); + if (r2 < 0 && r2 != -EOPNOTSUPP) { + lderr(cct) << "error setting trash image state: " + << cpp_strerror(r2) << dendl; + } + return -EEXIST; + } + create_id_obj = false; + } + + if (create_id_obj) { + ldout(cct, 2) << "adding id object" << dendl; + librados::ObjectWriteOperation op; + op.create(true); + cls_client::set_id(&op, image_id); + r = io_ctx.operate(util::id_obj_name(image_name), &op); + if (r < 0) { + lderr(cct) << "error adding id object for image " << image_name + << ": " << cpp_strerror(r) << dendl; + return r; + } + } + + ldout(cct, 2) << "adding rbd image to v2 directory..." << dendl; + r = cls_client::dir_add_image(&io_ctx, RBD_DIRECTORY, image_name, + image_id); + if (r < 0 && r != -EEXIST) { + lderr(cct) << "error adding image to v2 directory: " + << cpp_strerror(r) << dendl; + return r; + } + + r = enable_mirroring<I>(io_ctx, image_id); + if (r < 0) { + // not fatal -- ignore + } + + ldout(cct, 2) << "removing image from trash..." << dendl; + r = cls_client::trash_remove(&io_ctx, image_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error removing image id " << image_id << " from trash: " + << cpp_strerror(r) << dendl; + return r; + } + + C_SaferCond notify_ctx; + TrashWatcher<I>::notify_image_removed(io_ctx, image_id, ¬ify_ctx); + r = notify_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Trash<librbd::ImageCtx>; diff --git a/src/librbd/api/Trash.h b/src/librbd/api/Trash.h new file mode 100644 index 00000000..65b6b8bc --- /dev/null +++ b/src/librbd/api/Trash.h @@ -0,0 +1,53 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef LIBRBD_API_TRASH_H +#define LIBRBD_API_TRASH_H + +#include "include/rados/librados_fwd.hpp" +#include "include/rbd/librbd.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include <set> +#include <string> +#include <vector> + +namespace librbd { + +class ProgressContext; + +struct ImageCtx; + +namespace api { + +template <typename ImageCtxT = librbd::ImageCtx> +struct Trash { + typedef std::set<cls::rbd::TrashImageSource> TrashImageSources; + static const TrashImageSources RESTORE_SOURCE_WHITELIST; + + static int move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, uint64_t delay); + static int move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, const std::string &image_id, + uint64_t delay); + static int get(librados::IoCtx &io_ctx, const std::string &id, + trash_image_info_t *info); + static int list(librados::IoCtx &io_ctx, + std::vector<trash_image_info_t> &entries, + bool exclude_user_remove_source); + static int purge(IoCtx& io_ctx, time_t expire_ts, + float threshold, ProgressContext& pctx); + static int remove(librados::IoCtx &io_ctx, const std::string &image_id, + bool force, ProgressContext& prog_ctx); + static int restore(librados::IoCtx &io_ctx, + const TrashImageSources& trash_image_sources, + const std::string &image_id, + const std::string &image_new_name); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Trash<librbd::ImageCtx>; + +#endif // LIBRBD_API_TRASH_H |