From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/librbd/api/Config.cc | 233 +++++ src/librbd/api/Config.h | 37 + src/librbd/api/DiffIterate.cc | 378 +++++++ src/librbd/api/DiffIterate.h | 66 ++ src/librbd/api/Group.cc | 1287 ++++++++++++++++++++++++ src/librbd/api/Group.h | 60 ++ src/librbd/api/Image.cc | 1015 +++++++++++++++++++ src/librbd/api/Image.h | 85 ++ src/librbd/api/Io.cc | 555 +++++++++++ src/librbd/api/Io.h | 65 ++ src/librbd/api/Migration.cc | 2126 ++++++++++++++++++++++++++++++++++++++++ src/librbd/api/Migration.h | 113 +++ src/librbd/api/Mirror.cc | 2104 +++++++++++++++++++++++++++++++++++++++ src/librbd/api/Mirror.h | 126 +++ src/librbd/api/Namespace.cc | 235 +++++ src/librbd/api/Namespace.h | 33 + src/librbd/api/Pool.cc | 375 +++++++ src/librbd/api/Pool.h | 38 + src/librbd/api/PoolMetadata.cc | 156 +++ src/librbd/api/PoolMetadata.h | 37 + src/librbd/api/Snapshot.cc | 444 +++++++++ src/librbd/api/Snapshot.h | 67 ++ src/librbd/api/Trash.cc | 759 ++++++++++++++ src/librbd/api/Trash.h | 53 + src/librbd/api/Utils.cc | 102 ++ src/librbd/api/Utils.h | 28 + 26 files changed, 10577 insertions(+) create mode 100644 src/librbd/api/Config.cc create mode 100644 src/librbd/api/Config.h create mode 100644 src/librbd/api/DiffIterate.cc create mode 100644 src/librbd/api/DiffIterate.h create mode 100644 src/librbd/api/Group.cc create mode 100644 src/librbd/api/Group.h create mode 100644 src/librbd/api/Image.cc create mode 100644 src/librbd/api/Image.h create mode 100644 src/librbd/api/Io.cc create mode 100644 src/librbd/api/Io.h create mode 100644 src/librbd/api/Migration.cc create mode 100644 src/librbd/api/Migration.h create mode 100644 src/librbd/api/Mirror.cc create mode 100644 src/librbd/api/Mirror.h create mode 100644 src/librbd/api/Namespace.cc create mode 100644 src/librbd/api/Namespace.h create mode 
100644 src/librbd/api/Pool.cc create mode 100644 src/librbd/api/Pool.h create mode 100644 src/librbd/api/PoolMetadata.cc create mode 100644 src/librbd/api/PoolMetadata.h create mode 100644 src/librbd/api/Snapshot.cc create mode 100644 src/librbd/api/Snapshot.h create mode 100644 src/librbd/api/Trash.cc create mode 100644 src/librbd/api/Trash.h create mode 100644 src/librbd/api/Utils.cc create mode 100644 src/librbd/api/Utils.h (limited to 'src/librbd/api') diff --git a/src/librbd/api/Config.cc b/src/librbd/api/Config.cc new file mode 100644 index 000000000..8148607e3 --- /dev/null +++ b/src/librbd/api/Config.cc @@ -0,0 +1,233 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Config.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/api/PoolMetadata.h" +#include "librbd/image/GetMetadataRequest.h" +#include +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::Config: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +const uint32_t MAX_KEYS = 64; + +typedef std::map> Parent; + +static std::set EXCLUDE_OPTIONS { + "rbd_auto_exclusive_lock_until_manual_request", + "rbd_default_format", + "rbd_default_pool", + "rbd_discard_on_zeroed_write_same", + "rbd_op_thread_timeout", + "rbd_op_threads", + "rbd_tracing", + "rbd_validate_names", + "rbd_validate_pool", + "rbd_mirror_pool_replayers_refresh_interval", + "rbd_config_pool_override_update_timestamp" + }; +static std::set EXCLUDE_IMAGE_OPTIONS { + "rbd_default_clone_format", + "rbd_default_data_pool", + "rbd_default_features", + "rbd_default_format", + "rbd_default_order", + "rbd_default_stripe_count", + "rbd_default_stripe_unit", + "rbd_journal_order", + "rbd_journal_pool", + "rbd_journal_splay_width" + }; + +struct Options : Parent { + librados::IoCtx 
m_io_ctx; + + Options(librados::IoCtx& io_ctx, bool image_apply_only_options) { + m_io_ctx.dup(io_ctx); + m_io_ctx.set_namespace(""); + + CephContext *cct = reinterpret_cast(m_io_ctx.cct()); + + const std::string rbd_key_prefix("rbd_"); + const std::string rbd_mirror_key_prefix("rbd_mirror_"); + auto& schema = cct->_conf.get_schema(); + for (auto& pair : schema) { + if (!boost::starts_with(pair.first, rbd_key_prefix)) { + continue; + } else if (EXCLUDE_OPTIONS.count(pair.first) != 0) { + continue; + } else if (image_apply_only_options && + EXCLUDE_IMAGE_OPTIONS.count(pair.first) != 0) { + continue; + } else if (image_apply_only_options && + boost::starts_with(pair.first, rbd_mirror_key_prefix)) { + continue; + } + + insert({pair.first, {}}); + } + } + + int init() { + CephContext *cct = (CephContext *)m_io_ctx.cct(); + + for (auto& [k,v] : *this) { + int r = cct->_conf.get_val(k, &v.first); + ceph_assert(r == 0); + v.second = RBD_CONFIG_SOURCE_CONFIG; + } + + std::string last_key = ImageCtx::METADATA_CONF_PREFIX; + bool more_results = true; + + while (more_results) { + std::map pairs; + + int r = librbd::api::PoolMetadata<>::list(m_io_ctx, last_key, MAX_KEYS, + &pairs); + if (r < 0) { + return r; + } + + if (pairs.empty()) { + break; + } + + more_results = (pairs.size() == MAX_KEYS); + last_key = pairs.rbegin()->first; + + for (auto kv : pairs) { + std::string key; + if (!util::is_metadata_config_override(kv.first, &key)) { + more_results = false; + break; + } + auto it = find(key); + if (it != end()) { + it->second = {{kv.second.c_str(), kv.second.length()}, + RBD_CONFIG_SOURCE_POOL}; + } + } + } + return 0; + } +}; + +} // anonymous namespace + +template +bool Config::is_option_name(librados::IoCtx& io_ctx, + const std::string &name) { + Options opts(io_ctx, false); + + return (opts.find(name) != opts.end()); +} + +template +int Config::list(librados::IoCtx& io_ctx, + std::vector *options) { + Options opts(io_ctx, false); + + int r = opts.init(); + if (r < 0) { + 
return r; + } + + for (auto& [k,v] : opts) { + options->push_back({std::string{k}, v.first, v.second}); + } + + return 0; +} + +template +bool Config::is_option_name(I *image_ctx, const std::string &name) { + Options opts(image_ctx->md_ctx, true); + + return (opts.find(name) != opts.end()); +} + +template +int Config::list(I *image_ctx, std::vector *options) { + CephContext *cct = image_ctx->cct; + Options opts(image_ctx->md_ctx, true); + + int r = opts.init(); + if (r < 0) { + return r; + } + + std::map pairs; + C_SaferCond ctx; + auto req = image::GetMetadataRequest::create( + image_ctx->md_ctx, image_ctx->header_oid, true, + ImageCtx::METADATA_CONF_PREFIX, ImageCtx::METADATA_CONF_PREFIX, 0U, &pairs, + &ctx); + req->send(); + + r = ctx.wait(); + if (r < 0) { + lderr(cct) << "failed reading image metadata: " << cpp_strerror(r) + << dendl; + return r; + } + + for (auto kv : pairs) { + std::string key; + if (!util::is_metadata_config_override(kv.first, &key)) { + break; + } + auto it = opts.find(key); + if (it != opts.end()) { + it->second = {{kv.second.c_str(), kv.second.length()}, + RBD_CONFIG_SOURCE_IMAGE}; + } + } + + for (auto& [k,v] : opts) { + options->push_back({std::string{k}, v.first, v.second}); + } + + return 0; +} + +template +void Config::apply_pool_overrides(librados::IoCtx& io_ctx, + ConfigProxy* config) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + + Options opts(io_ctx, false); + int r = opts.init(); + if (r < 0) { + lderr(cct) << "failed to read pool config overrides: " << cpp_strerror(r) + << dendl; + return; + } + + for (auto& [k,v] : opts) { + if (v.second == RBD_CONFIG_SOURCE_POOL) { + r = config->set_val(k, v.first); + if (r < 0) { + lderr(cct) << "failed to override pool config " << k << "=" + << v.first << ": " << cpp_strerror(r) << dendl; + } + } + } +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Config; diff --git a/src/librbd/api/Config.h b/src/librbd/api/Config.h new file mode 100644 index 
000000000..83225d287 --- /dev/null +++ b/src/librbd/api/Config.h @@ -0,0 +1,37 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_CONFIG_H +#define CEPH_LIBRBD_API_CONFIG_H + +#include "common/config_fwd.h" +#include "include/common_fwd.h" +#include "include/rbd/librbd.hpp" +#include "include/rados/librados_fwd.hpp" + +namespace librbd { + +class ImageCtx; + +namespace api { + +template +class Config { +public: + static bool is_option_name(librados::IoCtx& io_ctx, const std::string &name); + static int list(librados::IoCtx& io_ctx, + std::vector *options); + + static bool is_option_name(ImageCtxT *image_ctx, const std::string &name); + static int list(ImageCtxT *image_ctx, std::vector *options); + + static void apply_pool_overrides(librados::IoCtx& io_ctx, + ConfigProxy* config); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Config; + +#endif // CEPH_LIBRBD_API_CONFIG_H diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc new file mode 100644 index 000000000..b400b5d5a --- /dev/null +++ b/src/librbd/api/DiffIterate.cc @@ -0,0 +1,378 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/DiffIterate.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "librbd/internal.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ImageDispatchSpec.h" +#include "librbd/object_map/DiffRequest.h" +#include "include/rados/librados.hpp" +#include "include/interval_set.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "common/Throttle.h" +#include "osdc/Striper.h" +#include +#include +#include +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::DiffIterate: " + +namespace librbd { +namespace api { + +namespace { 
+ +struct DiffContext { + DiffIterate<>::Callback callback; + void *callback_arg; + bool whole_object; + bool include_parent; + uint64_t from_snap_id; + uint64_t end_snap_id; + OrderedThrottle throttle; + + template + DiffContext(I &image_ctx, DiffIterate<>::Callback callback, + void *callback_arg, bool _whole_object, bool _include_parent, + uint64_t _from_snap_id, uint64_t _end_snap_id) + : callback(callback), callback_arg(callback_arg), + whole_object(_whole_object), include_parent(_include_parent), + from_snap_id(_from_snap_id), end_snap_id(_end_snap_id), + throttle(image_ctx.config.template get_val("rbd_concurrent_management_ops"), true) { + } +}; + +template +class C_DiffObject : public Context { +public: + C_DiffObject(I &image_ctx, DiffContext &diff_context, uint64_t image_offset, + uint64_t image_length) + : m_image_ctx(image_ctx), m_cct(image_ctx.cct), + m_diff_context(diff_context), m_image_offset(image_offset), + m_image_length(image_length) { + } + + void send() { + Context* ctx = m_diff_context.throttle.start_op(this); + auto aio_comp = io::AioCompletion::create_and_start( + ctx, util::get_image_ctx(&m_image_ctx), io::AIO_TYPE_GENERIC); + int list_snaps_flags = 0; + if (!m_diff_context.include_parent || m_diff_context.from_snap_id != 0) { + list_snaps_flags |= io::LIST_SNAPS_FLAG_DISABLE_LIST_FROM_PARENT; + } + if (m_diff_context.whole_object) { + list_snaps_flags |= io::LIST_SNAPS_FLAG_WHOLE_OBJECT; + } + auto req = io::ImageDispatchSpec::create_list_snaps( + m_image_ctx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START, + aio_comp, {{m_image_offset, m_image_length}}, io::ImageArea::DATA, + {m_diff_context.from_snap_id, m_diff_context.end_snap_id}, + list_snaps_flags, &m_snapshot_delta, {}); + req->send(); + } + +protected: + typedef boost::tuple Diff; + typedef std::list Diffs; + + void finish(int r) override { + CephContext *cct = m_cct; + + if (r < 0) { + ldout(cct, 20) << "list_snaps failed: " << m_image_offset << "~" + << m_image_length << ": " << 
cpp_strerror(r) << dendl; + } + + Diffs diffs; + ldout(cct, 20) << "image extent " << m_image_offset << "~" + << m_image_length << ": list_snaps complete" << dendl; + + compute_diffs(&diffs); + for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) { + r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(), + m_diff_context.callback_arg); + if (r < 0) { + break; + } + } + m_diff_context.throttle.end_op(r); + } + +private: + I& m_image_ctx; + CephContext *m_cct; + DiffContext &m_diff_context; + uint64_t m_image_offset; + uint64_t m_image_length; + + io::SnapshotDelta m_snapshot_delta; + + void compute_diffs(Diffs *diffs) { + CephContext *cct = m_cct; + + // merge per-snapshot deltas into an aggregate + io::SparseExtents aggregate_snapshot_extents; + for (auto& [key, snapshot_extents] : m_snapshot_delta) { + for (auto& snapshot_extent : snapshot_extents) { + auto state = snapshot_extent.get_val().state; + + // ignore DNE object (and parent) + if ((state == io::SPARSE_EXTENT_STATE_DNE) || + (key == io::INITIAL_WRITE_READ_SNAP_IDS && + state == io::SPARSE_EXTENT_STATE_ZEROED)) { + continue; + } + + aggregate_snapshot_extents.insert( + snapshot_extent.get_off(), snapshot_extent.get_len(), + {state, snapshot_extent.get_len()}); + } + } + + // build delta callback set + for (auto& snapshot_extent : aggregate_snapshot_extents) { + ldout(cct, 20) << "off=" << snapshot_extent.get_off() << ", " + << "len=" << snapshot_extent.get_len() << ", " + << "state=" << snapshot_extent.get_val().state << dendl; + diffs->emplace_back( + snapshot_extent.get_off(), snapshot_extent.get_len(), + snapshot_extent.get_val().state == io::SPARSE_EXTENT_STATE_DATA); + } + } +}; + +int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) { + // it's possible for a discard to create a hole in the parent image -- ignore + if (exists) { + interval_set *diff = static_cast *>(arg); + diff->insert(off, len); + } + return 0; +} + +} // anonymous namespace + +template 
+int DiffIterate::diff_iterate(I *ictx, + const cls::rbd::SnapshotNamespace& from_snap_namespace, + const char *fromsnapname, + uint64_t off, uint64_t len, + bool include_parent, bool whole_object, + int (*cb)(uint64_t, size_t, int, void *), + void *arg) +{ + ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off + << " len = " << len << dendl; + + if (!ictx->data_ctx.is_valid()) { + return -ENODEV; + } + + // ensure previous writes are visible to listsnaps + C_SaferCond flush_ctx; + { + std::shared_lock owner_locker{ictx->owner_lock}; + auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx, + io::AIO_TYPE_FLUSH); + auto req = io::ImageDispatchSpec::create_flush( + *ictx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START, + aio_comp, io::FLUSH_SOURCE_INTERNAL, {}); + req->send(); + } + int r = flush_ctx.wait(); + if (r < 0) { + return r; + } + + r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + ictx->image_lock.lock_shared(); + r = clip_io(ictx, off, &len, io::ImageArea::DATA); + ictx->image_lock.unlock_shared(); + if (r < 0) { + return r; + } + + DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len, + include_parent, whole_object, cb, arg); + r = command.execute(); + return r; +} + +template +int DiffIterate::execute() { + CephContext* cct = m_image_ctx.cct; + + ceph_assert(m_image_ctx.data_ctx.is_valid()); + + librados::snap_t from_snap_id = 0; + librados::snap_t end_snap_id; + uint64_t from_size = 0; + uint64_t end_size; + { + std::shared_lock image_locker{m_image_ctx.image_lock}; + if (m_from_snap_name) { + from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, + m_from_snap_name); + from_size = m_image_ctx.get_image_size(from_snap_id); + } + end_snap_id = m_image_ctx.snap_id; + end_size = m_image_ctx.get_image_size(end_snap_id); + } + + if (from_snap_id == CEPH_NOSNAP) { + return -ENOENT; + } + if (from_snap_id == end_snap_id) { + // no diff. 
+ return 0; + } + if (from_snap_id >= end_snap_id) { + return -EINVAL; + } + + int r; + bool fast_diff_enabled = false; + BitVector<2> object_diff_state; + interval_set parent_diff; + if (m_whole_object) { + C_SaferCond ctx; + auto req = object_map::DiffRequest::create(&m_image_ctx, from_snap_id, + end_snap_id, + &object_diff_state, &ctx); + req->send(); + + r = ctx.wait(); + if (r < 0) { + ldout(cct, 5) << "fast diff disabled" << dendl; + } else { + ldout(cct, 5) << "fast diff enabled" << dendl; + fast_diff_enabled = true; + + // check parent overlap only if we are comparing to the beginning of time + if (m_include_parent && from_snap_id == 0) { + std::shared_lock image_locker{m_image_ctx.image_lock}; + uint64_t raw_overlap = 0; + m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &raw_overlap); + auto overlap = m_image_ctx.reduce_parent_overlap(raw_overlap, false); + if (overlap.first > 0 && overlap.second == io::ImageArea::DATA) { + ldout(cct, 10) << " first getting parent diff" << dendl; + DiffIterate diff_parent(*m_image_ctx.parent, {}, nullptr, 0, + overlap.first, true, true, &simple_diff_cb, + &parent_diff); + r = diff_parent.execute(); + if (r < 0) { + return r; + } + } + } + } + } + + ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to " + << end_snap_id << " size from " << from_size + << " to " << end_size << dendl; + DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg, + m_whole_object, m_include_parent, from_snap_id, + end_snap_id); + + uint64_t period = m_image_ctx.get_stripe_period(); + uint64_t off = m_offset; + uint64_t left = m_length; + + while (left > 0) { + uint64_t period_off = off - (off % period); + uint64_t read_len = std::min(period_off + period - off, left); + + if (fast_diff_enabled) { + // map to extents + std::map > object_extents; + Striper::file_to_extents(cct, m_image_ctx.format_string, + &m_image_ctx.layout, off, read_len, 0, + object_extents, 0); + + // get diff info for each object and merge adjacent stripe 
units + // into an aggregate (this also sorts them) + io::SparseExtents aggregate_sparse_extents; + for (auto& [object, extents] : object_extents) { + const uint64_t object_no = extents.front().objectno; + uint8_t diff_state = object_diff_state[object_no]; + ldout(cct, 20) << "object " << object << ": diff_state=" + << (int)diff_state << dendl; + + if (diff_state == object_map::DIFF_STATE_HOLE && + from_snap_id == 0 && !parent_diff.empty()) { + // no data in child object -- report parent diff instead + for (auto& oe : extents) { + for (auto& be : oe.buffer_extents) { + interval_set o; + o.insert(off + be.first, be.second); + o.intersection_of(parent_diff); + ldout(cct, 20) << " reporting parent overlap " << o << dendl; + for (auto e = o.begin(); e != o.end(); ++e) { + aggregate_sparse_extents.insert(e.get_start(), e.get_len(), + {io::SPARSE_EXTENT_STATE_DATA, + e.get_len()}); + } + } + } + } else if (diff_state == object_map::DIFF_STATE_HOLE_UPDATED || + diff_state == object_map::DIFF_STATE_DATA_UPDATED) { + auto state = (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ? 
+ io::SPARSE_EXTENT_STATE_ZEROED : io::SPARSE_EXTENT_STATE_DATA); + for (auto& oe : extents) { + for (auto& be : oe.buffer_extents) { + aggregate_sparse_extents.insert(off + be.first, be.second, + {state, be.second}); + } + } + } + } + + for (const auto& se : aggregate_sparse_extents) { + ldout(cct, 20) << "off=" << se.get_off() << ", len=" << se.get_len() + << ", state=" << se.get_val().state << dendl; + r = m_callback(se.get_off(), se.get_len(), + se.get_val().state == io::SPARSE_EXTENT_STATE_DATA, + m_callback_arg); + if (r < 0) { + return r; + } + } + } else { + auto diff_object = new C_DiffObject(m_image_ctx, diff_context, off, + read_len); + diff_object->send(); + + if (diff_context.throttle.pending_error()) { + r = diff_context.throttle.wait_for_ret(); + return r; + } + } + + left -= read_len; + off += read_len; + } + + r = diff_context.throttle.wait_for_ret(); + if (r < 0) { + return r; + } + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::DiffIterate; diff --git a/src/librbd/api/DiffIterate.h b/src/librbd/api/DiffIterate.h new file mode 100644 index 000000000..e6074d9cb --- /dev/null +++ b/src/librbd/api/DiffIterate.h @@ -0,0 +1,66 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_DIFF_ITERATE_H +#define CEPH_LIBRBD_API_DIFF_ITERATE_H + +#include "include/int_types.h" +#include "common/bit_vector.hpp" +#include "cls/rbd/cls_rbd_types.h" + +namespace librbd { + +class ImageCtx; + +namespace api { + +template +class DiffIterate { +public: + typedef int (*Callback)(uint64_t, size_t, int, void *); + + static int diff_iterate(ImageCtxT *ictx, + const cls::rbd::SnapshotNamespace& from_snap_namespace, + const char *fromsnapname, + uint64_t off, uint64_t len, bool include_parent, + bool whole_object, + int (*cb)(uint64_t, size_t, int, void *), + void *arg); + +private: + ImageCtxT &m_image_ctx; + cls::rbd::SnapshotNamespace 
m_from_snap_namespace; + const char* m_from_snap_name; + uint64_t m_offset; + uint64_t m_length; + bool m_include_parent; + bool m_whole_object; + Callback m_callback; + void *m_callback_arg; + + DiffIterate(ImageCtxT &image_ctx, + const cls::rbd::SnapshotNamespace& from_snap_namespace, + const char *from_snap_name, uint64_t off, uint64_t len, + bool include_parent, bool whole_object, Callback callback, + void *callback_arg) + : m_image_ctx(image_ctx), m_from_snap_namespace(from_snap_namespace), + m_from_snap_name(from_snap_name), m_offset(off), + m_length(len), m_include_parent(include_parent), + m_whole_object(whole_object), m_callback(callback), + m_callback_arg(callback_arg) + { + } + + int execute(); + + int diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id, + BitVector<2>* object_diff_state); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::DiffIterate; + +#endif // CEPH_LIBRBD_API_DIFF_ITERATE_H diff --git a/src/librbd/api/Group.cc b/src/librbd/api/Group.cc new file mode 100644 index 000000000..e5f3da69c --- /dev/null +++ b/src/librbd/api/Group.cc @@ -0,0 +1,1287 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/Cond.h" +#include "common/errno.h" + +#include "librbd/ExclusiveLock.h" +#include "librbd/api/Group.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/ImageWatcher.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/internal.h" +#include "librbd/io/AioCompletion.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Group: " << __func__ << ": " + +using std::map; +using std::pair; +using std::set; +using std::string; +using std::vector; +// list binds to list() here, so std::list is explicitly used below + +using ceph::bufferlist; +using librados::snap_t; +using librados::IoCtx; +using librados::Rados; + + +namespace 
librbd { +namespace api { + +namespace { + +template +snap_t get_group_snap_id(I* ictx, + const cls::rbd::SnapshotNamespace& in_snap_namespace) { + ceph_assert(ceph_mutex_is_locked(ictx->image_lock)); + auto it = ictx->snap_ids.lower_bound({cls::rbd::GroupSnapshotNamespace{}, + ""}); + for (; it != ictx->snap_ids.end(); ++it) { + if (it->first.first == in_snap_namespace) { + return it->second; + } else if (!std::holds_alternative( + it->first.first)) { + break; + } + } + return CEPH_NOSNAP; +} + +string generate_uuid(librados::IoCtx& io_ctx) +{ + Rados rados(io_ctx); + uint64_t bid = rados.get_instance_id(); + + uint32_t extra = rand() % 0xFFFFFFFF; + std::ostringstream bid_ss; + bid_ss << std::hex << bid << std::hex << extra; + return bid_ss.str(); +} + +int group_snap_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector *cls_snaps) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + vector ind_snap_names; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + string group_header_oid = util::group_header_name(group_id); + + const int max_read = 1024; + cls::rbd::GroupSnapshot snap_last; + + for (;;) { + vector snaps_page; + + r = cls_client::group_snap_list(&group_ioctx, group_header_oid, + snap_last, max_read, &snaps_page); + + if (r < 0) { + lderr(cct) << "error reading snap list from group: " + << cpp_strerror(-r) << dendl; + return r; + } + cls_snaps->insert(cls_snaps->end(), snaps_page.begin(), snaps_page.end()); + if (snaps_page.size() < max_read) { + break; + } + snap_last = *snaps_page.rbegin(); + } + + return 0; +} + +std::string calc_ind_image_snap_name(uint64_t pool_id, + const std::string &group_id, + const std::string &snap_id) +{ + std::stringstream ind_snap_name_stream; + ind_snap_name_stream << ".group." 
<< std::hex << pool_id << "_" + << group_id << "_" << snap_id; + return ind_snap_name_stream.str(); +} + +int group_image_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector *image_ids) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + string group_header_oid = util::group_header_name(group_id); + + ldout(cct, 20) << "listing images in group name " + << group_name << " group id " << group_header_oid << dendl; + image_ids->clear(); + + const int max_read = 1024; + cls::rbd::GroupImageSpec start_last; + do { + std::vector image_ids_page; + + r = cls_client::group_image_list(&group_ioctx, group_header_oid, + start_last, max_read, &image_ids_page); + + if (r < 0) { + lderr(cct) << "error reading image list from group: " + << cpp_strerror(-r) << dendl; + return r; + } + image_ids->insert(image_ids->end(), + image_ids_page.begin(), image_ids_page.end()); + + if (image_ids_page.size() > 0) + start_last = image_ids_page.rbegin()->spec; + + r = image_ids_page.size(); + } while (r == max_read); + + return 0; +} + +int group_image_remove(librados::IoCtx& group_ioctx, string group_id, + librados::IoCtx& image_ioctx, string image_id) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_header_oid = util::group_header_name(group_id); + + string image_header_oid = util::header_name(image_id); + + ldout(cct, 20) << "removing image " << image_id + << " image id " << image_header_oid << dendl; + + cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id()); + + cls::rbd::GroupImageStatus incomplete_st(image_id, image_ioctx.get_id(), + cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE); + + cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id()); + + int r = cls_client::group_image_set(&group_ioctx, 
group_header_oid, + incomplete_st); + + if (r < 0) { + lderr(cct) << "couldn't put image into removing state: " + << cpp_strerror(-r) << dendl; + return r; + } + + r = cls_client::image_group_remove(&image_ioctx, image_header_oid, + group_spec); + if ((r < 0) && (r != -ENOENT)) { + lderr(cct) << "couldn't remove group reference from image" + << cpp_strerror(-r) << dendl; + return r; + } else if (r >= 0) { + ImageWatcher<>::notify_header_update(image_ioctx, image_header_oid); + } + + r = cls_client::group_image_remove(&group_ioctx, group_header_oid, spec); + if (r < 0) { + lderr(cct) << "couldn't remove image from group" + << cpp_strerror(-r) << dendl; + return r; + } + + return 0; +} + +int group_snap_remove_by_record(librados::IoCtx& group_ioctx, + const cls::rbd::GroupSnapshot& group_snap, + const std::string& group_id, + const std::string& group_header_oid) { + + CephContext *cct = (CephContext *)group_ioctx.cct(); + std::vector on_finishes; + int r, ret_code; + + std::vector ictxs; + + cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id, + group_snap.id}; + + ldout(cct, 20) << "Removing snapshots" << dendl; + int snap_count = group_snap.snaps.size(); + + for (int i = 0; i < snap_count; ++i) { + librbd::IoCtx image_io_ctx; + r = util::create_ioctx(group_ioctx, "image", group_snap.snaps[i].pool, {}, + &image_io_ctx); + if (r < 0) { + return r; + } + + librbd::ImageCtx* image_ctx = new ImageCtx("", group_snap.snaps[i].image_id, + nullptr, image_io_ctx, false); + + C_SaferCond* on_finish = new C_SaferCond; + + image_ctx->state->open(0, on_finish); + + ictxs.push_back(image_ctx); + on_finishes.push_back(on_finish); + } + + ret_code = 0; + for (int i = 0; i < snap_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0) { + ictxs[i] = nullptr; + ret_code = r; + } + } + if (ret_code != 0) { + goto finish; + } + + ldout(cct, 20) << "Opened participating images. " << + "Deleting snapshots themselves." 
<< dendl; + + for (int i = 0; i < snap_count; ++i) { + ImageCtx *ictx = ictxs[i]; + on_finishes[i] = new C_SaferCond; + + std::string snap_name; + ictx->image_lock.lock_shared(); + snap_t snap_id = get_group_snap_id(ictx, ne); + r = ictx->get_snap_name(snap_id, &snap_name); + ictx->image_lock.unlock_shared(); + + if (r >= 0) { + ldout(cct, 20) << "removing individual snapshot from image " << ictx->name + << dendl; + ictx->operations->snap_remove(ne, snap_name, on_finishes[i]); + } else { + // We are ok to ignore missing image snapshots. The snapshot could have + // been inconsistent in the first place. + on_finishes[i]->complete(0); + } + } + + for (int i = 0; i < snap_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0 && r != -ENOENT) { + // if previous attempts to remove this snapshot failed then the image's + // snapshot may not exist + lderr(cct) << "Failed deleting image snapshot. Ret code: " << r << dendl; + ret_code = r; + } + } + + if (ret_code != 0) { + goto finish; + } + + ldout(cct, 20) << "Removed images snapshots removing snapshot record." 
+ << dendl; + + r = cls_client::group_snap_remove(&group_ioctx, group_header_oid, + group_snap.id); + if (r < 0) { + ret_code = r; + goto finish; + } + +finish: + for (int i = 0; i < snap_count; ++i) { + if (ictxs[i] != nullptr) { + ictxs[i]->state->close(); + } + } + return ret_code; +} + +int group_snap_rollback_by_record(librados::IoCtx& group_ioctx, + const cls::rbd::GroupSnapshot& group_snap, + const std::string& group_id, + const std::string& group_header_oid, + ProgressContext& pctx) { + CephContext *cct = (CephContext *)group_ioctx.cct(); + std::vector on_finishes; + int r, ret_code; + + std::vector ictxs; + + cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id, + group_snap.id}; + + ldout(cct, 20) << "Rolling back snapshots" << dendl; + int snap_count = group_snap.snaps.size(); + + for (int i = 0; i < snap_count; ++i) { + librados::IoCtx image_io_ctx; + r = util::create_ioctx(group_ioctx, "image", group_snap.snaps[i].pool, {}, + &image_io_ctx); + if (r < 0) { + return r; + } + + librbd::ImageCtx* image_ctx = new ImageCtx("", group_snap.snaps[i].image_id, + nullptr, image_io_ctx, false); + + C_SaferCond* on_finish = new C_SaferCond; + + image_ctx->state->open(0, on_finish); + + ictxs.push_back(image_ctx); + on_finishes.push_back(on_finish); + } + + ret_code = 0; + for (int i = 0; i < snap_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0) { + ictxs[i] = nullptr; + ret_code = r; + } + } + if (ret_code != 0) { + goto finish; + } + + ldout(cct, 20) << "Requesting exclusive locks for images" << dendl; + for (auto ictx: ictxs) { + std::shared_lock owner_lock{ictx->owner_lock}; + if (ictx->exclusive_lock != nullptr) { + ictx->exclusive_lock->block_requests(-EBUSY); + } + } + for (int i = 0; i < snap_count; ++i) { + ImageCtx *ictx = ictxs[i]; + std::shared_lock owner_lock{ictx->owner_lock}; + + on_finishes[i] = new C_SaferCond; + if (ictx->exclusive_lock != nullptr) { + 
ictx->exclusive_lock->acquire_lock(on_finishes[i]); + } + } + + ret_code = 0; + for (int i = 0; i < snap_count; ++i) { + r = 0; + ImageCtx *ictx = ictxs[i]; + if (ictx->exclusive_lock != nullptr) { + r = on_finishes[i]->wait(); + } + delete on_finishes[i]; + if (r < 0) { + ret_code = r; + } + } + if (ret_code != 0) { + goto finish; + } + + for (int i = 0; i < snap_count; ++i) { + ImageCtx *ictx = ictxs[i]; + on_finishes[i] = new C_SaferCond; + + std::shared_lock owner_locker{ictx->owner_lock}; + std::string snap_name; + ictx->image_lock.lock_shared(); + snap_t snap_id = get_group_snap_id(ictx, ne); + r = ictx->get_snap_name(snap_id, &snap_name); + ictx->image_lock.unlock_shared(); + + if (r >= 0) { + ldout(cct, 20) << "rolling back to individual snapshot for image " << ictx->name + << dendl; + ictx->operations->execute_snap_rollback(ne, snap_name, pctx, on_finishes[i]); + } else { + on_finishes[i]->complete(r); + } + } + + for (int i = 0; i < snap_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0 && r != -ENOENT) { + lderr(cct) << "Failed rolling back group to snapshot. 
Ret code: " << r << dendl; + ret_code = r; + } + } + +finish: + for (int i = 0; i < snap_count; ++i) { + if (ictxs[i] != nullptr) { + ictxs[i]->state->close(); + } + } + return ret_code; +} + +template +void notify_unquiesce(std::vector &ictxs, + const std::vector &requests) { + if (requests.empty()) { + return; + } + + ceph_assert(requests.size() == ictxs.size()); + int image_count = ictxs.size(); + std::vector on_finishes(image_count); + + for (int i = 0; i < image_count; ++i) { + ImageCtx *ictx = ictxs[i]; + + ictx->image_watcher->notify_unquiesce(requests[i], &on_finishes[i]); + } + + for (int i = 0; i < image_count; ++i) { + on_finishes[i].wait(); + } +} + +template +int notify_quiesce(std::vector &ictxs, ProgressContext &prog_ctx, + std::vector *requests) { + int image_count = ictxs.size(); + std::vector on_finishes(image_count); + + requests->resize(image_count); + for (int i = 0; i < image_count; ++i) { + auto ictx = ictxs[i]; + + ictx->image_watcher->notify_quiesce(&(*requests)[i], prog_ctx, + &on_finishes[i]); + } + + int ret_code = 0; + for (int i = 0; i < image_count; ++i) { + int r = on_finishes[i].wait(); + if (r < 0) { + ret_code = r; + } + } + + if (ret_code != 0) { + notify_unquiesce(ictxs, *requests); + } + + return ret_code; +} + +} // anonymous namespace + +template +int Group::image_remove_by_id(librados::IoCtx& group_ioctx, + const char *group_name, + librados::IoCtx& image_ioctx, + const char *image_id) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + ldout(cct, 20) << "io_ctx=" << &group_ioctx + << " group name " << group_name << " image " + << &image_ioctx << " id " << image_id << dendl; + + string group_id; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name, + &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 20) << "removing image from group name " << group_name + << " group id " << group_id << dendl; + + 
return group_image_remove(group_ioctx, group_id, image_ioctx, string(image_id)); +} + +template +int Group::create(librados::IoCtx& io_ctx, const char *group_name) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + + string id = generate_uuid(io_ctx); + + ldout(cct, 2) << "adding group to directory..." << dendl; + + int r = cls_client::group_dir_add(&io_ctx, RBD_GROUP_DIRECTORY, group_name, + id); + if (r < 0) { + lderr(cct) << "error adding group to directory: " + << cpp_strerror(r) + << dendl; + return r; + } + string header_oid = util::group_header_name(id); + + r = io_ctx.create(header_oid, true); + if (r < 0) { + lderr(cct) << "error creating group header: " << cpp_strerror(r) << dendl; + goto err_remove_from_dir; + } + + return 0; + +err_remove_from_dir: + int remove_r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY, + group_name, id); + if (remove_r < 0) { + lderr(cct) << "error cleaning up group from rbd_directory " + << "object after creation failed: " << cpp_strerror(remove_r) + << dendl; + } + + return r; +} + +template +int Group::remove(librados::IoCtx& io_ctx, const char *group_name) +{ + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "group_remove " << &io_ctx << " " << group_name << dendl; + + std::string group_id; + int r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY, + std::string(group_name), &group_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error getting id of group" << dendl; + return r; + } + string group_header_oid = util::group_header_name(group_id); + + std::vector snaps; + r = group_snap_list(io_ctx, group_name, &snaps); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error listing group snapshots" << dendl; + return r; + } + + for (auto &snap : snaps) { + r = group_snap_remove_by_record(io_ctx, snap, group_id, group_header_oid); + if (r < 0) { + return r; + } + } + + std::vector images; + r = group_image_list(io_ctx, group_name, &images); + if (r < 0 && r != -ENOENT) { + lderr(cct) << 
"error listing group images" << dendl; + return r; + } + + for (auto image : images) { + IoCtx image_ioctx; + r = util::create_ioctx(io_ctx, "image", image.spec.pool_id, {}, + &image_ioctx); + if (r < 0) { + return r; + } + + r = group_image_remove(io_ctx, group_id, image_ioctx, image.spec.image_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error removing image from a group" << dendl; + return r; + } + } + + string header_oid = util::group_header_name(group_id); + + r = io_ctx.remove(header_oid); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error removing header: " << cpp_strerror(-r) << dendl; + return r; + } + + r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY, + group_name, group_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error removing group from directory" << dendl; + return r; + } + + return 0; +} + +template +int Group::list(IoCtx& io_ctx, vector *names) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl; + + int max_read = 1024; + string last_read = ""; + int r; + do { + map groups; + r = cls_client::group_dir_list(&io_ctx, RBD_GROUP_DIRECTORY, last_read, + max_read, &groups); + if (r < 0) { + if (r != -ENOENT) { + lderr(cct) << "error listing group in directory: " + << cpp_strerror(r) << dendl; + } else { + r = 0; + } + return r; + } + for (pair group : groups) { + names->push_back(group.first); + } + if (!groups.empty()) { + last_read = groups.rbegin()->first; + } + r = groups.size(); + } while (r == max_read); + + return 0; +} + +template +int Group::image_add(librados::IoCtx& group_ioctx, const char *group_name, + librados::IoCtx& image_ioctx, const char *image_name) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + ldout(cct, 20) << "io_ctx=" << &group_ioctx + << " group name " << group_name << " image " + << &image_ioctx << " name " << image_name << dendl; + + if (group_ioctx.get_namespace() != image_ioctx.get_namespace()) { + lderr(cct) << "group and image 
cannot be in different namespaces" << dendl; + return -EINVAL; + } + + string group_id; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name, + &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + string group_header_oid = util::group_header_name(group_id); + + + ldout(cct, 20) << "adding image to group name " << group_name + << " group id " << group_header_oid << dendl; + + string image_id; + + r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name, + &image_id); + if (r < 0) { + lderr(cct) << "error reading image id object: " + << cpp_strerror(-r) << dendl; + return r; + } + + string image_header_oid = util::header_name(image_id); + + ldout(cct, 20) << "adding image " << image_name + << " image id " << image_header_oid << dendl; + + cls::rbd::GroupImageStatus incomplete_st( + image_id, image_ioctx.get_id(), + cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE); + cls::rbd::GroupImageStatus attached_st( + image_id, image_ioctx.get_id(), cls::rbd::GROUP_IMAGE_LINK_STATE_ATTACHED); + + r = cls_client::group_image_set(&group_ioctx, group_header_oid, + incomplete_st); + + cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id()); + + if (r < 0) { + lderr(cct) << "error adding image reference to group: " + << cpp_strerror(-r) << dendl; + return r; + } + + r = cls_client::image_group_add(&image_ioctx, image_header_oid, group_spec); + if (r < 0) { + lderr(cct) << "error adding group reference to image: " + << cpp_strerror(-r) << dendl; + cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id()); + cls_client::group_image_remove(&group_ioctx, group_header_oid, spec); + // Ignore errors in the clean up procedure. 
+ return r; + } + ImageWatcher<>::notify_header_update(image_ioctx, image_header_oid); + + r = cls_client::group_image_set(&group_ioctx, group_header_oid, + attached_st); + + return r; +} + +template +int Group::image_remove(librados::IoCtx& group_ioctx, const char *group_name, + librados::IoCtx& image_ioctx, const char *image_name) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + ldout(cct, 20) << "io_ctx=" << &group_ioctx + << " group name " << group_name << " image " + << &image_ioctx << " name " << image_name << dendl; + + if (group_ioctx.get_namespace() != image_ioctx.get_namespace()) { + lderr(cct) << "group and image cannot be in different namespaces" << dendl; + return -EINVAL; + } + + string group_id; + + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name, + &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 20) << "removing image from group name " << group_name + << " group id " << group_id << dendl; + + string image_id; + r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name, + &image_id); + if (r < 0) { + lderr(cct) << "error reading image id object: " + << cpp_strerror(-r) << dendl; + return r; + } + + r = group_image_remove(group_ioctx, group_id, image_ioctx, image_id); + + return r; +} + +template +int Group::image_list(librados::IoCtx& group_ioctx, + const char *group_name, + std::vector* images) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + ldout(cct, 20) << "io_ctx=" << &group_ioctx + << " group name " << group_name << dendl; + + std::vector image_ids; + + group_image_list(group_ioctx, group_name, &image_ids); + + for (auto image_id : image_ids) { + IoCtx ioctx; + int r = util::create_ioctx(group_ioctx, "image", image_id.spec.pool_id, {}, + &ioctx); + if (r < 0) { + return r; + } + + std::string image_name; + r = cls_client::dir_get_name(&ioctx, RBD_DIRECTORY, + image_id.spec.image_id, 
&image_name); + if (r < 0) { + return r; + } + + images->push_back( + group_image_info_t { + image_name, + ioctx.get_id(), + static_cast(image_id.state)}); + } + + return 0; +} + +template +int Group::rename(librados::IoCtx& io_ctx, const char *src_name, + const char *dest_name) +{ + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "group_rename " << &io_ctx << " " << src_name + << " -> " << dest_name << dendl; + + std::string group_id; + int r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY, + std::string(src_name), &group_id); + if (r < 0) { + if (r != -ENOENT) + lderr(cct) << "error getting id of group" << dendl; + return r; + } + + r = cls_client::group_dir_rename(&io_ctx, RBD_GROUP_DIRECTORY, + src_name, dest_name, group_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error renaming group from directory" << dendl; + return r; + } + + return 0; +} + + +template +int Group::image_get_group(I *ictx, group_info_t *group_info) +{ + int r = ictx->state->refresh_if_required(); + if (r < 0) + return r; + + if (RBD_GROUP_INVALID_POOL != ictx->group_spec.pool_id) { + IoCtx ioctx; + r = util::create_ioctx(ictx->md_ctx, "group", ictx->group_spec.pool_id, {}, + &ioctx); + if (r < 0) { + return r; + } + + std::string group_name; + r = cls_client::dir_get_name(&ioctx, RBD_GROUP_DIRECTORY, + ictx->group_spec.group_id, &group_name); + if (r < 0) + return r; + group_info->pool = ioctx.get_id(); + group_info->name = group_name; + } else { + group_info->pool = RBD_GROUP_INVALID_POOL; + group_info->name = ""; + } + + return 0; +} + +template +int Group::snap_create(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name, + uint32_t flags) { + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + cls::rbd::GroupSnapshot group_snap; + vector image_snaps; + std::string ind_snap_name; + + std::vector ictxs; + std::vector on_finishes; + std::vector quiesce_requests; + NoOpProgressContext prog_ctx; + uint64_t 
internal_flags = 0; + + int r = util::snap_create_flags_api_to_internal(cct, flags, &internal_flags); + if (r < 0) { + return r; + } + internal_flags &= ~(SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE | + SNAP_CREATE_FLAG_IGNORE_NOTIFY_QUIESCE_ERROR); + + r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name, + &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + + std::vector images; + r = group_image_list(group_ioctx, group_name, &images); + if (r < 0) { + return r; + } + int image_count = images.size(); + + ldout(cct, 20) << "Found " << image_count << " images in group" << dendl; + + image_snaps = vector(image_count, + cls::rbd::ImageSnapshotSpec()); + + for (int i = 0; i < image_count; ++i) { + image_snaps[i].pool = images[i].spec.pool_id; + image_snaps[i].image_id = images[i].spec.image_id; + } + + string group_header_oid = util::group_header_name(group_id); + + group_snap.id = generate_uuid(group_ioctx); + group_snap.name = string(snap_name); + group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE; + group_snap.snaps = image_snaps; + + cls::rbd::GroupSnapshotNamespace ne{group_ioctx.get_id(), group_id, + group_snap.id}; + + r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap); + if (r == -EEXIST) { + lderr(cct) << "snapshot with this name already exists: " + << cpp_strerror(r) + << dendl; + } + int ret_code = 0; + if (r < 0) { + ret_code = r; + goto finish; + } + + for (auto image: images) { + librbd::IoCtx image_io_ctx; + r = util::create_ioctx(group_ioctx, "image", image.spec.pool_id, {}, + &image_io_ctx); + if (r < 0) { + ret_code = r; + goto finish; + } + + ldout(cct, 20) << "Opening image with id " << image.spec.image_id << dendl; + + librbd::ImageCtx* image_ctx = new ImageCtx("", image.spec.image_id.c_str(), + nullptr, image_io_ctx, false); + + C_SaferCond* on_finish = new C_SaferCond; + + image_ctx->state->open(0, on_finish); + + 
ictxs.push_back(image_ctx); + on_finishes.push_back(on_finish); + } + ldout(cct, 20) << "Issued open request waiting for the completion" << dendl; + ret_code = 0; + for (int i = 0; i < image_count; ++i) { + + ldout(cct, 20) << "Waiting for completion on on_finish: " << + on_finishes[i] << dendl; + + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0) { + ictxs[i] = nullptr; + ret_code = r; + } + } + if (ret_code != 0) { + goto remove_record; + } + + if ((flags & RBD_SNAP_CREATE_SKIP_QUIESCE) == 0) { + ldout(cct, 20) << "Sending quiesce notification" << dendl; + ret_code = notify_quiesce(ictxs, prog_ctx, &quiesce_requests); + if (ret_code != 0 && (flags & RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR) == 0) { + goto remove_record; + } + } + + ldout(cct, 20) << "Requesting exclusive locks for images" << dendl; + + for (auto ictx: ictxs) { + std::shared_lock owner_lock{ictx->owner_lock}; + if (ictx->exclusive_lock != nullptr) { + ictx->exclusive_lock->block_requests(-EBUSY); + } + } + for (int i = 0; i < image_count; ++i) { + ImageCtx *ictx = ictxs[i]; + std::shared_lock owner_lock{ictx->owner_lock}; + + on_finishes[i] = new C_SaferCond; + if (ictx->exclusive_lock != nullptr) { + ictx->exclusive_lock->acquire_lock(on_finishes[i]); + } + } + + ret_code = 0; + for (int i = 0; i < image_count; ++i) { + r = 0; + ImageCtx *ictx = ictxs[i]; + if (ictx->exclusive_lock != nullptr) { + r = on_finishes[i]->wait(); + } + delete on_finishes[i]; + if (r < 0) { + ret_code = r; + } + } + if (ret_code != 0) { + notify_unquiesce(ictxs, quiesce_requests); + goto remove_record; + } + + ind_snap_name = calc_ind_image_snap_name(group_ioctx.get_id(), group_id, + group_snap.id); + + for (int i = 0; i < image_count; ++i) { + ImageCtx *ictx = ictxs[i]; + + C_SaferCond* on_finish = new C_SaferCond; + + std::shared_lock owner_locker{ictx->owner_lock}; + ictx->operations->execute_snap_create( + ne, ind_snap_name.c_str(), on_finish, 0, + SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE, prog_ctx); + + 
on_finishes[i] = on_finish; + } + + ret_code = 0; + for (int i = 0; i < image_count; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0) { + ret_code = r; + } else { + ImageCtx *ictx = ictxs[i]; + ictx->image_lock.lock_shared(); + snap_t snap_id = get_group_snap_id(ictx, ne); + ictx->image_lock.unlock_shared(); + if (snap_id == CEPH_NOSNAP) { + ldout(cct, 20) << "Couldn't find created snapshot with namespace: " + << ne << dendl; + ret_code = -ENOENT; + } else { + image_snaps[i].snap_id = snapid_t(snap_id); + image_snaps[i].pool = ictx->md_ctx.get_id(); + image_snaps[i].image_id = ictx->id; + } + } + } + if (ret_code != 0) { + goto remove_image_snaps; + } + + group_snap.snaps = image_snaps; + group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE; + + r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap); + if (r < 0) { + ret_code = r; + goto remove_image_snaps; + } + + ldout(cct, 20) << "Sending unquiesce notification" << dendl; + notify_unquiesce(ictxs, quiesce_requests); + + goto finish; + +remove_image_snaps: + notify_unquiesce(ictxs, quiesce_requests); + + for (int i = 0; i < image_count; ++i) { + ImageCtx *ictx = ictxs[i]; + ldout(cct, 20) << "Removing individual snapshot with name: " << + ind_snap_name << dendl; + + on_finishes[i] = new C_SaferCond; + std::string snap_name; + ictx->image_lock.lock_shared(); + snap_t snap_id = get_group_snap_id(ictx, ne); + r = ictx->get_snap_name(snap_id, &snap_name); + ictx->image_lock.unlock_shared(); + if (r >= 0) { + ictx->operations->snap_remove(ne, snap_name.c_str(), on_finishes[i]); + } else { + // Ignore missing image snapshots. The whole snapshot could have been + // inconsistent. 
+ on_finishes[i]->complete(0); + } + } + + for (int i = 0, n = on_finishes.size(); i < n; ++i) { + r = on_finishes[i]->wait(); + delete on_finishes[i]; + if (r < 0 && r != -ENOENT) { // if previous attempts to remove this snapshot failed then the image's snapshot may not exist + lderr(cct) << "Failed cleaning up image snapshot. Ret code: " << r << dendl; + // just report error, but don't abort the process + } + } + +remove_record: + r = cls_client::group_snap_remove(&group_ioctx, group_header_oid, + group_snap.id); + if (r < 0) { + lderr(cct) << "error while cleaning up group snapshot" << dendl; + // we ignore return value in clean up + } + +finish: + for (int i = 0, n = ictxs.size(); i < n; ++i) { + if (ictxs[i] != nullptr) { + ictxs[i]->state->close(); + } + } + return ret_code; +} + +template +int Group::snap_remove(librados::IoCtx& group_ioctx, const char *group_name, + const char *snap_name) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) + << dendl; + return r; + } + + std::vector snaps; + r = group_snap_list(group_ioctx, group_name, &snaps); + if (r < 0) { + return r; + } + + cls::rbd::GroupSnapshot *group_snap = nullptr; + for (auto &snap : snaps) { + if (snap.name == string(snap_name)) { + group_snap = &snap; + break; + } + } + if (group_snap == nullptr) { + return -ENOENT; + } + + string group_header_oid = util::group_header_name(group_id); + r = group_snap_remove_by_record(group_ioctx, *group_snap, group_id, + group_header_oid); + return r; +} + +template +int Group::snap_rename(librados::IoCtx& group_ioctx, const char *group_name, + const char *old_snap_name, + const char *new_snap_name) { + CephContext *cct = (CephContext *)group_ioctx.cct(); + if (0 == strcmp(old_snap_name, new_snap_name)) + return -EEXIST; + + std::string group_id; + int r = 
cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r == -ENOENT) { + return r; + } else if (r < 0) { + lderr(cct) << "error reading group id object: " << cpp_strerror(r) << dendl; + return r; + } + + std::vector group_snaps; + r = group_snap_list(group_ioctx, group_name, &group_snaps); + if (r < 0) { + return r; + } + + cls::rbd::GroupSnapshot group_snap; + for (auto &snap : group_snaps) { + if (snap.name == old_snap_name) { + group_snap = snap; + break; + } + } + + if (group_snap.id.empty()) { + return -ENOENT; + } + + std::string group_header_oid = util::group_header_name(group_id); + group_snap.name = new_snap_name; + r = cls_client::group_snap_set(&group_ioctx, group_header_oid, group_snap); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Group::snap_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector *snaps) +{ + std::vector cls_snaps; + + int r = group_snap_list(group_ioctx, group_name, &cls_snaps); + if (r < 0) { + return r; + } + + for (auto snap : cls_snaps) { + snaps->push_back( + group_snap_info_t { + snap.name, + static_cast(snap.state)}); + + } + return 0; +} + +template +int Group::snap_rollback(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name, + ProgressContext& pctx) +{ + CephContext *cct = (CephContext *)group_ioctx.cct(); + + string group_id; + int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, + group_name, &group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) << dendl; + return r; + } + + std::vector snaps; + r = group_snap_list(group_ioctx, group_name, &snaps); + if (r < 0) { + return r; + } + + cls::rbd::GroupSnapshot *group_snap = nullptr; + for (auto &snap : snaps) { + if (snap.name == string(snap_name)) { + group_snap = &snap; + break; + } + } + if (group_snap == nullptr) { + return -ENOENT; + } + + string group_header_oid = util::group_header_name(group_id); + r = 
group_snap_rollback_by_record(group_ioctx, *group_snap, group_id, + group_header_oid, pctx); + return r; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Group; diff --git a/src/librbd/api/Group.h b/src/librbd/api/Group.h new file mode 100644 index 000000000..9d3abcc59 --- /dev/null +++ b/src/librbd/api/Group.h @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_GROUP_H +#define CEPH_LIBRBD_API_GROUP_H + +#include "include/rbd/librbd.hpp" +#include "include/rados/librados_fwd.hpp" +#include +#include + +namespace librbd { + +struct ImageCtx; + +namespace api { + +template +struct Group { + + static int create(librados::IoCtx& io_ctx, const char *group_name); + static int remove(librados::IoCtx& io_ctx, const char *group_name); + static int list(librados::IoCtx& io_ctx, std::vector *names); + static int rename(librados::IoCtx& io_ctx, const char *src_group_name, + const char *dest_group_name); + + static int image_add(librados::IoCtx& group_ioctx, const char *group_name, + librados::IoCtx& image_ioctx, const char *image_name); + static int image_remove(librados::IoCtx& group_ioctx, const char *group_name, + librados::IoCtx& image_ioctx, const char *image_name); + static int image_remove_by_id(librados::IoCtx& group_ioctx, + const char *group_name, + librados::IoCtx& image_ioctx, + const char *image_id); + static int image_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector *images); + + static int image_get_group(ImageCtxT *ictx, group_info_t *group_info); + + static int snap_create(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name, + uint32_t flags); + static int snap_remove(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name); + static int snap_rename(librados::IoCtx& group_ioctx, const char *group_name, + const char *old_snap_name, const char *new_snap_name); + static 
int snap_list(librados::IoCtx& group_ioctx, const char *group_name, + std::vector *snaps); + static int snap_rollback(librados::IoCtx& group_ioctx, + const char *group_name, const char *snap_name, + ProgressContext& pctx); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Group; + +#endif // CEPH_LIBRBD_API_GROUP_H diff --git a/src/librbd/api/Image.cc b/src/librbd/api/Image.cc new file mode 100644 index 000000000..19dc5aa68 --- /dev/null +++ b/src/librbd/api/Image.cc @@ -0,0 +1,1015 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Image.h" +#include "include/rados/librados.hpp" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/AsioEngine.h" +#include "librbd/DeepCopyRequest.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/internal.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" +#include "librbd/api/Trash.h" +#include "librbd/api/Utils.h" +#include "librbd/crypto/FormatRequest.h" +#include "librbd/crypto/LoadRequest.h" +#include "librbd/deep_copy/Handler.h" +#include "librbd/image/CloneRequest.h" +#include "librbd/image/RemoveRequest.h" +#include "librbd/image/PreRemoveRequest.h" +#include "librbd/io/ImageDispatcherInterface.h" +#include "librbd/io/ObjectDispatcherInterface.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ImageDispatchSpec.h" +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Image: " << __func__ << ": " + +using std::map; +using std::string; +using librados::snap_t; + +namespace librbd { +namespace api { + +namespace { + +bool compare_by_pool(const librbd::linked_image_spec_t& lhs, + const librbd::linked_image_spec_t& rhs) +{ + if (lhs.pool_id != rhs.pool_id) { + 
return lhs.pool_id < rhs.pool_id; + } else if (lhs.pool_namespace != rhs.pool_namespace) { + return lhs.pool_namespace < rhs.pool_namespace; + } + return false; +} + +bool compare(const librbd::linked_image_spec_t& lhs, + const librbd::linked_image_spec_t& rhs) +{ + if (lhs.pool_name != rhs.pool_name) { + return lhs.pool_name < rhs.pool_name; + } else if (lhs.pool_id != rhs.pool_id) { + return lhs.pool_id < rhs.pool_id; + } else if (lhs.pool_namespace != rhs.pool_namespace) { + return lhs.pool_namespace < rhs.pool_namespace; + } else if (lhs.image_name != rhs.image_name) { + return lhs.image_name < rhs.image_name; + } else if (lhs.image_id != rhs.image_id) { + return lhs.image_id < rhs.image_id; + } + return false; +} + +template +int pre_remove_image(librados::IoCtx& io_ctx, const std::string& image_id) { + I *image_ctx = I::create("", image_id, nullptr, io_ctx, false); + int r = image_ctx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + if (r < 0) { + return r; + } + + C_SaferCond ctx; + auto req = image::PreRemoveRequest::create(image_ctx, false, &ctx); + req->send(); + + r = ctx.wait(); + image_ctx->state->close(); + return r; +} + +} // anonymous namespace + +template +int64_t Image::get_data_pool_id(I *ictx) { + if (ictx->data_ctx.is_valid()) { + return ictx->data_ctx.get_id(); + } + + int64_t pool_id; + int r = cls_client::get_data_pool(&ictx->md_ctx, ictx->header_oid, &pool_id); + if (r < 0) { + CephContext *cct = ictx->cct; + lderr(cct) << "error getting data pool ID: " << cpp_strerror(r) << dendl; + return r; + } + + return pool_id; +} + +template +int Image::get_op_features(I *ictx, uint64_t *op_features) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "image_ctx=" << ictx << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + std::shared_lock image_locker{ictx->image_lock}; + *op_features = ictx->op_features; + return 0; +} + +template +int Image::list_images(librados::IoCtx& io_ctx, + std::vector *images) { + 
CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 20) << "list " << &io_ctx << dendl; + + int r; + images->clear(); + + if (io_ctx.get_namespace().empty()) { + bufferlist bl; + r = io_ctx.read(RBD_DIRECTORY, bl, 0, 0); + if (r == -ENOENT) { + return 0; + } else if (r < 0) { + lderr(cct) << "error listing v1 images: " << cpp_strerror(r) << dendl; + return r; + } + + // V1 format images are in a tmap + if (bl.length()) { + auto p = bl.cbegin(); + bufferlist header; + std::map m; + decode(header, p); + decode(m, p); + for (auto& it : m) { + images->push_back({.id ="", .name = it.first}); + } + } + } + + // V2 format images + std::map image_names_to_ids; + r = list_images_v2(io_ctx, &image_names_to_ids); + if (r < 0) { + lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl; + return r; + } + + for (const auto& img_pair : image_names_to_ids) { + images->push_back({.id = img_pair.second, + .name = img_pair.first}); + } + + // include V2 images in a partially removed state + std::vector trash_images; + r = Trash::list(io_ctx, trash_images, false); + if (r < 0 && r != -EOPNOTSUPP) { + lderr(cct) << "error listing trash images: " << cpp_strerror(r) << dendl; + return r; + } + + for (const auto& trash_image : trash_images) { + if (trash_image.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + images->push_back({.id = trash_image.id, + .name = trash_image.name}); + + } + } + + return 0; +} + +template +int Image::list_images_v2(librados::IoCtx& io_ctx, ImageNameToIds *images) { + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl; + + // new format images are accessed by class methods + int r; + int max_read = 1024; + string last_read = ""; + do { + map images_page; + r = cls_client::dir_list(&io_ctx, RBD_DIRECTORY, last_read, max_read, + &images_page); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error listing image in directory: " + << cpp_strerror(r) << dendl; + return r; + } else if (r == -ENOENT) { 
+ break; + } + for (map::const_iterator it = images_page.begin(); + it != images_page.end(); ++it) { + images->insert(*it); + } + if (!images_page.empty()) { + last_read = images_page.rbegin()->first; + } + r = images_page.size(); + } while (r == max_read); + + return 0; +} + +template +int Image::get_parent(I *ictx, + librbd::linked_image_spec_t *parent_image, + librbd::snap_spec_t *parent_snap) { + auto cct = ictx->cct; + ldout(cct, 20) << "image_ctx=" << ictx << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + std::shared_lock image_locker{ictx->image_lock}; + + bool release_image_lock = false; + BOOST_SCOPE_EXIT_ALL(ictx, &release_image_lock) { + if (release_image_lock) { + ictx->parent->image_lock.unlock_shared(); + } + }; + + // if a migration is in-progress, the true parent is the parent + // of the migration source image + auto parent = ictx->parent; + if (!ictx->migration_info.empty() && ictx->parent != nullptr) { + release_image_lock = true; + ictx->parent->image_lock.lock_shared(); + + parent = ictx->parent->parent; + } + + if (parent == nullptr) { + return -ENOENT; + } + + parent_image->pool_id = parent->md_ctx.get_id(); + parent_image->pool_name = parent->md_ctx.get_pool_name(); + parent_image->pool_namespace = parent->md_ctx.get_namespace(); + + std::shared_lock parent_image_locker{parent->image_lock}; + parent_snap->id = parent->snap_id; + parent_snap->namespace_type = RBD_SNAP_NAMESPACE_TYPE_USER; + if (parent->snap_id != CEPH_NOSNAP) { + auto snap_info = parent->get_snap_info(parent->snap_id); + if (snap_info == nullptr) { + lderr(cct) << "error finding parent snap name: " << cpp_strerror(r) + << dendl; + return -ENOENT; + } + + parent_snap->namespace_type = static_cast( + cls::rbd::get_snap_namespace_type(snap_info->snap_namespace)); + parent_snap->name = snap_info->name; + } + + parent_image->image_id = parent->id; + parent_image->image_name = parent->name; + parent_image->trash = true; + + 
librbd::trash_image_info_t trash_info; + r = Trash::get(parent->md_ctx, parent->id, &trash_info); + if (r == -ENOENT || r == -EOPNOTSUPP) { + parent_image->trash = false; + } else if (r < 0) { + lderr(cct) << "error looking up trash status: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template +int Image::list_children(I *ictx, + std::vector *images) { + images->clear(); + return list_descendants(ictx, 1, images); +} + +template +int Image::list_children(I *ictx, + const cls::rbd::ParentImageSpec &parent_spec, + std::vector *images) { + images->clear(); + return list_descendants(ictx, parent_spec, 1, images); +} + +template +int Image::list_descendants( + librados::IoCtx& io_ctx, const std::string &image_id, + const std::optional &max_level, + std::vector *images) { + ImageCtx *ictx = new librbd::ImageCtx("", image_id, nullptr, + io_ctx, true); + CephContext *cct = ictx->cct; + int r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + if (r < 0) { + if (r == -ENOENT) { + return 0; + } + lderr(cct) << "failed to open descendant " << image_id + << " from pool " << io_ctx.get_pool_name() << ":" + << cpp_strerror(r) << dendl; + return r; + } + + r = list_descendants(ictx, max_level, images); + + int r1 = ictx->state->close(); + if (r1 < 0) { + lderr(cct) << "error when closing descendant " << image_id + << " from pool " << io_ctx.get_pool_name() << ":" + << cpp_strerror(r1) << dendl; + } + + return r; +} + +template +int Image::list_descendants( + I *ictx, const std::optional &max_level, + std::vector *images) { + std::shared_lock l{ictx->image_lock}; + std::vector snap_ids; + if (ictx->snap_id != CEPH_NOSNAP) { + snap_ids.push_back(ictx->snap_id); + } else { + snap_ids = ictx->snaps; + } + for (auto snap_id : snap_ids) { + cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(), + ictx->md_ctx.get_namespace(), + ictx->id, snap_id}; + int r = list_descendants(ictx, parent_spec, max_level, images); + if (r < 0) { + return r; + } + } + return 
0; +} + +template +int Image::list_descendants( + I *ictx, const cls::rbd::ParentImageSpec &parent_spec, + const std::optional &max_level, + std::vector *images) { + auto child_max_level = max_level; + if (child_max_level) { + if (child_max_level == 0) { + return 0; + } + (*child_max_level)--; + } + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + // no children for non-layered or old format image + if (!ictx->test_features(RBD_FEATURE_LAYERING, ictx->image_lock)) { + return 0; + } + + librados::Rados rados(ictx->md_ctx); + + // search all pools for clone v1 children dependent on this snapshot + std::list > pools; + int r = rados.pool_list2(pools); + if (r < 0) { + lderr(cct) << "error listing pools: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto& it : pools) { + int64_t base_tier; + r = rados.pool_get_base_tier(it.first, &base_tier); + if (r == -ENOENT) { + ldout(cct, 1) << "pool " << it.second << " no longer exists" << dendl; + continue; + } else if (r < 0) { + lderr(cct) << "error retrieving base tier for pool " << it.second + << dendl; + return r; + } + if (it.first != base_tier) { + // pool is a cache; skip it + continue; + } + + IoCtx ioctx; + r = librbd::util::create_ioctx( + ictx->md_ctx, "child image", it.first, {}, &ioctx); + if (r == -ENOENT) { + continue; + } else if (r < 0) { + return r; + } + + std::set image_ids; + r = cls_client::get_children(&ioctx, RBD_CHILDREN, parent_spec, + image_ids); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error reading list of children from pool " << it.second + << dendl; + return r; + } + + for (auto& image_id : image_ids) { + images->push_back({ + it.first, "", ictx->md_ctx.get_namespace(), image_id, "", false}); + r = list_descendants(ioctx, image_id, child_max_level, images); + if (r < 0) { + return r; + } + } + } + + // retrieve clone v2 children attached to this snapshot + IoCtx parent_io_ctx; + r = librbd::util::create_ioctx( + ictx->md_ctx, "parent 
image",parent_spec.pool_id, + parent_spec.pool_namespace, &parent_io_ctx); + if (r < 0) { + return r; + } + + cls::rbd::ChildImageSpecs child_images; + r = cls_client::children_list( + &parent_io_ctx, librbd::util::header_name(parent_spec.image_id), + parent_spec.snap_id, &child_images); + if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) { + lderr(cct) << "error retrieving children: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto& child_image : child_images) { + images->push_back({ + child_image.pool_id, "", child_image.pool_namespace, + child_image.image_id, "", false}); + if (!child_max_level || *child_max_level > 0) { + IoCtx ioctx; + r = librbd::util::create_ioctx( + ictx->md_ctx, "child image", child_image.pool_id, + child_image.pool_namespace, &ioctx); + if (r == -ENOENT) { + continue; + } else if (r < 0) { + return r; + } + r = list_descendants(ioctx, child_image.image_id, child_max_level, + images); + if (r < 0) { + return r; + } + } + } + + // batch lookups by pool + namespace + std::sort(images->begin(), images->end(), compare_by_pool); + + int64_t child_pool_id = -1; + librados::IoCtx child_io_ctx; + std::map> child_image_id_to_info; + for (auto& image : *images) { + if (child_pool_id == -1 || child_pool_id != image.pool_id || + child_io_ctx.get_namespace() != image.pool_namespace) { + r = librbd::util::create_ioctx( + ictx->md_ctx, "child image", image.pool_id, image.pool_namespace, + &child_io_ctx); + if (r == -ENOENT) { + image.pool_name = ""; + image.image_name = ""; + continue; + } else if (r < 0) { + return r; + } + child_pool_id = image.pool_id; + + child_image_id_to_info.clear(); + + std::map image_names_to_ids; + r = list_images_v2(child_io_ctx, &image_names_to_ids); + if (r < 0) { + lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto& [name, id] : image_names_to_ids) { + child_image_id_to_info.insert({id, {name, false}}); + } + + std::vector trash_images; + r = 
Trash::list(child_io_ctx, trash_images, false); + if (r < 0 && r != -EOPNOTSUPP) { + lderr(cct) << "error listing trash images: " << cpp_strerror(r) + << dendl; + return r; + } + + for (auto& it : trash_images) { + child_image_id_to_info.insert({ + it.id, + {it.name, + it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING ? false : true}}); + } + } + + auto it = child_image_id_to_info.find(image.image_id); + if (it == child_image_id_to_info.end()) { + lderr(cct) << "error looking up name for image id " + << image.image_id << " in pool " + << child_io_ctx.get_pool_name() + << (image.pool_namespace.empty() ? + "" : "/" + image.pool_namespace) << dendl; + return -ENOENT; + } + + image.pool_name = child_io_ctx.get_pool_name(); + image.image_name = it->second.first; + image.trash = it->second.second; + } + + // final sort by pool + image names + std::sort(images->begin(), images->end(), compare); + return 0; +} + +template +int Image::deep_copy(I *src, librados::IoCtx& dest_md_ctx, + const char *destname, ImageOptions& opts, + ProgressContext &prog_ctx) { + CephContext *cct = (CephContext *)dest_md_ctx.cct(); + ldout(cct, 20) << src->name + << (src->snap_name.length() ? 
"@" + src->snap_name : "") + << " -> " << destname << " opts = " << opts << dendl; + + uint64_t features; + uint64_t src_size; + { + std::shared_lock image_locker{src->image_lock}; + + if (!src->migration_info.empty()) { + lderr(cct) << "cannot deep copy migrating image" << dendl; + return -EBUSY; + } + + features = src->features; + src_size = src->get_image_size(src->snap_id); + } + uint64_t format = 2; + if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) { + opts.set(RBD_IMAGE_OPTION_FORMAT, format); + } + if (format == 1) { + lderr(cct) << "old format not supported for destination image" << dendl; + return -EINVAL; + } + uint64_t stripe_unit = src->stripe_unit; + if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) { + opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); + } + uint64_t stripe_count = src->stripe_count; + if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) { + opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); + } + uint64_t order = src->order; + if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) { + opts.set(RBD_IMAGE_OPTION_ORDER, order); + } + if (opts.get(RBD_IMAGE_OPTION_FEATURES, &features) != 0) { + opts.set(RBD_IMAGE_OPTION_FEATURES, features); + } + if (features & ~RBD_FEATURES_ALL) { + lderr(cct) << "librbd does not support requested features" << dendl; + return -ENOSYS; + } + + uint64_t flatten = 0; + if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) { + opts.unset(RBD_IMAGE_OPTION_FLATTEN); + } + + cls::rbd::ParentImageSpec parent_spec; + if (flatten > 0) { + parent_spec.pool_id = -1; + } else { + std::shared_lock image_locker{src->image_lock}; + + // use oldest snapshot or HEAD for parent spec + if (!src->snap_info.empty()) { + parent_spec = src->snap_info.begin()->second.parent.spec; + } else { + parent_spec = src->parent_md.spec; + } + } + + int r; + if (parent_spec.pool_id == -1) { + r = create(dest_md_ctx, destname, "", src_size, opts, "", "", false); + } else { + librados::IoCtx parent_io_ctx; + 
r = librbd::util::create_ioctx( + src->md_ctx, "parent image", parent_spec.pool_id, + parent_spec.pool_namespace, &parent_io_ctx); + if (r < 0) { + return r; + } + + ConfigProxy config{cct->_conf}; + api::Config::apply_pool_overrides(dest_md_ctx, &config); + + C_SaferCond ctx; + std::string dest_id = librbd::util::generate_image_id(dest_md_ctx); + auto *req = image::CloneRequest::create( + config, parent_io_ctx, parent_spec.image_id, "", {}, parent_spec.snap_id, + dest_md_ctx, destname, dest_id, opts, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, + "", "", src->op_work_queue, &ctx); + req->send(); + r = ctx.wait(); + } + if (r < 0) { + lderr(cct) << "header creation failed" << dendl; + return r; + } + opts.set(RBD_IMAGE_OPTION_ORDER, static_cast(order)); + + auto dest = new I(destname, "", nullptr, dest_md_ctx, false); + r = dest->state->open(0); + if (r < 0) { + lderr(cct) << "failed to read newly created header" << dendl; + return r; + } + + C_SaferCond lock_ctx; + { + std::unique_lock locker{dest->owner_lock}; + + if (dest->exclusive_lock == nullptr || + dest->exclusive_lock->is_lock_owner()) { + lock_ctx.complete(0); + } else { + dest->exclusive_lock->acquire_lock(&lock_ctx); + } + } + + r = lock_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to request exclusive lock: " << cpp_strerror(r) + << dendl; + dest->state->close(); + return r; + } + + r = deep_copy(src, dest, flatten > 0, prog_ctx); + + int close_r = dest->state->close(); + if (r == 0 && close_r < 0) { + r = close_r; + } + return r; +} + +template +int Image::deep_copy(I *src, I *dest, bool flatten, + ProgressContext &prog_ctx) { + // ensure previous writes are visible to dest + C_SaferCond flush_ctx; + { + std::shared_lock owner_locker{src->owner_lock}; + auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, src, + io::AIO_TYPE_FLUSH); + auto req = io::ImageDispatchSpec::create_flush( + *src, io::IMAGE_DISPATCH_LAYER_INTERNAL_START, + aio_comp, io::FLUSH_SOURCE_INTERNAL, {}); + req->send(); + } 
+ int r = flush_ctx.wait(); + if (r < 0) { + return r; + } + + librados::snap_t snap_id_start = 0; + librados::snap_t snap_id_end; + { + std::shared_lock image_locker{src->image_lock}; + snap_id_end = src->snap_id; + } + + AsioEngine asio_engine(src->md_ctx); + + C_SaferCond cond; + SnapSeqs snap_seqs; + deep_copy::ProgressHandler progress_handler{&prog_ctx}; + auto req = DeepCopyRequest::create( + src, dest, snap_id_start, snap_id_end, 0U, flatten, boost::none, + asio_engine.get_work_queue(), &snap_seqs, &progress_handler, &cond); + req->send(); + r = cond.wait(); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Image::snap_set(I *ictx, + const cls::rbd::SnapshotNamespace &snap_namespace, + const char *snap_name) { + ldout(ictx->cct, 20) << "snap_set " << ictx << " snap = " + << (snap_name ? snap_name : "NULL") << dendl; + + // ignore return value, since we may be set to a non-existent + // snapshot and the user is trying to fix that + ictx->state->refresh_if_required(); + + uint64_t snap_id = CEPH_NOSNAP; + std::string name(snap_name == nullptr ? "" : snap_name); + if (!name.empty()) { + std::shared_lock image_locker{ictx->image_lock}; + snap_id = ictx->get_snap_id(snap_namespace, snap_name); + if (snap_id == CEPH_NOSNAP) { + return -ENOENT; + } + } + + return snap_set(ictx, snap_id); +} + +template +int Image::snap_set(I *ictx, uint64_t snap_id) { + ldout(ictx->cct, 20) << "snap_set " << ictx << " " + << "snap_id=" << snap_id << dendl; + + // ignore return value, since we may be set to a non-existent + // snapshot and the user is trying to fix that + ictx->state->refresh_if_required(); + + C_SaferCond ctx; + ictx->state->snap_set(snap_id, &ctx); + int r = ctx.wait(); + if (r < 0) { + if (r != -ENOENT) { + lderr(ictx->cct) << "failed to " << (snap_id == CEPH_NOSNAP ? 
"un" : "") + << "set snapshot: " << cpp_strerror(r) << dendl; + } + return r; + } + + return 0; +} + +template +int Image::remove(IoCtx& io_ctx, const std::string &image_name, + ProgressContext& prog_ctx) +{ + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "name=" << image_name << dendl; + + // look up the V2 image id based on the image name + std::string image_id; + int r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name, + &image_id); + if (r == -ENOENT) { + // check if it already exists in trash from an aborted trash remove attempt + std::vector trash_entries; + r = Trash::list(io_ctx, trash_entries, false); + if (r < 0) { + return r; + } + for (auto& entry : trash_entries) { + if (entry.name == image_name && + entry.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + cls::rbd::TrashImageSpec spec; + r = cls_client::trash_get(&io_ctx, entry.id, &spec); + if (r < 0) { + lderr(cct) << "error getting image id " << entry.id + << " info from trash: " << cpp_strerror(r) << dendl; + return r; + } + if (spec.state == cls::rbd::TRASH_IMAGE_STATE_MOVING) { + r = Trash::move(io_ctx, entry.source, entry.name, entry.id, 0); + if (r < 0) { + return r; + } + } + return Trash::remove(io_ctx, entry.id, true, prog_ctx); + } + } + + // fall-through if we failed to locate the image in the V2 directory and + // trash + } else if (r < 0) { + lderr(cct) << "failed to retrieve image id: " << cpp_strerror(r) << dendl; + return r; + } else { + // attempt to move the image to the trash (and optionally immediately + // delete the image) + ConfigProxy config(cct->_conf); + Config::apply_pool_overrides(io_ctx, &config); + + rbd_trash_image_source_t trash_image_source = + RBD_TRASH_IMAGE_SOURCE_REMOVING; + uint64_t expire_seconds = 0; + if (config.get_val("rbd_move_to_trash_on_remove")) { + // keep the image in the trash upon remove requests + trash_image_source = RBD_TRASH_IMAGE_SOURCE_USER; + expire_seconds = config.get_val( + 
"rbd_move_to_trash_on_remove_expire_seconds"); + } else { + // attempt to pre-validate the removal before moving to trash and + // removing + r = pre_remove_image(io_ctx, image_id); + if (r == -ECHILD) { + if (config.get_val("rbd_move_parent_to_trash_on_remove")) { + // keep the image in the trash until the last child is removed + trash_image_source = RBD_TRASH_IMAGE_SOURCE_USER_PARENT; + } else { + lderr(cct) << "image has snapshots - not removing" << dendl; + return -ENOTEMPTY; + } + } else if (r < 0 && r != -ENOENT) { + return r; + } + } + + r = Trash::move(io_ctx, trash_image_source, image_name, image_id, + expire_seconds); + if (r >= 0) { + if (trash_image_source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + // proceed with attempting to immediately remove the image + r = Trash::remove(io_ctx, image_id, true, prog_ctx); + + if (r == -ENOTEMPTY || r == -EBUSY || r == -EMLINK) { + // best-effort try to restore the image if the removal + // failed for possible expected reasons + Trash::restore(io_ctx, {cls::rbd::TRASH_IMAGE_SOURCE_REMOVING}, + image_id, image_name); + } + } + return r; + } else if (r < 0 && r != -EOPNOTSUPP) { + return r; + } + + // fall-through if trash isn't supported + } + + AsioEngine asio_engine(io_ctx); + + // might be a V1 image format that cannot be moved to the trash + // and would not have been listed in the V2 directory -- or the OSDs + // are too old and don't support the trash feature + C_SaferCond cond; + auto req = librbd::image::RemoveRequest::create( + io_ctx, image_name, "", false, false, prog_ctx, + asio_engine.get_work_queue(), &cond); + req->send(); + + return cond.wait(); +} + +template +int Image::flatten_children(I *ictx, const char* snap_name, + ProgressContext& pctx) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "children flatten " << ictx->name << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + std::shared_lock l{ictx->image_lock}; + snap_t snap_id = 
ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), + snap_name); + + cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(), + ictx->md_ctx.get_namespace(), + ictx->id, snap_id}; + std::vector child_images; + r = list_children(ictx, parent_spec, &child_images); + if (r < 0) { + return r; + } + + size_t size = child_images.size(); + if (size == 0) { + return 0; + } + + librados::IoCtx child_io_ctx; + int64_t child_pool_id = -1; + size_t i = 0; + for (auto &child_image : child_images){ + std::string pool = child_image.pool_name; + if (child_pool_id == -1 || + child_pool_id != child_image.pool_id || + child_io_ctx.get_namespace() != child_image.pool_namespace) { + r = librbd::util::create_ioctx( + ictx->md_ctx, "child image", child_image.pool_id, + child_image.pool_namespace, &child_io_ctx); + if (r < 0) { + return r; + } + + child_pool_id = child_image.pool_id; + } + + ImageCtx *imctx = new ImageCtx("", child_image.image_id, nullptr, + child_io_ctx, false); + r = imctx->state->open(0); + if (r < 0) { + lderr(cct) << "error opening image: " << cpp_strerror(r) << dendl; + return r; + } + + if ((imctx->features & RBD_FEATURE_DEEP_FLATTEN) == 0 && + !imctx->snaps.empty()) { + lderr(cct) << "snapshot in-use by " << pool << "/" << imctx->name + << dendl; + imctx->state->close(); + return -EBUSY; + } + + librbd::NoOpProgressContext prog_ctx; + r = imctx->operations->flatten(prog_ctx); + if (r < 0) { + lderr(cct) << "error flattening image: " << pool << "/" + << (child_image.pool_namespace.empty() ? 
+ "" : "/" + child_image.pool_namespace) + << child_image.image_name << cpp_strerror(r) << dendl; + imctx->state->close(); + return r; + } + + r = imctx->state->close(); + if (r < 0) { + lderr(cct) << "failed to close image: " << cpp_strerror(r) << dendl; + return r; + } + + pctx.update_progress(++i, size); + ceph_assert(i <= size); + } + + return 0; +} + +template +int Image::encryption_format(I* ictx, encryption_format_t format, + encryption_options_t opts, size_t opts_size, + bool c_api) { + crypto::EncryptionFormat* result_format; + auto r = util::create_encryption_format( + ictx->cct, format, opts, opts_size, c_api, &result_format); + if (r != 0) { + return r; + } + + C_SaferCond cond; + auto req = librbd::crypto::FormatRequest::create( + ictx, std::unique_ptr>(result_format), + &cond); + req->send(); + return cond.wait(); +} + +template +int Image::encryption_load(I* ictx, const encryption_spec_t *specs, + size_t spec_count, bool c_api) { + std::vector>> formats; + + for (size_t i = 0; i < spec_count; ++i) { + crypto::EncryptionFormat* result_format; + auto r = util::create_encryption_format( + ictx->cct, specs[i].format, specs[i].opts, specs[i].opts_size, + c_api, &result_format); + if (r != 0) { + return r; + } + + formats.emplace_back(result_format); + } + + C_SaferCond cond; + auto req = librbd::crypto::LoadRequest::create( + ictx, std::move(formats), &cond); + req->send(); + return cond.wait(); +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Image; diff --git a/src/librbd/api/Image.h b/src/librbd/api/Image.h new file mode 100644 index 000000000..29398d6cd --- /dev/null +++ b/src/librbd/api/Image.h @@ -0,0 +1,85 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef LIBRBD_API_IMAGE_H +#define LIBRBD_API_IMAGE_H + +#include "include/rbd/librbd.hpp" +#include "include/rados/librados_fwd.hpp" +#include "librbd/Types.h" +#include +#include +#include + +namespace librbd 
{ + +class ImageOptions; +class ProgressContext; + +struct ImageCtx; + +namespace api { + +template +struct Image { + typedef std::map ImageNameToIds; + + static int64_t get_data_pool_id(ImageCtxT *ictx); + + static int get_op_features(ImageCtxT *ictx, uint64_t *op_features); + + static int list_images(librados::IoCtx& io_ctx, + std::vector *images); + static int list_images_v2(librados::IoCtx& io_ctx, + ImageNameToIds *images); + + static int get_parent(ImageCtxT *ictx, + librbd::linked_image_spec_t *parent_image, + librbd::snap_spec_t *parent_snap); + + static int list_children(ImageCtxT *ictx, + std::vector *images); + static int list_children(ImageCtxT *ictx, + const cls::rbd::ParentImageSpec &parent_spec, + std::vector *images); + + static int list_descendants(IoCtx& io_ctx, const std::string &image_id, + const std::optional &max_level, + std::vector *images); + static int list_descendants(ImageCtxT *ictx, + const std::optional &max_level, + std::vector *images); + static int list_descendants(ImageCtxT *ictx, + const cls::rbd::ParentImageSpec &parent_spec, + const std::optional &max_level, + std::vector *images); + + static int deep_copy(ImageCtxT *ictx, librados::IoCtx& dest_md_ctx, + const char *destname, ImageOptions& opts, + ProgressContext &prog_ctx); + static int deep_copy(ImageCtxT *src, ImageCtxT *dest, bool flatten, + ProgressContext &prog_ctx); + + static int snap_set(ImageCtxT *ictx, + const cls::rbd::SnapshotNamespace &snap_namespace, + const char *snap_name); + static int snap_set(ImageCtxT *ictx, uint64_t snap_id); + + static int remove(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext& prog_ctx); + + static int flatten_children(ImageCtxT *ictx, const char* snap_name, ProgressContext& pctx); + + static int encryption_format(ImageCtxT *ictx, encryption_format_t format, + encryption_options_t opts, size_t opts_size, + bool c_api); + static int encryption_load(ImageCtxT *ictx, const encryption_spec_t *specs, + size_t 
spec_count, bool c_api); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Image; + +#endif // LIBRBD_API_IMAGE_H diff --git a/src/librbd/api/Io.cc b/src/librbd/api/Io.cc new file mode 100644 index 000000000..c1bd38fc0 --- /dev/null +++ b/src/librbd/api/Io.cc @@ -0,0 +1,555 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Io.h" +#include "include/intarith.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "common/EventTrace.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/Utils.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ImageDispatchSpec.h" +#include "librbd/io/Types.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Io " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +template +bool is_valid_io(I& image_ctx, io::AioCompletion* aio_comp) { + auto cct = image_ctx.cct; + + if (!image_ctx.data_ctx.is_valid()) { + lderr(cct) << "missing data pool" << dendl; + + aio_comp->fail(-ENODEV); + return false; + } + + return true; +} + +} // anonymous namespace + +template +ssize_t Io::read( + I &image_ctx, uint64_t off, uint64_t len, io::ReadResult &&read_result, + int op_flags) { + auto cct = image_ctx.cct; + + ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", " + << "len = " << len << dendl; + + C_SaferCond ctx; + auto aio_comp = io::AioCompletion::create(&ctx); + aio_read(image_ctx, aio_comp, off, len, std::move(read_result), op_flags, + false); + return ctx.wait(); +} + +template +ssize_t Io::write( + I &image_ctx, uint64_t off, uint64_t len, bufferlist &&bl, int op_flags) { + auto cct = image_ctx.cct; + ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", " + << "len = " << len << dendl; + + image_ctx.image_lock.lock_shared(); + int r = 
clip_io(util::get_image_ctx(&image_ctx), off, &len, + io::ImageArea::DATA); + image_ctx.image_lock.unlock_shared(); + if (r < 0) { + lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl; + return r; + } + + C_SaferCond ctx; + auto aio_comp = io::AioCompletion::create(&ctx); + aio_write(image_ctx, aio_comp, off, len, std::move(bl), op_flags, false); + + r = ctx.wait(); + if (r < 0) { + return r; + } + return len; +} + +template +ssize_t Io::discard( + I &image_ctx, uint64_t off, uint64_t len, + uint32_t discard_granularity_bytes) { + auto cct = image_ctx.cct; + ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", " + << "len = " << len << dendl; + + image_ctx.image_lock.lock_shared(); + int r = clip_io(util::get_image_ctx(&image_ctx), off, &len, + io::ImageArea::DATA); + image_ctx.image_lock.unlock_shared(); + if (r < 0) { + lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl; + return r; + } + + C_SaferCond ctx; + auto aio_comp = io::AioCompletion::create(&ctx); + aio_discard(image_ctx, aio_comp, off, len, discard_granularity_bytes, false); + + r = ctx.wait(); + if (r < 0) { + return r; + } + return len; +} + +template +ssize_t Io::write_same( + I &image_ctx, uint64_t off, uint64_t len, bufferlist &&bl, int op_flags) { + auto cct = image_ctx.cct; + ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", " + << "len = " << len << ", data_len " << bl.length() << dendl; + + image_ctx.image_lock.lock_shared(); + int r = clip_io(util::get_image_ctx(&image_ctx), off, &len, + io::ImageArea::DATA); + image_ctx.image_lock.unlock_shared(); + if (r < 0) { + lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl; + return r; + } + + C_SaferCond ctx; + auto aio_comp = io::AioCompletion::create(&ctx); + aio_write_same(image_ctx, aio_comp, off, len, std::move(bl), op_flags, false); + + r = ctx.wait(); + if (r < 0) { + return r; + } + return len; +} + +template +ssize_t Io::write_zeroes(I& image_ctx, uint64_t off, uint64_t 
len, + int zero_flags, int op_flags) { + auto cct = image_ctx.cct; + ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", " + << "len = " << len << dendl; + + image_ctx.image_lock.lock_shared(); + int r = clip_io(util::get_image_ctx(&image_ctx), off, &len, + io::ImageArea::DATA); + image_ctx.image_lock.unlock_shared(); + if (r < 0) { + lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl; + return r; + } + + C_SaferCond ctx; + auto aio_comp = io::AioCompletion::create(&ctx); + aio_write_zeroes(image_ctx, aio_comp, off, len, zero_flags, op_flags, false); + + r = ctx.wait(); + if (r < 0) { + return r; + } + return len; +} + +template +ssize_t Io::compare_and_write( + I &image_ctx, uint64_t off, uint64_t len, bufferlist &&cmp_bl, + bufferlist &&bl, uint64_t *mismatch_off, int op_flags) { + auto cct = image_ctx.cct; + ldout(cct, 20) << "compare_and_write ictx=" << &image_ctx << ", off=" + << off << ", " << "len = " << len << dendl; + + image_ctx.image_lock.lock_shared(); + int r = clip_io(util::get_image_ctx(&image_ctx), off, &len, + io::ImageArea::DATA); + image_ctx.image_lock.unlock_shared(); + if (r < 0) { + lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl; + return r; + } + + C_SaferCond ctx; + auto aio_comp = io::AioCompletion::create(&ctx); + aio_compare_and_write(image_ctx, aio_comp, off, len, std::move(cmp_bl), + std::move(bl), mismatch_off, op_flags, false); + + r = ctx.wait(); + if (r < 0) { + return r; + } + return len; +} + +template +int Io::flush(I &image_ctx) { + auto cct = image_ctx.cct; + ldout(cct, 20) << "ictx=" << &image_ctx << dendl; + + C_SaferCond ctx; + auto aio_comp = io::AioCompletion::create(&ctx); + aio_flush(image_ctx, aio_comp, false); + + int r = ctx.wait(); + if (r < 0) { + return r; + } + + return 0; +} + +template +void Io::aio_read(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off, + uint64_t len, io::ReadResult &&read_result, int op_flags, + bool native_async) { + auto cct = 
image_ctx.cct; + FUNCTRACE(cct); + ZTracer::Trace trace; + if (image_ctx.blkin_trace_all) { + trace.init("io: read", &image_ctx.trace_endpoint); + trace.event("init"); + } + + aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_READ); + ldout(cct, 20) << "ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << ", off=" << off << ", " + << "len=" << len << ", " << "flags=" << op_flags << dendl; + + if (native_async && image_ctx.event_socket.is_valid()) { + aio_comp->set_event_notify(true); + } + + if (!is_valid_io(image_ctx, aio_comp)) { + return; + } + + auto req = io::ImageDispatchSpec::create_read( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{off, len}}, io::ImageArea::DATA, std::move(read_result), + image_ctx.get_data_io_context(), op_flags, 0, trace); + req->send(); +} + +template +void Io::aio_write(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off, + uint64_t len, bufferlist &&bl, int op_flags, + bool native_async) { + auto cct = image_ctx.cct; + FUNCTRACE(cct); + ZTracer::Trace trace; + if (image_ctx.blkin_trace_all) { + trace.init("io: write", &image_ctx.trace_endpoint); + trace.event("init"); + } + + aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_WRITE); + ldout(cct, 20) << "ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << ", off=" << off << ", " + << "len=" << len << ", flags=" << op_flags << dendl; + + if (native_async && image_ctx.event_socket.is_valid()) { + aio_comp->set_event_notify(true); + } + + if (!is_valid_io(image_ctx, aio_comp)) { + return; + } + + auto req = io::ImageDispatchSpec::create_write( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{off, len}}, io::ImageArea::DATA, std::move(bl), op_flags, trace); + req->send(); +} + +template +void Io::aio_discard(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off, + uint64_t len, uint32_t discard_granularity_bytes, + bool native_async) { + auto cct = image_ctx.cct; + FUNCTRACE(cct); + ZTracer::Trace 
trace; + if (image_ctx.blkin_trace_all) { + trace.init("io: discard", &image_ctx.trace_endpoint); + trace.event("init"); + } + + aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_DISCARD); + ldout(cct, 20) << "ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << ", off=" << off << ", " + << "len=" << len << dendl; + + if (native_async && image_ctx.event_socket.is_valid()) { + aio_comp->set_event_notify(true); + } + + if (!is_valid_io(image_ctx, aio_comp)) { + return; + } + + auto req = io::ImageDispatchSpec::create_discard( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{off, len}}, io::ImageArea::DATA, discard_granularity_bytes, trace); + req->send(); +} + +template +void Io::aio_write_same(I &image_ctx, io::AioCompletion *aio_comp, + uint64_t off, uint64_t len, bufferlist &&bl, + int op_flags, bool native_async) { + auto cct = image_ctx.cct; + FUNCTRACE(cct); + ZTracer::Trace trace; + if (image_ctx.blkin_trace_all) { + trace.init("io: writesame", &image_ctx.trace_endpoint); + trace.event("init"); + } + + aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_WRITESAME); + ldout(cct, 20) << "ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << ", off=" << off << ", " + << "len=" << len << ", data_len = " << bl.length() << ", " + << "flags=" << op_flags << dendl; + + if (native_async && image_ctx.event_socket.is_valid()) { + aio_comp->set_event_notify(true); + } + + if (!is_valid_io(image_ctx, aio_comp)) { + return; + } + + auto req = io::ImageDispatchSpec::create_write_same( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{off, len}}, io::ImageArea::DATA, std::move(bl), op_flags, trace); + req->send(); +} + +template +void Io::aio_write_zeroes(I& image_ctx, io::AioCompletion *aio_comp, + uint64_t off, uint64_t len, int zero_flags, + int op_flags, bool native_async) { + auto cct = image_ctx.cct; + FUNCTRACE(cct); + ZTracer::Trace trace; + if (image_ctx.blkin_trace_all) { + trace.init("io: 
write_zeroes", &image_ctx.trace_endpoint); + trace.event("init"); + } + + auto io_type = io::AIO_TYPE_DISCARD; + if ((zero_flags & RBD_WRITE_ZEROES_FLAG_THICK_PROVISION) != 0) { + zero_flags &= ~RBD_WRITE_ZEROES_FLAG_THICK_PROVISION; + io_type = io::AIO_TYPE_WRITESAME; + } + + aio_comp->init_time(util::get_image_ctx(&image_ctx), io_type); + ldout(cct, 20) << "ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << ", off=" << off << ", " + << "len=" << len << dendl; + + if (native_async && image_ctx.event_socket.is_valid()) { + aio_comp->set_event_notify(true); + } + + // validate the supported flags + if (zero_flags != 0U) { + aio_comp->fail(-EINVAL); + return; + } + + if (!is_valid_io(image_ctx, aio_comp)) { + return; + } + + if (io_type == io::AIO_TYPE_WRITESAME) { + // write-same needs to be aligned to its buffer but librbd has never forced + // block alignment. Hide that requirement from the user by adding optional + // writes. + const uint64_t data_length = 512; + uint64_t write_same_offset = p2roundup(off, data_length); + uint64_t write_same_offset_end = p2align(off + len, data_length); + uint64_t write_same_length = 0; + if (write_same_offset_end > write_same_offset) { + write_same_length = write_same_offset_end - write_same_offset; + } + + uint64_t prepend_offset = off; + uint64_t prepend_length = write_same_offset - off; + uint64_t append_offset = write_same_offset + write_same_length; + uint64_t append_length = len - prepend_length - write_same_length; + ldout(cct, 20) << "prepend_offset=" << prepend_offset << ", " + << "prepend_length=" << prepend_length << ", " + << "write_same_offset=" << write_same_offset << ", " + << "write_same_length=" << write_same_length << ", " + << "append_offset=" << append_offset << ", " + << "append_length=" << append_length << dendl; + ceph_assert(prepend_length + write_same_length + append_length == len); + + if (write_same_length <= data_length) { + // unaligned or small write-zeroes request -- use single write + 
bufferlist bl; + bl.append_zero(len); + + aio_comp->aio_type = io::AIO_TYPE_WRITE; + auto req = io::ImageDispatchSpec::create_write( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{off, len}}, io::ImageArea::DATA, std::move(bl), op_flags, trace); + req->send(); + return; + } else if (prepend_length == 0 && append_length == 0) { + // fully aligned -- use a single write-same image request + bufferlist bl; + bl.append_zero(data_length); + + auto req = io::ImageDispatchSpec::create_write_same( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{off, len}}, io::ImageArea::DATA, std::move(bl), op_flags, trace); + req->send(); + return; + } + + // to reach this point, we need at least one prepend/append write along with + // a write-same -- therefore we will need to wrap the provided AioCompletion + auto request_count = 1; + if (prepend_length > 0) { + ++request_count; + } + if (append_length > 0) { + ++request_count; + } + + ceph_assert(request_count > 1); + aio_comp->start_op(); + aio_comp->set_request_count(request_count); + + if (prepend_length > 0) { + bufferlist bl; + bl.append_zero(prepend_length); + + Context* prepend_ctx = new io::C_AioRequest(aio_comp); + auto prepend_aio_comp = io::AioCompletion::create_and_start( + prepend_ctx, &image_ctx, io::AIO_TYPE_WRITE); + auto prepend_req = io::ImageDispatchSpec::create_write( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, prepend_aio_comp, + {{prepend_offset, prepend_length}}, io::ImageArea::DATA, + std::move(bl), op_flags, trace); + prepend_req->send(); + } + + if (append_length > 0) { + bufferlist bl; + bl.append_zero(append_length); + + Context* append_ctx = new io::C_AioRequest(aio_comp); + auto append_aio_comp = io::AioCompletion::create_and_start( + append_ctx, &image_ctx, io::AIO_TYPE_WRITE); + auto append_req = io::ImageDispatchSpec::create_write( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, append_aio_comp, + {{append_offset, append_length}}, io::ImageArea::DATA, + 
std::move(bl), op_flags, trace); + append_req->send(); + } + + bufferlist bl; + bl.append_zero(data_length); + + Context* write_same_ctx = new io::C_AioRequest(aio_comp); + auto write_same_aio_comp = io::AioCompletion::create_and_start( + write_same_ctx, &image_ctx, io::AIO_TYPE_WRITESAME); + auto req = io::ImageDispatchSpec::create_write_same( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, write_same_aio_comp, + {{write_same_offset, write_same_length}}, io::ImageArea::DATA, + std::move(bl), op_flags, trace); + req->send(); + return; + } + + // enable partial discard (zeroing) of objects + uint32_t discard_granularity_bytes = 0; + + auto req = io::ImageDispatchSpec::create_discard( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{off, len}}, io::ImageArea::DATA, discard_granularity_bytes, trace); + req->send(); +} + +template +void Io::aio_compare_and_write(I &image_ctx, io::AioCompletion *aio_comp, + uint64_t off, uint64_t len, + bufferlist &&cmp_bl, + bufferlist &&bl, uint64_t *mismatch_off, + int op_flags, bool native_async) { + auto cct = image_ctx.cct; + FUNCTRACE(cct); + ZTracer::Trace trace; + if (image_ctx.blkin_trace_all) { + trace.init("io: compare_and_write", &image_ctx.trace_endpoint); + trace.event("init"); + } + + aio_comp->init_time(util::get_image_ctx(&image_ctx), + io::AIO_TYPE_COMPARE_AND_WRITE); + ldout(cct, 20) << "ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << ", off=" << off << ", " + << "len=" << len << dendl; + + if (native_async && image_ctx.event_socket.is_valid()) { + aio_comp->set_event_notify(true); + } + + if (!is_valid_io(image_ctx, aio_comp)) { + return; + } + + auto req = io::ImageDispatchSpec::create_compare_and_write( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{off, len}}, io::ImageArea::DATA, std::move(cmp_bl), std::move(bl), + mismatch_off, op_flags, trace); + req->send(); +} + +template +void Io::aio_flush(I &image_ctx, io::AioCompletion *aio_comp, + bool native_async) { + auto 
cct = image_ctx.cct; + FUNCTRACE(cct); + ZTracer::Trace trace; + if (image_ctx.blkin_trace_all) { + trace.init("io: flush", &image_ctx.trace_endpoint); + trace.event("init"); + } + + aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_FLUSH); + ldout(cct, 20) << "ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << dendl; + + if (native_async && image_ctx.event_socket.is_valid()) { + aio_comp->set_event_notify(true); + } + + if (!is_valid_io(image_ctx, aio_comp)) { + return; + } + + auto req = io::ImageDispatchSpec::create_flush( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + io::FLUSH_SOURCE_USER, trace); + req->send(); +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Io; diff --git a/src/librbd/api/Io.h b/src/librbd/api/Io.h new file mode 100644 index 000000000..4e2ec5028 --- /dev/null +++ b/src/librbd/api/Io.h @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef LIBRBD_API_IO_H +#define LIBRBD_API_IO_H + +#include "include/int_types.h" +#include "librbd/io/ReadResult.h" + +namespace librbd { + +struct ImageCtx; +namespace io { struct AioCompletion; } + +namespace api { + +template +struct Io { + static ssize_t read(ImageCtxT &image_ctx, uint64_t off, uint64_t len, + io::ReadResult &&read_result, int op_flags); + static ssize_t write(ImageCtxT &image_ctx, uint64_t off, uint64_t len, + bufferlist &&bl, int op_flags); + static ssize_t discard(ImageCtxT &image_ctx, uint64_t off, uint64_t len, + uint32_t discard_granularity_bytes); + static ssize_t write_same(ImageCtxT &image_ctx, uint64_t off, uint64_t len, + bufferlist &&bl, int op_flags); + static ssize_t write_zeroes(ImageCtxT &image_ctx, uint64_t off, uint64_t len, + int zero_flags, int op_flags); + static ssize_t compare_and_write(ImageCtxT &image_ctx, uint64_t off, + uint64_t len, bufferlist &&cmp_bl, + bufferlist &&bl, uint64_t *mismatch_off, + int op_flags); + static 
int flush(ImageCtxT &image_ctx); + + static void aio_read(ImageCtxT &image_ctx, io::AioCompletion *c, uint64_t off, + uint64_t len, io::ReadResult &&read_result, int op_flags, + bool native_async); + static void aio_write(ImageCtxT &image_ctx, io::AioCompletion *c, + uint64_t off, uint64_t len, bufferlist &&bl, + int op_flags, bool native_async); + static void aio_discard(ImageCtxT &image_ctx, io::AioCompletion *c, + uint64_t off, uint64_t len, + uint32_t discard_granularity_bytes, + bool native_async); + static void aio_write_same(ImageCtxT &image_ctx, io::AioCompletion *c, + uint64_t off, uint64_t len, bufferlist &&bl, + int op_flags, bool native_async); + static void aio_write_zeroes(ImageCtxT &image_ctx, io::AioCompletion *c, + uint64_t off, uint64_t len, int zero_flags, + int op_flags, bool native_async); + static void aio_compare_and_write(ImageCtxT &image_ctx, io::AioCompletion *c, + uint64_t off, uint64_t len, + bufferlist &&cmp_bl, bufferlist &&bl, + uint64_t *mismatch_off, int op_flags, + bool native_async); + static void aio_flush(ImageCtxT &image_ctx, io::AioCompletion *c, + bool native_async); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Io; + +#endif // LIBRBD_API_IO_H diff --git a/src/librbd/api/Migration.cc b/src/librbd/api/Migration.cc new file mode 100644 index 000000000..957c872ac --- /dev/null +++ b/src/librbd/api/Migration.cc @@ -0,0 +1,2126 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Migration.h" +#include "include/rados/librados.hpp" +#include "include/stringify.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/AsioEngine.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" +#include 
"librbd/api/Group.h" +#include "librbd/api/Image.h" +#include "librbd/api/Snapshot.h" +#include "librbd/api/Trash.h" +#include "librbd/deep_copy/Handler.h" +#include "librbd/deep_copy/ImageCopyRequest.h" +#include "librbd/deep_copy/MetadataCopyRequest.h" +#include "librbd/deep_copy/SnapshotCopyRequest.h" +#include "librbd/exclusive_lock/Policy.h" +#include "librbd/image/AttachChildRequest.h" +#include "librbd/image/AttachParentRequest.h" +#include "librbd/image/CloneRequest.h" +#include "librbd/image/CreateRequest.h" +#include "librbd/image/DetachChildRequest.h" +#include "librbd/image/DetachParentRequest.h" +#include "librbd/image/ListWatchersRequest.h" +#include "librbd/image/RemoveRequest.h" +#include "librbd/image/Types.h" +#include "librbd/internal.h" +#include "librbd/migration/FormatInterface.h" +#include "librbd/migration/OpenSourceImageRequest.h" +#include "librbd/migration/NativeFormat.h" +#include "librbd/mirror/DisableRequest.h" +#include "librbd/mirror/EnableRequest.h" + +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::Migration: " << __func__ << ": " + +namespace librbd { + +inline bool operator==(const linked_image_spec_t& rhs, + const linked_image_spec_t& lhs) { + bool result = (rhs.pool_id == lhs.pool_id && + rhs.pool_namespace == lhs.pool_namespace && + rhs.image_id == lhs.image_id); + return result; +} + +namespace api { + +using util::create_rados_callback; + +namespace { + +class MigrationProgressContext : public ProgressContext { +public: + MigrationProgressContext(librados::IoCtx& io_ctx, + const std::string &header_oid, + cls::rbd::MigrationState state, + ProgressContext *prog_ctx) + : m_io_ctx(io_ctx), m_header_oid(header_oid), m_state(state), + m_prog_ctx(prog_ctx), m_cct(reinterpret_cast(io_ctx.cct())), + m_lock(ceph::make_mutex( + util::unique_lock_name("librbd::api::MigrationProgressContext", + this))) { + ceph_assert(m_prog_ctx != nullptr); + } + + ~MigrationProgressContext() 
{ + wait_for_in_flight_updates(); + } + + int update_progress(uint64_t offset, uint64_t total) override { + ldout(m_cct, 20) << "offset=" << offset << ", total=" << total << dendl; + + m_prog_ctx->update_progress(offset, total); + + std::string description = stringify(offset * 100 / total) + "% complete"; + + send_state_description_update(description); + + return 0; + } + +private: + librados::IoCtx& m_io_ctx; + std::string m_header_oid; + cls::rbd::MigrationState m_state; + ProgressContext *m_prog_ctx; + + CephContext* m_cct; + mutable ceph::mutex m_lock; + ceph::condition_variable m_cond; + std::string m_state_description; + bool m_pending_update = false; + int m_in_flight_state_updates = 0; + + void send_state_description_update(const std::string &description) { + std::lock_guard locker{m_lock}; + + if (description == m_state_description) { + return; + } + + m_state_description = description; + + if (m_in_flight_state_updates > 0) { + m_pending_update = true; + return; + } + + set_state_description(); + } + + void set_state_description() { + ldout(m_cct, 20) << "state_description=" << m_state_description << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + librados::ObjectWriteOperation op; + cls_client::migration_set_state(&op, m_state, m_state_description); + + using klass = MigrationProgressContext; + librados::AioCompletion *comp = + create_rados_callback(this); + int r = m_io_ctx.aio_operate(m_header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); + + m_in_flight_state_updates++; + } + + void handle_set_state_description(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + m_in_flight_state_updates--; + + if (r < 0) { + lderr(m_cct) << "failed to update migration state: " << cpp_strerror(r) + << dendl; + } else if (m_pending_update) { + set_state_description(); + m_pending_update = false; + } else { + m_cond.notify_all(); + } + } + + void wait_for_in_flight_updates() { + std::unique_lock 
locker{m_lock}; + + ldout(m_cct, 20) << "m_in_flight_state_updates=" + << m_in_flight_state_updates << dendl; + m_pending_update = false; + m_cond.wait(locker, [this] { return m_in_flight_state_updates <= 0; }); + } +}; + +int trash_search(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, std::string *image_id) { + std::vector entries; + + int r = Trash<>::list(io_ctx, entries, false); + if (r < 0) { + return r; + } + + for (auto &entry : entries) { + if (entry.source == source && entry.name == image_name) { + *image_id = entry.id; + return 0; + } + } + + return -ENOENT; +} + +template +int open_images(librados::IoCtx& io_ctx, const std::string &image_name, + I **src_image_ctx, I **dst_image_ctx, + cls::rbd::MigrationSpec* src_migration_spec, + cls::rbd::MigrationSpec* dst_migration_spec, + bool skip_open_dst_image) { + CephContext* cct = reinterpret_cast(io_ctx.cct()); + + *src_image_ctx = nullptr; + *dst_image_ctx = nullptr; + + ldout(cct, 10) << "trying to open image by name " << io_ctx.get_pool_name() + << "/" << image_name << dendl; + auto image_ctx = I::create(image_name, "", nullptr, io_ctx, false); + int r = image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING); + if (r == -ENOENT) { + // presume user passed the source image so we need to search the trash + ldout(cct, 10) << "Source image is not found. 
Trying trash" << dendl; + + std::string src_image_id; + r = trash_search(io_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION, image_name, + &src_image_id); + if (r < 0) { + lderr(cct) << "failed to determine image id: " << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 10) << "source image id from trash: " << src_image_id << dendl; + image_ctx = I::create(image_name, src_image_id, nullptr, io_ctx, false); + r = image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING); + } + + if (r < 0) { + if (r != -ENOENT) { + lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl; + return r; + } + image_ctx = nullptr; + } + + BOOST_SCOPE_EXIT_TPL(&r, &image_ctx, src_image_ctx, dst_image_ctx) { + if (r != 0) { + if (*src_image_ctx != nullptr) { + (*src_image_ctx)->state->close(); + } + if (*dst_image_ctx != nullptr) { + (*dst_image_ctx)->state->close(); + } + if (image_ctx != nullptr) { + image_ctx->state->close(); + } + } + } BOOST_SCOPE_EXIT_END; + + // The opened image is either a source or destination + cls::rbd::MigrationSpec migration_spec; + r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid, + &migration_spec); + if (r < 0) { + lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 10) << "migration spec: " << migration_spec << dendl; + if (migration_spec.header_type == cls::rbd::MIGRATION_HEADER_TYPE_SRC) { + ldout(cct, 10) << "the source image is opened" << dendl; + *src_image_ctx = image_ctx; + *src_migration_spec = migration_spec; + image_ctx = nullptr; + } else if (migration_spec.header_type == + cls::rbd::MIGRATION_HEADER_TYPE_DST) { + ldout(cct, 10) << "the destination image is opened" << dendl; + std::string image_id = image_ctx->id; + image_ctx->state->close(); + image_ctx = I::create(image_name, image_id, nullptr, io_ctx, false); + + if (!skip_open_dst_image) { + ldout(cct, 10) << "re-opening the destination image" << dendl; + r = image_ctx->state->open(0); + if (r < 0) { + 
image_ctx = nullptr; + lderr(cct) << "failed to re-open destination image: " << cpp_strerror(r) + << dendl; + return r; + } + } + + *dst_image_ctx = image_ctx; + *dst_migration_spec = migration_spec; + image_ctx = nullptr; + } else { + lderr(cct) << "unexpected migration header type: " + << migration_spec.header_type << dendl; + r = -EINVAL; + return r; + } + + // attempt to open the other (paired) image + I** other_image_ctx = nullptr; + std::string other_image_type; + std::string other_image_name; + std::string other_image_id; + cls::rbd::MigrationSpec* other_migration_spec = nullptr; + librados::IoCtx other_io_ctx; + + int flags = OPEN_FLAG_IGNORE_MIGRATING; + if (*src_image_ctx == nullptr && + dst_migration_spec->source_spec.empty()) { + r = util::create_ioctx(io_ctx, "source image", migration_spec.pool_id, + migration_spec.pool_namespace, &other_io_ctx); + if (r < 0) { + return r; + } + + other_image_type = "source"; + other_image_ctx = src_image_ctx; + other_migration_spec = src_migration_spec; + other_image_name = migration_spec.image_name; + other_image_id = migration_spec.image_id; + + if (other_image_id.empty()) { + ldout(cct, 20) << "trying to open v1 image by name " + << other_io_ctx.get_pool_name() << "/" + << other_image_name << dendl; + flags |= OPEN_FLAG_OLD_FORMAT; + } else { + ldout(cct, 20) << "trying to open v2 image by id " + << other_io_ctx.get_pool_name() << "/" + << other_image_id << dendl; + } + + *src_image_ctx = I::create(other_image_name, other_image_id, nullptr, + other_io_ctx, false); + } else if (*dst_image_ctx == nullptr) { + r = util::create_ioctx(io_ctx, "destination image", migration_spec.pool_id, + migration_spec.pool_namespace, &other_io_ctx); + if (r < 0) { + return r; + } + + other_image_name = migration_spec.image_name; + if (skip_open_dst_image) { + other_image_id = migration_spec.image_id; + } else { + other_image_type = "destination"; + other_image_ctx = dst_image_ctx; + other_migration_spec = dst_migration_spec; + 
other_image_id = migration_spec.image_id; + } + + *dst_image_ctx = I::create(other_image_name, other_image_id, nullptr, + other_io_ctx, false); + } + + if (other_image_ctx != nullptr) { + r = (*other_image_ctx)->state->open(flags); + if (r < 0) { + lderr(cct) << "failed to open " << other_image_type << " image " + << other_io_ctx.get_pool_name() + << "/" << (other_image_id.empty() ? + other_image_name : other_image_id) + << ": " << cpp_strerror(r) << dendl; + *other_image_ctx = nullptr; + return r; + } + + r = cls_client::migration_get(&(*other_image_ctx)->md_ctx, + (*other_image_ctx)->header_oid, + other_migration_spec); + if (r < 0) { + lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 20) << other_image_type << " migration spec: " + << *other_migration_spec << dendl; + } + + if (!skip_open_dst_image) { + // legacy clients will only store status in the source images + if (dst_migration_spec->source_spec.empty()) { + dst_migration_spec->state = migration_spec.state; + dst_migration_spec->state_description = + migration_spec.state_description; + } + } + + return 0; +} + +class SteppedProgressContext : public ProgressContext { +public: + SteppedProgressContext(ProgressContext* progress_ctx, size_t total_steps) + : m_progress_ctx(progress_ctx), m_total_steps(total_steps) { + } + + void next_step() { + ceph_assert(m_current_step < m_total_steps); + ++m_current_step; + } + + int update_progress(uint64_t object_number, + uint64_t object_count) override { + return m_progress_ctx->update_progress( + object_number + (object_count * (m_current_step - 1)), + object_count * m_total_steps); + } + +private: + ProgressContext* m_progress_ctx; + size_t m_total_steps; + size_t m_current_step = 1; +}; + +} // anonymous namespace + +template +int Migration::prepare(librados::IoCtx& io_ctx, + const std::string &image_name, + librados::IoCtx& dest_io_ctx, + const std::string &dest_image_name_, + ImageOptions& opts) { + 
CephContext* cct = reinterpret_cast(io_ctx.cct()); + + std::string dest_image_name = dest_image_name_.empty() ? image_name : + dest_image_name_; + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << " -> " + << dest_io_ctx.get_pool_name() << "/" << dest_image_name + << ", opts=" << opts << dendl; + + auto src_image_ctx = I::create(image_name, "", nullptr, io_ctx, false); + int r = src_image_ctx->state->open(0); + if (r < 0) { + lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl; + return r; + } + BOOST_SCOPE_EXIT_TPL(src_image_ctx) { + src_image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + std::list watchers; + int flags = librbd::image::LIST_WATCHERS_FILTER_OUT_MY_INSTANCE | + librbd::image::LIST_WATCHERS_FILTER_OUT_MIRROR_INSTANCES; + C_SaferCond on_list_watchers; + auto list_watchers_request = librbd::image::ListWatchersRequest::create( + *src_image_ctx, flags, &watchers, &on_list_watchers); + list_watchers_request->send(); + r = on_list_watchers.wait(); + if (r < 0) { + lderr(cct) << "failed listing watchers:" << cpp_strerror(r) << dendl; + return r; + } + if (!watchers.empty()) { + lderr(cct) << "image has watchers - not migrating" << dendl; + return -EBUSY; + } + + uint64_t format = 2; + if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) { + opts.set(RBD_IMAGE_OPTION_FORMAT, format); + } + if (format != 2) { + lderr(cct) << "unsupported destination image format: " << format << dendl; + return -EINVAL; + } + + uint64_t features; + { + std::shared_lock image_locker{src_image_ctx->image_lock}; + features = src_image_ctx->features; + } + opts.get(RBD_IMAGE_OPTION_FEATURES, &features); + if ((features & ~RBD_FEATURES_ALL) != 0) { + lderr(cct) << "librbd does not support requested features" << dendl; + return -ENOSYS; + } + opts.set(RBD_IMAGE_OPTION_FEATURES, features); + + uint64_t order = src_image_ctx->order; + if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) { + opts.set(RBD_IMAGE_OPTION_ORDER, order); + } + r = 
image::CreateRequest::validate_order(cct, order); + if (r < 0) { + return r; + } + + uint64_t stripe_unit = src_image_ctx->stripe_unit; + if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) { + opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); + } + uint64_t stripe_count = src_image_ctx->stripe_count; + if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) { + opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); + } + + uint64_t flatten = 0; + if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) { + opts.unset(RBD_IMAGE_OPTION_FLATTEN); + } + + ldout(cct, 20) << "updated opts=" << opts << dendl; + + auto dst_image_ctx = I::create( + dest_image_name, util::generate_image_id(dest_io_ctx), nullptr, + dest_io_ctx, false); + src_image_ctx->image_lock.lock_shared(); + cls::rbd::MigrationSpec dst_migration_spec{ + cls::rbd::MIGRATION_HEADER_TYPE_DST, + src_image_ctx->md_ctx.get_id(), src_image_ctx->md_ctx.get_namespace(), + src_image_ctx->name, src_image_ctx->id, "", {}, 0, false, + cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, flatten > 0, + cls::rbd::MIGRATION_STATE_PREPARING, ""}; + src_image_ctx->image_lock.unlock_shared(); + + Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec, + opts, nullptr); + r = migration.prepare(); + + return r; +} + +template +int Migration::prepare_import( + const std::string& source_spec, librados::IoCtx& dest_io_ctx, + const std::string &dest_image_name, ImageOptions& opts) { + if (source_spec.empty() || !dest_io_ctx.is_valid() || + dest_image_name.empty()) { + return -EINVAL; + } + + auto cct = reinterpret_cast(dest_io_ctx.cct()); + ldout(cct, 10) << source_spec << " -> " + << dest_io_ctx.get_pool_name() << "/" + << dest_image_name << ", opts=" << opts << dendl; + + I* src_image_ctx = nullptr; + C_SaferCond open_ctx; + auto req = migration::OpenSourceImageRequest::create( + dest_io_ctx, nullptr, CEPH_NOSNAP, + {-1, "", "", "", source_spec, {}, 0, false}, &src_image_ctx, &open_ctx); + req->send(); + + 
int r = open_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to open source image: " << cpp_strerror(r) << dendl; + return r; + } + + auto asio_engine = src_image_ctx->asio_engine; + BOOST_SCOPE_EXIT_TPL(src_image_ctx) { + src_image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + uint64_t image_format = 2; + if (opts.get(RBD_IMAGE_OPTION_FORMAT, &image_format) != 0) { + opts.set(RBD_IMAGE_OPTION_FORMAT, image_format); + } + if (image_format != 2) { + lderr(cct) << "unsupported destination image format: " << image_format + << dendl; + return -EINVAL; + } + + ldout(cct, 20) << "updated opts=" << opts << dendl; + + // use json-spirit to clean-up json formatting + json_spirit::mObject source_spec_object; + json_spirit::mValue json_root; + if(json_spirit::read(source_spec, json_root)) { + try { + source_spec_object = json_root.get_obj(); + } catch (std::runtime_error&) { + lderr(cct) << "failed to clean source spec" << dendl; + return -EINVAL; + } + } + + auto dst_image_ctx = I::create( + dest_image_name, util::generate_image_id(dest_io_ctx), nullptr, + dest_io_ctx, false); + cls::rbd::MigrationSpec dst_migration_spec{ + cls::rbd::MIGRATION_HEADER_TYPE_DST, -1, "", "", "", + json_spirit::write(source_spec_object), {}, + 0, false, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, true, + cls::rbd::MIGRATION_STATE_PREPARING, ""}; + + Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec, + opts, nullptr); + return migration.prepare_import(); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Migration::execute(librados::IoCtx& io_ctx, + const std::string &image_name, + ProgressContext &prog_ctx) { + CephContext* cct = reinterpret_cast(io_ctx.cct()); + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl; + + I *src_image_ctx; + I *dst_image_ctx; + cls::rbd::MigrationSpec src_migration_spec; + cls::rbd::MigrationSpec dst_migration_spec; + int r = open_images(io_ctx, image_name, &src_image_ctx, &dst_image_ctx, + &src_migration_spec, 
&dst_migration_spec, false); + if (r < 0) { + return r; + } + + // ensure the destination loads the migration info + dst_image_ctx->ignore_migrating = false; + r = dst_image_ctx->state->refresh(); + if (r < 0) { + lderr(cct) << "failed to refresh destination image: " << cpp_strerror(r) + << dendl; + return r; + } + + BOOST_SCOPE_EXIT_TPL(src_image_ctx, dst_image_ctx) { + dst_image_ctx->state->close(); + if (src_image_ctx != nullptr) { + src_image_ctx->state->close(); + } + } BOOST_SCOPE_EXIT_END; + + if (dst_migration_spec.state != cls::rbd::MIGRATION_STATE_PREPARED && + dst_migration_spec.state != cls::rbd::MIGRATION_STATE_EXECUTING) { + lderr(cct) << "current migration state is '" << dst_migration_spec.state + << "' (should be 'prepared')" << dendl; + return -EINVAL; + } + + ldout(cct, 5) << "migrating "; + if (!dst_migration_spec.source_spec.empty()) { + *_dout << dst_migration_spec.source_spec; + } else { + *_dout << src_image_ctx->md_ctx.get_pool_name() << "/" + << src_image_ctx->name; + } + *_dout << " -> " << dst_image_ctx->md_ctx.get_pool_name() << "/" + << dst_image_ctx->name << dendl; + + ImageOptions opts; + Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec, + opts, &prog_ctx); + r = migration.execute(); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Migration::abort(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext &prog_ctx) { + CephContext* cct = reinterpret_cast(io_ctx.cct()); + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl; + + I *src_image_ctx; + I *dst_image_ctx; + cls::rbd::MigrationSpec src_migration_spec; + cls::rbd::MigrationSpec dst_migration_spec; + int r = open_images(io_ctx, image_name, &src_image_ctx, &dst_image_ctx, + &src_migration_spec, &dst_migration_spec, true); + if (r < 0) { + return r; + } + + ldout(cct, 5) << "canceling incomplete migration "; + if (!dst_migration_spec.source_spec.empty()) { + *_dout << dst_migration_spec.source_spec; + } else 
{ + *_dout << src_image_ctx->md_ctx.get_pool_name() << "/" + << src_image_ctx->name; + } + *_dout << " -> " << dst_image_ctx->md_ctx.get_pool_name() << "/" + << dst_image_ctx->name << dendl; + + ImageOptions opts; + Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec, + opts, &prog_ctx); + r = migration.abort(); + + if (src_image_ctx != nullptr) { + src_image_ctx->state->close(); + } + + if (r < 0) { + return r; + } + + return 0; +} + +template +int Migration::commit(librados::IoCtx& io_ctx, + const std::string &image_name, + ProgressContext &prog_ctx) { + CephContext* cct = reinterpret_cast(io_ctx.cct()); + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl; + + I *src_image_ctx; + I *dst_image_ctx; + cls::rbd::MigrationSpec src_migration_spec; + cls::rbd::MigrationSpec dst_migration_spec; + int r = open_images(io_ctx, image_name, &src_image_ctx, &dst_image_ctx, + &src_migration_spec, &dst_migration_spec, false); + if (r < 0) { + return r; + } + + if (dst_migration_spec.state != cls::rbd::MIGRATION_STATE_EXECUTED) { + lderr(cct) << "current migration state is '" << dst_migration_spec.state + << "' (should be 'executed')" << dendl; + dst_image_ctx->state->close(); + if (src_image_ctx != nullptr) { + src_image_ctx->state->close(); + } + return -EINVAL; + } + + // ensure the destination loads the migration info + dst_image_ctx->ignore_migrating = false; + r = dst_image_ctx->state->refresh(); + if (r < 0) { + lderr(cct) << "failed to refresh destination image: " << cpp_strerror(r) + << dendl; + return r; + } + + ldout(cct, 5) << "migrating "; + if (!dst_migration_spec.source_spec.empty()) { + *_dout << dst_migration_spec.source_spec; + } else { + *_dout << src_image_ctx->md_ctx.get_pool_name() << "/" + << src_image_ctx->name; + } + *_dout << " -> " << dst_image_ctx->md_ctx.get_pool_name() << "/" + << dst_image_ctx->name << dendl; + + ImageOptions opts; + Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec, + 
opts, &prog_ctx); + r = migration.commit(); + + // image_ctx is closed in commit when removing src image + if (r < 0) { + return r; + } + + return 0; +} + +template +int Migration::status(librados::IoCtx& io_ctx, + const std::string &image_name, + image_migration_status_t *status) { + CephContext* cct = reinterpret_cast(io_ctx.cct()); + + ldout(cct, 10) << io_ctx.get_pool_name() << "/" << image_name << dendl; + + I *src_image_ctx; + I *dst_image_ctx; + cls::rbd::MigrationSpec src_migration_spec; + cls::rbd::MigrationSpec dst_migration_spec; + int r = open_images(io_ctx, image_name, &src_image_ctx, &dst_image_ctx, + &src_migration_spec, &dst_migration_spec, false); + if (r < 0) { + return r; + } + + ldout(cct, 5) << "migrating "; + if (!dst_migration_spec.source_spec.empty()) { + *_dout << dst_migration_spec.source_spec; + } else { + *_dout << src_image_ctx->md_ctx.get_pool_name() << "/" + << src_image_ctx->name; + } + *_dout << " -> " << dst_image_ctx->md_ctx.get_pool_name() << "/" + << dst_image_ctx->name << dendl; + + ImageOptions opts; + Migration migration(src_image_ctx, dst_image_ctx, dst_migration_spec, + opts, nullptr); + r = migration.status(status); + + dst_image_ctx->state->close(); + if (src_image_ctx != nullptr) { + src_image_ctx->state->close(); + } + + if (r < 0) { + return r; + } + + return 0; +} + +template +int Migration::get_source_spec(I* image_ctx, std::string* source_spec) { + auto cct = image_ctx->cct; + ldout(cct, 10) << dendl; + + image_ctx->image_lock.lock_shared(); + auto migration_info = image_ctx->migration_info; + image_ctx->image_lock.unlock_shared(); + + if (migration_info.empty()) { + // attempt to directly read the spec in case the state is EXECUTED + cls::rbd::MigrationSpec migration_spec; + int r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid, + &migration_spec); + if (r == -ENOENT) { + return r; + } else if (r < 0) { + lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r) + << dendl; + 
return r; + } + + migration_info = { + migration_spec.pool_id, migration_spec.pool_namespace, + migration_spec.image_name, migration_spec.image_id, + migration_spec.source_spec, {}, 0, false}; + } + + if (!migration_info.source_spec.empty()) { + *source_spec = migration_info.source_spec; + } else { + // legacy migration source + *source_spec = migration::NativeFormat::build_source_spec( + migration_info.pool_id, + migration_info.pool_namespace, + migration_info.image_name, + migration_info.image_id); + } + + return 0; +} + +template +Migration::Migration(ImageCtx* src_image_ctx, + ImageCtx* dst_image_ctx, + const cls::rbd::MigrationSpec& dst_migration_spec, + ImageOptions& opts, ProgressContext *prog_ctx) + : m_cct(dst_image_ctx->cct), + m_src_image_ctx(src_image_ctx), m_dst_image_ctx(dst_image_ctx), + m_dst_io_ctx(dst_image_ctx->md_ctx), m_dst_image_name(dst_image_ctx->name), + m_dst_image_id(dst_image_ctx->id), + m_dst_header_oid(util::header_name(m_dst_image_id)), + m_image_options(opts), m_flatten(dst_migration_spec.flatten), + m_mirroring(dst_migration_spec.mirroring), + m_mirror_image_mode(dst_migration_spec.mirror_image_mode), + m_prog_ctx(prog_ctx), + m_src_migration_spec(cls::rbd::MIGRATION_HEADER_TYPE_SRC, + m_dst_io_ctx.get_id(), m_dst_io_ctx.get_namespace(), + m_dst_image_name, m_dst_image_id, "", {}, 0, + m_mirroring, m_mirror_image_mode, m_flatten, + dst_migration_spec.state, + dst_migration_spec.state_description), + m_dst_migration_spec(dst_migration_spec) { + m_dst_io_ctx.dup(dst_image_ctx->md_ctx); +} + +template +int Migration::prepare() { + ldout(m_cct, 10) << dendl; + + BOOST_SCOPE_EXIT_TPL(&m_dst_image_ctx) { + if (m_dst_image_ctx != nullptr) { + m_dst_image_ctx->state->close(); + } + } BOOST_SCOPE_EXIT_END; + + int r = validate_src_snaps(m_src_image_ctx); + if (r < 0) { + return r; + } + + r = disable_mirroring(m_src_image_ctx, &m_mirroring, &m_mirror_image_mode); + if (r < 0) { + return r; + } + + r = unlink_src_image(m_src_image_ctx); + if 
(r < 0) { + enable_mirroring(m_src_image_ctx, m_mirroring, m_mirror_image_mode); + return r; + } + + r = set_src_migration(m_src_image_ctx); + if (r < 0) { + relink_src_image(m_src_image_ctx); + enable_mirroring(m_src_image_ctx, m_mirroring, m_mirror_image_mode); + return r; + } + + r = create_dst_image(&m_dst_image_ctx); + if (r < 0) { + abort(); + return r; + } + + ldout(m_cct, 10) << "succeeded" << dendl; + + return 0; +} + +template +int Migration::prepare_import() { + ldout(m_cct, 10) << dendl; + + BOOST_SCOPE_EXIT_TPL(&m_dst_image_ctx) { + if (m_dst_image_ctx != nullptr) { + m_dst_image_ctx->state->close(); + } + } BOOST_SCOPE_EXIT_END; + + int r = create_dst_image(&m_dst_image_ctx); + if (r < 0) { + abort(); + return r; + } + + return 0; +} + +template +int Migration::execute() { + ldout(m_cct, 10) << dendl; + + int r = set_state(cls::rbd::MIGRATION_STATE_EXECUTING, ""); + if (r < 0) { + return r; + } + + { + MigrationProgressContext dst_prog_ctx( + m_dst_image_ctx->md_ctx, m_dst_image_ctx->header_oid, + cls::rbd::MIGRATION_STATE_EXECUTING, m_prog_ctx); + std::optional src_prog_ctx; + if (m_src_image_ctx != nullptr) { + src_prog_ctx.emplace(m_src_image_ctx->md_ctx, m_src_image_ctx->header_oid, + cls::rbd::MIGRATION_STATE_EXECUTING, &dst_prog_ctx); + } + + while (true) { + r = m_dst_image_ctx->operations->migrate( + *(src_prog_ctx ? 
&src_prog_ctx.value() : &dst_prog_ctx)); + if (r == -EROFS) { + std::shared_lock owner_locker{m_dst_image_ctx->owner_lock}; + if (m_dst_image_ctx->exclusive_lock != nullptr && + !m_dst_image_ctx->exclusive_lock->accept_ops()) { + ldout(m_cct, 5) << "lost exclusive lock, retrying remote" << dendl; + continue; + } + } + break; + } + } + + if (r < 0) { + lderr(m_cct) << "migration failed: " << cpp_strerror(r) << dendl; + return r; + } + + r = set_state(cls::rbd::MIGRATION_STATE_EXECUTED, ""); + if (r < 0) { + return r; + } + + m_dst_image_ctx->notify_update(); + + ldout(m_cct, 10) << "succeeded" << dendl; + + return 0; +} + +template +int Migration::abort() { + ldout(m_cct, 10) << dendl; + + int r; + if (m_src_image_ctx != nullptr) { + m_src_image_ctx->owner_lock.lock_shared(); + if (m_src_image_ctx->exclusive_lock != nullptr && + !m_src_image_ctx->exclusive_lock->is_lock_owner()) { + C_SaferCond ctx; + m_src_image_ctx->exclusive_lock->acquire_lock(&ctx); + m_src_image_ctx->owner_lock.unlock_shared(); + r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "error acquiring exclusive lock: " << cpp_strerror(r) + << dendl; + return r; + } + } else { + m_src_image_ctx->owner_lock.unlock_shared(); + } + } + + group_info_t group_info; + group_info.pool = -1; + + r = m_dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING); + if (r < 0) { + ldout(m_cct, 1) << "failed to open destination image: " << cpp_strerror(r) + << dendl; + m_dst_image_ctx = nullptr; + } else { + BOOST_SCOPE_EXIT_TPL(&m_dst_image_ctx) { + if (m_dst_image_ctx != nullptr) { + m_dst_image_ctx->state->close(); + } + } BOOST_SCOPE_EXIT_END; + + std::list watchers; + int flags = librbd::image::LIST_WATCHERS_FILTER_OUT_MY_INSTANCE | + librbd::image::LIST_WATCHERS_FILTER_OUT_MIRROR_INSTANCES; + C_SaferCond on_list_watchers; + auto list_watchers_request = librbd::image::ListWatchersRequest::create( + *m_dst_image_ctx, flags, &watchers, &on_list_watchers); + list_watchers_request->send(); + r = 
on_list_watchers.wait(); + if (r < 0) { + lderr(m_cct) << "failed listing watchers:" << cpp_strerror(r) << dendl; + return r; + } + if (!watchers.empty()) { + lderr(m_cct) << "image has watchers - cannot abort migration" << dendl; + return -EBUSY; + } + + // ensure destination image is now read-only + r = set_state(cls::rbd::MIGRATION_STATE_ABORTING, ""); + if (r < 0) { + return r; + } + + SteppedProgressContext progress_ctx( + m_prog_ctx, (m_src_image_ctx != nullptr ? 2 : 1)); + if (m_src_image_ctx != nullptr) { + // copy dst HEAD -> src HEAD + revert_data(m_dst_image_ctx, m_src_image_ctx, &progress_ctx); + progress_ctx.next_step(); + + ldout(m_cct, 10) << "relinking children" << dendl; + r = relink_children(m_dst_image_ctx, m_src_image_ctx); + if (r < 0) { + return r; + } + } + + ldout(m_cct, 10) << "removing dst image snapshots" << dendl; + std::vector snaps; + r = Snapshot::list(m_dst_image_ctx, snaps); + if (r < 0) { + lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r) + << dendl; + return r; + } + + for (auto &snap : snaps) { + librbd::NoOpProgressContext prog_ctx; + int r = Snapshot::remove(m_dst_image_ctx, snap.name.c_str(), + RBD_SNAP_REMOVE_UNPROTECT, prog_ctx); + if (r < 0) { + lderr(m_cct) << "failed removing snapshot: " << cpp_strerror(r) + << dendl; + return r; + } + } + + ldout(m_cct, 10) << "removing group" << dendl; + + r = remove_group(m_dst_image_ctx, &group_info); + if (r < 0 && r != -ENOENT) { + return r; + } + + ldout(m_cct, 10) << "removing dst image" << dendl; + + ceph_assert(m_dst_image_ctx->ignore_migrating); + + auto asio_engine = m_dst_image_ctx->asio_engine; + librados::IoCtx dst_io_ctx(m_dst_image_ctx->md_ctx); + + C_SaferCond on_remove; + auto req = librbd::image::RemoveRequest<>::create( + dst_io_ctx, m_dst_image_ctx, false, false, progress_ctx, + asio_engine->get_work_queue(), &on_remove); + req->send(); + r = on_remove.wait(); + + m_dst_image_ctx = nullptr; + + if (r < 0) { + lderr(m_cct) << "failed removing 
destination image '" + << dst_io_ctx.get_pool_name() << "/" << m_dst_image_name + << " (" << m_dst_image_id << ")': " << cpp_strerror(r) + << dendl; + return r; + } + } + + if (m_src_image_ctx != nullptr) { + r = relink_src_image(m_src_image_ctx); + if (r < 0) { + return r; + } + + r = add_group(m_src_image_ctx, group_info); + if (r < 0) { + return r; + } + + r = remove_migration(m_src_image_ctx); + if (r < 0) { + return r; + } + + r = enable_mirroring(m_src_image_ctx, m_mirroring, m_mirror_image_mode); + if (r < 0) { + return r; + } + } + + ldout(m_cct, 10) << "succeeded" << dendl; + + return 0; +} + +template +int Migration::commit() { + ldout(m_cct, 10) << dendl; + + BOOST_SCOPE_EXIT_TPL(&m_dst_image_ctx, &m_src_image_ctx) { + m_dst_image_ctx->state->close(); + if (m_src_image_ctx != nullptr) { + m_src_image_ctx->state->close(); + } + } BOOST_SCOPE_EXIT_END; + + int r = remove_migration(m_dst_image_ctx); + if (r < 0) { + return r; + } + + if (m_src_image_ctx != nullptr) { + r = remove_src_image(&m_src_image_ctx); + if (r < 0) { + return r; + } + } + + r = enable_mirroring(m_dst_image_ctx, m_mirroring, m_mirror_image_mode); + if (r < 0) { + return r; + } + + ldout(m_cct, 10) << "succeeded" << dendl; + + return 0; +} + +template +int Migration::status(image_migration_status_t *status) { + ldout(m_cct, 10) << dendl; + + status->source_pool_id = m_dst_migration_spec.pool_id; + status->source_pool_namespace = m_dst_migration_spec.pool_namespace; + status->source_image_name = m_dst_migration_spec.image_name; + status->source_image_id = m_dst_migration_spec.image_id; + status->dest_pool_id = m_src_migration_spec.pool_id; + status->dest_pool_namespace = m_src_migration_spec.pool_namespace; + status->dest_image_name = m_src_migration_spec.image_name; + status->dest_image_id = m_src_migration_spec.image_id; + + switch (m_src_migration_spec.state) { + case cls::rbd::MIGRATION_STATE_ERROR: + status->state = RBD_IMAGE_MIGRATION_STATE_ERROR; + break; + case 
cls::rbd::MIGRATION_STATE_PREPARING: + status->state = RBD_IMAGE_MIGRATION_STATE_PREPARING; + break; + case cls::rbd::MIGRATION_STATE_PREPARED: + status->state = RBD_IMAGE_MIGRATION_STATE_PREPARED; + break; + case cls::rbd::MIGRATION_STATE_EXECUTING: + status->state = RBD_IMAGE_MIGRATION_STATE_EXECUTING; + break; + case cls::rbd::MIGRATION_STATE_EXECUTED: + status->state = RBD_IMAGE_MIGRATION_STATE_EXECUTED; + break; + default: + status->state = RBD_IMAGE_MIGRATION_STATE_UNKNOWN; + break; + } + + status->state_description = m_src_migration_spec.state_description; + + return 0; +} + +template +int Migration::set_state(I* image_ctx, const std::string& image_description, + cls::rbd::MigrationState state, + const std::string &description) { + int r = cls_client::migration_set_state(&image_ctx->md_ctx, + image_ctx->header_oid, + state, description); + if (r < 0) { + lderr(m_cct) << "failed to set " << image_description << " " + << "migration header: " << cpp_strerror(r) << dendl; + return r; + } + return 0; +} + +template +int Migration::set_state(cls::rbd::MigrationState state, + const std::string &description) { + int r; + if (m_src_image_ctx != nullptr) { + r = set_state(m_src_image_ctx, "source", state, description); + if (r < 0) { + return r; + } + } + + r = set_state(m_dst_image_ctx, "destination", state, description); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Migration::list_src_snaps(I* image_ctx, + std::vector *snaps) { + ldout(m_cct, 10) << dendl; + + int r = Snapshot::list(image_ctx, *snaps); + if (r < 0) { + lderr(m_cct) << "failed listing snapshots: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto &snap : *snaps) { + librbd::snap_namespace_type_t namespace_type; + r = Snapshot::get_namespace_type(image_ctx, snap.id, + &namespace_type); + if (r < 0) { + lderr(m_cct) << "error getting snap namespace type: " << cpp_strerror(r) + << dendl; + return r; + } + + if (namespace_type != RBD_SNAP_NAMESPACE_TYPE_USER) { + if 
(namespace_type == RBD_SNAP_NAMESPACE_TYPE_TRASH) { + lderr(m_cct) << "image has snapshots with linked clones that must be " + << "deleted or flattened before the image can be migrated" + << dendl; + } else { + lderr(m_cct) << "image has non-user type snapshots " + << "that are not supported by migration" << dendl; + } + return -EBUSY; + } + } + + return 0; +} + +template +int Migration::validate_src_snaps(I* image_ctx) { + ldout(m_cct, 10) << dendl; + + std::vector snaps; + int r = list_src_snaps(image_ctx, &snaps); + if (r < 0) { + return r; + } + + uint64_t dst_features = 0; + r = m_image_options.get(RBD_IMAGE_OPTION_FEATURES, &dst_features); + ceph_assert(r == 0); + + if (!image_ctx->test_features(RBD_FEATURE_LAYERING)) { + return 0; + } + + for (auto &snap : snaps) { + std::shared_lock image_locker{image_ctx->image_lock}; + cls::rbd::ParentImageSpec parent_spec{image_ctx->md_ctx.get_id(), + image_ctx->md_ctx.get_namespace(), + image_ctx->id, snap.id}; + std::vector child_images; + r = api::Image::list_children(image_ctx, parent_spec, + &child_images); + if (r < 0) { + lderr(m_cct) << "failed listing children: " << cpp_strerror(r) + << dendl; + return r; + } + if (!child_images.empty()) { + ldout(m_cct, 1) << image_ctx->name << "@" << snap.name + << " has children" << dendl; + + if ((dst_features & RBD_FEATURE_LAYERING) == 0) { + lderr(m_cct) << "can't migrate to destination without layering feature: " + << "image has children" << dendl; + return -EINVAL; + } + } + } + + return 0; +} + + +template +int Migration::set_src_migration(I* image_ctx) { + ldout(m_cct, 10) << dendl; + + image_ctx->ignore_migrating = true; + + int r = cls_client::migration_set(&image_ctx->md_ctx, image_ctx->header_oid, + m_src_migration_spec); + if (r < 0) { + lderr(m_cct) << "failed to set source migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + image_ctx->notify_update(); + + return 0; +} + +template +int Migration::remove_migration(I *image_ctx) { + ldout(m_cct, 
10) << dendl; + + int r; + + r = cls_client::migration_remove(&image_ctx->md_ctx, image_ctx->header_oid); + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + lderr(m_cct) << "failed removing migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + image_ctx->notify_update(); + + return 0; +} + +template +int Migration::unlink_src_image(I* image_ctx) { + if (image_ctx->old_format) { + return v1_unlink_src_image(image_ctx); + } else { + return v2_unlink_src_image(image_ctx); + } +} + +template +int Migration::v1_unlink_src_image(I* image_ctx) { + ldout(m_cct, 10) << dendl; + + std::shared_lock image_locker{image_ctx->image_lock}; + int r = tmap_rm(image_ctx->md_ctx, image_ctx->name); + if (r < 0) { + lderr(m_cct) << "failed removing " << image_ctx->name << " from tmap: " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template +int Migration::v2_unlink_src_image(I* image_ctx) { + ldout(m_cct, 10) << dendl; + + image_ctx->owner_lock.lock_shared(); + if (image_ctx->exclusive_lock != nullptr && + image_ctx->exclusive_lock->is_lock_owner()) { + C_SaferCond ctx; + image_ctx->exclusive_lock->release_lock(&ctx); + image_ctx->owner_lock.unlock_shared(); + int r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "error releasing exclusive lock: " << cpp_strerror(r) + << dendl; + return r; + } + } else { + image_ctx->owner_lock.unlock_shared(); + } + + int r = Trash::move(image_ctx->md_ctx, RBD_TRASH_IMAGE_SOURCE_MIGRATION, + image_ctx->name, 0); + if (r < 0) { + lderr(m_cct) << "failed moving image to trash: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template +int Migration::relink_src_image(I* image_ctx) { + if (image_ctx->old_format) { + return v1_relink_src_image(image_ctx); + } else { + return v2_relink_src_image(image_ctx); + } +} + +template +int Migration::v1_relink_src_image(I* image_ctx) { + ldout(m_cct, 10) << dendl; + + std::shared_lock image_locker{image_ctx->image_lock}; + int r = tmap_set(image_ctx->md_ctx, 
image_ctx->name); + if (r < 0) { + lderr(m_cct) << "failed adding " << image_ctx->name << " to tmap: " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template +int Migration::v2_relink_src_image(I* image_ctx) { + ldout(m_cct, 10) << dendl; + + std::shared_lock image_locker{image_ctx->image_lock}; + int r = Trash::restore(image_ctx->md_ctx, + {cls::rbd::TRASH_IMAGE_SOURCE_MIGRATION}, + image_ctx->id, image_ctx->name); + if (r < 0) { + lderr(m_cct) << "failed restoring image from trash: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template +int Migration::create_dst_image(I** image_ctx) { + ldout(m_cct, 10) << dendl; + + uint64_t size; + cls::rbd::ParentImageSpec parent_spec; + { + std::shared_lock image_locker{m_src_image_ctx->image_lock}; + size = m_src_image_ctx->size; + + // use oldest snapshot or HEAD for parent spec + if (!m_src_image_ctx->snap_info.empty()) { + parent_spec = m_src_image_ctx->snap_info.begin()->second.parent.spec; + } else { + parent_spec = m_src_image_ctx->parent_md.spec; + } + } + + ConfigProxy config{m_cct->_conf}; + api::Config::apply_pool_overrides(m_dst_io_ctx, &config); + + uint64_t mirror_image_mode; + if (m_image_options.get(RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE, + &mirror_image_mode) == 0) { + m_mirroring = true; + m_mirror_image_mode = static_cast( + mirror_image_mode); + m_image_options.unset(RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE); + } + + int r; + C_SaferCond on_create; + librados::IoCtx parent_io_ctx; + if (parent_spec.pool_id == -1) { + auto *req = image::CreateRequest::create( + config, m_dst_io_ctx, m_dst_image_name, m_dst_image_id, size, + m_image_options, image::CREATE_FLAG_SKIP_MIRROR_ENABLE, + cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, "", "", + m_src_image_ctx->op_work_queue, &on_create); + req->send(); + } else { + r = util::create_ioctx(m_src_image_ctx->md_ctx, "parent image", + parent_spec.pool_id, parent_spec.pool_namespace, + &parent_io_ctx); + if (r < 0) { + return r; + } + + auto *req 
= image::CloneRequest::create( + config, parent_io_ctx, parent_spec.image_id, "", {}, parent_spec.snap_id, + m_dst_io_ctx, m_dst_image_name, m_dst_image_id, m_image_options, + cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, "", "", + m_src_image_ctx->op_work_queue, &on_create); + req->send(); + } + + r = on_create.wait(); + if (r < 0) { + lderr(m_cct) << "header creation failed: " << cpp_strerror(r) << dendl; + return r; + } + + auto dst_image_ctx = *image_ctx; + dst_image_ctx->id = m_dst_image_id; + *image_ctx = nullptr; // prevent prepare from cleaning up the ImageCtx + + r = dst_image_ctx->state->open(OPEN_FLAG_IGNORE_MIGRATING); + if (r < 0) { + lderr(m_cct) << "failed to open newly created header: " << cpp_strerror(r) + << dendl; + return r; + } + + BOOST_SCOPE_EXIT_TPL(dst_image_ctx) { + dst_image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + { + std::shared_lock owner_locker{dst_image_ctx->owner_lock}; + r = dst_image_ctx->operations->prepare_image_update( + exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true); + if (r < 0) { + lderr(m_cct) << "cannot obtain exclusive lock" << dendl; + return r; + } + if (dst_image_ctx->exclusive_lock != nullptr) { + dst_image_ctx->exclusive_lock->block_requests(0); + } + } + + SnapSeqs snap_seqs; + + C_SaferCond on_snapshot_copy; + auto snapshot_copy_req = librbd::deep_copy::SnapshotCopyRequest::create( + m_src_image_ctx, dst_image_ctx, 0, CEPH_NOSNAP, 0, m_flatten, + m_src_image_ctx->op_work_queue, &snap_seqs, &on_snapshot_copy); + snapshot_copy_req->send(); + r = on_snapshot_copy.wait(); + if (r < 0) { + lderr(m_cct) << "failed to copy snapshots: " << cpp_strerror(r) << dendl; + return r; + } + + if (!m_src_image_ctx->header_oid.empty()) { + C_SaferCond on_metadata_copy; + auto metadata_copy_req = librbd::deep_copy::MetadataCopyRequest::create( + m_src_image_ctx, dst_image_ctx, &on_metadata_copy); + metadata_copy_req->send(); + r = on_metadata_copy.wait(); + if (r < 0) { + lderr(m_cct) << "failed to copy metadata: " << 
cpp_strerror(r) << dendl; + return r; + } + } + + m_dst_migration_spec.snap_seqs = snap_seqs; + m_dst_migration_spec.overlap = size; + m_dst_migration_spec.mirroring = m_mirroring; + m_dst_migration_spec.mirror_image_mode = m_mirror_image_mode; + m_dst_migration_spec.flatten = m_flatten; + r = cls_client::migration_set(&m_dst_io_ctx, m_dst_header_oid, + m_dst_migration_spec); + if (r < 0) { + lderr(m_cct) << "failed to set migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + if (m_dst_migration_spec.source_spec.empty()) { + r = update_group(m_src_image_ctx, dst_image_ctx); + if (r < 0) { + return r; + } + + r = set_state(m_src_image_ctx, "source", + cls::rbd::MIGRATION_STATE_PREPARED, ""); + if (r < 0) { + return r; + } + } + + r = set_state(dst_image_ctx, "destination", + cls::rbd::MIGRATION_STATE_PREPARED, ""); + if (r < 0) { + return r; + } + + if (m_dst_migration_spec.source_spec.empty()) { + r = dst_image_ctx->state->refresh(); + if (r < 0) { + lderr(m_cct) << "failed to refresh destination image: " << cpp_strerror(r) + << dendl; + return r; + } + + r = relink_children(m_src_image_ctx, dst_image_ctx); + if (r < 0) { + return r; + } + } + + return 0; +} + +template +int Migration::remove_group(I *image_ctx, group_info_t *group_info) { + int r = librbd::api::Group::image_get_group(image_ctx, group_info); + if (r < 0) { + lderr(m_cct) << "failed to get image group: " << cpp_strerror(r) << dendl; + return r; + } + + if (group_info->pool == -1) { + return -ENOENT; + } + + ceph_assert(!image_ctx->id.empty()); + + ldout(m_cct, 10) << dendl; + + IoCtx group_ioctx; + r = util::create_ioctx(image_ctx->md_ctx, "group", group_info->pool, {}, + &group_ioctx); + if (r < 0) { + return r; + } + + r = librbd::api::Group::image_remove_by_id(group_ioctx, + group_info->name.c_str(), + image_ctx->md_ctx, + image_ctx->id.c_str()); + if (r < 0) { + lderr(m_cct) << "failed to remove image from group: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} 
+ +template +int Migration::add_group(I *image_ctx, group_info_t &group_info) { + if (group_info.pool == -1) { + return 0; + } + + ldout(m_cct, 10) << dendl; + + IoCtx group_ioctx; + int r = util::create_ioctx(image_ctx->md_ctx, "group", group_info.pool, {}, + &group_ioctx); + if (r < 0) { + return r; + } + + r = librbd::api::Group::image_add(group_ioctx, group_info.name.c_str(), + image_ctx->md_ctx, + image_ctx->name.c_str()); + if (r < 0) { + lderr(m_cct) << "failed to add image to group: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template +int Migration::update_group(I *from_image_ctx, I *to_image_ctx) { + ldout(m_cct, 10) << dendl; + + group_info_t group_info; + + int r = remove_group(from_image_ctx, &group_info); + if (r < 0) { + return r == -ENOENT ? 0 : r; + } + + r = add_group(to_image_ctx, group_info); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Migration::disable_mirroring( + I *image_ctx, bool *was_enabled, + cls::rbd::MirrorImageMode *mirror_image_mode) { + *was_enabled = false; + + cls::rbd::MirrorImage mirror_image; + int r = cls_client::mirror_image_get(&image_ctx->md_ctx, image_ctx->id, + &mirror_image); + if (r == -ENOENT) { + ldout(m_cct, 10) << "mirroring is not enabled for this image" << dendl; + return 0; + } + + if (r < 0) { + lderr(m_cct) << "failed to retrieve mirror image: " << cpp_strerror(r) + << dendl; + return r; + } + + if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + *was_enabled = true; + *mirror_image_mode = mirror_image.mode; + } + + ldout(m_cct, 10) << dendl; + + C_SaferCond ctx; + auto req = mirror::DisableRequest::create(image_ctx, false, true, &ctx); + req->send(); + r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "failed to disable mirroring: " << cpp_strerror(r) + << dendl; + return r; + } + + m_src_migration_spec.mirroring = true; + + return 0; +} + +template +int Migration::enable_mirroring( + I *image_ctx, bool was_enabled, + cls::rbd::MirrorImageMode 
mirror_image_mode) { + cls::rbd::MirrorMode mirror_mode; + int r = cls_client::mirror_mode_get(&image_ctx->md_ctx, &mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "failed to retrieve mirror mode: " << cpp_strerror(r) + << dendl; + return r; + } + + if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + ldout(m_cct, 10) << "mirroring is not enabled for destination pool" + << dendl; + return 0; + } + if (mirror_mode == cls::rbd::MIRROR_MODE_IMAGE && !was_enabled) { + ldout(m_cct, 10) << "mirroring is not enabled for image" << dendl; + return 0; + } + + ldout(m_cct, 10) << dendl; + + C_SaferCond ctx; + auto req = mirror::EnableRequest::create( + image_ctx, mirror_image_mode, "", false, &ctx); + req->send(); + r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "failed to enable mirroring: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +// When relinking children we should be careful as it may be interrupted +// at any moment for some reason and we may end up in an inconsistent +// state, which we have to be able to fix with "migration abort". Below +// are all possible states during migration (P1 - source parent, P2 - +// destination parent, C - child): +// +// P1 P2 P1 P2 P1 P2 P1 P2 +// ^\ \ ^ \ /^ /^ +// \v v/ v/ v/ +// C C C C +// +// 1 2 3 4 +// +// (1) and (4) are the initial and the final consistent states. (2) +// and (3) are intermediate inconsistent states that have to be fixed +// by relink_children running in "migration abort" mode. For this, it +// scans P2 for all children attached and relinks (fixes) states (3) +// and (4) to state (1). Then it scans P1 for remaining children and +// fixes the states (2). + +template +int Migration::relink_children(I *from_image_ctx, I *to_image_ctx) { + ldout(m_cct, 10) << dendl; + + bool migration_abort = (to_image_ctx == m_src_image_ctx); + + std::vector snaps; + int r = list_src_snaps( + migration_abort ? 
to_image_ctx : from_image_ctx, &snaps); + if (r < 0) { + return r; + } + + for (auto it = snaps.begin(); it != snaps.end(); it++) { + auto &snap = *it; + std::vector src_child_images; + + if (from_image_ctx != m_src_image_ctx) { + ceph_assert(migration_abort); + + // We run list snaps against the src image to get only those snapshots + // that are migrated. If the "from" image is not the src image + // (abort migration case), we need to remap snap ids. + // Also collect the list of the children currently attached to the + // source, so we could make a proper decision later about relinking. + + std::shared_lock src_image_locker{to_image_ctx->image_lock}; + cls::rbd::ParentImageSpec src_parent_spec{to_image_ctx->md_ctx.get_id(), + to_image_ctx->md_ctx.get_namespace(), + to_image_ctx->id, snap.id}; + r = api::Image::list_children(to_image_ctx, src_parent_spec, + &src_child_images); + if (r < 0) { + lderr(m_cct) << "failed listing children: " << cpp_strerror(r) + << dendl; + return r; + } + + std::shared_lock image_locker{from_image_ctx->image_lock}; + snap.id = from_image_ctx->get_snap_id(cls::rbd::UserSnapshotNamespace(), + snap.name); + if (snap.id == CEPH_NOSNAP) { + ldout(m_cct, 5) << "skipping snapshot " << snap.name << dendl; + continue; + } + } + + std::vector child_images; + { + std::shared_lock image_locker{from_image_ctx->image_lock}; + cls::rbd::ParentImageSpec parent_spec{from_image_ctx->md_ctx.get_id(), + from_image_ctx->md_ctx.get_namespace(), + from_image_ctx->id, snap.id}; + r = api::Image::list_children(from_image_ctx, parent_spec, + &child_images); + if (r < 0) { + lderr(m_cct) << "failed listing children: " << cpp_strerror(r) + << dendl; + return r; + } + } + + for (auto &child_image : child_images) { + r = relink_child(from_image_ctx, to_image_ctx, snap, child_image, + migration_abort, true); + if (r < 0) { + return r; + } + + src_child_images.erase(std::remove(src_child_images.begin(), + src_child_images.end(), child_image), + 
src_child_images.end()); + } + + for (auto &child_image : src_child_images) { + r = relink_child(from_image_ctx, to_image_ctx, snap, child_image, + migration_abort, false); + if (r < 0) { + return r; + } + } + } + + return 0; +} + +template +int Migration::relink_child(I *from_image_ctx, I *to_image_ctx, + const librbd::snap_info_t &from_snap, + const librbd::linked_image_spec_t &child_image, + bool migration_abort, bool reattach_child) { + ldout(m_cct, 10) << from_snap.name << " " << child_image.pool_name << "/" + << child_image.pool_namespace << "/" + << child_image.image_name << " (migration_abort=" + << migration_abort << ", reattach_child=" << reattach_child + << ")" << dendl; + + librados::snap_t to_snap_id; + { + std::shared_lock image_locker{to_image_ctx->image_lock}; + to_snap_id = to_image_ctx->get_snap_id(cls::rbd::UserSnapshotNamespace(), + from_snap.name); + if (to_snap_id == CEPH_NOSNAP) { + lderr(m_cct) << "no snapshot " << from_snap.name << " on destination image" + << dendl; + return -ENOENT; + } + } + + librados::IoCtx child_io_ctx; + int r = util::create_ioctx(to_image_ctx->md_ctx, + "child image " + child_image.image_name, + child_image.pool_id, child_image.pool_namespace, + &child_io_ctx); + if (r < 0) { + return r; + } + + I *child_image_ctx = I::create("", child_image.image_id, nullptr, + child_io_ctx, false); + r = child_image_ctx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + if (r < 0) { + lderr(m_cct) << "failed to open child image: " << cpp_strerror(r) << dendl; + return r; + } + BOOST_SCOPE_EXIT_TPL(child_image_ctx) { + child_image_ctx->state->close(); + } BOOST_SCOPE_EXIT_END; + + uint32_t clone_format = 1; + if (child_image_ctx->test_op_features(RBD_OPERATION_FEATURE_CLONE_CHILD)) { + clone_format = 2; + } + + cls::rbd::ParentImageSpec parent_spec; + uint64_t parent_overlap; + { + std::shared_lock image_locker{child_image_ctx->image_lock}; + + // use oldest snapshot or HEAD for parent spec + if (!child_image_ctx->snap_info.empty()) { + 
parent_spec = child_image_ctx->snap_info.begin()->second.parent.spec; + parent_overlap = child_image_ctx->snap_info.begin()->second.parent.overlap; + } else { + parent_spec = child_image_ctx->parent_md.spec; + parent_overlap = child_image_ctx->parent_md.overlap; + } + } + + if (migration_abort && + parent_spec.pool_id == to_image_ctx->md_ctx.get_id() && + parent_spec.pool_namespace == to_image_ctx->md_ctx.get_namespace() && + parent_spec.image_id == to_image_ctx->id && + parent_spec.snap_id == to_snap_id) { + ldout(m_cct, 10) << "no need for parent re-attach" << dendl; + } else { + if (parent_spec.pool_id != from_image_ctx->md_ctx.get_id() || + parent_spec.pool_namespace != from_image_ctx->md_ctx.get_namespace() || + parent_spec.image_id != from_image_ctx->id || + parent_spec.snap_id != from_snap.id) { + lderr(m_cct) << "parent is not source image: " << parent_spec.pool_id + << "/" << parent_spec.pool_namespace << "/" + << parent_spec.image_id << "@" << parent_spec.snap_id + << dendl; + return -ESTALE; + } + + parent_spec.pool_id = to_image_ctx->md_ctx.get_id(); + parent_spec.pool_namespace = to_image_ctx->md_ctx.get_namespace(); + parent_spec.image_id = to_image_ctx->id; + parent_spec.snap_id = to_snap_id; + + C_SaferCond on_reattach_parent; + auto reattach_parent_req = image::AttachParentRequest::create( + *child_image_ctx, parent_spec, parent_overlap, true, &on_reattach_parent); + reattach_parent_req->send(); + r = on_reattach_parent.wait(); + if (r < 0) { + lderr(m_cct) << "failed to re-attach parent: " << cpp_strerror(r) << dendl; + return r; + } + } + + if (reattach_child) { + C_SaferCond on_reattach_child; + auto reattach_child_req = image::AttachChildRequest::create( + child_image_ctx, to_image_ctx, to_snap_id, from_image_ctx, from_snap.id, + clone_format, &on_reattach_child); + reattach_child_req->send(); + r = on_reattach_child.wait(); + if (r < 0) { + lderr(m_cct) << "failed to re-attach child: " << cpp_strerror(r) << dendl; + return r; + } + } + + 
child_image_ctx->notify_update(); + + return 0; +} + +template +int Migration::remove_src_image(I** image_ctx) { + ldout(m_cct, 10) << dendl; + + auto src_image_ctx = *image_ctx; + + std::vector snaps; + int r = list_src_snaps(src_image_ctx, &snaps); + if (r < 0) { + return r; + } + + for (auto it = snaps.rbegin(); it != snaps.rend(); it++) { + auto &snap = *it; + + librbd::NoOpProgressContext prog_ctx; + int r = Snapshot::remove(src_image_ctx, snap.name.c_str(), + RBD_SNAP_REMOVE_UNPROTECT, prog_ctx); + if (r < 0) { + lderr(m_cct) << "failed removing source image snapshot '" << snap.name + << "': " << cpp_strerror(r) << dendl; + return r; + } + } + + ceph_assert(src_image_ctx->ignore_migrating); + + auto asio_engine = src_image_ctx->asio_engine; + auto src_image_id = src_image_ctx->id; + librados::IoCtx src_io_ctx(src_image_ctx->md_ctx); + + C_SaferCond on_remove; + auto req = librbd::image::RemoveRequest::create( + src_io_ctx, src_image_ctx, false, true, *m_prog_ctx, + asio_engine->get_work_queue(), &on_remove); + req->send(); + r = on_remove.wait(); + + *image_ctx = nullptr; + + // For old format image it will return -ENOENT due to expected + // tmap_rm failure at the end. 
+ if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "failed removing source image: " << cpp_strerror(r) + << dendl; + return r; + } + + if (!src_image_id.empty()) { + r = cls_client::trash_remove(&src_io_ctx, src_image_id); + if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "error removing image " << src_image_id + << " from rbd_trash object" << dendl; + } + } + + return 0; +} + +template +int Migration::revert_data(I* src_image_ctx, I* dst_image_ctx, + ProgressContext* prog_ctx) { + ldout(m_cct, 10) << dendl; + + cls::rbd::MigrationSpec migration_spec; + int r = cls_client::migration_get(&src_image_ctx->md_ctx, + src_image_ctx->header_oid, + &migration_spec); + + if (r < 0) { + lderr(m_cct) << "failed retrieving migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + if (migration_spec.header_type != cls::rbd::MIGRATION_HEADER_TYPE_DST) { + lderr(m_cct) << "unexpected migration header type: " + << migration_spec.header_type << dendl; + return -EINVAL; + } + + uint64_t src_snap_id_start = 0; + uint64_t src_snap_id_end = CEPH_NOSNAP; + uint64_t dst_snap_id_start = 0; + if (!migration_spec.snap_seqs.empty()) { + src_snap_id_start = migration_spec.snap_seqs.rbegin()->second; + } + + // we only care about the HEAD revision so only add a single mapping to + // represent the most recent state + SnapSeqs snap_seqs; + snap_seqs[CEPH_NOSNAP] = CEPH_NOSNAP; + + ldout(m_cct, 20) << "src_snap_id_start=" << src_snap_id_start << ", " + << "src_snap_id_end=" << src_snap_id_end << ", " + << "dst_snap_id_start=" << dst_snap_id_start << ", " + << "snap_seqs=" << snap_seqs << dendl; + + C_SaferCond ctx; + deep_copy::ProgressHandler progress_handler(prog_ctx); + auto request = deep_copy::ImageCopyRequest::create( + src_image_ctx, dst_image_ctx, src_snap_id_start, src_snap_id_end, + dst_snap_id_start, false, {}, snap_seqs, &progress_handler, &ctx); + request->send(); + + r = ctx.wait(); + if (r < 0) { + lderr(m_cct) << "error reverting destination image data blocks back to " 
+ << "source image: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Migration; diff --git a/src/librbd/api/Migration.h b/src/librbd/api/Migration.h new file mode 100644 index 000000000..dd70dcc23 --- /dev/null +++ b/src/librbd/api/Migration.h @@ -0,0 +1,113 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_MIGRATION_H +#define CEPH_LIBRBD_API_MIGRATION_H + +#include "include/int_types.h" +#include "include/rados/librados_fwd.hpp" +#include "include/rbd/librbd.hpp" +#include "cls/rbd/cls_rbd_types.h" + +#include + +namespace librbd { + +class ImageCtx; + +namespace api { + +template +class Migration { +public: + static int prepare(librados::IoCtx& io_ctx, const std::string &image_name, + librados::IoCtx& dest_io_ctx, + const std::string &dest_image_name, ImageOptions& opts); + static int prepare_import(const std::string& source_spec, + librados::IoCtx& dest_io_ctx, + const std::string &dest_image_name, + ImageOptions& opts); + static int execute(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext &prog_ctx); + static int abort(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext &prog_ctx); + static int commit(librados::IoCtx& io_ctx, const std::string &image_name, + ProgressContext &prog_ctx); + static int status(librados::IoCtx& io_ctx, const std::string &image_name, + image_migration_status_t *status); + + static int get_source_spec(ImageCtxT* image_ctx, std::string* source_spec); + +private: + CephContext* m_cct; + ImageCtx* m_src_image_ctx; + ImageCtx* m_dst_image_ctx; + librados::IoCtx m_dst_io_ctx; + std::string m_dst_image_name; + std::string m_dst_image_id; + std::string m_dst_header_oid; + ImageOptions &m_image_options; + bool m_flatten; + bool m_mirroring; + cls::rbd::MirrorImageMode m_mirror_image_mode; + ProgressContext *m_prog_ctx; + + 
cls::rbd::MigrationSpec m_src_migration_spec; + cls::rbd::MigrationSpec m_dst_migration_spec; + + Migration(ImageCtx* src_image_ctx, ImageCtx* dst_image_ctx, + const cls::rbd::MigrationSpec& dst_migration_spec, + ImageOptions& opts, ProgressContext *prog_ctx); + + int prepare(); + int prepare_import(); + int execute(); + int abort(); + int commit(); + int status(image_migration_status_t *status); + + int set_state(ImageCtxT* image_ctx, const std::string& image_description, + cls::rbd::MigrationState state, + const std::string &description); + int set_state(cls::rbd::MigrationState state, const std::string &description); + + int list_src_snaps(ImageCtxT* image_ctx, + std::vector *snaps); + int validate_src_snaps(ImageCtxT* image_ctx); + int disable_mirroring(ImageCtxT* image_ctx, bool *was_enabled, + cls::rbd::MirrorImageMode *mirror_image_mode); + int enable_mirroring(ImageCtxT* image_ctx, bool was_enabled, + cls::rbd::MirrorImageMode mirror_image_mode); + int set_src_migration(ImageCtxT* image_ctx); + int unlink_src_image(ImageCtxT* image_ctx); + int relink_src_image(ImageCtxT* image_ctx); + int create_dst_image(ImageCtxT** image_ctx); + int remove_group(ImageCtxT* image_ctx, group_info_t *group_info); + int add_group(ImageCtxT* image_ctx, group_info_t &group_info); + int update_group(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx); + int remove_migration(ImageCtxT* image_ctx); + int relink_children(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx); + int remove_src_image(ImageCtxT** image_ctx); + + int v1_set_src_migration(ImageCtxT* image_ctx); + int v2_set_src_migration(ImageCtxT* image_ctx); + int v1_unlink_src_image(ImageCtxT* image_ctx); + int v2_unlink_src_image(ImageCtxT* image_ctx); + int v1_relink_src_image(ImageCtxT* image_ctx); + int v2_relink_src_image(ImageCtxT* image_ctx); + + int relink_child(ImageCtxT *from_image_ctx, ImageCtxT *to_image_ctx, + const librbd::snap_info_t &src_snap, + const librbd::linked_image_spec_t &child_image, + bool 
migration_abort, bool reattach_child); + + int revert_data(ImageCtxT* src_image_ctx, ImageCtxT* dst_image_ctx, + ProgressContext *prog_ctx); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Migration; + +#endif // CEPH_LIBRBD_API_MIGRATION_H diff --git a/src/librbd/api/Mirror.cc b/src/librbd/api/Mirror.cc new file mode 100644 index 000000000..2cfad0d32 --- /dev/null +++ b/src/librbd/api/Mirror.cc @@ -0,0 +1,2104 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Mirror.h" +#include "include/rados/librados.hpp" +#include "include/stringify.h" +#include "common/ceph_json.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/AsioEngine.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/MirroringWatcher.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/api/Image.h" +#include "librbd/api/Namespace.h" +#include "librbd/mirror/DemoteRequest.h" +#include "librbd/mirror/DisableRequest.h" +#include "librbd/mirror/EnableRequest.h" +#include "librbd/mirror/GetInfoRequest.h" +#include "librbd/mirror/GetStatusRequest.h" +#include "librbd/mirror/GetUuidRequest.h" +#include "librbd/mirror/PromoteRequest.h" +#include "librbd/mirror/Types.h" +#include "librbd/MirroringWatcher.h" +#include "librbd/mirror/snapshot/CreatePrimaryRequest.h" +#include "librbd/mirror/snapshot/ImageMeta.h" +#include "librbd/mirror/snapshot/UnlinkPeerRequest.h" +#include "librbd/mirror/snapshot/Utils.h" +#include +#include +#include +#include "json_spirit/json_spirit.h" + +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Mirror: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +int get_config_key(librados::Rados& rados, const std::string& key, + std::string* 
value) { + std::string cmd = + "{" + "\"prefix\": \"config-key get\", " + "\"key\": \"" + key + "\"" + "}"; + + bufferlist in_bl; + bufferlist out_bl; + + int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr); + if (r == -EINVAL) { + return -EOPNOTSUPP; + } else if (r < 0 && r != -ENOENT) { + return r; + } + + *value = out_bl.to_str(); + return 0; +} + +int set_config_key(librados::Rados& rados, const std::string& key, + const std::string& value) { + std::string cmd; + if (value.empty()) { + cmd = "{" + "\"prefix\": \"config-key rm\", " + "\"key\": \"" + key + "\"" + "}"; + } else { + cmd = "{" + "\"prefix\": \"config-key set\", " + "\"key\": \"" + key + "\", " + "\"val\": \"" + value + "\"" + "}"; + } + bufferlist in_bl; + bufferlist out_bl; + + int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr); + if (r == -EINVAL) { + return -EOPNOTSUPP; + } else if (r < 0) { + return r; + } + + return 0; +} + +std::string get_peer_config_key_name(int64_t pool_id, + const std::string& peer_uuid) { + return RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) + "/" + + peer_uuid; +} + +int remove_peer_config_key(librados::IoCtx& io_ctx, + const std::string& peer_uuid) { + int64_t pool_id = io_ctx.get_id(); + auto key = get_peer_config_key_name(pool_id, peer_uuid); + + librados::Rados rados(io_ctx); + int r = set_config_key(rados, key, ""); + if (r < 0 && r != -ENOENT && r != -EPERM) { + return r; + } + return 0; +} + +std::string get_mon_host(CephContext* cct) { + std::string mon_host; + if (auto mon_addrs = cct->get_mon_addrs(); + mon_addrs != nullptr && !mon_addrs->empty()) { + CachedStackStringStream css; + for (auto it = mon_addrs->begin(); it != mon_addrs->end(); ++it) { + if (it != mon_addrs->begin()) { + *css << ","; + } + *css << *it; + } + mon_host = css->str(); + } else { + ldout(cct, 20) << "falling back to mon_host in conf" << dendl; + mon_host = cct->_conf.get_val("mon_host"); + } + ldout(cct, 20) << "mon_host=" << mon_host << dendl; + return mon_host; +} + 
+/**
+ * Ensure the CephX user used by a remote rbd-mirror peer exists and fetch
+ * its key.
+ *
+ * The peer client id is read from the RBD_MIRROR_PEER_CLIENT_ID_CONFIG_KEY
+ * config-key; if nothing is stored there yet, "rbd-mirror-peer" is written
+ * to that key and used. An "auth get-or-create" mon command is then issued
+ * for "client.<peer_client_id>" and the "key" member of the first element of
+ * the JSON array reply is returned.
+ *
+ * @param cct             context used for logging
+ * @param rados           cluster handle the mon commands are sent through
+ * @param peer_client_id  [out] client id (without the "client." prefix)
+ * @param cephx_key       [out] CephX key extracted from the mon reply
+ * @return 0 on success; -EACCES when a config-key or auth command is not
+ *         permitted; -EEXIST when the auth command fails with -EINVAL
+ *         (logged as a caps mismatch for an existing user); -EBADMSG when
+ *         the reply cannot be parsed; any other negative errno unchanged.
+ */
+int create_bootstrap_user(CephContext* cct, librados::Rados& rados,
+                          std::string* peer_client_id, std::string* cephx_key) {
+  ldout(cct, 20) << dendl;
+
+  // retrieve peer CephX user from config-key
+  int r = get_config_key(rados, RBD_MIRROR_PEER_CLIENT_ID_CONFIG_KEY,
+                         peer_client_id);
+  if (r == -EACCES) {
+    ldout(cct, 5) << "insufficient permissions to get peer-client-id "
+                  << "config-key" << dendl;
+    return r;
+  } else if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to retrieve peer client id key: "
+               << cpp_strerror(r) << dendl;
+    return r;
+  } else if (r == -ENOENT || peer_client_id->empty()) {
+    ldout(cct, 20) << "creating new peer-client-id config-key" << dendl;
+
+    *peer_client_id = "rbd-mirror-peer";
+    r = set_config_key(rados, RBD_MIRROR_PEER_CLIENT_ID_CONFIG_KEY,
+                       *peer_client_id);
+    if (r == -EACCES) {
+      ldout(cct, 5) << "insufficient permissions to update peer-client-id "
+                    << "config-key" << dendl;
+      return r;
+    } else if (r < 0) {
+      lderr(cct) << "failed to update peer client id key: "
+                 << cpp_strerror(r) << dendl;
+      return r;
+    }
+  }
+  ldout(cct, 20) << "peer_client_id=" << *peer_client_id << dendl;
+
+  // create peer client user
+  std::string cmd =
+    R"({)" \
+    R"( "prefix": "auth get-or-create",)" \
+    R"( "entity": "client.)" + *peer_client_id + R"(",)" \
+    R"( "caps": [)" \
+    R"( "mon", "profile rbd-mirror-peer",)" \
+    R"( "osd", "profile rbd"],)" \
+    R"( "format": "json")" \
+    R"(})";
+
+  bufferlist in_bl;
+  bufferlist out_bl;
+
+  r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
+  if (r == -EINVAL) {
+    ldout(cct, 5) << "caps mismatch for existing user" << dendl;
+    return -EEXIST;
+  } else if (r == -EACCES) {
+    ldout(cct, 5) << "insufficient permissions to create user" << dendl;
+    return r;
+  } else if (r < 0) {
+    lderr(cct) << "failed to create or update RBD mirroring bootstrap user: "
+               << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  // extract key from response: a non-empty JSON array whose first element
+  // is an object with a string "key" member
+  bool json_valid = false;
+  json_spirit::mValue json_root;
+  if (json_spirit::read(out_bl.to_str(), json_root)) {
+    try {
+      auto& json_array = json_root.get_array();
+      // indexing an empty array is undefined behaviour, not a catchable
+      // error, so it has to be ruled out before taking element 0
+      if (!json_array.empty()) {
+        auto& json_obj = json_array[0].get_obj();
+        *cephx_key = json_obj["key"].get_str();
+        json_valid = true;
+      }
+    } catch (const std::runtime_error&) {
+      // a value along the path had an unexpected JSON type; reported below
+    }
+  }
+
+  if (!json_valid) {
+    lderr(cct) << "invalid auth keyring JSON received" << dendl;
+    return -EBADMSG;
+  }
+
+  return 0;
+}
+
+int create_bootstrap_peer(CephContext* cct, librados::IoCtx& io_ctx,
+                          mirror_peer_direction_t direction,
+                          const std::string& site_name, const std::string& fsid,
+                          const std::string& client_id, const std::string& key,
+                          const std::string& mon_host,
+                          const std::string& cluster1,
+                          const std::string& cluster2) {
+  ldout(cct, 20) << dendl;
+
+  std::string peer_uuid;
+  std::vector peers;
+  int r = Mirror<>::peer_site_list(io_ctx, &peers);
+  if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to list mirror peers: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  if (peers.empty()) {
+    r = Mirror<>::peer_site_add(io_ctx, &peer_uuid, direction, site_name,
+                                "client."
+ client_id); + if (r < 0) { + lderr(cct) << "failed to add " << cluster1 << " peer to " + << cluster2 << " " << "cluster: " << cpp_strerror(r) << dendl; + return r; + } + } else if (peers[0].site_name != site_name && + peers[0].site_name != fsid) { + // only support a single peer + lderr(cct) << "multiple peers are not currently supported" << dendl; + return -EINVAL; + } else { + peer_uuid = peers[0].uuid; + + if (peers[0].site_name != site_name) { + r = Mirror<>::peer_site_set_name(io_ctx, peer_uuid, site_name); + if (r < 0) { + // non-fatal attempt to update site name + lderr(cct) << "failed to update peer site name" << dendl; + } + } + } + + Mirror<>::Attributes attributes { + {"mon_host", mon_host}, + {"key", key}}; + r = Mirror<>::peer_site_set_attributes(io_ctx, peer_uuid, attributes); + if (r < 0) { + lderr(cct) << "failed to update " << cluster1 << " cluster connection " + << "attributes in " << cluster2 << " cluster: " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +int list_mirror_images(librados::IoCtx& io_ctx, + std::set& mirror_image_ids) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + + std::string last_read = ""; + int max_read = 1024; + int r; + do { + std::map mirror_images; + r = cls_client::mirror_image_list(&io_ctx, last_read, max_read, + &mirror_images); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error listing mirrored image directory: " + << cpp_strerror(r) << dendl; + return r; + } + for (auto it = mirror_images.begin(); it != mirror_images.end(); ++it) { + mirror_image_ids.insert(it->first); + } + if (!mirror_images.empty()) { + last_read = mirror_images.rbegin()->first; + } + r = mirror_images.size(); + } while (r == max_read); + + return 0; +} + +template +const char *pool_or_namespace(I *ictx) { + if (!ictx->md_ctx.get_namespace().empty()) { + return "namespace"; + } else { + return "pool"; + } +} + +struct C_ImageGetInfo : public Context { + mirror_image_info_t *mirror_image_info; + mirror_image_mode_t 
*mirror_image_mode; + Context *on_finish; + + cls::rbd::MirrorImage mirror_image; + mirror::PromotionState promotion_state = mirror::PROMOTION_STATE_PRIMARY; + std::string primary_mirror_uuid; + + C_ImageGetInfo(mirror_image_info_t *mirror_image_info, + mirror_image_mode_t *mirror_image_mode, Context *on_finish) + : mirror_image_info(mirror_image_info), + mirror_image_mode(mirror_image_mode), on_finish(on_finish) { + } + + void finish(int r) override { + if (r < 0 && r != -ENOENT) { + on_finish->complete(r); + return; + } + + if (mirror_image_info != nullptr) { + mirror_image_info->global_id = mirror_image.global_image_id; + mirror_image_info->state = static_cast( + mirror_image.state); + mirror_image_info->primary = ( + promotion_state == mirror::PROMOTION_STATE_PRIMARY); + } + + if (mirror_image_mode != nullptr) { + *mirror_image_mode = + static_cast(mirror_image.mode); + } + + on_finish->complete(0); + } +}; + +struct C_ImageGetGlobalStatus : public C_ImageGetInfo { + std::string image_name; + mirror_image_global_status_t *mirror_image_global_status; + + cls::rbd::MirrorImageStatus mirror_image_status_internal; + + C_ImageGetGlobalStatus( + const std::string &image_name, + mirror_image_global_status_t *mirror_image_global_status, + Context *on_finish) + : C_ImageGetInfo(&mirror_image_global_status->info, nullptr, on_finish), + image_name(image_name), + mirror_image_global_status(mirror_image_global_status) { + } + + void finish(int r) override { + if (r < 0 && r != -ENOENT) { + on_finish->complete(r); + return; + } + + mirror_image_global_status->name = image_name; + mirror_image_global_status->site_statuses.clear(); + mirror_image_global_status->site_statuses.reserve( + mirror_image_status_internal.mirror_image_site_statuses.size()); + for (auto& site_status : + mirror_image_status_internal.mirror_image_site_statuses) { + mirror_image_global_status->site_statuses.push_back({ + site_status.mirror_uuid, + static_cast(site_status.state), + site_status.description, 
site_status.last_update.sec(), + site_status.up}); + } + C_ImageGetInfo::finish(0); + } +}; + +template +struct C_ImageSnapshotCreate : public Context { + I *ictx; + uint64_t snap_create_flags; + uint64_t *snap_id; + Context *on_finish; + + cls::rbd::MirrorImage mirror_image; + mirror::PromotionState promotion_state; + std::string primary_mirror_uuid; + + C_ImageSnapshotCreate(I *ictx, uint64_t snap_create_flags, uint64_t *snap_id, + Context *on_finish) + : ictx(ictx), snap_create_flags(snap_create_flags), snap_id(snap_id), + on_finish(on_finish) { + } + + void finish(int r) override { + if (r < 0 && r != -ENOENT) { + on_finish->complete(r); + return; + } + + if (mirror_image.mode != cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT || + mirror_image.state != cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + lderr(ictx->cct) << "snapshot based mirroring is not enabled" << dendl; + on_finish->complete(-EINVAL); + return; + } + + auto req = mirror::snapshot::CreatePrimaryRequest::create( + ictx, mirror_image.global_image_id, CEPH_NOSNAP, snap_create_flags, 0U, + snap_id, on_finish); + req->send(); + } +}; + +} // anonymous namespace + +template +int Mirror::image_enable(I *ictx, mirror_image_mode_t mode, + bool relax_same_pool_parent_check) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << " mode=" << mode + << " relax_same_pool_parent_check=" + << relax_same_pool_parent_check << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + cls::rbd::MirrorMode mirror_mode; + r = cls_client::mirror_mode_get(&ictx->md_ctx, &mirror_mode); + if (r < 0) { + lderr(cct) << "cannot enable mirroring: failed to retrieve mirror mode: " + << cpp_strerror(r) << dendl; + return r; + } + + if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + lderr(cct) << "cannot enable mirroring: mirroring is not enabled on a " + << pool_or_namespace(ictx) << dendl; + return -EINVAL; + } + + if (mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) { + lderr(cct) << "cannot 
enable mirroring: " << pool_or_namespace(ictx) + << " is not in image mirror mode" << dendl; + return -EINVAL; + } + + // is mirroring not enabled for the parent? + { + std::shared_lock image_locker{ictx->image_lock}; + ImageCtx *parent = ictx->parent; + if (parent) { + if (parent->md_ctx.get_id() != ictx->md_ctx.get_id() || + !relax_same_pool_parent_check) { + cls::rbd::MirrorImage mirror_image_internal; + r = cls_client::mirror_image_get(&(parent->md_ctx), parent->id, + &mirror_image_internal); + if (r == -ENOENT) { + lderr(cct) << "mirroring is not enabled for the parent" << dendl; + return -EINVAL; + } + } + } + } + + if (mode == RBD_MIRROR_IMAGE_MODE_JOURNAL && + !ictx->test_features(RBD_FEATURE_JOURNALING)) { + uint64_t features = RBD_FEATURE_JOURNALING; + if (!ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) { + features |= RBD_FEATURE_EXCLUSIVE_LOCK; + } + r = ictx->operations->update_features(features, true); + if (r < 0) { + lderr(cct) << "cannot enable journaling: " << cpp_strerror(r) << dendl; + return r; + } + } + + C_SaferCond ctx; + auto req = mirror::EnableRequest::create( + ictx, static_cast(mode), "", false, &ctx); + req->send(); + + r = ctx.wait(); + if (r < 0) { + lderr(cct) << "cannot enable mirroring: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template +int Mirror::image_disable(I *ictx, bool force) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + cls::rbd::MirrorMode mirror_mode; + r = cls_client::mirror_mode_get(&ictx->md_ctx, &mirror_mode); + if (r < 0) { + lderr(cct) << "cannot disable mirroring: failed to retrieve pool " + "mirroring mode: " << cpp_strerror(r) << dendl; + return r; + } + + if (mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) { + lderr(cct) << "cannot disable mirroring in the current pool mirroring " + "mode" << dendl; + return -EINVAL; + } + + // is mirroring enabled for the image? 
+ cls::rbd::MirrorImage mirror_image_internal; + r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, + &mirror_image_internal); + if (r == -ENOENT) { + // mirroring is not enabled for this image + ldout(cct, 20) << "ignoring disable command: mirroring is not enabled for " + << "this image" << dendl; + return 0; + } else if (r == -EOPNOTSUPP) { + ldout(cct, 5) << "mirroring not supported by OSD" << dendl; + return r; + } else if (r < 0) { + lderr(cct) << "failed to retrieve mirror image metadata: " + << cpp_strerror(r) << dendl; + return r; + } + + mirror_image_internal.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING; + r = cls_client::mirror_image_set(&ictx->md_ctx, ictx->id, + mirror_image_internal); + if (r < 0) { + lderr(cct) << "cannot disable mirroring: " << cpp_strerror(r) << dendl; + return r; + } + + bool rollback = false; + BOOST_SCOPE_EXIT_ALL(ictx, &mirror_image_internal, &rollback) { + if (rollback) { + // restore the mask bit for treating the non-primary feature as read-only + ictx->image_lock.lock(); + ictx->read_only_mask |= IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + ictx->image_lock.unlock(); + + ictx->state->handle_update_notification(); + + // attempt to restore the image state + CephContext *cct = ictx->cct; + mirror_image_internal.state = cls::rbd::MIRROR_IMAGE_STATE_ENABLED; + int r = cls_client::mirror_image_set(&ictx->md_ctx, ictx->id, + mirror_image_internal); + if (r < 0) { + lderr(cct) << "failed to re-enable image mirroring: " + << cpp_strerror(r) << dendl; + } + } + }; + + std::unique_lock image_locker{ictx->image_lock}; + std::map snap_info = ictx->snap_info; + for (auto &info : snap_info) { + cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(), + ictx->md_ctx.get_namespace(), + ictx->id, info.first}; + std::vector child_images; + r = Image::list_children(ictx, parent_spec, &child_images); + if (r < 0) { + rollback = true; + return r; + } + + if (child_images.empty()) { + continue; + } + + librados::IoCtx child_io_ctx; + int64_t 
child_pool_id = -1; + for (auto &child_image : child_images){ + std::string pool = child_image.pool_name; + if (child_pool_id == -1 || + child_pool_id != child_image.pool_id || + child_io_ctx.get_namespace() != child_image.pool_namespace) { + r = util::create_ioctx(ictx->md_ctx, "child image", + child_image.pool_id, + child_image.pool_namespace, + &child_io_ctx); + if (r < 0) { + rollback = true; + return r; + } + + child_pool_id = child_image.pool_id; + } + + cls::rbd::MirrorImage child_mirror_image_internal; + r = cls_client::mirror_image_get(&child_io_ctx, child_image.image_id, + &child_mirror_image_internal); + if (r != -ENOENT) { + rollback = true; + lderr(cct) << "mirroring is enabled on one or more children " + << dendl; + return -EBUSY; + } + } + } + image_locker.unlock(); + + if (mirror_image_internal.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + // don't let the non-primary feature bit prevent image updates + ictx->image_lock.lock(); + ictx->read_only_mask &= ~IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + ictx->image_lock.unlock(); + + r = ictx->state->refresh(); + if (r < 0) { + rollback = true; + return r; + } + + // remove any snapshot-based mirroring image-meta from image + std::string mirror_uuid; + r = uuid_get(ictx->md_ctx, &mirror_uuid); + if (r < 0) { + rollback = true; + return r; + } + + r = ictx->operations->metadata_remove( + mirror::snapshot::util::get_image_meta_key(mirror_uuid)); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "cannot remove snapshot image-meta key: " << cpp_strerror(r) + << dendl; + rollback = true; + return r; + } + } + + C_SaferCond ctx; + auto req = mirror::DisableRequest::create(ictx, force, true, + &ctx); + req->send(); + + r = ctx.wait(); + if (r < 0) { + lderr(cct) << "cannot disable mirroring: " << cpp_strerror(r) << dendl; + rollback = true; + return r; + } + + if (mirror_image_internal.mode == cls::rbd::MIRROR_IMAGE_MODE_JOURNAL) { + r = ictx->operations->update_features(RBD_FEATURE_JOURNALING, false); + if (r < 0) { + 
lderr(cct) << "cannot disable journaling: " << cpp_strerror(r) << dendl; + // not fatal + } + } + + return 0; +} + +template +int Mirror::image_promote(I *ictx, bool force) { + CephContext *cct = ictx->cct; + + C_SaferCond ctx; + Mirror::image_promote(ictx, force, &ctx); + int r = ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to promote image" << dendl; + return r; + } + + return 0; +} + +template +void Mirror::image_promote(I *ictx, bool force, Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << ", " + << "force=" << force << dendl; + + // don't let the non-primary feature bit prevent image updates + ictx->image_lock.lock(); + ictx->read_only_mask &= ~IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + ictx->image_lock.unlock(); + + auto on_promote = new LambdaContext([ictx, on_finish](int r) { + ictx->image_lock.lock(); + ictx->read_only_mask |= IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + ictx->image_lock.unlock(); + + ictx->state->handle_update_notification(); + on_finish->complete(r); + }); + + auto on_refresh = new LambdaContext([ictx, force, on_promote](int r) { + if (r < 0) { + lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl; + on_promote->complete(r); + return; + } + + auto req = mirror::PromoteRequest<>::create(*ictx, force, on_promote); + req->send(); + }); + ictx->state->refresh(on_refresh); +} + +template +int Mirror::image_demote(I *ictx) { + CephContext *cct = ictx->cct; + + C_SaferCond ctx; + Mirror::image_demote(ictx, &ctx); + int r = ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to demote image" << dendl; + return r; + } + + return 0; +} + +template +void Mirror::image_demote(I *ictx, Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + auto on_cleanup = new LambdaContext([ictx, on_finish](int r) { + ictx->image_lock.lock(); + ictx->read_only_mask |= IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + ictx->image_lock.unlock(); + + 
ictx->state->handle_update_notification(); + + on_finish->complete(r); + }); + auto on_refresh = new LambdaContext([ictx, on_cleanup](int r) { + if (r < 0) { + lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl; + on_cleanup->complete(r); + return; + } + + auto req = mirror::DemoteRequest<>::create(*ictx, on_cleanup); + req->send(); + }); + + // ensure we can create a snapshot after setting the non-primary + // feature bit + ictx->image_lock.lock(); + ictx->read_only_mask &= ~IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + ictx->image_lock.unlock(); + + ictx->state->refresh(on_refresh); +} + +template +int Mirror::image_resync(I *ictx) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + cls::rbd::MirrorImage mirror_image; + mirror::PromotionState promotion_state; + std::string primary_mirror_uuid; + C_SaferCond get_info_ctx; + auto req = mirror::GetInfoRequest::create(*ictx, &mirror_image, + &promotion_state, + &primary_mirror_uuid, + &get_info_ctx); + req->send(); + + r = get_info_ctx.wait(); + if (r < 0) { + return r; + } + + if (promotion_state == mirror::PROMOTION_STATE_PRIMARY) { + lderr(cct) << "image is primary, cannot resync to itself" << dendl; + return -EINVAL; + } + + if (mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_JOURNAL) { + // flag the journal indicating that we want to rebuild the local image + r = Journal::request_resync(ictx); + if (r < 0) { + lderr(cct) << "failed to request resync: " << cpp_strerror(r) << dendl; + return r; + } + } else if (mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + std::string mirror_uuid; + r = uuid_get(ictx->md_ctx, &mirror_uuid); + if (r < 0) { + return r; + } + + mirror::snapshot::ImageMeta image_meta(ictx, mirror_uuid); + + C_SaferCond load_meta_ctx; + image_meta.load(&load_meta_ctx); + r = load_meta_ctx.wait(); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to load mirror 
image-meta: " << cpp_strerror(r) + << dendl; + return r; + } + + image_meta.resync_requested = true; + + C_SaferCond save_meta_ctx; + image_meta.save(&save_meta_ctx); + r = save_meta_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to request resync: " << cpp_strerror(r) << dendl; + return r; + } + } else { + lderr(cct) << "unknown mirror mode" << dendl; + return -EINVAL; + } + + return 0; +} + +template +void Mirror::image_get_info(I *ictx, mirror_image_info_t *mirror_image_info, + Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + auto on_refresh = new LambdaContext( + [ictx, mirror_image_info, on_finish](int r) { + if (r < 0) { + lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + + auto ctx = new C_ImageGetInfo(mirror_image_info, nullptr, on_finish); + auto req = mirror::GetInfoRequest::create(*ictx, &ctx->mirror_image, + &ctx->promotion_state, + &ctx->primary_mirror_uuid, + ctx); + req->send(); + }); + + if (ictx->state->is_refresh_required()) { + ictx->state->refresh(on_refresh); + } else { + on_refresh->complete(0); + } +} + +template +int Mirror::image_get_info(I *ictx, mirror_image_info_t *mirror_image_info) { + C_SaferCond ctx; + image_get_info(ictx, mirror_image_info, &ctx); + + int r = ctx.wait(); + if (r < 0) { + return r; + } + return 0; +} + +template +void Mirror::image_get_info(librados::IoCtx& io_ctx, + asio::ContextWQ *op_work_queue, + const std::string &image_id, + mirror_image_info_t *mirror_image_info, + Context *on_finish) { + auto cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << "pool_id=" << io_ctx.get_id() << ", image_id=" << image_id + << dendl; + + auto ctx = new C_ImageGetInfo(mirror_image_info, nullptr, on_finish); + auto req = mirror::GetInfoRequest::create(io_ctx, op_work_queue, image_id, + &ctx->mirror_image, + &ctx->promotion_state, + &ctx->primary_mirror_uuid, ctx); + req->send(); +} + +template +int 
Mirror::image_get_info(librados::IoCtx& io_ctx, + asio::ContextWQ *op_work_queue, + const std::string &image_id, + mirror_image_info_t *mirror_image_info) { + C_SaferCond ctx; + image_get_info(io_ctx, op_work_queue, image_id, mirror_image_info, &ctx); + + int r = ctx.wait(); + if (r < 0) { + return r; + } + return 0; +} + +template +void Mirror::image_get_mode(I *ictx, mirror_image_mode_t *mode, + Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + auto ctx = new C_ImageGetInfo(nullptr, mode, on_finish); + auto req = mirror::GetInfoRequest::create(*ictx, &ctx->mirror_image, + &ctx->promotion_state, + &ctx->primary_mirror_uuid, ctx); + req->send(); +} + +template +int Mirror::image_get_mode(I *ictx, mirror_image_mode_t *mode) { + C_SaferCond ctx; + image_get_mode(ictx, mode, &ctx); + + int r = ctx.wait(); + if (r < 0) { + return r; + } + return 0; +} + +template +void Mirror::image_get_global_status(I *ictx, + mirror_image_global_status_t *status, + Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + auto ctx = new C_ImageGetGlobalStatus(ictx->name, status, on_finish); + auto req = mirror::GetStatusRequest::create( + *ictx, &ctx->mirror_image_status_internal, &ctx->mirror_image, + &ctx->promotion_state, ctx); + req->send(); +} + +template +int Mirror::image_get_global_status(I *ictx, + mirror_image_global_status_t *status) { + C_SaferCond ctx; + image_get_global_status(ictx, status, &ctx); + + int r = ctx.wait(); + if (r < 0) { + return r; + } + return 0; +} + +template +int Mirror::image_get_instance_id(I *ictx, std::string *instance_id) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + cls::rbd::MirrorImage mirror_image; + int r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, &mirror_image); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve mirroring state: " << cpp_strerror(r) + << dendl; + return r; + 
} else if (mirror_image.state != cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+    lderr(cct) << "mirroring is not currently enabled" << dendl;
+    return -EINVAL;
+  }
+
+  entity_inst_t instance;
+  r = cls_client::mirror_image_instance_get(&ictx->md_ctx,
+                                            mirror_image.global_image_id,
+                                            &instance);
+  if (r < 0) {
+    if (r != -ENOENT && r != -ESTALE) {
+      lderr(cct) << "failed to get mirror image instance: " << cpp_strerror(r)
+                 << dendl;
+    }
+    return r;
+  }
+
+  *instance_id = stringify(instance.name.num());
+  return 0;
+}
+
+/**
+ * Fetch the mirroring site name of the cluster.
+ *
+ * Reads the RBD_MIRROR_SITE_NAME_CONFIG_KEY config-key and falls back to
+ * the cluster fsid when no name is available.
+ *
+ * @param rados  cluster handle
+ * @param name   [out] site name, or the cluster fsid as a fallback
+ * @return 0 on success; -EOPNOTSUPP as reported by the config-key lookup;
+ *         otherwise the negative errno of the failing step.
+ */
+template <typename I>
+int Mirror<I>::site_name_get(librados::Rados& rados, std::string* name) {
+  CephContext *cct = reinterpret_cast<CephContext *>(rados.cct());
+  ldout(cct, 20) << dendl;
+
+  int r = get_config_key(rados, RBD_MIRROR_SITE_NAME_CONFIG_KEY, name);
+  if (r == -EOPNOTSUPP) {
+    return r;
+  } else if (r == -ENOENT || name->empty()) {
+    // default to the cluster fsid
+    // NOTE(review): this branch is tested before the generic r < 0 branch,
+    // so a failed lookup that leaves *name empty also lands here -- confirm
+    // that falling back instead of reporting the error is intended
+    r = rados.cluster_fsid(name);
+    if (r < 0) {
+      lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r)
+                 << dendl;
+    }
+    return r;
+  } else if (r < 0) {
+    lderr(cct) << "failed to retrieve site name: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+/**
+ * Store the mirroring site name of the cluster.
+ *
+ * Leading and trailing whitespace is trimmed from @p name and the trimmed
+ * value is written to the RBD_MIRROR_SITE_NAME_CONFIG_KEY config-key.
+ *
+ * @param rados  cluster handle
+ * @param name   requested site name
+ * @return 0 on success (a -ENOENT result from the update is also treated
+ *         as success); -EOPNOTSUPP as reported by the config-key update;
+ *         otherwise the negative errno of the update.
+ */
+template <typename I>
+int Mirror<I>::site_name_set(librados::Rados& rados, const std::string& name) {
+  CephContext *cct = reinterpret_cast<CephContext *>(rados.cct());
+
+  std::string site_name{name};
+  boost::algorithm::trim(site_name);
+  ldout(cct, 20) << "site_name=" << site_name << dendl;
+
+  // persist the trimmed value that was just logged, not the raw argument
+  int r = set_config_key(rados, RBD_MIRROR_SITE_NAME_CONFIG_KEY, site_name);
+  if (r == -EOPNOTSUPP) {
+    return r;
+  } else if (r < 0 && r != -ENOENT) {
+    lderr(cct) << "failed to update site name: " << cpp_strerror(r)
+               << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template
+int Mirror::mode_get(librados::IoCtx& io_ctx,
+                     rbd_mirror_mode_t *mirror_mode) {
+  CephContext *cct = reinterpret_cast(io_ctx.cct());
+  ldout(cct, 20) << dendl;
+
+  cls::rbd::MirrorMode mirror_mode_internal;
+  int r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode_internal);
+  if (r < 0) {
+
lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r) + << dendl; + return r; + } + + switch (mirror_mode_internal) { + case cls::rbd::MIRROR_MODE_DISABLED: + case cls::rbd::MIRROR_MODE_IMAGE: + case cls::rbd::MIRROR_MODE_POOL: + *mirror_mode = static_cast(mirror_mode_internal); + break; + default: + lderr(cct) << "unknown mirror mode (" + << static_cast(mirror_mode_internal) << ")" + << dendl; + return -EINVAL; + } + return 0; +} + +template +int Mirror::mode_set(librados::IoCtx& io_ctx, + rbd_mirror_mode_t mirror_mode) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << dendl; + + cls::rbd::MirrorMode next_mirror_mode; + switch (mirror_mode) { + case RBD_MIRROR_MODE_DISABLED: + case RBD_MIRROR_MODE_IMAGE: + case RBD_MIRROR_MODE_POOL: + next_mirror_mode = static_cast(mirror_mode); + break; + default: + lderr(cct) << "unknown mirror mode (" + << static_cast(mirror_mode) << ")" << dendl; + return -EINVAL; + } + + int r; + if (next_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + // fail early if pool still has peers registered and attempting to disable + std::vector mirror_peers; + r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list peers: " << cpp_strerror(r) << dendl; + return r; + } else if (!mirror_peers.empty()) { + lderr(cct) << "mirror peers still registered" << dendl; + return -EBUSY; + } + } + + cls::rbd::MirrorMode current_mirror_mode; + r = cls_client::mirror_mode_get(&io_ctx, ¤t_mirror_mode); + if (r < 0) { + lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r) + << dendl; + return r; + } + + if (current_mirror_mode == next_mirror_mode) { + return 0; + } else if (current_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + uuid_d uuid_gen; + uuid_gen.generate_random(); + r = cls_client::mirror_uuid_set(&io_ctx, uuid_gen.to_string()); + if (r < 0) { + lderr(cct) << "failed to allocate mirroring uuid: " << cpp_strerror(r) + << dendl; + 
return r; + } + } + + if (current_mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) { + r = cls_client::mirror_mode_set(&io_ctx, cls::rbd::MIRROR_MODE_IMAGE); + if (r < 0) { + lderr(cct) << "failed to set mirror mode to image: " + << cpp_strerror(r) << dendl; + return r; + } + + r = MirroringWatcher<>::notify_mode_updated(io_ctx, + cls::rbd::MIRROR_MODE_IMAGE); + if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + } + + if (next_mirror_mode == cls::rbd::MIRROR_MODE_IMAGE) { + return 0; + } + + if (next_mirror_mode == cls::rbd::MIRROR_MODE_POOL) { + std::map images; + r = Image::list_images_v2(io_ctx, &images); + if (r < 0) { + lderr(cct) << "failed listing images: " << cpp_strerror(r) << dendl; + return r; + } + + for (const auto& img_pair : images) { + uint64_t features; + uint64_t incompatible_features; + r = cls_client::get_features(&io_ctx, util::header_name(img_pair.second), + true, &features, &incompatible_features); + if (r < 0) { + lderr(cct) << "error getting features for image " << img_pair.first + << ": " << cpp_strerror(r) << dendl; + return r; + } + + // Enable only journal based mirroring + + if ((features & RBD_FEATURE_JOURNALING) != 0) { + I *img_ctx = I::create("", img_pair.second, nullptr, io_ctx, false); + r = img_ctx->state->open(0); + if (r < 0) { + lderr(cct) << "error opening image "<< img_pair.first << ": " + << cpp_strerror(r) << dendl; + return r; + } + + r = image_enable(img_ctx, RBD_MIRROR_IMAGE_MODE_JOURNAL, true); + int close_r = img_ctx->state->close(); + if (r < 0) { + lderr(cct) << "error enabling mirroring for image " + << img_pair.first << ": " << cpp_strerror(r) << dendl; + return r; + } else if (close_r < 0) { + lderr(cct) << "failed to close image " << img_pair.first << ": " + << cpp_strerror(close_r) << dendl; + return close_r; + } + } + } + } else if (next_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + while (true) { + bool retry_busy = false; + bool pending_busy = false; + + 
std::set image_ids; + r = list_mirror_images(io_ctx, image_ids); + if (r < 0) { + lderr(cct) << "failed listing images: " << cpp_strerror(r) << dendl; + return r; + } + + for (const auto& img_id : image_ids) { + if (current_mirror_mode == cls::rbd::MIRROR_MODE_IMAGE) { + cls::rbd::MirrorImage mirror_image; + r = cls_client::mirror_image_get(&io_ctx, img_id, &mirror_image); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve mirroring state for image id " + << img_id << ": " << cpp_strerror(r) << dendl; + return r; + } + if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + lderr(cct) << "failed to disable mirror mode: there are still " + << "images with mirroring enabled" << dendl; + return -EINVAL; + } + } else { + I *img_ctx = I::create("", img_id, nullptr, io_ctx, false); + r = img_ctx->state->open(0); + if (r < 0) { + lderr(cct) << "error opening image id "<< img_id << ": " + << cpp_strerror(r) << dendl; + return r; + } + + r = image_disable(img_ctx, false); + int close_r = img_ctx->state->close(); + if (r == -EBUSY) { + pending_busy = true; + } else if (r < 0) { + lderr(cct) << "error disabling mirroring for image id " << img_id + << cpp_strerror(r) << dendl; + return r; + } else if (close_r < 0) { + lderr(cct) << "failed to close image id " << img_id << ": " + << cpp_strerror(close_r) << dendl; + return close_r; + } else if (pending_busy) { + // at least one mirrored image was successfully disabled, so we can + // retry any failures caused by busy parent/child relationships + retry_busy = true; + } + } + } + + if (!retry_busy && pending_busy) { + lderr(cct) << "error disabling mirroring for one or more images" + << dendl; + return -EBUSY; + } else if (!retry_busy) { + break; + } + } + } + + r = cls_client::mirror_mode_set(&io_ctx, next_mirror_mode); + if (r < 0) { + lderr(cct) << "failed to set mirror mode: " << cpp_strerror(r) << dendl; + return r; + } + + r = MirroringWatcher<>::notify_mode_updated(io_ctx, next_mirror_mode); + 
if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + return 0; +} + +template +int Mirror::uuid_get(librados::IoCtx& io_ctx, std::string* mirror_uuid) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << dendl; + + C_SaferCond ctx; + uuid_get(io_ctx, mirror_uuid, &ctx); + int r = ctx.wait(); + if (r < 0) { + if (r != -ENOENT) { + lderr(cct) << "failed to retrieve mirroring uuid: " << cpp_strerror(r) + << dendl; + } + return r; + } + + return 0; +} + +template +void Mirror::uuid_get(librados::IoCtx& io_ctx, std::string* mirror_uuid, + Context* on_finish) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << dendl; + + auto req = mirror::GetUuidRequest::create(io_ctx, mirror_uuid, on_finish); + req->send(); +} + +template +int Mirror::peer_bootstrap_create(librados::IoCtx& io_ctx, + std::string* token) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << dendl; + + auto mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + int r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve mirroring mode: " << cpp_strerror(r) + << dendl; + return r; + } else if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + return -EINVAL; + } + + // retrieve the cluster fsid + std::string fsid; + librados::Rados rados(io_ctx); + r = rados.cluster_fsid(&fsid); + if (r < 0) { + lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r) + << dendl; + return r; + } + + std::string peer_client_id; + std::string cephx_key; + r = create_bootstrap_user(cct, rados, &peer_client_id, &cephx_key); + if (r < 0) { + return r; + } + + std::string mon_host = get_mon_host(cct); + + // format the token response + bufferlist token_bl; + token_bl.append( + R"({)" \ + R"("fsid":")" + fsid + R"(",)" + \ + R"("client_id":")" + peer_client_id + R"(",)" + \ + R"("key":")" + cephx_key + R"(",)" + \ + R"("mon_host":")" + \ + 
boost::replace_all_copy(mon_host, "\"", "\\\"") + R"(")" + \ + R"(})"); + ldout(cct, 20) << "token=" << token_bl.to_str() << dendl; + + bufferlist base64_bl; + token_bl.encode_base64(base64_bl); + *token = base64_bl.to_str(); + + return 0; +} + +template +int Mirror::peer_bootstrap_import(librados::IoCtx& io_ctx, + rbd_mirror_peer_direction_t direction, + const std::string& token) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << dendl; + + if (direction != RBD_MIRROR_PEER_DIRECTION_RX && + direction != RBD_MIRROR_PEER_DIRECTION_RX_TX) { + lderr(cct) << "invalid mirror peer direction" << dendl; + return -EINVAL; + } + + bufferlist token_bl; + try { + bufferlist base64_bl; + base64_bl.append(token); + token_bl.decode_base64(base64_bl); + } catch (buffer::error& err) { + lderr(cct) << "failed to decode base64" << dendl; + return -EINVAL; + } + + ldout(cct, 20) << "token=" << token_bl.to_str() << dendl; + + bool json_valid = false; + std::string expected_remote_fsid; + std::string remote_client_id; + std::string remote_key; + std::string remote_mon_host; + + json_spirit::mValue json_root; + if(json_spirit::read(token_bl.to_str(), json_root)) { + try { + auto& json_obj = json_root.get_obj(); + expected_remote_fsid = json_obj["fsid"].get_str(); + remote_client_id = json_obj["client_id"].get_str(); + remote_key = json_obj["key"].get_str(); + remote_mon_host = json_obj["mon_host"].get_str(); + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + lderr(cct) << "invalid bootstrap token JSON received" << dendl; + return -EINVAL; + } + + // sanity check import process + std::string local_fsid; + librados::Rados rados(io_ctx); + int r = rados.cluster_fsid(&local_fsid); + if (r < 0) { + lderr(cct) << "failed to retrieve cluster fsid: " << cpp_strerror(r) + << dendl; + return r; + } + + std::string local_site_name; + r = site_name_get(rados, &local_site_name); + if (r < 0) { + lderr(cct) << "failed to retrieve cluster 
site name: " << cpp_strerror(r) + << dendl; + return r; + } + + // attempt to connect to remote cluster + librados::Rados remote_rados; + remote_rados.init(remote_client_id.c_str()); + + auto remote_cct = reinterpret_cast(remote_rados.cct()); + remote_cct->_conf.set_val("mon_host", remote_mon_host); + remote_cct->_conf.set_val("key", remote_key); + + r = remote_rados.connect(); + if (r < 0) { + lderr(cct) << "failed to connect to peer cluster: " << cpp_strerror(r) + << dendl; + return r; + } + + std::string remote_fsid; + r = remote_rados.cluster_fsid(&remote_fsid); + if (r < 0) { + lderr(cct) << "failed to retrieve remote cluster fsid: " + << cpp_strerror(r) << dendl; + return r; + } else if (local_fsid == remote_fsid) { + lderr(cct) << "cannot import token for local cluster" << dendl; + return -EINVAL; + } else if (expected_remote_fsid != remote_fsid) { + lderr(cct) << "unexpected remote cluster fsid" << dendl; + return -EINVAL; + } + + std::string remote_site_name; + r = site_name_get(remote_rados, &remote_site_name); + if (r < 0) { + lderr(cct) << "failed to retrieve remote cluster site name: " + << cpp_strerror(r) << dendl; + return r; + } else if (local_site_name == remote_site_name) { + lderr(cct) << "cannot import token for duplicate site name" << dendl; + return -EINVAL; + } + + librados::IoCtx remote_io_ctx; + r = remote_rados.ioctx_create(io_ctx.get_pool_name().c_str(), remote_io_ctx); + if (r == -ENOENT) { + ldout(cct, 10) << "remote pool does not exist" << dendl; + return r; + } else if (r < 0) { + lderr(cct) << "failed to open remote pool '" << io_ctx.get_pool_name() + << "': " << cpp_strerror(r) << dendl; + return r; + } + + auto remote_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + r = cls_client::mirror_mode_get(&remote_io_ctx, &remote_mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve remote mirroring mode: " + << cpp_strerror(r) << dendl; + return r; + } else if (remote_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) 
{ + return -ENOSYS; + } + + auto local_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + r = cls_client::mirror_mode_get(&io_ctx, &local_mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve local mirroring mode: " << cpp_strerror(r) + << dendl; + return r; + } else if (local_mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + // copy mirror mode from remote peer + r = mode_set(io_ctx, static_cast(remote_mirror_mode)); + if (r < 0) { + return r; + } + } + + if (direction == RBD_MIRROR_PEER_DIRECTION_RX_TX) { + // create a local mirror peer user and export it to the remote cluster + std::string local_client_id; + std::string local_key; + r = create_bootstrap_user(cct, rados, &local_client_id, &local_key); + if (r < 0) { + return r; + } + + std::string local_mon_host = get_mon_host(cct); + + // create local cluster peer in remote cluster + r = create_bootstrap_peer(cct, remote_io_ctx, + RBD_MIRROR_PEER_DIRECTION_RX_TX, local_site_name, + local_fsid, local_client_id, local_key, + local_mon_host, "local", "remote"); + if (r < 0) { + return r; + } + } + + // create remote cluster peer in local cluster + r = create_bootstrap_peer(cct, io_ctx, direction, remote_site_name, + remote_fsid, remote_client_id, remote_key, + remote_mon_host, "remote", "local"); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Mirror::peer_site_add(librados::IoCtx& io_ctx, std::string *uuid, + mirror_peer_direction_t direction, + const std::string &site_name, + const std::string &client_name) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << "name=" << site_name << ", " + << "client=" << client_name << dendl; + + if (cct->_conf->cluster == site_name) { + lderr(cct) << "cannot add self as remote peer" << dendl; + return -EINVAL; + } + + if (direction == RBD_MIRROR_PEER_DIRECTION_TX) { + return -EINVAL; + } + + int r; + do { + uuid_d uuid_gen; + uuid_gen.generate_random(); + + *uuid = uuid_gen.to_string(); + r = 
cls_client::mirror_peer_add( + &io_ctx, {*uuid, static_cast(direction), + site_name, client_name, ""}); + if (r == -ESTALE) { + ldout(cct, 5) << "duplicate UUID detected, retrying" << dendl; + } else if (r < 0) { + lderr(cct) << "failed to add mirror peer '" << site_name << "': " + << cpp_strerror(r) << dendl; + return r; + } + } while (r == -ESTALE); + return 0; +} + +template +int Mirror::peer_site_remove(librados::IoCtx& io_ctx, + const std::string &uuid) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << dendl; + + int r = remove_peer_config_key(io_ctx, uuid); + if (r < 0) { + lderr(cct) << "failed to remove peer attributes '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + + r = cls_client::mirror_peer_remove(&io_ctx, uuid); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to remove peer '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + + std::vector names; + r = Namespace::list(io_ctx, &names); + if (r < 0) { + return r; + } + + names.push_back(""); + + librados::IoCtx ns_io_ctx; + ns_io_ctx.dup(io_ctx); + + for (auto &name : names) { + ns_io_ctx.set_namespace(name); + + std::set image_ids; + r = list_mirror_images(ns_io_ctx, image_ids); + if (r < 0) { + lderr(cct) << "failed listing images in " + << (name.empty() ? "default" : name) << " namespace : " + << cpp_strerror(r) << dendl; + return r; + } + + for (const auto& image_id : image_ids) { + cls::rbd::MirrorImage mirror_image; + r = cls_client::mirror_image_get(&ns_io_ctx, image_id, &mirror_image); + if (r == -ENOENT) { + continue; + } + if (r < 0) { + lderr(cct) << "error getting mirror info for image " << image_id + << ": " << cpp_strerror(r) << dendl; + return r; + } + if (mirror_image.mode != cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + continue; + } + + // Snapshot based mirroring. Unlink the peer from mirroring snapshots. + // TODO: optimize. 
+ + I *img_ctx = I::create("", image_id, nullptr, ns_io_ctx, false); + img_ctx->read_only_mask &= ~IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + + r = img_ctx->state->open(0); + if (r == -ENOENT) { + continue; + } + if (r < 0) { + lderr(cct) << "error opening image " << image_id << ": " + << cpp_strerror(r) << dendl; + return r; + } + + std::list snap_ids; + { + std::shared_lock image_locker{img_ctx->image_lock}; + for (auto &it : img_ctx->snap_info) { + auto info = std::get_if( + &it.second.snap_namespace); + if (info && info->mirror_peer_uuids.count(uuid)) { + snap_ids.push_back(it.first); + } + } + } + for (auto snap_id : snap_ids) { + C_SaferCond cond; + auto req = mirror::snapshot::UnlinkPeerRequest::create( + img_ctx, snap_id, uuid, true, &cond); + req->send(); + r = cond.wait(); + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + break; + } + } + + int close_r = img_ctx->state->close(); + if (r < 0) { + lderr(cct) << "error unlinking peer for image " << image_id << ": " + << cpp_strerror(r) << dendl; + return r; + } else if (close_r < 0) { + lderr(cct) << "failed to close image " << image_id << ": " + << cpp_strerror(close_r) << dendl; + return close_r; + } + } + } + + return 0; +} + +template +int Mirror::peer_site_list(librados::IoCtx& io_ctx, + std::vector *peers) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << dendl; + + std::vector mirror_peers; + int r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list peers: " << cpp_strerror(r) << dendl; + return r; + } + + peers->clear(); + peers->reserve(mirror_peers.size()); + for (auto &mirror_peer : mirror_peers) { + mirror_peer_site_t peer; + peer.uuid = mirror_peer.uuid; + peer.direction = static_cast( + mirror_peer.mirror_peer_direction); + peer.site_name = mirror_peer.site_name; + peer.mirror_uuid = mirror_peer.mirror_uuid; + peer.client_name = mirror_peer.client_name; + peer.last_seen = mirror_peer.last_seen.sec(); + 
peers->push_back(peer); + } + return 0; +} + +template +int Mirror::peer_site_set_client(librados::IoCtx& io_ctx, + const std::string &uuid, + const std::string &client_name) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << ", " + << "client=" << client_name << dendl; + + int r = cls_client::mirror_peer_set_client(&io_ctx, uuid, client_name); + if (r < 0) { + lderr(cct) << "failed to update client '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + return 0; +} + +template +int Mirror::peer_site_set_name(librados::IoCtx& io_ctx, + const std::string &uuid, + const std::string &site_name) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << ", " + << "name=" << site_name << dendl; + + if (cct->_conf->cluster == site_name) { + lderr(cct) << "cannot set self as remote peer" << dendl; + return -EINVAL; + } + + int r = cls_client::mirror_peer_set_cluster(&io_ctx, uuid, site_name); + if (r < 0) { + lderr(cct) << "failed to update site '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + return 0; +} + +template +int Mirror::peer_site_set_direction(librados::IoCtx& io_ctx, + const std::string &uuid, + mirror_peer_direction_t direction) { + cls::rbd::MirrorPeerDirection mirror_peer_direction = static_cast< + cls::rbd::MirrorPeerDirection>(direction); + + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << ", " + << "direction=" << mirror_peer_direction << dendl; + + int r = cls_client::mirror_peer_set_direction(&io_ctx, uuid, + mirror_peer_direction); + if (r < 0) { + lderr(cct) << "failed to update direction '" << uuid << "': " + << cpp_strerror(r) << dendl; + return r; + } + return 0; +} + +template +int Mirror::peer_site_get_attributes(librados::IoCtx& io_ctx, + const std::string &uuid, + Attributes* attributes) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << dendl; + + 
attributes->clear(); + + librados::Rados rados(io_ctx); + std::string value; + int r = get_config_key(rados, get_peer_config_key_name(io_ctx.get_id(), uuid), + &value); + if (r == -ENOENT || value.empty()) { + return -ENOENT; + } else if (r < 0) { + lderr(cct) << "failed to retrieve peer attributes: " << cpp_strerror(r) + << dendl; + return r; + } + + bool json_valid = false; + json_spirit::mValue json_root; + if(json_spirit::read(value, json_root)) { + try { + auto& json_obj = json_root.get_obj(); + for (auto& pairs : json_obj) { + (*attributes)[pairs.first] = pairs.second.get_str(); + } + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + lderr(cct) << "invalid peer attributes JSON received" << dendl; + return -EINVAL; + } + return 0; +} + +template +int Mirror::peer_site_set_attributes(librados::IoCtx& io_ctx, + const std::string &uuid, + const Attributes& attributes) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << "uuid=" << uuid << ", " + << "attributes=" << attributes << dendl; + + std::vector mirror_peers; + int r = peer_site_list(io_ctx, &mirror_peers); + if (r < 0) { + return r; + } + + if (std::find_if(mirror_peers.begin(), mirror_peers.end(), + [&uuid](const librbd::mirror_peer_site_t& peer) { + return uuid == peer.uuid; + }) == mirror_peers.end()) { + ldout(cct, 5) << "mirror peer uuid " << uuid << " does not exist" << dendl; + return -ENOENT; + } + + std::stringstream ss; + ss << "{"; + for (auto& pair : attributes) { + ss << "\\\"" << pair.first << "\\\": " + << "\\\"" << pair.second << "\\\""; + if (&pair != &(*attributes.rbegin())) { + ss << ", "; + } + } + ss << "}"; + + librados::Rados rados(io_ctx); + r = set_config_key(rados, get_peer_config_key_name(io_ctx.get_id(), uuid), + ss.str()); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to update peer attributes: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +template +int Mirror::image_global_status_list( + 
librados::IoCtx& io_ctx, const std::string &start_id, size_t max, + IdToMirrorImageGlobalStatus *images) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + int r; + + std::map id_to_name; + { + std::map name_to_id; + r = Image::list_images_v2(io_ctx, &name_to_id); + if (r < 0) { + return r; + } + for (auto it : name_to_id) { + id_to_name[it.second] = it.first; + } + } + + std::map images_; + std::map statuses_; + + r = librbd::cls_client::mirror_image_status_list(&io_ctx, start_id, max, + &images_, &statuses_); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list mirror image statuses: " + << cpp_strerror(r) << dendl; + return r; + } + + const std::string STATUS_NOT_FOUND("status not found"); + for (auto it = images_.begin(); it != images_.end(); ++it) { + auto &image_id = it->first; + auto &info = it->second; + if (info.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLED) { + continue; + } + + auto &image_name = id_to_name[image_id]; + if (image_name.empty()) { + lderr(cct) << "failed to find image name for image " << image_id << ", " + << "using image id as name" << dendl; + image_name = image_id; + } + + mirror_image_global_status_t& global_status = (*images)[image_id]; + global_status.name = image_name; + global_status.info = mirror_image_info_t{ + info.global_image_id, + static_cast(info.state), + false}; // XXX: To set "primary" right would require an additional call. 
+ + bool found_local_site_status = false; + auto s_it = statuses_.find(image_id); + if (s_it != statuses_.end()) { + auto& status = s_it->second; + + global_status.site_statuses.reserve( + status.mirror_image_site_statuses.size()); + for (auto& site_status : status.mirror_image_site_statuses) { + if (site_status.mirror_uuid == + cls::rbd::MirrorImageSiteStatus::LOCAL_MIRROR_UUID) { + found_local_site_status = true; + } + + global_status.site_statuses.push_back(mirror_image_site_status_t{ + site_status.mirror_uuid, + static_cast(site_status.state), + site_status.state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN ? + STATUS_NOT_FOUND : site_status.description, + site_status.last_update.sec(), site_status.up}); + } + } + + if (!found_local_site_status) { + global_status.site_statuses.push_back(mirror_image_site_status_t{ + cls::rbd::MirrorImageSiteStatus::LOCAL_MIRROR_UUID, + MIRROR_IMAGE_STATUS_STATE_UNKNOWN, STATUS_NOT_FOUND, 0, false}); + } + } + + return 0; +} + +template +int Mirror::image_status_summary(librados::IoCtx& io_ctx, + MirrorImageStatusStates *states) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + + std::vector mirror_peers; + int r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list mirror peers: " << cpp_strerror(r) << dendl; + return r; + } + + std::map states_; + r = cls_client::mirror_image_status_get_summary(&io_ctx, mirror_peers, + &states_); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to get mirror status summary: " + << cpp_strerror(r) << dendl; + return r; + } + for (auto &s : states_) { + (*states)[static_cast(s.first)] = s.second; + } + return 0; +} + +template +int Mirror::image_instance_id_list( + librados::IoCtx& io_ctx, const std::string &start_image_id, size_t max, + std::map *instance_ids) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + std::map instances; + + int r = librbd::cls_client::mirror_image_instance_list( + &io_ctx, 
start_image_id, max, &instances); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list mirror image instances: " << cpp_strerror(r) + << dendl; + return r; + } + + for (auto it : instances) { + (*instance_ids)[it.first] = stringify(it.second.name.num()); + } + + return 0; +} + +template +int Mirror::image_info_list( + librados::IoCtx& io_ctx, mirror_image_mode_t *mode_filter, + const std::string &start_id, size_t max, + std::map> *entries) { + CephContext *cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 20) << "pool=" << io_ctx.get_pool_name() << ", mode_filter=" + << (mode_filter ? stringify(*mode_filter) : "null") + << ", start_id=" << start_id << ", max=" << max << dendl; + + std::string last_read = start_id; + entries->clear(); + + while (entries->size() < max) { + std::map images; + std::map statuses; + + int r = librbd::cls_client::mirror_image_status_list(&io_ctx, last_read, + max, &images, + &statuses); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list mirror image statuses: " + << cpp_strerror(r) << dendl; + return r; + } + + if (images.empty()) { + break; + } + + AsioEngine asio_engine(io_ctx); + + for (auto &it : images) { + auto &image_id = it.first; + auto &image = it.second; + auto mode = static_cast(image.mode); + + if ((mode_filter && mode != *mode_filter) || + image.state != cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + continue; + } + + // need to call get_info for every image to retrieve promotion state + + mirror_image_info_t info; + r = image_get_info(io_ctx, asio_engine.get_work_queue(), image_id, &info); + if (r < 0) { + continue; + } + + (*entries)[image_id] = std::make_pair(mode, info); + if (entries->size() == max) { + break; + } + } + + last_read = images.rbegin()->first; + } + + return 0; +} + +template +int Mirror::image_snapshot_create(I *ictx, uint32_t flags, + uint64_t *snap_id) { + C_SaferCond ctx; + Mirror::image_snapshot_create(ictx, flags, snap_id, &ctx); + + return ctx.wait(); +} + +template +void 
Mirror::image_snapshot_create(I *ictx, uint32_t flags, + uint64_t *snap_id, Context *on_finish) { + CephContext *cct = ictx->cct; + ldout(cct, 20) << "ictx=" << ictx << dendl; + + uint64_t snap_create_flags = 0; + int r = util::snap_create_flags_api_to_internal(cct, flags, + &snap_create_flags); + if (r < 0) { + on_finish->complete(r); + return; + } + + auto on_refresh = new LambdaContext( + [ictx, snap_create_flags, snap_id, on_finish](int r) { + if (r < 0) { + lderr(ictx->cct) << "refresh failed: " << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + + auto ctx = new C_ImageSnapshotCreate(ictx, snap_create_flags, snap_id, + on_finish); + auto req = mirror::GetInfoRequest::create(*ictx, &ctx->mirror_image, + &ctx->promotion_state, + &ctx->primary_mirror_uuid, + ctx); + req->send(); + }); + + if (ictx->state->is_refresh_required()) { + ictx->state->refresh(on_refresh); + } else { + on_refresh->complete(0); + } +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Mirror; diff --git a/src/librbd/api/Mirror.h b/src/librbd/api/Mirror.h new file mode 100644 index 000000000..b3a552b13 --- /dev/null +++ b/src/librbd/api/Mirror.h @@ -0,0 +1,126 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef LIBRBD_API_MIRROR_H +#define LIBRBD_API_MIRROR_H + +#include "include/rbd/librbd.hpp" +#include +#include +#include + +struct Context; + +namespace librbd { + +struct ImageCtx; +namespace asio { struct ContextWQ; } + +namespace api { + +template +struct Mirror { + typedef std::map Attributes; + typedef std::map + IdToMirrorImageGlobalStatus; + typedef std::map MirrorImageStatusStates; + + static int site_name_get(librados::Rados& rados, std::string* name); + static int site_name_set(librados::Rados& rados, const std::string& name); + + static int mode_get(librados::IoCtx& io_ctx, rbd_mirror_mode_t *mirror_mode); + static int mode_set(librados::IoCtx& io_ctx, rbd_mirror_mode_t 
mirror_mode); + + static int uuid_get(librados::IoCtx& io_ctx, std::string* mirror_uuid); + static void uuid_get(librados::IoCtx& io_ctx, std::string* mirror_uuid, + Context* on_finish); + + static int peer_bootstrap_create(librados::IoCtx& io_ctx, std::string* token); + static int peer_bootstrap_import(librados::IoCtx& io_ctx, + rbd_mirror_peer_direction_t direction, + const std::string& token); + + static int peer_site_add(librados::IoCtx& io_ctx, std::string *uuid, + mirror_peer_direction_t direction, + const std::string &site_name, + const std::string &client_name); + static int peer_site_remove(librados::IoCtx& io_ctx, const std::string &uuid); + static int peer_site_list(librados::IoCtx& io_ctx, + std::vector *peers); + static int peer_site_set_client(librados::IoCtx& io_ctx, + const std::string &uuid, + const std::string &client_name); + static int peer_site_set_name(librados::IoCtx& io_ctx, + const std::string &uuid, + const std::string &site_name); + static int peer_site_set_direction(librados::IoCtx& io_ctx, + const std::string &uuid, + mirror_peer_direction_t direction); + static int peer_site_get_attributes(librados::IoCtx& io_ctx, + const std::string &uuid, + Attributes* attributes); + static int peer_site_set_attributes(librados::IoCtx& io_ctx, + const std::string &uuid, + const Attributes& attributes); + + static int image_global_status_list(librados::IoCtx& io_ctx, + const std::string &start_id, size_t max, + IdToMirrorImageGlobalStatus *images); + + static int image_status_summary(librados::IoCtx& io_ctx, + MirrorImageStatusStates *states); + static int image_instance_id_list(librados::IoCtx& io_ctx, + const std::string &start_image_id, + size_t max, + std::map *ids); + + static int image_info_list( + librados::IoCtx& io_ctx, mirror_image_mode_t *mode_filter, + const std::string &start_id, size_t max, + std::map> *entries); + + static int image_enable(ImageCtxT *ictx, mirror_image_mode_t mode, + bool relax_same_pool_parent_check); + static int 
image_disable(ImageCtxT *ictx, bool force); + static int image_promote(ImageCtxT *ictx, bool force); + static void image_promote(ImageCtxT *ictx, bool force, Context *on_finish); + static int image_demote(ImageCtxT *ictx); + static void image_demote(ImageCtxT *ictx, Context *on_finish); + static int image_resync(ImageCtxT *ictx); + static int image_get_info(ImageCtxT *ictx, + mirror_image_info_t *mirror_image_info); + static void image_get_info(ImageCtxT *ictx, + mirror_image_info_t *mirror_image_info, + Context *on_finish); + static int image_get_info(librados::IoCtx& io_ctx, + asio::ContextWQ *op_work_queue, + const std::string &image_id, + mirror_image_info_t *mirror_image_info); + static void image_get_info(librados::IoCtx& io_ctx, + asio::ContextWQ *op_work_queue, + const std::string &image_id, + mirror_image_info_t *mirror_image_info, + Context *on_finish); + static int image_get_mode(ImageCtxT *ictx, mirror_image_mode_t *mode); + static void image_get_mode(ImageCtxT *ictx, mirror_image_mode_t *mode, + Context *on_finish); + static int image_get_global_status(ImageCtxT *ictx, + mirror_image_global_status_t *status); + static void image_get_global_status(ImageCtxT *ictx, + mirror_image_global_status_t *status, + Context *on_finish); + static int image_get_instance_id(ImageCtxT *ictx, std::string *instance_id); + + static int image_snapshot_create(ImageCtxT *ictx, uint32_t flags, + uint64_t *snap_id); + static void image_snapshot_create(ImageCtxT *ictx, uint32_t flags, + uint64_t *snap_id, Context *on_finish); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Mirror; + +#endif // LIBRBD_API_MIRROR_H diff --git a/src/librbd/api/Namespace.cc b/src/librbd/api/Namespace.cc new file mode 100644 index 000000000..86ed70c06 --- /dev/null +++ b/src/librbd/api/Namespace.cc @@ -0,0 +1,235 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/errno.h" +#include 
"cls/rbd/cls_rbd_client.h" +#include "librbd/api/Mirror.h" +#include "librbd/api/Namespace.h" +#include "librbd/ImageCtx.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Namespace: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +const std::list POOL_OBJECTS { + RBD_CHILDREN, + RBD_GROUP_DIRECTORY, + RBD_INFO, + RBD_MIRRORING, + RBD_TASK, + RBD_TRASH, + RBD_DIRECTORY +}; + +} // anonymous namespace + +template +int Namespace::create(librados::IoCtx& io_ctx, const std::string& name) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 5) << "name=" << name << dendl; + + if (name.empty()) { + return -EINVAL; + } + + librados::Rados rados(io_ctx); + int8_t require_osd_release; + int r = rados.get_min_compatible_osd(&require_osd_release); + if (r < 0) { + lderr(cct) << "failed to retrieve min OSD release: " << cpp_strerror(r) + << dendl; + return r; + } + + if (require_osd_release < CEPH_RELEASE_NAUTILUS) { + ldout(cct, 1) << "namespace support requires nautilus or later OSD" + << dendl; + return -ENOSYS; + } + + + librados::IoCtx default_ns_ctx; + default_ns_ctx.dup(io_ctx); + default_ns_ctx.set_namespace(""); + + r = cls_client::namespace_add(&default_ns_ctx, name); + if (r < 0) { + lderr(cct) << "failed to add namespace: " << cpp_strerror(r) << dendl; + return r; + } + + librados::IoCtx ns_ctx; + ns_ctx.dup(io_ctx); + ns_ctx.set_namespace(name); + + r = cls_client::dir_state_set(&ns_ctx, RBD_DIRECTORY, + cls::rbd::DIRECTORY_STATE_READY); + if (r < 0) { + lderr(cct) << "failed to initialize image directory: " << cpp_strerror(r) + << dendl; + goto rollback; + } + + return 0; + +rollback: + int ret_val = cls_client::namespace_remove(&default_ns_ctx, name); + if (ret_val < 0) { + lderr(cct) << "failed to remove namespace: " << cpp_strerror(ret_val) << dendl; + } + + return r; +} + +template +int Namespace::remove(librados::IoCtx& io_ctx, const std::string& name) +{ + 
CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 5) << "name=" << name << dendl; + + if (name.empty()) { + return -EINVAL; + } + + librados::IoCtx default_ns_ctx; + default_ns_ctx.dup(io_ctx); + default_ns_ctx.set_namespace(""); + + librados::IoCtx ns_ctx; + ns_ctx.dup(io_ctx); + ns_ctx.set_namespace(name); + + std::map trash_entries; + + librados::ObjectWriteOperation dir_op; + librbd::cls_client::dir_state_set( + &dir_op, cls::rbd::DIRECTORY_STATE_ADD_DISABLED); + dir_op.remove(); + + int r = ns_ctx.operate(RBD_DIRECTORY, &dir_op); + if (r == -EBUSY) { + ldout(cct, 5) << "image directory not empty" << dendl; + goto rollback; + } else if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to disable the namespace: " << cpp_strerror(r) + << dendl; + return r; + } + + r = cls_client::trash_list(&ns_ctx, "", 1, &trash_entries); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to list trash directory: " << cpp_strerror(r) + << dendl; + return r; + } else if (!trash_entries.empty()) { + ldout(cct, 5) << "image trash not empty" << dendl; + goto rollback; + } + + r = Mirror::mode_set(ns_ctx, RBD_MIRROR_MODE_DISABLED); + if (r < 0) { + lderr(cct) << "failed to disable mirroring: " << cpp_strerror(r) + << dendl; + return r; + } + + for (auto& oid : POOL_OBJECTS) { + r = ns_ctx.remove(oid); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to remove object '" << oid << "': " + << cpp_strerror(r) << dendl; + return r; + } + } + + r = cls_client::namespace_remove(&default_ns_ctx, name); + if (r < 0) { + lderr(cct) << "failed to remove namespace: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; + +rollback: + + r = librbd::cls_client::dir_state_set( + &ns_ctx, RBD_DIRECTORY, cls::rbd::DIRECTORY_STATE_READY); + if (r < 0) { + lderr(cct) << "failed to restore directory state: " << cpp_strerror(r) + << dendl; + } + + return -EBUSY; +} + +template +int Namespace::list(IoCtx& io_ctx, std::vector *names) +{ + CephContext *cct = (CephContext 
*)io_ctx.cct(); + ldout(cct, 5) << dendl; + + librados::IoCtx default_ns_ctx; + default_ns_ctx.dup(io_ctx); + default_ns_ctx.set_namespace(""); + + int r; + int max_read = 1024; + std::string last_read = ""; + do { + std::list name_list; + r = cls_client::namespace_list(&default_ns_ctx, last_read, max_read, + &name_list); + if (r == -ENOENT) { + return 0; + } else if (r < 0) { + lderr(cct) << "error listing namespaces: " << cpp_strerror(r) << dendl; + return r; + } + + names->insert(names->end(), name_list.begin(), name_list.end()); + if (!name_list.empty()) { + last_read = name_list.back(); + } + r = name_list.size(); + } while (r == max_read); + + return 0; +} + +template +int Namespace::exists(librados::IoCtx& io_ctx, const std::string& name, bool *exists) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 5) << "name=" << name << dendl; + + *exists = false; + if (name.empty()) { + return -EINVAL; + } + + librados::IoCtx ns_ctx; + ns_ctx.dup(io_ctx); + ns_ctx.set_namespace(name); + + int r = librbd::cls_client::dir_state_assert(&ns_ctx, RBD_DIRECTORY, + cls::rbd::DIRECTORY_STATE_READY); + if (r == 0) { + *exists = true; + } else if (r != -ENOENT) { + lderr(cct) << "error asserting namespace: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Namespace; diff --git a/src/librbd/api/Namespace.h b/src/librbd/api/Namespace.h new file mode 100644 index 000000000..220eb28f3 --- /dev/null +++ b/src/librbd/api/Namespace.h @@ -0,0 +1,33 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_NAMESPACE_H +#define CEPH_LIBRBD_API_NAMESPACE_H + +#include "include/rados/librados_fwd.hpp" +#include "include/rbd/librbd.hpp" +#include +#include + +namespace librbd { + +struct ImageCtx; + +namespace api { + +template +struct Namespace { + + static int create(librados::IoCtx& io_ctx, const std::string& 
name); + static int remove(librados::IoCtx& io_ctx, const std::string& name); + static int list(librados::IoCtx& io_ctx, std::vector* names); + static int exists(librados::IoCtx& io_ctx, const std::string& name, bool *exists); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Namespace; + +#endif // CEPH_LIBRBD_API_NAMESPACE_H diff --git a/src/librbd/api/Pool.cc b/src/librbd/api/Pool.cc new file mode 100644 index 000000000..65d55328f --- /dev/null +++ b/src/librbd/api/Pool.cc @@ -0,0 +1,375 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Pool.h" +#include "include/rados/librados.hpp" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "common/Throttle.h" +#include "cls/rbd/cls_rbd_client.h" +#include "osd/osd_types.h" +#include "librbd/AsioEngine.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" +#include "librbd/api/Image.h" +#include "librbd/api/Trash.h" +#include "librbd/image/ValidatePoolRequest.h" + +#define dout_subsys ceph_subsys_rbd + +namespace librbd { +namespace api { + +namespace { + +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Pool::ImageStatRequest: " \ + << __func__ << " " << this << ": " \ + << "(id=" << m_image_id << "): " + +template +class ImageStatRequest { +public: + ImageStatRequest(librados::IoCtx& io_ctx, SimpleThrottle& throttle, + const std::string& image_id, bool scan_snaps, + std::atomic* bytes, + std::atomic* max_bytes, + std::atomic* snaps) + : m_cct(reinterpret_cast(io_ctx.cct())), + m_io_ctx(io_ctx), m_throttle(throttle), m_image_id(image_id), + m_scan_snaps(scan_snaps), m_bytes(bytes), m_max_bytes(max_bytes), + m_snaps(snaps) { + m_throttle.start_op(); + } + + void send() { + get_head(); + } + +protected: + void finish(int r) { + (*m_max_bytes) += m_max_size; + m_throttle.end_op(r); + + delete this; + } + +private: + 
CephContext* m_cct; + librados::IoCtx& m_io_ctx; + SimpleThrottle& m_throttle; + const std::string& m_image_id; + bool m_scan_snaps; + std::atomic* m_bytes; + std::atomic* m_max_bytes; + std::atomic* m_snaps; + bufferlist m_out_bl; + + uint64_t m_max_size = 0; + ::SnapContext m_snapc; + + void get_head() { + ldout(m_cct, 15) << dendl; + + librados::ObjectReadOperation op; + cls_client::get_size_start(&op, CEPH_NOSNAP); + if (m_scan_snaps) { + cls_client::get_snapcontext_start(&op); + } + + m_out_bl.clear(); + auto aio_comp = util::create_rados_callback< + ImageStatRequest, &ImageStatRequest::handle_get_head>(this); + int r = m_io_ctx.aio_operate(util::header_name(m_image_id), aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); + } + + void handle_get_head(int r) { + ldout(m_cct, 15) << "r=" << r << dendl; + + auto it = m_out_bl.cbegin(); + if (r == 0) { + uint8_t order; + r = cls_client::get_size_finish(&it, &m_max_size, &order); + if (r == 0) { + (*m_bytes) += m_max_size; + } + } + if (m_scan_snaps && r == 0) { + r = cls_client::get_snapcontext_finish(&it, &m_snapc); + if (r == 0) { + (*m_snaps) += m_snapc.snaps.size(); + } + } + + if (r == -ENOENT) { + finish(r); + return; + } else if (r < 0) { + lderr(m_cct) << "failed to stat image: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (!m_snapc.is_valid()) { + lderr(m_cct) << "snap context is invalid" << dendl; + finish(-EIO); + return; + } + + get_snaps(); + } + + void get_snaps() { + if (!m_scan_snaps || m_snapc.snaps.empty()) { + finish(0); + return; + } + + ldout(m_cct, 15) << dendl; + librados::ObjectReadOperation op; + for (auto snap_seq : m_snapc.snaps) { + cls_client::get_size_start(&op, snap_seq); + } + + m_out_bl.clear(); + auto aio_comp = util::create_rados_callback< + ImageStatRequest, &ImageStatRequest::handle_get_snaps>(this); + int r = m_io_ctx.aio_operate(util::header_name(m_image_id), aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); + 
} + + void handle_get_snaps(int r) { + ldout(m_cct, 15) << "r=" << r << dendl; + + auto it = m_out_bl.cbegin(); + for ([[maybe_unused]] auto snap_seq : m_snapc.snaps) { + uint64_t size; + if (r == 0) { + uint8_t order; + r = cls_client::get_size_finish(&it, &size, &order); + } + if (r == 0 && m_max_size < size) { + m_max_size = size; + } + } + + if (r == -ENOENT) { + ldout(m_cct, 15) << "out-of-sync metadata" << dendl; + get_head(); + } else if (r < 0) { + lderr(m_cct) << "failed to retrieve snap size: " << cpp_strerror(r) + << dendl; + finish(r); + } else { + finish(0); + } + } + +}; + +template +void get_pool_stat_option_value(typename Pool::StatOptions* stat_options, + rbd_pool_stat_option_t option, + uint64_t** value) { + auto it = stat_options->find(option); + if (it == stat_options->end()) { + *value = nullptr; + } else { + *value = it->second; + } +} + +template +int get_pool_stats(librados::IoCtx& io_ctx, const ConfigProxy& config, + const std::vector& image_ids, uint64_t* image_count, + uint64_t* provisioned_bytes, uint64_t* max_provisioned_bytes, + uint64_t* snapshot_count) { + + bool scan_snaps = ((max_provisioned_bytes != nullptr) || + (snapshot_count != nullptr)); + + SimpleThrottle throttle( + config.template get_val("rbd_concurrent_management_ops"), true); + std::atomic bytes{0}; + std::atomic max_bytes{0}; + std::atomic snaps{0}; + for (auto& image_id : image_ids) { + if (throttle.pending_error()) { + break; + } + + auto req = new ImageStatRequest(io_ctx, throttle, image_id, + scan_snaps, &bytes, &max_bytes, &snaps); + req->send(); + } + + int r = throttle.wait_for_ret(); + if (r < 0) { + return r; + } + + if (image_count != nullptr) { + *image_count = image_ids.size(); + } + if (provisioned_bytes != nullptr) { + *provisioned_bytes = bytes.load(); + } + if (max_provisioned_bytes != nullptr) { + *max_provisioned_bytes = max_bytes.load(); + } + if (snapshot_count != nullptr) { + *snapshot_count = snaps.load(); + } + + return 0; +} + +} // anonymous 
namespace + +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Pool: " << __func__ << ": " + +template +int Pool::init(librados::IoCtx& io_ctx, bool force) { + auto cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 10) << dendl; + + int r = io_ctx.application_enable(pg_pool_t::APPLICATION_NAME_RBD, force); + if (r < 0) { + return r; + } + + ConfigProxy config{cct->_conf}; + api::Config::apply_pool_overrides(io_ctx, &config); + if (!config.get_val("rbd_validate_pool")) { + return 0; + } + + C_SaferCond ctx; + auto req = image::ValidatePoolRequest::create(io_ctx, &ctx); + req->send(); + + return ctx.wait(); +} + +template +int Pool::add_stat_option(StatOptions* stat_options, + rbd_pool_stat_option_t option, + uint64_t* value) { + switch (option) { + case RBD_POOL_STAT_OPTION_IMAGES: + case RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES: + case RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES: + case RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS: + case RBD_POOL_STAT_OPTION_TRASH_IMAGES: + case RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES: + case RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES: + case RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS: + stat_options->emplace(option, value); + return 0; + default: + break; + } + return -ENOENT; +} + +template +int Pool::get_stats(librados::IoCtx& io_ctx, StatOptions* stat_options) { + auto cct = reinterpret_cast(io_ctx.cct()); + ldout(cct, 10) << dendl; + + ConfigProxy config{cct->_conf}; + api::Config::apply_pool_overrides(io_ctx, &config); + + uint64_t* image_count; + uint64_t* provisioned_bytes; + uint64_t* max_provisioned_bytes; + uint64_t* snapshot_count; + + std::vector trash_entries; + int r = Trash::list(io_ctx, trash_entries, false); + if (r < 0 && r != -EOPNOTSUPP) { + return r; + } + + get_pool_stat_option_value( + stat_options, RBD_POOL_STAT_OPTION_IMAGES, &image_count); + get_pool_stat_option_value( + stat_options, RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES, + &provisioned_bytes); + get_pool_stat_option_value( + 
stat_options, RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES, + &max_provisioned_bytes); + get_pool_stat_option_value( + stat_options, RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS, &snapshot_count); + if (image_count != nullptr || provisioned_bytes != nullptr || + max_provisioned_bytes != nullptr || snapshot_count != nullptr) { + typename Image::ImageNameToIds images; + int r = Image::list_images_v2(io_ctx, &images); + if (r < 0) { + return r; + } + + std::vector image_ids; + image_ids.reserve(images.size() + trash_entries.size()); + for (auto& it : images) { + image_ids.push_back(std::move(it.second)); + } + for (auto& it : trash_entries) { + if (it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + image_ids.push_back(std::move(it.id)); + } + } + + r = get_pool_stats(io_ctx, config, image_ids, image_count, + provisioned_bytes, max_provisioned_bytes, + snapshot_count); + if (r < 0) { + return r; + } + } + + get_pool_stat_option_value( + stat_options, RBD_POOL_STAT_OPTION_TRASH_IMAGES, &image_count); + get_pool_stat_option_value( + stat_options, RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES, + &provisioned_bytes); + get_pool_stat_option_value( + stat_options, RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES, + &max_provisioned_bytes); + get_pool_stat_option_value( + stat_options, RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS, &snapshot_count); + if (image_count != nullptr || provisioned_bytes != nullptr || + max_provisioned_bytes != nullptr || snapshot_count != nullptr) { + + std::vector image_ids; + image_ids.reserve(trash_entries.size()); + for (auto& it : trash_entries) { + if (it.source == RBD_TRASH_IMAGE_SOURCE_REMOVING) { + continue; + } + image_ids.push_back(std::move(it.id)); + } + + r = get_pool_stats(io_ctx, config, image_ids, image_count, + provisioned_bytes, max_provisioned_bytes, + snapshot_count); + if (r < 0) { + return r; + } + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Pool; diff --git a/src/librbd/api/Pool.h 
b/src/librbd/api/Pool.h new file mode 100644 index 000000000..7b607ab6e --- /dev/null +++ b/src/librbd/api/Pool.h @@ -0,0 +1,38 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_POOL_H +#define CEPH_LIBRBD_API_POOL_H + +#include "include/int_types.h" +#include "include/rados/librados_fwd.hpp" +#include "include/rbd/librbd.h" +#include + +namespace librbd { + +struct ImageCtx; + +namespace api { + +template +class Pool { +public: + typedef std::map StatOptions; + + static int init(librados::IoCtx& io_ctx, bool force); + + static int add_stat_option(StatOptions* stat_options, + rbd_pool_stat_option_t option, + uint64_t* value); + + static int get_stats(librados::IoCtx& io_ctx, StatOptions* stat_options); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Pool; + +#endif // CEPH_LIBRBD_API_POOL_H diff --git a/src/librbd/api/PoolMetadata.cc b/src/librbd/api/PoolMetadata.cc new file mode 100644 index 000000000..33e3fb648 --- /dev/null +++ b/src/librbd/api/PoolMetadata.cc @@ -0,0 +1,156 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/PoolMetadata.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" +#include "librbd/image/GetMetadataRequest.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::PoolMetadata: " << __func__ << ": " + +namespace librbd { +namespace api { + +namespace { + +void update_pool_timestamp(librados::IoCtx& io_ctx) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + auto now = ceph_clock_now(); + std::string cmd = + R"({)" + R"("prefix": "config set", )" + R"("who": "global", )" + R"("name": "rbd_config_pool_override_update_timestamp", )" + R"("value": ")" + stringify(now.sec()) + 
R"(")" + R"(})"; + + librados::Rados rados(io_ctx); + bufferlist in_bl; + std::string ss; + int r = rados.mon_command(cmd, in_bl, nullptr, &ss); + if (r < 0) { + lderr(cct) << "failed to notify clients of pool config update: " + << cpp_strerror(r) << dendl; + } +} + +} // anonymous namespace + +template +int PoolMetadata::get(librados::IoCtx& io_ctx, + const std::string &key, std::string *value) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + int r = cls_client::metadata_get(&io_ctx, RBD_INFO, key, value); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed reading metadata " << key << ": " << cpp_strerror(r) + << dendl; + } + + return r; +} + +template +int PoolMetadata::set(librados::IoCtx& io_ctx, const std::string &key, + const std::string &value) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + bool need_update_pool_timestamp = false; + + std::string config_key; + if (util::is_metadata_config_override(key, &config_key)) { + if (!librbd::api::Config::is_option_name(io_ctx, config_key)) { + lderr(cct) << "validation for " << key + << " failed: not allowed pool level override" << dendl; + return -EINVAL; + } + int r = ConfigProxy{false}.set_val(config_key.c_str(), value); + if (r < 0) { + lderr(cct) << "validation for " << key << " failed: " << cpp_strerror(r) + << dendl; + return -EINVAL; + } + + need_update_pool_timestamp = true; + } + + ceph::bufferlist bl; + bl.append(value); + + int r = cls_client::metadata_set(&io_ctx, RBD_INFO, {{key, bl}}); + if (r < 0) { + lderr(cct) << "failed setting metadata " << key << ": " << cpp_strerror(r) + << dendl; + return r; + } + + if (need_update_pool_timestamp) { + update_pool_timestamp(io_ctx); + } + + return 0; +} + +template +int PoolMetadata::remove(librados::IoCtx& io_ctx, const std::string &key) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + std::string value; + int r = cls_client::metadata_get(&io_ctx, RBD_INFO, key, &value); + if (r < 0) { + if (r == -ENOENT) { + ldout(cct, 1) << 
"metadata " << key << " does not exist" << dendl; + } else { + lderr(cct) << "failed reading metadata " << key << ": " << cpp_strerror(r) + << dendl; + } + return r; + } + + r = cls_client::metadata_remove(&io_ctx, RBD_INFO, key); + if (r < 0) { + lderr(cct) << "failed removing metadata " << key << ": " << cpp_strerror(r) + << dendl; + return r; + } + + std::string config_key; + if (util::is_metadata_config_override(key, &config_key)) { + update_pool_timestamp(io_ctx); + } + + return 0; +} + +template +int PoolMetadata::list(librados::IoCtx& io_ctx, const std::string &start, + uint64_t max, + std::map *pairs) { + CephContext *cct = (CephContext *)io_ctx.cct(); + + pairs->clear(); + C_SaferCond ctx; + auto req = image::GetMetadataRequest::create( + io_ctx, RBD_INFO, false, "", start, max, pairs, &ctx); + req->send(); + + int r = ctx.wait(); + if (r < 0) { + lderr(cct) << "failed listing metadata: " << cpp_strerror(r) + << dendl; + return r; + } + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::PoolMetadata; diff --git a/src/librbd/api/PoolMetadata.h b/src/librbd/api/PoolMetadata.h new file mode 100644 index 000000000..69ab574ac --- /dev/null +++ b/src/librbd/api/PoolMetadata.h @@ -0,0 +1,37 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_POOL_METADATA_H +#define CEPH_LIBRBD_API_POOL_METADATA_H + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" + +#include +#include +#include + +namespace librbd { + +class ImageCtx; + +namespace api { + +template +class PoolMetadata { +public: + static int get(librados::IoCtx& io_ctx, const std::string &key, + std::string *value); + static int set(librados::IoCtx& io_ctx, const std::string &key, + const std::string &value); + static int remove(librados::IoCtx& io_ctx, const std::string &key); + static int list(librados::IoCtx& io_ctx, const std::string &start, + uint64_t max, std::map 
*pairs); +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::PoolMetadata; + +#endif // CEPH_LIBRBD_API_POOL_METADATA_H diff --git a/src/librbd/api/Snapshot.cc b/src/librbd/api/Snapshot.cc new file mode 100644 index 000000000..03cefbd1c --- /dev/null +++ b/src/librbd/api/Snapshot.cc @@ -0,0 +1,444 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Snapshot.h" +#include "cls/rbd/cls_rbd_types.h" +#include "common/errno.h" +#include "librbd/internal.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/api/Image.h" +#include "include/Context.h" +#include "common/Cond.h" + +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Snapshot: " << __func__ << ": " + +using librados::snap_t; + +namespace librbd { +namespace api { + +namespace { + +class GetGroupVisitor { +public: + CephContext* cct; + librados::IoCtx *image_ioctx; + snap_group_namespace_t *group_snap; + + explicit GetGroupVisitor(CephContext* cct, librados::IoCtx *_image_ioctx, + snap_group_namespace_t *group_snap) + : cct(cct), image_ioctx(_image_ioctx), group_snap(group_snap) {}; + + template + inline int operator()(const T&) const { + // ignore other than GroupSnapshotNamespace types. 
+ return -EINVAL; + } + + inline int operator()( + const cls::rbd::GroupSnapshotNamespace& snap_namespace) { + IoCtx group_ioctx; + int r = util::create_ioctx(*image_ioctx, "group", snap_namespace.group_pool, + {}, &group_ioctx); + if (r < 0) { + return r; + } + + cls::rbd::GroupSnapshot group_snapshot; + + std::string group_name; + r = cls_client::dir_get_name(&group_ioctx, RBD_GROUP_DIRECTORY, + snap_namespace.group_id, &group_name); + if (r < 0) { + lderr(cct) << "failed to retrieve group name: " << cpp_strerror(r) + << dendl; + return r; + } + + std::string group_header_oid = util::group_header_name(snap_namespace.group_id); + r = cls_client::group_snap_get_by_id(&group_ioctx, + group_header_oid, + snap_namespace.group_snapshot_id, + &group_snapshot); + if (r < 0) { + lderr(cct) << "failed to retrieve group snapshot: " << cpp_strerror(r) + << dendl; + return r; + } + + group_snap->group_pool = group_ioctx.get_id(); + group_snap->group_name = group_name; + group_snap->group_snap_name = group_snapshot.name; + return 0; + } +}; + +class GetTrashVisitor { +public: + std::string* original_name; + + explicit GetTrashVisitor(std::string* original_name) + : original_name(original_name) { + } + + template + inline int operator()(const T&) const { + return -EINVAL; + } + + inline int operator()( + const cls::rbd::TrashSnapshotNamespace& snap_namespace) { + *original_name = snap_namespace.original_name; + return 0; + } +}; + +class GetMirrorVisitor { +public: + snap_mirror_namespace_t *mirror_snap; + + explicit GetMirrorVisitor(snap_mirror_namespace_t *mirror_snap) + : mirror_snap(mirror_snap) { + } + + template + inline int operator()(const T&) const { + return -EINVAL; + } + + inline int operator()( + const cls::rbd::MirrorSnapshotNamespace& snap_namespace) { + mirror_snap->state = static_cast(snap_namespace.state); + mirror_snap->complete = snap_namespace.complete; + mirror_snap->mirror_peer_uuids = snap_namespace.mirror_peer_uuids; + mirror_snap->primary_mirror_uuid = 
snap_namespace.primary_mirror_uuid; + mirror_snap->primary_snap_id = snap_namespace.primary_snap_id; + mirror_snap->last_copied_object_number = + snap_namespace.last_copied_object_number; + return 0; + } +}; + +} // anonymous namespace + +template +int Snapshot::get_group_namespace(I *ictx, uint64_t snap_id, + snap_group_namespace_t *group_snap) { + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + std::shared_lock image_locker{ictx->image_lock}; + auto snap_info = ictx->get_snap_info(snap_id); + if (snap_info == nullptr) { + return -ENOENT; + } + + GetGroupVisitor ggv = GetGroupVisitor(ictx->cct, &ictx->md_ctx, group_snap); + r = snap_info->snap_namespace.visit(ggv); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Snapshot::get_trash_namespace(I *ictx, uint64_t snap_id, + std::string* original_name) { + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + std::shared_lock image_locker{ictx->image_lock}; + auto snap_info = ictx->get_snap_info(snap_id); + if (snap_info == nullptr) { + return -ENOENT; + } + + auto visitor = GetTrashVisitor(original_name); + r = snap_info->snap_namespace.visit(visitor); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Snapshot::get_mirror_namespace( + I *ictx, uint64_t snap_id, snap_mirror_namespace_t *mirror_snap) { + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + std::shared_lock image_locker{ictx->image_lock}; + auto snap_info = ictx->get_snap_info(snap_id); + if (snap_info == nullptr) { + return -ENOENT; + } + + auto gmv = GetMirrorVisitor(mirror_snap); + r = snap_info->snap_namespace.visit(gmv); + if (r < 0) { + return r; + } + + return 0; +} + +template +int Snapshot::get_namespace_type(I *ictx, uint64_t snap_id, + snap_namespace_type_t *namespace_type) { + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + std::shared_lock l{ictx->image_lock}; + auto snap_info = 
ictx->get_snap_info(snap_id); + if (snap_info == nullptr) { + return -ENOENT; + } + + *namespace_type = static_cast( + cls::rbd::get_snap_namespace_type(snap_info->snap_namespace)); + return 0; +} + +template +int Snapshot::remove(I *ictx, uint64_t snap_id) { + ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_id << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) { + return r; + } + + cls::rbd::SnapshotNamespace snapshot_namespace; + std::string snapshot_name; + { + std::shared_lock image_locker{ictx->image_lock}; + auto it = ictx->snap_info.find(snap_id); + if (it == ictx->snap_info.end()) { + return -ENOENT; + } + + snapshot_namespace = it->second.snap_namespace; + snapshot_name = it->second.name; + } + + C_SaferCond ctx; + ictx->operations->snap_remove(snapshot_namespace, snapshot_name, &ctx); + r = ctx.wait(); + return r; +} + +template +int Snapshot::get_name(I *ictx, uint64_t snap_id, std::string *snap_name) + { + ldout(ictx->cct, 20) << "snap_get_name " << ictx << " " << snap_id << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) + return r; + + std::shared_lock image_locker{ictx->image_lock}; + r = ictx->get_snap_name(snap_id, snap_name); + + return r; + } + +template +int Snapshot::get_id(I *ictx, const std::string& snap_name, uint64_t *snap_id) + { + ldout(ictx->cct, 20) << "snap_get_id " << ictx << " " << snap_name << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) + return r; + + std::shared_lock image_locker{ictx->image_lock}; + *snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name); + if (*snap_id == CEPH_NOSNAP) + return -ENOENT; + + return 0; + } + +template +int Snapshot::list(I *ictx, std::vector& snaps) { + ldout(ictx->cct, 20) << "snap_list " << ictx << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) + return r; + + std::shared_lock l{ictx->image_lock}; + for (auto &it : ictx->snap_info) { + snap_info_t info; + info.name = 
it.second.name; + info.id = it.first; + info.size = it.second.size; + snaps.push_back(info); + } + + return 0; +} + +template +int Snapshot::exists(I *ictx, const cls::rbd::SnapshotNamespace& snap_namespace, + const char *snap_name, bool *exists) { + ldout(ictx->cct, 20) << "snap_exists " << ictx << " " << snap_name << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) + return r; + + std::shared_lock l{ictx->image_lock}; + *exists = ictx->get_snap_id(snap_namespace, snap_name) != CEPH_NOSNAP; + return 0; +} + +template +int Snapshot::create(I *ictx, const char *snap_name, uint32_t flags, + ProgressContext& pctx) { + ldout(ictx->cct, 20) << "snap_create " << ictx << " " << snap_name + << " flags: " << flags << dendl; + + uint64_t internal_flags = 0; + int r = util::snap_create_flags_api_to_internal(ictx->cct, flags, + &internal_flags); + if (r < 0) { + return r; + } + + return ictx->operations->snap_create(cls::rbd::UserSnapshotNamespace(), + snap_name, internal_flags, pctx); +} + +template +int Snapshot::remove(I *ictx, const char *snap_name, uint32_t flags, + ProgressContext& pctx) { + ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_name << " flags: " << flags << dendl; + + int r = 0; + + r = ictx->state->refresh_if_required(); + if (r < 0) + return r; + + if (flags & RBD_SNAP_REMOVE_FLATTEN) { + r = Image::flatten_children(ictx, snap_name, pctx); + if (r < 0) { + return r; + } + } + + bool protect; + r = is_protected(ictx, snap_name, &protect); + if (r < 0) { + return r; + } + + if (protect && flags & RBD_SNAP_REMOVE_UNPROTECT) { + r = ictx->operations->snap_unprotect(cls::rbd::UserSnapshotNamespace(), snap_name); + if (r < 0) { + lderr(ictx->cct) << "failed to unprotect snapshot: " << snap_name << dendl; + return r; + } + + r = is_protected(ictx, snap_name, &protect); + if (r < 0) { + return r; + } + if (protect) { + lderr(ictx->cct) << "snapshot is still protected after unprotection" << dendl; + ceph_abort(); + } + } + + 
C_SaferCond ctx; + ictx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), snap_name, &ctx); + + r = ctx.wait(); + return r; +} + +template +int Snapshot::get_timestamp(I *ictx, uint64_t snap_id, struct timespec *timestamp) { + auto snap_it = ictx->snap_info.find(snap_id); + ceph_assert(snap_it != ictx->snap_info.end()); + utime_t time = snap_it->second.timestamp; + time.to_timespec(timestamp); + return 0; +} + +template +int Snapshot::get_limit(I *ictx, uint64_t *limit) { + int r = cls_client::snapshot_get_limit(&ictx->md_ctx, ictx->header_oid, + limit); + if (r == -EOPNOTSUPP) { + *limit = UINT64_MAX; + r = 0; + } + return r; +} + +template +int Snapshot::set_limit(I *ictx, uint64_t limit) { + return ictx->operations->snap_set_limit(limit); +} + +template +int Snapshot::is_protected(I *ictx, const char *snap_name, bool *protect) { + ldout(ictx->cct, 20) << "snap_is_protected " << ictx << " " << snap_name + << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) + return r; + + std::shared_lock l{ictx->image_lock}; + snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name); + if (snap_id == CEPH_NOSNAP) + return -ENOENT; + bool is_unprotected; + r = ictx->is_snap_unprotected(snap_id, &is_unprotected); + // consider both PROTECTED or UNPROTECTING to be 'protected', + // since in either state they can't be deleted + *protect = !is_unprotected; + return r; +} + +template +int Snapshot::get_namespace(I *ictx, const char *snap_name, + cls::rbd::SnapshotNamespace *snap_namespace) { + ldout(ictx->cct, 20) << "get_snap_namespace " << ictx << " " << snap_name + << dendl; + + int r = ictx->state->refresh_if_required(); + if (r < 0) + return r; + std::shared_lock l{ictx->image_lock}; + snap_t snap_id = ictx->get_snap_id(*snap_namespace, snap_name); + if (snap_id == CEPH_NOSNAP) + return -ENOENT; + r = ictx->get_snap_namespace(snap_id, snap_namespace); + return r; +} + +} // namespace api +} // namespace librbd + +template class 
librbd::api::Snapshot; diff --git a/src/librbd/api/Snapshot.h b/src/librbd/api/Snapshot.h new file mode 100644 index 000000000..7e06a5a8d --- /dev/null +++ b/src/librbd/api/Snapshot.h @@ -0,0 +1,67 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_SNAPSHOT_H +#define CEPH_LIBRBD_API_SNAPSHOT_H + +#include "include/rbd/librbd.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include + +namespace librbd { + +struct ImageCtx; + +namespace api { + +template +struct Snapshot { + + static int get_group_namespace(ImageCtxT *ictx, uint64_t snap_id, + snap_group_namespace_t *group_snap); + + static int get_trash_namespace(ImageCtxT *ictx, uint64_t snap_id, + std::string *original_name); + + static int get_mirror_namespace( + ImageCtxT *ictx, uint64_t snap_id, + snap_mirror_namespace_t *mirror_snap); + + static int get_namespace_type(ImageCtxT *ictx, uint64_t snap_id, + snap_namespace_type_t *namespace_type); + + static int remove(ImageCtxT *ictx, uint64_t snap_id); + + static int get_name(ImageCtxT *ictx, uint64_t snap_id, std::string *snap_name); + + static int get_id(ImageCtxT *ictx, const std::string& snap_name, uint64_t *snap_id); + + static int list(ImageCtxT *ictx, std::vector& snaps); + + static int exists(ImageCtxT *ictx, const cls::rbd::SnapshotNamespace& snap_namespace, + const char *snap_name, bool *exists); + + static int create(ImageCtxT *ictx, const char *snap_name, uint32_t flags, + ProgressContext& pctx); + + static int remove(ImageCtxT *ictx, const char *snap_name, uint32_t flags, ProgressContext& pctx); + + static int get_limit(ImageCtxT *ictx, uint64_t *limit); + + static int set_limit(ImageCtxT *ictx, uint64_t limit); + + static int get_timestamp(ImageCtxT *ictx, uint64_t snap_id, struct timespec *timestamp); + + static int is_protected(ImageCtxT *ictx, const char *snap_name, bool *protect); + + static int get_namespace(ImageCtxT *ictx, const char *snap_name, + 
cls::rbd::SnapshotNamespace *snap_namespace); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Snapshot; + +#endif // CEPH_LIBRBD_API_SNAPSHOT_H diff --git a/src/librbd/api/Trash.cc b/src/librbd/api/Trash.cc new file mode 100644 index 000000000..d8189e8a7 --- /dev/null +++ b/src/librbd/api/Trash.cc @@ -0,0 +1,759 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Trash.h" +#include "include/rados/librados.hpp" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Cond.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/AsioEngine.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/internal.h" +#include "librbd/Operations.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/api/DiffIterate.h" +#include "librbd/exclusive_lock/Policy.h" +#include "librbd/image/RemoveRequest.h" +#include "librbd/mirror/DisableRequest.h" +#include "librbd/mirror/EnableRequest.h" +#include "librbd/trash/MoveRequest.h" +#include "librbd/trash/RemoveRequest.h" +#include +#include "librbd/journal/DisabledPolicy.h" +#include "librbd/image/ListWatchersRequest.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::Trash: " << __func__ << ": " + +namespace librbd { +namespace api { + +template +const typename Trash::TrashImageSources Trash::ALLOWED_RESTORE_SOURCES { + cls::rbd::TRASH_IMAGE_SOURCE_USER, + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING, + cls::rbd::TRASH_IMAGE_SOURCE_USER_PARENT + }; + +namespace { + +template +int disable_mirroring(I *ictx) { + ldout(ictx->cct, 10) << dendl; + + C_SaferCond ctx; + auto req = mirror::DisableRequest::create(ictx, false, true, &ctx); + req->send(); + int r = ctx.wait(); + if (r < 0) { + lderr(ictx->cct) << "failed to disable mirroring: " << cpp_strerror(r) + << dendl; + 
return r; + } + + return 0; +} + +template +int enable_mirroring(IoCtx &io_ctx, const std::string &image_id) { + auto cct = reinterpret_cast(io_ctx.cct()); + + uint64_t features; + uint64_t incompatible_features; + int r = cls_client::get_features(&io_ctx, util::header_name(image_id), true, + &features, &incompatible_features); + if (r < 0) { + lderr(cct) << "failed to retrieve features: " << cpp_strerror(r) << dendl; + return r; + } + + if ((features & RBD_FEATURE_JOURNALING) == 0) { + return 0; + } + + cls::rbd::MirrorMode mirror_mode; + r = cls_client::mirror_mode_get(&io_ctx, &mirror_mode); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve mirror mode: " << cpp_strerror(r) + << dendl; + return r; + } + + if (mirror_mode != cls::rbd::MIRROR_MODE_POOL) { + ldout(cct, 10) << "not pool mirroring mode" << dendl; + return 0; + } + + ldout(cct, 10) << dendl; + + AsioEngine asio_engine(io_ctx); + + C_SaferCond ctx; + auto req = mirror::EnableRequest::create( + io_ctx, image_id, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, "", false, + asio_engine.get_work_queue(), &ctx); + req->send(); + r = ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to enable mirroring: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +int list_trash_image_specs( + librados::IoCtx &io_ctx, + std::map* trash_image_specs, + bool exclude_user_remove_source) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "list_trash_image_specs " << &io_ctx << dendl; + + bool more_entries; + uint32_t max_read = 1024; + std::string last_read; + do { + std::map trash_entries; + int r = cls_client::trash_list(&io_ctx, last_read, max_read, + &trash_entries); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error listing rbd trash entries: " << cpp_strerror(r) + << dendl; + return r; + } else if (r == -ENOENT) { + break; + } + + if (trash_entries.empty()) { + break; + } + + for (const auto &entry : trash_entries) { + if (exclude_user_remove_source && + 
entry.second.source == cls::rbd::TRASH_IMAGE_SOURCE_REMOVING) { + continue; + } + + trash_image_specs->insert({entry.first, entry.second}); + } + + last_read = trash_entries.rbegin()->first; + more_entries = (trash_entries.size() >= max_read); + } while (more_entries); + + return 0; +} + +} // anonymous namespace + +template +int Trash::move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, const std::string &image_id, + uint64_t delay) { + ceph_assert(!image_name.empty() && !image_id.empty()); + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << &io_ctx << " name=" << image_name << ", id=" << image_id + << dendl; + + auto ictx = new I("", image_id, nullptr, io_ctx, false); + int r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to open image: " << cpp_strerror(r) << dendl; + return r; + } + + if (r == 0) { + cls::rbd::MirrorImage mirror_image; + int mirror_r = cls_client::mirror_image_get(&ictx->md_ctx, ictx->id, + &mirror_image); + if (mirror_r == -ENOENT) { + ldout(ictx->cct, 10) << "mirroring is not enabled for this image" + << dendl; + } else if (mirror_r < 0) { + lderr(ictx->cct) << "failed to retrieve mirror image: " + << cpp_strerror(mirror_r) << dendl; + return mirror_r; + } else if (mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + // a remote rbd-mirror might own the exclusive-lock on this image + // and therefore we need to disable mirroring so that it closes the image + r = disable_mirroring(ictx); + if (r < 0) { + ictx->state->close(); + return r; + } + } + + if (ictx->test_features(RBD_FEATURE_JOURNALING)) { + std::unique_lock image_locker{ictx->image_lock}; + ictx->set_journal_policy(new journal::DisabledPolicy()); + } + + ictx->owner_lock.lock_shared(); + if (ictx->exclusive_lock != nullptr) { + ictx->exclusive_lock->block_requests(0); + + r = ictx->operations->prepare_image_update( + 
exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true); + if (r < 0) { + lderr(cct) << "cannot obtain exclusive lock - not removing" << dendl; + ictx->owner_lock.unlock_shared(); + ictx->state->close(); + return -EBUSY; + } + } + ictx->owner_lock.unlock_shared(); + + ictx->image_lock.lock_shared(); + if (!ictx->migration_info.empty()) { + lderr(cct) << "cannot move migrating image to trash" << dendl; + ictx->image_lock.unlock_shared(); + ictx->state->close(); + return -EBUSY; + } + ictx->image_lock.unlock_shared(); + + if (mirror_r >= 0 && + mirror_image.mode != cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + r = disable_mirroring(ictx); + if (r < 0) { + ictx->state->close(); + return r; + } + } + + ictx->state->close(); + } + + utime_t delete_time{ceph_clock_now()}; + utime_t deferment_end_time{delete_time}; + deferment_end_time += delay; + cls::rbd::TrashImageSpec trash_image_spec{ + static_cast(source), image_name, + delete_time, deferment_end_time}; + + trash_image_spec.state = cls::rbd::TRASH_IMAGE_STATE_MOVING; + C_SaferCond ctx; + auto req = trash::MoveRequest::create(io_ctx, image_id, trash_image_spec, + &ctx); + req->send(); + + r = ctx.wait(); + trash_image_spec.state = cls::rbd::TRASH_IMAGE_STATE_NORMAL; + int ret = cls_client::trash_state_set(&io_ctx, image_id, + trash_image_spec.state, + cls::rbd::TRASH_IMAGE_STATE_MOVING); + if (ret < 0 && ret != -EOPNOTSUPP) { + lderr(cct) << "error setting trash image state: " + << cpp_strerror(ret) << dendl; + return ret; + } + if (r < 0) { + return r; + } + + C_SaferCond notify_ctx; + TrashWatcher::notify_image_added(io_ctx, image_id, trash_image_spec, + ¬ify_ctx); + r = notify_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + + return 0; +} + +template +int Trash::move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, uint64_t delay) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << &io_ctx 
<< " name=" << image_name << dendl; + + // try to get image id from the directory + std::string image_id; + int r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name, + &image_id); + if (r == -ENOENT) { + r = io_ctx.stat(util::old_header_name(image_name), nullptr, nullptr); + if (r == 0) { + // cannot move V1 image to trash + ldout(cct, 10) << "cannot move v1 image to trash" << dendl; + return -EOPNOTSUPP; + } + + // search for an interrupted trash move request + std::map trash_image_specs; + int r = list_trash_image_specs(io_ctx, &trash_image_specs, true); + if (r < 0) { + return r; + } + if (auto found_image = + std::find_if( + trash_image_specs.begin(), trash_image_specs.end(), + [&](const auto& pair) { + const auto& spec = pair.second; + return (spec.source == cls::rbd::TRASH_IMAGE_SOURCE_USER && + spec.state == cls::rbd::TRASH_IMAGE_STATE_MOVING && + spec.name == image_name); + }); + found_image != trash_image_specs.end()) { + image_id = found_image->first; + } else { + return -ENOENT; + } + ldout(cct, 15) << "derived image id " << image_id << " from existing " + << "trash entry" << dendl; + } else if (r < 0) { + lderr(cct) << "failed to retrieve image id: " << cpp_strerror(r) << dendl; + return r; + } + + if (image_name.empty() || image_id.empty()) { + lderr(cct) << "invalid image name/id" << dendl; + return -EINVAL; + } + + return Trash::move(io_ctx, source, image_name, image_id, delay); +} + +template +int Trash::get(IoCtx &io_ctx, const std::string &id, + trash_image_info_t *info) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << __func__ << " " << &io_ctx << dendl; + + cls::rbd::TrashImageSpec spec; + int r = cls_client::trash_get(&io_ctx, id, &spec); + if (r == -ENOENT) { + return r; + } else if (r < 0) { + lderr(cct) << "error retrieving trash entry: " << cpp_strerror(r) + << dendl; + return r; + } + + rbd_trash_image_source_t source = static_cast( + spec.source); + *info = trash_image_info_t{id, spec.name, source, 
spec.deletion_time.sec(), + spec.deferment_end_time.sec()}; + return 0; +} + +template +int Trash::list(IoCtx &io_ctx, std::vector &entries, + bool exclude_user_remove_source) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << __func__ << " " << &io_ctx << dendl; + + std::map trash_image_specs; + int r = list_trash_image_specs(io_ctx, &trash_image_specs, + exclude_user_remove_source); + if (r < 0) { + return r; + } + + entries.reserve(trash_image_specs.size()); + for (const auto& [image_id, spec] : trash_image_specs) { + rbd_trash_image_source_t source = + static_cast(spec.source); + entries.push_back({image_id, spec.name, source, + spec.deletion_time.sec(), + spec.deferment_end_time.sec()}); + } + + return 0; +} + +template +int Trash::purge(IoCtx& io_ctx, time_t expire_ts, + float threshold, ProgressContext& pctx) { + auto *cct((CephContext *) io_ctx.cct()); + ldout(cct, 20) << &io_ctx << dendl; + + std::vector trash_entries; + int r = librbd::api::Trash::list(io_ctx, trash_entries, true); + if (r < 0) { + return r; + } + + trash_entries.erase( + std::remove_if(trash_entries.begin(), trash_entries.end(), + [](librbd::trash_image_info_t info) { + return info.source != RBD_TRASH_IMAGE_SOURCE_USER && + info.source != RBD_TRASH_IMAGE_SOURCE_USER_PARENT; + }), + trash_entries.end()); + + std::set to_be_removed; + if (threshold != -1) { + if (threshold < 0 || threshold > 1) { + lderr(cct) << "argument 'threshold' is out of valid range" + << dendl; + return -EINVAL; + } + + librados::bufferlist inbl; + librados::bufferlist outbl; + std::string pool_name = io_ctx.get_pool_name(); + + librados::Rados rados(io_ctx); + rados.mon_command(R"({"prefix": "df", "format": "json"})", inbl, + &outbl, nullptr); + + json_spirit::mValue json; + if (!json_spirit::read(outbl.to_str(), json)) { + lderr(cct) << "ceph df json output could not be parsed" + << dendl; + return -EBADMSG; + } + + json_spirit::mArray arr = json.get_obj()["pools"].get_array(); + + double 
pool_percent_used = 0; + uint64_t pool_total_bytes = 0; + + std::map> datapools; + + std::sort(trash_entries.begin(), trash_entries.end(), + [](librbd::trash_image_info_t a, librbd::trash_image_info_t b) { + return a.deferment_end_time < b.deferment_end_time; + } + ); + + for (const auto &entry : trash_entries) { + int64_t data_pool_id = -1; + r = cls_client::get_data_pool(&io_ctx, util::header_name(entry.id), + &data_pool_id); + if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) { + lderr(cct) << "failed to query data pool: " << cpp_strerror(r) << dendl; + return r; + } else if (data_pool_id == -1) { + data_pool_id = io_ctx.get_id(); + } + + if (data_pool_id != io_ctx.get_id()) { + librados::IoCtx data_io_ctx; + r = util::create_ioctx(io_ctx, "image", data_pool_id, + {}, &data_io_ctx); + if (r < 0) { + lderr(cct) << "error accessing data pool" << dendl; + continue; + } + auto data_pool = data_io_ctx.get_pool_name(); + datapools[data_pool].push_back(entry.id); + } else { + datapools[pool_name].push_back(entry.id); + } + } + + uint64_t bytes_to_free = 0; + + for (uint8_t i = 0; i < arr.size(); ++i) { + json_spirit::mObject obj = arr[i].get_obj(); + std::string name = obj.find("name")->second.get_str(); + auto img = datapools.find(name); + if (img != datapools.end()) { + json_spirit::mObject stats = arr[i].get_obj()["stats"].get_obj(); + pool_percent_used = stats["percent_used"].get_real(); + if (pool_percent_used <= threshold) continue; + + bytes_to_free = 0; + + pool_total_bytes = stats["max_avail"].get_uint64() + + stats["bytes_used"].get_uint64(); + + auto bytes_threshold = (uint64_t) (pool_total_bytes * + (pool_percent_used - threshold)); + + for (const auto &it : img->second) { + auto ictx = new I("", it, nullptr, io_ctx, false); + r = ictx->state->open(OPEN_FLAG_SKIP_OPEN_PARENT); + if (r == -ENOENT) { + continue; + } else if (r < 0) { + lderr(cct) << "failed to open image " << it << ": " + << cpp_strerror(r) << dendl; + } + + r = 
librbd::api::DiffIterate::diff_iterate( + ictx, cls::rbd::UserSnapshotNamespace(), nullptr, 0, ictx->size, + false, true, + [](uint64_t offset, size_t len, int exists, void *arg) { + auto *to_free = reinterpret_cast(arg); + if (exists) + (*to_free) += len; + return 0; + }, &bytes_to_free); + + ictx->state->close(); + if (r < 0) { + lderr(cct) << "failed to calculate disk usage for image " << it + << ": " << cpp_strerror(r) << dendl; + continue; + } + + to_be_removed.insert(it); + if (bytes_to_free >= bytes_threshold) { + break; + } + } + } + } + + if (bytes_to_free == 0) { + ldout(cct, 10) << "pool usage is lower than or equal to " + << (threshold * 100) + << "%" << dendl; + return 0; + } + } + + if (expire_ts == 0) { + struct timespec now; + clock_gettime(CLOCK_REALTIME, &now); + expire_ts = now.tv_sec; + } + + for (const auto &entry : trash_entries) { + if (expire_ts >= entry.deferment_end_time) { + to_be_removed.insert(entry.id); + } + } + + NoOpProgressContext remove_pctx; + uint64_t list_size = to_be_removed.size(), i = 0; + int remove_err = 1; + while (!to_be_removed.empty() && remove_err == 1) { + remove_err = 0; + for (auto it = to_be_removed.begin(); it != to_be_removed.end(); ) { + trash_image_info_t trash_info; + r = Trash::get(io_ctx, *it, &trash_info); + if (r == -ENOENT) { + // likely RBD_TRASH_IMAGE_SOURCE_USER_PARENT image removed as a side + // effect of a preceeding remove (last child detach) + pctx.update_progress(++i, list_size); + it = to_be_removed.erase(it); + continue; + } else if (r < 0) { + lderr(cct) << "error getting image id " << *it + << " info: " << cpp_strerror(r) << dendl; + return r; + } + + r = Trash::remove(io_ctx, *it, true, remove_pctx); + if (r == -ENOTEMPTY || r == -EBUSY || r == -EMLINK || r == -EUCLEAN) { + if (!remove_err) { + remove_err = r; + } + ++it; + continue; + } else if (r < 0) { + lderr(cct) << "error removing image id " << *it + << ": " << cpp_strerror(r) << dendl; + return r; + } + pctx.update_progress(++i, 
list_size); + it = to_be_removed.erase(it); + remove_err = 1; + } + ldout(cct, 20) << "remove_err=" << remove_err << dendl; + } + + if (!to_be_removed.empty()) { + ceph_assert(remove_err < 0); + ldout(cct, 10) << "couldn't remove " << to_be_removed.size() + << " expired images" << dendl; + return remove_err; + } + + return 0; +} + +template +int Trash::remove(IoCtx &io_ctx, const std::string &image_id, bool force, + ProgressContext& prog_ctx) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "trash_remove " << &io_ctx << " " << image_id + << " " << force << dendl; + + cls::rbd::TrashImageSpec trash_spec; + int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec); + if (r < 0) { + lderr(cct) << "error getting image id " << image_id + << " info from trash: " << cpp_strerror(r) << dendl; + return r; + } + + utime_t now = ceph_clock_now(); + if (now < trash_spec.deferment_end_time && !force) { + lderr(cct) << "error: deferment time has not expired." << dendl; + return -EPERM; + } + if (trash_spec.state == cls::rbd::TRASH_IMAGE_STATE_MOVING) { + lderr(cct) << "error: image is pending moving to the trash." + << dendl; + return -EUCLEAN; + } else if (trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL && + trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + lderr(cct) << "error: image is pending restoration." 
<< dendl; + return -EBUSY; + } + + AsioEngine asio_engine(io_ctx); + + C_SaferCond cond; + auto req = librbd::trash::RemoveRequest::create( + io_ctx, image_id, asio_engine.get_work_queue(), force, prog_ctx, &cond); + req->send(); + + r = cond.wait(); + if (r < 0) { + return r; + } + + C_SaferCond notify_ctx; + TrashWatcher::notify_image_removed(io_ctx, image_id, ¬ify_ctx); + r = notify_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + + return 0; +} + +template +int Trash::restore(librados::IoCtx &io_ctx, + const TrashImageSources& trash_image_sources, + const std::string &image_id, + const std::string &image_new_name) { + CephContext *cct((CephContext *)io_ctx.cct()); + ldout(cct, 20) << "trash_restore " << &io_ctx << " " << image_id << " " + << image_new_name << dendl; + + cls::rbd::TrashImageSpec trash_spec; + int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec); + if (r < 0) { + lderr(cct) << "error getting image id " << image_id + << " info from trash: " << cpp_strerror(r) << dendl; + return r; + } + + if (trash_image_sources.count(trash_spec.source) == 0) { + lderr(cct) << "Current trash source '" << trash_spec.source << "' " + << "does not match expected: " + << trash_image_sources << dendl; + return -EINVAL; + } + + std::string image_name = image_new_name; + if (trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL && + trash_spec.state != cls::rbd::TRASH_IMAGE_STATE_RESTORING) { + lderr(cct) << "error restoring image id " << image_id + << ", which is pending deletion" << dendl; + return -EBUSY; + } + r = cls_client::trash_state_set(&io_ctx, image_id, + cls::rbd::TRASH_IMAGE_STATE_RESTORING, + cls::rbd::TRASH_IMAGE_STATE_NORMAL); + if (r < 0 && r != -EOPNOTSUPP) { + lderr(cct) << "error setting trash image state: " + << cpp_strerror(r) << dendl; + return r; + } + + if (image_name.empty()) { + // if user didn't specify a new name, let's try using the old name + image_name = 
trash_spec.name; + ldout(cct, 20) << "restoring image id " << image_id << " with name " + << image_name << dendl; + } + + // check if no image exists with the same name + bool create_id_obj = true; + std::string existing_id; + r = cls_client::get_id(&io_ctx, util::id_obj_name(image_name), &existing_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error checking if image " << image_name << " exists: " + << cpp_strerror(r) << dendl; + int ret = cls_client::trash_state_set(&io_ctx, image_id, + cls::rbd::TRASH_IMAGE_STATE_NORMAL, + cls::rbd::TRASH_IMAGE_STATE_RESTORING); + if (ret < 0 && ret != -EOPNOTSUPP) { + lderr(cct) << "error setting trash image state: " + << cpp_strerror(ret) << dendl; + } + return r; + } else if (r != -ENOENT){ + // checking if we are recovering from an incomplete restore + if (existing_id != image_id) { + ldout(cct, 2) << "an image with the same name already exists" << dendl; + int r2 = cls_client::trash_state_set(&io_ctx, image_id, + cls::rbd::TRASH_IMAGE_STATE_NORMAL, + cls::rbd::TRASH_IMAGE_STATE_RESTORING); + if (r2 < 0 && r2 != -EOPNOTSUPP) { + lderr(cct) << "error setting trash image state: " + << cpp_strerror(r2) << dendl; + } + return -EEXIST; + } + create_id_obj = false; + } + + if (create_id_obj) { + ldout(cct, 2) << "adding id object" << dendl; + librados::ObjectWriteOperation op; + op.create(true); + cls_client::set_id(&op, image_id); + r = io_ctx.operate(util::id_obj_name(image_name), &op); + if (r < 0) { + lderr(cct) << "error adding id object for image " << image_name + << ": " << cpp_strerror(r) << dendl; + return r; + } + } + + ldout(cct, 2) << "adding rbd image to v2 directory..." 
<< dendl; + r = cls_client::dir_add_image(&io_ctx, RBD_DIRECTORY, image_name, + image_id); + if (r < 0 && r != -EEXIST) { + lderr(cct) << "error adding image to v2 directory: " + << cpp_strerror(r) << dendl; + return r; + } + + r = enable_mirroring(io_ctx, image_id); + if (r < 0) { + // not fatal -- ignore + } + + ldout(cct, 2) << "removing image from trash..." << dendl; + r = cls_client::trash_remove(&io_ctx, image_id); + if (r < 0 && r != -ENOENT) { + lderr(cct) << "error removing image id " << image_id << " from trash: " + << cpp_strerror(r) << dendl; + return r; + } + + C_SaferCond notify_ctx; + TrashWatcher::notify_image_removed(io_ctx, image_id, ¬ify_ctx); + r = notify_ctx.wait(); + if (r < 0) { + lderr(cct) << "failed to send update notification: " << cpp_strerror(r) + << dendl; + } + + return 0; +} + +} // namespace api +} // namespace librbd + +template class librbd::api::Trash; diff --git a/src/librbd/api/Trash.h b/src/librbd/api/Trash.h new file mode 100644 index 000000000..66f819dfa --- /dev/null +++ b/src/librbd/api/Trash.h @@ -0,0 +1,53 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef LIBRBD_API_TRASH_H +#define LIBRBD_API_TRASH_H + +#include "include/rados/librados_fwd.hpp" +#include "include/rbd/librbd.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include +#include +#include + +namespace librbd { + +class ProgressContext; + +struct ImageCtx; + +namespace api { + +template +struct Trash { + typedef std::set TrashImageSources; + static const TrashImageSources ALLOWED_RESTORE_SOURCES; + + static int move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, uint64_t delay); + static int move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, + const std::string &image_name, const std::string &image_id, + uint64_t delay); + static int get(librados::IoCtx &io_ctx, const std::string &id, + trash_image_info_t *info); + static int list(librados::IoCtx 
&io_ctx, + std::vector &entries, + bool exclude_user_remove_source); + static int purge(IoCtx& io_ctx, time_t expire_ts, + float threshold, ProgressContext& pctx); + static int remove(librados::IoCtx &io_ctx, const std::string &image_id, + bool force, ProgressContext& prog_ctx); + static int restore(librados::IoCtx &io_ctx, + const TrashImageSources& trash_image_sources, + const std::string &image_id, + const std::string &image_new_name); + +}; + +} // namespace api +} // namespace librbd + +extern template class librbd::api::Trash; + +#endif // LIBRBD_API_TRASH_H diff --git a/src/librbd/api/Utils.cc b/src/librbd/api/Utils.cc new file mode 100644 index 000000000..056b6b435 --- /dev/null +++ b/src/librbd/api/Utils.cc @@ -0,0 +1,102 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/api/Utils.h" +#include "common/dout.h" + +#if defined(HAVE_LIBCRYPTSETUP) +#include "librbd/crypto/luks/LUKSEncryptionFormat.h" +#endif + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::api::util: " << __func__ << ": " + +namespace librbd { +namespace api { +namespace util { + +template +int create_encryption_format( + CephContext* cct, encryption_format_t format, + encryption_options_t opts, size_t opts_size, bool c_api, + crypto::EncryptionFormat** result_format) { + size_t expected_opts_size; + switch (format) { +#if defined(HAVE_LIBCRYPTSETUP) + case RBD_ENCRYPTION_FORMAT_LUKS1: { + if (c_api) { + expected_opts_size = sizeof(rbd_encryption_luks1_format_options_t); + if (expected_opts_size == opts_size) { + auto c_opts = (rbd_encryption_luks1_format_options_t*)opts; + *result_format = new crypto::luks::LUKS1EncryptionFormat( + c_opts->alg, {c_opts->passphrase, c_opts->passphrase_size}); + } + } else { + expected_opts_size = sizeof(encryption_luks1_format_options_t); + if (expected_opts_size == opts_size) { + auto cpp_opts = (encryption_luks1_format_options_t*)opts; + 
*result_format = new crypto::luks::LUKS1EncryptionFormat( + cpp_opts->alg, cpp_opts->passphrase); + } + } + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS2: { + if (c_api) { + expected_opts_size = sizeof(rbd_encryption_luks2_format_options_t); + if (expected_opts_size == opts_size) { + auto c_opts = (rbd_encryption_luks2_format_options_t*)opts; + *result_format = new crypto::luks::LUKS2EncryptionFormat( + c_opts->alg, {c_opts->passphrase, c_opts->passphrase_size}); + } + } else { + expected_opts_size = sizeof(encryption_luks2_format_options_t); + if (expected_opts_size == opts_size) { + auto cpp_opts = (encryption_luks2_format_options_t*)opts; + *result_format = new crypto::luks::LUKS2EncryptionFormat( + cpp_opts->alg, cpp_opts->passphrase); + } + } + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS: { + if (c_api) { + expected_opts_size = sizeof(rbd_encryption_luks_format_options_t); + if (expected_opts_size == opts_size) { + auto c_opts = (rbd_encryption_luks_format_options_t*)opts; + *result_format = new crypto::luks::LUKSEncryptionFormat( + {c_opts->passphrase, c_opts->passphrase_size}); + } + } else { + expected_opts_size = sizeof(encryption_luks_format_options_t); + if (expected_opts_size == opts_size) { + auto cpp_opts = (encryption_luks_format_options_t*)opts; + *result_format = new crypto::luks::LUKSEncryptionFormat( + cpp_opts->passphrase); + } + } + break; + } +#endif + default: + lderr(cct) << "unsupported encryption format: " << format << dendl; + return -ENOTSUP; + } + + if (expected_opts_size != opts_size) { + lderr(cct) << "expected opts_size: " << expected_opts_size << dendl; + return -EINVAL; + } + + return 0; +} + +} // namespace util +} // namespace api +} // namespace librbd + +template int librbd::api::util::create_encryption_format( + CephContext* cct, encryption_format_t format, encryption_options_t opts, + size_t opts_size, bool c_api, + crypto::EncryptionFormat** result_format); diff --git a/src/librbd/api/Utils.h b/src/librbd/api/Utils.h new 
file mode 100644 index 000000000..8f8c22290 --- /dev/null +++ b/src/librbd/api/Utils.h @@ -0,0 +1,28 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_API_UTILS_H +#define CEPH_LIBRBD_API_UTILS_H + +#include "include/rbd/librbd.hpp" +#include "librbd/ImageCtx.h" +#include "librbd/crypto/EncryptionFormat.h" + +namespace librbd { + +struct ImageCtx; + +namespace api { +namespace util { + +template +int create_encryption_format( + CephContext* cct, encryption_format_t format, + encryption_options_t opts, size_t opts_size, bool c_api, + crypto::EncryptionFormat** result_format); + +} // namespace util +} // namespace api +} // namespace librbd + +#endif // CEPH_LIBRBD_API_UTILS_H -- cgit v1.2.3