diff options
Diffstat (limited to 'src/librbd/operation')
40 files changed, 7760 insertions, 0 deletions
diff --git a/src/librbd/operation/DisableFeaturesRequest.cc b/src/librbd/operation/DisableFeaturesRequest.cc new file mode 100644 index 00000000..27dfa458 --- /dev/null +++ b/src/librbd/operation/DisableFeaturesRequest.cc @@ -0,0 +1,646 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/DisableFeaturesRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/image/SetFlagsRequest.h" +#include "librbd/io/ImageRequestWQ.h" +#include "librbd/journal/RemoveRequest.h" +#include "librbd/mirror/DisableRequest.h" +#include "librbd/object_map/RemoveRequest.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::DisableFeaturesRequest: " + +namespace librbd { +namespace operation { + +using util::create_async_context_callback; +using util::create_context_callback; +using util::create_rados_callback; + +template <typename I> +DisableFeaturesRequest<I>::DisableFeaturesRequest(I &image_ctx, + Context *on_finish, + uint64_t journal_op_tid, + uint64_t features, + bool force) + : Request<I>(image_ctx, on_finish, journal_op_tid), m_features(features), + m_force(force) { +} + +template <typename I> +void DisableFeaturesRequest<I>::send_op() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ceph_assert(image_ctx.owner_lock.is_locked()); + + ldout(cct, 20) << this << " " << __func__ << ": features=" << m_features + << dendl; + + send_prepare_lock(); +} + +template <typename I> +bool DisableFeaturesRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << " r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + return true; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_prepare_lock() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + image_ctx.state->prepare_lock(create_async_context_callback( + image_ctx, create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_prepare_lock>(this))); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_prepare_lock(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to lock image: " << cpp_strerror(*result) << dendl; + return this->create_context_finisher(*result); + } + + send_block_writes(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_block_writes() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + RWLock::WLocker locker(image_ctx.owner_lock); + image_ctx.io_work_queue->block_writes(create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_block_writes>(this)); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_block_writes(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; + return handle_finish(*result); + } + m_writes_blocked = true; + + { + RWLock::WLocker locker(image_ctx.owner_lock); + // avoid accepting new requests from peers while we manipulate + // the image features + if (image_ctx.exclusive_lock != nullptr && + (image_ctx.journal == nullptr || + !image_ctx.journal->is_journal_replaying())) { + image_ctx.exclusive_lock->block_requests(0); + m_requests_blocked = true; + } + } + + send_acquire_exclusive_lock(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_acquire_exclusive_lock() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + Context *ctx = create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_acquire_exclusive_lock>(this); + + { + RWLock::WLocker locker(image_ctx.owner_lock); + // if disabling features w/ exclusive lock supported, we need to + // acquire the lock to temporarily block IO against the image + if (image_ctx.exclusive_lock != nullptr && + !image_ctx.exclusive_lock->is_lock_owner()) { + m_acquired_lock = true; + + image_ctx.exclusive_lock->acquire_lock(ctx); + return; + } + } + + ctx->complete(0); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_acquire_exclusive_lock(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + image_ctx.owner_lock.get_read(); + if (*result < 0) { + lderr(cct) << "failed to lock image: " << cpp_strerror(*result) << dendl; + image_ctx.owner_lock.put_read(); + return handle_finish(*result); + } else if (image_ctx.exclusive_lock != nullptr && + !image_ctx.exclusive_lock->is_lock_owner()) { + lderr(cct) << "failed to acquire exclusive lock" << dendl; + *result = image_ctx.exclusive_lock->get_unlocked_op_error(); + image_ctx.owner_lock.put_read(); + return handle_finish(*result); + } + + do { + m_features &= image_ctx.features; + + // interlock object-map and fast-diff together + if (((m_features & RBD_FEATURE_OBJECT_MAP) != 0) || + ((m_features & RBD_FEATURE_FAST_DIFF) != 0)) { + m_features |= (RBD_FEATURE_OBJECT_MAP | RBD_FEATURE_FAST_DIFF); + } + + m_new_features = image_ctx.features & ~m_features; + m_features_mask = m_features; + + if ((m_features & RBD_FEATURE_EXCLUSIVE_LOCK) != 0) { + if ((m_new_features & RBD_FEATURE_OBJECT_MAP) != 0 || + (m_new_features & RBD_FEATURE_JOURNALING) != 0) { + lderr(cct) << "cannot disable exclusive-lock. object-map " + "or journaling must be disabled before " + "disabling exclusive-lock." << dendl; + *result = -EINVAL; + break; + } + m_features_mask |= (RBD_FEATURE_OBJECT_MAP | + RBD_FEATURE_FAST_DIFF | + RBD_FEATURE_JOURNALING); + } + if ((m_features & RBD_FEATURE_FAST_DIFF) != 0) { + m_disable_flags |= RBD_FLAG_FAST_DIFF_INVALID; + } + if ((m_features & RBD_FEATURE_OBJECT_MAP) != 0) { + m_disable_flags |= RBD_FLAG_OBJECT_MAP_INVALID; + } + } while (false); + image_ctx.owner_lock.put_read(); + + if (*result < 0) { + return handle_finish(*result); + } + + send_get_mirror_mode(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_get_mirror_mode() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if ((m_features & RBD_FEATURE_JOURNALING) == 0) { + send_append_op_event(); + return; + } + + ldout(cct, 20) << this << " " << __func__ << dendl; + + librados::ObjectReadOperation op; + cls_client::mirror_mode_get_start(&op); + + using klass = DisableFeaturesRequest<I>; + librados::AioCompletion *comp = + create_rados_callback<klass, &klass::handle_get_mirror_mode>(this); + m_out_bl.clear(); + int r = image_ctx.md_ctx.aio_operate(RBD_MIRRORING, comp, &op, &m_out_bl); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_get_mirror_mode(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result == 0) { + auto it = m_out_bl.cbegin(); + *result = cls_client::mirror_mode_get_finish(&it, &m_mirror_mode); + } + + if (*result < 0 && *result != -ENOENT) { + lderr(cct) << "failed to retrieve pool mirror mode: " + << cpp_strerror(*result) << dendl; + return handle_finish(*result); + } + + ldout(cct, 20) << this << " " << __func__ << ": m_mirror_mode=" + << m_mirror_mode << dendl; + + send_get_mirror_image(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_get_mirror_image() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (m_mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) { + send_disable_mirror_image(); + return; + } + + ldout(cct, 20) << this << " " << __func__ << dendl; + + librados::ObjectReadOperation op; + cls_client::mirror_image_get_start(&op, image_ctx.id); + + using klass = DisableFeaturesRequest<I>; + librados::AioCompletion *comp = + create_rados_callback<klass, &klass::handle_get_mirror_image>(this); + m_out_bl.clear(); + int r = image_ctx.md_ctx.aio_operate(RBD_MIRRORING, comp, &op, &m_out_bl); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_get_mirror_image(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + cls::rbd::MirrorImage mirror_image; + + if (*result == 0) { + auto it = m_out_bl.cbegin(); + *result = cls_client::mirror_image_get_finish(&it, &mirror_image); + } + + if (*result < 0 && *result != -ENOENT) { + lderr(cct) << "failed to retrieve pool mirror image: " + << cpp_strerror(*result) << dendl; + return handle_finish(*result); + } + + if ((mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) && !m_force) { + lderr(cct) << "cannot disable journaling: image mirroring " + << "enabled and mirror pool mode set to image" + << dendl; + *result = -EINVAL; + return handle_finish(*result); + } + + send_disable_mirror_image(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_disable_mirror_image() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + ldout(cct, 20) << this << " " << __func__ << dendl; + + Context *ctx = create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_disable_mirror_image>(this); + + mirror::DisableRequest<I> *req = + mirror::DisableRequest<I>::create(&image_ctx, m_force, true, ctx); + req->send(); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_disable_mirror_image(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to disable image mirroring: " << cpp_strerror(*result) + << dendl; + // not fatal + } + + send_close_journal(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_close_journal() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + { + RWLock::WLocker locker(image_ctx.owner_lock); + if (image_ctx.journal != nullptr) { + ldout(cct, 20) << this << " " << __func__ << dendl; + + std::swap(m_journal, image_ctx.journal); + Context *ctx = create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_close_journal>(this); + + m_journal->close(ctx); + return; + } + } + + send_remove_journal(); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_close_journal(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to close image journal: " << cpp_strerror(*result) + << dendl; + } + + ceph_assert(m_journal != nullptr); + delete m_journal; + m_journal = nullptr; + + send_remove_journal(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_remove_journal() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + Context *ctx = create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_remove_journal>(this); + + journal::RemoveRequest<I> *req = journal::RemoveRequest<I>::create( + image_ctx.md_ctx, image_ctx.id, librbd::Journal<>::IMAGE_CLIENT_ID, + image_ctx.op_work_queue, ctx); + + req->send(); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_remove_journal(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to remove image journal: " << cpp_strerror(*result) + << dendl; + return handle_finish(*result); + } + + send_append_op_event(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_append_op_event() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (!this->template append_op_event< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_append_op_event>(this)) { + send_remove_object_map(); + } + + ldout(cct, 20) << this << " " << __func__ << dendl; +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_append_op_event(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result) + << dendl; + return handle_finish(*result); + } + + send_remove_object_map(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_remove_object_map() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + if ((m_features & RBD_FEATURE_OBJECT_MAP) == 0) { + send_set_features(); + return; + } + + Context *ctx = create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_remove_object_map>(this); + + object_map::RemoveRequest<I> *req = + object_map::RemoveRequest<I>::create(&image_ctx, ctx); + req->send(); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_remove_object_map(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0 && *result != -ENOENT) { + lderr(cct) << "failed to remove object map: " << cpp_strerror(*result) << dendl; + return handle_finish(*result); + } + + send_set_features(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_set_features() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": new_features=" + << m_new_features << ", features_mask=" << m_features_mask + << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::set_features(&op, m_new_features, m_features_mask); + + using klass = DisableFeaturesRequest<I>; + librados::AioCompletion *comp = + create_rados_callback<klass, &klass::handle_set_features>(this); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_set_features(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result == -EINVAL && (m_features_mask & RBD_FEATURE_JOURNALING) != 0) { + // NOTE: infernalis OSDs will not accept a mask with new features, so + // re-attempt with a reduced mask. + ldout(cct, 5) << this << " " << __func__ + << ": re-attempt with a reduced mask" << dendl; + m_features_mask &= ~RBD_FEATURE_JOURNALING; + send_set_features(); + } + + if (*result < 0) { + lderr(cct) << "failed to update features: " << cpp_strerror(*result) + << dendl; + return handle_finish(*result); + } + + send_update_flags(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_update_flags() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (m_disable_flags == 0) { + send_notify_update(); + return; + } + + ldout(cct, 20) << this << " " << __func__ << ": disable_flags=" + << m_disable_flags << dendl; + + Context *ctx = create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_update_flags>(this); + + image::SetFlagsRequest<I> *req = + image::SetFlagsRequest<I>::create(&image_ctx, 0, m_disable_flags, ctx); + req->send(); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_update_flags(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to update image flags: " << cpp_strerror(*result) + << dendl; + return handle_finish(*result); + } + + send_notify_update(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_notify_update() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + Context *ctx = create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_notify_update>(this); + + image_ctx.notify_update(ctx); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_notify_update(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (image_ctx.exclusive_lock == nullptr || !m_acquired_lock) { + return handle_finish(*result); + } + + send_release_exclusive_lock(); + return nullptr; +} + +template <typename I> +void DisableFeaturesRequest<I>::send_release_exclusive_lock() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + Context *ctx = create_context_callback< + DisableFeaturesRequest<I>, + &DisableFeaturesRequest<I>::handle_release_exclusive_lock>(this); + + image_ctx.exclusive_lock->release_lock(ctx); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_release_exclusive_lock(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + return handle_finish(*result); +} + +template <typename I> +Context *DisableFeaturesRequest<I>::handle_finish(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << r << dendl; + + { + RWLock::WLocker locker(image_ctx.owner_lock); + if (image_ctx.exclusive_lock != nullptr && m_requests_blocked) { + image_ctx.exclusive_lock->unblock_requests(); + } + + image_ctx.io_work_queue->unblock_writes(); + } + image_ctx.state->handle_prepare_lock_complete(); + + return this->create_context_finisher(r); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::DisableFeaturesRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/DisableFeaturesRequest.h b/src/librbd/operation/DisableFeaturesRequest.h new file mode 100644 index 00000000..b064bc45 --- /dev/null +++ b/src/librbd/operation/DisableFeaturesRequest.h @@ -0,0 +1,171 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_DISABLE_FEATURES_REQUEST_H +#define CEPH_LIBRBD_OPERATION_DISABLE_FEATURES_REQUEST_H + +#include "librbd/ImageCtx.h" +#include "librbd/operation/Request.h" +#include "cls/rbd/cls_rbd_client.h" + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class DisableFeaturesRequest : public Request<ImageCtxT> { +public: + static DisableFeaturesRequest *create(ImageCtxT &image_ctx, Context *on_finish, + uint64_t journal_op_tid, + uint64_t features, bool force) { + return new DisableFeaturesRequest(image_ctx, on_finish, journal_op_tid, + features, force); + } + + DisableFeaturesRequest(ImageCtxT &image_ctx, Context *on_finish, + uint64_t journal_op_tid, uint64_t features, bool force); + +protected: + void send_op() override; + bool should_complete(int r) override; + bool can_affect_io() const override { + return true; + } + journal::Event create_event(uint64_t op_tid) const override { + return journal::UpdateFeaturesEvent(op_tid, m_features, false); + } + +private: + /** + * DisableFeatures goes through the following state machine: + * + * @verbatim + * + * <start> + * | + * v + * STATE_PREPARE_LOCK + * | + * v + * STATE_BLOCK_WRITES + * | + * v + * STATE_ACQUIRE_EXCLUSIVE_LOCK (skip if not + * | required) + * | (disbling journaling) + * \-------------------\ + * | | + * | V + * | STATE_GET_MIRROR_MODE + * |(not | + * | disabling v + * | journaling) STATE_GET_MIRROR_IMAGE + * | | + * | v + * | STATE_DISABLE_MIRROR_IMAGE (skip if not + * | | required) + * | v + * | STATE_CLOSE_JOURNAL + * | | + * | v + * | STATE_REMOVE_JOURNAL + * | | + * |/-------------------/ + * | + * v + * STATE_APPEND_OP_EVENT (skip if journaling + * | disabled) + * v + * STATE_REMOVE_OBJECT_MAP (skip if not + * | disabling object map) + * v + * STATE_SET_FEATURES + * | + * v + * STATE_UPDATE_FLAGS + * | + * v + * STATE_NOTIFY_UPDATE + * | + * v + * STATE_REALEASE_EXCLUSIVE_LOCK (skip if not + * | required) + * | (unblock writes) + * v + * <finish> + * + * @endverbatim + * + */ + + uint64_t m_features; + bool m_force; + + bool m_acquired_lock = false; + bool m_writes_blocked = false; + bool m_snap_lock_acquired = false; + bool m_requests_blocked = false; + + uint64_t m_new_features = 0; + uint64_t m_disable_flags = 0; + uint64_t m_features_mask = 0; + + decltype(ImageCtxT::journal) m_journal = nullptr; + cls::rbd::MirrorMode m_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + bufferlist m_out_bl; + + void send_prepare_lock(); + Context *handle_prepare_lock(int *result); + + void send_block_writes(); + Context *handle_block_writes(int *result); + + void send_acquire_exclusive_lock(); + Context *handle_acquire_exclusive_lock(int *result); + + void send_get_mirror_mode(); + Context *handle_get_mirror_mode(int *result); + + void send_get_mirror_image(); + Context *handle_get_mirror_image(int *result); + + void send_disable_mirror_image(); + Context *handle_disable_mirror_image(int *result); + + void send_close_journal(); + Context *handle_close_journal(int *result); + + void send_remove_journal(); + Context *handle_remove_journal(int *result); + + void send_append_op_event(); + Context *handle_append_op_event(int *result); + + void send_remove_object_map(); + Context *handle_remove_object_map(int *result); + + void send_set_features(); + Context *handle_set_features(int *result); + + void send_update_flags(); + Context *handle_update_flags(int *result); + + void send_notify_update(); + Context *handle_notify_update(int *result); + + void send_release_exclusive_lock(); + Context *handle_release_exclusive_lock(int *result); + + Context *handle_finish(int r); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::DisableFeaturesRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_DISABLE_FEATURES_REQUEST_H diff --git a/src/librbd/operation/EnableFeaturesRequest.cc b/src/librbd/operation/EnableFeaturesRequest.cc new file mode 100644 index 00000000..e2c1113d --- /dev/null +++ b/src/librbd/operation/EnableFeaturesRequest.cc @@ -0,0 +1,489 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/EnableFeaturesRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/image/SetFlagsRequest.h" +#include "librbd/io/ImageRequestWQ.h" +#include "librbd/journal/CreateRequest.h" +#include "librbd/mirror/EnableRequest.h" +#include "librbd/object_map/CreateRequest.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::EnableFeaturesRequest: " + +namespace librbd { +namespace operation { + +using util::create_async_context_callback; +using util::create_context_callback; +using util::create_rados_callback; + +template <typename I> +EnableFeaturesRequest<I>::EnableFeaturesRequest(I &image_ctx, + Context *on_finish, + uint64_t journal_op_tid, + uint64_t features) + : Request<I>(image_ctx, on_finish, journal_op_tid), m_features(features) { +} + +template <typename I> +void EnableFeaturesRequest<I>::send_op() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ceph_assert(image_ctx.owner_lock.is_locked()); + + ldout(cct, 20) << this << " " << __func__ << ": features=" << m_features + << dendl; + send_prepare_lock(); +} + +template <typename I> +bool EnableFeaturesRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << " r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + return true; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_prepare_lock() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + image_ctx.state->prepare_lock(create_async_context_callback( + image_ctx, create_context_callback< + EnableFeaturesRequest<I>, + &EnableFeaturesRequest<I>::handle_prepare_lock>(this))); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_prepare_lock(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to lock image: " << cpp_strerror(*result) << dendl; + return this->create_context_finisher(*result); + } + + send_block_writes(); + return nullptr; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_block_writes() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + RWLock::WLocker locker(image_ctx.owner_lock); + image_ctx.io_work_queue->block_writes(create_context_callback< + EnableFeaturesRequest<I>, + &EnableFeaturesRequest<I>::handle_block_writes>(this)); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_block_writes(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; + return handle_finish(*result); + } + m_writes_blocked = true; + + send_get_mirror_mode(); + return nullptr; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_get_mirror_mode() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if ((m_features & RBD_FEATURE_JOURNALING) == 0) { + Context *ctx = create_context_callback< + EnableFeaturesRequest<I>, + &EnableFeaturesRequest<I>::handle_get_mirror_mode>(this); + ctx->complete(-ENOENT); + return; + } + + ldout(cct, 20) << this << " " << __func__ << dendl; + + librados::ObjectReadOperation op; + cls_client::mirror_mode_get_start(&op); + + using klass = EnableFeaturesRequest<I>; + librados::AioCompletion *comp = + create_rados_callback<klass, &klass::handle_get_mirror_mode>(this); + m_out_bl.clear(); + int r = image_ctx.md_ctx.aio_operate(RBD_MIRRORING, comp, &op, &m_out_bl); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_get_mirror_mode(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + cls::rbd::MirrorMode mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + if (*result == 0) { + auto it = m_out_bl.cbegin(); + *result = cls_client::mirror_mode_get_finish(&it, &mirror_mode); + } else if (*result == -ENOENT) { + *result = 0; + } + + if (*result < 0) { + lderr(cct) << "failed to retrieve pool mirror mode: " + << cpp_strerror(*result) << dendl; + return handle_finish(*result); + } + + m_enable_mirroring = (mirror_mode == cls::rbd::MIRROR_MODE_POOL); + + bool create_journal = false; + do { + RWLock::WLocker locker(image_ctx.owner_lock); + + // avoid accepting new requests from peers while we manipulate + // the image features + if (image_ctx.exclusive_lock != nullptr && + (image_ctx.journal == nullptr || + !image_ctx.journal->is_journal_replaying())) { + image_ctx.exclusive_lock->block_requests(0); + m_requests_blocked = true; + } + + m_features &= ~image_ctx.features; + + // interlock object-map and fast-diff together + if (((m_features & RBD_FEATURE_OBJECT_MAP) != 0) || + ((m_features & RBD_FEATURE_FAST_DIFF) != 0)) { + m_features |= (RBD_FEATURE_OBJECT_MAP | RBD_FEATURE_FAST_DIFF); + } + + m_new_features = image_ctx.features | m_features; + m_features_mask = m_features; + + if ((m_features & RBD_FEATURE_OBJECT_MAP) != 0) { + if ((m_new_features & RBD_FEATURE_EXCLUSIVE_LOCK) == 0) { + lderr(cct) << "cannot enable object-map. exclusive-lock must be " + "enabled before enabling object-map." << dendl; + *result = -EINVAL; + break; + } + m_enable_flags |= RBD_FLAG_OBJECT_MAP_INVALID; + m_features_mask |= (RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_FAST_DIFF); + } + if ((m_features & RBD_FEATURE_FAST_DIFF) != 0) { + m_enable_flags |= RBD_FLAG_FAST_DIFF_INVALID; + m_features_mask |= (RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_OBJECT_MAP); + } + + if ((m_features & RBD_FEATURE_JOURNALING) != 0) { + if ((m_new_features & RBD_FEATURE_EXCLUSIVE_LOCK) == 0) { + lderr(cct) << "cannot enable journaling. exclusive-lock must be " + "enabled before enabling journaling." << dendl; + *result = -EINVAL; + break; + } + m_features_mask |= RBD_FEATURE_EXCLUSIVE_LOCK; + create_journal = true; + } + } while (false); + + if (*result < 0) { + return handle_finish(*result); + } + if (create_journal) { + send_create_journal(); + return nullptr; + } + send_append_op_event(); + return nullptr; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_create_journal() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + ldout(cct, 20) << this << " " << __func__ << dendl; + + journal::TagData tag_data(librbd::Journal<>::LOCAL_MIRROR_UUID); + Context *ctx = create_context_callback< + EnableFeaturesRequest<I>, + &EnableFeaturesRequest<I>::handle_create_journal>(this); + + journal::CreateRequest<I> *req = journal::CreateRequest<I>::create( + image_ctx.md_ctx, image_ctx.id, + image_ctx.config.template get_val<uint64_t>("rbd_journal_order"), + image_ctx.config.template get_val<uint64_t>("rbd_journal_splay_width"), + image_ctx.config.template get_val<std::string>("rbd_journal_pool"), + cls::journal::Tag::TAG_CLASS_NEW, tag_data, + librbd::Journal<>::IMAGE_CLIENT_ID, image_ctx.op_work_queue, ctx); + + req->send(); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_create_journal(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to create journal: " << cpp_strerror(*result) + << dendl; + return handle_finish(*result); + } + + send_append_op_event(); + return nullptr; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_append_op_event() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (!this->template append_op_event< + EnableFeaturesRequest<I>, + &EnableFeaturesRequest<I>::handle_append_op_event>(this)) { + send_update_flags(); + } + + ldout(cct, 20) << this << " " << __func__ << dendl; +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_append_op_event(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result) + << dendl; + return handle_finish(*result); + } + + send_update_flags(); + return nullptr; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_update_flags() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (m_enable_flags == 0) { + send_set_features(); + return; + } + + ldout(cct, 20) << this << " " << __func__ << ": enable_flags=" + << m_enable_flags << dendl; + + Context *ctx = create_context_callback< + EnableFeaturesRequest<I>, + &EnableFeaturesRequest<I>::handle_update_flags>(this); + + image::SetFlagsRequest<I> *req = + image::SetFlagsRequest<I>::create(&image_ctx, m_enable_flags, + m_enable_flags, ctx); + req->send(); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_update_flags(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to update image flags: " << cpp_strerror(*result) + << dendl; + return handle_finish(*result); + } + + send_set_features(); + return nullptr; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_set_features() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": new_features=" + << m_new_features << ", features_mask=" << m_features_mask + << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::set_features(&op, m_new_features, m_features_mask); + + using klass = EnableFeaturesRequest<I>; + librados::AioCompletion *comp = + create_rados_callback<klass, &klass::handle_set_features>(this); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_set_features(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to update features: " << cpp_strerror(*result) + << dendl; + return handle_finish(*result); + } + + send_create_object_map(); + return nullptr; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_create_object_map() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (((image_ctx.features & RBD_FEATURE_OBJECT_MAP) != 0) || + ((m_features & RBD_FEATURE_OBJECT_MAP) == 0)) { + send_enable_mirror_image(); + return; + } + + ldout(cct, 20) << this << " " << __func__ << dendl; + + Context *ctx = create_context_callback< + EnableFeaturesRequest<I>, + &EnableFeaturesRequest<I>::handle_create_object_map>(this); + + object_map::CreateRequest<I> *req = + object_map::CreateRequest<I>::create(&image_ctx, ctx); + req->send(); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_create_object_map(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to create object map: " << cpp_strerror(*result) + << dendl; + return handle_finish(*result); + } + + send_enable_mirror_image(); + return nullptr; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_enable_mirror_image() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + if (!m_enable_mirroring) { + send_notify_update(); + return; + } + + Context *ctx = create_context_callback< + EnableFeaturesRequest<I>, + &EnableFeaturesRequest<I>::handle_enable_mirror_image>(this); + + mirror::EnableRequest<I> *req = + mirror::EnableRequest<I>::create(&image_ctx, ctx); + req->send(); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_enable_mirror_image(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to enable mirroring: " << cpp_strerror(*result) + << dendl; + // not fatal + } + + send_notify_update(); + return nullptr; +} + +template <typename I> +void EnableFeaturesRequest<I>::send_notify_update() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + Context *ctx = create_context_callback< + EnableFeaturesRequest<I>, + &EnableFeaturesRequest<I>::handle_notify_update>(this); + + image_ctx.notify_update(ctx); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_notify_update(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl; + + return handle_finish(*result); +} + +template <typename I> +Context *EnableFeaturesRequest<I>::handle_finish(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << r << dendl; + + { + RWLock::WLocker locker(image_ctx.owner_lock); + + if (image_ctx.exclusive_lock != nullptr && m_requests_blocked) { + image_ctx.exclusive_lock->unblock_requests(); + } + if (m_writes_blocked) { + image_ctx.io_work_queue->unblock_writes(); + } + } + image_ctx.state->handle_prepare_lock_complete(); + + return this->create_context_finisher(r); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::EnableFeaturesRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/EnableFeaturesRequest.h b/src/librbd/operation/EnableFeaturesRequest.h new file mode 100644 index 00000000..1c91b4dc --- /dev/null +++ b/src/librbd/operation/EnableFeaturesRequest.h @@ -0,0 +1,135 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_ENABLE_FEATURES_REQUEST_H +#define CEPH_LIBRBD_OPERATION_ENABLE_FEATURES_REQUEST_H + +#include "librbd/operation/Request.h" + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class EnableFeaturesRequest : public Request<ImageCtxT> { +public: + static EnableFeaturesRequest *create(ImageCtxT &image_ctx, Context *on_finish, + uint64_t journal_op_tid, + uint64_t features) { + return new EnableFeaturesRequest(image_ctx, on_finish, journal_op_tid, + features); + } + + EnableFeaturesRequest(ImageCtxT &image_ctx, Context *on_finish, + uint64_t journal_op_tid, uint64_t features); + +protected: + void send_op() override; + bool should_complete(int r) override; + bool can_affect_io() const override { + return true; + } + journal::Event create_event(uint64_t op_tid) const override { + return journal::UpdateFeaturesEvent(op_tid, m_features, true); + } + +private: + /** + * EnableFeatures goes through the following state machine: + * + * @verbatim + * + * <start> + * | + * v + * STATE_PREPARE_LOCK + * | + * v + * STATE_BLOCK_WRITES + * | + * v + * STATE_GET_MIRROR_MODE + * | + * v + * STATE_CREATE_JOURNAL (skip if not + * | required) + * v + * STATE_APPEND_OP_EVENT (skip if journaling + * | disabled) + * v + * STATE_UPDATE_FLAGS + * | + * v + * STATE_SET_FEATURES + * | + * v + * STATE_CREATE_OBJECT_MAP (skip if not + * | required) + * v + * STATE_ENABLE_MIRROR_IMAGE + * | + * V + * STATE_NOTIFY_UPDATE + * | + * | (unblock writes) + * v + * <finish> + * @endverbatim + * + */ + + uint64_t m_features; + + bool m_enable_mirroring = false; + bool m_requests_blocked = false; + bool m_writes_blocked = false; + + uint64_t m_new_features = 0; + uint64_t m_enable_flags = 0; + uint64_t m_features_mask = 0; + + bufferlist m_out_bl; + + void send_prepare_lock(); + Context *handle_prepare_lock(int *result); + + void send_block_writes(); + Context *handle_block_writes(int *result); + + void send_get_mirror_mode(); + Context *handle_get_mirror_mode(int *result); + + void send_create_journal(); + Context *handle_create_journal(int *result); + + void send_append_op_event(); + Context *handle_append_op_event(int *result); + + void send_update_flags(); + Context *handle_update_flags(int *result); + + void send_set_features(); + Context *handle_set_features(int *result); + + void send_create_object_map(); + Context *handle_create_object_map(int *result); + + void send_enable_mirror_image(); + Context *handle_enable_mirror_image(int *result); + + void send_notify_update(); + Context *handle_notify_update(int *result); + + Context *handle_finish(int r); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::EnableFeaturesRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_ENABLE_FEATURES_REQUEST_H diff --git a/src/librbd/operation/FlattenRequest.cc b/src/librbd/operation/FlattenRequest.cc new file mode 100644 index 00000000..af21bd4e --- /dev/null +++ b/src/librbd/operation/FlattenRequest.cc @@ -0,0 +1,230 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/FlattenRequest.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/image/DetachChildRequest.h" +#include "librbd/image/DetachParentRequest.h" +#include "librbd/Types.h" +#include "librbd/io/ObjectRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include <boost/lambda/bind.hpp> +#include <boost/lambda/construct.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::operation::FlattenRequest: " << this \ + << " " << __func__ << ": " + +namespace librbd { +namespace operation { + +using util::create_context_callback; +using util::create_rados_callback; + +template <typename I> +class C_FlattenObject : public C_AsyncObjectThrottle<I> { +public: + C_FlattenObject(AsyncObjectThrottle<I> &throttle, I *image_ctx, + ::SnapContext snapc, uint64_t object_no) + : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_snapc(snapc), + m_object_no(object_no) { + } + + int send() override { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + CephContext *cct = image_ctx.cct; + + if (image_ctx.exclusive_lock != nullptr && + !image_ctx.exclusive_lock->is_lock_owner()) { + ldout(cct, 1) << "lost exclusive lock during flatten" << dendl; + return -ERESTART; + } + + { + RWLock::RLocker snap_lock(image_ctx.snap_lock); + if (image_ctx.object_map != nullptr && + !image_ctx.object_map->object_may_not_exist(m_object_no)) { + // can skip because the object already exists + return 1; + } + } + + bufferlist bl; + string oid = image_ctx.get_object_name(m_object_no); + auto req = new io::ObjectWriteRequest<I>(&image_ctx, oid, m_object_no, 0, + std::move(bl), m_snapc, 0, {}, + this); + if (!req->has_parent()) { + // stop early if the parent went away - it just means + // another flatten finished first or the image was resized + delete req; + return 1; + } + + req->send(); + return 0; + } + +private: + ::SnapContext m_snapc; + uint64_t m_object_no; +}; + +template <typename I> +bool FlattenRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + if (r < 0) { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + return true; +} + +template <typename I> +void FlattenRequest<I>::send_op() { + flatten_objects(); +} + +template <typename I> +void FlattenRequest<I>::flatten_objects() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + assert(image_ctx.owner_lock.is_locked()); + auto ctx = create_context_callback< + FlattenRequest<I>, + &FlattenRequest<I>::handle_flatten_objects>(this); + typename AsyncObjectThrottle<I>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr<C_FlattenObject<I> >(), + boost::lambda::_1, &image_ctx, m_snapc, boost::lambda::_2)); + AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>( + this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, m_overlap_objects); + throttle->start_ops( + image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops")); +} + +template <typename I> +void FlattenRequest<I>::handle_flatten_objects(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r == -ERESTART) { + ldout(cct, 5) << "flatten operation interrupted" << dendl; + this->complete(r); + return; + } else if (r < 0) { + lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl; + this->complete(r); + return; + } + + detach_child(); +} + +template <typename I> +void FlattenRequest<I>::detach_child() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + // should have been canceled prior to releasing lock + image_ctx.owner_lock.get_read(); + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + // if there are no snaps, remove from the children object as well + // (if snapshots remain, they have their own parent info, and the child + // will be removed when the last snap goes away) + image_ctx.snap_lock.get_read(); + if ((image_ctx.features & RBD_FEATURE_DEEP_FLATTEN) == 0 && + !image_ctx.snaps.empty()) { + image_ctx.snap_lock.put_read(); + image_ctx.owner_lock.put_read(); + detach_parent(); + return; + } + image_ctx.snap_lock.put_read(); + + ldout(cct, 5) << dendl; + auto ctx = create_context_callback< + FlattenRequest<I>, + &FlattenRequest<I>::handle_detach_child>(this); + auto req = image::DetachChildRequest<I>::create(image_ctx, ctx); + req->send(); + image_ctx.owner_lock.put_read(); +} + +template <typename I> +void FlattenRequest<I>::handle_detach_child(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + lderr(cct) << "detach encountered an error: " << cpp_strerror(r) << dendl; + this->complete(r); + return; + } + + detach_parent(); +} + +template <typename I> +void FlattenRequest<I>::detach_parent() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + // should have been canceled prior to releasing lock + image_ctx.owner_lock.get_read(); + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + // stop early if the parent went away - it just means + // another flatten finished first, so this one is useless. + image_ctx.parent_lock.get_read(); + if (!image_ctx.parent) { + ldout(cct, 5) << "image already flattened" << dendl; + image_ctx.parent_lock.put_read(); + image_ctx.owner_lock.put_read(); + this->complete(0); + return; + } + image_ctx.parent_lock.put_read(); + + // remove parent from this (base) image + auto ctx = create_context_callback< + FlattenRequest<I>, + &FlattenRequest<I>::handle_detach_parent>(this); + auto req = image::DetachParentRequest<I>::create(image_ctx, ctx); + req->send(); + image_ctx.owner_lock.put_read(); +} + +template <typename I> +void FlattenRequest<I>::handle_detach_parent(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "remove parent encountered an error: " << cpp_strerror(r) + << dendl; + } + + this->complete(r); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::FlattenRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/FlattenRequest.h b/src/librbd/operation/FlattenRequest.h new file mode 100644 index 00000000..78730963 --- /dev/null +++ b/src/librbd/operation/FlattenRequest.h @@ -0,0 +1,77 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H +#define CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H + +#include "librbd/operation/Request.h" +#include "common/snap_types.h" + +namespace librbd { + +class ImageCtx; +class ProgressContext; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class FlattenRequest : public Request<ImageCtxT> +{ +public: + FlattenRequest(ImageCtxT &image_ctx, Context *on_finish, + uint64_t overlap_objects, const ::SnapContext &snapc, + ProgressContext &prog_ctx) + : Request<ImageCtxT>(image_ctx, on_finish), + m_overlap_objects(overlap_objects), + m_snapc(snapc), m_prog_ctx(prog_ctx) { + } + +protected: + void send_op() override; + bool should_complete(int r) override; + + journal::Event create_event(uint64_t op_tid) const override { + return journal::FlattenEvent(op_tid); + } + +private: + /** + * @verbatim + * + * <start> + * | + * v + * FLATTEN_OBJECTS + * | + * v + * DETACH_CHILD + * | + * v + * DETACH_PARENT + * | + * v + * <finish> + * + * @endverbatim + */ + + uint64_t m_overlap_objects; + ::SnapContext m_snapc; + ProgressContext &m_prog_ctx; + + void flatten_objects(); + void handle_flatten_objects(int r); + + void detach_child(); + void handle_detach_child(int r); + + void detach_parent(); + void handle_detach_parent(int r); + +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::FlattenRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H diff --git a/src/librbd/operation/MetadataRemoveRequest.cc b/src/librbd/operation/MetadataRemoveRequest.cc new file mode 100644 index 00000000..828e7a5b --- /dev/null +++ b/src/librbd/operation/MetadataRemoveRequest.cc @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/MetadataRemoveRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::MetadataRemoveRequest: " + +namespace librbd { +namespace operation { + +template <typename I> +MetadataRemoveRequest<I>::MetadataRemoveRequest(I &image_ctx, + Context *on_finish, + const std::string &key) + : Request<I>(image_ctx, on_finish), m_key(key) { +} + +template <typename I> +void MetadataRemoveRequest<I>::send_op() { + send_metadata_remove(); +} + +template <typename I> +bool MetadataRemoveRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << " r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + return true; +} + +template <typename I> +void MetadataRemoveRequest<I>::send_metadata_remove() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + librados::ObjectWriteOperation op; + cls_client::metadata_remove(&op, m_key); + + librados::AioCompletion *comp = this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::MetadataRemoveRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/MetadataRemoveRequest.h b/src/librbd/operation/MetadataRemoveRequest.h new file mode 100644 index 00000000..1d7f2a46 --- /dev/null +++ b/src/librbd/operation/MetadataRemoveRequest.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offremove:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_METADATA_REMOVE_REQUEST_H +#define CEPH_LIBRBD_OPERATION_METADATA_REMOVE_REQUEST_H + +#include "librbd/operation/Request.h" +#include <iosfwd> +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class MetadataRemoveRequest : public Request<ImageCtxT> { +public: + MetadataRemoveRequest(ImageCtxT &image_ctx, Context *on_finish, + const std::string &key); + +protected: + void send_op() override; + bool should_complete(int r) override; + + journal::Event create_event(uint64_t op_tid) const override { + return journal::MetadataRemoveEvent(op_tid, m_key); + } + +private: + std::string m_key; + + void send_metadata_remove(); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::MetadataRemoveRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_METADATA_REMOVE_REQUEST_H diff --git a/src/librbd/operation/MetadataSetRequest.cc b/src/librbd/operation/MetadataSetRequest.cc new file mode 100644 index 00000000..760e9b1e --- /dev/null +++ b/src/librbd/operation/MetadataSetRequest.cc @@ -0,0 +1,62 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/MetadataSetRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::MetadataSetRequest: " + +namespace librbd { +namespace operation { + +template <typename I> +MetadataSetRequest<I>::MetadataSetRequest(I &image_ctx, + Context *on_finish, + const std::string &key, + const std::string &value) + : Request<I>(image_ctx, on_finish), m_key(key), m_value(value) { +} + +template <typename I> +void MetadataSetRequest<I>::send_op() { + send_metadata_set(); +} + +template <typename I> +bool MetadataSetRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << " r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + return true; +} + +template <typename I> +void MetadataSetRequest<I>::send_metadata_set() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << dendl; + + m_data[m_key].append(m_value); + librados::ObjectWriteOperation op; + cls_client::metadata_set(&op, m_data); + + librados::AioCompletion *comp = this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::MetadataSetRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/MetadataSetRequest.h b/src/librbd/operation/MetadataSetRequest.h new file mode 100644 index 00000000..5f8daa2f --- /dev/null +++ b/src/librbd/operation/MetadataSetRequest.h @@ -0,0 +1,47 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_METADATA_SET_REQUEST_H +#define CEPH_LIBRBD_OPERATION_METADATA_SET_REQUEST_H + +#include "librbd/operation/Request.h" +#include "include/buffer.h" +#include <string> +#include <map> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class MetadataSetRequest : public Request<ImageCtxT> { +public: + MetadataSetRequest(ImageCtxT &image_ctx, Context *on_finish, + const std::string &key, const std::string &value); + +protected: + void send_op() override; + bool should_complete(int r) override; + + journal::Event create_event(uint64_t op_tid) const override { + return journal::MetadataSetEvent(op_tid, m_key, m_value); + } + +private: + std::string m_key; + std::string m_value; + std::map<std::string, bufferlist> m_data; + + void send_metadata_set(); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::MetadataSetRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_METADATA_SET_REQUEST_H diff --git a/src/librbd/operation/MigrateRequest.cc b/src/librbd/operation/MigrateRequest.cc new file mode 100644 index 00000000..e3ddff0e --- /dev/null +++ b/src/librbd/operation/MigrateRequest.cc @@ -0,0 +1,235 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/MigrateRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/deep_copy/ObjectCopyRequest.h" +#include "librbd/io/AsyncOperation.h" +#include "librbd/io/ImageRequestWQ.h" +#include "librbd/io/ObjectRequest.h" +#include "osdc/Striper.h" +#include <boost/lambda/bind.hpp> +#include <boost/lambda/construct.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::MigrateRequest: " << this << " " \ + << __func__ << ": " + +namespace librbd { +namespace operation { + +using util::create_context_callback; +using util::create_async_context_callback; + +namespace { + +template <typename I> +class C_MigrateObject : public C_AsyncObjectThrottle<I> { +public: + C_MigrateObject(AsyncObjectThrottle<I> &throttle, I *image_ctx, + ::SnapContext snapc, uint64_t object_no) + : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_snapc(snapc), + m_object_no(object_no) { + } + + int send() override { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + CephContext *cct = image_ctx.cct; + + if (image_ctx.exclusive_lock != nullptr && + !image_ctx.exclusive_lock->is_lock_owner()) { + ldout(cct, 1) << "lost exclusive lock during migrate" << dendl; + return -ERESTART; + } + + start_async_op(); + return 0; + } + +private: + uint64_t m_object_size; + ::SnapContext m_snapc; + uint64_t m_object_no; + + io::AsyncOperation *m_async_op = nullptr; + + void start_async_op() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << dendl; + + ceph_assert(m_async_op == nullptr); + m_async_op = new io::AsyncOperation(); + m_async_op->start_op(image_ctx); + + if (!image_ctx.io_work_queue->writes_blocked()) { + migrate_object(); + return; + } + + auto ctx = create_async_context_callback( + image_ctx, create_context_callback< + C_MigrateObject<I>, &C_MigrateObject<I>::handle_start_async_op>(this)); + m_async_op->finish_op(); + delete m_async_op; + m_async_op = nullptr; + image_ctx.io_work_queue->wait_on_writes_unblocked(ctx); + } + + void handle_start_async_op(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to start async op: " << cpp_strerror(r) << dendl; + this->complete(r); + return; + } + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + start_async_op(); + } + + bool is_within_overlap_bounds() { + I &image_ctx = this->m_image_ctx; + RWLock::RLocker snap_locker(image_ctx.snap_lock); + + auto overlap = std::min(image_ctx.size, image_ctx.migration_info.overlap); + return overlap > 0 && + Striper::get_num_objects(image_ctx.layout, overlap) > m_object_no; + } + + void migrate_object() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + CephContext *cct = image_ctx.cct; + + auto ctx = create_context_callback< + C_MigrateObject<I>, &C_MigrateObject<I>::handle_migrate_object>(this); + + if (is_within_overlap_bounds()) { + bufferlist bl; + string oid = image_ctx.get_object_name(m_object_no); + auto req = new io::ObjectWriteRequest<I>(&image_ctx, oid, m_object_no, 0, + std::move(bl), m_snapc, 0, {}, + ctx); + + ldout(cct, 20) << "copyup object req " << req << ", object_no " + << m_object_no << dendl; + + req->send(); + } else { + ceph_assert(image_ctx.parent != nullptr); + + auto req = deep_copy::ObjectCopyRequest<I>::create( + image_ctx.parent, &image_ctx, 0, 0, image_ctx.migration_info.snap_map, + m_object_no, image_ctx.migration_info.flatten, ctx); + + ldout(cct, 20) << "deep copy object req " << req << ", object_no " + << m_object_no << dendl; + req->send(); + } + } + + void handle_migrate_object(int r) { + CephContext *cct = this->m_image_ctx.cct; + ldout(cct, 10) << "r=" << r << dendl; + + if (r == -ENOENT) { + r = 0; + } + + m_async_op->finish_op(); + delete m_async_op; + this->complete(r); + } +}; + +} // anonymous namespace + +template <typename I> +void MigrateRequest<I>::send_op() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << dendl; + + migrate_objects(); +} + +template <typename I> +bool MigrateRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + + return true; +} + +template <typename I> +void MigrateRequest<I>::migrate_objects() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ceph_assert(image_ctx.owner_lock.is_locked()); + + uint64_t overlap_objects = get_num_overlap_objects(); + + ldout(cct, 10) << "from 0 to " << overlap_objects << dendl; + + auto ctx = create_context_callback< + MigrateRequest<I>, &MigrateRequest<I>::handle_migrate_objects>(this); + + typename AsyncObjectThrottle<I>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr<C_MigrateObject<I> >(), + boost::lambda::_1, &image_ctx, image_ctx.snapc, boost::lambda::_2)); + AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>( + this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, overlap_objects); + throttle->start_ops( + image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops")); +} + +template <typename I> +void MigrateRequest<I>::handle_migrate_objects(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to migrate objects: " << cpp_strerror(r) << dendl; + } + + this->complete(r); +} + +template <typename I> +uint64_t MigrateRequest<I>::get_num_overlap_objects() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << dendl; + + RWLock::RLocker snap_locker(image_ctx.snap_lock); + RWLock::RLocker parent_locker(image_ctx.parent_lock); + + auto overlap = image_ctx.migration_info.overlap; + + return overlap > 0 ? + Striper::get_num_objects(image_ctx.layout, overlap) : 0; +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::MigrateRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/MigrateRequest.h b/src/librbd/operation/MigrateRequest.h new file mode 100644 index 00000000..99f3b012 --- /dev/null +++ b/src/librbd/operation/MigrateRequest.h @@ -0,0 +1,69 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_OPERATION_MIGRATE_REQUEST_H +#define CEPH_LIBRBD_OPERATION_MIGRATE_REQUEST_H + +#include "librbd/operation/Request.h" +#include "common/snap_types.h" +#include "librbd/Types.h" + +namespace librbd { + +class ImageCtx; +class ProgressContext; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class MigrateRequest : public Request<ImageCtxT> +{ +public: + MigrateRequest(ImageCtxT &image_ctx, Context *on_finish, + ProgressContext &prog_ctx) + : Request<ImageCtxT>(image_ctx, on_finish), m_prog_ctx(prog_ctx) { + } + +protected: + void send_op() override; + bool should_complete(int r) override; + bool can_affect_io() const override { + return true; + } + journal::Event create_event(uint64_t op_tid) const override { + ceph_abort(); + return journal::UnknownEvent(); + } + +private: + /** + * Migrate goes through the following state machine to copy objects + * from the parent (migrating source) image: + * + * @verbatim + * + * <start> + * | + * v + * MIGRATE_OBJECTS + * | + * v + * <finish> + * + * @endverbatim + * + */ + + ProgressContext &m_prog_ctx; + + void migrate_objects(); + void handle_migrate_objects(int r); + + uint64_t get_num_overlap_objects(); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::MigrateRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_MIGRATE_REQUEST_H diff --git a/src/librbd/operation/ObjectMapIterate.cc b/src/librbd/operation/ObjectMapIterate.cc new file mode 100644 index 00000000..66958416 --- /dev/null +++ b/src/librbd/operation/ObjectMapIterate.cc @@ -0,0 +1,310 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/ObjectMapIterate.h" +#include "common/dout.h" +#include "common/errno.h" +#include "osdc/Striper.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageWatcher.h" +#include "librbd/internal.h" +#include "librbd/ObjectMap.h" +#include "librbd/operation/ResizeRequest.h" +#include "librbd/object_map/InvalidateRequest.h" +#include "librbd/Utils.h" +#include <boost/lambda/bind.hpp> +#include <boost/lambda/construct.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::ObjectMapIterateRequest: " + +namespace librbd { +namespace operation { + +namespace { + +template <typename I> +class C_VerifyObjectCallback : public C_AsyncObjectThrottle<I> { +public: + C_VerifyObjectCallback(AsyncObjectThrottle<I> &throttle, I *image_ctx, + uint64_t snap_id, uint64_t object_no, + ObjectIterateWork<I> handle_mismatch, + std::atomic_flag *invalidate) + : C_AsyncObjectThrottle<I>(throttle, *image_ctx), + m_snap_id(snap_id), m_object_no(object_no), + m_oid(image_ctx->get_object_name(m_object_no)), + m_handle_mismatch(handle_mismatch), + m_invalidate(invalidate) + { + m_io_ctx.dup(image_ctx->data_ctx); + m_io_ctx.snap_set_read(CEPH_SNAPDIR); + } + + void complete(int r) override { + I &image_ctx = this->m_image_ctx; + if (should_complete(r)) { + ldout(image_ctx.cct, 20) << m_oid << " C_VerifyObjectCallback completed " + << dendl; + m_io_ctx.close(); + + this->finish(r); + delete this; + } + } + + int send() override { + send_list_snaps(); + return 0; + } + +private: + librados::IoCtx m_io_ctx; + uint64_t m_snap_id; + uint64_t m_object_no; + std::string m_oid; + ObjectIterateWork<I> m_handle_mismatch; + std::atomic_flag *m_invalidate; + + librados::snap_set_t m_snap_set; + int m_snap_list_ret = 0; + + bool should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + if (r == 0) { + r = m_snap_list_ret; + } + if (r < 0 && r != -ENOENT) { + lderr(cct) << m_oid << " C_VerifyObjectCallback::should_complete: " + << "encountered an error: " << cpp_strerror(r) << dendl; + return true; + } + + ldout(cct, 20) << m_oid << " C_VerifyObjectCallback::should_complete: " + << " r=" + << r << dendl; + return object_map_action(get_object_state()); + } + + void send_list_snaps() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + ldout(image_ctx.cct, 5) << m_oid + << " C_VerifyObjectCallback::send_list_snaps" + << dendl; + + librados::ObjectReadOperation op; + op.list_snaps(&m_snap_set, &m_snap_list_ret); + + librados::AioCompletion *comp = util::create_rados_callback(this); + int r = m_io_ctx.aio_operate(m_oid, comp, &op, NULL); + ceph_assert(r == 0); + comp->release(); + } + + uint8_t get_object_state() { + I &image_ctx = this->m_image_ctx; + RWLock::RLocker snap_locker(image_ctx.snap_lock); + for (std::vector<librados::clone_info_t>::const_iterator r = + m_snap_set.clones.begin(); r != m_snap_set.clones.end(); ++r) { + librados::snap_t from_snap_id; + librados::snap_t to_snap_id; + if (r->cloneid == librados::SNAP_HEAD) { + from_snap_id = next_valid_snap_id(m_snap_set.seq + 1); + to_snap_id = librados::SNAP_HEAD; + } else { + from_snap_id = next_valid_snap_id(r->snaps[0]); + to_snap_id = r->snaps[r->snaps.size()-1]; + } + + if (to_snap_id < m_snap_id) { + continue; + } else if (m_snap_id < from_snap_id) { + break; + } + + if ((image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0 && + from_snap_id != m_snap_id) { + return OBJECT_EXISTS_CLEAN; + } + return OBJECT_EXISTS; + } + return OBJECT_NONEXISTENT; + } + + uint64_t next_valid_snap_id(uint64_t snap_id) { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.snap_lock.is_locked()); + + std::map<librados::snap_t, SnapInfo>::iterator it = + image_ctx.snap_info.lower_bound(snap_id); + if (it == image_ctx.snap_info.end()) { + return CEPH_NOSNAP; + } + return it->first; + } + + bool object_map_action(uint8_t new_state) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + RWLock::RLocker owner_locker(image_ctx.owner_lock); + + // should have been canceled prior to releasing lock + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + RWLock::RLocker snap_locker(image_ctx.snap_lock); + ceph_assert(image_ctx.object_map != nullptr); + + RWLock::WLocker l(image_ctx.object_map_lock); + uint8_t state = (*image_ctx.object_map)[m_object_no]; + + ldout(cct, 10) << "C_VerifyObjectCallback::object_map_action" + << " object " << image_ctx.get_object_name(m_object_no) + << " state " << (int)state + << " new_state " << (int)new_state << dendl; + + if (state != new_state) { + int r = 0; + + ceph_assert(m_handle_mismatch); + r = m_handle_mismatch(image_ctx, m_object_no, state, new_state); + if (r) { + lderr(cct) << "object map error: object " + << image_ctx.get_object_name(m_object_no) + << " marked as " << (int)state << ", but should be " + << (int)new_state << dendl; + m_invalidate->test_and_set(); + } else { + ldout(cct, 1) << "object map inconsistent: object " + << image_ctx.get_object_name(m_object_no) + << " marked as " << (int)state << ", but should be " + << (int)new_state << dendl; + } + } + + return true; + } +}; + +} // anonymous namespace + +template <typename I> +void ObjectMapIterateRequest<I>::send() { + if (!m_image_ctx.data_ctx.is_valid()) { + this->async_complete(-ENODEV); + return; + } + + send_verify_objects(); +} + +template <typename I> +bool ObjectMapIterateRequest<I>::should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl; + + if (r == -ENODEV) { + lderr(cct) << "missing data pool" << dendl; + return true; + } + + if (r < 0) { + lderr(cct) << "object map operation encountered an error: " + << cpp_strerror(r) << dendl; + } + + RWLock::RLocker owner_lock(m_image_ctx.owner_lock); + switch (m_state) { + case STATE_VERIFY_OBJECTS: + if (m_invalidate.test_and_set()) { + send_invalidate_object_map(); + return false; + } else if (r == 0) { + return true; + } + break; + + case STATE_INVALIDATE_OBJECT_MAP: + if (r == 0) { + return true; + } + break; + + default: + ceph_abort(); + break; + } + + if (r < 0) { + return true; + } + + return false; +} + +template <typename I> +void ObjectMapIterateRequest<I>::send_verify_objects() { + ceph_assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; + + uint64_t snap_id; + uint64_t num_objects; + { + RWLock::RLocker l(m_image_ctx.snap_lock); + snap_id = m_image_ctx.snap_id; + num_objects = Striper::get_num_objects(m_image_ctx.layout, + m_image_ctx.get_image_size(snap_id)); + } + ldout(cct, 5) << this << " send_verify_objects" << dendl; + + m_state = STATE_VERIFY_OBJECTS; + + typename AsyncObjectThrottle<I>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr<C_VerifyObjectCallback<I> >(), + boost::lambda::_1, &m_image_ctx, snap_id, + boost::lambda::_2, m_handle_mismatch, &m_invalidate)); + AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>( + this, m_image_ctx, context_factory, this->create_callback_context(), + &m_prog_ctx, 0, num_objects); + throttle->start_ops( + m_image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops")); +} + +template <typename I> +uint64_t ObjectMapIterateRequest<I>::get_image_size() const { + ceph_assert(m_image_ctx.snap_lock.is_locked()); + if (m_image_ctx.snap_id == CEPH_NOSNAP) { + if (!m_image_ctx.resize_reqs.empty()) { + return m_image_ctx.resize_reqs.front()->get_image_size(); + } else { + return m_image_ctx.size; + } + } + return m_image_ctx.get_image_size(m_image_ctx.snap_id); +} + +template <typename I> +void ObjectMapIterateRequest<I>::send_invalidate_object_map() { + CephContext *cct = m_image_ctx.cct; + + ldout(cct, 5) << this << " send_invalidate_object_map" << dendl; + m_state = STATE_INVALIDATE_OBJECT_MAP; + + object_map::InvalidateRequest<I>*req = + object_map::InvalidateRequest<I>::create(m_image_ctx, m_image_ctx.snap_id, + true, + this->create_callback_context()); + + ceph_assert(m_image_ctx.owner_lock.is_locked()); + RWLock::WLocker snap_locker(m_image_ctx.snap_lock); + req->send(); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::ObjectMapIterateRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/ObjectMapIterate.h b/src/librbd/operation/ObjectMapIterate.h new file mode 100644 index 00000000..14215902 --- /dev/null +++ b/src/librbd/operation/ObjectMapIterate.h @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_OPERATION_OBJECT_MAP_ITERATE_H +#define CEPH_LIBRBD_OPERATION_OBJECT_MAP_ITERATE_H + +#include <iostream> +#include <atomic> + +#include "include/int_types.h" +#include "include/rbd/object_map_types.h" +#include "librbd/AsyncRequest.h" + +namespace librbd { + +class ImageCtx; +class ProgressContext; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +using ObjectIterateWork = bool(*)(ImageCtxT &image_ctx, + uint64_t object_no, + uint8_t current_state, + uint8_t new_state); + +template <typename ImageCtxT = ImageCtx> +class ObjectMapIterateRequest : public AsyncRequest<ImageCtxT> { +public: + ObjectMapIterateRequest(ImageCtxT &image_ctx, Context *on_finish, + ProgressContext &prog_ctx, + ObjectIterateWork<ImageCtxT> handle_mismatch) + : AsyncRequest<ImageCtxT>(image_ctx, on_finish), m_image_ctx(image_ctx), + m_prog_ctx(prog_ctx), m_handle_mismatch(handle_mismatch) + { + } + + void send() override; + +protected: + bool should_complete(int r) override; + +private: + enum State { + STATE_VERIFY_OBJECTS, + STATE_INVALIDATE_OBJECT_MAP + }; + + ImageCtxT &m_image_ctx; + ProgressContext &m_prog_ctx; + ObjectIterateWork<ImageCtxT> m_handle_mismatch; + std::atomic_flag m_invalidate = ATOMIC_FLAG_INIT; + State m_state = STATE_VERIFY_OBJECTS; + + void send_verify_objects(); + void send_invalidate_object_map(); + + uint64_t get_image_size() const; +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::ObjectMapIterateRequest<librbd::ImageCtx>; + +#endif diff --git a/src/librbd/operation/RebuildObjectMapRequest.cc b/src/librbd/operation/RebuildObjectMapRequest.cc new file mode 100644 index 00000000..f923f36c --- /dev/null +++ b/src/librbd/operation/RebuildObjectMapRequest.cc @@ -0,0 +1,248 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/RebuildObjectMapRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "osdc/Striper.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/ObjectMap.h" +#include "librbd/operation/ResizeRequest.h" +#include "librbd/operation/TrimRequest.h" +#include "librbd/operation/ObjectMapIterate.h" +#include "librbd/Utils.h" +#include <boost/lambda/bind.hpp> +#include <boost/lambda/construct.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::RebuildObjectMapRequest: " + +namespace librbd { +namespace operation { + +template <typename I> +void RebuildObjectMapRequest<I>::send() { + send_resize_object_map(); +} + +template <typename I> +bool RebuildObjectMapRequest<I>::should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl; + + RWLock::RLocker owner_lock(m_image_ctx.owner_lock); + switch (m_state) { + case STATE_RESIZE_OBJECT_MAP: + ldout(cct, 5) << "RESIZE_OBJECT_MAP" << dendl; + if (r == -ESTALE && !m_attempted_trim) { + // objects are still flagged as in-use -- delete them + m_attempted_trim = true; + send_trim_image(); + return false; + } else if (r == 0) { + send_verify_objects(); + } + break; + + case STATE_TRIM_IMAGE: + ldout(cct, 5) << "TRIM_IMAGE" << dendl; + if (r == 0) { + send_resize_object_map(); + } + break; + + case STATE_VERIFY_OBJECTS: + ldout(cct, 5) << "VERIFY_OBJECTS" << dendl; + if (r == 0) { + send_save_object_map(); + } + break; + + case STATE_SAVE_OBJECT_MAP: + ldout(cct, 5) << "SAVE_OBJECT_MAP" << dendl; + if (r == 0) { + send_update_header(); + } + break; + case STATE_UPDATE_HEADER: + ldout(cct, 5) << "UPDATE_HEADER" << dendl; + if (r == 0) { + return true; + } + break; + + default: + ceph_abort(); + break; + } + + if (r == -ERESTART) { + ldout(cct, 5) << "rebuild object map operation interrupted" << dendl; + return true; + } else if (r < 0) { + lderr(cct) << "rebuild object map encountered an error: " << cpp_strerror(r) + << dendl; + return true; + } + return false; +} + +template <typename I> +void RebuildObjectMapRequest<I>::send_resize_object_map() { + ceph_assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; + + m_image_ctx.snap_lock.get_read(); + ceph_assert(m_image_ctx.object_map != nullptr); + + uint64_t size = get_image_size(); + uint64_t num_objects = Striper::get_num_objects(m_image_ctx.layout, size); + + if (m_image_ctx.object_map->size() == num_objects) { + m_image_ctx.snap_lock.put_read(); + send_verify_objects(); + return; + } + + ldout(cct, 5) << this << " send_resize_object_map" << dendl; + m_state = STATE_RESIZE_OBJECT_MAP; + + // should have been canceled prior to releasing lock + ceph_assert(m_image_ctx.exclusive_lock == nullptr || + m_image_ctx.exclusive_lock->is_lock_owner()); + + m_image_ctx.object_map->aio_resize(size, OBJECT_NONEXISTENT, + this->create_callback_context()); + m_image_ctx.snap_lock.put_read(); +} + +template <typename I> +void RebuildObjectMapRequest<I>::send_trim_image() { + CephContext *cct = m_image_ctx.cct; + + RWLock::RLocker l(m_image_ctx.owner_lock); + + // should have been canceled prior to releasing lock + ceph_assert(m_image_ctx.exclusive_lock == nullptr || + m_image_ctx.exclusive_lock->is_lock_owner()); + ldout(cct, 5) << this << " send_trim_image" << dendl; + m_state = STATE_TRIM_IMAGE; + + uint64_t new_size; + uint64_t orig_size; + { + RWLock::RLocker l(m_image_ctx.snap_lock); + ceph_assert(m_image_ctx.object_map != nullptr); + + new_size = get_image_size(); + orig_size = m_image_ctx.get_object_size() * + m_image_ctx.object_map->size(); + } + TrimRequest<I> *req = TrimRequest<I>::create(m_image_ctx, + this->create_callback_context(), + orig_size, new_size, m_prog_ctx); + req->send(); +} + +template <typename I> +bool update_object_map(I& image_ctx, uint64_t object_no, uint8_t current_state, + uint8_t new_state) { + CephContext *cct = image_ctx.cct; + uint64_t snap_id = image_ctx.snap_id; + + uint8_t state = (*image_ctx.object_map)[object_no]; + if (state == OBJECT_EXISTS && new_state == OBJECT_NONEXISTENT && + snap_id == CEPH_NOSNAP) { + // might be writing object to OSD concurrently + new_state = state; + } + + if (new_state != state) { + ldout(cct, 15) << image_ctx.get_object_name(object_no) + << " rebuild updating object map " + << static_cast<uint32_t>(state) << "->" + << static_cast<uint32_t>(new_state) << dendl; + (*image_ctx.object_map)[object_no] = new_state; + } + return false; +} + +template <typename I> +void RebuildObjectMapRequest<I>::send_verify_objects() { + ceph_assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; + + m_state = STATE_VERIFY_OBJECTS; + ldout(cct, 5) << this << " send_verify_objects" << dendl; + + ObjectMapIterateRequest<I> *req = + new ObjectMapIterateRequest<I>(m_image_ctx, + this->create_callback_context(), + m_prog_ctx, update_object_map); + + req->send(); +} + +template <typename I> +void RebuildObjectMapRequest<I>::send_save_object_map() { + ceph_assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; + + ldout(cct, 5) << this << " send_save_object_map" << dendl; + m_state = STATE_SAVE_OBJECT_MAP; + + // should have been canceled prior to releasing lock + ceph_assert(m_image_ctx.exclusive_lock == nullptr || + m_image_ctx.exclusive_lock->is_lock_owner()); + + RWLock::RLocker snap_locker(m_image_ctx.snap_lock); + ceph_assert(m_image_ctx.object_map != nullptr); + m_image_ctx.object_map->aio_save(this->create_callback_context()); +} + +template <typename I> +void RebuildObjectMapRequest<I>::send_update_header() { + ceph_assert(m_image_ctx.owner_lock.is_locked()); + + // should have been canceled prior to releasing lock + ceph_assert(m_image_ctx.exclusive_lock == nullptr || + m_image_ctx.exclusive_lock->is_lock_owner()); + + ldout(m_image_ctx.cct, 5) << this << " send_update_header" << dendl; + m_state = STATE_UPDATE_HEADER; + + librados::ObjectWriteOperation op; + + uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID | RBD_FLAG_FAST_DIFF_INVALID; + cls_client::set_flags(&op, m_image_ctx.snap_id, 0, flags); + + librados::AioCompletion *comp = this->create_callback_completion(); + int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); + + RWLock::WLocker snap_locker(m_image_ctx.snap_lock); + m_image_ctx.update_flags(m_image_ctx.snap_id, flags, false); +} + +template <typename I> +uint64_t RebuildObjectMapRequest<I>::get_image_size() const { + ceph_assert(m_image_ctx.snap_lock.is_locked()); + if (m_image_ctx.snap_id == CEPH_NOSNAP) { + if (!m_image_ctx.resize_reqs.empty()) { + return m_image_ctx.resize_reqs.front()->get_image_size(); + } else { + return m_image_ctx.size; + } + } + return m_image_ctx.get_image_size(m_image_ctx.snap_id); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::RebuildObjectMapRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/RebuildObjectMapRequest.h b/src/librbd/operation/RebuildObjectMapRequest.h new file mode 100644 index 00000000..c7f1aa3b --- /dev/null +++ b/src/librbd/operation/RebuildObjectMapRequest.h @@ -0,0 +1,84 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H +#define CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H + +#include "include/int_types.h" +#include "librbd/AsyncRequest.h" + +namespace librbd { + +class ImageCtx; +class ProgressContext; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class RebuildObjectMapRequest : public AsyncRequest<ImageCtxT> { +public: + + RebuildObjectMapRequest(ImageCtxT &image_ctx, Context *on_finish, + ProgressContext &prog_ctx) + : AsyncRequest<ImageCtxT>(image_ctx, on_finish), m_image_ctx(image_ctx), + m_prog_ctx(prog_ctx), m_attempted_trim(false) + { + } + + void send() override; + +protected: + bool should_complete(int r) override; + +private: + /** + * Rebuild object map goes through the following state machine to + * verify per-object state: + * + * <start> + * . | . . . . . . . . . . + * . | . . + * . v v . + * . STATE_RESIZE_OBJECT_MAP . . . > STATE_TRIM_IMAGE + * . | + * . v + * . . . > STATE_VERIFY_OBJECTS + * | + * v + * STATE_SAVE_OBJECT_MAP + * | + * v + * STATE_UPDATE_HEADER + * + * The _RESIZE_OBJECT_MAP state will be skipped if the object map + * is appropriately sized for the image. The _TRIM_IMAGE state will + * only be hit if the resize failed due to an in-use object. + */ + enum State { + STATE_RESIZE_OBJECT_MAP, + STATE_TRIM_IMAGE, + STATE_VERIFY_OBJECTS, + STATE_SAVE_OBJECT_MAP, + STATE_UPDATE_HEADER + }; + + ImageCtxT &m_image_ctx; + ProgressContext &m_prog_ctx; + State m_state = STATE_RESIZE_OBJECT_MAP; + bool m_attempted_trim; + + void send_resize_object_map(); + void send_trim_image(); + void send_verify_objects(); + void send_save_object_map(); + void send_update_header(); + + uint64_t get_image_size() const; + +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::RebuildObjectMapRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H diff --git a/src/librbd/operation/RenameRequest.cc b/src/librbd/operation/RenameRequest.cc new file mode 100644 index 00000000..44c5e2cd --- /dev/null +++ b/src/librbd/operation/RenameRequest.cc @@ -0,0 +1,212 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/RenameRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "include/rados/librados.hpp" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/Utils.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::operation::RenameRequest: " + +namespace librbd { +namespace operation { + +namespace { + +template <typename I> +std::ostream& operator<<(std::ostream& os, + const typename RenameRequest<I>::State& state) { + switch(state) { + case RenameRequest<I>::STATE_READ_SOURCE_HEADER: + os << "READ_SOURCE_HEADER"; + break; + case RenameRequest<I>::STATE_WRITE_DEST_HEADER: + os << "WRITE_DEST_HEADER"; + break; + case RenameRequest<I>::STATE_UPDATE_DIRECTORY: + os << "UPDATE_DIRECTORY"; + break; + case RenameRequest<I>::STATE_REMOVE_SOURCE_HEADER: + os << "REMOVE_SOURCE_HEADER"; + break; + default: + os << "UNKNOWN (" << static_cast<uint32_t>(state) << ")"; + break; + } + return os; +} + +} // anonymous namespace + +template <typename I> +RenameRequest<I>::RenameRequest(I &image_ctx, Context *on_finish, + const std::string &dest_name) + : Request<I>(image_ctx, on_finish), m_dest_name(dest_name), + m_source_oid(image_ctx.old_format ? util::old_header_name(image_ctx.name) : + util::id_obj_name(image_ctx.name)), + m_dest_oid(image_ctx.old_format ? util::old_header_name(dest_name) : + util::id_obj_name(dest_name)) { +} + +template <typename I> +void RenameRequest<I>::send_op() { + send_read_source_header(); +} + +template <typename I> +bool RenameRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", " + << "r=" << r << dendl; + r = filter_return_code(r); + if (r < 0) { + if (r == -EEXIST) { + ldout(cct, 1) << "image already exists" << dendl; + } else { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + return true; + } + + if (m_state == STATE_UPDATE_DIRECTORY) { + // update in-memory name before removing source header + apply(); + } else if (m_state == STATE_REMOVE_SOURCE_HEADER) { + return true; + } + + RWLock::RLocker owner_lock(image_ctx.owner_lock); + switch (m_state) { + case STATE_READ_SOURCE_HEADER: + send_write_destination_header(); + break; + case STATE_WRITE_DEST_HEADER: + send_update_directory(); + break; + case STATE_UPDATE_DIRECTORY: + send_remove_source_header(); + break; + default: + ceph_abort(); + break; + } + return false; +} + +template <typename I> +int RenameRequest<I>::filter_return_code(int r) const { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (m_state == STATE_READ_SOURCE_HEADER && r == -ENOENT) { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.name == m_dest_name) { + // signal that replay raced with itself + return -EEXIST; + } + } else if (m_state == STATE_REMOVE_SOURCE_HEADER && r < 0) { + if (r != -ENOENT) { + lderr(cct) << "warning: couldn't remove old source object (" + << m_source_oid << ")" << dendl; + } + return 0; + } + return r; +} + +template <typename I> +void RenameRequest<I>::send_read_source_header() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + m_state = STATE_READ_SOURCE_HEADER; + + librados::ObjectReadOperation op; + op.read(0, 0, NULL, NULL); + + // TODO: old code read omap values but there are no omap values on the + // old format header nor the new format id object + librados::AioCompletion *rados_completion = this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(m_source_oid, rados_completion, &op, + &m_header_bl); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +void RenameRequest<I>::send_write_destination_header() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + m_state = STATE_WRITE_DEST_HEADER; + + librados::ObjectWriteOperation op; + op.create(true); + op.write_full(m_header_bl); + + librados::AioCompletion *rados_completion = this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(m_dest_oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +void RenameRequest<I>::send_update_directory() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + m_state = STATE_UPDATE_DIRECTORY; + + librados::ObjectWriteOperation op; + if (image_ctx.old_format) { + bufferlist cmd_bl; + bufferlist empty_bl; + encode(static_cast<__u8>(CEPH_OSD_TMAP_SET), cmd_bl); + encode(m_dest_name, cmd_bl); + encode(empty_bl, cmd_bl); + encode(static_cast<__u8>(CEPH_OSD_TMAP_RM), cmd_bl); + encode(image_ctx.name, cmd_bl); + op.tmap_update(cmd_bl); + } else { + cls_client::dir_rename_image(&op, image_ctx.name, m_dest_name, + image_ctx.id); + } + + librados::AioCompletion *rados_completion = this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(RBD_DIRECTORY, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +void RenameRequest<I>::send_remove_source_header() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + m_state = STATE_REMOVE_SOURCE_HEADER; + + librados::ObjectWriteOperation op; + op.remove(); + + librados::AioCompletion *rados_completion = this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(m_source_oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +void RenameRequest<I>::apply() { + I &image_ctx = this->m_image_ctx; + image_ctx.set_image_name(m_dest_name); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::RenameRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/RenameRequest.h b/src/librbd/operation/RenameRequest.h new file mode 100644 index 00000000..6534d36c --- /dev/null +++ b/src/librbd/operation/RenameRequest.h @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_RENAME_REQUEST_H +#define CEPH_LIBRBD_RENAME_REQUEST_H + +#include "librbd/operation/Request.h" +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class RenameRequest : public Request<ImageCtxT> +{ +public: + /** + * Rename goes through the following state machine: + * + * @verbatim + * + * <start> + * | + * v + * STATE_READ_SOURCE_HEADER + * | + * v + * STATE_WRITE_DEST_HEADER + * | + * v + * STATE_UPDATE_DIRECTORY + * | + * v + * STATE_REMOVE_SOURCE_HEADER + * | + * v + * <finish> + * + * @endverbatim + * + */ + enum State { + STATE_READ_SOURCE_HEADER, + STATE_WRITE_DEST_HEADER, + STATE_UPDATE_DIRECTORY, + STATE_REMOVE_SOURCE_HEADER + }; + + RenameRequest(ImageCtxT &image_ctx, Context *on_finish, + const std::string &dest_name); + +protected: + void send_op() override; + bool should_complete(int r) override; + int filter_return_code(int r) const override; + + journal::Event create_event(uint64_t op_tid) const override { + return journal::RenameEvent(op_tid, m_dest_name); + } + +private: + std::string m_dest_name; + + std::string m_source_oid; + std::string m_dest_oid; + + State m_state = STATE_READ_SOURCE_HEADER; + + bufferlist m_header_bl; + + void send_read_source_header(); + void send_write_destination_header(); + void send_update_directory(); + void send_remove_source_header(); + + void apply(); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::RenameRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_RENAME_REQUEST_H diff --git a/src/librbd/operation/Request.cc b/src/librbd/operation/Request.cc new file mode 100644 index 00000000..429cc01e --- /dev/null +++ b/src/librbd/operation/Request.cc @@ -0,0 +1,183 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/Request.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "librbd/ImageCtx.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::Request: " + +namespace librbd { +namespace operation { + +template <typename I> +Request<I>::Request(I &image_ctx, Context *on_finish, uint64_t journal_op_tid) + : AsyncRequest<I>(image_ctx, on_finish), m_op_tid(journal_op_tid) { +} + +template <typename I> +void Request<I>::send() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + // automatically create the event if we don't need to worry + // about affecting concurrent IO ops + if (can_affect_io() || !append_op_event()) { + send_op(); + } +} + +template <typename I> +Context *Request<I>::create_context_finisher(int r) { + // automatically commit the event if required (delete after commit) + if (m_appended_op_event && !m_committed_op_event && + commit_op_event(r)) { + return nullptr; + } + + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + return util::create_context_callback<Request<I>, &Request<I>::finish>(this); +} + +template <typename I> +void Request<I>::finish_and_destroy(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; + + // automatically commit the event if required (delete after commit) + if (m_appended_op_event && !m_committed_op_event && + commit_op_event(r)) { + return; + } + + AsyncRequest<I>::finish_and_destroy(r); +} + +template <typename I> +void Request<I>::finish(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; + + ceph_assert(!m_appended_op_event || m_committed_op_event); + AsyncRequest<I>::finish(r); +} + +template <typename I> +bool Request<I>::append_op_event() { + I &image_ctx = this->m_image_ctx; + + ceph_assert(image_ctx.owner_lock.is_locked()); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.journal != nullptr && + image_ctx.journal->is_journal_appending()) { + append_op_event(util::create_context_callback< + Request<I>, &Request<I>::handle_op_event_safe>(this)); + return true; + } + return false; +} + +template <typename I> +bool Request<I>::commit_op_event(int r) { + I &image_ctx = this->m_image_ctx; + RWLock::RLocker snap_locker(image_ctx.snap_lock); + + if (!m_appended_op_event) { + return false; + } + + ceph_assert(m_op_tid != 0); + ceph_assert(!m_committed_op_event); + m_committed_op_event = true; + + if (image_ctx.journal != nullptr && + image_ctx.journal->is_journal_appending()) { + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; + + // ops will be canceled / completed before closing journal + ceph_assert(image_ctx.journal->is_journal_ready()); + image_ctx.journal->commit_op_event(m_op_tid, r, + new C_CommitOpEvent(this, r)); + return true; + } + return false; +} + +template <typename I> +void Request<I>::handle_commit_op_event(int r, int original_ret_val) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to commit op event to journal: " << cpp_strerror(r) + << dendl; + } + if (original_ret_val < 0) { + r = original_ret_val; + } + finish(r); +} + +template <typename I> +void Request<I>::replay_op_ready(Context *on_safe) { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + ceph_assert(image_ctx.snap_lock.is_locked()); + ceph_assert(m_op_tid != 0); + + m_appended_op_event = true; + image_ctx.journal->replay_op_ready( + m_op_tid, util::create_async_context_callback(image_ctx, on_safe)); +} + +template <typename I> +void Request<I>::append_op_event(Context *on_safe) { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + ceph_assert(image_ctx.snap_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + + m_op_tid = image_ctx.journal->allocate_op_tid(); + image_ctx.journal->append_op_event( + m_op_tid, journal::EventEntry{create_event(m_op_tid)}, + new C_AppendOpEvent(this, on_safe)); +} + +template <typename I> +void Request<I>::handle_op_event_safe(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to commit op event to journal: " << cpp_strerror(r) + << dendl; + this->finish(r); + delete this; + } else { + ceph_assert(!can_affect_io()); + + // haven't started the request state machine yet + RWLock::RLocker owner_locker(image_ctx.owner_lock); + send_op(); + } +} + +} // namespace operation +} // namespace librbd + +#ifndef TEST_F +template class librbd::operation::Request<librbd::ImageCtx>; +#endif diff --git a/src/librbd/operation/Request.h b/src/librbd/operation/Request.h new file mode 100644 index 00000000..315a3c96 --- /dev/null +++ b/src/librbd/operation/Request.h @@ -0,0 +1,108 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_REQUEST_H +#define CEPH_LIBRBD_OPERATION_REQUEST_H + +#include "librbd/AsyncRequest.h" +#include "include/Context.h" +#include "common/RWLock.h" +#include "librbd/Utils.h" +#include "librbd/Journal.h" + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class Request : public AsyncRequest<ImageCtxT> { +public: + Request(ImageCtxT &image_ctx, Context *on_finish, + uint64_t journal_op_tid = 0); + + void send(); + +protected: + void finish(int r) override; + virtual void send_op() = 0; + + virtual bool can_affect_io() const { + return false; + } + virtual journal::Event create_event(uint64_t op_tid) const = 0; + + template <typename T, Context*(T::*MF)(int*)> + bool append_op_event(T *request) { + ImageCtxT &image_ctx = this->m_image_ctx; + + ceph_assert(can_affect_io()); + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.journal != nullptr) { + if (image_ctx.journal->is_journal_replaying()) { + Context *ctx = util::create_context_callback<T, MF>(request); + replay_op_ready(ctx); + return true; + } else if (image_ctx.journal->is_journal_appending()) { + Context *ctx = util::create_context_callback<T, MF>(request); + append_op_event(ctx); + return true; + } + } + return false; + } + + bool append_op_event(); + + // NOTE: temporary until converted to new state machine format + Context *create_context_finisher(int r); + void finish_and_destroy(int r) override; + +private: + struct C_AppendOpEvent : public Context { + Request *request; + Context *on_safe; + C_AppendOpEvent(Request *request, Context *on_safe) + : request(request), on_safe(on_safe) { + } + void finish(int r) override { + if (r >= 0) { + request->m_appended_op_event = true; + } + on_safe->complete(r); + } + }; + + struct C_CommitOpEvent : public Context { + Request *request; + int ret_val; + C_CommitOpEvent(Request *request, int ret_val) + : request(request), ret_val(ret_val) { + } + void finish(int r) override { + request->handle_commit_op_event(r, ret_val); + delete request; + } + }; + + uint64_t m_op_tid = 0; + bool m_appended_op_event = false; + bool m_committed_op_event = false; + + void replay_op_ready(Context *on_safe); + void append_op_event(Context *on_safe); + void handle_op_event_safe(int r); + + bool commit_op_event(int r); + void handle_commit_op_event(int r, int original_ret_val); + +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::Request<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_REQUEST_H diff --git a/src/librbd/operation/ResizeRequest.cc b/src/librbd/operation/ResizeRequest.cc new file mode 100644 index 00000000..537436ce --- /dev/null +++ b/src/librbd/operation/ResizeRequest.cc @@ -0,0 +1,467 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/ResizeRequest.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ImageDispatchSpec.h" +#include "librbd/io/ImageRequestWQ.h" +#include "librbd/io/ObjectDispatcher.h" +#include "librbd/operation/TrimRequest.h" +#include "common/dout.h" +#include "common/errno.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::operation::ResizeRequest: " << this \ + << " " << __func__ << ": " + +namespace librbd { +namespace operation { + +using util::create_async_context_callback; +using util::create_context_callback; +using util::create_rados_callback; + +template <typename I> +ResizeRequest<I>::ResizeRequest(I &image_ctx, Context *on_finish, + uint64_t new_size, bool allow_shrink, ProgressContext &prog_ctx, + uint64_t journal_op_tid, bool disable_journal) + : Request<I>(image_ctx, on_finish, journal_op_tid), + m_original_size(0), m_new_size(new_size), m_allow_shrink(allow_shrink), + m_prog_ctx(prog_ctx), m_new_parent_overlap(0), m_disable_journal(disable_journal), + m_xlist_item(this) +{ +} + +template <typename I> +ResizeRequest<I>::~ResizeRequest() { + I &image_ctx = this->m_image_ctx; + ResizeRequest *next_req = NULL; + { + RWLock::WLocker snap_locker(image_ctx.snap_lock); + ceph_assert(m_xlist_item.remove_myself()); + if (!image_ctx.resize_reqs.empty()) { + next_req = image_ctx.resize_reqs.front(); + } + } + + if (next_req != NULL) { + RWLock::RLocker owner_locker(image_ctx.owner_lock); + next_req->send(); + } +} + +template <typename I> +void ResizeRequest<I>::send() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + { + RWLock::WLocker snap_locker(image_ctx.snap_lock); + if (!m_xlist_item.is_on_list()) { + image_ctx.resize_reqs.push_back(&m_xlist_item); + if (image_ctx.resize_reqs.front() != this) { + return; + } + } + + ceph_assert(image_ctx.resize_reqs.front() == this); + m_original_size = image_ctx.size; + compute_parent_overlap(); + } + + Request<I>::send(); +} + +template <typename I> +void ResizeRequest<I>::send_op() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + if (this->is_canceled()) { + this->async_complete(-ERESTART); + } else { + send_pre_block_writes(); + } +} + +template <typename I> +void ResizeRequest<I>::send_pre_block_writes() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + image_ctx.io_work_queue->block_writes(create_context_callback< + ResizeRequest<I>, &ResizeRequest<I>::handle_pre_block_writes>(this)); +} + +template <typename I> +Context *ResizeRequest<I>::handle_pre_block_writes(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; + image_ctx.io_work_queue->unblock_writes(); + return this->create_context_finisher(*result); + } + + return send_append_op_event(); +} + +template <typename I> +Context *ResizeRequest<I>::send_append_op_event() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (m_new_size < m_original_size && !m_allow_shrink) { + ldout(cct, 1) << "shrinking the image is not permitted" << dendl; + image_ctx.io_work_queue->unblock_writes(); + this->async_complete(-EINVAL); + return nullptr; + } + + if (m_disable_journal || !this->template append_op_event< + ResizeRequest<I>, &ResizeRequest<I>::handle_append_op_event>(this)) { + return send_grow_object_map(); + } + + ldout(cct, 5) << dendl; + return nullptr; +} + +template <typename I> +Context *ResizeRequest<I>::handle_append_op_event(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result) + << dendl; + image_ctx.io_work_queue->unblock_writes(); + return this->create_context_finisher(*result); + } + + return send_grow_object_map(); +} + +template <typename I> +void ResizeRequest<I>::send_trim_image() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + TrimRequest<I> *req = TrimRequest<I>::create( + image_ctx, create_context_callback< + ResizeRequest<I>, &ResizeRequest<I>::handle_trim_image>(this), + m_original_size, m_new_size, m_prog_ctx); + req->send(); +} + +template <typename I> +Context *ResizeRequest<I>::handle_trim_image(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << *result << dendl; + + if (*result == -ERESTART) { + ldout(cct, 5) << "resize operation interrupted" << dendl; + return this->create_context_finisher(*result); + } else if (*result < 0) { + lderr(cct) << "failed to trim image: " << cpp_strerror(*result) << dendl; + return this->create_context_finisher(*result); + } + + send_post_block_writes(); + return nullptr; +} + +template <typename I> +void ResizeRequest<I>::send_flush_cache() { + I &image_ctx = this->m_image_ctx; + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + auto ctx = create_context_callback< + ResizeRequest<I>, &ResizeRequest<I>::handle_flush_cache>(this); + auto aio_comp = io::AioCompletion::create_and_start( + ctx, util::get_image_ctx(&image_ctx), io::AIO_TYPE_FLUSH); + auto req = io::ImageDispatchSpec<I>::create_flush_request( + image_ctx, aio_comp, io::FLUSH_SOURCE_INTERNAL, {}); + req->send(); + delete req; +} + +template <typename I> +Context *ResizeRequest<I>::handle_flush_cache(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to flush cache: " << cpp_strerror(*result) << dendl; + return this->create_context_finisher(*result); + } + + send_invalidate_cache(); + return nullptr; +} + +template <typename I> +void ResizeRequest<I>::send_invalidate_cache() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + // need to invalidate since we're deleting objects, and + // ObjectCacher doesn't track non-existent objects + RWLock::RLocker owner_locker(image_ctx.owner_lock); + image_ctx.io_object_dispatcher->invalidate_cache(create_context_callback< + ResizeRequest<I>, &ResizeRequest<I>::handle_invalidate_cache>(this)); +} + +template <typename I> +Context *ResizeRequest<I>::handle_invalidate_cache(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << *result << dendl; + + // ignore busy error -- writeback was successfully flushed so we might be + // wasting some cache space for trimmed objects, but they will get purged + // eventually. Most likely cause of the issue was a in-flight cache read + if (*result < 0 && *result != -EBUSY) { + lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result) + << dendl; + return this->create_context_finisher(*result); + } + + send_trim_image(); + return nullptr; +} + +template <typename I> +Context *ResizeRequest<I>::send_grow_object_map() { + I &image_ctx = this->m_image_ctx; + + { + RWLock::WLocker snap_locker(image_ctx.snap_lock); + m_shrink_size_visible = true; + } + + if (m_original_size == m_new_size) { + image_ctx.io_work_queue->unblock_writes(); + return this->create_context_finisher(0); + } else if (m_new_size < m_original_size) { + image_ctx.io_work_queue->unblock_writes(); + send_flush_cache(); + return nullptr; + } + + image_ctx.owner_lock.get_read(); + image_ctx.snap_lock.get_read(); + if (image_ctx.object_map == nullptr) { + image_ctx.snap_lock.put_read(); + image_ctx.owner_lock.put_read(); + + // IO is still blocked + send_update_header(); + return nullptr; + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + // should have been canceled prior to releasing lock + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + image_ctx.object_map->aio_resize( + m_new_size, OBJECT_NONEXISTENT, create_context_callback< + ResizeRequest<I>, &ResizeRequest<I>::handle_grow_object_map>(this)); + image_ctx.snap_lock.put_read(); + image_ctx.owner_lock.put_read(); + return nullptr; +} + +template <typename I> +Context *ResizeRequest<I>::handle_grow_object_map(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to resize object map: " + << cpp_strerror(*result) << dendl; + image_ctx.io_work_queue->unblock_writes(); + return this->create_context_finisher(*result); + } + + // IO is still blocked + send_update_header(); + return nullptr; +} + +template <typename I> +Context *ResizeRequest<I>::send_shrink_object_map() { + I &image_ctx = this->m_image_ctx; + + image_ctx.owner_lock.get_read(); + image_ctx.snap_lock.get_read(); + if (image_ctx.object_map == nullptr || m_new_size > m_original_size) { + image_ctx.snap_lock.put_read(); + image_ctx.owner_lock.put_read(); + + update_size_and_overlap(); + return this->create_context_finisher(0); + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "original_size=" << m_original_size << ", " + << "new_size=" << m_new_size << dendl; + + // should have been canceled prior to releasing lock + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + image_ctx.object_map->aio_resize( + m_new_size, OBJECT_NONEXISTENT, create_context_callback< + ResizeRequest<I>, &ResizeRequest<I>::handle_shrink_object_map>(this)); + image_ctx.snap_lock.put_read(); + image_ctx.owner_lock.put_read(); + return nullptr; +} + +template <typename I> +Context *ResizeRequest<I>::handle_shrink_object_map(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to resize object map: " + << cpp_strerror(*result) << dendl; + image_ctx.io_work_queue->unblock_writes(); + return this->create_context_finisher(*result); + } + + update_size_and_overlap(); + return this->create_context_finisher(0); +} + +template <typename I> +void ResizeRequest<I>::send_post_block_writes() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + image_ctx.io_work_queue->block_writes(create_context_callback< + ResizeRequest<I>, &ResizeRequest<I>::handle_post_block_writes>(this)); +} + +template <typename I> +Context *ResizeRequest<I>::handle_post_block_writes(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << *result << dendl; + + if (*result < 0) { + image_ctx.io_work_queue->unblock_writes(); + lderr(cct) << "failed to block writes prior to header update: " + << cpp_strerror(*result) << dendl; + return this->create_context_finisher(*result); + } + + send_update_header(); + return nullptr; +} + +template <typename I> +void ResizeRequest<I>::send_update_header() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "original_size=" << m_original_size << ", " + << "new_size=" << m_new_size << dendl;; + + // should have been canceled prior to releasing lock + RWLock::RLocker owner_locker(image_ctx.owner_lock); + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + librados::ObjectWriteOperation op; + if (image_ctx.old_format) { + // rewrite only the size field of the header + ceph_le64 new_size = init_le64(m_new_size); + bufferlist bl; + bl.append(reinterpret_cast<const char*>(&new_size), sizeof(new_size)); + op.write(offsetof(rbd_obj_header_ondisk, image_size), bl); + } else { + cls_client::set_size(&op, m_new_size); + } + + librados::AioCompletion *rados_completion = create_rados_callback< + ResizeRequest<I>, &ResizeRequest<I>::handle_update_header>(this); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, + rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +Context *ResizeRequest<I>::handle_update_header(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to update image header: " << cpp_strerror(*result) + << dendl; + image_ctx.io_work_queue->unblock_writes(); + return this->create_context_finisher(*result); + } + + return send_shrink_object_map(); +} + +template <typename I> +void ResizeRequest<I>::compute_parent_overlap() { + I &image_ctx = this->m_image_ctx; + RWLock::RLocker l2(image_ctx.parent_lock); + if (image_ctx.parent == NULL) { + m_new_parent_overlap = 0; + } else { + m_new_parent_overlap = std::min(m_new_size, image_ctx.parent_md.overlap); + } +} + +template <typename I> +void ResizeRequest<I>::update_size_and_overlap() { + I &image_ctx = this->m_image_ctx; + { + RWLock::WLocker snap_locker(image_ctx.snap_lock); + image_ctx.size = m_new_size; + + RWLock::WLocker parent_locker(image_ctx.parent_lock); + if (image_ctx.parent != NULL && m_new_size < m_original_size) { + image_ctx.parent_md.overlap = m_new_parent_overlap; + } + } + + // blocked by PRE_BLOCK_WRITES (grow) or POST_BLOCK_WRITES (shrink) state + image_ctx.io_work_queue->unblock_writes(); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::ResizeRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/ResizeRequest.h b/src/librbd/operation/ResizeRequest.h new file mode 100644 index 00000000..f5e2f807 --- /dev/null +++ b/src/librbd/operation/ResizeRequest.h @@ -0,0 +1,156 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H +#define CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H + +#include "librbd/operation/Request.h" +#include "include/xlist.h" + +namespace librbd +{ + +class ImageCtx; +class ProgressContext; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class ResizeRequest : public Request<ImageCtxT> { +public: + static ResizeRequest *create(ImageCtxT &image_ctx, Context *on_finish, + uint64_t new_size, bool allow_shrink, + ProgressContext &prog_ctx, uint64_t journal_op_tid, + bool disable_journal) { + return new ResizeRequest(image_ctx, on_finish, new_size, allow_shrink, prog_ctx, + journal_op_tid, disable_journal); + } + + ResizeRequest(ImageCtxT &image_ctx, Context *on_finish, uint64_t new_size, + bool allow_shrink, ProgressContext &prog_ctx, uint64_t journal_op_tid, + bool disable_journal); + ~ResizeRequest() override; + + inline bool shrinking() const { + return (m_shrink_size_visible && m_new_size < m_original_size); + } + + inline uint64_t get_image_size() const { + return m_new_size; + } + + void send() override; + +protected: + void send_op() override; + bool should_complete(int r) override { + return true; + } + bool can_affect_io() const override { + return true; + } + journal::Event create_event(uint64_t op_tid) const override { + return journal::ResizeEvent(op_tid, m_new_size); + } + +private: + /** + * Resize goes through the following state machine to resize the image + * and update the object map: + * + * @verbatim + * + * <start> + * | + * v + * STATE_PRE_BLOCK_WRITES + * | + * v + * STATE_APPEND_OP_EVENT (skip if journaling + * | disabled) + * | + * | (grow) + * |\--------> STATE_GROW_OBJECT_MAP (skip if object map + * | | disabled) + * | v + * | STATE_UPDATE_HEADER ----------------------------\ + * | (unblock writes) | + * | | + * | (unblock writes) | + * | | + * | (shrink) | + * |\--------> STATE_FLUSH_CACHE | + * | | | + * | v | + * | STATE_INVALIDATE_CACHE | + * | | | + * | v | + * | STATE_TRIM_IMAGE | + * | | | + * | v | + * | STATE_POST_BLOCK_WRITES | + * | | | + * | v | + * | STATE_UPDATE_HEADER | + * | | | + * | v | + * | STATE_SHRINK_OBJECT_MAP (skip if object map | + * | | disabled) | + * | | (unblock writes) | + * | (no change) v | + * \------------> <finish> <-----------------------------------/ + * + * @endverbatim + * + * The _OBJECT_MAP states are skipped if the object map isn't enabled. + * The state machine will immediately transition to _FINISHED if there + * are no objects to trim. + */ + + uint64_t m_original_size; + uint64_t m_new_size; + bool m_allow_shrink = true; + ProgressContext &m_prog_ctx; + uint64_t m_new_parent_overlap; + bool m_shrink_size_visible = false; + bool m_disable_journal = false; + + typename xlist<ResizeRequest<ImageCtxT>*>::item m_xlist_item; + + void send_pre_block_writes(); + Context *handle_pre_block_writes(int *result); + + Context *send_append_op_event(); + Context *handle_append_op_event(int *result); + + void send_flush_cache(); + Context *handle_flush_cache(int *result); + + void send_invalidate_cache(); + Context *handle_invalidate_cache(int *result); + + void send_trim_image(); + Context *handle_trim_image(int *result); + + Context *send_grow_object_map(); + Context *handle_grow_object_map(int *result); + + Context *send_shrink_object_map(); + Context *handle_shrink_object_map(int *result); + + void send_post_block_writes(); + Context *handle_post_block_writes(int *result); + + void send_update_header(); + Context *handle_update_header(int *result); + + void compute_parent_overlap(); + void update_size_and_overlap(); + +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::ResizeRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H diff --git a/src/librbd/operation/SnapshotCreateRequest.cc b/src/librbd/operation/SnapshotCreateRequest.cc new file mode 100644 index 00000000..66293609 --- /dev/null +++ b/src/librbd/operation/SnapshotCreateRequest.cc @@ -0,0 +1,342 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/operation/SnapshotCreateRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "librbd/io/ImageRequestWQ.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::SnapshotCreateRequest: " + +namespace librbd { +namespace operation { + +using util::create_async_context_callback; +using util::create_context_callback; +using util::create_rados_callback; + +template <typename I> +SnapshotCreateRequest<I>::SnapshotCreateRequest(I &image_ctx, + Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name, + uint64_t journal_op_tid, + bool skip_object_map) + : Request<I>(image_ctx, on_finish, journal_op_tid), + m_snap_namespace(snap_namespace), m_snap_name(snap_name), + m_skip_object_map(skip_object_map), m_ret_val(0), m_snap_id(CEPH_NOSNAP) { +} + +template <typename I> +void SnapshotCreateRequest<I>::send_op() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (!image_ctx.data_ctx.is_valid()) { + lderr(cct) << "missing data pool" << dendl; + this->async_complete(-ENODEV); + return; + } + + send_suspend_requests(); +} + +template <typename I> +void SnapshotCreateRequest<I>::send_suspend_requests() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + // TODO suspend (shrink) resize to ensure consistent RBD mirror + send_suspend_aio(); +} + +template <typename I> +Context *SnapshotCreateRequest<I>::handle_suspend_requests(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + // TODO + send_suspend_aio(); + return nullptr; +} + +template <typename I> +void SnapshotCreateRequest<I>::send_suspend_aio() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + image_ctx.io_work_queue->block_writes(create_context_callback< + SnapshotCreateRequest<I>, + &SnapshotCreateRequest<I>::handle_suspend_aio>(this)); +} + +template <typename I> +Context *SnapshotCreateRequest<I>::handle_suspend_aio(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; + image_ctx.io_work_queue->unblock_writes(); + return this->create_context_finisher(*result); + } + + send_append_op_event(); + return nullptr; +} + +template <typename I> +void SnapshotCreateRequest<I>::send_append_op_event() { + I &image_ctx = this->m_image_ctx; + if (!this->template append_op_event< + SnapshotCreateRequest<I>, + &SnapshotCreateRequest<I>::handle_append_op_event>(this)) { + send_allocate_snap_id(); + return; + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; +} + +template <typename I> +Context *SnapshotCreateRequest<I>::handle_append_op_event(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + image_ctx.io_work_queue->unblock_writes(); + lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result) + << dendl; + return this->create_context_finisher(*result); + } + + send_allocate_snap_id(); + return nullptr; +} + +template <typename I> +void SnapshotCreateRequest<I>::send_allocate_snap_id() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + librados::AioCompletion *rados_completion = create_rados_callback< + SnapshotCreateRequest<I>, + &SnapshotCreateRequest<I>::handle_allocate_snap_id>(this); + image_ctx.data_ctx.aio_selfmanaged_snap_create(&m_snap_id, rados_completion); + rados_completion->release(); +} + +template <typename I> +Context *SnapshotCreateRequest<I>::handle_allocate_snap_id(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << ", " + << "snap_id=" << m_snap_id << dendl; + + if (*result < 0) { + save_result(result); + image_ctx.io_work_queue->unblock_writes(); + lderr(cct) << "failed to allocate snapshot id: " << cpp_strerror(*result) + << dendl; + return this->create_context_finisher(*result); + } + + send_create_snap(); + return nullptr; +} + +template <typename I> +void SnapshotCreateRequest<I>::send_create_snap() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + RWLock::RLocker parent_locker(image_ctx.parent_lock); + + // should have been canceled prior to releasing lock + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + // save current size / parent info for creating snapshot record in ImageCtx + m_size = image_ctx.size; + m_parent_info = image_ctx.parent_md; + + librados::ObjectWriteOperation op; + if (image_ctx.old_format) { + cls_client::old_snapshot_add(&op, m_snap_id, m_snap_name); + } else { + cls_client::snapshot_add(&op, m_snap_id, m_snap_name, m_snap_namespace); + } + + librados::AioCompletion *rados_completion = create_rados_callback< + SnapshotCreateRequest<I>, + &SnapshotCreateRequest<I>::handle_create_snap>(this); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, + rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +Context *SnapshotCreateRequest<I>::handle_create_snap(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result == -ESTALE) { + send_allocate_snap_id(); + return nullptr; + } else if (*result < 0) { + save_result(result); + send_release_snap_id(); + return nullptr; + } + + return send_create_object_map(); +} + +template <typename I> +Context *SnapshotCreateRequest<I>::send_create_object_map() { + I &image_ctx = this->m_image_ctx; + + image_ctx.snap_lock.get_read(); + if (image_ctx.object_map == nullptr || m_skip_object_map) { + image_ctx.snap_lock.put_read(); + + update_snap_context(); + image_ctx.io_work_queue->unblock_writes(); + return this->create_context_finisher(0); + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + { + RWLock::RLocker object_map_lock(image_ctx.object_map_lock); + image_ctx.object_map->snapshot_add( + m_snap_id, create_context_callback< + SnapshotCreateRequest<I>, + &SnapshotCreateRequest<I>::handle_create_object_map>(this)); + } + image_ctx.snap_lock.put_read(); + return nullptr; +} + +template <typename I> +Context *SnapshotCreateRequest<I>::handle_create_object_map(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + update_snap_context(); + image_ctx.io_work_queue->unblock_writes(); + if (*result < 0) { + lderr(cct) << this << " " << __func__ << ": failed to snapshot object map: " + << cpp_strerror(*result) << dendl; + return this->create_context_finisher(*result); + } + + return this->create_context_finisher(0); +} + +template <typename I> +void SnapshotCreateRequest<I>::send_release_snap_id() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + ceph_assert(m_snap_id != CEPH_NOSNAP); + + librados::AioCompletion *rados_completion = create_rados_callback< + SnapshotCreateRequest<I>, + &SnapshotCreateRequest<I>::handle_release_snap_id>(this); + image_ctx.data_ctx.aio_selfmanaged_snap_remove(m_snap_id, rados_completion); + rados_completion->release(); +} + +template <typename I> +Context *SnapshotCreateRequest<I>::handle_release_snap_id(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + ceph_assert(m_ret_val < 0); + *result = m_ret_val; + + image_ctx.io_work_queue->unblock_writes(); + return this->create_context_finisher(m_ret_val); +} + +template <typename I> +void SnapshotCreateRequest<I>::update_snap_context() { + I &image_ctx = this->m_image_ctx; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::WLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.old_format) { + return; + } + + if (image_ctx.get_snap_info(m_snap_id) != NULL) { + return; + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + // should have been canceled prior to releasing lock + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + // immediately add a reference to the new snapshot + utime_t snap_time = ceph_clock_now(); + image_ctx.add_snap(m_snap_namespace, m_snap_name, m_snap_id, m_size, + m_parent_info, RBD_PROTECTION_STATUS_UNPROTECTED, + 0, snap_time); + + // immediately start using the new snap context if we + // own the exclusive lock + std::vector<snapid_t> snaps; + snaps.push_back(m_snap_id); + snaps.insert(snaps.end(), image_ctx.snapc.snaps.begin(), + image_ctx.snapc.snaps.end()); + + image_ctx.snapc.seq = m_snap_id; + image_ctx.snapc.snaps.swap(snaps); + image_ctx.data_ctx.selfmanaged_snap_set_write_ctx( + image_ctx.snapc.seq, image_ctx.snaps); + + if (!image_ctx.migration_info.empty()) { + auto it = image_ctx.migration_info.snap_map.find(CEPH_NOSNAP); + ceph_assert(it != image_ctx.migration_info.snap_map.end()); + ceph_assert(!it->second.empty()); + if (it->second[0] == CEPH_NOSNAP) { + ldout(cct, 5) << this << " " << __func__ + << ": updating migration snap_map" << dendl; + it->second[0] = m_snap_id; + } + } +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::SnapshotCreateRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/SnapshotCreateRequest.h b/src/librbd/operation/SnapshotCreateRequest.h new file mode 100644 index 00000000..406d2f01 --- /dev/null +++ b/src/librbd/operation/SnapshotCreateRequest.h @@ -0,0 +1,126 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_CREATE_REQUEST_H +#define CEPH_LIBRBD_OPERATION_SNAPSHOT_CREATE_REQUEST_H + +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/Types.h" +#include "librbd/operation/Request.h" +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class SnapshotCreateRequest : public Request<ImageCtxT> { +public: + /** + * Snap Create goes through the following state machine: + * + * @verbatim + * + * <start> + * | + * v + * STATE_SUSPEND_REQUESTS + * | + * v + * STATE_SUSPEND_AIO * * * * * * * * * * * * * + * | * + * v * + * STATE_APPEND_OP_EVENT (skip if journal * + * | disabled) * + * (retry) v * + * . . . > STATE_ALLOCATE_SNAP_ID * + * . | * + * . v * + * . . . . STATE_CREATE_SNAP * * * * * * * * * * * + * | * * + * v * * + * STATE_CREATE_OBJECT_MAP (skip if * * + * | disabled) * * + * | * * + * | v * + * | STATE_RELEASE_SNAP_ID * + * | | * + * | v * + * \----------------> <finish> < * * * * * + * + * @endverbatim + * + * The _CREATE_STATE state may repeat back to the _ALLOCATE_SNAP_ID state + * if a stale snapshot context is allocated. If the create operation needs + * to abort, the error path is followed to record the result in the journal + * (if enabled) and bubble the originating error code back to the client. + */ + SnapshotCreateRequest(ImageCtxT &image_ctx, Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name, + uint64_t journal_op_tid, + bool skip_object_map); + +protected: + void send_op() override; + bool should_complete(int r) override { + return true; + } + bool can_affect_io() const override { + return true; + } + journal::Event create_event(uint64_t op_tid) const override { + return journal::SnapCreateEvent(op_tid, m_snap_namespace, m_snap_name); + } + +private: + cls::rbd::SnapshotNamespace m_snap_namespace; + std::string m_snap_name; + bool m_skip_object_map; + + int m_ret_val; + + uint64_t m_snap_id; + uint64_t m_size; + ParentImageInfo m_parent_info; + + void send_suspend_requests(); + Context *handle_suspend_requests(int *result); + + void send_suspend_aio(); + Context *handle_suspend_aio(int *result); + + void send_append_op_event(); + Context *handle_append_op_event(int *result); + + void send_allocate_snap_id(); + Context *handle_allocate_snap_id(int *result); + + void send_create_snap(); + Context *handle_create_snap(int *result); + + Context *send_create_object_map(); + Context *handle_create_object_map(int *result); + + void send_release_snap_id(); + Context *handle_release_snap_id(int *result); + + void update_snap_context(); + + void save_result(int *result) { + if (m_ret_val == 0 && *result < 0) { + m_ret_val = *result; + } + } +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::SnapshotCreateRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_CREATE_REQUEST_H diff --git a/src/librbd/operation/SnapshotLimitRequest.cc b/src/librbd/operation/SnapshotLimitRequest.cc new file mode 100644 index 00000000..d47dc3a8 --- /dev/null +++ b/src/librbd/operation/SnapshotLimitRequest.cc @@ -0,0 +1,67 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/SnapshotLimitRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::SnapshotLimitRequest: " + +namespace librbd { +namespace operation { + +template <typename I> +SnapshotLimitRequest<I>::SnapshotLimitRequest(I &image_ctx, + Context *on_finish, + uint64_t limit) + : Request<I>(image_ctx, on_finish), m_snap_limit(limit) { +} + +template <typename I> +void SnapshotLimitRequest<I>::send_op() { + send_limit_snaps(); +} + +template <typename I> +bool SnapshotLimitRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << " r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + return true; +} + +template <typename I> +void SnapshotLimitRequest<I>::send_limit_snaps() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + { + RWLock::RLocker md_locker(image_ctx.md_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + + librados::ObjectWriteOperation op; + cls_client::snapshot_set_limit(&op, m_snap_limit); + + librados::AioCompletion *rados_completion = + this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, rados_completion, + &op); + ceph_assert(r == 0); + rados_completion->release(); + } +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::SnapshotLimitRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/SnapshotLimitRequest.h b/src/librbd/operation/SnapshotLimitRequest.h new file mode 100644 index 00000000..09622a45 --- /dev/null +++ b/src/librbd/operation/SnapshotLimitRequest.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_LIMIT_REQUEST_H +#define CEPH_LIBRBD_OPERATION_SNAPSHOT_LIMIT_REQUEST_H + +#include "librbd/operation/Request.h" +#include <iosfwd> +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class SnapshotLimitRequest : public Request<ImageCtxT> { +public: + SnapshotLimitRequest(ImageCtxT &image_ctx, Context *on_finish, + uint64_t limit); + +protected: + void send_op() override; + bool should_complete(int r) override; + + journal::Event create_event(uint64_t op_tid) const override { + return journal::SnapLimitEvent(op_tid, m_snap_limit); + } + +private: + uint64_t m_snap_limit; + + void send_limit_snaps(); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::SnapshotLimitRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_LIMIT_REQUEST_H diff --git a/src/librbd/operation/SnapshotProtectRequest.cc b/src/librbd/operation/SnapshotProtectRequest.cc new file mode 100644 index 00000000..9a0375fc --- /dev/null +++ b/src/librbd/operation/SnapshotProtectRequest.cc @@ -0,0 +1,119 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/SnapshotProtectRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::SnapshotProtectRequest: " + +namespace librbd { +namespace operation { + +namespace { + +template <typename I> +std::ostream& operator<<(std::ostream& os, + const typename SnapshotProtectRequest<I>::State& state) { + switch(state) { + case SnapshotProtectRequest<I>::STATE_PROTECT_SNAP: + os << "PROTECT_SNAP"; + break; + } + return os; +} + +} // anonymous namespace + +template <typename I> +SnapshotProtectRequest<I>::SnapshotProtectRequest(I &image_ctx, + Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name) + : Request<I>(image_ctx, on_finish), m_snap_namespace(snap_namespace), + m_snap_name(snap_name), m_state(STATE_PROTECT_SNAP) { +} + +template <typename I> +void SnapshotProtectRequest<I>::send_op() { + send_protect_snap(); +} + +template <typename I> +bool SnapshotProtectRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", " + << "r=" << r << dendl; + if (r < 0) { + if (r == -EBUSY) { + ldout(cct, 1) << "snapshot is already protected" << dendl; + } else { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + } + return true; +} + +template <typename I> +void SnapshotProtectRequest<I>::send_protect_snap() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + int r = verify_and_send_protect_snap(); + if (r < 0) { + this->async_complete(r); + return; + } +} + +template <typename I> +int SnapshotProtectRequest<I>::verify_and_send_protect_snap() { + I &image_ctx = this->m_image_ctx; + RWLock::RLocker md_locker(image_ctx.md_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + + CephContext *cct = image_ctx.cct; + if ((image_ctx.features & RBD_FEATURE_LAYERING) == 0) { + lderr(cct) << "image must support layering" << dendl; + return -ENOSYS; + } + + uint64_t snap_id = image_ctx.get_snap_id(m_snap_namespace, m_snap_name); + if (snap_id == CEPH_NOSNAP) { + return -ENOENT; + } + + bool is_protected; + int r = image_ctx.is_snap_protected(snap_id, &is_protected); + if (r < 0) { + return r; + } + + if (is_protected) { + return -EBUSY; + } + + librados::ObjectWriteOperation op; + cls_client::set_protection_status(&op, snap_id, + RBD_PROTECTION_STATUS_PROTECTED); + + librados::AioCompletion *rados_completion = + this->create_callback_completion(); + r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, rados_completion, + &op); + ceph_assert(r == 0); + rados_completion->release(); + return 0; +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::SnapshotProtectRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/SnapshotProtectRequest.h b/src/librbd/operation/SnapshotProtectRequest.h new file mode 100644 index 00000000..bef80229 --- /dev/null +++ b/src/librbd/operation/SnapshotProtectRequest.h @@ -0,0 +1,68 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_PROTECT_REQUEST_H +#define CEPH_LIBRBD_OPERATION_SNAPSHOT_PROTECT_REQUEST_H + +#include "librbd/operation/Request.h" +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class SnapshotProtectRequest : public Request<ImageCtxT> { +public: + /** + * Snap Protect goes through the following state machine: + * + * @verbatim + * + * <start> + * | + * v + * STATE_PROTECT_SNAP + * | + * v + * <finish> + * + * @endverbatim + * + */ + enum State { + STATE_PROTECT_SNAP + }; + + SnapshotProtectRequest(ImageCtxT &image_ctx, Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name); + +protected: + void send_op() override; + bool should_complete(int r) override; + + journal::Event create_event(uint64_t op_tid) const override { + return journal::SnapProtectEvent(op_tid, m_snap_namespace, m_snap_name); + } + +private: + cls::rbd::SnapshotNamespace m_snap_namespace; + std::string m_snap_name; + State m_state; + + void send_protect_snap(); + + int verify_and_send_protect_snap(); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::SnapshotProtectRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_PROTECT_REQUEST_H diff --git a/src/librbd/operation/SnapshotRemoveRequest.cc b/src/librbd/operation/SnapshotRemoveRequest.cc new file mode 100644 index 00000000..69fbc3e7 --- /dev/null +++ b/src/librbd/operation/SnapshotRemoveRequest.cc @@ -0,0 +1,469 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/SnapshotRemoveRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "librbd/image/DetachChildRequest.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::SnapshotRemoveRequest: " << this << " " \ + << __func__ << ": " + +namespace librbd { +namespace operation { + +using util::create_context_callback; +using util::create_rados_callback; + +template <typename I> +SnapshotRemoveRequest<I>::SnapshotRemoveRequest( + I &image_ctx, Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name, uint64_t snap_id) + : Request<I>(image_ctx, on_finish), m_snap_namespace(snap_namespace), + m_snap_name(snap_name), m_snap_id(snap_id) { +} + +template <typename I> +void SnapshotRemoveRequest<I>::send_op() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + ceph_assert(image_ctx.owner_lock.is_locked()); + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + RWLock::RLocker object_map_locker(image_ctx.object_map_lock); + if (image_ctx.snap_info.find(m_snap_id) == image_ctx.snap_info.end()) { + lderr(cct) << "snapshot doesn't exist" << dendl; + this->async_complete(-ENOENT); + return; + } + } + + trash_snap(); +} + +template <typename I> +bool SnapshotRemoveRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + if (r < 0) { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + return true; +} + +template <typename I> +void SnapshotRemoveRequest<I>::trash_snap() { + I &image_ctx = this->m_image_ctx; + if (image_ctx.old_format) { + release_snap_id(); + return; + } else if (cls::rbd::get_snap_namespace_type(m_snap_namespace) == + cls::rbd::SNAPSHOT_NAMESPACE_TYPE_TRASH) { + get_snap(); + return; + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + librados::ObjectWriteOperation op; + cls_client::snapshot_trash_add(&op, m_snap_id); + + auto aio_comp = create_rados_callback< + SnapshotRemoveRequest<I>, + &SnapshotRemoveRequest<I>::handle_trash_snap>(this); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::handle_trash_snap(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r == -EOPNOTSUPP) { + // trash / clone v2 not supported + detach_child(); + return; + } else if (r < 0 && r != -EEXIST) { + lderr(cct) << "failed to move snapshot to trash: " << cpp_strerror(r) + << dendl; + this->complete(r); + return; + } + + m_trashed_snapshot = true; + get_snap(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::get_snap() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + librados::ObjectReadOperation op; + cls_client::snapshot_get_start(&op, m_snap_id); + + auto aio_comp = create_rados_callback< + SnapshotRemoveRequest<I>, + &SnapshotRemoveRequest<I>::handle_get_snap>(this); + m_out_bl.clear(); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::handle_get_snap(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r == 0) { + cls::rbd::SnapshotInfo snap_info; + + auto it = m_out_bl.cbegin(); + r = cls_client::snapshot_get_finish(&it, &snap_info); + m_child_attached = (snap_info.child_count > 0); + if (r == 0 && m_child_attached) { + list_children(); + return; + } + } + + if (r < 0) { + lderr(cct) << "failed to retrieve snapshot: " << cpp_strerror(r) + << dendl; + this->complete(r); + return; + } + + detach_child(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::list_children() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + librados::ObjectReadOperation op; + cls_client::children_list_start(&op, m_snap_id); + + m_out_bl.clear(); + m_child_images.clear(); + auto aio_comp = create_rados_callback< + SnapshotRemoveRequest<I>, + &SnapshotRemoveRequest<I>::handle_list_children>(this); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::handle_list_children(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = cls_client::children_list_finish(&it, &m_child_images); + } + + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to retrieve child: " << cpp_strerror(r) + << dendl; + this->complete(r); + return; + } + + detach_stale_child(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::detach_stale_child() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + for (auto& child_image : m_child_images) { + m_child_attached = true; + IoCtx ioctx; + int r = util::create_ioctx(image_ctx.md_ctx, "child image", + child_image.pool_id, + child_image.pool_namespace, &ioctx); + if (r == -ENOENT) { + librados::ObjectWriteOperation op; + cls_client::child_detach(&op, m_snap_id, + {child_image.pool_id, + child_image.pool_namespace, + child_image.image_id}); + auto aio_comp = create_rados_callback< + SnapshotRemoveRequest<I>, + &SnapshotRemoveRequest<I>::handle_detach_stale_child>(this); + r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); + return; + } else if (r < 0) { + this->async_complete(r); + return; + } + } + + detach_child(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::handle_detach_stale_child(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to detach stale child: " << cpp_strerror(r) + << dendl; + this->complete(r); + return; + } + + m_child_attached = false; + list_children(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::detach_child() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + bool detach_child = false; + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + RWLock::RLocker parent_locker(image_ctx.parent_lock); + + cls::rbd::ParentImageSpec our_pspec; + int r = image_ctx.get_parent_spec(m_snap_id, &our_pspec); + if (r < 0) { + if (r == -ENOENT) { + ldout(cct, 1) << "No such snapshot" << dendl; + } else { + lderr(cct) << "failed to retrieve parent spec" << dendl; + } + + this->async_complete(r); + return; + } + + if (image_ctx.parent_md.spec != our_pspec && + (scan_for_parents(our_pspec) == -ENOENT)) { + // no other references to the parent image + detach_child = true; + } + } + + if (!detach_child) { + // HEAD image or other snapshots still associated with parent + remove_object_map(); + return; + } + + ldout(cct, 5) << dendl; + auto ctx = create_context_callback< + SnapshotRemoveRequest<I>, + &SnapshotRemoveRequest<I>::handle_detach_child>(this); + auto req = image::DetachChildRequest<I>::create(image_ctx, ctx); + req->send(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::handle_detach_child(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to detach child from parent: " << cpp_strerror(r) + << dendl; + this->complete(r); + return; + } + + remove_object_map(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::remove_object_map() { + I &image_ctx = this->m_image_ctx; + if (m_child_attached) { + // if a clone v2 child is attached to this snapshot, we cannot + // proceed. It's only an error if the snap was already in the trash + this->complete(m_trashed_snapshot ? 0 : -EBUSY); + return; + } + + CephContext *cct = image_ctx.cct; + + { + RWLock::RLocker owner_lock(image_ctx.owner_lock); + RWLock::WLocker snap_locker(image_ctx.snap_lock); + RWLock::RLocker object_map_locker(image_ctx.object_map_lock); + if (image_ctx.object_map != nullptr) { + ldout(cct, 5) << dendl; + + auto ctx = create_context_callback< + SnapshotRemoveRequest<I>, + &SnapshotRemoveRequest<I>::handle_remove_object_map>(this); + image_ctx.object_map->snapshot_remove(m_snap_id, ctx); + return; + } + } + + // object map disabled + release_snap_id(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::handle_remove_object_map(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to remove snapshot object map: " << cpp_strerror(r) + << dendl; + this->complete(r); + return; + } + + release_snap_id(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::release_snap_id() { + I &image_ctx = this->m_image_ctx; + + if (!image_ctx.data_ctx.is_valid()) { + remove_snap(); + return; + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_snap_id << dendl; + + auto aio_comp = create_rados_callback< + SnapshotRemoveRequest<I>, + &SnapshotRemoveRequest<I>::handle_release_snap_id>(this); + image_ctx.data_ctx.aio_selfmanaged_snap_remove(m_snap_id, aio_comp); + aio_comp->release(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::handle_release_snap_id(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + lderr(cct) << "failed to release snap id: " << cpp_strerror(r) << dendl; + this->complete(r); + return; + } + + remove_snap(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::remove_snap() { + I &image_ctx = this->m_image_ctx; + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + librados::ObjectWriteOperation op; + if (image_ctx.old_format) { + cls_client::old_snapshot_remove(&op, m_snap_name); + } else { + cls_client::snapshot_remove(&op, m_snap_id); + } + + auto aio_comp = create_rados_callback< + SnapshotRemoveRequest<I>, + &SnapshotRemoveRequest<I>::handle_remove_snap>(this); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void SnapshotRemoveRequest<I>::handle_remove_snap(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to remove snapshot: " << cpp_strerror(r) << dendl; + this->complete(r); + return; + } + + remove_snap_context(); + this->complete(0); +} + +template <typename I> +void SnapshotRemoveRequest<I>::remove_snap_context() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + RWLock::WLocker snap_locker(image_ctx.snap_lock); + image_ctx.rm_snap(m_snap_namespace, m_snap_name, m_snap_id); +} + +template <typename I> +int SnapshotRemoveRequest<I>::scan_for_parents( + cls::rbd::ParentImageSpec &pspec) { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.snap_lock.is_locked()); + ceph_assert(image_ctx.parent_lock.is_locked()); + + if (pspec.pool_id != -1) { + map<uint64_t, SnapInfo>::iterator it; + for (it = image_ctx.snap_info.begin(); + it != image_ctx.snap_info.end(); ++it) { + // skip our snap id (if checking base image, CEPH_NOSNAP won't match) + if (it->first == m_snap_id) { + continue; + } + if (it->second.parent.spec == pspec) { + break; + } + } + if (it == image_ctx.snap_info.end()) { + return -ENOENT; + } + } + return 0; +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::SnapshotRemoveRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/SnapshotRemoveRequest.h b/src/librbd/operation/SnapshotRemoveRequest.h new file mode 100644 index 00000000..a47bbc0a --- /dev/null +++ b/src/librbd/operation/SnapshotRemoveRequest.h @@ -0,0 +1,122 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_REMOVE_REQUEST_H +#define CEPH_LIBRBD_OPERATION_SNAPSHOT_REMOVE_REQUEST_H + +#include "librbd/operation/Request.h" +#include "include/buffer.h" +#include "librbd/Types.h" +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class SnapshotRemoveRequest : public Request<ImageCtxT> { +public: + /** + * @verbatim + * + * <start> + * | + * v + * TRASH_SNAP + * | + * v (skip if unsupported) + * GET_SNAP + * | + * v (skip if unnecessary) + * LIST_CHILDREN <-------------\ + * | | + * v (skip if unnecessary) | (repeat as needed) + * DETACH_STALE_CHILD ---------/ + * | + * v (skip if unnecessary) + * DETACH_CHILD + * | + * v (skip if disabled/in-use) + * REMOVE_OBJECT_MAP + * | + * v (skip if in-use) + * RELEASE_SNAP_ID + * | + * v (skip if in-use) + * REMOVE_SNAP + * | + * v + * <finish> + * + * @endverbatim + */ + + static SnapshotRemoveRequest *create( + ImageCtxT &image_ctx, const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name, uint64_t snap_id, Context *on_finish) { + return new SnapshotRemoveRequest(image_ctx, on_finish, snap_namespace, + snap_name, snap_id); + } + + SnapshotRemoveRequest(ImageCtxT &image_ctx, Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name, + uint64_t snap_id); + +protected: + void send_op() override; + bool should_complete(int r) override; + + journal::Event create_event(uint64_t op_tid) const override { + return journal::SnapRemoveEvent(op_tid, m_snap_namespace, m_snap_name); + } + +private: + cls::rbd::SnapshotNamespace m_snap_namespace; + cls::rbd::ChildImageSpecs m_child_images; + std::string m_snap_name; + uint64_t m_snap_id; + bool m_trashed_snapshot = false; + bool m_child_attached = false; + + ceph::bufferlist m_out_bl; + + void trash_snap(); + void handle_trash_snap(int r); + + void get_snap(); + void handle_get_snap(int r); + + void list_children(); + void handle_list_children(int r); + + void detach_stale_child(); + void handle_detach_stale_child(int r); + + void detach_child(); + void handle_detach_child(int r); + + void remove_object_map(); + void handle_remove_object_map(int r); + + void release_snap_id(); + void handle_release_snap_id(int r); + + void remove_snap(); + void handle_remove_snap(int r); + + void remove_snap_context(); + int scan_for_parents(cls::rbd::ParentImageSpec &pspec); + +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::SnapshotRemoveRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_REMOVE_REQUEST_H diff --git a/src/librbd/operation/SnapshotRenameRequest.cc b/src/librbd/operation/SnapshotRenameRequest.cc new file mode 100644 index 00000000..b25b991f --- /dev/null +++ b/src/librbd/operation/SnapshotRenameRequest.cc @@ -0,0 +1,104 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/SnapshotRenameRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::SnapshotRenameRequest: " + +namespace librbd { +namespace operation { + +namespace { + +template <typename I> +std::ostream& operator<<(std::ostream& os, + const typename SnapshotRenameRequest<I>::State& state) { + switch(state) { + case SnapshotRenameRequest<I>::STATE_RENAME_SNAP: + os << "RENAME_SNAP"; + break; + } + return os; +} + +} // anonymous namespace + +template <typename I> +SnapshotRenameRequest<I>::SnapshotRenameRequest(I &image_ctx, + Context *on_finish, + uint64_t snap_id, + const std::string &snap_name) + : Request<I>(image_ctx, on_finish), m_snap_id(snap_id), + m_snap_name(snap_name), m_state(STATE_RENAME_SNAP) { +} + +template <typename I> +journal::Event SnapshotRenameRequest<I>::create_event(uint64_t op_tid) const { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.snap_lock.is_locked()); + + std::string src_snap_name; + auto snap_info_it = image_ctx.snap_info.find(m_snap_id); + if (snap_info_it != image_ctx.snap_info.end()) { + src_snap_name = snap_info_it->second.name; + } + + return journal::SnapRenameEvent(op_tid, m_snap_id, src_snap_name, + m_snap_name); +} + +template <typename I> +void SnapshotRenameRequest<I>::send_op() { + send_rename_snap(); +} + +template <typename I> +bool SnapshotRenameRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", " + << "r=" << r << dendl; + if (r < 0) { + if (r == -EEXIST) { + ldout(cct, 1) << "snapshot already exists" << dendl; + } else { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + } + return true; +} + +template <typename I> +void SnapshotRenameRequest<I>::send_rename_snap() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + RWLock::RLocker md_locker(image_ctx.md_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + librados::ObjectWriteOperation op; + if (image_ctx.old_format) { + cls_client::old_snapshot_rename(&op, m_snap_id, m_snap_name); + } else { + cls_client::snapshot_rename(&op, m_snap_id, m_snap_name); + } + + librados::AioCompletion *rados_completion = this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, + rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::SnapshotRenameRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/SnapshotRenameRequest.h b/src/librbd/operation/SnapshotRenameRequest.h new file mode 100644 index 00000000..697772e0 --- /dev/null +++ b/src/librbd/operation/SnapshotRenameRequest.h @@ -0,0 +1,63 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_RENAME_REQUEST_H +#define CEPH_LIBRBD_OPERATION_SNAPSHOT_RENAME_REQUEST_H + +#include "librbd/operation/Request.h" +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class SnapshotRenameRequest : public Request<ImageCtxT> { +public: + /** + * Snap Rename goes through the following state machine: + * + * @verbatim + * + * <start> + * | + * v + * STATE_RENAME_SNAP + * | + * v + * <finish> + * + * @endverbatim + * + */ + enum State { + STATE_RENAME_SNAP + }; + + SnapshotRenameRequest(ImageCtxT &image_ctx, Context *on_finish, + uint64_t snap_id, const std::string &snap_name); + + journal::Event create_event(uint64_t op_tid) const override; + +protected: + void send_op() override; + bool should_complete(int r) override; + +private: + uint64_t m_snap_id; + std::string m_snap_name; + State m_state; + + void send_rename_snap(); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::SnapshotRenameRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_RENAME_REQUEST_H diff --git a/src/librbd/operation/SnapshotRollbackRequest.cc b/src/librbd/operation/SnapshotRollbackRequest.cc new file mode 100644 index 00000000..596570a3 --- /dev/null +++ b/src/librbd/operation/SnapshotRollbackRequest.cc @@ -0,0 +1,412 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/SnapshotRollbackRequest.h" +#include "include/rados/librados.hpp" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "librbd/io/ImageRequestWQ.h" +#include "librbd/io/ObjectDispatcher.h" +#include "librbd/operation/ResizeRequest.h" +#include "osdc/Striper.h" +#include <boost/lambda/bind.hpp> +#include <boost/lambda/construct.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::SnapshotRollbackRequest: " + +namespace librbd { +namespace operation { + +using util::create_context_callback; +using util::create_rados_callback; + +namespace { + +template <typename I> +class C_RollbackObject : public C_AsyncObjectThrottle<I> { +public: + C_RollbackObject(AsyncObjectThrottle<I> &throttle, I *image_ctx, + uint64_t snap_id, uint64_t object_num, + uint64_t head_num_objects, + decltype(I::object_map) snap_object_map) + : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_snap_id(snap_id), + m_object_num(object_num), m_head_num_objects(head_num_objects), + m_snap_object_map(snap_object_map) { + } + + int send() override { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << "C_RollbackObject: " << __func__ << ": object_num=" + << m_object_num << dendl; + + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (m_object_num < m_head_num_objects && + m_snap_object_map != nullptr && + !image_ctx.object_map->object_may_exist(m_object_num) && + !m_snap_object_map->object_may_exist(m_object_num)) { + return 1; + } + } + + std::string oid = image_ctx.get_object_name(m_object_num); + + librados::ObjectWriteOperation op; + op.selfmanaged_snap_rollback(m_snap_id); + + librados::AioCompletion *rados_completion = + util::create_rados_callback(this); + image_ctx.data_ctx.aio_operate(oid, rados_completion, &op); + rados_completion->release(); + return 0; + } + +private: + uint64_t m_snap_id; + uint64_t m_object_num; + uint64_t m_head_num_objects; + decltype(I::object_map) m_snap_object_map; +}; + +} // anonymous namespace + +template <typename I> +SnapshotRollbackRequest<I>::SnapshotRollbackRequest(I &image_ctx, + Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name, + uint64_t snap_id, + uint64_t snap_size, + ProgressContext &prog_ctx) + : Request<I>(image_ctx, on_finish), m_snap_namespace(snap_namespace), + m_snap_name(snap_name), m_snap_id(snap_id), + m_snap_size(snap_size), m_prog_ctx(prog_ctx), + m_object_map(nullptr), m_snap_object_map(nullptr) { +} + +template <typename I> +SnapshotRollbackRequest<I>::~SnapshotRollbackRequest() { + I &image_ctx = this->m_image_ctx; + if (m_blocking_writes) { + image_ctx.io_work_queue->unblock_writes(); + } + delete m_object_map; + delete m_snap_object_map; +} + +template <typename I> +void SnapshotRollbackRequest<I>::send_op() { + send_block_writes(); +} + +template <typename I> +void SnapshotRollbackRequest<I>::send_block_writes() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + m_blocking_writes = true; + image_ctx.io_work_queue->block_writes(create_context_callback< + SnapshotRollbackRequest<I>, + &SnapshotRollbackRequest<I>::handle_block_writes>(this)); +} + +template <typename I> +Context *SnapshotRollbackRequest<I>::handle_block_writes(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; + return this->create_context_finisher(*result); + } + + send_resize_image(); + return nullptr; +} + +template <typename I> +void SnapshotRollbackRequest<I>::send_resize_image() { + I &image_ctx = this->m_image_ctx; + + uint64_t current_size; + { + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + current_size = image_ctx.get_image_size(CEPH_NOSNAP); + } + + m_head_num_objects = Striper::get_num_objects(image_ctx.layout, current_size); + + if (current_size == m_snap_size) { + send_get_snap_object_map(); + return; + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + Context *ctx = create_context_callback< + SnapshotRollbackRequest<I>, + &SnapshotRollbackRequest<I>::handle_resize_image>(this); + ResizeRequest<I> *req = ResizeRequest<I>::create(image_ctx, ctx, m_snap_size, + true, m_no_op_prog_ctx, 0, true); + req->send(); +} + +template <typename I> +Context *SnapshotRollbackRequest<I>::handle_resize_image(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to resize image for rollback: " + << cpp_strerror(*result) << dendl; + return this->create_context_finisher(*result); + } + + send_get_snap_object_map(); + return nullptr; +} + +template <typename I> +void SnapshotRollbackRequest<I>::send_get_snap_object_map() { + I &image_ctx = this->m_image_ctx; + + uint64_t flags = 0; + bool object_map_enabled; + CephContext *cct = image_ctx.cct; + { + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + object_map_enabled = (image_ctx.object_map != nullptr); + int r = image_ctx.get_flags(m_snap_id, &flags); + if (r < 0) { + object_map_enabled = false; + } + } + if (object_map_enabled && + (flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0) { + lderr(cct) << "warning: object-map is invalid for snapshot" << dendl; + object_map_enabled = false; + } + if (!object_map_enabled) { + send_rollback_object_map(); + return; + } + + ldout(cct, 5) << this << " " << __func__ << dendl; + + m_snap_object_map = image_ctx.create_object_map(m_snap_id); + + Context *ctx = create_context_callback< + SnapshotRollbackRequest<I>, + &SnapshotRollbackRequest<I>::handle_get_snap_object_map>(this); + m_snap_object_map->open(ctx); + return; +} + +template <typename I> +Context *SnapshotRollbackRequest<I>::handle_get_snap_object_map(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << this << " " << __func__ << ": failed to open object map: " + << cpp_strerror(*result) << dendl; + delete m_snap_object_map; + m_snap_object_map = nullptr; + } + + send_rollback_object_map(); + return nullptr; +} + +template <typename I> +void SnapshotRollbackRequest<I>::send_rollback_object_map() { + I &image_ctx = this->m_image_ctx; + + { + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + RWLock::WLocker object_map_lock(image_ctx.object_map_lock); + if (image_ctx.object_map != nullptr) { + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + Context *ctx = create_context_callback< + SnapshotRollbackRequest<I>, + &SnapshotRollbackRequest<I>::handle_rollback_object_map>(this); + image_ctx.object_map->rollback(m_snap_id, ctx); + return; + } + } + + send_rollback_objects(); +} + +template <typename I> +Context *SnapshotRollbackRequest<I>::handle_rollback_object_map(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << this << " " << __func__ << ": failed to roll back object " + << "map: " << cpp_strerror(*result) << dendl; + + ceph_assert(m_object_map == nullptr); + apply(); + return this->create_context_finisher(*result); + } + + send_rollback_objects(); + return nullptr; +} + +template <typename I> +void SnapshotRollbackRequest<I>::send_rollback_objects() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + uint64_t num_objects; + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + num_objects = Striper::get_num_objects(image_ctx.layout, + image_ctx.get_current_size()); + } + + Context *ctx = create_context_callback< + SnapshotRollbackRequest<I>, + &SnapshotRollbackRequest<I>::handle_rollback_objects>(this); + typename AsyncObjectThrottle<I>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr<C_RollbackObject<I> >(), + boost::lambda::_1, &image_ctx, m_snap_id, boost::lambda::_2, + m_head_num_objects, m_snap_object_map)); + AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>( + this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, num_objects); + throttle->start_ops( + image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops")); +} + +template <typename I> +Context *SnapshotRollbackRequest<I>::handle_rollback_objects(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result == -ERESTART) { + ldout(cct, 5) << "snapshot rollback operation interrupted" << dendl; + return this->create_context_finisher(*result); + } else if (*result < 0) { + lderr(cct) << "failed to rollback objects: " << cpp_strerror(*result) + << dendl; + return this->create_context_finisher(*result); + } + + return send_refresh_object_map(); +} + +template <typename I> +Context *SnapshotRollbackRequest<I>::send_refresh_object_map() { + I &image_ctx = this->m_image_ctx; + + bool object_map_enabled; + { + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + object_map_enabled = (image_ctx.object_map != nullptr); + } + if (!object_map_enabled) { + return send_invalidate_cache(); + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + m_object_map = image_ctx.create_object_map(CEPH_NOSNAP); + + Context *ctx = create_context_callback< + SnapshotRollbackRequest<I>, + &SnapshotRollbackRequest<I>::handle_refresh_object_map>(this); + m_object_map->open(ctx); + return nullptr; +} + +template <typename I> +Context *SnapshotRollbackRequest<I>::handle_refresh_object_map(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << this << " " << __func__ << ": failed to open object map: " + << cpp_strerror(*result) << dendl; + delete m_object_map; + m_object_map = nullptr; + apply(); + + return this->create_context_finisher(*result); + } + + return send_invalidate_cache(); +} + +template <typename I> +Context *SnapshotRollbackRequest<I>::send_invalidate_cache() { + I &image_ctx = this->m_image_ctx; + + apply(); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + RWLock::RLocker owner_lock(image_ctx.owner_lock); + Context *ctx = create_context_callback< + SnapshotRollbackRequest<I>, + &SnapshotRollbackRequest<I>::handle_invalidate_cache>(this); + image_ctx.io_object_dispatcher->invalidate_cache(ctx); + return nullptr; +} + +template <typename I> +Context *SnapshotRollbackRequest<I>::handle_invalidate_cache(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result) + << dendl; + } + return this->create_context_finisher(*result); +} + +template <typename I> +void SnapshotRollbackRequest<I>::apply() { + I &image_ctx = this->m_image_ctx; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::WLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.object_map != nullptr) { + std::swap(m_object_map, image_ctx.object_map); + } +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::SnapshotRollbackRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/SnapshotRollbackRequest.h b/src/librbd/operation/SnapshotRollbackRequest.h new file mode 100644 index 00000000..e58a618f --- /dev/null +++ b/src/librbd/operation/SnapshotRollbackRequest.h @@ -0,0 +1,122 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_ROLLBACK_REQUEST_H +#define CEPH_LIBRBD_OPERATION_SNAPSHOT_ROLLBACK_REQUEST_H + +#include "librbd/operation/Request.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/journal/Types.h" +#include <string> + +class Context; + +namespace librbd { + +class ProgressContext; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class SnapshotRollbackRequest : public Request<ImageCtxT> { +public: + /** + * Snap Rollback goes through the following state machine: + * + * @verbatim + * + * <start> ---------\ + * | + * v + * STATE_BLOCK_WRITES + * | + * v + * STATE_RESIZE_IMAGE (skip if resize not + * | required) + * v + * STATE_GET_SNAP_OBJECT_MAP (skip if object) + * | map disabled) + * v + * STATE_ROLLBACK_OBJECT_MAP (skip if object + * | map disabled) + * v + * STATE_ROLLBACK_OBJECTS + * | + * v + * STATE_REFRESH_OBJECT_MAP (skip if object + * | map disabled) + * v + * STATE_INVALIDATE_CACHE (skip if cache + * | disabled) + * v + * <finish> + * + * @endverbatim + * + * The _RESIZE_IMAGE state is skipped if the image doesn't need to be resized. + * The _ROLLBACK_OBJECT_MAP state is skipped if the object map isn't enabled. + * The _INVALIDATE_CACHE state is skipped if the cache isn't enabled. + */ + + SnapshotRollbackRequest(ImageCtxT &image_ctx, Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name, + uint64_t snap_id, + uint64_t snap_size, ProgressContext &prog_ctx); + ~SnapshotRollbackRequest() override; + +protected: + void send_op() override; + bool should_complete(int r) override { + return true; + } + + journal::Event create_event(uint64_t op_tid) const override { + return journal::SnapRollbackEvent(op_tid, m_snap_namespace, m_snap_name); + } + +private: + cls::rbd::SnapshotNamespace m_snap_namespace; + std::string m_snap_name; + uint64_t m_snap_id; + uint64_t m_snap_size; + uint64_t m_head_num_objects; + ProgressContext &m_prog_ctx; + + NoOpProgressContext m_no_op_prog_ctx; + + bool m_blocking_writes = false; + decltype(ImageCtxT::object_map) m_object_map; + decltype(ImageCtxT::object_map) m_snap_object_map; + + void send_block_writes(); + Context *handle_block_writes(int *result); + + void send_resize_image(); + Context *handle_resize_image(int *result); + + void send_get_snap_object_map(); + Context *handle_get_snap_object_map(int *result); + + void send_rollback_object_map(); + Context *handle_rollback_object_map(int *result); + + void send_rollback_objects(); + Context *handle_rollback_objects(int *result); + + Context *send_refresh_object_map(); + Context *handle_refresh_object_map(int *result); + + Context *send_invalidate_cache(); + Context *handle_invalidate_cache(int *result); + + void apply(); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::SnapshotRollbackRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_ROLLBACK_REQUEST_H diff --git a/src/librbd/operation/SnapshotUnprotectRequest.cc b/src/librbd/operation/SnapshotUnprotectRequest.cc new file mode 100644 index 00000000..4a58dbc0 --- /dev/null +++ b/src/librbd/operation/SnapshotUnprotectRequest.cc @@ -0,0 +1,354 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/SnapshotUnprotectRequest.h" +#include "include/rados/librados.hpp" +#include "include/stringify.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/Types.h" +#include "librbd/Utils.h" +#include <list> +#include <set> +#include <vector> +#include <boost/lambda/bind.hpp> +#include <boost/lambda/construct.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::SnapshotUnprotectRequest: " + +namespace librbd { +namespace operation { + +namespace { + +typedef std::pair<int64_t, std::string> Pool; +typedef std::vector<Pool> Pools; + +template <typename I> +std::ostream& operator<<(std::ostream& os, + const typename SnapshotUnprotectRequest<I>::State& state) { + switch(state) { + case SnapshotUnprotectRequest<I>::STATE_UNPROTECT_SNAP_START: + os << "UNPROTECT_SNAP_START"; + break; + case SnapshotUnprotectRequest<I>::STATE_SCAN_POOL_CHILDREN: + os << "SCAN_POOL_CHILDREN"; + break; + case SnapshotUnprotectRequest<I>::STATE_UNPROTECT_SNAP_FINISH: + os << "UNPROTECT_SNAP_FINISH"; + break; + case SnapshotUnprotectRequest<I>::STATE_UNPROTECT_SNAP_ROLLBACK: + os << "UNPROTECT_SNAP_ROLLBACK"; + break; + default: + os << "UNKNOWN (" << static_cast<uint32_t>(state) << ")"; + break; + } + return os; +} + +template <typename I> +class C_ScanPoolChildren : public C_AsyncObjectThrottle<I> { +public: + C_ScanPoolChildren(AsyncObjectThrottle<I> &throttle, I *image_ctx, + const cls::rbd::ParentImageSpec &pspec, const Pools &pools, + size_t pool_idx) + : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_pspec(pspec), + m_pool(pools[pool_idx]) { + } + + int send() override { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " scanning pool '" << m_pool.second << "'" + << dendl; + + librados::Rados rados(image_ctx.md_ctx); + int64_t base_tier; + int r = rados.pool_get_base_tier(m_pool.first, &base_tier); + if (r == -ENOENT) { + ldout(cct, 1) << "pool '" << m_pool.second << "' no longer exists" + << dendl; + return 1; + } else if (r < 0) { + lderr(cct) << "error retrieving base tier for pool '" + << m_pool.second << "'" << dendl; + return r; + } + if (m_pool.first != base_tier) { + // pool is a cache; skip it + return 1; + } + + r = util::create_ioctx(image_ctx.md_ctx, "child image", m_pool.first, {}, + &m_pool_ioctx); + if (r == -ENOENT) { + return 1; + } else if (r < 0) { + return r; + } + + librados::ObjectReadOperation op; + cls_client::get_children_start(&op, m_pspec); + + librados::AioCompletion *rados_completion = + util::create_rados_callback(this); + r = m_pool_ioctx.aio_operate(RBD_CHILDREN, rados_completion, &op, + &m_children_bl); + ceph_assert(r == 0); + rados_completion->release(); + return 0; + } + +protected: + void finish(int r) override { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (r == 0) { + auto it = m_children_bl.cbegin(); + r= cls_client::get_children_finish(&it, &m_children); + } + + ldout(cct, 10) << this << " retrieved children: r=" << r << dendl; + if (r == -ENOENT) { + // no children -- proceed with unprotect + r = 0; + } else if (r < 0) { + lderr(cct) << "cannot get children for pool '" << m_pool.second << "'" + << dendl; + } else { + lderr(cct) << "cannot unprotect: at least " << m_children.size() << " " + << "child(ren) [" << joinify(m_children.begin(), + m_children.end(), + std::string(",")) << "] " + << "in pool '" << m_pool.second << "'" << dendl; + r = -EBUSY; + } + C_AsyncObjectThrottle<I>::finish(r); + } + +private: + cls::rbd::ParentImageSpec m_pspec; + Pool m_pool; + + IoCtx m_pool_ioctx; + std::set<std::string> m_children; + bufferlist m_children_bl; +}; + +} // anonymous namespace + +template <typename I> +SnapshotUnprotectRequest<I>::SnapshotUnprotectRequest(I &image_ctx, + Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name) + : Request<I>(image_ctx, on_finish), m_snap_namespace(snap_namespace), + m_snap_name(snap_name), m_state(STATE_UNPROTECT_SNAP_START), + m_ret_val(0), m_snap_id(CEPH_NOSNAP) { +} + +template <typename I> +void SnapshotUnprotectRequest<I>::send_op() { + send_unprotect_snap_start(); +} + +template <typename I> +bool SnapshotUnprotectRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", " + << "r=" << r << dendl; + if (r < 0) { + if (r == -EINVAL) { + ldout(cct, 1) << "snapshot is already unprotected" << dendl; + } else { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + if (m_ret_val == 0) { + m_ret_val = r; + } + } + + // use a different state machine once an error is encountered + if (m_ret_val < 0) { + return should_complete_error(); + } + + RWLock::RLocker owner_lock(image_ctx.owner_lock); + bool finished = false; + switch (m_state) { + case STATE_UNPROTECT_SNAP_START: + send_scan_pool_children(); + break; + case STATE_SCAN_POOL_CHILDREN: + send_unprotect_snap_finish(); + break; + case STATE_UNPROTECT_SNAP_FINISH: + finished = true; + break; + default: + ceph_abort(); + break; + } + return finished; +} + +template <typename I> +bool SnapshotUnprotectRequest<I>::should_complete_error() { + I &image_ctx = this->m_image_ctx; + RWLock::RLocker owner_locker(image_ctx.owner_lock); + CephContext *cct = image_ctx.cct; + lderr(cct) << this << " " << __func__ << ": " + << "ret_val=" << m_ret_val << dendl; + + bool finished = true; + if (m_state == STATE_SCAN_POOL_CHILDREN || + m_state == STATE_UNPROTECT_SNAP_FINISH) { + send_unprotect_snap_rollback(); + finished = false; + } + return finished; +} + +template <typename I> +void SnapshotUnprotectRequest<I>::send_unprotect_snap_start() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + int r = verify_and_send_unprotect_snap_start(); + if (r < 0) { + this->async_complete(r); + return; + } +} + +template <typename I> +void SnapshotUnprotectRequest<I>::send_scan_pool_children() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + m_state = STATE_SCAN_POOL_CHILDREN; + + // search all pools for children depending on this snapshot + // TODO add async version of wait_for_latest_osdmap + librados::Rados rados(image_ctx.md_ctx); + rados.wait_for_latest_osdmap(); + + // protect against pools being renamed/deleted + std::list<Pool> pool_list; + rados.pool_list2(pool_list); + + cls::rbd::ParentImageSpec pspec(image_ctx.md_ctx.get_id(), + image_ctx.md_ctx.get_namespace(), + image_ctx.id, m_snap_id); + Pools pools(pool_list.begin(), pool_list.end()); + + Context *ctx = this->create_callback_context(); + typename AsyncObjectThrottle<I>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr<C_ScanPoolChildren<I> >(), + boost::lambda::_1, &image_ctx, pspec, pools, boost::lambda::_2)); + AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>( + nullptr, image_ctx, context_factory, ctx, NULL, 0, pools.size()); + throttle->start_ops( + image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops")); +} + +template <typename I> +void SnapshotUnprotectRequest<I>::send_unprotect_snap_finish() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + m_state = STATE_UNPROTECT_SNAP_FINISH; + + librados::ObjectWriteOperation op; + cls_client::set_protection_status(&op, m_snap_id, + RBD_PROTECTION_STATUS_UNPROTECTED); + + librados::AioCompletion *comp = this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +void SnapshotUnprotectRequest<I>::send_unprotect_snap_rollback() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + m_state = STATE_UNPROTECT_SNAP_ROLLBACK; + + librados::ObjectWriteOperation op; + cls_client::set_protection_status(&op, m_snap_id, + RBD_PROTECTION_STATUS_PROTECTED); + + librados::AioCompletion *comp = this->create_callback_completion(); + int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +int SnapshotUnprotectRequest<I>::verify_and_send_unprotect_snap_start() { + I &image_ctx = this->m_image_ctx; + RWLock::RLocker md_locker(image_ctx.md_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + + CephContext *cct = image_ctx.cct; + if ((image_ctx.features & RBD_FEATURE_LAYERING) == 0) { + lderr(cct) << "image must support layering" << dendl; + return -ENOSYS; + } + + m_snap_id = image_ctx.get_snap_id(m_snap_namespace, m_snap_name); + if (m_snap_id == CEPH_NOSNAP) { + return -ENOENT; + } + + bool is_unprotected; + int r = image_ctx.is_snap_unprotected(m_snap_id, &is_unprotected); + if (r < 0) { + return r; + } + + if (is_unprotected) { + lderr(cct) << "snapshot is already unprotected" << dendl; + return -EINVAL; + } + + librados::ObjectWriteOperation op; + cls_client::set_protection_status(&op, m_snap_id, + RBD_PROTECTION_STATUS_UNPROTECTING); + + librados::AioCompletion *comp = this->create_callback_completion(); + r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); + + // TODO legacy code threw a notification post UNPROTECTING update -- required? + return 0; +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::SnapshotUnprotectRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/SnapshotUnprotectRequest.h b/src/librbd/operation/SnapshotUnprotectRequest.h new file mode 100644 index 00000000..19cc6d32 --- /dev/null +++ b/src/librbd/operation/SnapshotUnprotectRequest.h @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_UNPROTECT_REQUEST_H +#define CEPH_LIBRBD_OPERATION_SNAPSHOT_UNPROTECT_REQUEST_H + +#include "librbd/operation/Request.h" +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class SnapshotUnprotectRequest : public Request<ImageCtxT> { +public: + /** + * Snap Unprotect goes through the following state machine: + * + * @verbatim + * + * <start> + * | + * v + * STATE_UNPROTECT_SNAP_START + * | + * v + * STATE_SCAN_POOL_CHILDREN * * * * > STATE_UNPROTECT_SNAP_ROLLBACK + * | | + * v | + * STATE_UNPROTECT_SNAP_FINISH | + * | | + * v | + * <finish> <----------------------------/ + * + * @endverbatim + * + * If the unprotect operation needs to abort, the error path is followed + * to rollback the unprotect in-progress status on the image. + */ + enum State { + STATE_UNPROTECT_SNAP_START, + STATE_SCAN_POOL_CHILDREN, + STATE_UNPROTECT_SNAP_FINISH, + STATE_UNPROTECT_SNAP_ROLLBACK + }; + + SnapshotUnprotectRequest(ImageCtxT &image_ctx, Context *on_finish, + const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name); + +protected: + void send_op() override; + bool should_complete(int r) override; + + int filter_return_code(int r) const override { + if (m_ret_val < 0) { + return m_ret_val; + } + return 0; + } + + journal::Event create_event(uint64_t op_tid) const override { + return journal::SnapUnprotectEvent(op_tid, m_snap_namespace, m_snap_name); + } + +private: + cls::rbd::SnapshotNamespace m_snap_namespace; + std::string m_snap_name; + State m_state; + + int m_ret_val; + uint64_t m_snap_id; + + bool should_complete_error(); + + void send_unprotect_snap_start(); + void send_scan_pool_children(); + void send_unprotect_snap_finish(); + void send_unprotect_snap_rollback(); + + int verify_and_send_unprotect_snap_start(); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::SnapshotUnprotectRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_UNPROTECT_REQUEST_H diff --git a/src/librbd/operation/SparsifyRequest.cc b/src/librbd/operation/SparsifyRequest.cc new file mode 100644 index 00000000..53049f88 --- /dev/null +++ b/src/librbd/operation/SparsifyRequest.cc @@ -0,0 +1,519 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/SparsifyRequest.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/dout.h" +#include "common/errno.h" +#include "include/err.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/Types.h" +#include "librbd/io/ObjectRequest.h" +#include "osdc/Striper.h" +#include <boost/lambda/bind.hpp> +#include <boost/lambda/construct.hpp> + +#define dout_subsys ceph_subsys_rbd + +namespace librbd { +namespace operation { + +namespace { + +bool may_be_trimmed(const std::map<uint64_t,uint64_t> &extent_map, + const bufferlist &bl, size_t sparse_size, + uint64_t *new_end_ptr) { + if (extent_map.empty()) { + *new_end_ptr = 0; + return true; + } + + uint64_t end = extent_map.rbegin()->first + extent_map.rbegin()->second; + uint64_t new_end = end; + uint64_t bl_off = bl.length(); + + for (auto it = extent_map.rbegin(); it != extent_map.rend(); it++) { + auto off = it->first; + auto len = it->second; + + new_end = p2roundup<uint64_t>(off + len, sparse_size); + + uint64_t extent_left = len; + uint64_t sub_len = len % sparse_size; + if (sub_len == 0) { + sub_len = sparse_size; + } + while (extent_left > 0) { + ceph_assert(bl_off >= sub_len); + bl_off -= sub_len; + bufferlist sub_bl; + sub_bl.substr_of(bl, bl_off, sub_len); + if (!sub_bl.is_zero()) { + break; + } + new_end -= sparse_size; + extent_left -= sub_len; + sub_len = sparse_size; + } + if (extent_left > 0) { + break; + } + } + + if (new_end < end) { + *new_end_ptr = new_end; + return true; + } + + return false; +} + +} // anonymous namespace + +using util::create_context_callback; +using util::create_rados_callback; + +#undef dout_prefix +#define dout_prefix *_dout << "librbd::operation::SparsifyObject: " << this \ + << " " << m_oid << " " << __func__ << ": " + +template <typename I> +class C_SparsifyObject : public C_AsyncObjectThrottle<I> { +public: + + /** + * @verbatim + * + * <start> + * | + * v (not supported) + * SPARSIFY * * * * * * * * * * * * > READ < * * * * * * * * * * (concurrent + * | | * update is + * | (object map disabled) | (can trim) * detected) + * |------------------------\ V * + * | | PRE UPDATE OBJECT MAP * + * | (object map enabled) | | (if needed) * + * v | V * + * PRE UPDATE OBJECT MAP | TRIM * * * * * * * * * * * + * | | | + * v | V + * CHECK EXISTS | POST UPDATE OBJECT MAP + * | | | (if needed) + * v | | + * POST UPDATE OBJECT MAP | | + * | | | + * v | | + * <finish> <------------------/<-------/ + * + * @endverbatim + * + */ + + C_SparsifyObject(AsyncObjectThrottle<I> &throttle, I *image_ctx, + uint64_t object_no, size_t sparse_size) + : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_cct(image_ctx->cct), + m_object_no(object_no), m_sparse_size(sparse_size), + m_oid(image_ctx->get_object_name(object_no)) { + } + + int send() override { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + ldout(m_cct, 20) << dendl; + + if (!image_ctx.data_ctx.is_valid()) { + lderr(m_cct) << "missing data pool" << dendl; + return -ENODEV; + } + + if (image_ctx.exclusive_lock != nullptr && + !image_ctx.exclusive_lock->is_lock_owner()) { + ldout(m_cct, 1) << "lost exclusive lock during sparsify" << dendl; + return -ERESTART; + } + + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.object_map != nullptr && + !image_ctx.object_map->object_may_exist(m_object_no)) { + // can skip because the object does not exist + return 1; + } + + RWLock::RLocker parent_locker(image_ctx.parent_lock); + uint64_t overlap_objects = 0; + uint64_t overlap; + int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &overlap); + if (r == 0 && overlap > 0) { + overlap_objects = Striper::get_num_objects(image_ctx.layout, overlap); + } + m_remove_empty = (m_object_no >= overlap_objects); + } + + send_sparsify(); + return 0; + } + + void send_sparsify() { + I &image_ctx = this->m_image_ctx; + ldout(m_cct, 20) << dendl; + + librados::ObjectWriteOperation op; + cls_client::sparsify(&op, m_sparse_size, m_remove_empty); + auto comp = create_rados_callback< + C_SparsifyObject, &C_SparsifyObject::handle_sparsify>(this); + int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); + } + + void handle_sparsify(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r == -EOPNOTSUPP) { + m_trying_trim = true; + send_read(); + return; + } + + if (r == -ENOENT) { + finish_op(0); + return; + } + + if (r < 0) { + lderr(m_cct) << "failed to sparsify: " << cpp_strerror(r) << dendl; + finish_op(r); + return; + } + + send_pre_update_object_map(); + } + + void send_pre_update_object_map() { + I &image_ctx = this->m_image_ctx; + + if (m_trying_trim) { + if (!m_remove_empty || m_new_end != 0 || + !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) { + send_trim(); + return; + } + } else if (!m_remove_empty || + !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) { + finish_op(0); + return; + } + + ldout(m_cct, 20) << dendl; + + image_ctx.owner_lock.get_read(); + image_ctx.snap_lock.get_read(); + if (image_ctx.object_map == nullptr) { + // possible that exclusive lock was lost in background + lderr(m_cct) << "object map is not initialized" << dendl; + + image_ctx.snap_lock.put_read(); + image_ctx.owner_lock.put_read(); + finish_op(-EINVAL); + return; + } + + int r; + m_finish_op_ctx = image_ctx.exclusive_lock->start_op(&r); + if (m_finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + image_ctx.snap_lock.put_read(); + image_ctx.owner_lock.put_read(); + finish_op(r); + return; + } + + auto ctx = create_context_callback< + C_SparsifyObject<I>, + &C_SparsifyObject<I>::handle_pre_update_object_map>(this); + + image_ctx.object_map_lock.get_write(); + bool sent = image_ctx.object_map->template aio_update< + Context, &Context::complete>(CEPH_NOSNAP, m_object_no, OBJECT_PENDING, + OBJECT_EXISTS, {}, false, ctx); + + // NOTE: state machine might complete before we reach here + image_ctx.object_map_lock.put_write(); + image_ctx.snap_lock.put_read(); + image_ctx.owner_lock.put_read(); + if (!sent) { + finish_op(0); + } + } + + void handle_pre_update_object_map(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to update object map: " << cpp_strerror(r) + << dendl; + finish_op(r); + return; + } + + if (m_trying_trim) { + send_trim(); + } else { + send_check_exists(); + } + } + + void send_check_exists() { + I &image_ctx = this->m_image_ctx; + + ldout(m_cct, 20) << dendl; + + librados::ObjectReadOperation op; + op.stat(NULL, NULL, NULL); + m_bl.clear(); + auto comp = create_rados_callback< + C_SparsifyObject, &C_SparsifyObject::handle_check_exists>(this); + int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl); + ceph_assert(r == 0); + comp->release(); + } + + void handle_check_exists(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "stat failed: " << cpp_strerror(r) << dendl; + finish_op(r); + return; + } + + send_post_update_object_map(r == 0); + } + + void send_post_update_object_map(bool exists) { + I &image_ctx = this->m_image_ctx; + + ldout(m_cct, 20) << dendl; + + auto ctx = create_context_callback< + C_SparsifyObject<I>, + &C_SparsifyObject<I>::handle_post_update_object_map>(this); + bool sent; + { + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + + assert(image_ctx.exclusive_lock->is_lock_owner()); + assert(image_ctx.object_map != nullptr); + + RWLock::WLocker object_map_locker(image_ctx.object_map_lock); + + sent = image_ctx.object_map->template aio_update< + Context, &Context::complete>(CEPH_NOSNAP, m_object_no, + exists ? OBJECT_EXISTS : OBJECT_NONEXISTENT, + OBJECT_PENDING, {}, false, ctx); + } + if (!sent) { + ctx->complete(0); + } + } + + void handle_post_update_object_map(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to update object map: " << cpp_strerror(r) + << dendl; + finish_op(r); + return; + } + + finish_op(0); + } + + void send_read() { + I &image_ctx = this->m_image_ctx; + + ldout(m_cct, 20) << dendl; + + librados::ObjectReadOperation op; + m_bl.clear(); + op.sparse_read(0, image_ctx.layout.object_size, &m_extent_map, &m_bl, + nullptr); + auto comp = create_rados_callback< + C_SparsifyObject, &C_SparsifyObject::handle_read>(this); + int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl); + ceph_assert(r == 0); + comp->release(); + } + + void handle_read(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + if (r == -ENOENT) { + r = 0; + } else { + lderr(m_cct) << "failed to read object: " << cpp_strerror(r) << dendl; + } + finish_op(r); + return; + } + + if (!may_be_trimmed(m_extent_map, m_bl, m_sparse_size, &m_new_end)) { + finish_op(0); + return; + } + + send_pre_update_object_map(); + } + + void send_trim() { + I &image_ctx = this->m_image_ctx; + + ldout(m_cct, 20) << dendl; + + ceph_assert(m_new_end < image_ctx.layout.object_size); + + librados::ObjectWriteOperation op; + m_bl.clear(); + m_bl.append_zero(image_ctx.layout.object_size - m_new_end); + op.cmpext(m_new_end, m_bl, nullptr); + if (m_new_end == 0 && m_remove_empty) { + op.remove(); + } else { + op.truncate(m_new_end); + } + + auto comp = create_rados_callback< + C_SparsifyObject, &C_SparsifyObject::handle_trim>(this); + int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); + } + + void handle_trim(int r) { + I &image_ctx = this->m_image_ctx; + + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r <= -MAX_ERRNO) { + m_finish_op_ctx->complete(0); + m_finish_op_ctx = nullptr; + send_read(); + return; + } + + if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "failed to trim: " << cpp_strerror(r) << dendl; + finish_op(r); + return; + } + + if (!m_remove_empty || m_new_end != 0 || + !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) { + finish_op(0); + return; + } + + send_post_update_object_map(false); + } + + void finish_op(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (m_finish_op_ctx != nullptr) { + m_finish_op_ctx->complete(0); + } + this->complete(r); + } + +private: + CephContext *m_cct; + uint64_t m_object_no; + size_t m_sparse_size; + std::string m_oid; + + bool m_remove_empty = false; + bool m_trying_trim = false; + bufferlist m_bl; + std::map<uint64_t,uint64_t> m_extent_map; + uint64_t m_new_end = 0; + Context *m_finish_op_ctx = nullptr; +}; + +#undef dout_prefix +#define dout_prefix *_dout << "librbd::operation::SparsifyRequest: " << this \ + << " " << __func__ << ": " + +template <typename I> +bool SparsifyRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + if (r < 0) { + lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; + } + return true; +} + +template <typename I> +void SparsifyRequest<I>::send_op() { + sparsify_objects(); +} + +template <typename I> +void SparsifyRequest<I>::sparsify_objects() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << dendl; + + assert(image_ctx.owner_lock.is_locked()); + + uint64_t objects = 0; + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + objects = image_ctx.get_object_count(CEPH_NOSNAP); + } + + auto ctx = create_context_callback< + SparsifyRequest<I>, + &SparsifyRequest<I>::handle_sparsify_objects>(this); + typename AsyncObjectThrottle<I>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr<C_SparsifyObject<I> >(), + boost::lambda::_1, &image_ctx, boost::lambda::_2, m_sparse_size)); + AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>( + this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, objects); + throttle->start_ops( + image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops")); +} + +template <typename I> +void SparsifyRequest<I>::handle_sparsify_objects(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r == -ERESTART) { + ldout(cct, 5) << "sparsify operation interrupted" << dendl; + this->complete(r); + return; + } else if (r < 0) { + lderr(cct) << "sparsify encountered an error: " << cpp_strerror(r) << dendl; + this->complete(r); + return; + } + + this->complete(0); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::SparsifyRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/SparsifyRequest.h b/src/librbd/operation/SparsifyRequest.h new file mode 100644 index 00000000..74f9eb72 --- /dev/null +++ b/src/librbd/operation/SparsifyRequest.h @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_OPERATION_SPARSIFY_REQUEST_H +#define CEPH_LIBRBD_OPERATION_SPARSIFY_REQUEST_H + +#include "librbd/operation/Request.h" +#include "common/snap_types.h" + +namespace librbd { + +class ImageCtx; +class ProgressContext; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class SparsifyRequest : public Request<ImageCtxT> +{ +public: + SparsifyRequest(ImageCtxT &image_ctx, size_t sparse_size, Context *on_finish, + ProgressContext &prog_ctx) + : Request<ImageCtxT>(image_ctx, on_finish), m_sparse_size(sparse_size), + m_prog_ctx(prog_ctx) { + } + +protected: + void send_op() override; + bool should_complete(int r) override; + bool can_affect_io() const override { + return true; + } + journal::Event create_event(uint64_t op_tid) const override { + ceph_abort(); + return journal::UnknownEvent(); + } + +private: + /** + * @verbatim + * + * <start> + * | + * v + * SPARSIFY OBJECTS + * | + * v + * <finish> + * + * @endverbatim + */ + + size_t m_sparse_size; + ProgressContext &m_prog_ctx; + + void sparsify_objects(); + void handle_sparsify_objects(int r); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::SparsifyRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_SPARSIFY_REQUEST_H diff --git a/src/librbd/operation/TrimRequest.cc b/src/librbd/operation/TrimRequest.cc new file mode 100644 index 00000000..15f6a5df --- /dev/null +++ b/src/librbd/operation/TrimRequest.cc @@ -0,0 +1,377 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/TrimRequest.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "librbd/io/ObjectDispatchSpec.h" +#include "librbd/io/ObjectDispatcher.h" +#include "common/ContextCompletion.h" +#include "common/dout.h" +#include "common/errno.h" +#include "osdc/Striper.h" + +#include <boost/bind.hpp> +#include <boost/lambda/bind.hpp> +#include <boost/lambda/construct.hpp> +#include <boost/scope_exit.hpp> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::TrimRequest: " + +namespace librbd { +namespace operation { + +template <typename I> +class C_CopyupObject : public C_AsyncObjectThrottle<I> { +public: + C_CopyupObject(AsyncObjectThrottle<I> &throttle, I *image_ctx, + ::SnapContext snapc, uint64_t object_no) + : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_snapc(snapc), + m_object_no(object_no) + { + } + + int send() override { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + string oid = image_ctx.get_object_name(m_object_no); + ldout(image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl; + + auto object_dispatch_spec = io::ObjectDispatchSpec::create_discard( + &image_ctx, io::OBJECT_DISPATCH_LAYER_NONE, oid, m_object_no, 0, + image_ctx.layout.object_size, m_snapc, + io::OBJECT_DISCARD_FLAG_DISABLE_OBJECT_MAP_UPDATE, 0, {}, this); + object_dispatch_spec->send(); + return 0; + } +private: + ::SnapContext m_snapc; + uint64_t m_object_no; +}; + +template <typename I> +class C_RemoveObject : public C_AsyncObjectThrottle<I> { +public: + C_RemoveObject(AsyncObjectThrottle<I> &throttle, I *image_ctx, + uint64_t object_no) + : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_object_no(object_no) + { + } + + int send() override { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.object_map != nullptr && + !image_ctx.object_map->object_may_exist(m_object_no)) { + return 1; + } + } + + string oid = image_ctx.get_object_name(m_object_no); + ldout(image_ctx.cct, 10) << "removing " << oid << dendl; + + librados::AioCompletion *rados_completion = + util::create_rados_callback(this); + int r = image_ctx.data_ctx.aio_remove(oid, rados_completion); + ceph_assert(r == 0); + rados_completion->release(); + return 0; + } + +private: + uint64_t m_object_no; +}; + +template <typename I> +TrimRequest<I>::TrimRequest(I &image_ctx, Context *on_finish, + uint64_t original_size, uint64_t new_size, + ProgressContext &prog_ctx) + : AsyncRequest<I>(image_ctx, on_finish), m_new_size(new_size), + m_prog_ctx(prog_ctx) +{ + uint64_t period = image_ctx.get_stripe_period(); + uint64_t new_num_periods = ((m_new_size + period - 1) / period); + m_delete_off = std::min(new_num_periods * period, original_size); + // first object we can delete free and clear + m_delete_start = new_num_periods * image_ctx.get_stripe_count(); + m_delete_start_min = m_delete_start; + m_num_objects = Striper::get_num_objects(image_ctx.layout, original_size); + + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " trim image " << original_size << " -> " + << m_new_size << " periods " << new_num_periods + << " discard to offset " << m_delete_off + << " delete objects " << m_delete_start + << " to " << m_num_objects << dendl; +} + +template <typename I> +bool TrimRequest<I>::should_complete(int r) +{ + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " should_complete: r=" << r << dendl; + if (r == -ERESTART) { + ldout(cct, 5) << "trim operation interrupted" << dendl; + return true; + } else if (r < 0) { + lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl; + return true; + } + + RWLock::RLocker owner_lock(image_ctx.owner_lock); + switch (m_state) { + case STATE_PRE_TRIM: + ldout(cct, 5) << " PRE_TRIM" << dendl; + send_copyup_objects(); + break; + + case STATE_COPYUP_OBJECTS: + ldout(cct, 5) << " COPYUP_OBJECTS" << dendl; + send_remove_objects(); + break; + + case STATE_REMOVE_OBJECTS: + ldout(cct, 5) << " REMOVE_OBJECTS" << dendl; + send_post_trim(); + break; + + case STATE_POST_TRIM: + ldout(cct, 5) << " POST_TRIM" << dendl; + send_clean_boundary(); + break; + + case STATE_CLEAN_BOUNDARY: + ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl; + send_finish(0); + break; + + case STATE_FINISHED: + ldout(cct, 5) << "FINISHED" << dendl; + return true; + + default: + lderr(cct) << "invalid state: " << m_state << dendl; + ceph_abort(); + break; + } + return false; +} + +template <typename I> +void TrimRequest<I>::send() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + if (!image_ctx.data_ctx.is_valid()) { + lderr(cct) << "missing data pool" << dendl; + send_finish(-ENODEV); + return; + } + + send_pre_trim(); +} + +template<typename I> +void TrimRequest<I>::send_pre_trim() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + if (m_delete_start >= m_num_objects) { + send_clean_boundary(); + return; + } + + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.object_map != nullptr) { + ldout(image_ctx.cct, 5) << this << " send_pre_trim: " + << " delete_start_min=" << m_delete_start_min + << " num_objects=" << m_num_objects << dendl; + m_state = STATE_PRE_TRIM; + + ceph_assert(image_ctx.exclusive_lock->is_lock_owner()); + + RWLock::WLocker object_map_locker(image_ctx.object_map_lock); + if (image_ctx.object_map->template aio_update<AsyncRequest<I> >( + CEPH_NOSNAP, m_delete_start_min, m_num_objects, OBJECT_PENDING, + OBJECT_EXISTS, {}, false, this)) { + return; + } + } + } + + send_copyup_objects(); +} + +template<typename I> +void TrimRequest<I>::send_copyup_objects() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + ::SnapContext snapc; + bool has_snapshots; + uint64_t parent_overlap; + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + RWLock::RLocker parent_locker(image_ctx.parent_lock); + + snapc = image_ctx.snapc; + has_snapshots = !image_ctx.snaps.empty(); + int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap); + ceph_assert(r == 0); + } + + // copyup is only required for portion of image that overlaps parent + uint64_t copyup_end = Striper::get_num_objects(image_ctx.layout, + parent_overlap); + + // TODO: protect against concurrent shrink and snap create? + // skip to remove if no copyup is required. + if (copyup_end <= m_delete_start || !has_snapshots) { + send_remove_objects(); + return; + } + + uint64_t copyup_start = m_delete_start; + m_delete_start = copyup_end; + + ldout(image_ctx.cct, 5) << this << " send_copyup_objects: " + << " start object=" << copyup_start << ", " + << " end object=" << copyup_end << dendl; + m_state = STATE_COPYUP_OBJECTS; + + Context *ctx = this->create_callback_context(); + typename AsyncObjectThrottle<I>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject<I> >(), + boost::lambda::_1, &image_ctx, snapc, boost::lambda::_2)); + AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>( + this, image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start, + copyup_end); + throttle->start_ops( + image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops")); +} + +template <typename I> +void TrimRequest<I>::send_remove_objects() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + ldout(image_ctx.cct, 5) << this << " send_remove_objects: " + << " delete_start=" << m_delete_start + << " num_objects=" << m_num_objects << dendl; + m_state = STATE_REMOVE_OBJECTS; + + Context *ctx = this->create_callback_context(); + typename AsyncObjectThrottle<I>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr<C_RemoveObject<I> >(), + boost::lambda::_1, &image_ctx, boost::lambda::_2)); + AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>( + this, image_ctx, context_factory, ctx, &m_prog_ctx, m_delete_start, + m_num_objects); + throttle->start_ops( + image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops")); +} + +template<typename I> +void TrimRequest<I>::send_post_trim() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.object_map != nullptr) { + ldout(image_ctx.cct, 5) << this << " send_post_trim:" + << " delete_start_min=" << m_delete_start_min + << " num_objects=" << m_num_objects << dendl; + m_state = STATE_POST_TRIM; + + ceph_assert(image_ctx.exclusive_lock->is_lock_owner()); + + RWLock::WLocker object_map_locker(image_ctx.object_map_lock); + if (image_ctx.object_map->template aio_update<AsyncRequest<I> >( + CEPH_NOSNAP, m_delete_start_min, m_num_objects, OBJECT_NONEXISTENT, + OBJECT_PENDING, {}, false, this)) { + return; + } + } + } + + send_clean_boundary(); +} + +template <typename I> +void TrimRequest<I>::send_clean_boundary() { + I &image_ctx = this->m_image_ctx; + ceph_assert(image_ctx.owner_lock.is_locked()); + CephContext *cct = image_ctx.cct; + if (m_delete_off <= m_new_size) { + send_finish(0); + return; + } + + // should have been canceled prior to releasing lock + ceph_assert(image_ctx.exclusive_lock == nullptr || + image_ctx.exclusive_lock->is_lock_owner()); + uint64_t delete_len = m_delete_off - m_new_size; + ldout(image_ctx.cct, 5) << this << " send_clean_boundary: " + << " delete_off=" << m_delete_off + << " length=" << delete_len << dendl; + m_state = STATE_CLEAN_BOUNDARY; + + ::SnapContext snapc; + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + snapc = image_ctx.snapc; + } + + // discard the weird boundary + std::vector<ObjectExtent> extents; + Striper::file_to_extents(cct, image_ctx.format_string, + &image_ctx.layout, m_new_size, delete_len, 0, + extents); + + ContextCompletion *completion = + new ContextCompletion(this->create_async_callback_context(), true); + for (vector<ObjectExtent>::iterator p = extents.begin(); + p != extents.end(); ++p) { + ldout(cct, 20) << " ex " << *p << dendl; + Context *req_comp = new C_ContextCompletion(*completion); + + if (p->offset == 0) { + // treat as a full object delete on the boundary + p->length = image_ctx.layout.object_size; + } + + auto object_dispatch_spec = io::ObjectDispatchSpec::create_discard( + &image_ctx, io::OBJECT_DISPATCH_LAYER_NONE, p->oid.name, p->objectno, + p->offset, p->length, snapc, 0, 0, {}, req_comp); + object_dispatch_spec->send(); + } + completion->finish_adding_requests(); +} + +template <typename I> +void TrimRequest<I>::send_finish(int r) { + m_state = STATE_FINISHED; + this->async_complete(r); +} + +} // namespace operation +} // namespace librbd + +template class librbd::operation::TrimRequest<librbd::ImageCtx>; diff --git a/src/librbd/operation/TrimRequest.h b/src/librbd/operation/TrimRequest.h new file mode 100644 index 00000000..8526046c --- /dev/null +++ b/src/librbd/operation/TrimRequest.h @@ -0,0 +1,107 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H +#define CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H + +#include "librbd/AsyncRequest.h" + +namespace librbd +{ + +class ImageCtx; +class ProgressContext; + +namespace operation { + +template <typename ImageCtxT = ImageCtx> +class TrimRequest : public AsyncRequest<ImageCtxT> +{ +public: + static TrimRequest *create(ImageCtxT &image_ctx, Context *on_finish, + uint64_t original_size, uint64_t new_size, + ProgressContext &prog_ctx) { + return new TrimRequest(image_ctx, on_finish, original_size, new_size, + prog_ctx); + } + + TrimRequest(ImageCtxT &image_ctx, Context *on_finish, + uint64_t original_size, uint64_t new_size, + ProgressContext &prog_ctx); + + void send() override; + +protected: + /** + * Trim goes through the following state machine to remove whole objects, + * clean partially trimmed objects, and update the object map: + * + * @verbatim + * + * <start> . . . . . . . . . . . . . . . . . + * | . + * v (skip if not needed) . + * STATE_PRE_TRIM . + * | . + * v (skip if not needed) . + * STATE_COPYUP_OBJECTS . + * | . + * v (skip if not needed) . + * STATE_REMOVE_OBJECTS . + * | . + * v (skip if not needed) . + * STATE_POST_TRIM . + * | . + * v (skip if not needed) . + * STATE_CLEAN_BOUNDARY . + * | . + * v . + * STATE_FINISHED < . . . . . . . . . . . . . . . + * | + * v + * <finish> + * + * The _COPYUP_OBJECTS state is skipped if there is no parent overlap + * within the new image size and the image does not have any snapshots. + * The _PRE_TRIM/_POST_TRIM states are skipped if the object map + * isn't enabled. The _REMOVE_OBJECTS state is skipped if no whole objects + * are removed. The _CLEAN_BOUNDARY state is skipped if no boundary + * objects are cleaned. The state machine will immediately transition + * to _FINISHED state if there are no bytes to trim. + */ + + enum State { + STATE_PRE_TRIM, + STATE_COPYUP_OBJECTS, + STATE_REMOVE_OBJECTS, + STATE_POST_TRIM, + STATE_CLEAN_BOUNDARY, + STATE_FINISHED + }; + + bool should_complete(int r) override; + + State m_state = STATE_PRE_TRIM; + +private: + uint64_t m_delete_start; + uint64_t m_delete_start_min = 0; + uint64_t m_num_objects; + uint64_t m_delete_off; + uint64_t m_new_size; + ProgressContext &m_prog_ctx; + + void send_pre_trim(); + void send_copyup_objects(); + void send_remove_objects(); + void send_post_trim(); + + void send_clean_boundary(); + void send_finish(int r); +}; + +} // namespace operation +} // namespace librbd + +extern template class librbd::operation::TrimRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H |