summaryrefslogtreecommitdiffstats
path: root/src/librbd/operation
diff options
context:
space:
mode:
Diffstat (limited to 'src/librbd/operation')
-rw-r--r--src/librbd/operation/DisableFeaturesRequest.cc655
-rw-r--r--src/librbd/operation/DisableFeaturesRequest.h171
-rw-r--r--src/librbd/operation/EnableFeaturesRequest.cc494
-rw-r--r--src/librbd/operation/EnableFeaturesRequest.h135
-rw-r--r--src/librbd/operation/FlattenRequest.cc226
-rw-r--r--src/librbd/operation/FlattenRequest.h73
-rw-r--r--src/librbd/operation/MetadataRemoveRequest.cc60
-rw-r--r--src/librbd/operation/MetadataRemoveRequest.h44
-rw-r--r--src/librbd/operation/MetadataSetRequest.cc62
-rw-r--r--src/librbd/operation/MetadataSetRequest.h47
-rw-r--r--src/librbd/operation/MigrateRequest.cc238
-rw-r--r--src/librbd/operation/MigrateRequest.h68
-rw-r--r--src/librbd/operation/ObjectMapIterate.cc308
-rw-r--r--src/librbd/operation/ObjectMapIterate.h65
-rw-r--r--src/librbd/operation/RebuildObjectMapRequest.cc250
-rw-r--r--src/librbd/operation/RebuildObjectMapRequest.h84
-rw-r--r--src/librbd/operation/RenameRequest.cc257
-rw-r--r--src/librbd/operation/RenameRequest.h95
-rw-r--r--src/librbd/operation/Request.cc183
-rw-r--r--src/librbd/operation/Request.h107
-rw-r--r--src/librbd/operation/ResizeRequest.cc466
-rw-r--r--src/librbd/operation/ResizeRequest.h156
-rw-r--r--src/librbd/operation/SnapshotCreateRequest.cc449
-rw-r--r--src/librbd/operation/SnapshotCreateRequest.h148
-rw-r--r--src/librbd/operation/SnapshotLimitRequest.cc66
-rw-r--r--src/librbd/operation/SnapshotLimitRequest.h44
-rw-r--r--src/librbd/operation/SnapshotProtectRequest.cc118
-rw-r--r--src/librbd/operation/SnapshotProtectRequest.h68
-rw-r--r--src/librbd/operation/SnapshotRemoveRequest.cc505
-rw-r--r--src/librbd/operation/SnapshotRemoveRequest.h128
-rw-r--r--src/librbd/operation/SnapshotRenameRequest.cc102
-rw-r--r--src/librbd/operation/SnapshotRenameRequest.h63
-rw-r--r--src/librbd/operation/SnapshotRollbackRequest.cc424
-rw-r--r--src/librbd/operation/SnapshotRollbackRequest.h122
-rw-r--r--src/librbd/operation/SnapshotUnprotectRequest.cc353
-rw-r--r--src/librbd/operation/SnapshotUnprotectRequest.h94
-rw-r--r--src/librbd/operation/SparsifyRequest.cc514
-rw-r--r--src/librbd/operation/SparsifyRequest.h64
-rw-r--r--src/librbd/operation/TrimRequest.cc373
-rw-r--r--src/librbd/operation/TrimRequest.h107
40 files changed, 7986 insertions, 0 deletions
diff --git a/src/librbd/operation/DisableFeaturesRequest.cc b/src/librbd/operation/DisableFeaturesRequest.cc
new file mode 100644
index 000000000..32db4b518
--- /dev/null
+++ b/src/librbd/operation/DisableFeaturesRequest.cc
@@ -0,0 +1,655 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/DisableFeaturesRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/image/SetFlagsRequest.h"
+#include "librbd/io/ImageDispatcherInterface.h"
+#include "librbd/journal/RemoveRequest.h"
+#include "librbd/journal/TypeTraits.h"
+#include "librbd/mirror/DisableRequest.h"
+#include "librbd/object_map/RemoveRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::DisableFeaturesRequest: "
+
+namespace librbd {
+namespace operation {
+
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_callback;
+
+template <typename I>
+DisableFeaturesRequest<I>::DisableFeaturesRequest(I &image_ctx,
+ Context *on_finish,
+ uint64_t journal_op_tid,
+ uint64_t features,
+ bool force)
+ : Request<I>(image_ctx, on_finish, journal_op_tid), m_features(features),
+ m_force(force) {
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_op() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ ldout(cct, 20) << this << " " << __func__ << ": features=" << m_features
+ << dendl;
+
+ send_prepare_lock();
+}
+
+template <typename I>
+bool DisableFeaturesRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << " r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_prepare_lock() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ image_ctx.state->prepare_lock(create_async_context_callback(
+ image_ctx, create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_prepare_lock>(this)));
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_prepare_lock(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to lock image: " << cpp_strerror(*result) << dendl;
+ return this->create_context_finisher(*result);
+ }
+
+ send_block_writes();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_block_writes() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ std::unique_lock locker{image_ctx.owner_lock};
+ image_ctx.io_image_dispatcher->block_writes(create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_block_writes>(this));
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_block_writes(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
+ return handle_finish(*result);
+ }
+ m_writes_blocked = true;
+
+ {
+ std::unique_lock locker{image_ctx.owner_lock};
+ // avoid accepting new requests from peers while we manipulate
+ // the image features
+ if (image_ctx.exclusive_lock != nullptr &&
+ (image_ctx.journal == nullptr ||
+ !image_ctx.journal->is_journal_replaying())) {
+ image_ctx.exclusive_lock->block_requests(0);
+ m_requests_blocked = true;
+ }
+ }
+
+ return send_acquire_exclusive_lock(result);
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::send_acquire_exclusive_lock(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ {
+ std::unique_lock locker{image_ctx.owner_lock};
+ // if disabling features w/ exclusive lock supported, we need to
+ // acquire the lock to temporarily block IO against the image
+ if (image_ctx.exclusive_lock != nullptr &&
+ !image_ctx.exclusive_lock->is_lock_owner()) {
+ m_acquired_lock = true;
+
+ Context *ctx = create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_acquire_exclusive_lock>(
+ this, image_ctx.exclusive_lock);
+ image_ctx.exclusive_lock->acquire_lock(ctx);
+ return nullptr;
+ }
+ }
+
+ return handle_acquire_exclusive_lock(result);
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_acquire_exclusive_lock(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ image_ctx.owner_lock.lock_shared();
+ if (*result < 0) {
+ lderr(cct) << "failed to lock image: " << cpp_strerror(*result) << dendl;
+ image_ctx.owner_lock.unlock_shared();
+ return handle_finish(*result);
+ } else if (image_ctx.exclusive_lock != nullptr &&
+ !image_ctx.exclusive_lock->is_lock_owner()) {
+ lderr(cct) << "failed to acquire exclusive lock" << dendl;
+ *result = image_ctx.exclusive_lock->get_unlocked_op_error();
+ image_ctx.owner_lock.unlock_shared();
+ return handle_finish(*result);
+ }
+
+ do {
+ m_features &= image_ctx.features;
+
+ // interlock object-map and fast-diff together
+ if (((m_features & RBD_FEATURE_OBJECT_MAP) != 0) ||
+ ((m_features & RBD_FEATURE_FAST_DIFF) != 0)) {
+ m_features |= (RBD_FEATURE_OBJECT_MAP | RBD_FEATURE_FAST_DIFF);
+ }
+
+ m_new_features = image_ctx.features & ~m_features;
+ m_features_mask = m_features;
+
+ if ((m_features & RBD_FEATURE_EXCLUSIVE_LOCK) != 0) {
+ if ((m_new_features & RBD_FEATURE_OBJECT_MAP) != 0 ||
+ (m_new_features & RBD_FEATURE_JOURNALING) != 0) {
+ lderr(cct) << "cannot disable exclusive-lock. object-map "
+ "or journaling must be disabled before "
+ "disabling exclusive-lock." << dendl;
+ *result = -EINVAL;
+ break;
+ }
+ m_features_mask |= (RBD_FEATURE_OBJECT_MAP |
+ RBD_FEATURE_FAST_DIFF |
+ RBD_FEATURE_JOURNALING);
+ }
+ if ((m_features & RBD_FEATURE_FAST_DIFF) != 0) {
+ m_disable_flags |= RBD_FLAG_FAST_DIFF_INVALID;
+ }
+ if ((m_features & RBD_FEATURE_OBJECT_MAP) != 0) {
+ m_disable_flags |= RBD_FLAG_OBJECT_MAP_INVALID;
+ }
+ } while (false);
+ image_ctx.owner_lock.unlock_shared();
+
+ if (*result < 0) {
+ return handle_finish(*result);
+ }
+
+ send_get_mirror_mode();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_get_mirror_mode() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if ((m_features & RBD_FEATURE_JOURNALING) == 0) {
+ send_append_op_event();
+ return;
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ librados::ObjectReadOperation op;
+ cls_client::mirror_mode_get_start(&op);
+
+ using klass = DisableFeaturesRequest<I>;
+ librados::AioCompletion *comp =
+ create_rados_callback<klass, &klass::handle_get_mirror_mode>(this);
+ m_out_bl.clear();
+ int r = image_ctx.md_ctx.aio_operate(RBD_MIRRORING, comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_get_mirror_mode(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result == 0) {
+ auto it = m_out_bl.cbegin();
+ *result = cls_client::mirror_mode_get_finish(&it, &m_mirror_mode);
+ }
+
+ if (*result < 0 && *result != -ENOENT) {
+ lderr(cct) << "failed to retrieve pool mirror mode: "
+ << cpp_strerror(*result) << dendl;
+ return handle_finish(*result);
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << ": m_mirror_mode="
+ << m_mirror_mode << dendl;
+
+ send_get_mirror_image();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_get_mirror_image() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (m_mirror_mode != cls::rbd::MIRROR_MODE_IMAGE) {
+ send_disable_mirror_image();
+ return;
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ librados::ObjectReadOperation op;
+ cls_client::mirror_image_get_start(&op, image_ctx.id);
+
+ using klass = DisableFeaturesRequest<I>;
+ librados::AioCompletion *comp =
+ create_rados_callback<klass, &klass::handle_get_mirror_image>(this);
+ m_out_bl.clear();
+ int r = image_ctx.md_ctx.aio_operate(RBD_MIRRORING, comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_get_mirror_image(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ cls::rbd::MirrorImage mirror_image;
+
+ if (*result == 0) {
+ auto it = m_out_bl.cbegin();
+ *result = cls_client::mirror_image_get_finish(&it, &mirror_image);
+ }
+
+ if (*result < 0 && *result != -ENOENT) {
+ lderr(cct) << "failed to retrieve pool mirror image: "
+ << cpp_strerror(*result) << dendl;
+ return handle_finish(*result);
+ }
+
+ if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED &&
+ mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_JOURNAL && !m_force) {
+ lderr(cct) << "cannot disable journaling: journal-based mirroring "
+ << "enabled and mirror pool mode set to image"
+ << dendl;
+ *result = -EINVAL;
+ return handle_finish(*result);
+ }
+
+ if (mirror_image.mode != cls::rbd::MIRROR_IMAGE_MODE_JOURNAL) {
+ send_close_journal();
+ } else {
+ send_disable_mirror_image();
+ }
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_disable_mirror_image() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ Context *ctx = create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_disable_mirror_image>(this);
+
+ mirror::DisableRequest<I> *req =
+ mirror::DisableRequest<I>::create(&image_ctx, m_force, true, ctx);
+ req->send();
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_disable_mirror_image(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to disable image mirroring: " << cpp_strerror(*result)
+ << dendl;
+ // not fatal
+ }
+
+ send_close_journal();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_close_journal() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ {
+ std::unique_lock locker{image_ctx.owner_lock};
+ if (image_ctx.journal != nullptr) {
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ std::swap(m_journal, image_ctx.journal);
+ Context *ctx = create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_close_journal>(this);
+
+ m_journal->close(ctx);
+ return;
+ }
+ }
+
+ send_remove_journal();
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_close_journal(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to close image journal: " << cpp_strerror(*result)
+ << dendl;
+ }
+
+ ceph_assert(m_journal != nullptr);
+ m_journal->put();
+ m_journal = nullptr;
+
+ send_remove_journal();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_remove_journal() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ Context *ctx = create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_remove_journal>(this);
+
+ typename journal::TypeTraits<I>::ContextWQ* context_wq;
+ Journal<I>::get_work_queue(cct, &context_wq);
+
+ journal::RemoveRequest<I> *req = journal::RemoveRequest<I>::create(
+ image_ctx.md_ctx, image_ctx.id, librbd::Journal<>::IMAGE_CLIENT_ID,
+ context_wq, ctx);
+
+ req->send();
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_remove_journal(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to remove image journal: " << cpp_strerror(*result)
+ << dendl;
+ return handle_finish(*result);
+ }
+
+ send_append_op_event();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_append_op_event() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (!this->template append_op_event<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_append_op_event>(this)) {
+ send_remove_object_map();
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_append_op_event(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result)
+ << dendl;
+ return handle_finish(*result);
+ }
+
+ send_remove_object_map();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_remove_object_map() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ if ((m_features & RBD_FEATURE_OBJECT_MAP) == 0) {
+ send_set_features();
+ return;
+ }
+
+ Context *ctx = create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_remove_object_map>(this);
+
+ object_map::RemoveRequest<I> *req =
+ object_map::RemoveRequest<I>::create(&image_ctx, ctx);
+ req->send();
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_remove_object_map(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0 && *result != -ENOENT) {
+ lderr(cct) << "failed to remove object map: " << cpp_strerror(*result) << dendl;
+ return handle_finish(*result);
+ }
+
+ send_set_features();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_set_features() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": new_features="
+ << m_new_features << ", features_mask=" << m_features_mask
+ << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::set_features(&op, m_new_features, m_features_mask);
+
+ using klass = DisableFeaturesRequest<I>;
+ librados::AioCompletion *comp =
+ create_rados_callback<klass, &klass::handle_set_features>(this);
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_set_features(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result == -EINVAL && (m_features_mask & RBD_FEATURE_JOURNALING) != 0) {
+ // NOTE: infernalis OSDs will not accept a mask with new features, so
+ // re-attempt with a reduced mask.
+ ldout(cct, 5) << this << " " << __func__
+ << ": re-attempt with a reduced mask" << dendl;
+ m_features_mask &= ~RBD_FEATURE_JOURNALING;
+ send_set_features();
+ }
+
+ if (*result < 0) {
+ lderr(cct) << "failed to update features: " << cpp_strerror(*result)
+ << dendl;
+ return handle_finish(*result);
+ }
+
+ send_update_flags();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_update_flags() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (m_disable_flags == 0) {
+ send_notify_update();
+ return;
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << ": disable_flags="
+ << m_disable_flags << dendl;
+
+ Context *ctx = create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_update_flags>(this);
+
+ image::SetFlagsRequest<I> *req =
+ image::SetFlagsRequest<I>::create(&image_ctx, 0, m_disable_flags, ctx);
+ req->send();
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_update_flags(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to update image flags: " << cpp_strerror(*result)
+ << dendl;
+ return handle_finish(*result);
+ }
+
+ send_notify_update();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_notify_update() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ Context *ctx = create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_notify_update>(this);
+
+ image_ctx.notify_update(ctx);
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_notify_update(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (image_ctx.exclusive_lock == nullptr || !m_acquired_lock) {
+ return handle_finish(*result);
+ }
+
+ send_release_exclusive_lock();
+ return nullptr;
+}
+
+template <typename I>
+void DisableFeaturesRequest<I>::send_release_exclusive_lock() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ Context *ctx = create_context_callback<
+ DisableFeaturesRequest<I>,
+ &DisableFeaturesRequest<I>::handle_release_exclusive_lock>(
+ this, image_ctx.exclusive_lock);
+
+ image_ctx.exclusive_lock->release_lock(ctx);
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_release_exclusive_lock(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ return handle_finish(*result);
+}
+
+template <typename I>
+Context *DisableFeaturesRequest<I>::handle_finish(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << r << dendl;
+
+ {
+ std::unique_lock locker{image_ctx.owner_lock};
+ if (image_ctx.exclusive_lock != nullptr && m_requests_blocked) {
+ image_ctx.exclusive_lock->unblock_requests();
+ }
+
+ image_ctx.io_image_dispatcher->unblock_writes();
+ }
+ image_ctx.state->handle_prepare_lock_complete();
+
+ return this->create_context_finisher(r);
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::DisableFeaturesRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/DisableFeaturesRequest.h b/src/librbd/operation/DisableFeaturesRequest.h
new file mode 100644
index 000000000..719a03399
--- /dev/null
+++ b/src/librbd/operation/DisableFeaturesRequest.h
@@ -0,0 +1,171 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_DISABLE_FEATURES_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_DISABLE_FEATURES_REQUEST_H
+
+#include "librbd/ImageCtx.h"
+#include "librbd/operation/Request.h"
+#include "cls/rbd/cls_rbd_client.h"
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class DisableFeaturesRequest : public Request<ImageCtxT> {
+public:
+ static DisableFeaturesRequest *create(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t journal_op_tid,
+ uint64_t features, bool force) {
+ return new DisableFeaturesRequest(image_ctx, on_finish, journal_op_tid,
+ features, force);
+ }
+
+ DisableFeaturesRequest(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t journal_op_tid, uint64_t features, bool force);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+ bool can_affect_io() const override {
+ return true;
+ }
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::UpdateFeaturesEvent(op_tid, m_features, false);
+ }
+
+private:
+ /**
+ * DisableFeatures goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_PREPARE_LOCK
+ * |
+ * v
+ * STATE_BLOCK_WRITES
+ * |
+ * v
+ * STATE_ACQUIRE_EXCLUSIVE_LOCK (skip if not
+ * | required)
+ * | (disbling journaling)
+ * \-------------------\
+ * | |
+ * | V
+ * | STATE_GET_MIRROR_MODE
+ * |(not |
+ * | disabling v
+ * | journaling) STATE_GET_MIRROR_IMAGE
+ * | |
+ * | v
+ * | STATE_DISABLE_MIRROR_IMAGE (skip if not
+ * | | required)
+ * | v
+ * | STATE_CLOSE_JOURNAL
+ * | |
+ * | v
+ * | STATE_REMOVE_JOURNAL
+ * | |
+ * |/-------------------/
+ * |
+ * v
+ * STATE_APPEND_OP_EVENT (skip if journaling
+ * | disabled)
+ * v
+ * STATE_REMOVE_OBJECT_MAP (skip if not
+ * | disabling object map)
+ * v
+ * STATE_SET_FEATURES
+ * |
+ * v
+ * STATE_UPDATE_FLAGS
+ * |
+ * v
+ * STATE_NOTIFY_UPDATE
+ * |
+ * v
+ * STATE_REALEASE_EXCLUSIVE_LOCK (skip if not
+ * | required)
+ * | (unblock writes)
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ *
+ */
+
+ uint64_t m_features;
+ bool m_force;
+
+ bool m_acquired_lock = false;
+ bool m_writes_blocked = false;
+ bool m_image_lock_acquired = false;
+ bool m_requests_blocked = false;
+
+ uint64_t m_new_features = 0;
+ uint64_t m_disable_flags = 0;
+ uint64_t m_features_mask = 0;
+
+ decltype(ImageCtxT::journal) m_journal = nullptr;
+ cls::rbd::MirrorMode m_mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+ bufferlist m_out_bl;
+
+ void send_prepare_lock();
+ Context *handle_prepare_lock(int *result);
+
+ void send_block_writes();
+ Context *handle_block_writes(int *result);
+
+ Context *send_acquire_exclusive_lock(int *result);
+ Context *handle_acquire_exclusive_lock(int *result);
+
+ void send_get_mirror_mode();
+ Context *handle_get_mirror_mode(int *result);
+
+ void send_get_mirror_image();
+ Context *handle_get_mirror_image(int *result);
+
+ void send_disable_mirror_image();
+ Context *handle_disable_mirror_image(int *result);
+
+ void send_close_journal();
+ Context *handle_close_journal(int *result);
+
+ void send_remove_journal();
+ Context *handle_remove_journal(int *result);
+
+ void send_append_op_event();
+ Context *handle_append_op_event(int *result);
+
+ void send_remove_object_map();
+ Context *handle_remove_object_map(int *result);
+
+ void send_set_features();
+ Context *handle_set_features(int *result);
+
+ void send_update_flags();
+ Context *handle_update_flags(int *result);
+
+ void send_notify_update();
+ Context *handle_notify_update(int *result);
+
+ void send_release_exclusive_lock();
+ Context *handle_release_exclusive_lock(int *result);
+
+ Context *handle_finish(int r);
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::DisableFeaturesRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_DISABLE_FEATURES_REQUEST_H
diff --git a/src/librbd/operation/EnableFeaturesRequest.cc b/src/librbd/operation/EnableFeaturesRequest.cc
new file mode 100644
index 000000000..8e3dad94b
--- /dev/null
+++ b/src/librbd/operation/EnableFeaturesRequest.cc
@@ -0,0 +1,494 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/EnableFeaturesRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/image/SetFlagsRequest.h"
+#include "librbd/io/ImageDispatcherInterface.h"
+#include "librbd/journal/CreateRequest.h"
+#include "librbd/journal/TypeTraits.h"
+#include "librbd/mirror/EnableRequest.h"
+#include "librbd/object_map/CreateRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::EnableFeaturesRequest: "
+
+namespace librbd {
+namespace operation {
+
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_callback;
+
+template <typename I>
+EnableFeaturesRequest<I>::EnableFeaturesRequest(I &image_ctx,
+ Context *on_finish,
+ uint64_t journal_op_tid,
+ uint64_t features)
+ : Request<I>(image_ctx, on_finish, journal_op_tid), m_features(features) {
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_op() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ ldout(cct, 20) << this << " " << __func__ << ": features=" << m_features
+ << dendl;
+ send_prepare_lock();
+}
+
+template <typename I>
+bool EnableFeaturesRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << " r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_prepare_lock() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ image_ctx.state->prepare_lock(create_async_context_callback(
+ image_ctx, create_context_callback<
+ EnableFeaturesRequest<I>,
+ &EnableFeaturesRequest<I>::handle_prepare_lock>(this)));
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_prepare_lock(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to lock image: " << cpp_strerror(*result) << dendl;
+ return this->create_context_finisher(*result);
+ }
+
+ send_block_writes();
+ return nullptr;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_block_writes() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ std::unique_lock locker{image_ctx.owner_lock};
+ image_ctx.io_image_dispatcher->block_writes(create_context_callback<
+ EnableFeaturesRequest<I>,
+ &EnableFeaturesRequest<I>::handle_block_writes>(this));
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_block_writes(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
+ return handle_finish(*result);
+ }
+ m_writes_blocked = true;
+
+ send_get_mirror_mode();
+ return nullptr;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_get_mirror_mode() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if ((m_features & RBD_FEATURE_JOURNALING) == 0) {
+ Context *ctx = create_context_callback<
+ EnableFeaturesRequest<I>,
+ &EnableFeaturesRequest<I>::handle_get_mirror_mode>(this);
+ ctx->complete(-ENOENT);
+ return;
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ librados::ObjectReadOperation op;
+ cls_client::mirror_mode_get_start(&op);
+
+ using klass = EnableFeaturesRequest<I>;
+ librados::AioCompletion *comp =
+ create_rados_callback<klass, &klass::handle_get_mirror_mode>(this);
+ m_out_bl.clear();
+ int r = image_ctx.md_ctx.aio_operate(RBD_MIRRORING, comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_get_mirror_mode(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ cls::rbd::MirrorMode mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+ if (*result == 0) {
+ auto it = m_out_bl.cbegin();
+ *result = cls_client::mirror_mode_get_finish(&it, &mirror_mode);
+ } else if (*result == -ENOENT) {
+ *result = 0;
+ }
+
+ if (*result < 0) {
+ lderr(cct) << "failed to retrieve pool mirror mode: "
+ << cpp_strerror(*result) << dendl;
+ return handle_finish(*result);
+ }
+
+ m_enable_mirroring = (mirror_mode == cls::rbd::MIRROR_MODE_POOL);
+
+ bool create_journal = false;
+ do {
+ std::unique_lock locker{image_ctx.owner_lock};
+
+ // avoid accepting new requests from peers while we manipulate
+ // the image features
+ if (image_ctx.exclusive_lock != nullptr &&
+ (image_ctx.journal == nullptr ||
+ !image_ctx.journal->is_journal_replaying())) {
+ image_ctx.exclusive_lock->block_requests(0);
+ m_requests_blocked = true;
+ }
+
+ m_features &= ~image_ctx.features;
+
+ // interlock object-map and fast-diff together
+ if (((m_features & RBD_FEATURE_OBJECT_MAP) != 0) ||
+ ((m_features & RBD_FEATURE_FAST_DIFF) != 0)) {
+ m_features |= (RBD_FEATURE_OBJECT_MAP | RBD_FEATURE_FAST_DIFF);
+ }
+
+ m_new_features = image_ctx.features | m_features;
+ m_features_mask = m_features;
+
+ if ((m_features & RBD_FEATURE_OBJECT_MAP) != 0) {
+ if ((m_new_features & RBD_FEATURE_EXCLUSIVE_LOCK) == 0) {
+ lderr(cct) << "cannot enable object-map. exclusive-lock must be "
+ "enabled before enabling object-map." << dendl;
+ *result = -EINVAL;
+ break;
+ }
+ m_enable_flags |= RBD_FLAG_OBJECT_MAP_INVALID;
+ m_features_mask |= (RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_FAST_DIFF);
+ }
+ if ((m_features & RBD_FEATURE_FAST_DIFF) != 0) {
+ m_enable_flags |= RBD_FLAG_FAST_DIFF_INVALID;
+ m_features_mask |= (RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_OBJECT_MAP);
+ }
+
+ if ((m_features & RBD_FEATURE_JOURNALING) != 0) {
+ if ((m_new_features & RBD_FEATURE_EXCLUSIVE_LOCK) == 0) {
+ lderr(cct) << "cannot enable journaling. exclusive-lock must be "
+ "enabled before enabling journaling." << dendl;
+ *result = -EINVAL;
+ break;
+ }
+ m_features_mask |= RBD_FEATURE_EXCLUSIVE_LOCK;
+ create_journal = true;
+ }
+ } while (false);
+
+ if (*result < 0) {
+ return handle_finish(*result);
+ }
+ if (create_journal) {
+ send_create_journal();
+ return nullptr;
+ }
+ send_append_op_event();
+ return nullptr;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_create_journal() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ journal::TagData tag_data(librbd::Journal<>::LOCAL_MIRROR_UUID);
+ Context *ctx = create_context_callback<
+ EnableFeaturesRequest<I>,
+ &EnableFeaturesRequest<I>::handle_create_journal>(this);
+
+ typename journal::TypeTraits<I>::ContextWQ* context_wq;
+ Journal<I>::get_work_queue(cct, &context_wq);
+
+ journal::CreateRequest<I> *req = journal::CreateRequest<I>::create(
+ image_ctx.md_ctx, image_ctx.id,
+ image_ctx.config.template get_val<uint64_t>("rbd_journal_order"),
+ image_ctx.config.template get_val<uint64_t>("rbd_journal_splay_width"),
+ image_ctx.config.template get_val<std::string>("rbd_journal_pool"),
+ cls::journal::Tag::TAG_CLASS_NEW, tag_data,
+ librbd::Journal<>::IMAGE_CLIENT_ID, context_wq, ctx);
+
+ req->send();
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_create_journal(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to create journal: " << cpp_strerror(*result)
+ << dendl;
+ return handle_finish(*result);
+ }
+
+ send_append_op_event();
+ return nullptr;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_append_op_event() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (!this->template append_op_event<
+ EnableFeaturesRequest<I>,
+ &EnableFeaturesRequest<I>::handle_append_op_event>(this)) {
+ send_update_flags();
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_append_op_event(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result)
+ << dendl;
+ return handle_finish(*result);
+ }
+
+ send_update_flags();
+ return nullptr;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_update_flags() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (m_enable_flags == 0) {
+ send_set_features();
+ return;
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << ": enable_flags="
+ << m_enable_flags << dendl;
+
+ Context *ctx = create_context_callback<
+ EnableFeaturesRequest<I>,
+ &EnableFeaturesRequest<I>::handle_update_flags>(this);
+
+ image::SetFlagsRequest<I> *req =
+ image::SetFlagsRequest<I>::create(&image_ctx, m_enable_flags,
+ m_enable_flags, ctx);
+ req->send();
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_update_flags(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to update image flags: " << cpp_strerror(*result)
+ << dendl;
+ return handle_finish(*result);
+ }
+
+ send_set_features();
+ return nullptr;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_set_features() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": new_features="
+ << m_new_features << ", features_mask=" << m_features_mask
+ << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::set_features(&op, m_new_features, m_features_mask);
+
+ using klass = EnableFeaturesRequest<I>;
+ librados::AioCompletion *comp =
+ create_rados_callback<klass, &klass::handle_set_features>(this);
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_set_features(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to update features: " << cpp_strerror(*result)
+ << dendl;
+ return handle_finish(*result);
+ }
+
+ send_create_object_map();
+ return nullptr;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_create_object_map() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (((image_ctx.features & RBD_FEATURE_OBJECT_MAP) != 0) ||
+ ((m_features & RBD_FEATURE_OBJECT_MAP) == 0)) {
+ send_enable_mirror_image();
+ return;
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ Context *ctx = create_context_callback<
+ EnableFeaturesRequest<I>,
+ &EnableFeaturesRequest<I>::handle_create_object_map>(this);
+
+ object_map::CreateRequest<I> *req =
+ object_map::CreateRequest<I>::create(&image_ctx, ctx);
+ req->send();
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_create_object_map(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to create object map: " << cpp_strerror(*result)
+ << dendl;
+ return handle_finish(*result);
+ }
+
+ send_enable_mirror_image();
+ return nullptr;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_enable_mirror_image() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (!m_enable_mirroring) {
+ send_notify_update();
+ return;
+ }
+
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ Context *ctx = create_context_callback<
+ EnableFeaturesRequest<I>,
+ &EnableFeaturesRequest<I>::handle_enable_mirror_image>(this);
+
+ auto req = mirror::EnableRequest<I>::create(
+ &image_ctx, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, "", false, ctx);
+ req->send();
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_enable_mirror_image(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to enable mirroring: " << cpp_strerror(*result)
+ << dendl;
+ // not fatal
+ }
+
+ send_notify_update();
+ return nullptr;
+}
+
+template <typename I>
+void EnableFeaturesRequest<I>::send_notify_update() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ Context *ctx = create_context_callback<
+ EnableFeaturesRequest<I>,
+ &EnableFeaturesRequest<I>::handle_notify_update>(this);
+
+ image_ctx.notify_update(ctx);
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_notify_update(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ return handle_finish(*result);
+}
+
+template <typename I>
+Context *EnableFeaturesRequest<I>::handle_finish(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << r << dendl;
+
+ {
+ std::unique_lock locker{image_ctx.owner_lock};
+
+ if (image_ctx.exclusive_lock != nullptr && m_requests_blocked) {
+ image_ctx.exclusive_lock->unblock_requests();
+ }
+ if (m_writes_blocked) {
+ image_ctx.io_image_dispatcher->unblock_writes();
+ }
+ }
+ image_ctx.state->handle_prepare_lock_complete();
+
+ return this->create_context_finisher(r);
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::EnableFeaturesRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/EnableFeaturesRequest.h b/src/librbd/operation/EnableFeaturesRequest.h
new file mode 100644
index 000000000..1c91b4dc7
--- /dev/null
+++ b/src/librbd/operation/EnableFeaturesRequest.h
@@ -0,0 +1,135 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_ENABLE_FEATURES_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_ENABLE_FEATURES_REQUEST_H
+
+#include "librbd/operation/Request.h"
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class EnableFeaturesRequest : public Request<ImageCtxT> {
+public:
+ static EnableFeaturesRequest *create(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t journal_op_tid,
+ uint64_t features) {
+ return new EnableFeaturesRequest(image_ctx, on_finish, journal_op_tid,
+ features);
+ }
+
+ EnableFeaturesRequest(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t journal_op_tid, uint64_t features);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+ bool can_affect_io() const override {
+ return true;
+ }
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::UpdateFeaturesEvent(op_tid, m_features, true);
+ }
+
+private:
+ /**
+ * EnableFeatures goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_PREPARE_LOCK
+ * |
+ * v
+ * STATE_BLOCK_WRITES
+ * |
+ * v
+ * STATE_GET_MIRROR_MODE
+ * |
+ * v
+ * STATE_CREATE_JOURNAL (skip if not
+ * | required)
+ * v
+ * STATE_APPEND_OP_EVENT (skip if journaling
+ * | disabled)
+ * v
+ * STATE_UPDATE_FLAGS
+ * |
+ * v
+ * STATE_SET_FEATURES
+ * |
+ * v
+ * STATE_CREATE_OBJECT_MAP (skip if not
+ * | required)
+ * v
+ * STATE_ENABLE_MIRROR_IMAGE
+ * |
+ * V
+ * STATE_NOTIFY_UPDATE
+ * |
+ * | (unblock writes)
+ * v
+ * <finish>
+ * @endverbatim
+ *
+ */
+
+ uint64_t m_features;
+
+ bool m_enable_mirroring = false;
+ bool m_requests_blocked = false;
+ bool m_writes_blocked = false;
+
+ uint64_t m_new_features = 0;
+ uint64_t m_enable_flags = 0;
+ uint64_t m_features_mask = 0;
+
+ bufferlist m_out_bl;
+
+ void send_prepare_lock();
+ Context *handle_prepare_lock(int *result);
+
+ void send_block_writes();
+ Context *handle_block_writes(int *result);
+
+ void send_get_mirror_mode();
+ Context *handle_get_mirror_mode(int *result);
+
+ void send_create_journal();
+ Context *handle_create_journal(int *result);
+
+ void send_append_op_event();
+ Context *handle_append_op_event(int *result);
+
+ void send_update_flags();
+ Context *handle_update_flags(int *result);
+
+ void send_set_features();
+ Context *handle_set_features(int *result);
+
+ void send_create_object_map();
+ Context *handle_create_object_map(int *result);
+
+ void send_enable_mirror_image();
+ Context *handle_enable_mirror_image(int *result);
+
+ void send_notify_update();
+ Context *handle_notify_update(int *result);
+
+ Context *handle_finish(int r);
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::EnableFeaturesRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_ENABLE_FEATURES_REQUEST_H
diff --git a/src/librbd/operation/FlattenRequest.cc b/src/librbd/operation/FlattenRequest.cc
new file mode 100644
index 000000000..764552217
--- /dev/null
+++ b/src/librbd/operation/FlattenRequest.cc
@@ -0,0 +1,226 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/FlattenRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/image/DetachChildRequest.h"
+#include "librbd/image/DetachParentRequest.h"
+#include "librbd/Types.h"
+#include "librbd/io/ObjectRequest.h"
+#include "librbd/io/Utils.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::operation::FlattenRequest: " << this \
+ << " " << __func__ << ": "
+
+namespace librbd {
+namespace operation {
+
+using util::create_context_callback;
+using util::create_rados_callback;
+
+template <typename I>
+class C_FlattenObject : public C_AsyncObjectThrottle<I> {
+public:
+ C_FlattenObject(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+ IOContext io_context, uint64_t object_no)
+ : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_io_context(io_context),
+ m_object_no(object_no) {
+ }
+
+ int send() override {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ CephContext *cct = image_ctx.cct;
+
+ if (image_ctx.exclusive_lock != nullptr &&
+ !image_ctx.exclusive_lock->is_lock_owner()) {
+ ldout(cct, 1) << "lost exclusive lock during flatten" << dendl;
+ return -ERESTART;
+ }
+
+ {
+ std::shared_lock image_lock{image_ctx.image_lock};
+ if (image_ctx.object_map != nullptr &&
+ !image_ctx.object_map->object_may_not_exist(m_object_no)) {
+ // can skip because the object already exists
+ return 1;
+ }
+ }
+
+ if (!io::util::trigger_copyup(
+ &image_ctx, m_object_no, m_io_context, this)) {
+ // stop early if the parent went away - it just means
+ // another flatten finished first or the image was resized
+ return 1;
+ }
+
+ return 0;
+ }
+
+private:
+ IOContext m_io_context;
+ uint64_t m_object_no;
+};
+
+template <typename I>
+bool FlattenRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+ if (r < 0) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+}
+
+template <typename I>
+void FlattenRequest<I>::send_op() {
+ flatten_objects();
+}
+
+template <typename I>
+void FlattenRequest<I>::flatten_objects() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ auto ctx = create_context_callback<
+ FlattenRequest<I>,
+ &FlattenRequest<I>::handle_flatten_objects>(this);
+ typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_FlattenObject<I> >(),
+ boost::lambda::_1, &image_ctx, image_ctx.get_data_io_context(),
+ boost::lambda::_2));
+ AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+ this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, m_overlap_objects);
+ throttle->start_ops(
+ image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
+}
+
+template <typename I>
+void FlattenRequest<I>::handle_flatten_objects(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r == -ERESTART) {
+ ldout(cct, 5) << "flatten operation interrupted" << dendl;
+ this->complete(r);
+ return;
+ } else if (r < 0) {
+ lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl;
+ this->complete(r);
+ return;
+ }
+
+ detach_child();
+}
+
+template <typename I>
+void FlattenRequest<I>::detach_child() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ // should have been canceled prior to releasing lock
+ image_ctx.owner_lock.lock_shared();
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ // if there are no snaps, remove from the children object as well
+ // (if snapshots remain, they have their own parent info, and the child
+ // will be removed when the last snap goes away)
+ image_ctx.image_lock.lock_shared();
+ if ((image_ctx.features & RBD_FEATURE_DEEP_FLATTEN) == 0 &&
+ !image_ctx.snaps.empty()) {
+ image_ctx.image_lock.unlock_shared();
+ image_ctx.owner_lock.unlock_shared();
+ detach_parent();
+ return;
+ }
+ image_ctx.image_lock.unlock_shared();
+
+ ldout(cct, 5) << dendl;
+ auto ctx = create_context_callback<
+ FlattenRequest<I>,
+ &FlattenRequest<I>::handle_detach_child>(this);
+ auto req = image::DetachChildRequest<I>::create(image_ctx, ctx);
+ req->send();
+ image_ctx.owner_lock.unlock_shared();
+}
+
+template <typename I>
+void FlattenRequest<I>::handle_detach_child(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "detach encountered an error: " << cpp_strerror(r) << dendl;
+ this->complete(r);
+ return;
+ }
+
+ detach_parent();
+}
+
+template <typename I>
+void FlattenRequest<I>::detach_parent() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ // should have been canceled prior to releasing lock
+ image_ctx.owner_lock.lock_shared();
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ // stop early if the parent went away - it just means
+ // another flatten finished first, so this one is useless.
+ image_ctx.image_lock.lock_shared();
+ if (!image_ctx.parent) {
+ ldout(cct, 5) << "image already flattened" << dendl;
+ image_ctx.image_lock.unlock_shared();
+ image_ctx.owner_lock.unlock_shared();
+ this->complete(0);
+ return;
+ }
+ image_ctx.image_lock.unlock_shared();
+
+ // remove parent from this (base) image
+ auto ctx = create_context_callback<
+ FlattenRequest<I>,
+ &FlattenRequest<I>::handle_detach_parent>(this);
+ auto req = image::DetachParentRequest<I>::create(image_ctx, ctx);
+ req->send();
+ image_ctx.owner_lock.unlock_shared();
+}
+
+template <typename I>
+void FlattenRequest<I>::handle_detach_parent(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "remove parent encountered an error: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ this->complete(r);
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::FlattenRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/FlattenRequest.h b/src/librbd/operation/FlattenRequest.h
new file mode 100644
index 000000000..cdbb4c9e7
--- /dev/null
+++ b/src/librbd/operation/FlattenRequest.h
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H
+
+#include "librbd/operation/Request.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class FlattenRequest : public Request<ImageCtxT>
+{
+public:
+ FlattenRequest(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t overlap_objects, ProgressContext &prog_ctx)
+ : Request<ImageCtxT>(image_ctx, on_finish),
+ m_overlap_objects(overlap_objects), m_prog_ctx(prog_ctx) {
+ }
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::FlattenEvent(op_tid);
+ }
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * FLATTEN_OBJECTS
+ * |
+ * v
+ * DETACH_CHILD
+ * |
+ * v
+ * DETACH_PARENT
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ uint64_t m_overlap_objects;
+ ProgressContext &m_prog_ctx;
+
+ void flatten_objects();
+ void handle_flatten_objects(int r);
+
+ void detach_child();
+ void handle_detach_child(int r);
+
+ void detach_parent();
+ void handle_detach_parent(int r);
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::FlattenRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H
diff --git a/src/librbd/operation/MetadataRemoveRequest.cc b/src/librbd/operation/MetadataRemoveRequest.cc
new file mode 100644
index 000000000..c5d6141ad
--- /dev/null
+++ b/src/librbd/operation/MetadataRemoveRequest.cc
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/MetadataRemoveRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::MetadataRemoveRequest: "
+
+namespace librbd {
+namespace operation {
+
+template <typename I>
+MetadataRemoveRequest<I>::MetadataRemoveRequest(I &image_ctx,
+ Context *on_finish,
+ const std::string &key)
+ : Request<I>(image_ctx, on_finish), m_key(key) {
+}
+
+template <typename I>
+void MetadataRemoveRequest<I>::send_op() {
+ send_metadata_remove();
+}
+
+template <typename I>
+bool MetadataRemoveRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << " r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+}
+
+template <typename I>
+void MetadataRemoveRequest<I>::send_metadata_remove() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ librados::ObjectWriteOperation op;
+ cls_client::metadata_remove(&op, m_key);
+
+ librados::AioCompletion *comp = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::MetadataRemoveRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/MetadataRemoveRequest.h b/src/librbd/operation/MetadataRemoveRequest.h
new file mode 100644
index 000000000..1d7f2a46a
--- /dev/null
+++ b/src/librbd/operation/MetadataRemoveRequest.h
@@ -0,0 +1,44 @@
+// -*- mode:C++; tab-width:8; c-basic-offremove:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_METADATA_REMOVE_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_METADATA_REMOVE_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include <iosfwd>
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class MetadataRemoveRequest : public Request<ImageCtxT> {
+public:
+ MetadataRemoveRequest(ImageCtxT &image_ctx, Context *on_finish,
+ const std::string &key);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::MetadataRemoveEvent(op_tid, m_key);
+ }
+
+private:
+ std::string m_key;
+
+ void send_metadata_remove();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::MetadataRemoveRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_METADATA_REMOVE_REQUEST_H
diff --git a/src/librbd/operation/MetadataSetRequest.cc b/src/librbd/operation/MetadataSetRequest.cc
new file mode 100644
index 000000000..5fb939352
--- /dev/null
+++ b/src/librbd/operation/MetadataSetRequest.cc
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/MetadataSetRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::MetadataSetRequest: "
+
+namespace librbd {
+namespace operation {
+
+template <typename I>
+MetadataSetRequest<I>::MetadataSetRequest(I &image_ctx,
+ Context *on_finish,
+ const std::string &key,
+ const std::string &value)
+ : Request<I>(image_ctx, on_finish), m_key(key), m_value(value) {
+}
+
+template <typename I>
+void MetadataSetRequest<I>::send_op() {
+ send_metadata_set();
+}
+
+template <typename I>
+bool MetadataSetRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << " r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+}
+
+template <typename I>
+void MetadataSetRequest<I>::send_metadata_set() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << dendl;
+
+ m_data[m_key].append(m_value);
+ librados::ObjectWriteOperation op;
+ cls_client::metadata_set(&op, m_data);
+
+ librados::AioCompletion *comp = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::MetadataSetRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/MetadataSetRequest.h b/src/librbd/operation/MetadataSetRequest.h
new file mode 100644
index 000000000..5f8daa2f1
--- /dev/null
+++ b/src/librbd/operation/MetadataSetRequest.h
@@ -0,0 +1,47 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_METADATA_SET_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_METADATA_SET_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include "include/buffer.h"
+#include <string>
+#include <map>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class MetadataSetRequest : public Request<ImageCtxT> {
+public:
+ MetadataSetRequest(ImageCtxT &image_ctx, Context *on_finish,
+ const std::string &key, const std::string &value);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::MetadataSetEvent(op_tid, m_key, m_value);
+ }
+
+private:
+ std::string m_key;
+ std::string m_value;
+ std::map<std::string, bufferlist> m_data;
+
+ void send_metadata_set();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::MetadataSetRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_METADATA_SET_REQUEST_H
diff --git a/src/librbd/operation/MigrateRequest.cc b/src/librbd/operation/MigrateRequest.cc
new file mode 100644
index 000000000..2b9adb773
--- /dev/null
+++ b/src/librbd/operation/MigrateRequest.cc
@@ -0,0 +1,238 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/MigrateRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/deep_copy/ObjectCopyRequest.h"
+#include "librbd/io/AsyncOperation.h"
+#include "librbd/io/ImageDispatcherInterface.h"
+#include "librbd/io/ObjectRequest.h"
+#include "osdc/Striper.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::MigrateRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace librbd {
+namespace operation {
+
+using util::create_context_callback;
+using util::create_async_context_callback;
+
+namespace {
+
+template <typename I>
+class C_MigrateObject : public C_AsyncObjectThrottle<I> {
+public:
+ C_MigrateObject(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+ IOContext io_context, uint64_t object_no)
+ : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_io_context(io_context),
+ m_object_no(object_no) {
+ }
+
+ int send() override {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ CephContext *cct = image_ctx.cct;
+
+ if (image_ctx.exclusive_lock != nullptr &&
+ !image_ctx.exclusive_lock->is_lock_owner()) {
+ ldout(cct, 1) << "lost exclusive lock during migrate" << dendl;
+ return -ERESTART;
+ }
+
+ start_async_op();
+ return 0;
+ }
+
+private:
+ IOContext m_io_context;
+ uint64_t m_object_no;
+
+ io::AsyncOperation *m_async_op = nullptr;
+
+ void start_async_op() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ ceph_assert(m_async_op == nullptr);
+ m_async_op = new io::AsyncOperation();
+ m_async_op->start_op(image_ctx);
+
+ if (!image_ctx.io_image_dispatcher->writes_blocked()) {
+ migrate_object();
+ return;
+ }
+
+ auto ctx = create_async_context_callback(
+ image_ctx, create_context_callback<
+ C_MigrateObject<I>, &C_MigrateObject<I>::handle_start_async_op>(this));
+ m_async_op->finish_op();
+ delete m_async_op;
+ m_async_op = nullptr;
+ image_ctx.io_image_dispatcher->wait_on_writes_unblocked(ctx);
+ }
+
+ void handle_start_async_op(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to start async op: " << cpp_strerror(r) << dendl;
+ this->complete(r);
+ return;
+ }
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ start_async_op();
+ }
+
+ bool is_within_overlap_bounds() {
+ I &image_ctx = this->m_image_ctx;
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ auto overlap = std::min(image_ctx.size, image_ctx.migration_info.overlap);
+ return overlap > 0 &&
+ Striper::get_num_objects(image_ctx.layout, overlap) > m_object_no;
+ }
+
+ void migrate_object() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ CephContext *cct = image_ctx.cct;
+
+ auto ctx = create_context_callback<
+ C_MigrateObject<I>, &C_MigrateObject<I>::handle_migrate_object>(this);
+
+ if (is_within_overlap_bounds()) {
+ bufferlist bl;
+ auto req = new io::ObjectWriteRequest<I>(&image_ctx, m_object_no, 0,
+ std::move(bl), m_io_context, 0,
+ 0, std::nullopt, {}, ctx);
+
+ ldout(cct, 20) << "copyup object req " << req << ", object_no "
+ << m_object_no << dendl;
+
+ req->send();
+ } else {
+ ceph_assert(image_ctx.parent != nullptr);
+
+ uint32_t flags = deep_copy::OBJECT_COPY_REQUEST_FLAG_MIGRATION;
+ if (image_ctx.migration_info.flatten) {
+ flags |= deep_copy::OBJECT_COPY_REQUEST_FLAG_FLATTEN;
+ }
+
+ auto req = deep_copy::ObjectCopyRequest<I>::create(
+ image_ctx.parent, &image_ctx, 0, 0, image_ctx.migration_info.snap_map,
+ m_object_no, flags, nullptr, ctx);
+
+ ldout(cct, 20) << "deep copy object req " << req << ", object_no "
+ << m_object_no << dendl;
+ req->send();
+ }
+ }
+
+ void handle_migrate_object(int r) {
+ CephContext *cct = this->m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ r = 0;
+ }
+
+ m_async_op->finish_op();
+ delete m_async_op;
+ this->complete(r);
+ }
+};
+
+} // anonymous namespace
+
+template <typename I>
+void MigrateRequest<I>::send_op() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ migrate_objects();
+}
+
+template <typename I>
+bool MigrateRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+
+ return true;
+}
+
+template <typename I>
+void MigrateRequest<I>::migrate_objects() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ uint64_t overlap_objects = get_num_overlap_objects();
+
+ ldout(cct, 10) << "from 0 to " << overlap_objects << dendl;
+
+ auto ctx = create_context_callback<
+ MigrateRequest<I>, &MigrateRequest<I>::handle_migrate_objects>(this);
+
+ typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_MigrateObject<I> >(),
+ boost::lambda::_1, &image_ctx, image_ctx.get_data_io_context(),
+ boost::lambda::_2));
+ AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+ this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, overlap_objects);
+ throttle->start_ops(
+ image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
+}
+
+template <typename I>
+void MigrateRequest<I>::handle_migrate_objects(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to migrate objects: " << cpp_strerror(r) << dendl;
+ }
+
+ this->complete(r);
+}
+
+template <typename I>
+uint64_t MigrateRequest<I>::get_num_overlap_objects() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ auto overlap = image_ctx.migration_info.overlap;
+
+ return overlap > 0 ?
+ Striper::get_num_objects(image_ctx.layout, overlap) : 0;
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::MigrateRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/MigrateRequest.h b/src/librbd/operation/MigrateRequest.h
new file mode 100644
index 000000000..a143b579c
--- /dev/null
+++ b/src/librbd/operation/MigrateRequest.h
@@ -0,0 +1,68 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_MIGRATE_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_MIGRATE_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include "librbd/Types.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class MigrateRequest : public Request<ImageCtxT>
+{
+public:
+ MigrateRequest(ImageCtxT &image_ctx, Context *on_finish,
+ ProgressContext &prog_ctx)
+ : Request<ImageCtxT>(image_ctx, on_finish), m_prog_ctx(prog_ctx) {
+ }
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+ bool can_affect_io() const override {
+ return true;
+ }
+ journal::Event create_event(uint64_t op_tid) const override {
+ ceph_abort();
+ return journal::UnknownEvent();
+ }
+
+private:
+ /**
+ * Migrate goes through the following state machine to copy objects
+ * from the parent (migrating source) image:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * MIGRATE_OBJECTS
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ *
+ */
+
+ ProgressContext &m_prog_ctx;
+
+ void migrate_objects();
+ void handle_migrate_objects(int r);
+
+ uint64_t get_num_overlap_objects();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::MigrateRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_MIGRATE_REQUEST_H
diff --git a/src/librbd/operation/ObjectMapIterate.cc b/src/librbd/operation/ObjectMapIterate.cc
new file mode 100644
index 000000000..50db3df85
--- /dev/null
+++ b/src/librbd/operation/ObjectMapIterate.cc
@@ -0,0 +1,308 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/ObjectMapIterate.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "osdc/Striper.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/object_map/InvalidateRequest.h"
+#include "librbd/Utils.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ObjectMapIterateRequest: "
+
+namespace librbd {
+namespace operation {
+
+namespace {
+
+template <typename I>
+class C_VerifyObjectCallback : public C_AsyncObjectThrottle<I> {
+public:
+ C_VerifyObjectCallback(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+ uint64_t snap_id, uint64_t object_no,
+ ObjectIterateWork<I> handle_mismatch,
+ std::atomic_flag *invalidate)
+ : C_AsyncObjectThrottle<I>(throttle, *image_ctx),
+ m_snap_id(snap_id), m_object_no(object_no),
+ m_oid(image_ctx->get_object_name(m_object_no)),
+ m_handle_mismatch(handle_mismatch),
+ m_invalidate(invalidate)
+ {
+ m_io_ctx.dup(image_ctx->data_ctx);
+ m_io_ctx.snap_set_read(CEPH_SNAPDIR);
+ }
+
+ void complete(int r) override {
+ I &image_ctx = this->m_image_ctx;
+ if (should_complete(r)) {
+ ldout(image_ctx.cct, 20) << m_oid << " C_VerifyObjectCallback completed "
+ << dendl;
+ m_io_ctx.close();
+
+ this->finish(r);
+ delete this;
+ }
+ }
+
+ int send() override {
+ send_list_snaps();
+ return 0;
+ }
+
+private:
+ librados::IoCtx m_io_ctx;
+ uint64_t m_snap_id;
+ uint64_t m_object_no;
+ std::string m_oid;
+ ObjectIterateWork<I> m_handle_mismatch;
+ std::atomic_flag *m_invalidate;
+
+ librados::snap_set_t m_snap_set;
+ int m_snap_list_ret = 0;
+
+ bool should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ if (r == 0) {
+ r = m_snap_list_ret;
+ }
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << m_oid << " C_VerifyObjectCallback::should_complete: "
+ << "encountered an error: " << cpp_strerror(r) << dendl;
+ return true;
+ }
+
+ ldout(cct, 20) << m_oid << " C_VerifyObjectCallback::should_complete: "
+ << " r="
+ << r << dendl;
+ return object_map_action(get_object_state());
+ }
+
+ void send_list_snaps() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ ldout(image_ctx.cct, 5) << m_oid
+ << " C_VerifyObjectCallback::send_list_snaps"
+ << dendl;
+
+ librados::ObjectReadOperation op;
+ op.list_snaps(&m_snap_set, &m_snap_list_ret);
+
+ librados::AioCompletion *comp = util::create_rados_callback(this);
+ int r = m_io_ctx.aio_operate(m_oid, comp, &op, NULL);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+
+ uint8_t get_object_state() {
+ I &image_ctx = this->m_image_ctx;
+ std::shared_lock image_locker{image_ctx.image_lock};
+ for (std::vector<librados::clone_info_t>::const_iterator r =
+ m_snap_set.clones.begin(); r != m_snap_set.clones.end(); ++r) {
+ librados::snap_t from_snap_id;
+ librados::snap_t to_snap_id;
+ if (r->cloneid == librados::SNAP_HEAD) {
+ from_snap_id = next_valid_snap_id(m_snap_set.seq + 1);
+ to_snap_id = librados::SNAP_HEAD;
+ } else {
+ from_snap_id = next_valid_snap_id(r->snaps[0]);
+ to_snap_id = r->snaps[r->snaps.size()-1];
+ }
+
+ if (to_snap_id < m_snap_id) {
+ continue;
+ } else if (m_snap_id < from_snap_id) {
+ break;
+ }
+
+ if ((image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0 &&
+ from_snap_id != m_snap_id) {
+ return OBJECT_EXISTS_CLEAN;
+ }
+ return OBJECT_EXISTS;
+ }
+ return OBJECT_NONEXISTENT;
+ }
+
+ uint64_t next_valid_snap_id(uint64_t snap_id) {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.image_lock));
+
+ std::map<librados::snap_t, SnapInfo>::iterator it =
+ image_ctx.snap_info.lower_bound(snap_id);
+ if (it == image_ctx.snap_info.end()) {
+ return CEPH_NOSNAP;
+ }
+ return it->first;
+ }
+
+ bool object_map_action(uint8_t new_state) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ std::shared_lock image_locker{image_ctx.image_lock};
+ ceph_assert(image_ctx.object_map != nullptr);
+
+ uint8_t state = (*image_ctx.object_map)[m_object_no];
+ ldout(cct, 10) << "C_VerifyObjectCallback::object_map_action"
+ << " object " << image_ctx.get_object_name(m_object_no)
+ << " state " << (int)state
+ << " new_state " << (int)new_state << dendl;
+
+ if (state != new_state) {
+ int r = 0;
+
+ ceph_assert(m_handle_mismatch);
+ r = m_handle_mismatch(image_ctx, m_object_no, state, new_state);
+ if (r) {
+ lderr(cct) << "object map error: object "
+ << image_ctx.get_object_name(m_object_no)
+ << " marked as " << (int)state << ", but should be "
+ << (int)new_state << dendl;
+ m_invalidate->test_and_set();
+ } else {
+ ldout(cct, 1) << "object map inconsistent: object "
+ << image_ctx.get_object_name(m_object_no)
+ << " marked as " << (int)state << ", but should be "
+ << (int)new_state << dendl;
+ }
+ }
+
+ return true;
+ }
+};
+
+} // anonymous namespace
+
+template <typename I>
+void ObjectMapIterateRequest<I>::send() {
+ if (!m_image_ctx.data_ctx.is_valid()) {
+ this->async_complete(-ENODEV);
+ return;
+ }
+
+ send_verify_objects();
+}
+
+template <typename I>
+bool ObjectMapIterateRequest<I>::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+
+ if (r == -ENODEV) {
+ lderr(cct) << "missing data pool" << dendl;
+ return true;
+ }
+
+ if (r < 0) {
+ lderr(cct) << "object map operation encountered an error: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ std::shared_lock owner_lock{m_image_ctx.owner_lock};
+ switch (m_state) {
+ case STATE_VERIFY_OBJECTS:
+ if (m_invalidate.test_and_set()) {
+ send_invalidate_object_map();
+ return false;
+ } else if (r == 0) {
+ return true;
+ }
+ break;
+
+ case STATE_INVALIDATE_OBJECT_MAP:
+ if (r == 0) {
+ return true;
+ }
+ break;
+
+ default:
+ ceph_abort();
+ break;
+ }
+
+ if (r < 0) {
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+void ObjectMapIterateRequest<I>::send_verify_objects() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock));
+ CephContext *cct = m_image_ctx.cct;
+
+ uint64_t snap_id;
+ uint64_t num_objects;
+ {
+ std::shared_lock l{m_image_ctx.image_lock};
+ snap_id = m_image_ctx.snap_id;
+ num_objects = Striper::get_num_objects(m_image_ctx.layout,
+ m_image_ctx.get_image_size(snap_id));
+ }
+ ldout(cct, 5) << this << " send_verify_objects" << dendl;
+
+ m_state = STATE_VERIFY_OBJECTS;
+
+ typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_VerifyObjectCallback<I> >(),
+ boost::lambda::_1, &m_image_ctx, snap_id,
+ boost::lambda::_2, m_handle_mismatch, &m_invalidate));
+ AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+ this, m_image_ctx, context_factory, this->create_callback_context(),
+ &m_prog_ctx, 0, num_objects);
+ throttle->start_ops(
+ m_image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
+}
+
+template <typename I>
+uint64_t ObjectMapIterateRequest<I>::get_image_size() const {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock));
+ if (m_image_ctx.snap_id == CEPH_NOSNAP) {
+ if (!m_image_ctx.resize_reqs.empty()) {
+ return m_image_ctx.resize_reqs.front()->get_image_size();
+ } else {
+ return m_image_ctx.size;
+ }
+ }
+ return m_image_ctx.get_image_size(m_image_ctx.snap_id);
+}
+
+template <typename I>
+void ObjectMapIterateRequest<I>::send_invalidate_object_map() {
+ CephContext *cct = m_image_ctx.cct;
+
+ ldout(cct, 5) << this << " send_invalidate_object_map" << dendl;
+ m_state = STATE_INVALIDATE_OBJECT_MAP;
+
+ object_map::InvalidateRequest<I>*req =
+ object_map::InvalidateRequest<I>::create(m_image_ctx, m_image_ctx.snap_id,
+ true,
+ this->create_callback_context());
+
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock));
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ req->send();
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::ObjectMapIterateRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/ObjectMapIterate.h b/src/librbd/operation/ObjectMapIterate.h
new file mode 100644
index 000000000..14215902a
--- /dev/null
+++ b/src/librbd/operation/ObjectMapIterate.h
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_OBJECT_MAP_ITERATE_H
+#define CEPH_LIBRBD_OPERATION_OBJECT_MAP_ITERATE_H
+
+#include <iostream>
+#include <atomic>
+
+#include "include/int_types.h"
+#include "include/rbd/object_map_types.h"
+#include "librbd/AsyncRequest.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+using ObjectIterateWork = bool(*)(ImageCtxT &image_ctx,
+ uint64_t object_no,
+ uint8_t current_state,
+ uint8_t new_state);
+
+template <typename ImageCtxT = ImageCtx>
+class ObjectMapIterateRequest : public AsyncRequest<ImageCtxT> {
+public:
+ ObjectMapIterateRequest(ImageCtxT &image_ctx, Context *on_finish,
+ ProgressContext &prog_ctx,
+ ObjectIterateWork<ImageCtxT> handle_mismatch)
+ : AsyncRequest<ImageCtxT>(image_ctx, on_finish), m_image_ctx(image_ctx),
+ m_prog_ctx(prog_ctx), m_handle_mismatch(handle_mismatch)
+ {
+ }
+
+ void send() override;
+
+protected:
+ bool should_complete(int r) override;
+
+private:
+ enum State {
+ STATE_VERIFY_OBJECTS,
+ STATE_INVALIDATE_OBJECT_MAP
+ };
+
+ ImageCtxT &m_image_ctx;
+ ProgressContext &m_prog_ctx;
+ ObjectIterateWork<ImageCtxT> m_handle_mismatch;
+ std::atomic_flag m_invalidate = ATOMIC_FLAG_INIT;
+ State m_state = STATE_VERIFY_OBJECTS;
+
+ void send_verify_objects();
+ void send_invalidate_object_map();
+
+ uint64_t get_image_size() const;
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::ObjectMapIterateRequest<librbd::ImageCtx>;
+
+#endif
diff --git a/src/librbd/operation/RebuildObjectMapRequest.cc b/src/librbd/operation/RebuildObjectMapRequest.cc
new file mode 100644
index 000000000..5deb182e5
--- /dev/null
+++ b/src/librbd/operation/RebuildObjectMapRequest.cc
@@ -0,0 +1,250 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/RebuildObjectMapRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "osdc/Striper.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/operation/TrimRequest.h"
+#include "librbd/operation/ObjectMapIterate.h"
+#include "librbd/Utils.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::RebuildObjectMapRequest: "
+
+namespace librbd {
+namespace operation {
+
+using util::create_context_callback;
+
+template <typename I>
+void RebuildObjectMapRequest<I>::send() {
+ send_resize_object_map();
+}
+
+template <typename I>
+bool RebuildObjectMapRequest<I>::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+
+ std::shared_lock owner_lock{m_image_ctx.owner_lock};
+ switch (m_state) {
+ case STATE_RESIZE_OBJECT_MAP:
+ ldout(cct, 5) << "RESIZE_OBJECT_MAP" << dendl;
+ if (r == -ESTALE && !m_attempted_trim) {
+ // objects are still flagged as in-use -- delete them
+ m_attempted_trim = true;
+ send_trim_image();
+ return false;
+ } else if (r == 0) {
+ send_verify_objects();
+ }
+ break;
+
+ case STATE_TRIM_IMAGE:
+ ldout(cct, 5) << "TRIM_IMAGE" << dendl;
+ if (r == 0) {
+ send_resize_object_map();
+ }
+ break;
+
+ case STATE_VERIFY_OBJECTS:
+ ldout(cct, 5) << "VERIFY_OBJECTS" << dendl;
+ if (r == 0) {
+ send_save_object_map();
+ }
+ break;
+
+ case STATE_SAVE_OBJECT_MAP:
+ ldout(cct, 5) << "SAVE_OBJECT_MAP" << dendl;
+ if (r == 0) {
+ send_update_header();
+ }
+ break;
+ case STATE_UPDATE_HEADER:
+ ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+ if (r == 0) {
+ return true;
+ }
+ break;
+
+ default:
+ ceph_abort();
+ break;
+ }
+
+ if (r == -ERESTART) {
+ ldout(cct, 5) << "rebuild object map operation interrupted" << dendl;
+ return true;
+ } else if (r < 0) {
+ lderr(cct) << "rebuild object map encountered an error: " << cpp_strerror(r)
+ << dendl;
+ return true;
+ }
+ return false;
+}
+
+template <typename I>
+void RebuildObjectMapRequest<I>::send_resize_object_map() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock));
+ CephContext *cct = m_image_ctx.cct;
+
+ m_image_ctx.image_lock.lock_shared();
+ ceph_assert(m_image_ctx.object_map != nullptr);
+
+ uint64_t size = get_image_size();
+ uint64_t num_objects = Striper::get_num_objects(m_image_ctx.layout, size);
+
+ if (m_image_ctx.object_map->size() == num_objects) {
+ m_image_ctx.image_lock.unlock_shared();
+ send_verify_objects();
+ return;
+ }
+
+ ldout(cct, 5) << this << " send_resize_object_map" << dendl;
+ m_state = STATE_RESIZE_OBJECT_MAP;
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(m_image_ctx.exclusive_lock == nullptr ||
+ m_image_ctx.exclusive_lock->is_lock_owner());
+
+ m_image_ctx.object_map->aio_resize(size, OBJECT_NONEXISTENT,
+ this->create_callback_context());
+ m_image_ctx.image_lock.unlock_shared();
+}
+
+template <typename I>
+void RebuildObjectMapRequest<I>::send_trim_image() {
+ CephContext *cct = m_image_ctx.cct;
+
+ std::shared_lock l{m_image_ctx.owner_lock};
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(m_image_ctx.exclusive_lock == nullptr ||
+ m_image_ctx.exclusive_lock->is_lock_owner());
+ ldout(cct, 5) << this << " send_trim_image" << dendl;
+ m_state = STATE_TRIM_IMAGE;
+
+ uint64_t new_size;
+ uint64_t orig_size;
+ {
+ std::shared_lock l{m_image_ctx.image_lock};
+ ceph_assert(m_image_ctx.object_map != nullptr);
+
+ new_size = get_image_size();
+ orig_size = m_image_ctx.get_object_size() *
+ m_image_ctx.object_map->size();
+ }
+ TrimRequest<I> *req = TrimRequest<I>::create(m_image_ctx,
+ this->create_callback_context(),
+ orig_size, new_size, m_prog_ctx);
+ req->send();
+}
+
+template <typename I>
+bool update_object_map(I& image_ctx, uint64_t object_no, uint8_t current_state,
+ uint8_t new_state) {
+ CephContext *cct = image_ctx.cct;
+ uint64_t snap_id = image_ctx.snap_id;
+
+ current_state = (*image_ctx.object_map)[object_no];
+ if (current_state == OBJECT_EXISTS && new_state == OBJECT_NONEXISTENT &&
+ snap_id == CEPH_NOSNAP) {
+ // might be writing object to OSD concurrently
+ new_state = current_state;
+ }
+
+ if (new_state != current_state) {
+ ldout(cct, 15) << image_ctx.get_object_name(object_no)
+ << " rebuild updating object map "
+ << static_cast<uint32_t>(current_state) << "->"
+ << static_cast<uint32_t>(new_state) << dendl;
+ image_ctx.object_map->set_state(object_no, new_state, current_state);
+ }
+ return false;
+}
+
+template <typename I>
+void RebuildObjectMapRequest<I>::send_verify_objects() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock));
+ CephContext *cct = m_image_ctx.cct;
+
+ m_state = STATE_VERIFY_OBJECTS;
+ ldout(cct, 5) << this << " send_verify_objects" << dendl;
+
+ ObjectMapIterateRequest<I> *req =
+ new ObjectMapIterateRequest<I>(m_image_ctx,
+ this->create_callback_context(),
+ m_prog_ctx, update_object_map);
+
+ req->send();
+}
+
+template <typename I>
+void RebuildObjectMapRequest<I>::send_save_object_map() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock));
+ CephContext *cct = m_image_ctx.cct;
+
+ ldout(cct, 5) << this << " send_save_object_map" << dendl;
+ m_state = STATE_SAVE_OBJECT_MAP;
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(m_image_ctx.exclusive_lock == nullptr ||
+ m_image_ctx.exclusive_lock->is_lock_owner());
+
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ ceph_assert(m_image_ctx.object_map != nullptr);
+ m_image_ctx.object_map->aio_save(this->create_callback_context());
+}
+
+template <typename I>
+void RebuildObjectMapRequest<I>::send_update_header() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock));
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(m_image_ctx.exclusive_lock == nullptr ||
+ m_image_ctx.exclusive_lock->is_lock_owner());
+
+ ldout(m_image_ctx.cct, 5) << this << " send_update_header" << dendl;
+ m_state = STATE_UPDATE_HEADER;
+
+ librados::ObjectWriteOperation op;
+
+ uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID | RBD_FLAG_FAST_DIFF_INVALID;
+ cls_client::set_flags(&op, m_image_ctx.snap_id, 0, flags);
+
+ librados::AioCompletion *comp = this->create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ m_image_ctx.update_flags(m_image_ctx.snap_id, flags, false);
+}
+
+template <typename I>
+uint64_t RebuildObjectMapRequest<I>::get_image_size() const {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock));
+ if (m_image_ctx.snap_id == CEPH_NOSNAP) {
+ if (!m_image_ctx.resize_reqs.empty()) {
+ return m_image_ctx.resize_reqs.front()->get_image_size();
+ } else {
+ return m_image_ctx.size;
+ }
+ }
+ return m_image_ctx.get_image_size(m_image_ctx.snap_id);
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::RebuildObjectMapRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/RebuildObjectMapRequest.h b/src/librbd/operation/RebuildObjectMapRequest.h
new file mode 100644
index 000000000..c7f1aa3b7
--- /dev/null
+++ b/src/librbd/operation/RebuildObjectMapRequest.h
@@ -0,0 +1,84 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/AsyncRequest.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class RebuildObjectMapRequest : public AsyncRequest<ImageCtxT> {
+public:
+
+ RebuildObjectMapRequest(ImageCtxT &image_ctx, Context *on_finish,
+ ProgressContext &prog_ctx)
+ : AsyncRequest<ImageCtxT>(image_ctx, on_finish), m_image_ctx(image_ctx),
+ m_prog_ctx(prog_ctx), m_attempted_trim(false)
+ {
+ }
+
+ void send() override;
+
+protected:
+ bool should_complete(int r) override;
+
+private:
+ /**
+ * Rebuild object map goes through the following state machine to
+ * verify per-object state:
+ *
+ * <start>
+ * . | . . . . . . . . . .
+ * . | . .
+ * . v v .
+ * . STATE_RESIZE_OBJECT_MAP . . . > STATE_TRIM_IMAGE
+ * . |
+ * . v
+ * . . . > STATE_VERIFY_OBJECTS
+ * |
+ * v
+ * STATE_SAVE_OBJECT_MAP
+ * |
+ * v
+ * STATE_UPDATE_HEADER
+ *
+ * The _RESIZE_OBJECT_MAP state will be skipped if the object map
+ * is appropriately sized for the image. The _TRIM_IMAGE state will
+ * only be hit if the resize failed due to an in-use object.
+ */
+ enum State {
+ STATE_RESIZE_OBJECT_MAP,
+ STATE_TRIM_IMAGE,
+ STATE_VERIFY_OBJECTS,
+ STATE_SAVE_OBJECT_MAP,
+ STATE_UPDATE_HEADER
+ };
+
+ ImageCtxT &m_image_ctx;
+ ProgressContext &m_prog_ctx;
+ State m_state = STATE_RESIZE_OBJECT_MAP;
+ bool m_attempted_trim;
+
+ void send_resize_object_map();
+ void send_trim_image();
+ void send_verify_objects();
+ void send_save_object_map();
+ void send_update_header();
+
+ uint64_t get_image_size() const;
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::RebuildObjectMapRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
diff --git a/src/librbd/operation/RenameRequest.cc b/src/librbd/operation/RenameRequest.cc
new file mode 100644
index 000000000..15bcd819c
--- /dev/null
+++ b/src/librbd/operation/RenameRequest.cc
@@ -0,0 +1,257 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/RenameRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "include/rados/librados.hpp"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/Utils.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::operation::RenameRequest: "
+
+namespace librbd {
+namespace operation {
+
+namespace {
+
+template <typename I>
+std::ostream& operator<<(std::ostream& os,
+ const typename RenameRequest<I>::State& state) {
+ switch(state) {
+ case RenameRequest<I>::STATE_READ_DIRECTORY:
+ os << "READ_DIRECTORY";
+ break;
+ case RenameRequest<I>::STATE_READ_SOURCE_HEADER:
+ os << "READ_SOURCE_HEADER";
+ break;
+ case RenameRequest<I>::STATE_WRITE_DEST_HEADER:
+ os << "WRITE_DEST_HEADER";
+ break;
+ case RenameRequest<I>::STATE_UPDATE_DIRECTORY:
+ os << "UPDATE_DIRECTORY";
+ break;
+ case RenameRequest<I>::STATE_REMOVE_SOURCE_HEADER:
+ os << "REMOVE_SOURCE_HEADER";
+ break;
+ default:
+ os << "UNKNOWN (" << static_cast<uint32_t>(state) << ")";
+ break;
+ }
+ return os;
+}
+
+} // anonymous namespace
+
+template <typename I>
+RenameRequest<I>::RenameRequest(I &image_ctx, Context *on_finish,
+ const std::string &dest_name)
+ : Request<I>(image_ctx, on_finish), m_dest_name(dest_name),
+ m_source_oid(image_ctx.old_format ? util::old_header_name(image_ctx.name) :
+ util::id_obj_name(image_ctx.name)),
+ m_dest_oid(image_ctx.old_format ? util::old_header_name(dest_name) :
+ util::id_obj_name(dest_name)) {
+}
+
+template <typename I>
+void RenameRequest<I>::send_op() {
+ I &image_ctx = this->m_image_ctx;
+ if (image_ctx.old_format) {
+ send_read_source_header();
+ return;
+ }
+ send_read_directory();
+}
+
+template <typename I>
+bool RenameRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", "
+ << "r=" << r << dendl;
+ r = filter_return_code(r);
+ if (r < 0) {
+ if (r == -EEXIST) {
+ ldout(cct, 1) << "image already exists" << dendl;
+ } else {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+ }
+
+ if (m_state == STATE_READ_DIRECTORY) {
+ std::string name;
+ auto it = m_source_name_bl.cbegin();
+ r = cls_client::dir_get_name_finish(&it, &name);
+ if (r < 0) {
+ lderr(cct) << "could not read directory: " << cpp_strerror(r) << dendl;
+ return true;
+ }
+ bool update = false;
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ update = image_ctx.name != name;
+ }
+ if (update) {
+ image_ctx.set_image_name(name);
+ m_source_oid = util::id_obj_name(name);
+ }
+ } else if (m_state == STATE_UPDATE_DIRECTORY) {
+ // update in-memory name before removing source header
+ apply();
+ } else if (m_state == STATE_REMOVE_SOURCE_HEADER) {
+ return true;
+ }
+
+ std::shared_lock owner_lock{image_ctx.owner_lock};
+ switch (m_state) {
+ case STATE_READ_DIRECTORY:
+ send_read_source_header();
+ break;
+ case STATE_READ_SOURCE_HEADER:
+ send_write_destination_header();
+ break;
+ case STATE_WRITE_DEST_HEADER:
+ send_update_directory();
+ break;
+ case STATE_UPDATE_DIRECTORY:
+ send_remove_source_header();
+ break;
+ default:
+ ceph_abort();
+ break;
+ }
+ return false;
+}
+
+template <typename I>
+int RenameRequest<I>::filter_return_code(int r) const {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (m_state == STATE_READ_SOURCE_HEADER && r == -ENOENT) {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.name == m_dest_name) {
+ // signal that replay raced with itself
+ return -EEXIST;
+ }
+ } else if (m_state == STATE_REMOVE_SOURCE_HEADER && r < 0) {
+ if (r != -ENOENT) {
+ lderr(cct) << "warning: couldn't remove old source object ("
+ << m_source_oid << ")" << dendl;
+ }
+ return 0;
+ }
+ return r;
+}
+
+template <typename I>
+void RenameRequest<I>::send_read_directory() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+ m_state = STATE_READ_DIRECTORY;
+
+ librados::ObjectReadOperation op;
+ cls_client::dir_get_name_start(&op, image_ctx.id);
+
+ auto comp = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(RBD_DIRECTORY, comp, &op,
+ &m_source_name_bl);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+void RenameRequest<I>::send_read_source_header() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+ m_state = STATE_READ_SOURCE_HEADER;
+
+ librados::ObjectReadOperation op;
+ op.read(0, 0, NULL, NULL);
+
+ // TODO: old code read omap values but there are no omap values on the
+ // old format header nor the new format id object
+ librados::AioCompletion *rados_completion = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(m_source_oid, rados_completion, &op,
+ &m_header_bl);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+void RenameRequest<I>::send_write_destination_header() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+ m_state = STATE_WRITE_DEST_HEADER;
+
+ librados::ObjectWriteOperation op;
+ op.create(true);
+ op.write_full(m_header_bl);
+
+ librados::AioCompletion *rados_completion = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(m_dest_oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+void RenameRequest<I>::send_update_directory() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+ m_state = STATE_UPDATE_DIRECTORY;
+
+ librados::ObjectWriteOperation op;
+ if (image_ctx.old_format) {
+ bufferlist cmd_bl;
+ bufferlist empty_bl;
+ encode(static_cast<__u8>(CEPH_OSD_TMAP_SET), cmd_bl);
+ encode(m_dest_name, cmd_bl);
+ encode(empty_bl, cmd_bl);
+ encode(static_cast<__u8>(CEPH_OSD_TMAP_RM), cmd_bl);
+ encode(image_ctx.name, cmd_bl);
+ op.tmap_update(cmd_bl);
+ } else {
+ cls_client::dir_rename_image(&op, image_ctx.name, m_dest_name,
+ image_ctx.id);
+ }
+
+ librados::AioCompletion *rados_completion = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(RBD_DIRECTORY, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+void RenameRequest<I>::send_remove_source_header() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+ m_state = STATE_REMOVE_SOURCE_HEADER;
+
+ librados::ObjectWriteOperation op;
+ op.remove();
+
+ librados::AioCompletion *rados_completion = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(m_source_oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+void RenameRequest<I>::apply() {
+ I &image_ctx = this->m_image_ctx;
+ image_ctx.set_image_name(m_dest_name);
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::RenameRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/RenameRequest.h b/src/librbd/operation/RenameRequest.h
new file mode 100644
index 000000000..11fdec648
--- /dev/null
+++ b/src/librbd/operation/RenameRequest.h
@@ -0,0 +1,95 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_RENAME_REQUEST_H
+#define CEPH_LIBRBD_RENAME_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class RenameRequest : public Request<ImageCtxT>
+{
+public:
+ /**
+ * Rename goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_READ_DIRECTORY
+ * |
+ * v
+ * STATE_READ_SOURCE_HEADER
+ * |
+ * v
+ * STATE_WRITE_DEST_HEADER
+ * |
+ * v
+ * STATE_UPDATE_DIRECTORY
+ * |
+ * v
+ * STATE_REMOVE_SOURCE_HEADER
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ *
+ */
+ enum State {
+ STATE_READ_DIRECTORY,
+ STATE_READ_SOURCE_HEADER,
+ STATE_WRITE_DEST_HEADER,
+ STATE_UPDATE_DIRECTORY,
+ STATE_REMOVE_SOURCE_HEADER
+ };
+
+ RenameRequest(ImageCtxT &image_ctx, Context *on_finish,
+ const std::string &dest_name);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+ int filter_return_code(int r) const override;
+
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::RenameEvent(op_tid, m_dest_name);
+ }
+
+private:
+ std::string m_dest_name;
+
+ std::string m_source_oid;
+ std::string m_dest_oid;
+
+ State m_state = STATE_READ_DIRECTORY;
+
+ bufferlist m_source_name_bl;
+ bufferlist m_header_bl;
+
+ void send_read_directory();
+ void send_read_source_header();
+ void send_write_destination_header();
+ void send_update_directory();
+ void send_remove_source_header();
+
+ void apply();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::RenameRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_RENAME_REQUEST_H
diff --git a/src/librbd/operation/Request.cc b/src/librbd/operation/Request.cc
new file mode 100644
index 000000000..269c8a4f9
--- /dev/null
+++ b/src/librbd/operation/Request.cc
@@ -0,0 +1,183 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/Request.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/asio/ContextWQ.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::Request: "
+
+namespace librbd {
+namespace operation {
+
+template <typename I>
+Request<I>::Request(I &image_ctx, Context *on_finish, uint64_t journal_op_tid)
+ : AsyncRequest<I>(image_ctx, on_finish), m_op_tid(journal_op_tid) {
+}
+
+template <typename I>
+void Request<I>::send() {
+ [[maybe_unused]] I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ // automatically create the event if we don't need to worry
+ // about affecting concurrent IO ops
+ if (can_affect_io() || !append_op_event()) {
+ send_op();
+ }
+}
+
+template <typename I>
+Context *Request<I>::create_context_finisher(int r) {
+ // automatically commit the event if required (delete after commit)
+ if (m_appended_op_event && !m_committed_op_event &&
+ commit_op_event(r)) {
+ return nullptr;
+ }
+
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << dendl;
+ return util::create_context_callback<Request<I>, &Request<I>::finish>(this);
+}
+
+template <typename I>
+void Request<I>::finish_and_destroy(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+ // automatically commit the event if required (delete after commit)
+ if (m_appended_op_event && !m_committed_op_event &&
+ commit_op_event(r)) {
+ return;
+ }
+
+ AsyncRequest<I>::finish_and_destroy(r);
+}
+
+template <typename I>
+void Request<I>::finish(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+ ceph_assert(!m_appended_op_event || m_committed_op_event);
+ AsyncRequest<I>::finish(r);
+}
+
+template <typename I>
+bool Request<I>::append_op_event() {
+ I &image_ctx = this->m_image_ctx;
+
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ std::shared_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.journal != nullptr &&
+ image_ctx.journal->is_journal_appending()) {
+ append_op_event(util::create_context_callback<
+ Request<I>, &Request<I>::handle_op_event_safe>(this));
+ return true;
+ }
+ return false;
+}
+
+template <typename I>
+bool Request<I>::commit_op_event(int r) {
+ I &image_ctx = this->m_image_ctx;
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ if (!m_appended_op_event) {
+ return false;
+ }
+
+ ceph_assert(m_op_tid != 0);
+ ceph_assert(!m_committed_op_event);
+ m_committed_op_event = true;
+
+ if (image_ctx.journal != nullptr &&
+ image_ctx.journal->is_journal_appending()) {
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+ // ops will be canceled / completed before closing journal
+ ceph_assert(image_ctx.journal->is_journal_ready());
+ image_ctx.journal->commit_op_event(m_op_tid, r,
+ new C_CommitOpEvent(this, r));
+ return true;
+ }
+ return false;
+}
+
+template <typename I>
+void Request<I>::handle_commit_op_event(int r, int original_ret_val) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to commit op event to journal: " << cpp_strerror(r)
+ << dendl;
+ }
+ if (original_ret_val < 0) {
+ r = original_ret_val;
+ }
+ finish(r);
+}
+
+template <typename I>
+void Request<I>::replay_op_ready(Context *on_safe) {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ ceph_assert(ceph_mutex_is_locked(image_ctx.image_lock));
+ ceph_assert(m_op_tid != 0);
+
+ m_appended_op_event = true;
+ image_ctx.journal->replay_op_ready(
+ m_op_tid, util::create_async_context_callback(image_ctx, on_safe));
+}
+
+template <typename I>
+void Request<I>::append_op_event(Context *on_safe) {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ ceph_assert(ceph_mutex_is_locked(image_ctx.image_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << dendl;
+
+ m_op_tid = image_ctx.journal->allocate_op_tid();
+ image_ctx.journal->append_op_event(
+ m_op_tid, journal::EventEntry{create_event(m_op_tid)},
+ new C_AppendOpEvent(this, on_safe));
+}
+
+template <typename I>
+void Request<I>::handle_op_event_safe(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to commit op event to journal: " << cpp_strerror(r)
+ << dendl;
+ this->finish(r);
+ delete this;
+ } else {
+ ceph_assert(!can_affect_io());
+
+ // haven't started the request state machine yet
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ send_op();
+ }
+}
+
+} // namespace operation
+} // namespace librbd
+
+#ifndef TEST_F
+template class librbd::operation::Request<librbd::ImageCtx>;
+#endif
diff --git a/src/librbd/operation/Request.h b/src/librbd/operation/Request.h
new file mode 100644
index 000000000..e32b49644
--- /dev/null
+++ b/src/librbd/operation/Request.h
@@ -0,0 +1,107 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+#include "include/Context.h"
+#include "common/RWLock.h"
+#include "librbd/Utils.h"
+#include "librbd/Journal.h"
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class Request : public AsyncRequest<ImageCtxT> {
+public:
+ Request(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t journal_op_tid = 0);
+
+ void send();
+
+protected:
+ void finish(int r) override;
+ virtual void send_op() = 0;
+
+ virtual bool can_affect_io() const {
+ return false;
+ }
+ virtual journal::Event create_event(uint64_t op_tid) const = 0;
+
+ template <typename T, Context*(T::*MF)(int*)>
+ bool append_op_event(T *request) {
+ ImageCtxT &image_ctx = this->m_image_ctx;
+
+ ceph_assert(can_affect_io());
+ std::scoped_lock locker{image_ctx.owner_lock, image_ctx.image_lock};
+ if (image_ctx.journal != nullptr) {
+ if (image_ctx.journal->is_journal_replaying()) {
+ Context *ctx = util::create_context_callback<T, MF>(request);
+ replay_op_ready(ctx);
+ return true;
+ } else if (image_ctx.journal->is_journal_appending()) {
+ Context *ctx = util::create_context_callback<T, MF>(request);
+ append_op_event(ctx);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool append_op_event();
+
+ // NOTE: temporary until converted to new state machine format
+ Context *create_context_finisher(int r);
+ void finish_and_destroy(int r) override;
+
+private:
+ struct C_AppendOpEvent : public Context {
+ Request *request;
+ Context *on_safe;
+ C_AppendOpEvent(Request *request, Context *on_safe)
+ : request(request), on_safe(on_safe) {
+ }
+ void finish(int r) override {
+ if (r >= 0) {
+ request->m_appended_op_event = true;
+ }
+ on_safe->complete(r);
+ }
+ };
+
+ struct C_CommitOpEvent : public Context {
+ Request *request;
+ int ret_val;
+ C_CommitOpEvent(Request *request, int ret_val)
+ : request(request), ret_val(ret_val) {
+ }
+ void finish(int r) override {
+ request->handle_commit_op_event(r, ret_val);
+ delete request;
+ }
+ };
+
+ uint64_t m_op_tid = 0;
+ bool m_appended_op_event = false;
+ bool m_committed_op_event = false;
+
+ void replay_op_ready(Context *on_safe);
+ void append_op_event(Context *on_safe);
+ void handle_op_event_safe(int r);
+
+ bool commit_op_event(int r);
+ void handle_commit_op_event(int r, int original_ret_val);
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::Request<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_REQUEST_H
diff --git a/src/librbd/operation/ResizeRequest.cc b/src/librbd/operation/ResizeRequest.cc
new file mode 100644
index 000000000..e4e76dacd
--- /dev/null
+++ b/src/librbd/operation/ResizeRequest.cc
@@ -0,0 +1,466 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+#include "librbd/io/ImageDispatcherInterface.h"
+#include "librbd/io/ObjectDispatcherInterface.h"
+#include "librbd/operation/TrimRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::operation::ResizeRequest: " << this \
+ << " " << __func__ << ": "
+
+namespace librbd {
+namespace operation {
+
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_callback;
+
+template <typename I>
+ResizeRequest<I>::ResizeRequest(I &image_ctx, Context *on_finish,
+ uint64_t new_size, bool allow_shrink, ProgressContext &prog_ctx,
+ uint64_t journal_op_tid, bool disable_journal)
+ : Request<I>(image_ctx, on_finish, journal_op_tid),
+ m_original_size(0), m_new_size(new_size), m_allow_shrink(allow_shrink),
+ m_prog_ctx(prog_ctx), m_new_parent_overlap(0), m_disable_journal(disable_journal),
+ m_xlist_item(this)
+{
+}
+
+template <typename I>
+ResizeRequest<I>::~ResizeRequest() {
+ I &image_ctx = this->m_image_ctx;
+ ResizeRequest *next_req = NULL;
+ {
+ std::unique_lock image_locker{image_ctx.image_lock};
+ ceph_assert(m_xlist_item.remove_myself());
+ if (!image_ctx.resize_reqs.empty()) {
+ next_req = image_ctx.resize_reqs.front();
+ }
+ }
+
+ if (next_req != NULL) {
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ next_req->send();
+ }
+}
+
+template <typename I>
+void ResizeRequest<I>::send() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ {
+ std::unique_lock image_locker{image_ctx.image_lock};
+ if (!m_xlist_item.is_on_list()) {
+ image_ctx.resize_reqs.push_back(&m_xlist_item);
+ if (image_ctx.resize_reqs.front() != this) {
+ return;
+ }
+ }
+
+ ceph_assert(image_ctx.resize_reqs.front() == this);
+ m_original_size = image_ctx.size;
+ compute_parent_overlap();
+ }
+
+ Request<I>::send();
+}
+
+template <typename I>
+void ResizeRequest<I>::send_op() {
+ [[maybe_unused]] I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ if (this->is_canceled()) {
+ this->async_complete(-ERESTART);
+ } else {
+ send_pre_block_writes();
+ }
+}
+
+template <typename I>
+void ResizeRequest<I>::send_pre_block_writes() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ image_ctx.io_image_dispatcher->block_writes(create_context_callback<
+ ResizeRequest<I>, &ResizeRequest<I>::handle_pre_block_writes>(this));
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_pre_block_writes(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
+ image_ctx.io_image_dispatcher->unblock_writes();
+ return this->create_context_finisher(*result);
+ }
+
+ return send_append_op_event();
+}
+
+template <typename I>
+Context *ResizeRequest<I>::send_append_op_event() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (m_new_size < m_original_size && !m_allow_shrink) {
+ ldout(cct, 1) << "shrinking the image is not permitted" << dendl;
+ image_ctx.io_image_dispatcher->unblock_writes();
+ this->async_complete(-EINVAL);
+ return nullptr;
+ }
+
+ if (m_disable_journal || !this->template append_op_event<
+ ResizeRequest<I>, &ResizeRequest<I>::handle_append_op_event>(this)) {
+ return send_grow_object_map();
+ }
+
+ ldout(cct, 5) << dendl;
+ return nullptr;
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_append_op_event(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result)
+ << dendl;
+ image_ctx.io_image_dispatcher->unblock_writes();
+ return this->create_context_finisher(*result);
+ }
+
+ return send_grow_object_map();
+}
+
+template <typename I>
+void ResizeRequest<I>::send_trim_image() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ TrimRequest<I> *req = TrimRequest<I>::create(
+ image_ctx, create_context_callback<
+ ResizeRequest<I>, &ResizeRequest<I>::handle_trim_image>(this),
+ m_original_size, m_new_size, m_prog_ctx);
+ req->send();
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_trim_image(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << *result << dendl;
+
+ if (*result == -ERESTART) {
+ ldout(cct, 5) << "resize operation interrupted" << dendl;
+ return this->create_context_finisher(*result);
+ } else if (*result < 0) {
+ lderr(cct) << "failed to trim image: " << cpp_strerror(*result) << dendl;
+ return this->create_context_finisher(*result);
+ }
+
+ send_post_block_writes();
+ return nullptr;
+}
+
+template <typename I>
+void ResizeRequest<I>::send_flush_cache() {
+ I &image_ctx = this->m_image_ctx;
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ auto ctx = create_context_callback<
+ ResizeRequest<I>, &ResizeRequest<I>::handle_flush_cache>(this);
+ auto aio_comp = io::AioCompletion::create_and_start(
+ ctx, util::get_image_ctx(&image_ctx), io::AIO_TYPE_FLUSH);
+ auto req = io::ImageDispatchSpec::create_flush(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START, aio_comp,
+ io::FLUSH_SOURCE_INTERNAL, {});
+ req->send();
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_flush_cache(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to flush cache: " << cpp_strerror(*result) << dendl;
+ return this->create_context_finisher(*result);
+ }
+
+ send_invalidate_cache();
+ return nullptr;
+}
+
+template <typename I>
+void ResizeRequest<I>::send_invalidate_cache() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ // need to invalidate since we're deleting objects, and
+ // ObjectCacher doesn't track non-existent objects
+ image_ctx.io_image_dispatcher->invalidate_cache(create_context_callback<
+ ResizeRequest<I>, &ResizeRequest<I>::handle_invalidate_cache>(this));
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_invalidate_cache(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << *result << dendl;
+
+ // ignore busy error -- writeback was successfully flushed so we might be
+ // wasting some cache space for trimmed objects, but they will get purged
+ // eventually. Most likely cause of the issue was a in-flight cache read
+ if (*result < 0 && *result != -EBUSY) {
+ lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result)
+ << dendl;
+ return this->create_context_finisher(*result);
+ }
+
+ send_trim_image();
+ return nullptr;
+}
+
+template <typename I>
+Context *ResizeRequest<I>::send_grow_object_map() {
+ I &image_ctx = this->m_image_ctx;
+
+ {
+ std::unique_lock image_locker{image_ctx.image_lock};
+ m_shrink_size_visible = true;
+ }
+
+ if (m_original_size == m_new_size) {
+ image_ctx.io_image_dispatcher->unblock_writes();
+ return this->create_context_finisher(0);
+ } else if (m_new_size < m_original_size) {
+ image_ctx.io_image_dispatcher->unblock_writes();
+ send_flush_cache();
+ return nullptr;
+ }
+
+ image_ctx.owner_lock.lock_shared();
+ image_ctx.image_lock.lock_shared();
+ if (image_ctx.object_map == nullptr) {
+ image_ctx.image_lock.unlock_shared();
+ image_ctx.owner_lock.unlock_shared();
+
+ // IO is still blocked
+ send_update_header();
+ return nullptr;
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ image_ctx.object_map->aio_resize(
+ m_new_size, OBJECT_NONEXISTENT, create_context_callback<
+ ResizeRequest<I>, &ResizeRequest<I>::handle_grow_object_map>(this));
+ image_ctx.image_lock.unlock_shared();
+ image_ctx.owner_lock.unlock_shared();
+ return nullptr;
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_grow_object_map(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to resize object map: "
+ << cpp_strerror(*result) << dendl;
+ image_ctx.io_image_dispatcher->unblock_writes();
+ return this->create_context_finisher(*result);
+ }
+
+ // IO is still blocked
+ send_update_header();
+ return nullptr;
+}
+
+template <typename I>
+Context *ResizeRequest<I>::send_shrink_object_map() {
+ I &image_ctx = this->m_image_ctx;
+
+ image_ctx.owner_lock.lock_shared();
+ image_ctx.image_lock.lock_shared();
+ if (image_ctx.object_map == nullptr || m_new_size > m_original_size) {
+ image_ctx.image_lock.unlock_shared();
+ image_ctx.owner_lock.unlock_shared();
+
+ update_size_and_overlap();
+ return this->create_context_finisher(0);
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "original_size=" << m_original_size << ", "
+ << "new_size=" << m_new_size << dendl;
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ image_ctx.object_map->aio_resize(
+ m_new_size, OBJECT_NONEXISTENT, create_context_callback<
+ ResizeRequest<I>, &ResizeRequest<I>::handle_shrink_object_map>(this));
+ image_ctx.image_lock.unlock_shared();
+ image_ctx.owner_lock.unlock_shared();
+ return nullptr;
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_shrink_object_map(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to resize object map: "
+ << cpp_strerror(*result) << dendl;
+ image_ctx.io_image_dispatcher->unblock_writes();
+ return this->create_context_finisher(*result);
+ }
+
+ update_size_and_overlap();
+ return this->create_context_finisher(0);
+}
+
+template <typename I>
+void ResizeRequest<I>::send_post_block_writes() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ image_ctx.io_image_dispatcher->block_writes(create_context_callback<
+ ResizeRequest<I>, &ResizeRequest<I>::handle_post_block_writes>(this));
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_post_block_writes(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << *result << dendl;
+
+ if (*result < 0) {
+ image_ctx.io_image_dispatcher->unblock_writes();
+ lderr(cct) << "failed to block writes prior to header update: "
+ << cpp_strerror(*result) << dendl;
+ return this->create_context_finisher(*result);
+ }
+
+ send_update_header();
+ return nullptr;
+}
+
+template <typename I>
+void ResizeRequest<I>::send_update_header() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "original_size=" << m_original_size << ", "
+ << "new_size=" << m_new_size << dendl;;
+
+ // should have been canceled prior to releasing lock
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ librados::ObjectWriteOperation op;
+ if (image_ctx.old_format) {
+ // rewrite only the size field of the header
+ ceph_le64 new_size = init_le64(m_new_size);
+ bufferlist bl;
+ bl.append(reinterpret_cast<const char*>(&new_size), sizeof(new_size));
+ op.write(offsetof(rbd_obj_header_ondisk, image_size), bl);
+ } else {
+ cls_client::set_size(&op, m_new_size);
+ }
+
+ librados::AioCompletion *rados_completion = create_rados_callback<
+ ResizeRequest<I>, &ResizeRequest<I>::handle_update_header>(this);
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid,
+ rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_update_header(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to update image header: " << cpp_strerror(*result)
+ << dendl;
+ image_ctx.io_image_dispatcher->unblock_writes();
+ return this->create_context_finisher(*result);
+ }
+
+ return send_shrink_object_map();
+}
+
+template <typename I>
+void ResizeRequest<I>::compute_parent_overlap() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.image_lock));
+
+ if (image_ctx.parent == NULL) {
+ m_new_parent_overlap = 0;
+ } else {
+ m_new_parent_overlap = std::min(m_new_size, image_ctx.parent_md.overlap);
+ }
+}
+
+template <typename I>
+void ResizeRequest<I>::update_size_and_overlap() {
+ I &image_ctx = this->m_image_ctx;
+ {
+ std::unique_lock image_locker{image_ctx.image_lock};
+ image_ctx.size = m_new_size;
+
+ if (image_ctx.parent != NULL && m_new_size < m_original_size) {
+ image_ctx.parent_md.overlap = m_new_parent_overlap;
+ }
+ }
+
+ // blocked by PRE_BLOCK_WRITES (grow) or POST_BLOCK_WRITES (shrink) state
+ image_ctx.io_image_dispatcher->unblock_writes();
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::ResizeRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/ResizeRequest.h b/src/librbd/operation/ResizeRequest.h
new file mode 100644
index 000000000..f5e2f807f
--- /dev/null
+++ b/src/librbd/operation/ResizeRequest.h
@@ -0,0 +1,156 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include "include/xlist.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class ResizeRequest : public Request<ImageCtxT> {
+public:
+ static ResizeRequest *create(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t new_size, bool allow_shrink,
+ ProgressContext &prog_ctx, uint64_t journal_op_tid,
+ bool disable_journal) {
+ return new ResizeRequest(image_ctx, on_finish, new_size, allow_shrink, prog_ctx,
+ journal_op_tid, disable_journal);
+ }
+
+ ResizeRequest(ImageCtxT &image_ctx, Context *on_finish, uint64_t new_size,
+ bool allow_shrink, ProgressContext &prog_ctx, uint64_t journal_op_tid,
+ bool disable_journal);
+ ~ResizeRequest() override;
+
+ inline bool shrinking() const {
+ return (m_shrink_size_visible && m_new_size < m_original_size);
+ }
+
+ inline uint64_t get_image_size() const {
+ return m_new_size;
+ }
+
+ void send() override;
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override {
+ return true;
+ }
+ bool can_affect_io() const override {
+ return true;
+ }
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::ResizeEvent(op_tid, m_new_size);
+ }
+
+private:
+ /**
+ * Resize goes through the following state machine to resize the image
+ * and update the object map:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_PRE_BLOCK_WRITES
+ * |
+ * v
+ * STATE_APPEND_OP_EVENT (skip if journaling
+ * | disabled)
+ * |
+ * | (grow)
+ * |\--------> STATE_GROW_OBJECT_MAP (skip if object map
+ * | | disabled)
+ * | v
+ * | STATE_UPDATE_HEADER ----------------------------\
+ * | (unblock writes) |
+ * | |
+ * | (unblock writes) |
+ * | |
+ * | (shrink) |
+ * |\--------> STATE_FLUSH_CACHE |
+ * | | |
+ * | v |
+ * | STATE_INVALIDATE_CACHE |
+ * | | |
+ * | v |
+ * | STATE_TRIM_IMAGE |
+ * | | |
+ * | v |
+ * | STATE_POST_BLOCK_WRITES |
+ * | | |
+ * | v |
+ * | STATE_UPDATE_HEADER |
+ * | | |
+ * | v |
+ * | STATE_SHRINK_OBJECT_MAP (skip if object map |
+ * | | disabled) |
+ * | | (unblock writes) |
+ * | (no change) v |
+ * \------------> <finish> <-----------------------------------/
+ *
+ * @endverbatim
+ *
+ * The _OBJECT_MAP states are skipped if the object map isn't enabled.
+ * The state machine will immediately transition to _FINISHED if there
+ * are no objects to trim.
+ */
+
+ uint64_t m_original_size;
+ uint64_t m_new_size;
+ bool m_allow_shrink = true;
+ ProgressContext &m_prog_ctx;
+ uint64_t m_new_parent_overlap;
+ bool m_shrink_size_visible = false;
+ bool m_disable_journal = false;
+
+ typename xlist<ResizeRequest<ImageCtxT>*>::item m_xlist_item;
+
+ void send_pre_block_writes();
+ Context *handle_pre_block_writes(int *result);
+
+ Context *send_append_op_event();
+ Context *handle_append_op_event(int *result);
+
+ void send_flush_cache();
+ Context *handle_flush_cache(int *result);
+
+ void send_invalidate_cache();
+ Context *handle_invalidate_cache(int *result);
+
+ void send_trim_image();
+ Context *handle_trim_image(int *result);
+
+ Context *send_grow_object_map();
+ Context *handle_grow_object_map(int *result);
+
+ Context *send_shrink_object_map();
+ Context *handle_shrink_object_map(int *result);
+
+ void send_post_block_writes();
+ Context *handle_post_block_writes(int *result);
+
+ void send_update_header();
+ Context *handle_update_header(int *result);
+
+ void compute_parent_overlap();
+ void update_size_and_overlap();
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::ResizeRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
diff --git a/src/librbd/operation/SnapshotCreateRequest.cc b/src/librbd/operation/SnapshotCreateRequest.cc
new file mode 100644
index 000000000..866ef7d61
--- /dev/null
+++ b/src/librbd/operation/SnapshotCreateRequest.cc
@@ -0,0 +1,449 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/operation/SnapshotCreateRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/io/ImageDispatcherInterface.h"
+#include "librbd/mirror/snapshot/SetImageStateRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::SnapshotCreateRequest: "
+
+namespace librbd {
+namespace operation {
+
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_callback;
+
+template <typename I>
+SnapshotCreateRequest<I>::SnapshotCreateRequest(I &image_ctx,
+ Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name,
+ uint64_t journal_op_tid,
+ uint64_t flags,
+ ProgressContext &prog_ctx)
+ : Request<I>(image_ctx, on_finish, journal_op_tid),
+ m_snap_namespace(snap_namespace), m_snap_name(snap_name),
+ m_skip_object_map(flags & SNAP_CREATE_FLAG_SKIP_OBJECT_MAP),
+ m_skip_notify_quiesce(flags & SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE),
+ m_ignore_notify_quiesce_error(flags & SNAP_CREATE_FLAG_IGNORE_NOTIFY_QUIESCE_ERROR),
+ m_prog_ctx(prog_ctx) {
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_op() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (!image_ctx.data_ctx.is_valid()) {
+ lderr(cct) << "missing data pool" << dendl;
+ this->async_complete(-ENODEV);
+ return;
+ }
+
+ send_notify_quiesce();
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_notify_quiesce() {
+ if (m_skip_notify_quiesce) {
+ send_suspend_requests();
+ return;
+ }
+
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ image_ctx.image_watcher->notify_quiesce(
+ &m_request_id, m_prog_ctx, create_async_context_callback(
+ image_ctx, create_context_callback<SnapshotCreateRequest<I>,
+ &SnapshotCreateRequest<I>::handle_notify_quiesce>(this)));
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_notify_quiesce(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0 && !m_ignore_notify_quiesce_error) {
+ lderr(cct) << "failed to notify quiesce: " << cpp_strerror(*result)
+ << dendl;
+ save_result(result);
+ send_notify_unquiesce();
+ return nullptr;
+ }
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ send_suspend_requests();
+ return nullptr;
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_suspend_requests() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ // TODO suspend (shrink) resize to ensure consistent RBD mirror
+ send_suspend_aio();
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_suspend_requests(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ // TODO
+ send_suspend_aio();
+ return nullptr;
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_suspend_aio() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ image_ctx.io_image_dispatcher->block_writes(create_context_callback<
+ SnapshotCreateRequest<I>,
+ &SnapshotCreateRequest<I>::handle_suspend_aio>(this));
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_suspend_aio(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
+ save_result(result);
+ return send_notify_unquiesce();
+ }
+
+ m_writes_blocked = true;
+
+ send_append_op_event();
+ return nullptr;
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_append_op_event() {
+ I &image_ctx = this->m_image_ctx;
+ if (!this->template append_op_event<
+ SnapshotCreateRequest<I>,
+ &SnapshotCreateRequest<I>::handle_append_op_event>(this)) {
+ send_allocate_snap_id();
+ return;
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_append_op_event(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result)
+ << dendl;
+ save_result(result);
+ return send_notify_unquiesce();
+ }
+
+ send_allocate_snap_id();
+ return nullptr;
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_allocate_snap_id() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ librados::AioCompletion *rados_completion = create_rados_callback<
+ SnapshotCreateRequest<I>,
+ &SnapshotCreateRequest<I>::handle_allocate_snap_id>(this);
+ image_ctx.data_ctx.aio_selfmanaged_snap_create(&m_snap_id, rados_completion);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_allocate_snap_id(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << ", "
+ << "snap_id=" << m_snap_id << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to allocate snapshot id: " << cpp_strerror(*result)
+ << dendl;
+ save_result(result);
+ return send_notify_unquiesce();
+ }
+
+ send_create_snap();
+ return nullptr;
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_create_snap() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ // save current size / parent info for creating snapshot record in ImageCtx
+ m_size = image_ctx.size;
+ m_parent_info = image_ctx.parent_md;
+
+ librados::ObjectWriteOperation op;
+ if (image_ctx.old_format) {
+ cls_client::old_snapshot_add(&op, m_snap_id, m_snap_name);
+ } else {
+ cls_client::snapshot_add(&op, m_snap_id, m_snap_name, m_snap_namespace);
+ }
+
+ librados::AioCompletion *rados_completion = create_rados_callback<
+ SnapshotCreateRequest<I>,
+ &SnapshotCreateRequest<I>::handle_create_snap>(this);
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid,
+ rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_create_snap(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result == -ESTALE) {
+ send_allocate_snap_id();
+ return nullptr;
+ } else if (*result < 0) {
+ save_result(result);
+ send_release_snap_id();
+ return nullptr;
+ }
+
+ return send_create_object_map();
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::send_create_object_map() {
+ I &image_ctx = this->m_image_ctx;
+
+ image_ctx.image_lock.lock_shared();
+ if (image_ctx.object_map == nullptr || m_skip_object_map) {
+ image_ctx.image_lock.unlock_shared();
+
+ return send_create_image_state();
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ image_ctx.object_map->snapshot_add(
+ m_snap_id, create_context_callback<
+ SnapshotCreateRequest<I>,
+ &SnapshotCreateRequest<I>::handle_create_object_map>(this));
+ image_ctx.image_lock.unlock_shared();
+ return nullptr;
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_create_object_map(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << this << " " << __func__ << ": failed to snapshot object map: "
+ << cpp_strerror(*result) << dendl;
+
+ save_result(result);
+ update_snap_context();
+ return send_notify_unquiesce();
+ }
+
+ return send_create_image_state();
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::send_create_image_state() {
+ I &image_ctx = this->m_image_ctx;
+ auto mirror_ns = boost::get<cls::rbd::MirrorSnapshotNamespace>(
+ &m_snap_namespace);
+ if (mirror_ns == nullptr || !mirror_ns->is_primary()) {
+ update_snap_context();
+ return send_notify_unquiesce();
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ auto req = mirror::snapshot::SetImageStateRequest<I>::create(
+ &image_ctx, m_snap_id, create_context_callback<
+ SnapshotCreateRequest<I>,
+ &SnapshotCreateRequest<I>::handle_create_image_state>(this));
+ req->send();
+ return nullptr;
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_create_image_state(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ update_snap_context();
+ if (*result < 0) {
+ lderr(cct) << this << " " << __func__ << ": failed to create image state: "
+ << cpp_strerror(*result) << dendl;
+ save_result(result);
+ }
+
+ return send_notify_unquiesce();
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_release_snap_id() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ ceph_assert(m_snap_id != CEPH_NOSNAP);
+
+ librados::AioCompletion *rados_completion = create_rados_callback<
+ SnapshotCreateRequest<I>,
+ &SnapshotCreateRequest<I>::handle_release_snap_id>(this);
+ image_ctx.data_ctx.aio_selfmanaged_snap_remove(m_snap_id, rados_completion);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_release_snap_id(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ return send_notify_unquiesce();
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::send_notify_unquiesce() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (m_writes_blocked) {
+ image_ctx.io_image_dispatcher->unblock_writes();
+ }
+
+ if (m_skip_notify_quiesce) {
+ return this->create_context_finisher(m_ret_val);
+ }
+
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ image_ctx.image_watcher->notify_unquiesce(
+ m_request_id, create_context_callback<
+ SnapshotCreateRequest<I>,
+ &SnapshotCreateRequest<I>::handle_notify_unquiesce>(this));
+
+ return nullptr;
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_notify_unquiesce(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to notify unquiesce: " << cpp_strerror(*result)
+ << dendl;
+ // ignore error
+ }
+
+ *result = m_ret_val;
+ return this->create_context_finisher(m_ret_val);
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::update_snap_context() {
+ I &image_ctx = this->m_image_ctx;
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ std::unique_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.get_snap_info(m_snap_id) != NULL) {
+ return;
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ // immediately add a reference to the new snapshot
+ utime_t snap_time = ceph_clock_now();
+ image_ctx.add_snap(m_snap_namespace, m_snap_name, m_snap_id, m_size,
+ m_parent_info, RBD_PROTECTION_STATUS_UNPROTECTED,
+ 0, snap_time);
+
+ // immediately start using the new snap context if we
+ // own the exclusive lock
+ std::vector<snapid_t> snaps;
+ snaps.push_back(m_snap_id);
+ snaps.insert(snaps.end(), image_ctx.snapc.snaps.begin(),
+ image_ctx.snapc.snaps.end());
+
+ image_ctx.snapc.seq = m_snap_id;
+ image_ctx.snapc.snaps.swap(snaps);
+ image_ctx.data_ctx.selfmanaged_snap_set_write_ctx(
+ image_ctx.snapc.seq, image_ctx.snaps);
+ image_ctx.rebuild_data_io_context();
+
+ if (!image_ctx.migration_info.empty()) {
+ auto it = image_ctx.migration_info.snap_map.find(CEPH_NOSNAP);
+ ceph_assert(it != image_ctx.migration_info.snap_map.end());
+ ceph_assert(!it->second.empty());
+ if (it->second[0] == CEPH_NOSNAP) {
+ ldout(cct, 5) << this << " " << __func__
+ << ": updating migration snap_map" << dendl;
+ it->second[0] = m_snap_id;
+ }
+ }
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::SnapshotCreateRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/SnapshotCreateRequest.h b/src/librbd/operation/SnapshotCreateRequest.h
new file mode 100644
index 000000000..d306ee21b
--- /dev/null
+++ b/src/librbd/operation/SnapshotCreateRequest.h
@@ -0,0 +1,148 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_CREATE_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_SNAPSHOT_CREATE_REQUEST_H
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/Types.h"
+#include "librbd/operation/Request.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class SnapshotCreateRequest : public Request<ImageCtxT> {
+public:
+ /**
+ * Snap Create goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_NOTIFY_QUIESCE * * * * * * * * * * * * *
+ * | *
+ * v *
+ * STATE_SUSPEND_REQUESTS *
+ * | *
+ * v *
+ * STATE_SUSPEND_AIO * * * * * * * * * * * * * * *
+ * | *
+ * v *
+ * STATE_APPEND_OP_EVENT (skip if journal *
+ * | disabled) *
+ * (retry) v *
+ * . . . > STATE_ALLOCATE_SNAP_ID *
+ * . | *
+ * . v *
+ * . . . . STATE_CREATE_SNAP * * * * * * * * * * * *
+ * | * *
+ * v * *
+ * STATE_CREATE_OBJECT_MAP (skip if * *
+ * | disabled) * *
+ * v * *
+ * STATE_CREATE_IMAGE_STATE (skip if * *
+ * | not mirror * *
+ * | snapshot) * *
+ * | v *
+ * | STATE_RELEASE_SNAP_ID *
+ * | | *
+ * | v *
+ * \------------> STATE_NOTIFY_UNQUIESCE < * *
+ * |
+ * v
+ * <finish>
+ * @endverbatim
+ *
+ * The _CREATE_STATE state may repeat back to the _ALLOCATE_SNAP_ID state
+ * if a stale snapshot context is allocated. If the create operation needs
+ * to abort, the error path is followed to record the result in the journal
+ * (if enabled) and bubble the originating error code back to the client.
+ */
+ SnapshotCreateRequest(ImageCtxT &image_ctx, Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name, uint64_t journal_op_tid,
+ uint64_t flags, ProgressContext &prog_ctx);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override {
+ return true;
+ }
+ bool can_affect_io() const override {
+ return true;
+ }
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::SnapCreateEvent(op_tid, m_snap_namespace, m_snap_name);
+ }
+
+private:
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+ std::string m_snap_name;
+ bool m_skip_object_map;
+ bool m_skip_notify_quiesce;
+ bool m_ignore_notify_quiesce_error;
+ ProgressContext &m_prog_ctx;
+
+ uint64_t m_request_id = 0;
+ int m_ret_val = 0;
+ bool m_writes_blocked = false;
+
+ uint64_t m_snap_id = CEPH_NOSNAP;
+ uint64_t m_size;
+ ParentImageInfo m_parent_info;
+
+ void send_notify_quiesce();
+ Context *handle_notify_quiesce(int *result);
+
+ void send_suspend_requests();
+ Context *handle_suspend_requests(int *result);
+
+ void send_suspend_aio();
+ Context *handle_suspend_aio(int *result);
+
+ void send_append_op_event();
+ Context *handle_append_op_event(int *result);
+
+ void send_allocate_snap_id();
+ Context *handle_allocate_snap_id(int *result);
+
+ void send_create_snap();
+ Context *handle_create_snap(int *result);
+
+ Context *send_create_object_map();
+ Context *handle_create_object_map(int *result);
+
+ Context *send_create_image_state();
+ Context *handle_create_image_state(int *result);
+
+ void send_release_snap_id();
+ Context *handle_release_snap_id(int *result);
+
+ Context *send_notify_unquiesce();
+ Context *handle_notify_unquiesce(int *result);
+
+ void update_snap_context();
+
+ void save_result(int *result) {
+ if (m_ret_val == 0 && *result < 0) {
+ m_ret_val = *result;
+ }
+ }
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::SnapshotCreateRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_CREATE_REQUEST_H
diff --git a/src/librbd/operation/SnapshotLimitRequest.cc b/src/librbd/operation/SnapshotLimitRequest.cc
new file mode 100644
index 000000000..17aed5f6a
--- /dev/null
+++ b/src/librbd/operation/SnapshotLimitRequest.cc
@@ -0,0 +1,66 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/SnapshotLimitRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::SnapshotLimitRequest: "
+
+namespace librbd {
+namespace operation {
+
+template <typename I>
+SnapshotLimitRequest<I>::SnapshotLimitRequest(I &image_ctx,
+ Context *on_finish,
+ uint64_t limit)
+ : Request<I>(image_ctx, on_finish), m_snap_limit(limit) {
+}
+
+template <typename I>
+void SnapshotLimitRequest<I>::send_op() {
+ send_limit_snaps();
+}
+
+template <typename I>
+bool SnapshotLimitRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << " r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+}
+
+template <typename I>
+void SnapshotLimitRequest<I>::send_limit_snaps() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ librados::ObjectWriteOperation op;
+ cls_client::snapshot_set_limit(&op, m_snap_limit);
+
+ librados::AioCompletion *rados_completion =
+ this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, rados_completion,
+ &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+ }
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::SnapshotLimitRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/SnapshotLimitRequest.h b/src/librbd/operation/SnapshotLimitRequest.h
new file mode 100644
index 000000000..09622a459
--- /dev/null
+++ b/src/librbd/operation/SnapshotLimitRequest.h
@@ -0,0 +1,44 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_LIMIT_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_SNAPSHOT_LIMIT_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include <iosfwd>
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class SnapshotLimitRequest : public Request<ImageCtxT> {
+public:
+ SnapshotLimitRequest(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t limit);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::SnapLimitEvent(op_tid, m_snap_limit);
+ }
+
+private:
+ uint64_t m_snap_limit;
+
+ void send_limit_snaps();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::SnapshotLimitRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_LIMIT_REQUEST_H
diff --git a/src/librbd/operation/SnapshotProtectRequest.cc b/src/librbd/operation/SnapshotProtectRequest.cc
new file mode 100644
index 000000000..f3b9e7e0b
--- /dev/null
+++ b/src/librbd/operation/SnapshotProtectRequest.cc
@@ -0,0 +1,118 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/SnapshotProtectRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::SnapshotProtectRequest: "
+
+namespace librbd {
+namespace operation {
+
+namespace {
+
+template <typename I>
+std::ostream& operator<<(std::ostream& os,
+ const typename SnapshotProtectRequest<I>::State& state) {
+ switch(state) {
+ case SnapshotProtectRequest<I>::STATE_PROTECT_SNAP:
+ os << "PROTECT_SNAP";
+ break;
+ }
+ return os;
+}
+
+} // anonymous namespace
+
+template <typename I>
+SnapshotProtectRequest<I>::SnapshotProtectRequest(I &image_ctx,
+ Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name)
+ : Request<I>(image_ctx, on_finish), m_snap_namespace(snap_namespace),
+ m_snap_name(snap_name), m_state(STATE_PROTECT_SNAP) {
+}
+
+template <typename I>
+void SnapshotProtectRequest<I>::send_op() {
+ send_protect_snap();
+}
+
+template <typename I>
+bool SnapshotProtectRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", "
+ << "r=" << r << dendl;
+ if (r < 0) {
+ if (r == -EBUSY) {
+ ldout(cct, 1) << "snapshot is already protected" << dendl;
+ } else {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ }
+ return true;
+}
+
+template <typename I>
+void SnapshotProtectRequest<I>::send_protect_snap() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ int r = verify_and_send_protect_snap();
+ if (r < 0) {
+ this->async_complete(r);
+ return;
+ }
+}
+
+template <typename I>
+int SnapshotProtectRequest<I>::verify_and_send_protect_snap() {
+ I &image_ctx = this->m_image_ctx;
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ CephContext *cct = image_ctx.cct;
+ if ((image_ctx.features & RBD_FEATURE_LAYERING) == 0) {
+ lderr(cct) << "image must support layering" << dendl;
+ return -ENOSYS;
+ }
+
+ uint64_t snap_id = image_ctx.get_snap_id(m_snap_namespace, m_snap_name);
+ if (snap_id == CEPH_NOSNAP) {
+ return -ENOENT;
+ }
+
+ bool is_protected;
+ int r = image_ctx.is_snap_protected(snap_id, &is_protected);
+ if (r < 0) {
+ return r;
+ }
+
+ if (is_protected) {
+ return -EBUSY;
+ }
+
+ librados::ObjectWriteOperation op;
+ cls_client::set_protection_status(&op, snap_id,
+ RBD_PROTECTION_STATUS_PROTECTED);
+
+ librados::AioCompletion *rados_completion =
+ this->create_callback_completion();
+ r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, rados_completion,
+ &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+ return 0;
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::SnapshotProtectRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/SnapshotProtectRequest.h b/src/librbd/operation/SnapshotProtectRequest.h
new file mode 100644
index 000000000..bef80229a
--- /dev/null
+++ b/src/librbd/operation/SnapshotProtectRequest.h
@@ -0,0 +1,68 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_PROTECT_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_SNAPSHOT_PROTECT_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class SnapshotProtectRequest : public Request<ImageCtxT> {
+public:
+ /**
+ * Snap Protect goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_PROTECT_SNAP
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ *
+ */
+ enum State {
+ STATE_PROTECT_SNAP
+ };
+
+ SnapshotProtectRequest(ImageCtxT &image_ctx, Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::SnapProtectEvent(op_tid, m_snap_namespace, m_snap_name);
+ }
+
+private:
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+ std::string m_snap_name;
+ State m_state;
+
+ void send_protect_snap();
+
+ int verify_and_send_protect_snap();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::SnapshotProtectRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_PROTECT_REQUEST_H
diff --git a/src/librbd/operation/SnapshotRemoveRequest.cc b/src/librbd/operation/SnapshotRemoveRequest.cc
new file mode 100644
index 000000000..b78be8a0a
--- /dev/null
+++ b/src/librbd/operation/SnapshotRemoveRequest.cc
@@ -0,0 +1,505 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/SnapshotRemoveRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "include/ceph_assert.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/image/DetachChildRequest.h"
+#include "librbd/mirror/snapshot/RemoveImageStateRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::SnapshotRemoveRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace librbd {
+namespace operation {
+
+using util::create_context_callback;
+using util::create_rados_callback;
+
+template <typename I>
+SnapshotRemoveRequest<I>::SnapshotRemoveRequest(
+ I &image_ctx, Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name, uint64_t snap_id)
+ : Request<I>(image_ctx, on_finish), m_snap_namespace(snap_namespace),
+ m_snap_name(snap_name), m_snap_id(snap_id) {
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::send_op() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.snap_info.find(m_snap_id) == image_ctx.snap_info.end()) {
+ lderr(cct) << "snapshot doesn't exist" << dendl;
+ this->async_complete(-ENOENT);
+ return;
+ }
+ }
+
+ trash_snap();
+}
+
+template <typename I>
+bool SnapshotRemoveRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+ if (r < 0 && r != -EBUSY) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::trash_snap() {
+ I &image_ctx = this->m_image_ctx;
+ if (image_ctx.old_format) {
+ release_snap_id();
+ return;
+ } else if (cls::rbd::get_snap_namespace_type(m_snap_namespace) ==
+ cls::rbd::SNAPSHOT_NAMESPACE_TYPE_TRASH) {
+ get_snap();
+ return;
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ librados::ObjectWriteOperation op;
+ cls_client::snapshot_trash_add(&op, m_snap_id);
+
+ auto aio_comp = create_rados_callback<
+ SnapshotRemoveRequest<I>,
+ &SnapshotRemoveRequest<I>::handle_trash_snap>(this);
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::handle_trash_snap(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r == -EOPNOTSUPP) {
+ // trash / clone v2 not supported
+ detach_child();
+ return;
+ } else if (r < 0 && r != -EEXIST) {
+ lderr(cct) << "failed to move snapshot to trash: " << cpp_strerror(r)
+ << dendl;
+ this->complete(r);
+ return;
+ }
+
+ m_trashed_snapshot = true;
+ get_snap();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::get_snap() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ librados::ObjectReadOperation op;
+ cls_client::snapshot_get_start(&op, m_snap_id);
+
+ auto aio_comp = create_rados_callback<
+ SnapshotRemoveRequest<I>,
+ &SnapshotRemoveRequest<I>::handle_get_snap>(this);
+ m_out_bl.clear();
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::handle_get_snap(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r == 0) {
+ cls::rbd::SnapshotInfo snap_info;
+
+ auto it = m_out_bl.cbegin();
+ r = cls_client::snapshot_get_finish(&it, &snap_info);
+ m_child_attached = (snap_info.child_count > 0);
+ if (r == 0 && m_child_attached) {
+ list_children();
+ return;
+ }
+ }
+
+ if (r < 0) {
+ lderr(cct) << "failed to retrieve snapshot: " << cpp_strerror(r)
+ << dendl;
+ this->complete(r);
+ return;
+ }
+
+ detach_child();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::list_children() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ librados::ObjectReadOperation op;
+ cls_client::children_list_start(&op, m_snap_id);
+
+ m_out_bl.clear();
+ m_child_images.clear();
+ auto aio_comp = create_rados_callback<
+ SnapshotRemoveRequest<I>,
+ &SnapshotRemoveRequest<I>::handle_list_children>(this);
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::handle_list_children(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = cls_client::children_list_finish(&it, &m_child_images);
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to retrieve child: " << cpp_strerror(r)
+ << dendl;
+ this->complete(r);
+ return;
+ }
+
+ detach_stale_child();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::detach_stale_child() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ for (auto& child_image : m_child_images) {
+ m_child_attached = true;
+ IoCtx ioctx;
+ int r = util::create_ioctx(image_ctx.md_ctx, "child image",
+ child_image.pool_id,
+ child_image.pool_namespace, &ioctx);
+ if (r == -ENOENT) {
+ librados::ObjectWriteOperation op;
+ cls_client::child_detach(&op, m_snap_id,
+ {child_image.pool_id,
+ child_image.pool_namespace,
+ child_image.image_id});
+ auto aio_comp = create_rados_callback<
+ SnapshotRemoveRequest<I>,
+ &SnapshotRemoveRequest<I>::handle_detach_stale_child>(this);
+ r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+ return;
+ } else if (r < 0) {
+ this->async_complete(r);
+ return;
+ }
+ }
+
+ detach_child();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::handle_detach_stale_child(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to detach stale child: " << cpp_strerror(r)
+ << dendl;
+ this->complete(r);
+ return;
+ }
+
+ m_child_attached = false;
+ list_children();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::detach_child() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ bool detach_child = false;
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ cls::rbd::ParentImageSpec our_pspec;
+ int r = image_ctx.get_parent_spec(m_snap_id, &our_pspec);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ ldout(cct, 1) << "No such snapshot" << dendl;
+ } else {
+ lderr(cct) << "failed to retrieve parent spec" << dendl;
+ }
+
+ this->async_complete(r);
+ return;
+ }
+
+ if (image_ctx.parent_md.spec != our_pspec &&
+ (scan_for_parents(our_pspec) == -ENOENT)) {
+ // no other references to the parent image
+ detach_child = true;
+ }
+ }
+
+ if (!detach_child) {
+ // HEAD image or other snapshots still associated with parent
+ remove_object_map();
+ return;
+ }
+
+ ldout(cct, 5) << dendl;
+ auto ctx = create_context_callback<
+ SnapshotRemoveRequest<I>,
+ &SnapshotRemoveRequest<I>::handle_detach_child>(this);
+ auto req = image::DetachChildRequest<I>::create(image_ctx, ctx);
+ req->send();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::handle_detach_child(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to detach child from parent: " << cpp_strerror(r)
+ << dendl;
+ this->complete(r);
+ return;
+ }
+
+ remove_object_map();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::remove_object_map() {
+ I &image_ctx = this->m_image_ctx;
+ if (m_child_attached) {
+ // if a clone v2 child is attached to this snapshot, we cannot
+ // proceed. It's only an error if the snap was already in the trash
+ this->complete(m_trashed_snapshot ? 0 : -EBUSY);
+ return;
+ }
+
+ CephContext *cct = image_ctx.cct;
+
+ {
+ std::shared_lock owner_lock{image_ctx.owner_lock};
+ std::unique_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.object_map != nullptr) {
+ ldout(cct, 5) << dendl;
+
+ auto ctx = create_context_callback<
+ SnapshotRemoveRequest<I>,
+ &SnapshotRemoveRequest<I>::handle_remove_object_map>(this);
+ image_ctx.object_map->snapshot_remove(m_snap_id, ctx);
+ return;
+ }
+ }
+
+ // object map disabled
+ remove_image_state();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::handle_remove_object_map(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to remove snapshot object map: " << cpp_strerror(r)
+ << dendl;
+ this->complete(r);
+ return;
+ }
+
+ remove_image_state();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::remove_image_state() {
+ I &image_ctx = this->m_image_ctx;
+ auto type = cls::rbd::get_snap_namespace_type(m_snap_namespace);
+
+ if (type != cls::rbd::SNAPSHOT_NAMESPACE_TYPE_MIRROR) {
+ release_snap_id();
+ return;
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ auto ctx = create_context_callback<
+ SnapshotRemoveRequest<I>,
+ &SnapshotRemoveRequest<I>::handle_remove_image_state>(this);
+ auto req = mirror::snapshot::RemoveImageStateRequest<I>::create(
+ &image_ctx, m_snap_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::handle_remove_image_state(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to remove image state: " << cpp_strerror(r)
+ << dendl;
+ if (r != -ENOENT) {
+ this->complete(r);
+ return;
+ }
+ }
+
+ release_snap_id();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::release_snap_id() {
+ I &image_ctx = this->m_image_ctx;
+
+ if (!image_ctx.data_ctx.is_valid()) {
+ remove_snap();
+ return;
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_snap_id << dendl;
+
+ auto aio_comp = create_rados_callback<
+ SnapshotRemoveRequest<I>,
+ &SnapshotRemoveRequest<I>::handle_release_snap_id>(this);
+ image_ctx.data_ctx.aio_selfmanaged_snap_remove(m_snap_id, aio_comp);
+ aio_comp->release();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::handle_release_snap_id(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << "failed to release snap id: " << cpp_strerror(r) << dendl;
+ this->complete(r);
+ return;
+ }
+
+ remove_snap();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::remove_snap() {
+ I &image_ctx = this->m_image_ctx;
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ librados::ObjectWriteOperation op;
+ if (image_ctx.old_format) {
+ cls_client::old_snapshot_remove(&op, m_snap_name);
+ } else {
+ cls_client::snapshot_remove(&op, m_snap_id);
+ }
+
+ auto aio_comp = create_rados_callback<
+ SnapshotRemoveRequest<I>,
+ &SnapshotRemoveRequest<I>::handle_remove_snap>(this);
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::handle_remove_snap(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to remove snapshot: " << cpp_strerror(r) << dendl;
+ this->complete(r);
+ return;
+ }
+
+ remove_snap_context();
+ this->complete(0);
+}
+
+template <typename I>
+void SnapshotRemoveRequest<I>::remove_snap_context() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ std::unique_lock image_locker{image_ctx.image_lock};
+ image_ctx.rm_snap(m_snap_namespace, m_snap_name, m_snap_id);
+}
+
+template <typename I>
+int SnapshotRemoveRequest<I>::scan_for_parents(
+ cls::rbd::ParentImageSpec &pspec) {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.image_lock));
+
+ if (pspec.pool_id != -1) {
+ map<uint64_t, SnapInfo>::iterator it;
+ for (it = image_ctx.snap_info.begin();
+ it != image_ctx.snap_info.end(); ++it) {
+ // skip our snap id (if checking base image, CEPH_NOSNAP won't match)
+ if (it->first == m_snap_id) {
+ continue;
+ }
+ if (it->second.parent.spec == pspec) {
+ break;
+ }
+ }
+ if (it == image_ctx.snap_info.end()) {
+ return -ENOENT;
+ }
+ }
+ return 0;
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::SnapshotRemoveRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/SnapshotRemoveRequest.h b/src/librbd/operation/SnapshotRemoveRequest.h
new file mode 100644
index 000000000..17638a529
--- /dev/null
+++ b/src/librbd/operation/SnapshotRemoveRequest.h
@@ -0,0 +1,128 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_REMOVE_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_SNAPSHOT_REMOVE_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include "include/buffer.h"
+#include "librbd/Types.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class SnapshotRemoveRequest : public Request<ImageCtxT> {
+public:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * TRASH_SNAP
+ * |
+ * v (skip if unsupported)
+ * GET_SNAP
+ * |
+ * v (skip if unnecessary)
+ * LIST_CHILDREN <-------------\
+ * | |
+ * v (skip if unnecessary) | (repeat as needed)
+ * DETACH_STALE_CHILD ---------/
+ * |
+ * v (skip if unnecessary)
+ * DETACH_CHILD
+ * |
+ * v (skip if disabled/in-use)
+ * REMOVE_OBJECT_MAP
+ * |
+ * v (skip if not mirror snpashot)
+ * REMOVE_IMAGE_STATE
+ * |
+ * v (skip if in-use)
+ * RELEASE_SNAP_ID
+ * |
+ * v (skip if in-use)
+ * REMOVE_SNAP
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ static SnapshotRemoveRequest *create(
+ ImageCtxT &image_ctx, const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name, uint64_t snap_id, Context *on_finish) {
+ return new SnapshotRemoveRequest(image_ctx, on_finish, snap_namespace,
+ snap_name, snap_id);
+ }
+
+ SnapshotRemoveRequest(ImageCtxT &image_ctx, Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name,
+ uint64_t snap_id);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::SnapRemoveEvent(op_tid, m_snap_namespace, m_snap_name);
+ }
+
+private:
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+ cls::rbd::ChildImageSpecs m_child_images;
+ std::string m_snap_name;
+ uint64_t m_snap_id;
+ bool m_trashed_snapshot = false;
+ bool m_child_attached = false;
+
+ ceph::bufferlist m_out_bl;
+
+ void trash_snap();
+ void handle_trash_snap(int r);
+
+ void get_snap();
+ void handle_get_snap(int r);
+
+ void list_children();
+ void handle_list_children(int r);
+
+ void detach_stale_child();
+ void handle_detach_stale_child(int r);
+
+ void detach_child();
+ void handle_detach_child(int r);
+
+ void remove_object_map();
+ void handle_remove_object_map(int r);
+
+ void remove_image_state();
+ void handle_remove_image_state(int r);
+
+ void release_snap_id();
+ void handle_release_snap_id(int r);
+
+ void remove_snap();
+ void handle_remove_snap(int r);
+
+ void remove_snap_context();
+ int scan_for_parents(cls::rbd::ParentImageSpec &pspec);
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::SnapshotRemoveRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_REMOVE_REQUEST_H
diff --git a/src/librbd/operation/SnapshotRenameRequest.cc b/src/librbd/operation/SnapshotRenameRequest.cc
new file mode 100644
index 000000000..e9257f18c
--- /dev/null
+++ b/src/librbd/operation/SnapshotRenameRequest.cc
@@ -0,0 +1,102 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/SnapshotRenameRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::SnapshotRenameRequest: "
+
+namespace librbd {
+namespace operation {
+
+namespace {
+
+template <typename I>
+std::ostream& operator<<(std::ostream& os,
+ const typename SnapshotRenameRequest<I>::State& state) {
+ switch(state) {
+ case SnapshotRenameRequest<I>::STATE_RENAME_SNAP:
+ os << "RENAME_SNAP";
+ break;
+ }
+ return os;
+}
+
+} // anonymous namespace
+
+template <typename I>
+SnapshotRenameRequest<I>::SnapshotRenameRequest(I &image_ctx,
+ Context *on_finish,
+ uint64_t snap_id,
+ const std::string &snap_name)
+ : Request<I>(image_ctx, on_finish), m_snap_id(snap_id),
+ m_snap_name(snap_name), m_state(STATE_RENAME_SNAP) {
+}
+
+template <typename I>
+journal::Event SnapshotRenameRequest<I>::create_event(uint64_t op_tid) const {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.image_lock));
+
+ std::string src_snap_name;
+ auto snap_info_it = image_ctx.snap_info.find(m_snap_id);
+ if (snap_info_it != image_ctx.snap_info.end()) {
+ src_snap_name = snap_info_it->second.name;
+ }
+
+ return journal::SnapRenameEvent(op_tid, m_snap_id, src_snap_name,
+ m_snap_name);
+}
+
+template <typename I>
+void SnapshotRenameRequest<I>::send_op() {
+ send_rename_snap();
+}
+
+template <typename I>
+bool SnapshotRenameRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", "
+ << "r=" << r << dendl;
+ if (r < 0) {
+ if (r == -EEXIST) {
+ ldout(cct, 1) << "snapshot already exists" << dendl;
+ } else {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ }
+ return true;
+}
+
+template <typename I>
+void SnapshotRenameRequest<I>::send_rename_snap() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ librados::ObjectWriteOperation op;
+ if (image_ctx.old_format) {
+ cls_client::old_snapshot_rename(&op, m_snap_id, m_snap_name);
+ } else {
+ cls_client::snapshot_rename(&op, m_snap_id, m_snap_name);
+ }
+
+ librados::AioCompletion *rados_completion = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid,
+ rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::SnapshotRenameRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/SnapshotRenameRequest.h b/src/librbd/operation/SnapshotRenameRequest.h
new file mode 100644
index 000000000..697772e02
--- /dev/null
+++ b/src/librbd/operation/SnapshotRenameRequest.h
@@ -0,0 +1,63 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_RENAME_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_SNAPSHOT_RENAME_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class SnapshotRenameRequest : public Request<ImageCtxT> {
+public:
+ /**
+ * Snap Rename goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_RENAME_SNAP
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ *
+ */
+ enum State {
+ STATE_RENAME_SNAP
+ };
+
+ SnapshotRenameRequest(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t snap_id, const std::string &snap_name);
+
+ journal::Event create_event(uint64_t op_tid) const override;
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+
+private:
+ uint64_t m_snap_id;
+ std::string m_snap_name;
+ State m_state;
+
+ void send_rename_snap();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::SnapshotRenameRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_RENAME_REQUEST_H
diff --git a/src/librbd/operation/SnapshotRollbackRequest.cc b/src/librbd/operation/SnapshotRollbackRequest.cc
new file mode 100644
index 000000000..87c5212de
--- /dev/null
+++ b/src/librbd/operation/SnapshotRollbackRequest.cc
@@ -0,0 +1,424 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/SnapshotRollbackRequest.h"
+#include "include/rados/librados.hpp"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/io/ImageDispatcherInterface.h"
+#include "librbd/io/ObjectDispatcherInterface.h"
+#include "librbd/operation/ResizeRequest.h"
+#include "osdc/Striper.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::SnapshotRollbackRequest: "
+
+namespace librbd {
+namespace operation {
+
+using util::create_context_callback;
+using util::create_rados_callback;
+
+namespace {
+
+template <typename I>
+class C_RollbackObject : public C_AsyncObjectThrottle<I> {
+public:
+ C_RollbackObject(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+ uint64_t snap_id, uint64_t object_num,
+ uint64_t head_num_objects,
+ decltype(I::object_map) snap_object_map)
+ : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_snap_id(snap_id),
+ m_object_num(object_num), m_head_num_objects(head_num_objects),
+ m_snap_object_map(snap_object_map) {
+ }
+
+ int send() override {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 20) << "C_RollbackObject: " << __func__ << ": object_num="
+ << m_object_num << dendl;
+
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ if (m_object_num < m_head_num_objects &&
+ m_snap_object_map != nullptr &&
+ !image_ctx.object_map->object_may_exist(m_object_num) &&
+ !m_snap_object_map->object_may_exist(m_object_num)) {
+ return 1;
+ }
+ }
+
+ std::string oid = image_ctx.get_object_name(m_object_num);
+
+ librados::ObjectWriteOperation op;
+ op.selfmanaged_snap_rollback(m_snap_id);
+
+ librados::AioCompletion *rados_completion =
+ util::create_rados_callback(this);
+ image_ctx.data_ctx.aio_operate(oid, rados_completion, &op);
+ rados_completion->release();
+ return 0;
+ }
+
+private:
+ uint64_t m_snap_id;
+ uint64_t m_object_num;
+ uint64_t m_head_num_objects;
+ decltype(I::object_map) m_snap_object_map;
+};
+
+} // anonymous namespace
+
+template <typename I>
+SnapshotRollbackRequest<I>::SnapshotRollbackRequest(I &image_ctx,
+ Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name,
+ uint64_t snap_id,
+ uint64_t snap_size,
+ ProgressContext &prog_ctx)
+ : Request<I>(image_ctx, on_finish), m_snap_namespace(snap_namespace),
+ m_snap_name(snap_name), m_snap_id(snap_id),
+ m_snap_size(snap_size), m_prog_ctx(prog_ctx),
+ m_object_map(nullptr), m_snap_object_map(nullptr) {
+}
+
+template <typename I>
+SnapshotRollbackRequest<I>::~SnapshotRollbackRequest() {
+ I &image_ctx = this->m_image_ctx;
+ if (m_blocking_writes) {
+ image_ctx.io_image_dispatcher->unblock_writes();
+ }
+ if (m_object_map) {
+ m_object_map->put();
+ m_object_map = nullptr;
+ }
+ if (m_snap_object_map) {
+ m_snap_object_map->put();
+ m_snap_object_map = nullptr;
+ }
+}
+
+template <typename I>
+void SnapshotRollbackRequest<I>::send_op() {
+ send_block_writes();
+}
+
+template <typename I>
+void SnapshotRollbackRequest<I>::send_block_writes() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ m_blocking_writes = true;
+ image_ctx.io_image_dispatcher->block_writes(create_context_callback<
+ SnapshotRollbackRequest<I>,
+ &SnapshotRollbackRequest<I>::handle_block_writes>(this));
+}
+
+template <typename I>
+Context *SnapshotRollbackRequest<I>::handle_block_writes(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
+ return this->create_context_finisher(*result);
+ }
+
+ send_resize_image();
+ return nullptr;
+}
+
+template <typename I>
+void SnapshotRollbackRequest<I>::send_resize_image() {
+ I &image_ctx = this->m_image_ctx;
+
+ uint64_t current_size;
+ {
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ std::shared_lock image_locker{image_ctx.image_lock};
+ current_size = image_ctx.get_image_size(CEPH_NOSNAP);
+ }
+
+ m_head_num_objects = Striper::get_num_objects(image_ctx.layout, current_size);
+
+ if (current_size == m_snap_size) {
+ send_get_snap_object_map();
+ return;
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ Context *ctx = create_context_callback<
+ SnapshotRollbackRequest<I>,
+ &SnapshotRollbackRequest<I>::handle_resize_image>(this);
+ ResizeRequest<I> *req = ResizeRequest<I>::create(image_ctx, ctx, m_snap_size,
+ true, m_no_op_prog_ctx, 0, true);
+ req->send();
+}
+
+template <typename I>
+Context *SnapshotRollbackRequest<I>::handle_resize_image(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to resize image for rollback: "
+ << cpp_strerror(*result) << dendl;
+ return this->create_context_finisher(*result);
+ }
+
+ send_get_snap_object_map();
+ return nullptr;
+}
+
+template <typename I>
+void SnapshotRollbackRequest<I>::send_get_snap_object_map() {
+ I &image_ctx = this->m_image_ctx;
+
+ uint64_t flags = 0;
+ bool object_map_enabled;
+ CephContext *cct = image_ctx.cct;
+ {
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ std::shared_lock image_locker{image_ctx.image_lock};
+ object_map_enabled = (image_ctx.object_map != nullptr);
+ int r = image_ctx.get_flags(m_snap_id, &flags);
+ if (r < 0) {
+ object_map_enabled = false;
+ }
+ }
+ if (object_map_enabled &&
+ (flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0) {
+ lderr(cct) << "warning: object-map is invalid for snapshot" << dendl;
+ object_map_enabled = false;
+ }
+ if (!object_map_enabled) {
+ send_rollback_object_map();
+ return;
+ }
+
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ m_snap_object_map = image_ctx.create_object_map(m_snap_id);
+
+ Context *ctx = create_context_callback<
+ SnapshotRollbackRequest<I>,
+ &SnapshotRollbackRequest<I>::handle_get_snap_object_map>(this);
+ m_snap_object_map->open(ctx);
+ return;
+}
+
+template <typename I>
+Context *SnapshotRollbackRequest<I>::handle_get_snap_object_map(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << this << " " << __func__ << ": failed to open object map: "
+ << cpp_strerror(*result) << dendl;
+ m_snap_object_map->put();
+ m_snap_object_map = nullptr;
+ }
+
+ send_rollback_object_map();
+ return nullptr;
+}
+
+template <typename I>
+void SnapshotRollbackRequest<I>::send_rollback_object_map() {
+ I &image_ctx = this->m_image_ctx;
+
+ {
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ std::shared_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.object_map != nullptr) {
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ Context *ctx = create_context_callback<
+ SnapshotRollbackRequest<I>,
+ &SnapshotRollbackRequest<I>::handle_rollback_object_map>(this);
+ image_ctx.object_map->rollback(m_snap_id, ctx);
+ return;
+ }
+ }
+
+ send_rollback_objects();
+}
+
+template <typename I>
+Context *SnapshotRollbackRequest<I>::handle_rollback_object_map(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << this << " " << __func__ << ": failed to roll back object "
+ << "map: " << cpp_strerror(*result) << dendl;
+
+ ceph_assert(m_object_map == nullptr);
+ apply();
+ return this->create_context_finisher(*result);
+ }
+
+ send_rollback_objects();
+ return nullptr;
+}
+
+template <typename I>
+void SnapshotRollbackRequest<I>::send_rollback_objects() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ uint64_t num_objects;
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ num_objects = Striper::get_num_objects(image_ctx.layout,
+ image_ctx.get_current_size());
+ }
+
+ Context *ctx = create_context_callback<
+ SnapshotRollbackRequest<I>,
+ &SnapshotRollbackRequest<I>::handle_rollback_objects>(this);
+ typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_RollbackObject<I> >(),
+ boost::lambda::_1, &image_ctx, m_snap_id, boost::lambda::_2,
+ m_head_num_objects, m_snap_object_map));
+ AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+ this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, num_objects);
+ throttle->start_ops(
+ image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
+}
+
+template <typename I>
+Context *SnapshotRollbackRequest<I>::handle_rollback_objects(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result == -ERESTART) {
+ ldout(cct, 5) << "snapshot rollback operation interrupted" << dendl;
+ return this->create_context_finisher(*result);
+ } else if (*result < 0) {
+ lderr(cct) << "failed to rollback objects: " << cpp_strerror(*result)
+ << dendl;
+ return this->create_context_finisher(*result);
+ }
+
+ return send_refresh_object_map();
+}
+
+template <typename I>
+Context *SnapshotRollbackRequest<I>::send_refresh_object_map() {
+ I &image_ctx = this->m_image_ctx;
+
+ bool object_map_enabled;
+ {
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ std::shared_lock image_locker{image_ctx.image_lock};
+ object_map_enabled = (image_ctx.object_map != nullptr);
+ }
+ if (!object_map_enabled) {
+ return send_invalidate_cache();
+ }
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ m_object_map = image_ctx.create_object_map(CEPH_NOSNAP);
+
+ Context *ctx = create_context_callback<
+ SnapshotRollbackRequest<I>,
+ &SnapshotRollbackRequest<I>::handle_refresh_object_map>(this);
+ m_object_map->open(ctx);
+ return nullptr;
+}
+
+template <typename I>
+Context *SnapshotRollbackRequest<I>::handle_refresh_object_map(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << this << " " << __func__ << ": failed to open object map: "
+ << cpp_strerror(*result) << dendl;
+ m_object_map->put();
+ m_object_map = nullptr;
+ apply();
+
+ return this->create_context_finisher(*result);
+ }
+
+ return send_invalidate_cache();
+}
+
+template <typename I>
+Context *SnapshotRollbackRequest<I>::send_invalidate_cache() {
+ I &image_ctx = this->m_image_ctx;
+
+ apply();
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ if(m_object_map != nullptr) {
+ Context *ctx = create_context_callback<
+ SnapshotRollbackRequest<I>,
+ &SnapshotRollbackRequest<I>::handle_invalidate_cache>(this, m_object_map);
+ image_ctx.io_image_dispatcher->invalidate_cache(ctx);
+ }
+ else {
+ Context *ctx = create_context_callback<
+ SnapshotRollbackRequest<I>,
+ &SnapshotRollbackRequest<I>::handle_invalidate_cache>(this);
+ image_ctx.io_image_dispatcher->invalidate_cache(ctx);
+ }
+ return nullptr;
+}
+
+template <typename I>
+Context *SnapshotRollbackRequest<I>::handle_invalidate_cache(int *result) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result)
+ << dendl;
+ }
+ return this->create_context_finisher(*result);
+}
+
+template <typename I>
+void SnapshotRollbackRequest<I>::apply() {
+ I &image_ctx = this->m_image_ctx;
+
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ std::unique_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.object_map != nullptr) {
+ std::swap(m_object_map, image_ctx.object_map);
+ }
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::SnapshotRollbackRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/SnapshotRollbackRequest.h b/src/librbd/operation/SnapshotRollbackRequest.h
new file mode 100644
index 000000000..e58a618f2
--- /dev/null
+++ b/src/librbd/operation/SnapshotRollbackRequest.h
@@ -0,0 +1,122 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_ROLLBACK_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_SNAPSHOT_ROLLBACK_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/journal/Types.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class SnapshotRollbackRequest : public Request<ImageCtxT> {
+public:
+ /**
+ * Snap Rollback goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start> ---------\
+ * |
+ * v
+ * STATE_BLOCK_WRITES
+ * |
+ * v
+ * STATE_RESIZE_IMAGE (skip if resize not
+ * | required)
+ * v
+ * STATE_GET_SNAP_OBJECT_MAP (skip if object)
+ * | map disabled)
+ * v
+ * STATE_ROLLBACK_OBJECT_MAP (skip if object
+ * | map disabled)
+ * v
+ * STATE_ROLLBACK_OBJECTS
+ * |
+ * v
+ * STATE_REFRESH_OBJECT_MAP (skip if object
+ * | map disabled)
+ * v
+ * STATE_INVALIDATE_CACHE (skip if cache
+ * | disabled)
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ *
+ * The _RESIZE_IMAGE state is skipped if the image doesn't need to be resized.
+ * The _ROLLBACK_OBJECT_MAP state is skipped if the object map isn't enabled.
+ * The _INVALIDATE_CACHE state is skipped if the cache isn't enabled.
+ */
+
+ SnapshotRollbackRequest(ImageCtxT &image_ctx, Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name,
+ uint64_t snap_id,
+ uint64_t snap_size, ProgressContext &prog_ctx);
+ ~SnapshotRollbackRequest() override;
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override {
+ return true;
+ }
+
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::SnapRollbackEvent(op_tid, m_snap_namespace, m_snap_name);
+ }
+
+private:
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+ std::string m_snap_name;
+ uint64_t m_snap_id;
+ uint64_t m_snap_size;
+ uint64_t m_head_num_objects;
+ ProgressContext &m_prog_ctx;
+
+ NoOpProgressContext m_no_op_prog_ctx;
+
+ bool m_blocking_writes = false;
+ decltype(ImageCtxT::object_map) m_object_map;
+ decltype(ImageCtxT::object_map) m_snap_object_map;
+
+ void send_block_writes();
+ Context *handle_block_writes(int *result);
+
+ void send_resize_image();
+ Context *handle_resize_image(int *result);
+
+ void send_get_snap_object_map();
+ Context *handle_get_snap_object_map(int *result);
+
+ void send_rollback_object_map();
+ Context *handle_rollback_object_map(int *result);
+
+ void send_rollback_objects();
+ Context *handle_rollback_objects(int *result);
+
+ Context *send_refresh_object_map();
+ Context *handle_refresh_object_map(int *result);
+
+ Context *send_invalidate_cache();
+ Context *handle_invalidate_cache(int *result);
+
+ void apply();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::SnapshotRollbackRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_ROLLBACK_REQUEST_H
diff --git a/src/librbd/operation/SnapshotUnprotectRequest.cc b/src/librbd/operation/SnapshotUnprotectRequest.cc
new file mode 100644
index 000000000..76caf68f3
--- /dev/null
+++ b/src/librbd/operation/SnapshotUnprotectRequest.cc
@@ -0,0 +1,353 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/SnapshotUnprotectRequest.h"
+#include "include/rados/librados.hpp"
+#include "include/stringify.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/Types.h"
+#include "librbd/Utils.h"
+#include <list>
+#include <set>
+#include <vector>
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::SnapshotUnprotectRequest: "
+
+namespace librbd {
+namespace operation {
+
+namespace {
+
+typedef std::pair<int64_t, std::string> Pool;
+typedef std::vector<Pool> Pools;
+
+template <typename I>
+std::ostream& operator<<(std::ostream& os,
+ const typename SnapshotUnprotectRequest<I>::State& state) {
+ switch(state) {
+ case SnapshotUnprotectRequest<I>::STATE_UNPROTECT_SNAP_START:
+ os << "UNPROTECT_SNAP_START";
+ break;
+ case SnapshotUnprotectRequest<I>::STATE_SCAN_POOL_CHILDREN:
+ os << "SCAN_POOL_CHILDREN";
+ break;
+ case SnapshotUnprotectRequest<I>::STATE_UNPROTECT_SNAP_FINISH:
+ os << "UNPROTECT_SNAP_FINISH";
+ break;
+ case SnapshotUnprotectRequest<I>::STATE_UNPROTECT_SNAP_ROLLBACK:
+ os << "UNPROTECT_SNAP_ROLLBACK";
+ break;
+ default:
+ os << "UNKNOWN (" << static_cast<uint32_t>(state) << ")";
+ break;
+ }
+ return os;
+}
+
+template <typename I>
+class C_ScanPoolChildren : public C_AsyncObjectThrottle<I> {
+public:
+ C_ScanPoolChildren(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+ const cls::rbd::ParentImageSpec &pspec, const Pools &pools,
+ size_t pool_idx)
+ : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_pspec(pspec),
+ m_pool(pools[pool_idx]) {
+ }
+
+ int send() override {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << this << " scanning pool '" << m_pool.second << "'"
+ << dendl;
+
+ librados::Rados rados(image_ctx.md_ctx);
+ int64_t base_tier;
+ int r = rados.pool_get_base_tier(m_pool.first, &base_tier);
+ if (r == -ENOENT) {
+ ldout(cct, 1) << "pool '" << m_pool.second << "' no longer exists"
+ << dendl;
+ return 1;
+ } else if (r < 0) {
+ lderr(cct) << "error retrieving base tier for pool '"
+ << m_pool.second << "'" << dendl;
+ return r;
+ }
+ if (m_pool.first != base_tier) {
+ // pool is a cache; skip it
+ return 1;
+ }
+
+ r = util::create_ioctx(image_ctx.md_ctx, "child image", m_pool.first, {},
+ &m_pool_ioctx);
+ if (r == -ENOENT) {
+ return 1;
+ } else if (r < 0) {
+ return r;
+ }
+
+ librados::ObjectReadOperation op;
+ cls_client::get_children_start(&op, m_pspec);
+
+ librados::AioCompletion *rados_completion =
+ util::create_rados_callback(this);
+ r = m_pool_ioctx.aio_operate(RBD_CHILDREN, rados_completion, &op,
+ &m_children_bl);
+ ceph_assert(r == 0);
+ rados_completion->release();
+ return 0;
+ }
+
+protected:
+ void finish(int r) override {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (r == 0) {
+ auto it = m_children_bl.cbegin();
+ r= cls_client::get_children_finish(&it, &m_children);
+ }
+
+ ldout(cct, 10) << this << " retrieved children: r=" << r << dendl;
+ if (r == -ENOENT) {
+ // no children -- proceed with unprotect
+ r = 0;
+ } else if (r < 0) {
+ lderr(cct) << "cannot get children for pool '" << m_pool.second << "'"
+ << dendl;
+ } else {
+ lderr(cct) << "cannot unprotect: at least " << m_children.size() << " "
+ << "child(ren) [" << joinify(m_children.begin(),
+ m_children.end(),
+ std::string(",")) << "] "
+ << "in pool '" << m_pool.second << "'" << dendl;
+ r = -EBUSY;
+ }
+ C_AsyncObjectThrottle<I>::finish(r);
+ }
+
+private:
+ cls::rbd::ParentImageSpec m_pspec;
+ Pool m_pool;
+
+ IoCtx m_pool_ioctx;
+ std::set<std::string> m_children;
+ bufferlist m_children_bl;
+};
+
+} // anonymous namespace
+
+template <typename I>
+SnapshotUnprotectRequest<I>::SnapshotUnprotectRequest(I &image_ctx,
+ Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name)
+ : Request<I>(image_ctx, on_finish), m_snap_namespace(snap_namespace),
+ m_snap_name(snap_name), m_state(STATE_UNPROTECT_SNAP_START),
+ m_ret_val(0), m_snap_id(CEPH_NOSNAP) {
+}
+
+template <typename I>
+void SnapshotUnprotectRequest<I>::send_op() {
+ send_unprotect_snap_start();
+}
+
+template <typename I>
+bool SnapshotUnprotectRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", "
+ << "r=" << r << dendl;
+ if (r < 0) {
+ if (r == -EINVAL) {
+ ldout(cct, 1) << "snapshot is already unprotected" << dendl;
+ } else {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ if (m_ret_val == 0) {
+ m_ret_val = r;
+ }
+ }
+
+ // use a different state machine once an error is encountered
+ if (m_ret_val < 0) {
+ return should_complete_error();
+ }
+
+ std::shared_lock owner_lock{image_ctx.owner_lock};
+ bool finished = false;
+ switch (m_state) {
+ case STATE_UNPROTECT_SNAP_START:
+ send_scan_pool_children();
+ break;
+ case STATE_SCAN_POOL_CHILDREN:
+ send_unprotect_snap_finish();
+ break;
+ case STATE_UNPROTECT_SNAP_FINISH:
+ finished = true;
+ break;
+ default:
+ ceph_abort();
+ break;
+ }
+ return finished;
+}
+
+template <typename I>
+bool SnapshotUnprotectRequest<I>::should_complete_error() {
+ I &image_ctx = this->m_image_ctx;
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ CephContext *cct = image_ctx.cct;
+ lderr(cct) << this << " " << __func__ << ": "
+ << "ret_val=" << m_ret_val << dendl;
+
+ bool finished = true;
+ if (m_state == STATE_SCAN_POOL_CHILDREN ||
+ m_state == STATE_UNPROTECT_SNAP_FINISH) {
+ send_unprotect_snap_rollback();
+ finished = false;
+ }
+ return finished;
+}
+
+template <typename I>
+void SnapshotUnprotectRequest<I>::send_unprotect_snap_start() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ int r = verify_and_send_unprotect_snap_start();
+ if (r < 0) {
+ this->async_complete(r);
+ return;
+ }
+}
+
+template <typename I>
+void SnapshotUnprotectRequest<I>::send_scan_pool_children() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+ m_state = STATE_SCAN_POOL_CHILDREN;
+
+ // search all pools for children depending on this snapshot
+ // TODO add async version of wait_for_latest_osdmap
+ librados::Rados rados(image_ctx.md_ctx);
+ rados.wait_for_latest_osdmap();
+
+ // protect against pools being renamed/deleted
+ std::list<Pool> pool_list;
+ rados.pool_list2(pool_list);
+
+ cls::rbd::ParentImageSpec pspec(image_ctx.md_ctx.get_id(),
+ image_ctx.md_ctx.get_namespace(),
+ image_ctx.id, m_snap_id);
+ Pools pools(pool_list.begin(), pool_list.end());
+
+ Context *ctx = this->create_callback_context();
+ typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_ScanPoolChildren<I> >(),
+ boost::lambda::_1, &image_ctx, pspec, pools, boost::lambda::_2));
+ AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+ nullptr, image_ctx, context_factory, ctx, NULL, 0, pools.size());
+ throttle->start_ops(
+ image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
+}
+
+template <typename I>
+void SnapshotUnprotectRequest<I>::send_unprotect_snap_finish() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ m_state = STATE_UNPROTECT_SNAP_FINISH;
+
+ librados::ObjectWriteOperation op;
+ cls_client::set_protection_status(&op, m_snap_id,
+ RBD_PROTECTION_STATUS_UNPROTECTED);
+
+ librados::AioCompletion *comp = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+void SnapshotUnprotectRequest<I>::send_unprotect_snap_rollback() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+
+ m_state = STATE_UNPROTECT_SNAP_ROLLBACK;
+
+ librados::ObjectWriteOperation op;
+ cls_client::set_protection_status(&op, m_snap_id,
+ RBD_PROTECTION_STATUS_PROTECTED);
+
+ librados::AioCompletion *comp = this->create_callback_completion();
+ int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+int SnapshotUnprotectRequest<I>::verify_and_send_unprotect_snap_start() {
+ I &image_ctx = this->m_image_ctx;
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ CephContext *cct = image_ctx.cct;
+ if ((image_ctx.features & RBD_FEATURE_LAYERING) == 0) {
+ lderr(cct) << "image must support layering" << dendl;
+ return -ENOSYS;
+ }
+
+ m_snap_id = image_ctx.get_snap_id(m_snap_namespace, m_snap_name);
+ if (m_snap_id == CEPH_NOSNAP) {
+ return -ENOENT;
+ }
+
+ bool is_unprotected;
+ int r = image_ctx.is_snap_unprotected(m_snap_id, &is_unprotected);
+ if (r < 0) {
+ return r;
+ }
+
+ if (is_unprotected) {
+ lderr(cct) << "snapshot is already unprotected" << dendl;
+ return -EINVAL;
+ }
+
+ librados::ObjectWriteOperation op;
+ cls_client::set_protection_status(&op, m_snap_id,
+ RBD_PROTECTION_STATUS_UNPROTECTING);
+
+ librados::AioCompletion *comp = this->create_callback_completion();
+ r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+
+ // TODO legacy code threw a notification post UNPROTECTING update -- required?
+ return 0;
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::SnapshotUnprotectRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/SnapshotUnprotectRequest.h b/src/librbd/operation/SnapshotUnprotectRequest.h
new file mode 100644
index 000000000..19cc6d32b
--- /dev/null
+++ b/src/librbd/operation/SnapshotUnprotectRequest.h
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OPERATION_SNAPSHOT_UNPROTECT_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_SNAPSHOT_UNPROTECT_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class SnapshotUnprotectRequest : public Request<ImageCtxT> {
+public:
+ /**
+ * Snap Unprotect goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_UNPROTECT_SNAP_START
+ * |
+ * v
+ * STATE_SCAN_POOL_CHILDREN * * * * > STATE_UNPROTECT_SNAP_ROLLBACK
+ * | |
+ * v |
+ * STATE_UNPROTECT_SNAP_FINISH |
+ * | |
+ * v |
+ * <finish> <----------------------------/
+ *
+ * @endverbatim
+ *
+ * If the unprotect operation needs to abort, the error path is followed
+ * to rollback the unprotect in-progress status on the image.
+ */
+ enum State {
+ STATE_UNPROTECT_SNAP_START,
+ STATE_SCAN_POOL_CHILDREN,
+ STATE_UNPROTECT_SNAP_FINISH,
+ STATE_UNPROTECT_SNAP_ROLLBACK
+ };
+
+ SnapshotUnprotectRequest(ImageCtxT &image_ctx, Context *on_finish,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ const std::string &snap_name);
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+
+ int filter_return_code(int r) const override {
+ if (m_ret_val < 0) {
+ return m_ret_val;
+ }
+ return 0;
+ }
+
+ journal::Event create_event(uint64_t op_tid) const override {
+ return journal::SnapUnprotectEvent(op_tid, m_snap_namespace, m_snap_name);
+ }
+
+private:
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+ std::string m_snap_name;
+ State m_state;
+
+ int m_ret_val;
+ uint64_t m_snap_id;
+
+ bool should_complete_error();
+
+ void send_unprotect_snap_start();
+ void send_scan_pool_children();
+ void send_unprotect_snap_finish();
+ void send_unprotect_snap_rollback();
+
+ int verify_and_send_unprotect_snap_start();
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::SnapshotUnprotectRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_SNAPSHOT_UNPROTECT_REQUEST_H
diff --git a/src/librbd/operation/SparsifyRequest.cc b/src/librbd/operation/SparsifyRequest.cc
new file mode 100644
index 000000000..5d9837c3e
--- /dev/null
+++ b/src/librbd/operation/SparsifyRequest.cc
@@ -0,0 +1,514 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/SparsifyRequest.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "include/err.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Types.h"
+#include "librbd/io/ObjectRequest.h"
+#include "osdc/Striper.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+
+namespace librbd {
+namespace operation {
+
+namespace {
+
+bool may_be_trimmed(const std::map<uint64_t,uint64_t> &extent_map,
+ const bufferlist &bl, size_t sparse_size,
+ uint64_t *new_end_ptr) {
+ if (extent_map.empty()) {
+ *new_end_ptr = 0;
+ return true;
+ }
+
+ uint64_t end = extent_map.rbegin()->first + extent_map.rbegin()->second;
+ uint64_t new_end = end;
+ uint64_t bl_off = bl.length();
+
+ for (auto it = extent_map.rbegin(); it != extent_map.rend(); it++) {
+ auto off = it->first;
+ auto len = it->second;
+
+ new_end = p2roundup<uint64_t>(off + len, sparse_size);
+
+ uint64_t extent_left = len;
+ uint64_t sub_len = len % sparse_size;
+ if (sub_len == 0) {
+ sub_len = sparse_size;
+ }
+ while (extent_left > 0) {
+ ceph_assert(bl_off >= sub_len);
+ bl_off -= sub_len;
+ bufferlist sub_bl;
+ sub_bl.substr_of(bl, bl_off, sub_len);
+ if (!sub_bl.is_zero()) {
+ break;
+ }
+ new_end -= sparse_size;
+ extent_left -= sub_len;
+ sub_len = sparse_size;
+ }
+ if (extent_left > 0) {
+ break;
+ }
+ }
+
+ if (new_end < end) {
+ *new_end_ptr = new_end;
+ return true;
+ }
+
+ return false;
+}
+
+} // anonymous namespace
+
+using util::create_context_callback;
+using util::create_rados_callback;
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::operation::SparsifyObject: " << this \
+ << " " << m_oid << " " << __func__ << ": "
+
+template <typename I>
+class C_SparsifyObject : public C_AsyncObjectThrottle<I> {
+public:
+
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v (not supported)
+ * SPARSIFY * * * * * * * * * * * * > READ < * * * * * * * * * * (concurrent
+ * | | * update is
+ * | (object map disabled) | (can trim) * detected)
+ * |------------------------\ V *
+ * | | PRE UPDATE OBJECT MAP *
+ * | (object map enabled) | | (if needed) *
+ * v | V *
+ * PRE UPDATE OBJECT MAP | TRIM * * * * * * * * * * *
+ * | | |
+ * v | V
+ * CHECK EXISTS | POST UPDATE OBJECT MAP
+ * | | | (if needed)
+ * v | |
+ * POST UPDATE OBJECT MAP | |
+ * | | |
+ * v | |
+ * <finish> <------------------/<-------/
+ *
+ * @endverbatim
+ *
+ */
+
+ C_SparsifyObject(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+ uint64_t object_no, size_t sparse_size)
+ : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_cct(image_ctx->cct),
+ m_object_no(object_no), m_sparse_size(sparse_size),
+ m_oid(image_ctx->get_object_name(object_no)) {
+ }
+
+ int send() override {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ ldout(m_cct, 20) << dendl;
+
+ if (!image_ctx.data_ctx.is_valid()) {
+ lderr(m_cct) << "missing data pool" << dendl;
+ return -ENODEV;
+ }
+
+ if (image_ctx.exclusive_lock != nullptr &&
+ !image_ctx.exclusive_lock->is_lock_owner()) {
+ ldout(m_cct, 1) << "lost exclusive lock during sparsify" << dendl;
+ return -ERESTART;
+ }
+
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.object_map != nullptr &&
+ !image_ctx.object_map->object_may_exist(m_object_no)) {
+ // can skip because the object does not exist
+ return 1;
+ }
+
+ uint64_t overlap_objects = 0;
+ uint64_t overlap;
+ int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &overlap);
+ if (r == 0 && overlap > 0) {
+ overlap_objects = Striper::get_num_objects(image_ctx.layout, overlap);
+ }
+ m_remove_empty = (m_object_no >= overlap_objects);
+ }
+
+ send_sparsify();
+ return 0;
+ }
+
+ void send_sparsify() {
+ I &image_ctx = this->m_image_ctx;
+ ldout(m_cct, 20) << dendl;
+
+ librados::ObjectWriteOperation op;
+ cls_client::sparsify(&op, m_sparse_size, m_remove_empty);
+ auto comp = create_rados_callback<
+ C_SparsifyObject, &C_SparsifyObject::handle_sparsify>(this);
+ int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+
+ void handle_sparsify(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r == -EOPNOTSUPP) {
+ m_trying_trim = true;
+ send_read();
+ return;
+ }
+
+ if (r == -ENOENT) {
+ finish_op(0);
+ return;
+ }
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to sparsify: " << cpp_strerror(r) << dendl;
+ finish_op(r);
+ return;
+ }
+
+ send_pre_update_object_map();
+ }
+
+ void send_pre_update_object_map() {
+ I &image_ctx = this->m_image_ctx;
+
+ if (m_trying_trim) {
+ if (!m_remove_empty || m_new_end != 0 ||
+ !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
+ send_trim();
+ return;
+ }
+ } else if (!m_remove_empty ||
+ !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
+ finish_op(0);
+ return;
+ }
+
+ ldout(m_cct, 20) << dendl;
+
+ image_ctx.owner_lock.lock_shared();
+ image_ctx.image_lock.lock_shared();
+ if (image_ctx.object_map == nullptr) {
+ // possible that exclusive lock was lost in background
+ lderr(m_cct) << "object map is not initialized" << dendl;
+
+ image_ctx.image_lock.unlock_shared();
+ image_ctx.owner_lock.unlock_shared();
+ finish_op(-EINVAL);
+ return;
+ }
+
+ int r;
+ m_finish_op_ctx = image_ctx.exclusive_lock->start_op(&r);
+ if (m_finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ image_ctx.image_lock.unlock_shared();
+ image_ctx.owner_lock.unlock_shared();
+ finish_op(r);
+ return;
+ }
+
+ auto ctx = create_context_callback<
+ C_SparsifyObject<I>,
+ &C_SparsifyObject<I>::handle_pre_update_object_map>(this);
+
+ bool sent = image_ctx.object_map->template aio_update<
+ Context, &Context::complete>(CEPH_NOSNAP, m_object_no, OBJECT_PENDING,
+ OBJECT_EXISTS, {}, false, ctx);
+
+ // NOTE: state machine might complete before we reach here
+ image_ctx.image_lock.unlock_shared();
+ image_ctx.owner_lock.unlock_shared();
+ if (!sent) {
+ finish_op(0);
+ }
+ }
+
+ void handle_pre_update_object_map(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to update object map: " << cpp_strerror(r)
+ << dendl;
+ finish_op(r);
+ return;
+ }
+
+ if (m_trying_trim) {
+ send_trim();
+ } else {
+ send_check_exists();
+ }
+ }
+
+ void send_check_exists() {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << dendl;
+
+ librados::ObjectReadOperation op;
+ op.stat(NULL, NULL, NULL);
+ m_bl.clear();
+ auto comp = create_rados_callback<
+ C_SparsifyObject, &C_SparsifyObject::handle_check_exists>(this);
+ int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+
+ void handle_check_exists(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "stat failed: " << cpp_strerror(r) << dendl;
+ finish_op(r);
+ return;
+ }
+
+ send_post_update_object_map(r == 0);
+ }
+
+ void send_post_update_object_map(bool exists) {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << dendl;
+
+ auto ctx = create_context_callback<
+ C_SparsifyObject<I>,
+ &C_SparsifyObject<I>::handle_post_update_object_map>(this);
+ bool sent;
+ {
+ std::shared_lock owner_locker{image_ctx.owner_lock};
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ assert(image_ctx.exclusive_lock->is_lock_owner());
+ assert(image_ctx.object_map != nullptr);
+
+ sent = image_ctx.object_map->template aio_update<
+ Context, &Context::complete>(CEPH_NOSNAP, m_object_no,
+ exists ? OBJECT_EXISTS : OBJECT_NONEXISTENT,
+ OBJECT_PENDING, {}, false, ctx);
+ }
+ if (!sent) {
+ ctx->complete(0);
+ }
+ }
+
+ void handle_post_update_object_map(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to update object map: " << cpp_strerror(r)
+ << dendl;
+ finish_op(r);
+ return;
+ }
+
+ finish_op(0);
+ }
+
+ void send_read() {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << dendl;
+
+ librados::ObjectReadOperation op;
+ m_bl.clear();
+ op.sparse_read(0, image_ctx.layout.object_size, &m_extent_map, &m_bl,
+ nullptr);
+ auto comp = create_rados_callback<
+ C_SparsifyObject, &C_SparsifyObject::handle_read>(this);
+ int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+
+ void handle_read(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ if (r == -ENOENT) {
+ r = 0;
+ } else {
+ lderr(m_cct) << "failed to read object: " << cpp_strerror(r) << dendl;
+ }
+ finish_op(r);
+ return;
+ }
+
+ if (!may_be_trimmed(m_extent_map, m_bl, m_sparse_size, &m_new_end)) {
+ finish_op(0);
+ return;
+ }
+
+ send_pre_update_object_map();
+ }
+
+ void send_trim() {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << dendl;
+
+ ceph_assert(m_new_end < image_ctx.layout.object_size);
+
+ librados::ObjectWriteOperation op;
+ m_bl.clear();
+ m_bl.append_zero(image_ctx.layout.object_size - m_new_end);
+ op.cmpext(m_new_end, m_bl, nullptr);
+ if (m_new_end == 0 && m_remove_empty) {
+ op.remove();
+ } else {
+ op.truncate(m_new_end);
+ }
+
+ auto comp = create_rados_callback<
+ C_SparsifyObject, &C_SparsifyObject::handle_trim>(this);
+ int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+
+ void handle_trim(int r) {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r <= -MAX_ERRNO) {
+ m_finish_op_ctx->complete(0);
+ m_finish_op_ctx = nullptr;
+ send_read();
+ return;
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "failed to trim: " << cpp_strerror(r) << dendl;
+ finish_op(r);
+ return;
+ }
+
+ if (!m_remove_empty || m_new_end != 0 ||
+ !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
+ finish_op(0);
+ return;
+ }
+
+ send_post_update_object_map(false);
+ }
+
+ void finish_op(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (m_finish_op_ctx != nullptr) {
+ m_finish_op_ctx->complete(0);
+ }
+ this->complete(r);
+ }
+
+private:
+ CephContext *m_cct;
+ uint64_t m_object_no;
+ size_t m_sparse_size;
+ std::string m_oid;
+
+ bool m_remove_empty = false;
+ bool m_trying_trim = false;
+ bufferlist m_bl;
+ std::map<uint64_t,uint64_t> m_extent_map;
+ uint64_t m_new_end = 0;
+ Context *m_finish_op_ctx = nullptr;
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::operation::SparsifyRequest: " << this \
+ << " " << __func__ << ": "
+
+template <typename I>
+bool SparsifyRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+ if (r < 0) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+}
+
+template <typename I>
+void SparsifyRequest<I>::send_op() {
+ sparsify_objects();
+}
+
+template <typename I>
+void SparsifyRequest<I>::sparsify_objects() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ uint64_t objects = 0;
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ objects = image_ctx.get_object_count(CEPH_NOSNAP);
+ }
+
+ auto ctx = create_context_callback<
+ SparsifyRequest<I>,
+ &SparsifyRequest<I>::handle_sparsify_objects>(this);
+ typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_SparsifyObject<I> >(),
+ boost::lambda::_1, &image_ctx, boost::lambda::_2, m_sparse_size));
+ AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+ this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, objects);
+ throttle->start_ops(
+ image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
+}
+
+template <typename I>
+void SparsifyRequest<I>::handle_sparsify_objects(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r == -ERESTART) {
+ ldout(cct, 5) << "sparsify operation interrupted" << dendl;
+ this->complete(r);
+ return;
+ } else if (r < 0) {
+ lderr(cct) << "sparsify encountered an error: " << cpp_strerror(r) << dendl;
+ this->complete(r);
+ return;
+ }
+
+ this->complete(0);
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::SparsifyRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/SparsifyRequest.h b/src/librbd/operation/SparsifyRequest.h
new file mode 100644
index 000000000..74f9eb727
--- /dev/null
+++ b/src/librbd/operation/SparsifyRequest.h
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_SPARSIFY_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_SPARSIFY_REQUEST_H
+
+#include "librbd/operation/Request.h"
+#include "common/snap_types.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class SparsifyRequest : public Request<ImageCtxT>
+{
+public:
+ SparsifyRequest(ImageCtxT &image_ctx, size_t sparse_size, Context *on_finish,
+ ProgressContext &prog_ctx)
+ : Request<ImageCtxT>(image_ctx, on_finish), m_sparse_size(sparse_size),
+ m_prog_ctx(prog_ctx) {
+ }
+
+protected:
+ void send_op() override;
+ bool should_complete(int r) override;
+ bool can_affect_io() const override {
+ return true;
+ }
+ journal::Event create_event(uint64_t op_tid) const override {
+ ceph_abort();
+ return journal::UnknownEvent();
+ }
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * SPARSIFY OBJECTS
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ size_t m_sparse_size;
+ ProgressContext &m_prog_ctx;
+
+ void sparsify_objects();
+ void handle_sparsify_objects(int r);
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::SparsifyRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_SPARSIFY_REQUEST_H
diff --git a/src/librbd/operation/TrimRequest.cc b/src/librbd/operation/TrimRequest.cc
new file mode 100644
index 000000000..b8ecf10ac
--- /dev/null
+++ b/src/librbd/operation/TrimRequest.cc
@@ -0,0 +1,373 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/TrimRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/io/ObjectDispatchSpec.h"
+#include "librbd/io/ObjectDispatcherInterface.h"
+#include "common/ContextCompletion.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "osdc/Striper.h"
+
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::TrimRequest: "
+
+namespace librbd {
+namespace operation {
+
+template <typename I>
+class C_CopyupObject : public C_AsyncObjectThrottle<I> {
+public:
+ C_CopyupObject(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+ IOContext io_context, uint64_t object_no)
+ : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_io_context(io_context),
+ m_object_no(object_no)
+ {
+ }
+
+ int send() override {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ string oid = image_ctx.get_object_name(m_object_no);
+ ldout(image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl;
+
+ auto object_dispatch_spec = io::ObjectDispatchSpec::create_discard(
+ &image_ctx, io::OBJECT_DISPATCH_LAYER_NONE, m_object_no, 0,
+ image_ctx.layout.object_size, m_io_context,
+ io::OBJECT_DISCARD_FLAG_DISABLE_OBJECT_MAP_UPDATE, 0, {}, this);
+ object_dispatch_spec->send();
+ return 0;
+ }
+private:
+ IOContext m_io_context;
+ uint64_t m_object_no;
+};
+
+template <typename I>
+class C_RemoveObject : public C_AsyncObjectThrottle<I> {
+public:
+ C_RemoveObject(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+ uint64_t object_no)
+ : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_object_no(object_no)
+ {
+ }
+
+ int send() override {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.object_map != nullptr &&
+ !image_ctx.object_map->object_may_exist(m_object_no)) {
+ return 1;
+ }
+ }
+
+ string oid = image_ctx.get_object_name(m_object_no);
+ ldout(image_ctx.cct, 10) << "removing " << oid << dendl;
+
+ librados::AioCompletion *rados_completion =
+ util::create_rados_callback(this);
+ int r = image_ctx.data_ctx.aio_remove(oid, rados_completion);
+ ceph_assert(r == 0);
+ rados_completion->release();
+ return 0;
+ }
+
+private:
+ uint64_t m_object_no;
+};
+
+template <typename I>
+TrimRequest<I>::TrimRequest(I &image_ctx, Context *on_finish,
+ uint64_t original_size, uint64_t new_size,
+ ProgressContext &prog_ctx)
+ : AsyncRequest<I>(image_ctx, on_finish), m_new_size(new_size),
+ m_prog_ctx(prog_ctx)
+{
+ uint64_t period = image_ctx.get_stripe_period();
+ uint64_t new_num_periods = ((m_new_size + period - 1) / period);
+ m_delete_off = std::min(new_num_periods * period, original_size);
+ // first object we can delete free and clear
+ m_delete_start = new_num_periods * image_ctx.get_stripe_count();
+ m_delete_start_min = m_delete_start;
+ m_num_objects = Striper::get_num_objects(image_ctx.layout, original_size);
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 10) << this << " trim image " << original_size << " -> "
+ << m_new_size << " periods " << new_num_periods
+ << " discard to offset " << m_delete_off
+ << " delete objects " << m_delete_start
+ << " to " << m_num_objects << dendl;
+}
+
+template <typename I>
+bool TrimRequest<I>::should_complete(int r)
+{
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: r=" << r << dendl;
+ if (r == -ERESTART) {
+ ldout(cct, 5) << "trim operation interrupted" << dendl;
+ return true;
+ } else if (r < 0) {
+ lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl;
+ return true;
+ }
+
+ std::shared_lock owner_lock{image_ctx.owner_lock};
+ switch (m_state) {
+ case STATE_PRE_TRIM:
+ ldout(cct, 5) << " PRE_TRIM" << dendl;
+ send_copyup_objects();
+ break;
+
+ case STATE_COPYUP_OBJECTS:
+ ldout(cct, 5) << " COPYUP_OBJECTS" << dendl;
+ send_remove_objects();
+ break;
+
+ case STATE_REMOVE_OBJECTS:
+ ldout(cct, 5) << " REMOVE_OBJECTS" << dendl;
+ send_post_trim();
+ break;
+
+ case STATE_POST_TRIM:
+ ldout(cct, 5) << " POST_TRIM" << dendl;
+ send_clean_boundary();
+ break;
+
+ case STATE_CLEAN_BOUNDARY:
+ ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl;
+ send_finish(0);
+ break;
+
+ case STATE_FINISHED:
+ ldout(cct, 5) << "FINISHED" << dendl;
+ return true;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ ceph_abort();
+ break;
+ }
+ return false;
+}
+
+template <typename I>
+void TrimRequest<I>::send() {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+
+ if (!image_ctx.data_ctx.is_valid()) {
+ lderr(cct) << "missing data pool" << dendl;
+ send_finish(-ENODEV);
+ return;
+ }
+
+ send_pre_trim();
+}
+
+template<typename I>
+void TrimRequest<I>::send_pre_trim() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ if (m_delete_start >= m_num_objects) {
+ send_clean_boundary();
+ return;
+ }
+
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.object_map != nullptr) {
+ ldout(image_ctx.cct, 5) << this << " send_pre_trim: "
+ << " delete_start_min=" << m_delete_start_min
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_PRE_TRIM;
+
+ ceph_assert(image_ctx.exclusive_lock->is_lock_owner());
+
+ if (image_ctx.object_map->template aio_update<AsyncRequest<I> >(
+ CEPH_NOSNAP, m_delete_start_min, m_num_objects, OBJECT_PENDING,
+ OBJECT_EXISTS, {}, false, this)) {
+ return;
+ }
+ }
+ }
+
+ send_copyup_objects();
+}
+
+template<typename I>
+void TrimRequest<I>::send_copyup_objects() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ IOContext io_context;
+ bool has_snapshots;
+ uint64_t parent_overlap;
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+
+ io_context = image_ctx.get_data_io_context();
+ has_snapshots = !image_ctx.snaps.empty();
+ int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap);
+ ceph_assert(r == 0);
+ }
+
+ // copyup is only required for portion of image that overlaps parent
+ uint64_t copyup_end = Striper::get_num_objects(image_ctx.layout,
+ parent_overlap);
+
+ // TODO: protect against concurrent shrink and snap create?
+ // skip to remove if no copyup is required.
+ if (copyup_end <= m_delete_start || !has_snapshots) {
+ send_remove_objects();
+ return;
+ }
+
+ uint64_t copyup_start = m_delete_start;
+ m_delete_start = copyup_end;
+
+ ldout(image_ctx.cct, 5) << this << " send_copyup_objects: "
+ << " start object=" << copyup_start << ", "
+ << " end object=" << copyup_end << dendl;
+ m_state = STATE_COPYUP_OBJECTS;
+
+ Context *ctx = this->create_callback_context();
+ typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject<I> >(),
+ boost::lambda::_1, &image_ctx, io_context, boost::lambda::_2));
+ AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+ this, image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start,
+ copyup_end);
+ throttle->start_ops(
+ image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
+}
+
+template <typename I>
+void TrimRequest<I>::send_remove_objects() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ ldout(image_ctx.cct, 5) << this << " send_remove_objects: "
+ << " delete_start=" << m_delete_start
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_REMOVE_OBJECTS;
+
+ Context *ctx = this->create_callback_context();
+ typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_RemoveObject<I> >(),
+ boost::lambda::_1, &image_ctx, boost::lambda::_2));
+ AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+ this, image_ctx, context_factory, ctx, &m_prog_ctx, m_delete_start,
+ m_num_objects);
+ throttle->start_ops(
+ image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
+}
+
+template<typename I>
+void TrimRequest<I>::send_post_trim() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ if (image_ctx.object_map != nullptr) {
+ ldout(image_ctx.cct, 5) << this << " send_post_trim:"
+ << " delete_start_min=" << m_delete_start_min
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_POST_TRIM;
+
+ ceph_assert(image_ctx.exclusive_lock->is_lock_owner());
+
+ if (image_ctx.object_map->template aio_update<AsyncRequest<I> >(
+ CEPH_NOSNAP, m_delete_start_min, m_num_objects, OBJECT_NONEXISTENT,
+ OBJECT_PENDING, {}, false, this)) {
+ return;
+ }
+ }
+ }
+
+ send_clean_boundary();
+}
+
+template <typename I>
+void TrimRequest<I>::send_clean_boundary() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ CephContext *cct = image_ctx.cct;
+ if (m_delete_off <= m_new_size) {
+ send_finish(0);
+ return;
+ }
+
+ // should have been canceled prior to releasing lock
+ ceph_assert(image_ctx.exclusive_lock == nullptr ||
+ image_ctx.exclusive_lock->is_lock_owner());
+ uint64_t delete_len = m_delete_off - m_new_size;
+ ldout(image_ctx.cct, 5) << this << " send_clean_boundary: "
+ << " delete_off=" << m_delete_off
+ << " length=" << delete_len << dendl;
+ m_state = STATE_CLEAN_BOUNDARY;
+
+ IOContext io_context;
+ {
+ std::shared_lock image_locker{image_ctx.image_lock};
+ io_context = image_ctx.get_data_io_context();
+ }
+
+ // discard the weird boundary
+ std::vector<ObjectExtent> extents;
+ Striper::file_to_extents(cct, image_ctx.format_string,
+ &image_ctx.layout, m_new_size, delete_len, 0,
+ extents);
+
+ ContextCompletion *completion =
+ new ContextCompletion(this->create_async_callback_context(), true);
+ for (vector<ObjectExtent>::iterator p = extents.begin();
+ p != extents.end(); ++p) {
+ ldout(cct, 20) << " ex " << *p << dendl;
+ Context *req_comp = new C_ContextCompletion(*completion);
+
+ if (p->offset == 0) {
+ // treat as a full object delete on the boundary
+ p->length = image_ctx.layout.object_size;
+ }
+
+ auto object_dispatch_spec = io::ObjectDispatchSpec::create_discard(
+ &image_ctx, io::OBJECT_DISPATCH_LAYER_NONE, p->objectno, p->offset,
+ p->length, io_context, 0, 0, {}, req_comp);
+ object_dispatch_spec->send();
+ }
+ completion->finish_adding_requests();
+}
+
+template <typename I>
+void TrimRequest<I>::send_finish(int r) {
+ m_state = STATE_FINISHED;
+ this->async_complete(r);
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::TrimRequest<librbd::ImageCtx>;
diff --git a/src/librbd/operation/TrimRequest.h b/src/librbd/operation/TrimRequest.h
new file mode 100644
index 000000000..8526046c9
--- /dev/null
+++ b/src/librbd/operation/TrimRequest.h
@@ -0,0 +1,107 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+template <typename ImageCtxT = ImageCtx>
+class TrimRequest : public AsyncRequest<ImageCtxT>
+{
+public:
+ static TrimRequest *create(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t original_size, uint64_t new_size,
+ ProgressContext &prog_ctx) {
+ return new TrimRequest(image_ctx, on_finish, original_size, new_size,
+ prog_ctx);
+ }
+
+ TrimRequest(ImageCtxT &image_ctx, Context *on_finish,
+ uint64_t original_size, uint64_t new_size,
+ ProgressContext &prog_ctx);
+
+ void send() override;
+
+protected:
+ /**
+ * Trim goes through the following state machine to remove whole objects,
+ * clean partially trimmed objects, and update the object map:
+ *
+ * @verbatim
+ *
+ * <start> . . . . . . . . . . . . . . . . .
+ * | .
+ * v (skip if not needed) .
+ * STATE_PRE_TRIM .
+ * | .
+ * v (skip if not needed) .
+ * STATE_COPYUP_OBJECTS .
+ * | .
+ * v (skip if not needed) .
+ * STATE_REMOVE_OBJECTS .
+ * | .
+ * v (skip if not needed) .
+ * STATE_POST_TRIM .
+ * | .
+ * v (skip if not needed) .
+ * STATE_CLEAN_BOUNDARY .
+ * | .
+ * v .
+ * STATE_FINISHED < . . . . . . . . . . . . . . .
+ * |
+ * v
+ * <finish>
+ *
+ * The _COPYUP_OBJECTS state is skipped if there is no parent overlap
+ * within the new image size and the image does not have any snapshots.
+ * The _PRE_TRIM/_POST_TRIM states are skipped if the object map
+ * isn't enabled. The _REMOVE_OBJECTS state is skipped if no whole objects
+ * are removed. The _CLEAN_BOUNDARY state is skipped if no boundary
+ * objects are cleaned. The state machine will immediately transition
+ * to _FINISHED state if there are no bytes to trim.
+ */
+
+ enum State {
+ STATE_PRE_TRIM,
+ STATE_COPYUP_OBJECTS,
+ STATE_REMOVE_OBJECTS,
+ STATE_POST_TRIM,
+ STATE_CLEAN_BOUNDARY,
+ STATE_FINISHED
+ };
+
+ bool should_complete(int r) override;
+
+ State m_state = STATE_PRE_TRIM;
+
+private:
+ uint64_t m_delete_start;
+ uint64_t m_delete_start_min = 0;
+ uint64_t m_num_objects;
+ uint64_t m_delete_off;
+ uint64_t m_new_size;
+ ProgressContext &m_prog_ctx;
+
+ void send_pre_trim();
+ void send_copyup_objects();
+ void send_remove_objects();
+ void send_post_trim();
+
+ void send_clean_boundary();
+ void send_finish(int r);
+};
+
+} // namespace operation
+} // namespace librbd
+
+extern template class librbd::operation::TrimRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H