summaryrefslogtreecommitdiffstats
path: root/src/librbd/exclusive_lock
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/librbd/exclusive_lock/AutomaticPolicy.cc29
-rw-r--r--src/librbd/exclusive_lock/AutomaticPolicy.h34
-rw-r--r--src/librbd/exclusive_lock/ImageDispatch.cc320
-rw-r--r--src/librbd/exclusive_lock/ImageDispatch.h133
-rw-r--r--src/librbd/exclusive_lock/Policy.h31
-rw-r--r--src/librbd/exclusive_lock/PostAcquireRequest.cc368
-rw-r--r--src/librbd/exclusive_lock/PostAcquireRequest.h124
-rw-r--r--src/librbd/exclusive_lock/PreAcquireRequest.cc95
-rw-r--r--src/librbd/exclusive_lock/PreAcquireRequest.h75
-rw-r--r--src/librbd/exclusive_lock/PreReleaseRequest.cc363
-rw-r--r--src/librbd/exclusive_lock/PreReleaseRequest.h139
-rw-r--r--src/librbd/exclusive_lock/StandardPolicy.cc29
-rw-r--r--src/librbd/exclusive_lock/StandardPolicy.h37
13 files changed, 1777 insertions, 0 deletions
diff --git a/src/librbd/exclusive_lock/AutomaticPolicy.cc b/src/librbd/exclusive_lock/AutomaticPolicy.cc
new file mode 100644
index 000000000..bfaddc1b2
--- /dev/null
+++ b/src/librbd/exclusive_lock/AutomaticPolicy.cc
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/exclusive_lock/AutomaticPolicy.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ExclusiveLock.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ExclusiveLock::AutomaticPolicy "
+
+namespace librbd {
+namespace exclusive_lock {
+
+int AutomaticPolicy::lock_requested(bool force) {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx->owner_lock));
+ ceph_assert(m_image_ctx->exclusive_lock != nullptr);
+
+ ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
+ << dendl;
+
+ // release the lock upon request (ignore forced requests)
+ m_image_ctx->exclusive_lock->release_lock(nullptr);
+ return 0;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
diff --git a/src/librbd/exclusive_lock/AutomaticPolicy.h b/src/librbd/exclusive_lock/AutomaticPolicy.h
new file mode 100644
index 000000000..12ba9b6c4
--- /dev/null
+++ b/src/librbd/exclusive_lock/AutomaticPolicy.h
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_AUTOMATIC_POLICY_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_AUTOMATIC_POLICY_H
+
+#include "librbd/exclusive_lock/Policy.h"
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace exclusive_lock {
+
+class AutomaticPolicy : public Policy {
+public:
+ AutomaticPolicy(ImageCtx *image_ctx) : m_image_ctx(image_ctx) {
+ }
+
+ bool may_auto_request_lock() override {
+ return true;
+ }
+
+ int lock_requested(bool force) override;
+
+private:
+ ImageCtx *m_image_ctx;
+
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_AUTOMATIC_POLICY_H
diff --git a/src/librbd/exclusive_lock/ImageDispatch.cc b/src/librbd/exclusive_lock/ImageDispatch.cc
new file mode 100644
index 000000000..5939c7a81
--- /dev/null
+++ b/src/librbd/exclusive_lock/ImageDispatch.cc
@@ -0,0 +1,320 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/exclusive_lock/ImageDispatch.h"
+#include "include/Context.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/exclusive_lock/Policy.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+#include "librbd/io/ImageDispatcherInterface.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::exclusive_lock::ImageDispatch: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace exclusive_lock {
+
+using util::create_context_callback;
+using util::create_async_context_callback;
+
+template <typename I>
+ImageDispatch<I>::ImageDispatch(I* image_ctx)
+ : m_image_ctx(image_ctx),
+ m_lock(ceph::make_shared_mutex(
+ util::unique_lock_name("librbd::exclusve_lock::ImageDispatch::m_lock",
+ this))) {
+}
+
+template <typename I>
+void ImageDispatch<I>::shut_down(Context* on_finish) {
+ // release any IO waiting on exclusive lock
+ Contexts on_dispatches;
+ {
+ std::unique_lock locker{m_lock};
+ std::swap(on_dispatches, m_on_dispatches);
+ }
+
+ for (auto ctx : on_dispatches) {
+ ctx->complete(0);
+ }
+
+ on_finish->complete(0);
+}
+
+template <typename I>
+void ImageDispatch<I>::set_require_lock(bool init_shutdown,
+ io::Direction direction,
+ Context* on_finish) {
+ // pause any matching IO from proceeding past this layer
+ set_require_lock(direction, true);
+
+ if (direction == io::DIRECTION_READ) {
+ on_finish->complete(0);
+ return;
+ }
+
+ // push through a flush for any in-flight writes at lower levels
+ auto aio_comp = io::AioCompletion::create_and_start(
+ on_finish, util::get_image_ctx(m_image_ctx), io::AIO_TYPE_FLUSH);
+ auto req = io::ImageDispatchSpec::create_flush(
+ *m_image_ctx, io::IMAGE_DISPATCH_LAYER_EXCLUSIVE_LOCK, aio_comp,
+ (init_shutdown ?
+ io::FLUSH_SOURCE_EXCLUSIVE_LOCK_SKIP_REFRESH :
+ io::FLUSH_SOURCE_EXCLUSIVE_LOCK), {});
+ req->send();
+}
+
+template <typename I>
+void ImageDispatch<I>::unset_require_lock(io::Direction direction) {
+ set_require_lock(direction, false);
+}
+
+template <typename I>
+bool ImageDispatch<I>::set_require_lock(io::Direction direction, bool enabled) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 20) << "direction=" << direction << ", enabled=" << enabled
+ << dendl;
+
+ std::unique_lock locker{m_lock};
+ auto prev_require_lock = (m_require_lock_on_read || m_require_lock_on_write);
+
+ switch (direction) {
+ case io::DIRECTION_READ:
+ m_require_lock_on_read = enabled;
+ break;
+ case io::DIRECTION_WRITE:
+ m_require_lock_on_write = enabled;
+ break;
+ case io::DIRECTION_BOTH:
+ m_require_lock_on_read = enabled;
+ m_require_lock_on_write = enabled;
+ break;
+ }
+
+ bool require_lock = (m_require_lock_on_read || m_require_lock_on_write);
+ return ((enabled && !prev_require_lock && require_lock) ||
+ (!enabled && prev_require_lock && !require_lock));
+}
+
+template <typename I>
+bool ImageDispatch<I>::read(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents,
+ io::ReadResult &&read_result, IOContext io_context, int op_flags,
+ int read_flags, const ZTracer::Trace &parent_trace, uint64_t tid,
+ std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 20) << "image_extents=" << image_extents << dendl;
+
+ if (needs_exclusive_lock(true, tid, dispatch_result, on_dispatched)) {
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+bool ImageDispatch<I>::write(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents, bufferlist &&bl,
+ int op_flags, const ZTracer::Trace &parent_trace,
+ uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 20) << "tid=" << tid << ", image_extents=" << image_extents
+ << dendl;
+
+ if (needs_exclusive_lock(false, tid, dispatch_result, on_dispatched)) {
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+bool ImageDispatch<I>::discard(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents,
+ uint32_t discard_granularity_bytes, const ZTracer::Trace &parent_trace,
+ uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 20) << "tid=" << tid << ", image_extents=" << image_extents
+ << dendl;
+
+ if (needs_exclusive_lock(false, tid, dispatch_result, on_dispatched)) {
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+bool ImageDispatch<I>::write_same(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents, bufferlist &&bl,
+ int op_flags, const ZTracer::Trace &parent_trace,
+ uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 20) << "tid=" << tid << ", image_extents=" << image_extents
+ << dendl;
+
+ if (needs_exclusive_lock(false, tid, dispatch_result, on_dispatched)) {
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+bool ImageDispatch<I>::compare_and_write(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents,
+ bufferlist &&cmp_bl, bufferlist &&bl, uint64_t *mismatch_offset,
+ int op_flags, const ZTracer::Trace &parent_trace,
+ uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 20) << "tid=" << tid << ", image_extents=" << image_extents
+ << dendl;
+
+ if (needs_exclusive_lock(false, tid, dispatch_result, on_dispatched)) {
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+bool ImageDispatch<I>::flush(
+ io::AioCompletion* aio_comp, io::FlushSource flush_source,
+ const ZTracer::Trace &parent_trace, uint64_t tid,
+ std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 20) << "tid=" << tid << dendl;
+
+ // don't attempt to grab the exclusive lock if were are just internally
+ // clearing out our in-flight IO queue
+ if (flush_source != io::FLUSH_SOURCE_USER) {
+ return false;
+ }
+
+ if (needs_exclusive_lock(false, tid, dispatch_result, on_dispatched)) {
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+bool ImageDispatch<I>::is_lock_required(bool read_op) const {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ return ((read_op && m_require_lock_on_read) ||
+ (!read_op && m_require_lock_on_write));
+}
+
+template <typename I>
+bool ImageDispatch<I>::needs_exclusive_lock(bool read_op, uint64_t tid,
+ io::DispatchResult* dispatch_result,
+ Context* on_dispatched) {
+ auto cct = m_image_ctx->cct;
+ bool lock_required = false;
+ {
+ std::shared_lock locker{m_lock};
+ lock_required = is_lock_required(read_op);
+ }
+
+ if (lock_required) {
+ std::shared_lock owner_locker{m_image_ctx->owner_lock};
+ if (m_image_ctx->exclusive_lock == nullptr) {
+ // raced with the exclusive lock being disabled
+ return false;
+ }
+
+ ldout(cct, 5) << "exclusive lock required: delaying IO" << dendl;
+ if (!m_image_ctx->get_exclusive_lock_policy()->may_auto_request_lock()) {
+ lderr(cct) << "op requires exclusive lock" << dendl;
+
+ *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+ on_dispatched->complete(
+ m_image_ctx->exclusive_lock->get_unlocked_op_error());
+ return true;
+ }
+
+ // block potential races with other incoming IOs
+ std::unique_lock locker{m_lock};
+ bool retesting_lock = (
+ !m_on_dispatches.empty() && m_on_dispatches.front() == on_dispatched);
+ if (!m_on_dispatches.empty() && !retesting_lock) {
+ *dispatch_result = io::DISPATCH_RESULT_RESTART;
+ m_on_dispatches.push_back(on_dispatched);
+ return true;
+ }
+
+ if (!is_lock_required(read_op)) {
+ return false;
+ }
+
+ ceph_assert(m_on_dispatches.empty() || retesting_lock);
+ m_on_dispatches.push_back(on_dispatched);
+ locker.unlock();
+
+ *dispatch_result = io::DISPATCH_RESULT_RESTART;
+ auto ctx = create_async_context_callback(
+ *m_image_ctx, create_context_callback<
+ ImageDispatch<I>, &ImageDispatch<I>::handle_acquire_lock>(this));
+ m_image_ctx->exclusive_lock->acquire_lock(ctx);
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+void ImageDispatch<I>::handle_acquire_lock(int r) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ std::unique_lock locker{m_lock};
+ ceph_assert(!m_on_dispatches.empty());
+
+ Context* failed_dispatch = nullptr;
+ Contexts on_dispatches;
+ if (r == -ERESTART) {
+ ldout(cct, 5) << "IO raced with exclusive lock shutdown" << dendl;
+ } else if (r < 0) {
+ lderr(cct) << "failed to acquire exclusive lock: " << cpp_strerror(r)
+ << dendl;
+ failed_dispatch = m_on_dispatches.front();
+ m_on_dispatches.pop_front();
+ }
+
+ // re-test if lock is still required (i.e. it wasn't acquired/lost) via a
+ // restart dispatch
+ std::swap(on_dispatches, m_on_dispatches);
+ locker.unlock();
+
+ if (failed_dispatch != nullptr) {
+ failed_dispatch->complete(r);
+ }
+ for (auto ctx : on_dispatches) {
+ ctx->complete(0);
+ }
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::ImageDispatch<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/ImageDispatch.h b/src/librbd/exclusive_lock/ImageDispatch.h
new file mode 100644
index 000000000..c0d9d49f5
--- /dev/null
+++ b/src/librbd/exclusive_lock/ImageDispatch.h
@@ -0,0 +1,133 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_IMAGE_DISPATCH_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_IMAGE_DISPATCH_H
+
+#include "librbd/io/ImageDispatchInterface.h"
+#include "include/int_types.h"
+#include "include/buffer.h"
+#include "common/ceph_mutex.h"
+#include "common/zipkin_trace.h"
+#include "librbd/io/ReadResult.h"
+#include "librbd/io/Types.h"
+#include <atomic>
+#include <list>
+#include <unordered_set>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace io {
+struct AioCompletion;
+}
+
+namespace exclusive_lock {
+
+template <typename ImageCtxT>
+class ImageDispatch : public io::ImageDispatchInterface {
+public:
+ static ImageDispatch* create(ImageCtxT* image_ctx) {
+ return new ImageDispatch(image_ctx);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ ImageDispatch(ImageCtxT* image_ctx);
+
+ io::ImageDispatchLayer get_dispatch_layer() const override {
+ return io::IMAGE_DISPATCH_LAYER_EXCLUSIVE_LOCK;
+ }
+
+ void set_require_lock(bool init_shutdown,
+ io::Direction direction, Context* on_finish);
+ void unset_require_lock(io::Direction direction);
+
+ void shut_down(Context* on_finish) override;
+
+ bool read(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents,
+ io::ReadResult &&read_result, IOContext io_context, int op_flags,
+ int read_flags, const ZTracer::Trace &parent_trace, uint64_t tid,
+ std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) override;
+ bool write(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents, bufferlist &&bl,
+ int op_flags, const ZTracer::Trace &parent_trace,
+ uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) override;
+ bool discard(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents,
+ uint32_t discard_granularity_bytes, const ZTracer::Trace &parent_trace,
+ uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) override;
+ bool write_same(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents, bufferlist &&bl,
+ int op_flags, const ZTracer::Trace &parent_trace,
+ uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) override;
+ bool compare_and_write(
+ io::AioCompletion* aio_comp, io::Extents &&image_extents,
+ bufferlist &&cmp_bl, bufferlist &&bl, uint64_t *mismatch_offset,
+ int op_flags, const ZTracer::Trace &parent_trace,
+ uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) override;
+ bool flush(
+ io::AioCompletion* aio_comp, io::FlushSource flush_source,
+ const ZTracer::Trace &parent_trace, uint64_t tid,
+ std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) override;
+
+ bool list_snaps(
+ io::AioCompletion* aio_comp, io::Extents&& image_extents,
+ io::SnapIds&& snap_ids, int list_snaps_flags,
+ io::SnapshotDelta* snapshot_delta, const ZTracer::Trace &parent_trace,
+ uint64_t tid, std::atomic<uint32_t>* image_dispatch_flags,
+ io::DispatchResult* dispatch_result, Context** on_finish,
+ Context* on_dispatched) override {
+ return false;
+ }
+
+ bool invalidate_cache(Context* on_finish) override {
+ return false;
+ }
+
+private:
+ typedef std::list<Context*> Contexts;
+ typedef std::unordered_set<uint64_t> Tids;
+
+ ImageCtxT* m_image_ctx;
+ mutable ceph::shared_mutex m_lock;
+
+ bool m_require_lock_on_read = false;
+ bool m_require_lock_on_write = false;
+
+ Contexts m_on_dispatches;
+
+ bool set_require_lock(io::Direction direction, bool enabled);
+
+ bool is_lock_required(bool read_op) const;
+
+ bool needs_exclusive_lock(bool read_op, uint64_t tid,
+ io::DispatchResult* dispatch_result,
+ Context* on_dispatched);
+
+ void handle_acquire_lock(int r);
+};
+
+} // namespace exclusiv_lock
+} // namespace librbd
+
+extern template class librbd::exclusive_lock::ImageDispatch<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_IMAGE_DISPATCH_H
diff --git a/src/librbd/exclusive_lock/Policy.h b/src/librbd/exclusive_lock/Policy.h
new file mode 100644
index 000000000..7064a6515
--- /dev/null
+++ b/src/librbd/exclusive_lock/Policy.h
@@ -0,0 +1,31 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_POLICY_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_POLICY_H
+
+namespace librbd {
+namespace exclusive_lock {
+
+enum OperationRequestType {
+ OPERATION_REQUEST_TYPE_GENERAL = 0,
+ OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE = 1,
+ OPERATION_REQUEST_TYPE_FORCE_PROMOTION = 2,
+};
+
+struct Policy {
+ virtual ~Policy() {
+ }
+
+ virtual bool may_auto_request_lock() = 0;
+ virtual int lock_requested(bool force) = 0;
+
+ virtual bool accept_blocked_request(OperationRequestType) {
+ return false;
+ }
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_POLICY_H
diff --git a/src/librbd/exclusive_lock/PostAcquireRequest.cc b/src/librbd/exclusive_lock/PostAcquireRequest.cc
new file mode 100644
index 000000000..4553b2158
--- /dev/null
+++ b/src/librbd/exclusive_lock/PostAcquireRequest.cc
@@ -0,0 +1,368 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/exclusive_lock/PostAcquireRequest.h"
+#include "cls/lock/cls_lock_client.h"
+#include "cls/lock/cls_lock_types.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/Journal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/image/RefreshRequest.h"
+#include "librbd/journal/Policy.h"
+#include "librbd/PluginRegistry.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::exclusive_lock::PostAcquireRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace exclusive_lock {
+
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_callback;
+
+template <typename I>
+PostAcquireRequest<I>* PostAcquireRequest<I>::create(I &image_ctx,
+ Context *on_acquire,
+ Context *on_finish) {
+ return new PostAcquireRequest(image_ctx, on_acquire, on_finish);
+}
+
+template <typename I>
+PostAcquireRequest<I>::PostAcquireRequest(I &image_ctx, Context *on_acquire,
+ Context *on_finish)
+ : m_image_ctx(image_ctx),
+ m_on_acquire(on_acquire),
+ m_on_finish(create_async_context_callback(image_ctx, on_finish)),
+ m_object_map(nullptr), m_journal(nullptr), m_error_result(0) {
+}
+
+template <typename I>
+PostAcquireRequest<I>::~PostAcquireRequest() {
+ if (!m_prepare_lock_completed) {
+ m_image_ctx.state->handle_prepare_lock_complete();
+ }
+ delete m_on_acquire;
+}
+
+template <typename I>
+void PostAcquireRequest<I>::send() {
+ send_refresh();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::send_refresh() {
+ if (!m_image_ctx.state->is_refresh_required()) {
+ send_open_object_map();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PostAcquireRequest<I>;
+ Context *ctx = create_async_context_callback(
+ m_image_ctx, create_context_callback<klass, &klass::handle_refresh>(this));
+
+ // ImageState is blocked waiting for lock to complete -- safe to directly
+ // refresh
+ image::RefreshRequest<I> *req = image::RefreshRequest<I>::create(
+ m_image_ctx, true, false, ctx);
+ req->send();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::handle_refresh(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r == -ERESTART) {
+ // next issued IO or op will (re)-refresh the image and shut down lock
+ ldout(cct, 5) << "exclusive lock dynamically disabled" << dendl;
+ r = 0;
+ } else if (r < 0) {
+ lderr(cct) << "failed to refresh image: " << cpp_strerror(r) << dendl;
+ save_result(r);
+ revert();
+ finish();
+ return;
+ }
+
+ send_open_object_map();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::send_open_journal() {
+ // alert caller that we now own the exclusive lock
+ m_on_acquire->complete(0);
+ m_on_acquire = nullptr;
+
+ bool journal_enabled;
+ {
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ journal_enabled = (m_image_ctx.test_features(RBD_FEATURE_JOURNALING,
+ m_image_ctx.image_lock) &&
+ !m_image_ctx.get_journal_policy()->journal_disabled());
+ }
+ if (!journal_enabled) {
+ apply();
+ send_process_plugin_acquire_lock();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PostAcquireRequest<I>;
+ Context *ctx = create_context_callback<klass, &klass::handle_open_journal>(
+ this);
+ m_journal = m_image_ctx.create_journal();
+
+ // journal playback requires object map (if enabled) and itself
+ apply();
+
+ m_journal->open(ctx);
+}
+
+template <typename I>
+void PostAcquireRequest<I>::handle_open_journal(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ save_result(r);
+ if (r < 0) {
+ lderr(cct) << "failed to open journal: " << cpp_strerror(r) << dendl;
+ send_close_journal();
+ return;
+ }
+
+ send_allocate_journal_tag();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::send_allocate_journal_tag() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ using klass = PostAcquireRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_allocate_journal_tag>(this, m_journal);
+ m_image_ctx.get_journal_policy()->allocate_tag_on_lock(ctx);
+}
+
+template <typename I>
+void PostAcquireRequest<I>::handle_allocate_journal_tag(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ save_result(r);
+ if (r < 0) {
+ lderr(cct) << "failed to allocate journal tag: " << cpp_strerror(r)
+ << dendl;
+ send_close_journal();
+ return;
+ }
+
+ send_process_plugin_acquire_lock();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::send_process_plugin_acquire_lock() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PostAcquireRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_process_plugin_acquire_lock>(this);
+ m_image_ctx.plugin_registry->acquired_exclusive_lock(ctx);
+}
+
+template <typename I>
+void PostAcquireRequest<I>::handle_process_plugin_acquire_lock(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ save_result(r);
+ if (r < 0) {
+ lderr(cct) << "failed to process plugins: " << cpp_strerror(r)
+ << dendl;
+ send_process_plugin_release_lock();
+ return;
+ }
+
+ finish();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::send_process_plugin_release_lock() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PostAcquireRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_process_plugin_release_lock>(this);
+ m_image_ctx.plugin_registry->prerelease_exclusive_lock(ctx);
+}
+
+template <typename I>
+void PostAcquireRequest<I>::handle_process_plugin_release_lock(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ save_result(r);
+ if (r < 0) {
+ lderr(cct) << "failed to release plugins: " << cpp_strerror(r)
+ << dendl;
+ }
+ send_close_journal();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::send_close_journal() {
+ if (m_journal == nullptr) {
+ send_close_object_map();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PostAcquireRequest<I>;
+ Context *ctx = create_context_callback<klass, &klass::handle_close_journal>(
+ this);
+ m_journal->close(ctx);
+}
+
+template <typename I>
+void PostAcquireRequest<I>::handle_close_journal(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ save_result(r);
+ if (r < 0) {
+ lderr(cct) << "failed to close journal: " << cpp_strerror(r) << dendl;
+ }
+
+ send_close_object_map();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::send_open_object_map() {
+ if (!m_image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
+ send_open_journal();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PostAcquireRequest<I>;
+ Context *ctx = create_context_callback<klass, &klass::handle_open_object_map>(
+ this);
+
+ m_object_map = m_image_ctx.create_object_map(CEPH_NOSNAP);
+ m_object_map->open(ctx);
+}
+
+template <typename I>
+void PostAcquireRequest<I>::handle_open_object_map(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to open object map: " << cpp_strerror(r) << dendl;
+ m_object_map->put();
+ m_object_map = nullptr;
+
+ if (r != -EFBIG) {
+ save_result(r);
+ revert();
+ finish();
+ return;
+ }
+ }
+
+ send_open_journal();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::send_close_object_map() {
+ if (m_object_map == nullptr) {
+ revert();
+ finish();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PostAcquireRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_close_object_map>(this);
+ m_object_map->close(ctx);
+}
+
+template <typename I>
+void PostAcquireRequest<I>::handle_close_object_map(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to close object map: " << cpp_strerror(r) << dendl;
+ }
+
+ revert();
+ finish();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::apply() {
+ {
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ ceph_assert(m_image_ctx.object_map == nullptr);
+ m_image_ctx.object_map = m_object_map;
+
+ ceph_assert(m_image_ctx.journal == nullptr);
+ m_image_ctx.journal = m_journal;
+ }
+
+ m_prepare_lock_completed = true;
+ m_image_ctx.state->handle_prepare_lock_complete();
+}
+
+template <typename I>
+void PostAcquireRequest<I>::revert() {
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ m_image_ctx.object_map = nullptr;
+ m_image_ctx.journal = nullptr;
+
+ if (m_object_map) {
+ m_object_map->put();
+ }
+ if (m_journal) {
+ m_journal->put();
+ }
+
+ ceph_assert(m_error_result < 0);
+}
+
+template <typename I>
+void PostAcquireRequest<I>::finish() {
+ m_on_finish->complete(m_error_result);
+ delete this;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::PostAcquireRequest<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/PostAcquireRequest.h b/src/librbd/exclusive_lock/PostAcquireRequest.h
new file mode 100644
index 000000000..2f7efdf07
--- /dev/null
+++ b/src/librbd/exclusive_lock/PostAcquireRequest.h
@@ -0,0 +1,124 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_POST_ACQUIRE_REQUEST_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_POST_ACQUIRE_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/buffer.h"
+#include "librbd/ImageCtx.h"
+#include "msg/msg_types.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+namespace exclusive_lock {
+
+template <typename ImageCtxT = ImageCtx>
+class PostAcquireRequest {
+public:
+ static PostAcquireRequest* create(ImageCtxT &image_ctx, Context *on_acquire,
+ Context *on_finish);
+
+ ~PostAcquireRequest();
+ void send();
+
+private:
+
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * |
+ * v
+ * REFRESH (skip if not
+ * | needed)
+ * v
+ * OPEN_OBJECT_MAP (skip if
+ * | disabled)
+ * v
+ * OPEN_JOURNAL (skip if
+ * | * disabled)
+ * | *
+ * | * * * * * * * *
+ * v *
+ * ALLOCATE_JOURNAL_TAG *
+ * | * *
+ * | * *
+ * v * *
+ * PROCESS_PLUGIN_ACQUIRE*
+ * | * *
+ * | * *
+ * | v v v
+ * | PROCESS_PLUGIN_RELEASE
+ * | |
+ * | v
+ * | CLOSE_JOURNAL
+ * | |
+ * | v
+ * | CLOSE_OBJECT_MAP
+ * | |
+ * v |
+ * <finish> <----------/
+ *
+ * @endverbatim
+ */
+
+ PostAcquireRequest(ImageCtxT &image_ctx, Context *on_acquire,
+ Context *on_finish);
+
+ ImageCtxT &m_image_ctx;
+ Context *m_on_acquire;
+ Context *m_on_finish;
+
+ decltype(m_image_ctx.object_map) m_object_map;
+ decltype(m_image_ctx.journal) m_journal;
+
+ bool m_prepare_lock_completed = false;
+ int m_error_result;
+
+ void send_refresh();
+ void handle_refresh(int r);
+
+ void send_open_journal();
+ void handle_open_journal(int r);
+
+ void send_allocate_journal_tag();
+ void handle_allocate_journal_tag(int r);
+
+ void send_open_object_map();
+ void handle_open_object_map(int r);
+
+ void send_close_journal();
+ void handle_close_journal(int r);
+
+ void send_close_object_map();
+ void handle_close_object_map(int r);
+
+ void send_process_plugin_acquire_lock();
+ void handle_process_plugin_acquire_lock(int r);
+
+ void send_process_plugin_release_lock();
+ void handle_process_plugin_release_lock(int r);
+
+ void apply();
+ void revert();
+
+ void finish();
+
+ void save_result(int result) {
+ if (m_error_result == 0 && result < 0) {
+ m_error_result = result;
+ }
+ }
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+extern template class librbd::exclusive_lock::PostAcquireRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_POST_ACQUIRE_REQUEST_H
diff --git a/src/librbd/exclusive_lock/PreAcquireRequest.cc b/src/librbd/exclusive_lock/PreAcquireRequest.cc
new file mode 100644
index 000000000..feb0913d7
--- /dev/null
+++ b/src/librbd/exclusive_lock/PreAcquireRequest.cc
@@ -0,0 +1,95 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/exclusive_lock/PreAcquireRequest.h"
+#include "librbd/Utils.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/ImageState.h"
+#include "librbd/asio/ContextWQ.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::exclusive_lock::PreAcquireRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace exclusive_lock {
+
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_callback;
+
+template <typename I>
+PreAcquireRequest<I>* PreAcquireRequest<I>::create(I &image_ctx,
+ Context *on_finish) {
+ return new PreAcquireRequest(image_ctx, on_finish);
+}
+
+template <typename I>
+PreAcquireRequest<I>::PreAcquireRequest(I &image_ctx, Context *on_finish)
+ : m_image_ctx(image_ctx),
+ m_on_finish(create_async_context_callback(image_ctx, on_finish)),
+ m_error_result(0) {
+}
+
+template <typename I>
+PreAcquireRequest<I>::~PreAcquireRequest() {
+}
+
+template <typename I>
+void PreAcquireRequest<I>::send() {
+ send_prepare_lock();
+}
+
+template <typename I>
+void PreAcquireRequest<I>::send_prepare_lock() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ // acquire the lock if the image is not busy performing other actions
+ Context *ctx = create_context_callback<
+ PreAcquireRequest<I>, &PreAcquireRequest<I>::handle_prepare_lock>(this);
+ m_image_ctx.state->prepare_lock(ctx);
+}
+
+template <typename I>
+void PreAcquireRequest<I>::handle_prepare_lock(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ send_flush_notifies();
+}
+
+template <typename I>
+void PreAcquireRequest<I>::send_flush_notifies() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PreAcquireRequest<I>;
+ Context *ctx = create_context_callback<klass, &klass::handle_flush_notifies>(
+ this);
+ m_image_ctx.image_watcher->flush(ctx);
+}
+
+template <typename I>
+void PreAcquireRequest<I>::handle_flush_notifies(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ ceph_assert(r == 0);
+ finish();
+}
+
+template <typename I>
+void PreAcquireRequest<I>::finish() {
+ m_on_finish->complete(m_error_result);
+ delete this;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::PreAcquireRequest<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/PreAcquireRequest.h b/src/librbd/exclusive_lock/PreAcquireRequest.h
new file mode 100644
index 000000000..15d4b2c12
--- /dev/null
+++ b/src/librbd/exclusive_lock/PreAcquireRequest.h
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_PRE_ACQUIRE_REQUEST_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_PRE_ACQUIRE_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/buffer.h"
+#include "librbd/ImageCtx.h"
+#include "msg/msg_types.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+namespace exclusive_lock {
+
+template <typename ImageCtxT = ImageCtx>
+class PreAcquireRequest {
+public:
+ static PreAcquireRequest* create(ImageCtxT &image_ctx, Context *on_finish);
+
+ ~PreAcquireRequest();
+ void send();
+
+private:
+
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * PREPARE_LOCK
+ * |
+ * v
+ * FLUSH_NOTIFIES
+ * |
+ * |
+ * |
+ v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ PreAcquireRequest(ImageCtxT &image_ctx, Context *on_finish);
+
+ ImageCtxT &m_image_ctx;
+ Context *m_on_finish;
+
+ int m_error_result;
+
+ void send_prepare_lock();
+ void handle_prepare_lock(int r);
+
+ void send_flush_notifies();
+ void handle_flush_notifies(int r);
+
+ void finish();
+
+ void save_result(int result) {
+ if (m_error_result == 0 && result < 0) {
+ m_error_result = result;
+ }
+ }
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+extern template class librbd::exclusive_lock::PreAcquireRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_ACQUIRE_REQUEST_H
diff --git a/src/librbd/exclusive_lock/PreReleaseRequest.cc b/src/librbd/exclusive_lock/PreReleaseRequest.cc
new file mode 100644
index 000000000..a9cd1248a
--- /dev/null
+++ b/src/librbd/exclusive_lock/PreReleaseRequest.cc
@@ -0,0 +1,363 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/exclusive_lock/PreReleaseRequest.h"
+#include "common/AsyncOpTracker.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageState.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/Journal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/exclusive_lock/ImageDispatch.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+#include "librbd/io/ImageDispatcherInterface.h"
+#include "librbd/io/ObjectDispatcherInterface.h"
+#include "librbd/io/Types.h"
+#include "librbd/PluginRegistry.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::exclusive_lock::PreReleaseRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace exclusive_lock {
+
+using util::create_async_context_callback;
+using util::create_context_callback;
+
+template <typename I>
+PreReleaseRequest<I>* PreReleaseRequest<I>::create(
+ I &image_ctx, ImageDispatch<I>* image_dispatch, bool shutting_down,
+ AsyncOpTracker &async_op_tracker, Context *on_finish) {
+ return new PreReleaseRequest(image_ctx, image_dispatch, shutting_down,
+ async_op_tracker, on_finish);
+}
+
+template <typename I>
+PreReleaseRequest<I>::PreReleaseRequest(I &image_ctx,
+ ImageDispatch<I>* image_dispatch,
+ bool shutting_down,
+ AsyncOpTracker &async_op_tracker,
+ Context *on_finish)
+ : m_image_ctx(image_ctx), m_image_dispatch(image_dispatch),
+ m_shutting_down(shutting_down), m_async_op_tracker(async_op_tracker),
+ m_on_finish(create_async_context_callback(image_ctx, on_finish)) {
+}
+
+template <typename I>
+PreReleaseRequest<I>::~PreReleaseRequest() {
+ if (!m_shutting_down) {
+ m_image_ctx.state->handle_prepare_lock_complete();
+ }
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send() {
+ send_cancel_op_requests();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_cancel_op_requests() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PreReleaseRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_cancel_op_requests>(this);
+ m_image_ctx.cancel_async_requests(ctx);
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_cancel_op_requests(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ ceph_assert(r == 0);
+
+ send_set_require_lock();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_set_require_lock() {
+ if (!m_image_ctx.test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
+ // exclusive-lock was disabled, no need to block IOs
+ send_wait_for_ops();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PreReleaseRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_set_require_lock>(this);
+
+ // setting the lock as required will automatically cause the IO
+ // queue to re-request the lock if any IO is queued
+ if (m_image_ctx.clone_copy_on_read ||
+ m_image_ctx.test_features(RBD_FEATURE_JOURNALING) ||
+ m_image_ctx.test_features(RBD_FEATURE_DIRTY_CACHE)) {
+ m_image_dispatch->set_require_lock(m_shutting_down,
+ io::DIRECTION_BOTH, ctx);
+ } else {
+ m_image_dispatch->set_require_lock(m_shutting_down,
+ io::DIRECTION_WRITE, ctx);
+ }
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_set_require_lock(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ // IOs are still flushed regardless of the error
+ lderr(cct) << "failed to set lock: " << cpp_strerror(r) << dendl;
+ }
+
+ send_wait_for_ops();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_wait_for_ops() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ Context *ctx = create_context_callback<
+ PreReleaseRequest<I>, &PreReleaseRequest<I>::handle_wait_for_ops>(this);
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_wait_for_ops(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ send_prepare_lock();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_prepare_lock() {
+ if (m_shutting_down) {
+ send_process_plugin_release_lock();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ // release the lock if the image is not busy performing other actions
+ Context *ctx = create_context_callback<
+ PreReleaseRequest<I>, &PreReleaseRequest<I>::handle_prepare_lock>(this);
+ m_image_ctx.state->prepare_lock(ctx);
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_prepare_lock(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ send_process_plugin_release_lock();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_process_plugin_release_lock() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ std::shared_lock owner_lock{m_image_ctx.owner_lock};
+ Context *ctx = create_async_context_callback(m_image_ctx, create_context_callback<
+ PreReleaseRequest<I>,
+ &PreReleaseRequest<I>::handle_process_plugin_release_lock>(this));
+ m_image_ctx.plugin_registry->prerelease_exclusive_lock(ctx);
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_process_plugin_release_lock(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to handle plugins before releasing lock: "
+ << cpp_strerror(r) << dendl;
+ m_image_dispatch->unset_require_lock(io::DIRECTION_BOTH);
+ save_result(r);
+ finish();
+ return;
+ }
+
+ send_invalidate_cache();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_invalidate_cache() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ Context *ctx = create_context_callback<
+ PreReleaseRequest<I>,
+ &PreReleaseRequest<I>::handle_invalidate_cache>(this);
+ m_image_ctx.io_image_dispatcher->invalidate_cache(ctx);
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_invalidate_cache(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0 && r != -EBLOCKLISTED && r != -EBUSY) {
+ lderr(cct) << "failed to invalidate cache: " << cpp_strerror(r)
+ << dendl;
+ m_image_dispatch->unset_require_lock(io::DIRECTION_BOTH);
+ save_result(r);
+ finish();
+ return;
+ }
+
+ send_flush_io();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_flush_io() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ // ensure that all in-flight IO is flushed -- skipping the refresh layer
+ // since it should have been flushed when the lock was required and now
+ // refreshes are disabled / interlocked w/ this state machine.
+ auto ctx = create_context_callback<
+ PreReleaseRequest<I>, &PreReleaseRequest<I>::handle_flush_io>(this);
+ auto aio_comp = io::AioCompletion::create_and_start(
+ ctx, util::get_image_ctx(&m_image_ctx), librbd::io::AIO_TYPE_FLUSH);
+ auto req = io::ImageDispatchSpec::create_flush(
+ m_image_ctx, io::IMAGE_DISPATCH_LAYER_EXCLUSIVE_LOCK, aio_comp,
+ io::FLUSH_SOURCE_EXCLUSIVE_LOCK_SKIP_REFRESH, {});
+ req->send();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_flush_io(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to flush IO: " << cpp_strerror(r) << dendl;
+ }
+
+ send_flush_notifies();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_flush_notifies() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PreReleaseRequest<I>;
+ Context *ctx =
+ create_context_callback<klass, &klass::handle_flush_notifies>(this);
+ m_image_ctx.image_watcher->flush(ctx);
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_flush_notifies(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ ceph_assert(r == 0);
+ send_close_journal();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_close_journal() {
+ {
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ std::swap(m_journal, m_image_ctx.journal);
+ }
+
+ if (m_journal == nullptr) {
+ send_close_object_map();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PreReleaseRequest<I>;
+ Context *ctx = create_context_callback<klass, &klass::handle_close_journal>(
+ this);
+ m_journal->close(ctx);
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_close_journal(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ // error implies some journal events were not flushed -- continue
+ lderr(cct) << "failed to close journal: " << cpp_strerror(r) << dendl;
+ }
+
+ m_journal->put();
+ m_journal = nullptr;
+
+ send_close_object_map();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_close_object_map() {
+ {
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ std::swap(m_object_map, m_image_ctx.object_map);
+ }
+
+ if (m_object_map == nullptr) {
+ send_unlock();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ using klass = PreReleaseRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_close_object_map>(this, m_object_map);
+ m_object_map->close(ctx);
+}
+
+template <typename I>
+void PreReleaseRequest<I>::handle_close_object_map(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to close object map: " << cpp_strerror(r) << dendl;
+ }
+ m_object_map->put();
+
+ send_unlock();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::send_unlock() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << dendl;
+
+ finish();
+}
+
+template <typename I>
+void PreReleaseRequest<I>::finish() {
+ m_on_finish->complete(m_error_result);
+ delete this;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::PreReleaseRequest<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/PreReleaseRequest.h b/src/librbd/exclusive_lock/PreReleaseRequest.h
new file mode 100644
index 000000000..426337943
--- /dev/null
+++ b/src/librbd/exclusive_lock/PreReleaseRequest.h
@@ -0,0 +1,139 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_PRE_RELEASE_REQUEST_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_PRE_RELEASE_REQUEST_H
+
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class AsyncOpTracker;
+class Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace exclusive_lock {
+
+template <typename> struct ImageDispatch;
+
+template <typename ImageCtxT = ImageCtx>
+class PreReleaseRequest {
+public:
+ static PreReleaseRequest* create(ImageCtxT &image_ctx,
+ ImageDispatch<ImageCtxT>* image_dispatch,
+ bool shutting_down,
+ AsyncOpTracker &async_op_tracker,
+ Context *on_finish);
+
+ ~PreReleaseRequest();
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * CANCEL_OP_REQUESTS
+ * |
+ * v
+ * SET_REQUIRE_LOCK
+ * |
+ * v
+ * WAIT_FOR_OPS
+ * |
+ * v
+ * PREPARE_LOCK
+ * |
+ * v
+ * PROCESS_PLUGIN_RELEASE
+ * |
+ * v
+ * SHUT_DOWN_IMAGE_CACHE
+ * |
+ * v
+ * INVALIDATE_CACHE
+ * |
+ * v
+ * FLUSH_IO
+ * |
+ * v
+ * FLUSH_NOTIFIES . . . . . . . . . . . . . .
+ * | .
+ * v .
+ * CLOSE_JOURNAL .
+ * | (journal disabled, .
+ * v object map enabled) .
+ * CLOSE_OBJECT_MAP < . . . . . . . . . . . .
+ * | .
+ * v (object map disabled) .
+ * <finish> < . . . . . . . . . . . . . . . . .
+ *
+ * @endverbatim
+ */
+
+ PreReleaseRequest(ImageCtxT &image_ctx,
+ ImageDispatch<ImageCtxT>* image_dispatch,
+ bool shutting_down, AsyncOpTracker &async_op_tracker,
+ Context *on_finish);
+
+ ImageCtxT &m_image_ctx;
+ ImageDispatch<ImageCtxT>* m_image_dispatch;
+ bool m_shutting_down;
+ AsyncOpTracker &m_async_op_tracker;
+ Context *m_on_finish;
+
+ int m_error_result = 0;
+
+ decltype(m_image_ctx.object_map) m_object_map = nullptr;
+ decltype(m_image_ctx.journal) m_journal = nullptr;
+
+ void send_cancel_op_requests();
+ void handle_cancel_op_requests(int r);
+
+ void send_set_require_lock();
+ void handle_set_require_lock(int r);
+
+ void send_wait_for_ops();
+ void handle_wait_for_ops(int r);
+
+ void send_prepare_lock();
+ void handle_prepare_lock(int r);
+
+ void send_process_plugin_release_lock();
+ void handle_process_plugin_release_lock(int r);
+
+ void send_invalidate_cache();
+ void handle_invalidate_cache(int r);
+
+ void send_flush_io();
+ void handle_flush_io(int r);
+
+ void send_flush_notifies();
+ void handle_flush_notifies(int r);
+
+ void send_close_journal();
+ void handle_close_journal(int r);
+
+ void send_close_object_map();
+ void handle_close_object_map(int r);
+
+ void send_unlock();
+
+ void finish();
+
+ void save_result(int result) {
+ if (m_error_result == 0 && result < 0) {
+ m_error_result = result;
+ }
+ }
+
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_PRE_RELEASE_REQUEST_H
diff --git a/src/librbd/exclusive_lock/StandardPolicy.cc b/src/librbd/exclusive_lock/StandardPolicy.cc
new file mode 100644
index 000000000..519e9618e
--- /dev/null
+++ b/src/librbd/exclusive_lock/StandardPolicy.cc
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/exclusive_lock/StandardPolicy.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ExclusiveLock.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ExclusiveLock::StandardPolicy "
+
+namespace librbd {
+namespace exclusive_lock {
+
+template <typename I>
+int StandardPolicy<I>::lock_requested(bool force) {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx->owner_lock));
+ ceph_assert(m_image_ctx->exclusive_lock != nullptr);
+
+ ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
+ << dendl;
+
+ return -EROFS;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::StandardPolicy<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/StandardPolicy.h b/src/librbd/exclusive_lock/StandardPolicy.h
new file mode 100644
index 000000000..dd4e19050
--- /dev/null
+++ b/src/librbd/exclusive_lock/StandardPolicy.h
@@ -0,0 +1,37 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_STANDARD_POLICY_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_STANDARD_POLICY_H
+
+#include "librbd/exclusive_lock/Policy.h"
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace exclusive_lock {
+
+template <typename ImageCtxT = ImageCtx>
+class StandardPolicy : public Policy {
+public:
+ StandardPolicy(ImageCtxT* image_ctx) : m_image_ctx(image_ctx) {
+ }
+
+ bool may_auto_request_lock() override {
+ return false;
+ }
+
+ int lock_requested(bool force) override;
+
+private:
+ ImageCtxT* m_image_ctx;
+
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+extern template class librbd::exclusive_lock::StandardPolicy<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_STANDARD_POLICY_H