summaryrefslogtreecommitdiffstats
path: root/src/librbd/object_map
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/librbd/object_map
parentInitial commit. (diff)
downloadceph-upstream/16.2.11+ds.tar.xz
ceph-upstream/16.2.11+ds.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/librbd/object_map')
-rw-r--r--src/librbd/object_map/CreateRequest.cc94
-rw-r--r--src/librbd/object_map/CreateRequest.h59
-rw-r--r--src/librbd/object_map/DiffRequest.cc258
-rw-r--r--src/librbd/object_map/DiffRequest.h87
-rw-r--r--src/librbd/object_map/InvalidateRequest.cc83
-rw-r--r--src/librbd/object_map/InvalidateRequest.h45
-rw-r--r--src/librbd/object_map/LockRequest.cc157
-rw-r--r--src/librbd/object_map/LockRequest.h75
-rw-r--r--src/librbd/object_map/RefreshRequest.cc311
-rw-r--r--src/librbd/object_map/RefreshRequest.h102
-rw-r--r--src/librbd/object_map/RemoveRequest.cc88
-rw-r--r--src/librbd/object_map/RemoveRequest.h63
-rw-r--r--src/librbd/object_map/Request.cc74
-rw-r--r--src/librbd/object_map/Request.h66
-rw-r--r--src/librbd/object_map/ResizeRequest.cc65
-rw-r--r--src/librbd/object_map/ResizeRequest.h52
-rw-r--r--src/librbd/object_map/SnapshotCreateRequest.cc147
-rw-r--r--src/librbd/object_map/SnapshotCreateRequest.h80
-rw-r--r--src/librbd/object_map/SnapshotRemoveRequest.cc227
-rw-r--r--src/librbd/object_map/SnapshotRemoveRequest.h88
-rw-r--r--src/librbd/object_map/SnapshotRollbackRequest.cc131
-rw-r--r--src/librbd/object_map/SnapshotRollbackRequest.h74
-rw-r--r--src/librbd/object_map/Types.h20
-rw-r--r--src/librbd/object_map/UnlockRequest.cc66
-rw-r--r--src/librbd/object_map/UnlockRequest.h47
-rw-r--r--src/librbd/object_map/UpdateRequest.cc129
-rw-r--r--src/librbd/object_map/UpdateRequest.h106
27 files changed, 2794 insertions, 0 deletions
diff --git a/src/librbd/object_map/CreateRequest.cc b/src/librbd/object_map/CreateRequest.cc
new file mode 100644
index 000000000..d26f929fa
--- /dev/null
+++ b/src/librbd/object_map/CreateRequest.cc
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/CreateRequest.h"
+#include "include/ceph_assert.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "osdc/Striper.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::CreateRequest: "
+
+namespace librbd {
+namespace object_map {
+
+using util::create_context_callback;
+using util::create_rados_callback;
+
+template <typename I>
+CreateRequest<I>::CreateRequest(I *image_ctx, Context *on_finish)
+ : m_image_ctx(image_ctx), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void CreateRequest<I>::send() {
+ CephContext *cct = m_image_ctx->cct;
+
+ uint64_t max_size = m_image_ctx->size;
+
+ {
+ std::unique_lock image_locker{m_image_ctx->image_lock};
+ m_snap_ids.push_back(CEPH_NOSNAP);
+ for (auto it : m_image_ctx->snap_info) {
+ max_size = std::max(max_size, it.second.size);
+ m_snap_ids.push_back(it.first);
+ }
+
+ if (ObjectMap<>::is_compatible(m_image_ctx->layout, max_size)) {
+ send_object_map_resize();
+ return;
+ }
+ }
+
+ lderr(cct) << "image size not compatible with object map" << dendl;
+ m_on_finish->complete(-EINVAL);
+}
+
+template <typename I>
+void CreateRequest<I>::send_object_map_resize() {
+ CephContext *cct = m_image_ctx->cct;
+ ldout(cct, 20) << __func__ << dendl;
+
+ Context *ctx = create_context_callback<
+ CreateRequest<I>, &CreateRequest<I>::handle_object_map_resize>(this);
+ C_Gather *gather_ctx = new C_Gather(cct, ctx);
+
+ for (auto snap_id : m_snap_ids) {
+ librados::ObjectWriteOperation op;
+ uint64_t snap_size = m_image_ctx->get_image_size(snap_id);
+
+ cls_client::object_map_resize(&op, Striper::get_num_objects(
+ m_image_ctx->layout, snap_size),
+ OBJECT_NONEXISTENT);
+
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id, snap_id));
+ librados::AioCompletion *comp = create_rados_callback(gather_ctx->new_sub());
+ int r = m_image_ctx->md_ctx.aio_operate(oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+ gather_ctx->activate();
+}
+
+template <typename I>
+Context *CreateRequest<I>::handle_object_map_resize(int *result) {
+ CephContext *cct = m_image_ctx->cct;
+ ldout(cct, 20) << __func__ << ": r=" << *result << dendl;
+
+ if (*result < 0) {
+ lderr(cct) << "object map resize failed: " << cpp_strerror(*result)
+ << dendl;
+ }
+ return m_on_finish;
+}
+
+} // namespace object_map
+} // namespace librbd
+
+template class librbd::object_map::CreateRequest<librbd::ImageCtx>;
diff --git a/src/librbd/object_map/CreateRequest.h b/src/librbd/object_map/CreateRequest.h
new file mode 100644
index 000000000..33984cda1
--- /dev/null
+++ b/src/librbd/object_map/CreateRequest.h
@@ -0,0 +1,59 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_CREATE_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_CREATE_REQUEST_H
+
+#include "include/buffer.h"
+#include <map>
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+template <typename ImageCtxT = ImageCtx>
+class CreateRequest {
+public:
+ static CreateRequest *create(ImageCtxT *image_ctx, Context *on_finish) {
+ return new CreateRequest(image_ctx, on_finish);
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * | . . .
+ * v v .
+ * OBJECT_MAP_RESIZE . (for every snapshot)
+ * | . .
+ * v . . .
+ * <finis>
+ *
+ * @endverbatim
+ */
+
+ CreateRequest(ImageCtxT *image_ctx, Context *on_finish);
+
+ ImageCtxT *m_image_ctx;
+ Context *m_on_finish;
+
+ std::vector<uint64_t> m_snap_ids;
+
+ void send_object_map_resize();
+ Context *handle_object_map_resize(int *result);
+};
+
+} // namespace object_map
+} // namespace librbd
+
+extern template class librbd::object_map::CreateRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_CREATE_REQUEST_H
diff --git a/src/librbd/object_map/DiffRequest.cc b/src/librbd/object_map/DiffRequest.cc
new file mode 100644
index 000000000..566e98ac0
--- /dev/null
+++ b/src/librbd/object_map/DiffRequest.cc
@@ -0,0 +1,258 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/DiffRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "osdc/Striper.h"
+#include <string>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::DiffRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace object_map {
+
+using util::create_rados_callback;
+
+template <typename I>
+void DiffRequest<I>::send() {
+ auto cct = m_image_ctx->cct;
+
+ if (m_snap_id_start == CEPH_NOSNAP || m_snap_id_start > m_snap_id_end) {
+ lderr(cct) << "invalid start/end snap ids: "
+ << "snap_id_start=" << m_snap_id_start << ", "
+ << "snap_id_end=" << m_snap_id_end << dendl;
+ finish(-EINVAL);
+ return;
+ } else if (m_snap_id_start == m_snap_id_end) {
+ // no delta between the same snapshot
+ finish(0);
+ return;
+ }
+
+ m_object_diff_state->clear();
+
+ // collect all the snap ids in the provided range (inclusive)
+ if (m_snap_id_start != 0) {
+ m_snap_ids.insert(m_snap_id_start);
+ }
+
+ std::shared_lock image_locker{m_image_ctx->image_lock};
+ auto snap_info_it = m_image_ctx->snap_info.upper_bound(m_snap_id_start);
+ auto snap_info_it_end = m_image_ctx->snap_info.lower_bound(m_snap_id_end);
+ for (; snap_info_it != snap_info_it_end; ++snap_info_it) {
+ m_snap_ids.insert(snap_info_it->first);
+ }
+ m_snap_ids.insert(m_snap_id_end);
+
+ load_object_map(&image_locker);
+}
+
+template <typename I>
+void DiffRequest<I>::load_object_map(
+ std::shared_lock<ceph::shared_mutex>* image_locker) {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx->image_lock));
+
+ if (m_snap_ids.empty()) {
+ image_locker->unlock();
+
+ finish(0);
+ return;
+ }
+
+ m_current_snap_id = *m_snap_ids.begin();
+ m_snap_ids.erase(m_current_snap_id);
+
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 10) << "snap_id=" << m_current_snap_id << dendl;
+
+ if ((m_image_ctx->features & RBD_FEATURE_FAST_DIFF) == 0) {
+ image_locker->unlock();
+
+ ldout(cct, 10) << "fast-diff feature not enabled" << dendl;
+ finish(-EINVAL);
+ return;
+ }
+
+ // ignore ENOENT with intermediate snapshots since deleted
+ // snaps will get merged with later snapshots
+ m_ignore_enoent = (m_current_snap_id != m_snap_id_start &&
+ m_current_snap_id != m_snap_id_end);
+
+ if (m_current_snap_id == CEPH_NOSNAP) {
+ m_current_size = m_image_ctx->size;
+ } else {
+ auto snap_it = m_image_ctx->snap_info.find(m_current_snap_id);
+ if (snap_it == m_image_ctx->snap_info.end()) {
+ ldout(cct, 10) << "snapshot " << m_current_snap_id << " does not exist"
+ << dendl;
+ if (!m_ignore_enoent) {
+ image_locker->unlock();
+
+ finish(-ENOENT);
+ return;
+ }
+
+ load_object_map(image_locker);
+ return;
+ }
+
+ m_current_size = snap_it->second.size;
+ }
+
+ uint64_t flags = 0;
+ int r = m_image_ctx->get_flags(m_current_snap_id, &flags);
+ if (r < 0) {
+ image_locker->unlock();
+
+ lderr(cct) << "failed to retrieve image flags: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+ image_locker->unlock();
+
+ if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
+ ldout(cct, 1) << "cannot perform fast diff on invalid object map"
+ << dendl;
+ finish(-EINVAL);
+ return;
+ }
+
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id,
+ m_current_snap_id));
+
+ librados::ObjectReadOperation op;
+ cls_client::object_map_load_start(&op);
+
+ m_out_bl.clear();
+ auto aio_comp = create_rados_callback<
+ DiffRequest<I>, &DiffRequest<I>::handle_load_object_map>(this);
+ r = m_image_ctx->md_ctx.aio_operate(oid, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void DiffRequest<I>::handle_load_object_map(int r) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = cls_client::object_map_load_finish(&bl_it, &m_object_map);
+ }
+
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id,
+ m_current_snap_id));
+ if (r == -ENOENT && m_ignore_enoent) {
+ ldout(cct, 10) << "object map " << oid << " does not exist" << dendl;
+
+ std::shared_lock image_locker{m_image_ctx->image_lock};
+ load_object_map(&image_locker);
+ return;
+ } else if (r < 0) {
+ lderr(cct) << "failed to load object map: " << oid << dendl;
+ finish(r);
+ return;
+ }
+ ldout(cct, 20) << "loaded object map " << oid << dendl;
+
+ uint64_t num_objs = Striper::get_num_objects(m_image_ctx->layout,
+ m_current_size);
+ if (m_object_map.size() < num_objs) {
+ ldout(cct, 1) << "object map too small: "
+ << m_object_map.size() << " < " << num_objs << dendl;
+ finish(-EINVAL);
+ return;
+ } else {
+ m_object_map.resize(num_objs);
+ }
+
+ size_t prev_object_diff_state_size = m_object_diff_state->size();
+ if (prev_object_diff_state_size < num_objs) {
+ // the diff state should be the largest of all snapshots in the set
+ m_object_diff_state->resize(num_objs);
+ }
+ if (m_object_map.size() < m_object_diff_state->size()) {
+ // the image was shrunk so expanding the object map will flag end objects
+ // as non-existent and they will be compared against the previous object
+ // diff state
+ m_object_map.resize(m_object_diff_state->size());
+ }
+
+ uint64_t overlap = std::min(m_object_map.size(), prev_object_diff_state_size);
+ auto it = m_object_map.begin();
+ auto overlap_end_it = it + overlap;
+ auto diff_it = m_object_diff_state->begin();
+ uint64_t i = 0;
+ for (; it != overlap_end_it; ++it, ++diff_it, ++i) {
+ uint8_t object_map_state = *it;
+ uint8_t prev_object_diff_state = *diff_it;
+ if (object_map_state == OBJECT_EXISTS ||
+ object_map_state == OBJECT_PENDING ||
+ (object_map_state == OBJECT_EXISTS_CLEAN &&
+ prev_object_diff_state != DIFF_STATE_DATA &&
+ prev_object_diff_state != DIFF_STATE_DATA_UPDATED)) {
+ *diff_it = DIFF_STATE_DATA_UPDATED;
+ } else if (object_map_state == OBJECT_NONEXISTENT &&
+ prev_object_diff_state != DIFF_STATE_HOLE &&
+ prev_object_diff_state != DIFF_STATE_HOLE_UPDATED) {
+ *diff_it = DIFF_STATE_HOLE_UPDATED;
+ }
+
+ ldout(cct, 20) << "object state: " << i << " "
+ << static_cast<uint32_t>(prev_object_diff_state)
+ << "->" << static_cast<uint32_t>(*diff_it) << " ("
+ << static_cast<uint32_t>(object_map_state) << ")"
+ << dendl;
+ }
+ ldout(cct, 20) << "computed overlap diffs" << dendl;
+
+ bool diff_from_start = (m_snap_id_start == 0);
+ auto end_it = m_object_map.end();
+ if (m_object_map.size() > prev_object_diff_state_size) {
+ for (; it != end_it; ++it,++diff_it, ++i) {
+ uint8_t object_map_state = *it;
+ if (object_map_state == OBJECT_NONEXISTENT) {
+ *diff_it = DIFF_STATE_HOLE;
+ } else if (diff_from_start ||
+ (m_object_diff_state_valid &&
+ object_map_state != OBJECT_EXISTS_CLEAN)) {
+ *diff_it = DIFF_STATE_DATA_UPDATED;
+ } else {
+ *diff_it = DIFF_STATE_DATA;
+ }
+
+ ldout(cct, 20) << "object state: " << i << " "
+ << "->" << static_cast<uint32_t>(*diff_it) << " ("
+ << static_cast<uint32_t>(*it) << ")" << dendl;
+ }
+ }
+ ldout(cct, 20) << "computed resize diffs" << dendl;
+
+ m_object_diff_state_valid = true;
+
+ std::shared_lock image_locker{m_image_ctx->image_lock};
+ load_object_map(&image_locker);
+}
+
+template <typename I>
+void DiffRequest<I>::finish(int r) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace object_map
+} // namespace librbd
+
+template class librbd::object_map::DiffRequest<librbd::ImageCtx>;
diff --git a/src/librbd/object_map/DiffRequest.h b/src/librbd/object_map/DiffRequest.h
new file mode 100644
index 000000000..e83a1629e
--- /dev/null
+++ b/src/librbd/object_map/DiffRequest.h
@@ -0,0 +1,87 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_DIFF_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_DIFF_REQUEST_H
+
+#include "include/int_types.h"
+#include "common/bit_vector.hpp"
+#include "common/ceph_mutex.h"
+#include "librbd/object_map/Types.h"
+#include <set>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace object_map {
+
+template <typename ImageCtxT>
+class DiffRequest {
+public:
+ static DiffRequest* create(ImageCtxT* image_ctx, uint64_t snap_id_start,
+ uint64_t snap_id_end,
+ BitVector<2>* object_diff_state,
+ Context* on_finish) {
+ return new DiffRequest(image_ctx, snap_id_start, snap_id_end,
+ object_diff_state, on_finish);
+ }
+
+ DiffRequest(ImageCtxT* image_ctx, uint64_t snap_id_start,
+ uint64_t snap_id_end, BitVector<2>* object_diff_state,
+ Context* on_finish)
+ : m_image_ctx(image_ctx), m_snap_id_start(snap_id_start),
+ m_snap_id_end(snap_id_end), m_object_diff_state(object_diff_state),
+ m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * | /---------\
+ * | | |
+ * v v |
+ * LOAD_OBJECT_MAP ---/
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ ImageCtxT* m_image_ctx;
+ uint64_t m_snap_id_start;
+ uint64_t m_snap_id_end;
+ BitVector<2>* m_object_diff_state;
+ Context* m_on_finish;
+
+ std::set<uint64_t> m_snap_ids;
+ uint64_t m_current_snap_id = 0;
+ bool m_ignore_enoent = false;
+
+ uint64_t m_current_size = 0;
+
+ BitVector<2> m_object_map;
+ bool m_object_diff_state_valid = false;
+
+ bufferlist m_out_bl;
+
+ void load_object_map(std::shared_lock<ceph::shared_mutex>* image_locker);
+ void handle_load_object_map(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace object_map
+} // namespace librbd
+
+extern template class librbd::object_map::DiffRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_DIFF_REQUEST_H
diff --git a/src/librbd/object_map/InvalidateRequest.cc b/src/librbd/object_map/InvalidateRequest.cc
new file mode 100644
index 000000000..bf2db9660
--- /dev/null
+++ b/src/librbd/object_map/InvalidateRequest.cc
@@ -0,0 +1,83 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/InvalidateRequest.h"
+#include "common/dout.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::InvalidateRequest: "
+
+namespace librbd {
+namespace object_map {
+
+template <typename I>
+InvalidateRequest<I>* InvalidateRequest<I>::create(I &image_ctx,
+ uint64_t snap_id, bool force,
+ Context *on_finish) {
+ return new InvalidateRequest<I>(image_ctx, snap_id, force, on_finish);
+}
+
+template <typename I>
+void InvalidateRequest<I>::send() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+ ceph_assert(ceph_mutex_is_wlocked(image_ctx.image_lock));
+
+ uint64_t snap_flags;
+ int r = image_ctx.get_flags(m_snap_id, &snap_flags);
+ if (r < 0 || ((snap_flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0)) {
+ this->async_complete(r);
+ return;
+ }
+
+ CephContext *cct = image_ctx.cct;
+ lderr(cct) << this << " invalidating object map in-memory" << dendl;
+
+ // update in-memory flags
+ uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID;
+ if ((image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) {
+ flags |= RBD_FLAG_FAST_DIFF_INVALID;
+ }
+
+ r = image_ctx.update_flags(m_snap_id, flags, true);
+ if (r < 0) {
+ this->async_complete(r);
+ return;
+ }
+
+ // do not update on-disk flags if not image owner
+ if (image_ctx.image_watcher == nullptr ||
+ (!m_force && m_snap_id == CEPH_NOSNAP &&
+ image_ctx.exclusive_lock != nullptr &&
+ !image_ctx.exclusive_lock->is_lock_owner())) {
+ this->async_complete(-EROFS);
+ return;
+ }
+
+ lderr(cct) << this << " invalidating object map on-disk" << dendl;
+ librados::ObjectWriteOperation op;
+ cls_client::set_flags(&op, m_snap_id, flags, flags);
+
+ librados::AioCompletion *rados_completion =
+ this->create_callback_completion();
+ r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, rados_completion,
+ &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+bool InvalidateRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ lderr(cct) << this << " " << __func__ << ": r=" << r << dendl;
+ return true;
+}
+
+} // namespace object_map
+} // namespace librbd
+
+template class librbd::object_map::InvalidateRequest<librbd::ImageCtx>;
diff --git a/src/librbd/object_map/InvalidateRequest.h b/src/librbd/object_map/InvalidateRequest.h
new file mode 100644
index 000000000..ce15bb2d3
--- /dev/null
+++ b/src/librbd/object_map/InvalidateRequest.h
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_INVALIDATE_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_INVALIDATE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/AsyncRequest.h"
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+template <typename ImageCtxT = ImageCtx>
+class InvalidateRequest : public AsyncRequest<ImageCtxT> {
+public:
+ static InvalidateRequest* create(ImageCtxT &image_ctx, uint64_t snap_id,
+ bool force, Context *on_finish);
+
+ InvalidateRequest(ImageCtxT &image_ctx, uint64_t snap_id, bool force,
+ Context *on_finish)
+ : AsyncRequest<ImageCtxT>(image_ctx, on_finish),
+ m_snap_id(snap_id), m_force(force) {
+ }
+
+ void send() override;
+
+protected:
+ bool should_complete(int r) override;
+
+private:
+ uint64_t m_snap_id;
+ bool m_force;
+};
+
+} // namespace object_map
+} // namespace librbd
+
+extern template class librbd::object_map::InvalidateRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_INVALIDATE_REQUEST_H
diff --git a/src/librbd/object_map/LockRequest.cc b/src/librbd/object_map/LockRequest.cc
new file mode 100644
index 000000000..b9dc3c42e
--- /dev/null
+++ b/src/librbd/object_map/LockRequest.cc
@@ -0,0 +1,157 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/LockRequest.h"
+#include "cls/lock/cls_lock_client.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::LockRequest: "
+
+namespace librbd {
+namespace object_map {
+
+using util::create_rados_callback;
+
+template <typename I>
+LockRequest<I>::LockRequest(I &image_ctx, Context *on_finish)
+ : m_image_ctx(image_ctx), m_on_finish(on_finish), m_broke_lock(false) {
+}
+
+template <typename I>
+void LockRequest<I>::send() {
+ send_lock();
+}
+
+template <typename I>
+void LockRequest<I>::send_lock() {
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+ ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl;
+
+ librados::ObjectWriteOperation op;
+ rados::cls::lock::lock(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", "", "",
+ utime_t(), 0);
+
+ using klass = LockRequest<I>;
+ librados::AioCompletion *rados_completion =
+ create_rados_callback<klass, &klass::handle_lock>(this);
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *LockRequest<I>::handle_lock(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl;
+
+ if (*ret_val == 0) {
+ return m_on_finish;
+ } else if (*ret_val == -EEXIST) {
+ // already locked by myself
+ *ret_val = 0;
+ return m_on_finish;
+ } else if (m_broke_lock || *ret_val != -EBUSY) {
+ lderr(cct) << "failed to lock object map: " << cpp_strerror(*ret_val)
+ << dendl;
+ *ret_val = 0;
+ return m_on_finish;
+ }
+
+ send_get_lock_info();
+ return nullptr;
+}
+
+template <typename I>
+void LockRequest<I>::send_get_lock_info() {
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+ ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl;
+
+ librados::ObjectReadOperation op;
+ rados::cls::lock::get_lock_info_start(&op, RBD_LOCK_NAME);
+
+ using klass = LockRequest<I>;
+ librados::AioCompletion *rados_completion =
+ create_rados_callback<klass, &klass::handle_get_lock_info>(this);
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *LockRequest<I>::handle_get_lock_info(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl;
+
+ if (*ret_val == -ENOENT) {
+ send_lock();
+ return nullptr;
+ }
+
+ ClsLockType lock_type;
+ std::string lock_tag;
+ if (*ret_val == 0) {
+ auto it = m_out_bl.cbegin();
+ *ret_val = rados::cls::lock::get_lock_info_finish(&it, &m_lockers,
+ &lock_type, &lock_tag);
+ }
+ if (*ret_val < 0) {
+ lderr(cct) << "failed to list object map locks: " << cpp_strerror(*ret_val)
+ << dendl;
+ *ret_val = 0;
+ return m_on_finish;
+ }
+
+ send_break_locks();
+ return nullptr;
+}
+
+template <typename I>
+void LockRequest<I>::send_break_locks() {
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+ ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << ", "
+ << "num_lockers=" << m_lockers.size() << dendl;
+
+ librados::ObjectWriteOperation op;
+ for (auto &locker : m_lockers) {
+ rados::cls::lock::break_lock(&op, RBD_LOCK_NAME, locker.first.cookie,
+ locker.first.locker);
+ }
+
+ using klass = LockRequest<I>;
+ librados::AioCompletion *rados_completion =
+ create_rados_callback<klass, &klass::handle_break_locks>(this);
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *LockRequest<I>::handle_break_locks(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl;
+
+ m_broke_lock = true;
+ if (*ret_val == 0 || *ret_val == -ENOENT) {
+ send_lock();
+ return nullptr;
+ }
+
+ lderr(cct) << "failed to break object map lock: " << cpp_strerror(*ret_val)
+ << dendl;
+ *ret_val = 0;
+ return m_on_finish;
+}
+
+} // namespace object_map
+} // namespace librbd
+
+template class librbd::object_map::LockRequest<librbd::ImageCtx>;
diff --git a/src/librbd/object_map/LockRequest.h b/src/librbd/object_map/LockRequest.h
new file mode 100644
index 000000000..0333548e6
--- /dev/null
+++ b/src/librbd/object_map/LockRequest.h
@@ -0,0 +1,75 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_LOCK_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_LOCK_REQUEST_H
+
+#include "include/buffer.h"
+#include "cls/lock/cls_lock_types.h"
+#include <map>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+template <typename ImageCtxT = ImageCtx>
+class LockRequest {
+public:
+ static LockRequest* create(ImageCtxT &image_ctx, Context *on_finish) {
+ return new LockRequest(image_ctx, on_finish);
+ }
+ LockRequest(ImageCtxT &image_ctx, Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start> /------------------------------------- BREAK_LOCKS * * *
+ * | | ^ *
+ * | | | *
+ * | | | *
+ * | v (EBUSY && !broke_lock) | *
+ * \---------> LOCK_OBJECT_MAP * * * * * * * * * * * > GET_LOCK_INFO * *
+ * | * ^ * *
+ * | * * * *
+ * | * * (ENOENT) * *
+ * | * * * * * * * * * * * * * * * * * *
+ * | * *
+ * | * (other errors) *
+ * | * *
+ * v v (other errors) *
+ * <finish> < * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT &m_image_ctx;
+ Context *m_on_finish;
+
+ bool m_broke_lock;
+ std::map<rados::cls::lock::locker_id_t,
+ rados::cls::lock::locker_info_t> m_lockers;
+ bufferlist m_out_bl;
+
+ void send_lock();
+ Context *handle_lock(int *ret_val);
+
+ void send_get_lock_info();
+ Context *handle_get_lock_info(int *ret_val);
+
+ void send_break_locks();
+ Context *handle_break_locks(int *ret_val);
+};
+
+} // namespace object_map
+} // namespace librbd
+
+extern template class librbd::object_map::LockRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_LOCK_REQUEST_H
diff --git a/src/librbd/object_map/RefreshRequest.cc b/src/librbd/object_map/RefreshRequest.cc
new file mode 100644
index 000000000..0f6b81923
--- /dev/null
+++ b/src/librbd/object_map/RefreshRequest.cc
@@ -0,0 +1,311 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/RefreshRequest.h"
+#include "cls/lock/cls_lock_client.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/object_map/InvalidateRequest.h"
+#include "librbd/object_map/LockRequest.h"
+#include "librbd/object_map/ResizeRequest.h"
+#include "librbd/Utils.h"
+#include "osdc/Striper.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::RefreshRequest: "
+
+namespace librbd {
+
+using util::create_context_callback;
+using util::create_rados_callback;
+
+namespace object_map {
+
+template <typename I>
+RefreshRequest<I>::RefreshRequest(I &image_ctx, ceph::shared_mutex* object_map_lock,
+ ceph::BitVector<2> *object_map,
+ uint64_t snap_id, Context *on_finish)
+ : m_image_ctx(image_ctx), m_object_map_lock(object_map_lock),
+ m_object_map(object_map), m_snap_id(snap_id), m_on_finish(on_finish),
+ m_object_count(0), m_truncate_on_disk_object_map(false) {
+}
+
+template <typename I>
+void RefreshRequest<I>::send() {
+ {
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ m_object_count = Striper::get_num_objects(
+ m_image_ctx.layout, m_image_ctx.get_image_size(m_snap_id));
+ }
+
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << this << " " << __func__ << ": "
+ << "object_count=" << m_object_count << dendl;
+ send_lock();
+}
+
+template <typename I>
+void RefreshRequest<I>::apply() {
+ uint64_t num_objs;
+ {
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ num_objs = Striper::get_num_objects(
+ m_image_ctx.layout, m_image_ctx.get_image_size(m_snap_id));
+ }
+ ceph_assert(m_on_disk_object_map.size() >= num_objs);
+
+ std::unique_lock object_map_locker{*m_object_map_lock};
+ *m_object_map = m_on_disk_object_map;
+}
+
+template <typename I>
+void RefreshRequest<I>::send_lock() {
+ CephContext *cct = m_image_ctx.cct;
+ if (m_object_count > cls::rbd::MAX_OBJECT_MAP_OBJECT_COUNT) {
+ send_invalidate_and_close();
+ return;
+ } else if (m_snap_id != CEPH_NOSNAP) {
+ send_load();
+ return;
+ }
+
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl;
+
+ using klass = RefreshRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_lock>(this);
+
+ LockRequest<I> *req = LockRequest<I>::create(m_image_ctx, ctx);
+ req->send();
+}
+
+template <typename I>
+Context *RefreshRequest<I>::handle_lock(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << dendl;
+
+ ceph_assert(*ret_val == 0);
+ send_load();
+ return nullptr;
+}
+
+template <typename I>
+void RefreshRequest<I>::send_load() {
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl;
+
+ librados::ObjectReadOperation op;
+ cls_client::object_map_load_start(&op);
+
+ using klass = RefreshRequest<I>;
+ m_out_bl.clear();
+ librados::AioCompletion *rados_completion =
+ create_rados_callback<klass, &klass::handle_load>(this);
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *RefreshRequest<I>::handle_load(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl;
+
+ if (*ret_val == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ *ret_val = cls_client::object_map_load_finish(&bl_it,
+ &m_on_disk_object_map);
+ }
+
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ if (*ret_val == -EINVAL) {
+ // object map is corrupt on-disk -- clear it and properly size it
+ // so future IO can keep the object map in sync
+ lderr(cct) << "object map corrupt on-disk: " << oid << dendl;
+ m_truncate_on_disk_object_map = true;
+ send_resize_invalidate();
+ return nullptr;
+ } else if (*ret_val < 0) {
+ lderr(cct) << "failed to load object map: " << oid << dendl;
+ if (*ret_val == -ETIMEDOUT &&
+ !cct->_conf.get_val<bool>("rbd_invalidate_object_map_on_timeout")) {
+ return m_on_finish;
+ }
+
+ send_invalidate();
+ return nullptr;
+ }
+
+ if (m_on_disk_object_map.size() < m_object_count) {
+ lderr(cct) << "object map smaller than current object count: "
+ << m_on_disk_object_map.size() << " != "
+ << m_object_count << dendl;
+ send_resize_invalidate();
+ return nullptr;
+ }
+
+ ldout(cct, 20) << "refreshed object map: num_objs="
+ << m_on_disk_object_map.size() << dendl;
+ if (m_on_disk_object_map.size() > m_object_count) {
+ // resize op might have been interrupted
+ ldout(cct, 1) << "object map larger than current object count: "
+ << m_on_disk_object_map.size() << " != "
+ << m_object_count << dendl;
+ }
+
+ apply();
+ return m_on_finish;
+}
+
+template <typename I>
+void RefreshRequest<I>::send_invalidate() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << dendl;
+
+ m_on_disk_object_map.clear();
+ object_map::ResizeRequest::resize(&m_on_disk_object_map, m_object_count,
+ OBJECT_EXISTS);
+
+ using klass = RefreshRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_invalidate>(this);
+ InvalidateRequest<I> *req = InvalidateRequest<I>::create(
+ m_image_ctx, m_snap_id, true, ctx);
+
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ req->send();
+}
+
+template <typename I>
+Context *RefreshRequest<I>::handle_invalidate(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl;
+
+ if (*ret_val < 0) {
+ lderr(cct) << "failed to invalidate object map: " << cpp_strerror(*ret_val)
+ << dendl;
+ }
+
+ apply();
+ return m_on_finish;
+}
+
+template <typename I>
+void RefreshRequest<I>::send_resize_invalidate() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << dendl;
+
+ m_on_disk_object_map.clear();
+ object_map::ResizeRequest::resize(&m_on_disk_object_map, m_object_count,
+ OBJECT_EXISTS);
+
+ using klass = RefreshRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_resize_invalidate>(this);
+ InvalidateRequest<I> *req = InvalidateRequest<I>::create(
+ m_image_ctx, m_snap_id, true, ctx);
+
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ req->send();
+}
+
+template <typename I>
+Context *RefreshRequest<I>::handle_resize_invalidate(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl;
+
+ if (*ret_val < 0) {
+ lderr(cct) << "failed to invalidate object map: " << cpp_strerror(*ret_val)
+ << dendl;
+ apply();
+ return m_on_finish;
+ }
+
+ send_resize();
+ return nullptr;
+}
+
+template <typename I>
+void RefreshRequest<I>::send_resize() {
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl;
+
+ librados::ObjectWriteOperation op;
+ if (m_snap_id == CEPH_NOSNAP) {
+ rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", "");
+ }
+ if (m_truncate_on_disk_object_map) {
+ op.truncate(0);
+ }
+ cls_client::object_map_resize(&op, m_object_count, OBJECT_NONEXISTENT);
+
+ using klass = RefreshRequest<I>;
+ librados::AioCompletion *rados_completion =
+ create_rados_callback<klass, &klass::handle_resize>(this);
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *RefreshRequest<I>::handle_resize(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl;
+
+ if (*ret_val < 0) {
+ lderr(cct) << "failed to adjust object map size: " << cpp_strerror(*ret_val)
+ << dendl;
+ *ret_val = 0;
+ }
+
+ apply();
+ return m_on_finish;
+}
+
+template <typename I>
+void RefreshRequest<I>::send_invalidate_and_close() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << dendl;
+
+ using klass = RefreshRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_invalidate_and_close>(this);
+ InvalidateRequest<I> *req = InvalidateRequest<I>::create(
+ m_image_ctx, m_snap_id, false, ctx);
+
+ lderr(cct) << "object map too large: " << m_object_count << dendl;
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ req->send();
+}
+
+template <typename I>
+Context *RefreshRequest<I>::handle_invalidate_and_close(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl;
+
+ if (*ret_val < 0) {
+ lderr(cct) << "failed to invalidate object map: " << cpp_strerror(*ret_val)
+ << dendl;
+ } else {
+ *ret_val = -EFBIG;
+ }
+
+ std::unique_lock object_map_locker{*m_object_map_lock};
+ m_object_map->clear();
+ return m_on_finish;
+}
+
+} // namespace object_map
+} // namespace librbd
+
+template class librbd::object_map::RefreshRequest<librbd::ImageCtx>;
diff --git a/src/librbd/object_map/RefreshRequest.h b/src/librbd/object_map/RefreshRequest.h
new file mode 100644
index 000000000..0bca85079
--- /dev/null
+++ b/src/librbd/object_map/RefreshRequest.h
@@ -0,0 +1,102 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_REFRESH_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_REFRESH_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/buffer.h"
+#include "common/bit_vector.hpp"
+#include "common/ceph_mutex.h"
+
+class Context;
+class RWLock;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+template <typename ImageCtxT = ImageCtx>
+class RefreshRequest {
+public:
+ static RefreshRequest *create(ImageCtxT &image_ctx,
+ ceph::shared_mutex* object_map_lock,
+ ceph::BitVector<2> *object_map,
+ uint64_t snap_id, Context *on_finish) {
+ return new RefreshRequest(image_ctx, object_map_lock, object_map, snap_id,
+ on_finish);
+ }
+
+ RefreshRequest(ImageCtxT &image_ctx, ceph::shared_mutex* object_map_lock,
+ ceph::BitVector<2> *object_map, uint64_t snap_id,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start> -----> LOCK (skip if snapshot)
+ * * |
+ * * v (other errors)
+ * * LOAD * * * * * * * > INVALIDATE ------------\
+ * * | * |
+ * * | * (-EINVAL or too small) |
+ * * | * * * * * * > INVALIDATE_AND_RESIZE |
+ * * | | * |
+ * * | | * |
+ * * | v * |
+ * * | RESIZE * |
+ * * | | * |
+ * * | | * * * * * * * |
+ * * | | * |
+ * * | v v |
+ * * \--------------------> LOCK <-------------/
+ * * |
+ * v v
+ * INVALIDATE_AND_CLOSE ---------------> <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT &m_image_ctx;
+ ceph::shared_mutex* m_object_map_lock;
+ ceph::BitVector<2> *m_object_map;
+ uint64_t m_snap_id;
+ Context *m_on_finish;
+
+ uint64_t m_object_count;
+ ceph::BitVector<2> m_on_disk_object_map;
+ bool m_truncate_on_disk_object_map;
+ bufferlist m_out_bl;
+
+ void send_lock();
+ Context *handle_lock(int *ret_val);
+
+ void send_load();
+ Context *handle_load(int *ret_val);
+
+ void send_invalidate();
+ Context *handle_invalidate(int *ret_val);
+
+ void send_resize_invalidate();
+ Context *handle_resize_invalidate(int *ret_val);
+
+ void send_resize();
+ Context *handle_resize(int *ret_val);
+
+ void send_invalidate_and_close();
+ Context *handle_invalidate_and_close(int *ret_val);
+
+ void apply();
+};
+
+} // namespace object_map
+} // namespace librbd
+
+extern template class librbd::object_map::RefreshRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_REFRESH_REQUEST_H
diff --git a/src/librbd/object_map/RemoveRequest.cc b/src/librbd/object_map/RemoveRequest.cc
new file mode 100644
index 000000000..a718d81fc
--- /dev/null
+++ b/src/librbd/object_map/RemoveRequest.cc
@@ -0,0 +1,88 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/RemoveRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "include/ceph_assert.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::RemoveRequest: "
+
+namespace librbd {
+namespace object_map {
+
+using util::create_rados_callback;
+
+template <typename I>
+RemoveRequest<I>::RemoveRequest(I *image_ctx, Context *on_finish)
+ : m_image_ctx(image_ctx), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void RemoveRequest<I>::send() {
+ send_remove_object_map();
+}
+
+template <typename I>
+void RemoveRequest<I>::send_remove_object_map() {
+ CephContext *cct = m_image_ctx->cct;
+ ldout(cct, 20) << __func__ << dendl;
+
+ std::unique_lock image_locker{m_image_ctx->image_lock};
+ std::vector<uint64_t> snap_ids;
+ snap_ids.push_back(CEPH_NOSNAP);
+ for (auto it : m_image_ctx->snap_info) {
+ snap_ids.push_back(it.first);
+ }
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_ref_counter == 0);
+
+ for (auto snap_id : snap_ids) {
+ m_ref_counter++;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id, snap_id));
+ using klass = RemoveRequest<I>;
+ librados::AioCompletion *comp =
+ create_rados_callback<klass, &klass::handle_remove_object_map>(this);
+
+ int r = m_image_ctx->md_ctx.aio_remove(oid, comp);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+}
+
+template <typename I>
+Context *RemoveRequest<I>::handle_remove_object_map(int *result) {
+ CephContext *cct = m_image_ctx->cct;
+ ldout(cct, 20) << __func__ << ": r=" << *result << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_ref_counter > 0);
+ m_ref_counter--;
+
+ if (*result < 0 && *result != -ENOENT) {
+ lderr(cct) << "failed to remove object map: " << cpp_strerror(*result)
+ << dendl;
+ m_error_result = *result;
+ }
+ if (m_ref_counter > 0) {
+ return nullptr;
+ }
+ }
+ if (m_error_result < 0) {
+ *result = m_error_result;
+ }
+ return m_on_finish;
+}
+
+} // namespace object_map
+} // namespace librbd
+
+template class librbd::object_map::RemoveRequest<librbd::ImageCtx>;
diff --git a/src/librbd/object_map/RemoveRequest.h b/src/librbd/object_map/RemoveRequest.h
new file mode 100644
index 000000000..ce82e603c
--- /dev/null
+++ b/src/librbd/object_map/RemoveRequest.h
@@ -0,0 +1,63 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_REMOVE_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_REMOVE_REQUEST_H
+
+#include "include/buffer.h"
+#include "common/ceph_mutex.h"
+#include <map>
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+template <typename ImageCtxT = ImageCtx>
+class RemoveRequest {
+public:
+ static RemoveRequest *create(ImageCtxT *image_ctx, Context *on_finish) {
+ return new RemoveRequest(image_ctx, on_finish);
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * | . . .
+ * v v .
+ * REMOVE_OBJECT_MAP . (for every snapshot)
+ * | . .
+ * v . . .
+ * <finis>
+ *
+ * @endverbatim
+ */
+
+ RemoveRequest(ImageCtxT *image_ctx, Context *on_finish);
+
+ ImageCtxT *m_image_ctx;
+ Context *m_on_finish;
+
+ int m_error_result = 0;
+ int m_ref_counter = 0;
+ mutable ceph::mutex m_lock =
+ ceph::make_mutex("object_map::RemoveRequest::m_lock");
+
+ void send_remove_object_map();
+ Context *handle_remove_object_map(int *result);
+};
+
+} // namespace object_map
+} // namespace librbd
+
+extern template class librbd::object_map::RemoveRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_REMOVE_REQUEST_H
diff --git a/src/librbd/object_map/Request.cc b/src/librbd/object_map/Request.cc
new file mode 100644
index 000000000..1e1aab2ae
--- /dev/null
+++ b/src/librbd/object_map/Request.cc
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/Request.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/RWLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/object_map/InvalidateRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::Request: "
+
+namespace librbd {
+namespace object_map {
+
+bool Request::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << this << " should_complete: r=" << r << dendl;
+
+ switch (m_state)
+ {
+ case STATE_REQUEST:
+ if (r == -ETIMEDOUT &&
+ !cct->_conf.get_val<bool>("rbd_invalidate_object_map_on_timeout")) {
+ m_state = STATE_TIMEOUT;
+ return true;
+ } else if (r < 0) {
+ lderr(cct) << "failed to update object map: " << cpp_strerror(r)
+ << dendl;
+ return invalidate();
+ }
+
+ finish_request();
+ return true;
+
+ case STATE_INVALIDATE:
+ ldout(cct, 20) << "INVALIDATE" << dendl;
+ if (r < 0) {
+ lderr(cct) << "failed to invalidate object map: " << cpp_strerror(r)
+ << dendl;
+ }
+ return true;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ ceph_abort();
+ break;
+ }
+ return false;
+}
+
+bool Request::invalidate() {
+ bool flags_set;
+ int r = m_image_ctx.test_flags(m_snap_id, RBD_FLAG_OBJECT_MAP_INVALID,
+ &flags_set);
+ if (r < 0 || flags_set) {
+ return true;
+ }
+
+ m_state = STATE_INVALIDATE;
+
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ InvalidateRequest<> *req = new InvalidateRequest<>(m_image_ctx, m_snap_id,
+ true,
+ create_callback_context());
+ req->send();
+ return false;
+}
+
+} // namespace object_map
+} // namespace librbd
diff --git a/src/librbd/object_map/Request.h b/src/librbd/object_map/Request.h
new file mode 100644
index 000000000..7e9bfb88d
--- /dev/null
+++ b/src/librbd/object_map/Request.h
@@ -0,0 +1,66 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/AsyncRequest.h"
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+class Request : public AsyncRequest<> {
+public:
+ Request(ImageCtx &image_ctx, uint64_t snap_id, Context *on_finish)
+ : AsyncRequest(image_ctx, on_finish), m_snap_id(snap_id),
+ m_state(STATE_REQUEST)
+ {
+ }
+
+ void send() override = 0;
+
+protected:
+ const uint64_t m_snap_id;
+
+ bool should_complete(int r) override;
+ int filter_return_code(int r) const override {
+ if (m_state == STATE_REQUEST) {
+ // never propagate an error back to the caller
+ return 0;
+ }
+ return r;
+ }
+ virtual void finish_request() {
+ }
+
+private:
+ /**
+ * STATE_TIMEOUT --------\
+ * ^ |
+ * | v
+ * <start> ---> STATE_REQUEST ---> <finish>
+ * | ^
+ * v |
+ * STATE_INVALIDATE -------/
+ */
+ enum State {
+ STATE_REQUEST,
+ STATE_TIMEOUT,
+ STATE_INVALIDATE
+ };
+
+ State m_state;
+
+ bool invalidate();
+};
+
+} // namespace object_map
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_REQUEST_H
diff --git a/src/librbd/object_map/ResizeRequest.cc b/src/librbd/object_map/ResizeRequest.cc
new file mode 100644
index 000000000..91a3140ed
--- /dev/null
+++ b/src/librbd/object_map/ResizeRequest.cc
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/ResizeRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "osdc/Striper.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "cls/lock/cls_lock_client.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::ResizeRequest: "
+
+namespace librbd {
+namespace object_map {
+
+void ResizeRequest::resize(ceph::BitVector<2> *object_map, uint64_t num_objs,
+ uint8_t default_state) {
+ size_t orig_object_map_size = object_map->size();
+ object_map->resize(num_objs);
+ if (num_objs > orig_object_map_size) {
+ auto it = object_map->begin() + orig_object_map_size;
+ auto end_it = object_map->begin() + num_objs;
+ for (;it != end_it; ++it) {
+ *it = default_state;
+ }
+ }
+}
+
+void ResizeRequest::send() {
+ CephContext *cct = m_image_ctx.cct;
+
+ std::unique_lock l{*m_object_map_lock};
+ m_num_objs = Striper::get_num_objects(m_image_ctx.layout, m_new_size);
+
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ ldout(cct, 5) << this << " resizing on-disk object map: "
+ << "ictx=" << &m_image_ctx << ", "
+ << "oid=" << oid << ", num_objs=" << m_num_objs << dendl;
+
+ librados::ObjectWriteOperation op;
+ if (m_snap_id == CEPH_NOSNAP) {
+ rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", "");
+ }
+ cls_client::object_map_resize(&op, m_num_objs, m_default_object_state);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+void ResizeRequest::finish_request() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " resizing in-memory object map: "
+ << m_num_objs << dendl;
+
+ std::unique_lock object_map_locker{*m_object_map_lock};
+ resize(m_object_map, m_num_objs, m_default_object_state);
+}
+
+} // namespace object_map
+} // namespace librbd
diff --git a/src/librbd/object_map/ResizeRequest.h b/src/librbd/object_map/ResizeRequest.h
new file mode 100644
index 000000000..dccdef133
--- /dev/null
+++ b/src/librbd/object_map/ResizeRequest.h
@@ -0,0 +1,52 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_RESIZE_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_RESIZE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/object_map/Request.h"
+#include "common/bit_vector.hpp"
+
+class Context;
+class RWLock;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+class ResizeRequest : public Request {
+public:
+ ResizeRequest(ImageCtx &image_ctx, ceph::shared_mutex *object_map_lock,
+ ceph::BitVector<2> *object_map, uint64_t snap_id,
+ uint64_t new_size, uint8_t default_object_state,
+ Context *on_finish)
+ : Request(image_ctx, snap_id, on_finish),
+ m_object_map_lock(object_map_lock), m_object_map(object_map),
+ m_num_objs(0), m_new_size(new_size),
+ m_default_object_state(default_object_state)
+ {
+ }
+
+ static void resize(ceph::BitVector<2> *object_map, uint64_t num_objs,
+ uint8_t default_state);
+
+ void send() override;
+
+protected:
+ void finish_request() override;
+
+private:
+ ceph::shared_mutex* m_object_map_lock;
+ ceph::BitVector<2> *m_object_map;
+ uint64_t m_num_objs;
+ uint64_t m_new_size;
+ uint8_t m_default_object_state;
+};
+
+} // namespace object_map
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_RESIZE_REQUEST_H
diff --git a/src/librbd/object_map/SnapshotCreateRequest.cc b/src/librbd/object_map/SnapshotCreateRequest.cc
new file mode 100644
index 000000000..3b2e7ee82
--- /dev/null
+++ b/src/librbd/object_map/SnapshotCreateRequest.cc
@@ -0,0 +1,147 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/SnapshotCreateRequest.h"
+#include "common/dout.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "cls/lock/cls_lock_client.h"
+#include <iostream>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::SnapshotCreateRequest: "
+
+namespace librbd {
+namespace object_map {
+
+namespace {
+
+std::ostream& operator<<(std::ostream& os,
+ const SnapshotCreateRequest::State& state) {
+ switch(state) {
+ case SnapshotCreateRequest::STATE_READ_MAP:
+ os << "READ_MAP";
+ break;
+ case SnapshotCreateRequest::STATE_WRITE_MAP:
+ os << "WRITE_MAP";
+ break;
+ case SnapshotCreateRequest::STATE_ADD_SNAPSHOT:
+ os << "ADD_SNAPSHOT";
+ break;
+ default:
+ os << "UNKNOWN (" << static_cast<uint32_t>(state) << ")";
+ break;
+ }
+ return os;
+}
+
+} // anonymous namespace
+
+void SnapshotCreateRequest::send() {
+ send_read_map();
+}
+
+bool SnapshotCreateRequest::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", "
+ << "r=" << r << dendl;
+ if (r < 0 && m_ret_val == 0) {
+ m_ret_val = r;
+ }
+ if (m_ret_val < 0) {
+ // pass errors down to base class to invalidate the object map
+ return Request::should_complete(r);
+ }
+
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+ bool finished = false;
+ switch (m_state) {
+ case STATE_READ_MAP:
+ send_write_map();
+ break;
+ case STATE_WRITE_MAP:
+ finished = send_add_snapshot();
+ break;
+ case STATE_ADD_SNAPSHOT:
+ update_object_map();
+ finished = true;
+ break;
+ default:
+ ceph_abort();
+ break;
+ }
+ return finished;
+}
+
+void SnapshotCreateRequest::send_read_map() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock));
+
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+ ldout(cct, 5) << this << " " << __func__ << ": oid=" << oid << dendl;
+ m_state = STATE_READ_MAP;
+
+ // IO is blocked due to the snapshot creation -- consistent to read from disk
+ librados::ObjectReadOperation op;
+ op.read(0, 0, NULL, NULL);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op,
+ &m_read_bl);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+void SnapshotCreateRequest::send_write_map() {
+ CephContext *cct = m_image_ctx.cct;
+ std::string snap_oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ ldout(cct, 5) << this << " " << __func__ << ": snap_oid=" << snap_oid
+ << dendl;
+ m_state = STATE_WRITE_MAP;
+
+ librados::ObjectWriteOperation op;
+ op.write_full(m_read_bl);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(snap_oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+bool SnapshotCreateRequest::send_add_snapshot() {
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) == 0) {
+ return true;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+ ldout(cct, 5) << this << " " << __func__ << ": oid=" << oid << dendl;
+ m_state = STATE_ADD_SNAPSHOT;
+
+ librados::ObjectWriteOperation op;
+ rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", "");
+ cls_client::object_map_snap_add(&op);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+ return false;
+}
+
+void SnapshotCreateRequest::update_object_map() {
+ std::unique_lock object_map_locker{*m_object_map_lock};
+
+ auto it = m_object_map.begin();
+ auto end_it = m_object_map.end();
+ for (; it != end_it; ++it) {
+ if (*it == OBJECT_EXISTS) {
+ *it = OBJECT_EXISTS_CLEAN;
+ }
+ }
+}
+
+} // namespace object_map
+} // namespace librbd
diff --git a/src/librbd/object_map/SnapshotCreateRequest.h b/src/librbd/object_map/SnapshotCreateRequest.h
new file mode 100644
index 000000000..3074d059d
--- /dev/null
+++ b/src/librbd/object_map/SnapshotCreateRequest.h
@@ -0,0 +1,80 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_CREATE_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_CREATE_REQUEST_H
+
+#include "include/int_types.h"
+#include "common/bit_vector.hpp"
+#include "librbd/object_map/Request.h"
+
+class Context;
+class RWLock;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+class SnapshotCreateRequest : public Request {
+public:
+ /**
+ * Snapshot create goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_READ_MAP
+ * |
+ * v (skip)
+ * STATE_WRITE_MAP . . . . . . .
+ * | .
+ * v v
+ * STATE_ADD_SNAPSHOT ---> <finish>
+ *
+ * @endverbatim
+ *
+ * The _ADD_SNAPSHOT state is skipped if the FAST_DIFF feature isn't enabled.
+ */
+ enum State {
+ STATE_READ_MAP,
+ STATE_WRITE_MAP,
+ STATE_ADD_SNAPSHOT
+ };
+
+ SnapshotCreateRequest(ImageCtx &image_ctx, ceph::shared_mutex* object_map_lock,
+ ceph::BitVector<2> *object_map, uint64_t snap_id,
+ Context *on_finish)
+ : Request(image_ctx, snap_id, on_finish),
+ m_object_map_lock(object_map_lock), m_object_map(*object_map),
+ m_ret_val(0) {
+ }
+
+ void send() override;
+
+protected:
+ bool should_complete(int r) override;
+
+private:
+ ceph::shared_mutex* m_object_map_lock;
+ ceph::BitVector<2> &m_object_map;
+
+ State m_state = STATE_READ_MAP;
+ bufferlist m_read_bl;
+ int m_ret_val;
+
+ void send_read_map();
+ void send_write_map();
+ bool send_add_snapshot();
+
+ void update_object_map();
+
+};
+
+} // namespace object_map
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_CREATE_REQUEST_H
diff --git a/src/librbd/object_map/SnapshotRemoveRequest.cc b/src/librbd/object_map/SnapshotRemoveRequest.cc
new file mode 100644
index 000000000..1c2ffc753
--- /dev/null
+++ b/src/librbd/object_map/SnapshotRemoveRequest.cc
@@ -0,0 +1,227 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/SnapshotRemoveRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/object_map/InvalidateRequest.h"
+#include "cls/lock/cls_lock_client.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::SnapshotRemoveRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace object_map {
+
+void SnapshotRemoveRequest::send() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock));
+ ceph_assert(ceph_mutex_is_wlocked(m_image_ctx.image_lock));
+
+ if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) {
+ int r = m_image_ctx.get_flags(m_snap_id, &m_flags);
+ ceph_assert(r == 0);
+
+ compute_next_snap_id();
+ load_map();
+ } else {
+ remove_map();
+ }
+}
+
+void SnapshotRemoveRequest::load_map() {
+ CephContext *cct = m_image_ctx.cct;
+ std::string snap_oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ ldout(cct, 5) << "snap_oid=" << snap_oid << dendl;
+
+ librados::ObjectReadOperation op;
+ cls_client::object_map_load_start(&op);
+
+ auto rados_completion = librbd::util::create_rados_callback<
+ SnapshotRemoveRequest, &SnapshotRemoveRequest::handle_load_map>(this);
+ int r = m_image_ctx.md_ctx.aio_operate(snap_oid, rados_completion, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+void SnapshotRemoveRequest::handle_load_map(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = cls_client::object_map_load_finish(&it, &m_snap_object_map);
+ }
+ if (r == -ENOENT) {
+ // implies we have already deleted this snapshot and handled the
+ // necessary fast-diff cleanup
+ complete(0);
+ return;
+ } else if (r < 0) {
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ lderr(cct) << "failed to load object map " << oid << ": "
+ << cpp_strerror(r) << dendl;
+
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ invalidate_next_map();
+ return;
+ }
+
+ remove_snapshot();
+}
+
+void SnapshotRemoveRequest::remove_snapshot() {
+ if ((m_flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0) {
+ // snapshot object map exists on disk but is invalid. cannot clean fast-diff
+ // on next snapshot if current snapshot was invalid.
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ invalidate_next_map();
+ return;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_next_snap_id));
+ ldout(cct, 5) << "oid=" << oid << dendl;
+
+ librados::ObjectWriteOperation op;
+ if (m_next_snap_id == CEPH_NOSNAP) {
+ rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", "");
+ }
+ cls_client::object_map_snap_remove(&op, m_snap_object_map);
+
+ auto rados_completion = librbd::util::create_rados_callback<
+ SnapshotRemoveRequest,
+ &SnapshotRemoveRequest::handle_remove_snapshot>(this);
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+void SnapshotRemoveRequest::handle_remove_snapshot(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+ if (r < 0 && r != -ENOENT) {
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id,
+ m_next_snap_id));
+ lderr(cct) << "failed to remove object map snapshot " << oid << ": "
+ << cpp_strerror(r) << dendl;
+
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+ invalidate_next_map();
+ return;
+ }
+
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ update_object_map();
+ remove_map();
+}
+
+void SnapshotRemoveRequest::invalidate_next_map() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock));
+ ceph_assert(ceph_mutex_is_wlocked(m_image_ctx.image_lock));
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ auto ctx = librbd::util::create_context_callback<
+ SnapshotRemoveRequest,
+ &SnapshotRemoveRequest::handle_invalidate_next_map>(this);
+ InvalidateRequest<> *req = new InvalidateRequest<>(m_image_ctx,
+ m_next_snap_id, true, ctx);
+ req->send();
+}
+
+void SnapshotRemoveRequest::handle_invalidate_next_map(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0) {
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id,
+ m_next_snap_id));
+ lderr(cct) << "failed to invalidate object map " << oid << ": "
+ << cpp_strerror(r) << dendl;
+ complete(r);
+ return;
+ }
+
+ remove_map();
+}
+
+void SnapshotRemoveRequest::remove_map() {
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ ldout(cct, 5) << "oid=" << oid << dendl;
+
+ librados::ObjectWriteOperation op;
+ op.remove();
+
+ auto rados_completion = librbd::util::create_rados_callback<
+ SnapshotRemoveRequest, &SnapshotRemoveRequest::handle_remove_map>(this);
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+void SnapshotRemoveRequest::handle_remove_map(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ lderr(cct) << "failed to remove object map " << oid << ": "
+ << cpp_strerror(r) << dendl;
+ complete(r);
+ return;
+ }
+
+ complete(0);
+}
+
+void SnapshotRemoveRequest::compute_next_snap_id() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock));
+
+ m_next_snap_id = CEPH_NOSNAP;
+ std::map<librados::snap_t, SnapInfo>::const_iterator it =
+ m_image_ctx.snap_info.find(m_snap_id);
+ ceph_assert(it != m_image_ctx.snap_info.end());
+
+ ++it;
+ if (it != m_image_ctx.snap_info.end()) {
+ m_next_snap_id = it->first;
+ }
+}
+
+void SnapshotRemoveRequest::update_object_map() {
+ assert(ceph_mutex_is_locked(m_image_ctx.image_lock));
+ std::unique_lock object_map_locker{*m_object_map_lock};
+ if (m_next_snap_id == m_image_ctx.snap_id && m_next_snap_id == CEPH_NOSNAP) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ auto it = m_object_map.begin();
+ auto end_it = m_object_map.end();
+ auto snap_it = m_snap_object_map.begin();
+ uint64_t i = 0;
+ for (; it != end_it; ++it) {
+ if (*it == OBJECT_EXISTS_CLEAN &&
+ (i >= m_snap_object_map.size() ||
+ *snap_it == OBJECT_EXISTS)) {
+ *it = OBJECT_EXISTS;
+ }
+ if (i < m_snap_object_map.size()) {
+ ++snap_it;
+ }
+ ++i;
+ }
+ }
+}
+
+} // namespace object_map
+} // namespace librbd
diff --git a/src/librbd/object_map/SnapshotRemoveRequest.h b/src/librbd/object_map/SnapshotRemoveRequest.h
new file mode 100644
index 000000000..1e9c75d81
--- /dev/null
+++ b/src/librbd/object_map/SnapshotRemoveRequest.h
@@ -0,0 +1,88 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_REMOVE_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_REMOVE_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/buffer.h"
+#include "common/bit_vector.hpp"
+#include "librbd/AsyncRequest.h"
+
+namespace librbd {
+namespace object_map {
+
+class SnapshotRemoveRequest : public AsyncRequest<> {
+public:
+ /**
+ * Snapshot rollback goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start> -----------> STATE_LOAD_MAP ----\
+ * . * |
+ * . * (error) |
+ * . (invalid object map) v |
+ * . . . > STATE_INVALIDATE_NEXT_MAP |
+ * . | |
+ * . | |
+ * . (fast diff disabled) v v
+ * . . . . . . . . . . > STATE_REMOVE_MAP
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ *
+ * The _LOAD_MAP state is skipped if the fast diff feature is disabled.
+ * If the fast diff feature is enabled and the snapshot is flagged as
+ * invalid, the next snapshot / HEAD object mapis flagged as invalid;
+ * otherwise, the state machine proceeds to remove the object map.
+ */
+
+ SnapshotRemoveRequest(ImageCtx &image_ctx, ceph::shared_mutex* object_map_lock,
+ ceph::BitVector<2> *object_map, uint64_t snap_id,
+ Context *on_finish)
+ : AsyncRequest(image_ctx, on_finish),
+ m_object_map_lock(object_map_lock), m_object_map(*object_map),
+ m_snap_id(snap_id), m_next_snap_id(CEPH_NOSNAP) {
+ }
+
+ void send() override;
+
+protected:
+ bool should_complete(int r) override {
+ return true;
+ }
+
+private:
+ ceph::shared_mutex* m_object_map_lock;
+ ceph::BitVector<2> &m_object_map;
+ uint64_t m_snap_id;
+ uint64_t m_next_snap_id;
+
+ uint64_t m_flags = 0;
+
+ ceph::BitVector<2> m_snap_object_map;
+ bufferlist m_out_bl;
+
+ void load_map();
+ void handle_load_map(int r);
+
+ void remove_snapshot();
+ void handle_remove_snapshot(int r);
+
+ void invalidate_next_map();
+ void handle_invalidate_next_map(int r);
+
+ void remove_map();
+ void handle_remove_map(int r);
+
+ void compute_next_snap_id();
+ void update_object_map();
+};
+
+} // namespace object_map
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_REMOVE_REQUEST_H
diff --git a/src/librbd/object_map/SnapshotRollbackRequest.cc b/src/librbd/object_map/SnapshotRollbackRequest.cc
new file mode 100644
index 000000000..7c2f441cc
--- /dev/null
+++ b/src/librbd/object_map/SnapshotRollbackRequest.cc
@@ -0,0 +1,131 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/SnapshotRollbackRequest.h"
+#include "common/dout.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/object_map/InvalidateRequest.h"
+#include "cls/lock/cls_lock_client.h"
+#include <iostream>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::SnapshotRollbackRequest: "
+
+namespace librbd {
+namespace object_map {
+
+namespace {
+
+std::ostream& operator<<(std::ostream& os,
+ const SnapshotRollbackRequest::State& state) {
+ switch(state) {
+ case SnapshotRollbackRequest::STATE_READ_MAP:
+ os << "READ_MAP";
+ break;
+ case SnapshotRollbackRequest::STATE_INVALIDATE_MAP:
+ os << "INVALIDATE_MAP";
+ break;
+ case SnapshotRollbackRequest::STATE_WRITE_MAP:
+ os << "WRITE_MAP";
+ break;
+ default:
+ os << "UNKNOWN (" << static_cast<uint32_t>(state) << ")";
+ break;
+ }
+ return os;
+}
+
+} // anonymous namespace
+
+void SnapshotRollbackRequest::send() {
+ send_read_map();
+}
+
+bool SnapshotRollbackRequest::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", "
+ << "r=" << r << dendl;
+ if (r < 0 && m_ret_val == 0) {
+ m_ret_val = r;
+ }
+
+ bool finished = false;
+ switch (m_state) {
+ case STATE_READ_MAP:
+ if (r < 0) {
+ // invalidate the snapshot object map
+ send_invalidate_map();
+ } else {
+ send_write_map();
+ }
+ break;
+ case STATE_INVALIDATE_MAP:
+ // invalidate the HEAD object map as well
+ finished = Request::should_complete(m_ret_val);
+ break;
+ case STATE_WRITE_MAP:
+ finished = Request::should_complete(r);
+ break;
+ default:
+ ceph_abort();
+ break;
+ }
+ return finished;
+}
+
+void SnapshotRollbackRequest::send_read_map() {
+ std::string snap_oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << ": snap_oid=" << snap_oid
+ << dendl;
+ m_state = STATE_READ_MAP;
+
+ librados::ObjectReadOperation op;
+ op.read(0, 0, NULL, NULL);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(snap_oid, rados_completion, &op,
+ &m_read_bl);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+void SnapshotRollbackRequest::send_write_map() {
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+
+ CephContext *cct = m_image_ctx.cct;
+ std::string snap_oid(ObjectMap<>::object_map_name(m_image_ctx.id,
+ CEPH_NOSNAP));
+ ldout(cct, 5) << this << " " << __func__ << ": snap_oid=" << snap_oid
+ << dendl;
+ m_state = STATE_WRITE_MAP;
+
+ librados::ObjectWriteOperation op;
+ rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", "");
+ op.write_full(m_read_bl);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(snap_oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+void SnapshotRollbackRequest::send_invalidate_map() {
+ std::shared_lock owner_locker{m_image_ctx.owner_lock};
+ std::unique_lock image_locker{m_image_ctx.image_lock};
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " " << __func__ << dendl;
+ m_state = STATE_INVALIDATE_MAP;
+
+ InvalidateRequest<> *req = new InvalidateRequest<>(m_image_ctx, m_snap_id,
+ false,
+ create_callback_context());
+ req->send();
+}
+
+} // namespace object_map
+} // namespace librbd
diff --git a/src/librbd/object_map/SnapshotRollbackRequest.h b/src/librbd/object_map/SnapshotRollbackRequest.h
new file mode 100644
index 000000000..e26b1e0a3
--- /dev/null
+++ b/src/librbd/object_map/SnapshotRollbackRequest.h
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_ROLLBACK_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_ROLLBACK_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/object_map/Request.h"
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+class SnapshotRollbackRequest : public Request {
+public:
+ /**
+ * Snapshot rollback goes through the following state machine:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v (error)
+ * STATE_READ_MAP * * * * > STATE_INVALIDATE_MAP
+ * | |
+ * v v
+ * STATE_WRITE_MAP -------> <finish>
+ *
+ * @endverbatim
+ *
+ * If an error occurs within the READ_MAP state, the associated snapshot's
+ * object map will be flagged as invalid. Otherwise, an error from any state
+ * will result in the HEAD object map being flagged as invalid via the base
+ * class.
+ */
+ enum State {
+ STATE_READ_MAP,
+ STATE_INVALIDATE_MAP,
+ STATE_WRITE_MAP
+ };
+
+ SnapshotRollbackRequest(ImageCtx &image_ctx, uint64_t snap_id,
+ Context *on_finish)
+ : Request(image_ctx, CEPH_NOSNAP, on_finish),
+ m_snap_id(snap_id), m_ret_val(0) {
+ ceph_assert(snap_id != CEPH_NOSNAP);
+ }
+
+ void send() override;
+
+protected:
+ bool should_complete(int r) override;
+
+private:
+ State m_state = STATE_READ_MAP;
+ uint64_t m_snap_id;
+ int m_ret_val;
+
+ bufferlist m_read_bl;
+
+ void send_read_map();
+ void send_invalidate_map();
+ void send_write_map();
+
+};
+
+} // namespace object_map
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_ROLLBACK_REQUEST_H
diff --git a/src/librbd/object_map/Types.h b/src/librbd/object_map/Types.h
new file mode 100644
index 000000000..0ce91bd96
--- /dev/null
+++ b/src/librbd/object_map/Types.h
@@ -0,0 +1,20 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_TYPES_H
+#define CEPH_LIBRBD_OBJECT_MAP_TYPES_H
+
+namespace librbd {
+namespace object_map {
+
+enum DiffState {
+ DIFF_STATE_HOLE = 0, /* unchanged hole */
+ DIFF_STATE_DATA = 1, /* unchanged data */
+ DIFF_STATE_HOLE_UPDATED = 2, /* new hole */
+ DIFF_STATE_DATA_UPDATED = 3 /* new data */
+};
+
+} // namespace object_map
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_TYPES_H
diff --git a/src/librbd/object_map/UnlockRequest.cc b/src/librbd/object_map/UnlockRequest.cc
new file mode 100644
index 000000000..0220ec900
--- /dev/null
+++ b/src/librbd/object_map/UnlockRequest.cc
@@ -0,0 +1,66 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/UnlockRequest.h"
+#include "cls/lock/cls_lock_client.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::UnlockRequest: "
+
+namespace librbd {
+namespace object_map {
+
+using util::create_rados_callback;
+
+template <typename I>
+UnlockRequest<I>::UnlockRequest(I &image_ctx, Context *on_finish)
+ : m_image_ctx(image_ctx), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void UnlockRequest<I>::send() {
+ send_unlock();
+}
+
+template <typename I>
+void UnlockRequest<I>::send_unlock() {
+ CephContext *cct = m_image_ctx.cct;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+ ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl;
+
+ librados::ObjectWriteOperation op;
+ rados::cls::lock::unlock(&op, RBD_LOCK_NAME, "");
+
+ using klass = UnlockRequest<I>;
+ librados::AioCompletion *rados_completion =
+ create_rados_callback<klass, &klass::handle_unlock>(this);
+ int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+Context *UnlockRequest<I>::handle_unlock(int *ret_val) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl;
+
+ if (*ret_val < 0 && *ret_val != -ENOENT) {
+ lderr(m_image_ctx.cct) << "failed to release object map lock: "
+ << cpp_strerror(*ret_val) << dendl;
+
+ }
+
+ *ret_val = 0;
+ return m_on_finish;
+}
+
+} // namespace object_map
+} // namespace librbd
+
+template class librbd::object_map::UnlockRequest<librbd::ImageCtx>;
diff --git a/src/librbd/object_map/UnlockRequest.h b/src/librbd/object_map/UnlockRequest.h
new file mode 100644
index 000000000..ae1d9e934
--- /dev/null
+++ b/src/librbd/object_map/UnlockRequest.h
@@ -0,0 +1,47 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_UNLOCK_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_UNLOCK_REQUEST_H
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+template <typename ImageCtxT = ImageCtx>
+class UnlockRequest {
+public:
+ static UnlockRequest *create(ImageCtxT &image_ctx, Context *on_finish) {
+ return new UnlockRequest(image_ctx, on_finish);
+ }
+
+ UnlockRequest(ImageCtxT &image_ctx, Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start> ----> UNLOCK ----> <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT &m_image_ctx;
+ Context *m_on_finish;
+
+ void send_unlock();
+ Context* handle_unlock(int *ret_val);
+};
+
+} // namespace object_map
+} // namespace librbd
+
+extern template class librbd::object_map::UnlockRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_UNLOCK_REQUEST_H
diff --git a/src/librbd/object_map/UpdateRequest.cc b/src/librbd/object_map/UpdateRequest.cc
new file mode 100644
index 000000000..30a1f2121
--- /dev/null
+++ b/src/librbd/object_map/UpdateRequest.cc
@@ -0,0 +1,129 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/object_map/UpdateRequest.h"
+#include "include/rbd/object_map_types.h"
+#include "include/stringify.h"
+#include "common/dout.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "cls/lock/cls_lock_client.h"
+#include <string>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::object_map::UpdateRequest: " << this \
+ << " " << __func__ << ": "
+
+namespace librbd {
+namespace object_map {
+
+namespace {
+
+// keep aligned to bit_vector 4K block sizes
+const uint64_t MAX_OBJECTS_PER_UPDATE = 256 * (1 << 10);
+
+}
+
+template <typename I>
+void UpdateRequest<I>::send() {
+ update_object_map();
+}
+
+template <typename I>
+void UpdateRequest<I>::update_object_map() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock));
+ ceph_assert(ceph_mutex_is_locked(*m_object_map_lock));
+ CephContext *cct = m_image_ctx.cct;
+
+ // break very large requests into manageable batches
+ m_update_end_object_no = std::min(
+ m_end_object_no, m_update_start_object_no + MAX_OBJECTS_PER_UPDATE);
+
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id));
+ ldout(cct, 20) << "ictx=" << &m_image_ctx << ", oid=" << oid << ", "
+ << "[" << m_update_start_object_no << ","
+ << m_update_end_object_no << ") = "
+ << (m_current_state ?
+ stringify(static_cast<uint32_t>(*m_current_state)) : "")
+ << "->" << static_cast<uint32_t>(m_new_state)
+ << dendl;
+
+ librados::ObjectWriteOperation op;
+ if (m_snap_id == CEPH_NOSNAP) {
+ rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", "");
+ }
+ cls_client::object_map_update(&op, m_update_start_object_no,
+ m_update_end_object_no, m_new_state,
+ m_current_state);
+
+ auto rados_completion = librbd::util::create_rados_callback<
+ UpdateRequest<I>, &UpdateRequest<I>::handle_update_object_map>(this);
+ std::vector<librados::snap_t> snaps;
+ int r = m_image_ctx.md_ctx.aio_operate(
+ oid, rados_completion, &op, 0, snaps,
+ (m_trace.valid() ? m_trace.get_info() : nullptr));
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+void UpdateRequest<I>::handle_update_object_map(int r) {
+ ldout(m_image_ctx.cct, 20) << "r=" << r << dendl;
+
+ if (r == -ENOENT && m_ignore_enoent) {
+ r = 0;
+ }
+ if (r < 0 && m_ret_val == 0) {
+ m_ret_val = r;
+ }
+
+ {
+ std::shared_lock image_locker{m_image_ctx.image_lock};
+ std::unique_lock object_map_locker{*m_object_map_lock};
+ update_in_memory_object_map();
+
+ if (m_update_end_object_no < m_end_object_no) {
+ m_update_start_object_no = m_update_end_object_no;
+ update_object_map();
+ return;
+ }
+ }
+
+ // no more batch updates to send
+ complete(m_ret_val);
+}
+
+template <typename I>
+void UpdateRequest<I>::update_in_memory_object_map() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock));
+ ceph_assert(ceph_mutex_is_locked(*m_object_map_lock));
+
+ // rebuilding the object map might update on-disk only
+ if (m_snap_id == m_image_ctx.snap_id) {
+ ldout(m_image_ctx.cct, 20) << dendl;
+
+ auto it = m_object_map.begin() +
+ std::min(m_update_start_object_no, m_object_map.size());
+ auto end_it = m_object_map.begin() +
+ std::min(m_update_end_object_no, m_object_map.size());
+ for (; it != end_it; ++it) {
+ auto state_ref = *it;
+ uint8_t state = state_ref;
+ if (!m_current_state || state == *m_current_state ||
+ (*m_current_state == OBJECT_EXISTS && state == OBJECT_EXISTS_CLEAN)) {
+ state_ref = m_new_state;
+ }
+ }
+ }
+}
+
+template <typename I>
+void UpdateRequest<I>::finish_request() {
+}
+
+} // namespace object_map
+} // namespace librbd
+
+template class librbd::object_map::UpdateRequest<librbd::ImageCtx>;
diff --git a/src/librbd/object_map/UpdateRequest.h b/src/librbd/object_map/UpdateRequest.h
new file mode 100644
index 000000000..b5a72d591
--- /dev/null
+++ b/src/librbd/object_map/UpdateRequest.h
@@ -0,0 +1,106 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_OBJECT_MAP_UPDATE_REQUEST_H
+#define CEPH_LIBRBD_OBJECT_MAP_UPDATE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/object_map/Request.h"
+#include "common/bit_vector.hpp"
+#include "common/zipkin_trace.h"
+#include "librbd/Utils.h"
+#include <boost/optional.hpp>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace object_map {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class UpdateRequest : public Request {
+public:
+ static UpdateRequest *create(ImageCtx &image_ctx,
+ ceph::shared_mutex* object_map_lock,
+ ceph::BitVector<2> *object_map,
+ uint64_t snap_id, uint64_t start_object_no,
+ uint64_t end_object_no, uint8_t new_state,
+ const boost::optional<uint8_t> &current_state,
+ const ZTracer::Trace &parent_trace,
+ bool ignore_enoent, Context *on_finish) {
+ return new UpdateRequest(image_ctx, object_map_lock, object_map, snap_id,
+ start_object_no, end_object_no, new_state,
+ current_state, parent_trace, ignore_enoent,
+ on_finish);
+ }
+
+ UpdateRequest(ImageCtx &image_ctx, ceph::shared_mutex* object_map_lock,
+ ceph::BitVector<2> *object_map, uint64_t snap_id,
+ uint64_t start_object_no, uint64_t end_object_no,
+ uint8_t new_state,
+ const boost::optional<uint8_t> &current_state,
+ const ZTracer::Trace &parent_trace, bool ignore_enoent,
+ Context *on_finish)
+ : Request(image_ctx, snap_id, on_finish),
+ m_object_map_lock(object_map_lock), m_object_map(*object_map),
+ m_start_object_no(start_object_no), m_end_object_no(end_object_no),
+ m_update_start_object_no(start_object_no), m_new_state(new_state),
+ m_current_state(current_state),
+ m_trace(util::create_trace(image_ctx, "update object map", parent_trace)),
+ m_ignore_enoent(ignore_enoent)
+ {
+ m_trace.event("start");
+ }
+ virtual ~UpdateRequest() {
+ m_trace.event("finish");
+ }
+
+ void send() override;
+
+protected:
+ void finish_request() override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * |/------------------\
+ * v | (repeat in batches)
+ * UPDATE_OBJECT_MAP -----/
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ ceph::shared_mutex* m_object_map_lock;
+ ceph::BitVector<2> &m_object_map;
+ uint64_t m_start_object_no;
+ uint64_t m_end_object_no;
+ uint64_t m_update_start_object_no;
+ uint64_t m_update_end_object_no = 0;
+ uint8_t m_new_state;
+ boost::optional<uint8_t> m_current_state;
+ ZTracer::Trace m_trace;
+ bool m_ignore_enoent;
+
+ int m_ret_val = 0;
+
+ void update_object_map();
+ void handle_update_object_map(int r);
+
+ void update_in_memory_object_map();
+
+};
+
+} // namespace object_map
+} // namespace librbd
+
+extern template class librbd::object_map::UpdateRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_OBJECT_MAP_UPDATE_REQUEST_H