summaryrefslogtreecommitdiffstats
path: root/src/librbd/deep_copy
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
commit483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
treee5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/librbd/deep_copy
parentInitial commit. (diff)
downloadceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz
ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/librbd/deep_copy')
-rw-r--r--src/librbd/deep_copy/ImageCopyRequest.cc187
-rw-r--r--src/librbd/deep_copy/ImageCopyRequest.h109
-rw-r--r--src/librbd/deep_copy/MetadataCopyRequest.cc123
-rw-r--r--src/librbd/deep_copy/MetadataCopyRequest.h77
-rw-r--r--src/librbd/deep_copy/ObjectCopyRequest.cc998
-rw-r--r--src/librbd/deep_copy/ObjectCopyRequest.h210
-rw-r--r--src/librbd/deep_copy/SetHeadRequest.cc224
-rw-r--r--src/librbd/deep_copy/SetHeadRequest.h87
-rw-r--r--src/librbd/deep_copy/SnapshotCopyRequest.cc730
-rw-r--r--src/librbd/deep_copy/SnapshotCopyRequest.h148
-rw-r--r--src/librbd/deep_copy/SnapshotCreateRequest.cc187
-rw-r--r--src/librbd/deep_copy/SnapshotCreateRequest.h95
-rw-r--r--src/librbd/deep_copy/Types.h22
-rw-r--r--src/librbd/deep_copy/Utils.cc61
-rw-r--r--src/librbd/deep_copy/Utils.h30
15 files changed, 3288 insertions, 0 deletions
diff --git a/src/librbd/deep_copy/ImageCopyRequest.cc b/src/librbd/deep_copy/ImageCopyRequest.cc
new file mode 100644
index 00000000..62e5409c
--- /dev/null
+++ b/src/librbd/deep_copy/ImageCopyRequest.cc
@@ -0,0 +1,187 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ImageCopyRequest.h"
+#include "ObjectCopyRequest.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+#include "librbd/deep_copy/Utils.h"
+#include "librbd/image/CloseRequest.h"
+#include "librbd/image/OpenRequest.h"
+#include "osdc/Striper.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::deep_copy::ImageCopyRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace deep_copy {
+
+using librbd::util::create_context_callback;
+using librbd::util::unique_lock_name;
+
+template <typename I>
+ImageCopyRequest<I>::ImageCopyRequest(I *src_image_ctx, I *dst_image_ctx,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t src_snap_id_end,
+ librados::snap_t dst_snap_id_start,
+ bool flatten,
+ const ObjectNumber &object_number,
+ const SnapSeqs &snap_seqs,
+ ProgressContext *prog_ctx,
+ Context *on_finish)
+ : RefCountedObject(dst_image_ctx->cct, 1), m_src_image_ctx(src_image_ctx),
+ m_dst_image_ctx(dst_image_ctx), m_src_snap_id_start(src_snap_id_start),
+ m_src_snap_id_end(src_snap_id_end), m_dst_snap_id_start(dst_snap_id_start),
+ m_flatten(flatten), m_object_number(object_number), m_snap_seqs(snap_seqs),
+ m_prog_ctx(prog_ctx), m_on_finish(on_finish), m_cct(dst_image_ctx->cct),
+ m_lock(unique_lock_name("ImageCopyRequest::m_lock", this)) {
+}
+
+template <typename I>
+void ImageCopyRequest<I>::send() {
+ m_dst_image_ctx->snap_lock.get_read();
+ util::compute_snap_map(m_dst_image_ctx->cct, m_src_snap_id_start,
+ m_src_snap_id_end, m_dst_image_ctx->snaps, m_snap_seqs,
+ &m_snap_map);
+ m_dst_image_ctx->snap_lock.put_read();
+
+ if (m_snap_map.empty()) {
+ lderr(m_cct) << "failed to map snapshots within boundary" << dendl;
+ finish(-EINVAL);
+ return;
+ }
+
+ send_object_copies();
+}
+
+template <typename I>
+void ImageCopyRequest<I>::cancel() {
+ Mutex::Locker locker(m_lock);
+
+ ldout(m_cct, 20) << dendl;
+ m_canceled = true;
+}
+
+template <typename I>
+void ImageCopyRequest<I>::send_object_copies() {
+ m_object_no = 0;
+ if (m_object_number) {
+ m_object_no = *m_object_number + 1;
+ }
+
+ uint64_t size;
+ {
+ RWLock::RLocker snap_locker(m_src_image_ctx->snap_lock);
+ size = m_src_image_ctx->get_image_size(CEPH_NOSNAP);
+ for (auto snap_id : m_src_image_ctx->snaps) {
+ size = std::max(size, m_src_image_ctx->get_image_size(snap_id));
+ }
+ }
+ m_end_object_no = Striper::get_num_objects(m_dst_image_ctx->layout, size);
+
+ ldout(m_cct, 20) << "start_object=" << m_object_no << ", "
+ << "end_object=" << m_end_object_no << dendl;
+
+ bool complete;
+ {
+ Mutex::Locker locker(m_lock);
+ for (uint64_t i = 0;
+ i < m_src_image_ctx->config.template get_val<uint64_t>("rbd_concurrent_management_ops");
+ ++i) {
+ send_next_object_copy();
+ if (m_ret_val < 0 && m_current_ops == 0) {
+ break;
+ }
+ }
+ complete = (m_current_ops == 0) && !m_updating_progress;
+ }
+
+ if (complete) {
+ finish(m_ret_val);
+ }
+}
+
+template <typename I>
+void ImageCopyRequest<I>::send_next_object_copy() {
+ ceph_assert(m_lock.is_locked());
+
+ if (m_canceled && m_ret_val == 0) {
+ ldout(m_cct, 10) << "image copy canceled" << dendl;
+ m_ret_val = -ECANCELED;
+ }
+
+ if (m_ret_val < 0 || m_object_no >= m_end_object_no) {
+ return;
+ }
+
+ uint64_t ono = m_object_no++;
+
+ ldout(m_cct, 20) << "object_num=" << ono << dendl;
+
+ ++m_current_ops;
+
+ Context *ctx = new FunctionContext(
+ [this, ono](int r) {
+ handle_object_copy(ono, r);
+ });
+ ObjectCopyRequest<I> *req = ObjectCopyRequest<I>::create(
+ m_src_image_ctx, m_dst_image_ctx, m_src_snap_id_start, m_dst_snap_id_start,
+ m_snap_map, ono, m_flatten, ctx);
+ req->send();
+}
+
+template <typename I>
+void ImageCopyRequest<I>::handle_object_copy(uint64_t object_no, int r) {
+ ldout(m_cct, 20) << "object_no=" << object_no << ", r=" << r << dendl;
+
+ bool complete;
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_current_ops > 0);
+ --m_current_ops;
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "object copy failed: " << cpp_strerror(r) << dendl;
+ if (m_ret_val == 0) {
+ m_ret_val = r;
+ }
+ } else {
+ m_copied_objects.push(object_no);
+ while (!m_updating_progress && !m_copied_objects.empty() &&
+ m_copied_objects.top() ==
+ (m_object_number ? *m_object_number + 1 : 0)) {
+ m_object_number = m_copied_objects.top();
+ m_copied_objects.pop();
+ uint64_t progress_object_no = *m_object_number + 1;
+ m_updating_progress = true;
+ m_lock.Unlock();
+ m_prog_ctx->update_progress(progress_object_no, m_end_object_no);
+ m_lock.Lock();
+ ceph_assert(m_updating_progress);
+ m_updating_progress = false;
+ }
+ }
+
+ send_next_object_copy();
+ complete = (m_current_ops == 0) && !m_updating_progress;
+ }
+
+ if (complete) {
+ finish(m_ret_val);
+ }
+}
+
+template <typename I>
+void ImageCopyRequest<I>::finish(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ put();
+}
+
+} // namespace deep_copy
+} // namespace librbd
+
+template class librbd::deep_copy::ImageCopyRequest<librbd::ImageCtx>;
diff --git a/src/librbd/deep_copy/ImageCopyRequest.h b/src/librbd/deep_copy/ImageCopyRequest.h
new file mode 100644
index 00000000..189cb237
--- /dev/null
+++ b/src/librbd/deep_copy/ImageCopyRequest.h
@@ -0,0 +1,109 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_DEEP_COPY_IMAGE_DEEP_COPY_REQUEST_H
+#define CEPH_LIBRBD_DEEP_COPY_IMAGE_DEEP_COPY_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/Mutex.h"
+#include "common/RefCountedObj.h"
+#include "librbd/Types.h"
+#include "librbd/deep_copy/Types.h"
+#include <functional>
+#include <map>
+#include <queue>
+#include <vector>
+#include <boost/optional.hpp>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace deep_copy {
+
+template <typename ImageCtxT = ImageCtx>
+class ImageCopyRequest : public RefCountedObject {
+public:
+ static ImageCopyRequest* create(ImageCtxT *src_image_ctx,
+ ImageCtxT *dst_image_ctx,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t src_snap_id_end,
+ librados::snap_t dst_snap_id_start,
+ bool flatten,
+ const ObjectNumber &object_number,
+ const SnapSeqs &snap_seqs,
+ ProgressContext *prog_ctx,
+ Context *on_finish) {
+ return new ImageCopyRequest(src_image_ctx, dst_image_ctx, src_snap_id_start,
+ src_snap_id_end, dst_snap_id_start, flatten,
+ object_number, snap_seqs, prog_ctx, on_finish);
+ }
+
+ ImageCopyRequest(ImageCtxT *src_image_ctx, ImageCtxT *dst_image_ctx,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t src_snap_id_end,
+ librados::snap_t dst_snap_id_start,
+ bool flatten, const ObjectNumber &object_number,
+ const SnapSeqs &snap_seqs, ProgressContext *prog_ctx,
+ Context *on_finish);
+
+ void send();
+ void cancel();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * | . . . . .
+ * | . . (parallel execution of
+ * v v . multiple objects at once)
+ * COPY_OBJECT . . . .
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT *m_src_image_ctx;
+ ImageCtxT *m_dst_image_ctx;
+ librados::snap_t m_src_snap_id_start;
+ librados::snap_t m_src_snap_id_end;
+ librados::snap_t m_dst_snap_id_start;
+ bool m_flatten;
+ ObjectNumber m_object_number;
+ SnapSeqs m_snap_seqs;
+ ProgressContext *m_prog_ctx;
+ Context *m_on_finish;
+
+ CephContext *m_cct;
+ Mutex m_lock;
+ bool m_canceled = false;
+
+ uint64_t m_object_no = 0;
+ uint64_t m_end_object_no = 0;
+ uint64_t m_current_ops = 0;
+ std::priority_queue<
+ uint64_t, std::vector<uint64_t>, std::greater<uint64_t>> m_copied_objects;
+ bool m_updating_progress = false;
+ SnapMap m_snap_map;
+ int m_ret_val = 0;
+
+ void send_object_copies();
+ void send_next_object_copy();
+ void handle_object_copy(uint64_t object_no, int r);
+
+ void finish(int r);
+};
+
+} // namespace deep_copy
+} // namespace librbd
+
+extern template class librbd::deep_copy::ImageCopyRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_DEEP_COPY_IMAGE_DEEP_COPY_REQUEST_H
diff --git a/src/librbd/deep_copy/MetadataCopyRequest.cc b/src/librbd/deep_copy/MetadataCopyRequest.cc
new file mode 100644
index 00000000..cbce9a19
--- /dev/null
+++ b/src/librbd/deep_copy/MetadataCopyRequest.cc
@@ -0,0 +1,123 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "MetadataCopyRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::deep_copy::MetadataCopyRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace deep_copy {
+
+namespace {
+
+const uint64_t MAX_METADATA_ITEMS = 128;
+
+} // anonymous namespace
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+MetadataCopyRequest<I>::MetadataCopyRequest(I *src_image_ctx, I *dst_image_ctx,
+ Context *on_finish)
+ : m_src_image_ctx(src_image_ctx), m_dst_image_ctx(dst_image_ctx),
+ m_on_finish(on_finish), m_cct(dst_image_ctx->cct) {
+}
+
+template <typename I>
+void MetadataCopyRequest<I>::send() {
+ list_src_metadata();
+}
+
+template <typename I>
+void MetadataCopyRequest<I>::list_src_metadata() {
+ ldout(m_cct, 20) << "start_key=" << m_last_metadata_key << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::metadata_list_start(&op, m_last_metadata_key, MAX_METADATA_ITEMS);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ MetadataCopyRequest<I>,
+ &MetadataCopyRequest<I>::handle_list_src_metadata>(this);
+ m_out_bl.clear();
+ m_src_image_ctx->md_ctx.aio_operate(m_src_image_ctx->header_oid,
+ aio_comp, &op, &m_out_bl);
+ aio_comp->release();
+}
+
+template <typename I>
+void MetadataCopyRequest<I>::handle_list_src_metadata(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ Metadata metadata;
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::metadata_list_finish(&it, &metadata);
+ }
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to retrieve metadata: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (metadata.empty()) {
+ finish(0);
+ return;
+ }
+
+ m_last_metadata_key = metadata.rbegin()->first;
+ m_more_metadata = (metadata.size() >= MAX_METADATA_ITEMS);
+ set_dst_metadata(metadata);
+}
+
+template <typename I>
+void MetadataCopyRequest<I>::set_dst_metadata(const Metadata& metadata) {
+ ldout(m_cct, 20) << "count=" << metadata.size() << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::metadata_set(&op, metadata);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ MetadataCopyRequest<I>,
+ &MetadataCopyRequest<I>::handle_set_dst_metadata>(this);
+ m_dst_image_ctx->md_ctx.aio_operate(m_dst_image_ctx->header_oid, aio_comp,
+ &op);
+ aio_comp->release();
+}
+
+template <typename I>
+void MetadataCopyRequest<I>::handle_set_dst_metadata(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to set metadata: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (m_more_metadata) {
+ list_src_metadata();
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void MetadataCopyRequest<I>::finish(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace deep_copy
+} // namespace librbd
+
+template class librbd::deep_copy::MetadataCopyRequest<librbd::ImageCtx>;
diff --git a/src/librbd/deep_copy/MetadataCopyRequest.h b/src/librbd/deep_copy/MetadataCopyRequest.h
new file mode 100644
index 00000000..5bd69d8b
--- /dev/null
+++ b/src/librbd/deep_copy/MetadataCopyRequest.h
@@ -0,0 +1,77 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_DEEP_COPY_METADATA_COPY_REQUEST_H
+#define CEPH_LIBRBD_DEEP_COPY_METADATA_COPY_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/buffer.h"
+#include "include/rados/librados.hpp"
+#include "librbd/ImageCtx.h"
+#include <map>
+#include <string>
+
+class Context;
+
+namespace librbd {
+namespace deep_copy {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class MetadataCopyRequest {
+public:
+ static MetadataCopyRequest* create(ImageCtxT *src_image_ctx,
+ ImageCtxT *dst_image_ctx,
+ Context *on_finish) {
+ return new MetadataCopyRequest(src_image_ctx, dst_image_ctx, on_finish);
+ }
+
+ MetadataCopyRequest(ImageCtxT *src_image_ctx, ImageCtxT *dst_image_ctx,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * LIST_SRC_METADATA <------\
+ * | | (repeat if additional
+ * v | metadata)
+ * SET_DST_METADATA --------/
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ typedef std::map<std::string, bufferlist> Metadata;
+
+ ImageCtxT *m_src_image_ctx;
+ ImageCtxT *m_dst_image_ctx;
+ Context *m_on_finish;
+
+ CephContext *m_cct;
+ bufferlist m_out_bl;
+
+ std::string m_last_metadata_key;
+ bool m_more_metadata = false;
+
+ void list_src_metadata();
+ void handle_list_src_metadata(int r);
+
+ void set_dst_metadata(const Metadata& metadata);
+ void handle_set_dst_metadata(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace deep_copy
+} // namespace librbd
+
+extern template class librbd::deep_copy::MetadataCopyRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_DEEP_COPY_METADATA_COPY_REQUEST_H
diff --git a/src/librbd/deep_copy/ObjectCopyRequest.cc b/src/librbd/deep_copy/ObjectCopyRequest.cc
new file mode 100644
index 00000000..2400757a
--- /dev/null
+++ b/src/librbd/deep_copy/ObjectCopyRequest.cc
@@ -0,0 +1,998 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ObjectCopyRequest.h"
+#include "common/errno.h"
+#include "librados/snap_set_diff.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/AsyncOperation.h"
+#include "librbd/io/ImageRequest.h"
+#include "librbd/io/ReadResult.h"
+#include "osdc/Striper.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::deep_copy::ObjectCopyRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librados {
+
+inline bool operator==(const clone_info_t& rhs, const clone_info_t& lhs) {
+ return (rhs.cloneid == lhs.cloneid &&
+ rhs.snaps == lhs.snaps &&
+ rhs.overlap == lhs.overlap &&
+ rhs.size == lhs.size);
+}
+
+inline bool operator==(const snap_set_t& rhs, const snap_set_t& lhs) {
+ return (rhs.clones == lhs.clones &&
+ rhs.seq == lhs.seq);
+}
+
+} // namespace librados
+
+namespace librbd {
+namespace deep_copy {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+ObjectCopyRequest<I>::ObjectCopyRequest(I *src_image_ctx,
+ I *dst_image_ctx,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t dst_snap_id_start,
+ const SnapMap &snap_map,
+ uint64_t dst_object_number,
+ bool flatten, Context *on_finish)
+ : m_src_image_ctx(src_image_ctx),
+ m_dst_image_ctx(dst_image_ctx), m_cct(dst_image_ctx->cct),
+ m_src_snap_id_start(src_snap_id_start),
+ m_dst_snap_id_start(dst_snap_id_start), m_snap_map(snap_map),
+ m_dst_object_number(dst_object_number), m_flatten(flatten),
+ m_on_finish(on_finish) {
+ ceph_assert(src_image_ctx->data_ctx.is_valid());
+ ceph_assert(dst_image_ctx->data_ctx.is_valid());
+ ceph_assert(!m_snap_map.empty());
+
+ m_src_async_op = new io::AsyncOperation();
+ m_src_async_op->start_op(*util::get_image_ctx(m_src_image_ctx));
+
+ m_src_io_ctx.dup(m_src_image_ctx->data_ctx);
+ m_dst_io_ctx.dup(m_dst_image_ctx->data_ctx);
+
+ m_dst_oid = m_dst_image_ctx->get_object_name(dst_object_number);
+
+ ldout(m_cct, 20) << "dst_oid=" << m_dst_oid << dendl;
+
+ compute_src_object_extents();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send() {
+ send_list_snaps();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_list_snaps() {
+ ceph_assert(!m_src_objects.empty());
+ m_src_ono = *m_src_objects.begin();
+ m_src_oid = m_src_image_ctx->get_object_name(m_src_ono);
+
+ ldout(m_cct, 20) << "src_oid=" << m_src_oid << dendl;
+
+ librados::AioCompletion *rados_completion = create_rados_callback<
+ ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_list_snaps>(this);
+
+ librados::ObjectReadOperation op;
+ m_snap_set = {};
+ m_snap_ret = 0;
+ op.list_snaps(&m_snap_set, &m_snap_ret);
+
+ m_src_io_ctx.snap_set_read(CEPH_SNAPDIR);
+ int r = m_src_io_ctx.aio_operate(m_src_oid, rados_completion, &op,
+ nullptr);
+ ceph_assert(r == 0);
+ rados_completion->release();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_list_snaps(int r) {
+ if (r == 0 && m_snap_ret < 0) {
+ r = m_snap_ret;
+ }
+
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "failed to list snaps: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (m_retry_missing_read) {
+ if (m_snap_set == m_retry_snap_set) {
+ lderr(m_cct) << "read encountered missing object using up-to-date snap set"
+ << dendl;
+ finish(-ENOENT);
+ return;
+ }
+
+ ldout(m_cct, 20) << "retrying using updated snap set" << dendl;
+ m_retry_missing_read = false;
+ m_retry_snap_set = {};
+ }
+
+ if (r == -ENOENT) {
+ for (auto &it : m_src_object_extents) {
+ auto &e = it.second;
+ if (e.object_no == m_src_ono) {
+ e.noent = true;
+ }
+ }
+ m_read_ops = {};
+ m_read_snaps = {};
+ m_zero_interval = {};
+ } else {
+ compute_read_ops();
+ }
+ send_read_object();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_read_object() {
+
+ if (m_read_snaps.empty()) {
+ // all snapshots have been read
+ merge_write_ops();
+
+ ceph_assert(!m_src_objects.empty());
+ m_src_objects.erase(m_src_objects.begin());
+
+ if (!m_src_objects.empty()) {
+ send_list_snaps();
+ return;
+ }
+
+ // all objects have been read
+ send_read_from_parent();
+ return;
+ }
+
+ auto index = *m_read_snaps.begin();
+ auto src_snap_seq = index.second;
+
+ bool read_required = false;
+ librados::ObjectReadOperation op;
+
+ for (auto &copy_op : m_read_ops[index]) {
+ if (!read_required) {
+ // map the copy op start snap id back to the necessary read snap id
+ m_src_io_ctx.snap_set_read(src_snap_seq);
+
+ ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << dendl;
+ read_required = true;
+ }
+ ldout(m_cct, 20) << "read op: " << copy_op.src_offset << "~"
+ << copy_op.length << dendl;
+ op.sparse_read(copy_op.src_offset, copy_op.length, &copy_op.src_extent_map,
+ &copy_op.out_bl, nullptr);
+ op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
+ LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
+ }
+
+ if (!read_required) {
+ // nothing written to this object for this snapshot (must be trunc/remove)
+ handle_read_object(0);
+ return;
+ }
+
+ auto ctx = create_context_callback<
+ ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_object>(this);
+ auto comp = create_rados_callback(ctx);
+
+ ldout(m_cct, 20) << "read " << m_src_oid << dendl;
+
+ int r = m_src_io_ctx.aio_operate(m_src_oid, comp, &op, nullptr);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_read_object(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ m_retry_snap_set = m_snap_set;
+ m_retry_missing_read = true;
+
+ ldout(m_cct, 5) << "object missing potentially due to removed snapshot"
+ << dendl;
+ send_list_snaps();
+ return;
+ }
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to read from source object: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ ceph_assert(!m_read_snaps.empty());
+ m_read_snaps.erase(m_read_snaps.begin());
+
+ send_read_object();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_read_from_parent() {
+ m_src_image_ctx->snap_lock.get_read();
+ m_src_image_ctx->parent_lock.get_read();
+ io::Extents image_extents;
+ compute_read_from_parent_ops(&image_extents);
+ m_src_image_ctx->snap_lock.put_read();
+
+ if (image_extents.empty()) {
+ m_src_image_ctx->parent_lock.put_read();
+ handle_read_from_parent(0);
+ return;
+ }
+
+ ldout(m_cct, 20) << dendl;
+
+ ceph_assert(m_src_image_ctx->parent != nullptr);
+
+ auto ctx = create_context_callback<
+ ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_from_parent>(this);
+ auto comp = io::AioCompletion::create_and_start(
+ ctx, util::get_image_ctx(m_src_image_ctx->parent), io::AIO_TYPE_READ);
+ ldout(m_cct, 20) << "completion " << comp << ", extents " << image_extents
+ << dendl;
+
+ auto src_image_ctx = m_src_image_ctx;
+ io::ImageRequest<I>::aio_read(src_image_ctx->parent, comp,
+ std::move(image_extents),
+ io::ReadResult{&m_read_from_parent_data}, 0,
+ ZTracer::Trace());
+ src_image_ctx->parent_lock.put_read();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_read_from_parent(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to read from parent: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (!m_read_ops.empty()) {
+ ceph_assert(m_read_ops.size() == 1);
+ auto src_snap_seq = m_read_ops.begin()->first.first;
+ auto &copy_ops = m_read_ops.begin()->second;
+ uint64_t offset = 0;
+ for (auto it = copy_ops.begin(); it != copy_ops.end(); ) {
+ it->out_bl.substr_of(m_read_from_parent_data, offset, it->length);
+ offset += it->length;
+ if (it->out_bl.is_zero()) {
+ m_zero_interval[src_snap_seq].insert(it->dst_offset, it->length);
+ it = copy_ops.erase(it);
+ } else {
+ it++;
+ }
+ }
+ merge_write_ops();
+ }
+
+ compute_dst_object_may_exist();
+ compute_zero_ops();
+
+ if (m_write_ops.empty()) {
+ // nothing to copy
+ finish(-ENOENT);
+ return;
+ }
+
+ send_write_object();
+ return;
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_write_object() {
+ ceph_assert(!m_write_ops.empty());
+ auto& copy_ops = m_write_ops.begin()->second;
+
+ // retrieve the destination snap context for the op
+ SnapIds dst_snap_ids;
+ librados::snap_t dst_snap_seq = 0;
+ librados::snap_t src_snap_seq = m_write_ops.begin()->first;
+ if (src_snap_seq != 0) {
+ auto snap_map_it = m_snap_map.find(src_snap_seq);
+ ceph_assert(snap_map_it != m_snap_map.end());
+
+ auto dst_snap_id = snap_map_it->second.front();
+ auto dst_may_exist_it = m_dst_object_may_exist.find(dst_snap_id);
+ ceph_assert(dst_may_exist_it != m_dst_object_may_exist.end());
+ if (!dst_may_exist_it->second && !copy_ops.empty()) {
+ // if the object cannot exist, the only valid op is to remove it
+ ceph_assert(copy_ops.size() == 1U);
+ ceph_assert(copy_ops.begin()->type == COPY_OP_TYPE_REMOVE);
+ }
+
+ // write snapshot context should be before actual snapshot
+ ceph_assert(!snap_map_it->second.empty());
+ auto dst_snap_ids_it = snap_map_it->second.begin();
+ ++dst_snap_ids_it;
+
+ dst_snap_ids = SnapIds{dst_snap_ids_it, snap_map_it->second.end()};
+ if (!dst_snap_ids.empty()) {
+ dst_snap_seq = dst_snap_ids.front();
+ }
+ ceph_assert(dst_snap_seq != CEPH_NOSNAP);
+ }
+
+ ldout(m_cct, 20) << "dst_snap_seq=" << dst_snap_seq << ", "
+ << "dst_snaps=" << dst_snap_ids << dendl;
+
+ librados::ObjectWriteOperation op;
+ uint64_t buffer_offset;
+
+ if (!m_dst_image_ctx->migration_info.empty()) {
+ cls_client::assert_snapc_seq(&op, dst_snap_seq,
+ cls::rbd::ASSERT_SNAPC_SEQ_GT_SNAPSET_SEQ);
+ }
+
+ for (auto &copy_op : copy_ops) {
+ switch (copy_op.type) {
+ case COPY_OP_TYPE_WRITE:
+ buffer_offset = 0;
+ for (auto &e : copy_op.dst_extent_map) {
+ ldout(m_cct, 20) << "write op: " << e.first << "~" << e.second
+ << dendl;
+ bufferlist tmpbl;
+ tmpbl.substr_of(copy_op.out_bl, buffer_offset, e.second);
+ op.write(e.first, tmpbl);
+ op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
+ LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
+ buffer_offset += e.second;
+ }
+ break;
+ case COPY_OP_TYPE_ZERO:
+ ldout(m_cct, 20) << "zero op: " << copy_op.dst_offset << "~"
+ << copy_op.length << dendl;
+ op.zero(copy_op.dst_offset, copy_op.length);
+ break;
+ case COPY_OP_TYPE_REMOVE_TRUNC:
+ ldout(m_cct, 20) << "create op" << dendl;
+ op.create(false);
+ // fall through
+ case COPY_OP_TYPE_TRUNC:
+ ldout(m_cct, 20) << "trunc op: " << copy_op.dst_offset << dendl;
+ op.truncate(copy_op.dst_offset);
+ break;
+ case COPY_OP_TYPE_REMOVE:
+ ldout(m_cct, 20) << "remove op" << dendl;
+ op.remove();
+ break;
+ default:
+ ceph_abort();
+ }
+ }
+
+ if (op.size() == (m_dst_image_ctx->migration_info.empty() ? 0 : 1)) {
+ handle_write_object(0);
+ return;
+ }
+
+ int r;
+ Context *finish_op_ctx;
+ {
+ RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock);
+ finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r);
+ }
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_write_object(r);
+ finish_op_ctx->complete(0);
+ });
+ librados::AioCompletion *comp = create_rados_callback(ctx);
+ r = m_dst_io_ctx.aio_operate(m_dst_oid, comp, &op, dst_snap_seq, dst_snap_ids,
+ nullptr);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_write_object(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ r = 0;
+ } else if (r == -ERANGE) {
+ ldout(m_cct, 10) << "concurrent deep copy" << dendl;
+ r = 0;
+ }
+ if (r < 0) {
+ lderr(m_cct) << "failed to write to destination object: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ m_write_ops.erase(m_write_ops.begin());
+ if (!m_write_ops.empty()) {
+ send_write_object();
+ return;
+ }
+
+ send_update_object_map();
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::send_update_object_map() {
+ if (!m_dst_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP) ||
+ m_dst_object_state.empty()) {
+ finish(0);
+ return;
+ }
+
+ m_dst_image_ctx->owner_lock.get_read();
+ m_dst_image_ctx->snap_lock.get_read();
+ if (m_dst_image_ctx->object_map == nullptr) {
+ // possible that exclusive lock was lost in background
+ lderr(m_cct) << "object map is not initialized" << dendl;
+
+ m_dst_image_ctx->snap_lock.put_read();
+ m_dst_image_ctx->owner_lock.put_read();
+ finish(-EINVAL);
+ return;
+ }
+
+ auto &dst_object_state = *m_dst_object_state.begin();
+ auto it = m_snap_map.find(dst_object_state.first);
+ ceph_assert(it != m_snap_map.end());
+ auto dst_snap_id = it->second.front();
+ auto object_state = dst_object_state.second;
+ m_dst_object_state.erase(m_dst_object_state.begin());
+
+ ldout(m_cct, 20) << "dst_snap_id=" << dst_snap_id << ", object_state="
+ << static_cast<uint32_t>(object_state) << dendl;
+
+ int r;
+ auto finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ m_dst_image_ctx->snap_lock.put_read();
+ m_dst_image_ctx->owner_lock.put_read();
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_update_object_map(r);
+ finish_op_ctx->complete(0);
+ });
+
+ auto dst_image_ctx = m_dst_image_ctx;
+ dst_image_ctx->object_map_lock.get_write();
+ bool sent = dst_image_ctx->object_map->template aio_update<
+ Context, &Context::complete>(dst_snap_id, m_dst_object_number, object_state,
+ {}, {}, false, ctx);
+
+ // NOTE: state machine might complete before we reach here
+ dst_image_ctx->object_map_lock.put_write();
+ dst_image_ctx->snap_lock.put_read();
+ dst_image_ctx->owner_lock.put_read();
+ if (!sent) {
+ ceph_assert(dst_snap_id == CEPH_NOSNAP);
+ ctx->complete(0);
+ }
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::handle_update_object_map(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to update object map: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (!m_dst_object_state.empty()) {
+ send_update_object_map();
+ return;
+ }
+ finish(0);
+}
+
+template <typename I>
+Context *ObjectCopyRequest<I>::start_lock_op(RWLock &owner_lock, int* r) {
+ ceph_assert(m_dst_image_ctx->owner_lock.is_locked());
+ if (m_dst_image_ctx->exclusive_lock == nullptr) {
+ return new FunctionContext([](int r) {});
+ }
+ return m_dst_image_ctx->exclusive_lock->start_op(r);
+}
+
+template <typename I>
+uint64_t ObjectCopyRequest<I>::src_to_dst_object_offset(uint64_t objectno,
+ uint64_t offset) {
+ std::vector<std::pair<uint64_t, uint64_t>> image_extents;
+ Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, objectno, offset, 1,
+ image_extents);
+ ceph_assert(image_extents.size() == 1);
+ auto dst_object_offset = image_extents.begin()->first;
+
+ std::map<object_t, std::vector<ObjectExtent>> dst_object_extents;
+ Striper::file_to_extents(m_cct, m_dst_image_ctx->format_string,
+ &m_dst_image_ctx->layout, dst_object_offset, 1, 0,
+ dst_object_extents);
+ ceph_assert(dst_object_extents.size() == 1);
+ ceph_assert(dst_object_extents.begin()->second.size() == 1);
+ auto &e = *dst_object_extents.begin()->second.begin();
+ ceph_assert(e.objectno == m_dst_object_number);
+
+ return e.offset;
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::compute_src_object_extents() {
+ std::vector<std::pair<uint64_t, uint64_t>> image_extents;
+ Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout, m_dst_object_number,
+ 0, m_dst_image_ctx->layout.object_size, image_extents);
+
+ size_t total = 0;
+ for (auto &e : image_extents) {
+ std::map<object_t, std::vector<ObjectExtent>> src_object_extents;
+ Striper::file_to_extents(m_cct, m_src_image_ctx->format_string,
+ &m_src_image_ctx->layout, e.first, e.second, 0,
+ src_object_extents);
+ auto stripe_unit = std::min(m_src_image_ctx->layout.stripe_unit,
+ m_dst_image_ctx->layout.stripe_unit);
+ for (auto &p : src_object_extents) {
+ for (auto &s : p.second) {
+ m_src_objects.insert(s.objectno);
+ total += s.length;
+ while (s.length > 0) {
+ ceph_assert(s.length >= stripe_unit);
+ auto dst_object_offset = src_to_dst_object_offset(s.objectno, s.offset);
+ m_src_object_extents[dst_object_offset] = {s.objectno, s.offset,
+ stripe_unit};
+ s.offset += stripe_unit;
+ s.length -= stripe_unit;
+ }
+ }
+ }
+ }
+
+ ceph_assert(total == m_dst_image_ctx->layout.object_size);
+
+ ldout(m_cct, 20) << m_src_object_extents.size() << " src extents" << dendl;
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::compute_read_ops() {
+ m_read_ops = {};
+ m_read_snaps = {};
+ m_zero_interval = {};
+
+ m_src_image_ctx->parent_lock.get_read();
+ bool hide_parent = (m_src_image_ctx->parent != nullptr);
+ m_src_image_ctx->parent_lock.put_read();
+
+ librados::snap_t src_copy_point_snap_id = m_snap_map.rbegin()->first;
+ bool prev_exists = (hide_parent || m_src_snap_id_start > 0);
+ uint64_t prev_end_size = prev_exists ?
+ m_src_image_ctx->layout.object_size : 0;
+ librados::snap_t start_src_snap_id = m_src_snap_id_start;
+
+ for (auto &pair : m_snap_map) {
+ ceph_assert(!pair.second.empty());
+ librados::snap_t end_src_snap_id = pair.first;
+ librados::snap_t end_dst_snap_id = pair.second.front();
+
+ interval_set<uint64_t> diff;
+ uint64_t end_size;
+ bool exists;
+ librados::snap_t clone_end_snap_id;
+ calc_snap_set_diff(m_cct, m_snap_set, start_src_snap_id,
+ end_src_snap_id, &diff, &end_size, &exists,
+ &clone_end_snap_id, &m_read_whole_object);
+
+ if (m_read_whole_object) {
+ ldout(m_cct, 1) << "need to read full object" << dendl;
+ diff.insert(0, m_src_image_ctx->layout.object_size);
+ exists = true;
+ end_size = m_src_image_ctx->layout.object_size;
+ clone_end_snap_id = end_src_snap_id;
+ } else if (!exists) {
+ end_size = 0;
+ if (hide_parent && end_src_snap_id == m_snap_map.begin()->first &&
+ m_snap_set.clones.empty()) {
+ ldout(m_cct, 20) << "no clones for existing object" << dendl;
+ exists = true;
+ diff.insert(0, m_src_image_ctx->layout.object_size);
+ clone_end_snap_id = end_src_snap_id;
+ }
+ }
+
+ ldout(m_cct, 20) << "start_src_snap_id=" << start_src_snap_id << ", "
+ << "end_src_snap_id=" << end_src_snap_id << ", "
+ << "clone_end_snap_id=" << clone_end_snap_id << ", "
+ << "end_dst_snap_id=" << end_dst_snap_id << ", "
+ << "diff=" << diff << ", "
+ << "end_size=" << end_size << ", "
+ << "exists=" << exists << dendl;
+
+ m_zero_interval[end_src_snap_id] = {};
+
+ if (exists || prev_exists) {
+ // clip diff to size of object (in case it was truncated)
+ if (end_size < prev_end_size) {
+ interval_set<uint64_t> trunc;
+ trunc.insert(end_size, prev_end_size);
+ trunc.intersection_of(diff);
+ diff.subtract(trunc);
+ ldout(m_cct, 20) << "clearing truncate diff: " << trunc << dendl;
+ }
+
+ if (exists) {
+ // reads should be issued against the newest (existing) snapshot within
+ // the associated snapshot object clone. writes should be issued
+ // against the oldest snapshot in the snap_map.
+ ceph_assert(clone_end_snap_id >= end_src_snap_id);
+ if (clone_end_snap_id > src_copy_point_snap_id) {
+ // do not read past the copy point snapshot
+ clone_end_snap_id = src_copy_point_snap_id;
+ }
+ }
+
+ for (auto &it : m_src_object_extents) {
+ auto dst_object_offset = it.first;
+ auto &e = it.second;
+
+ if (e.object_no != m_src_ono) {
+ continue;
+ }
+
+ interval_set<uint64_t> read_interval;
+ read_interval.insert(e.offset, e.length);
+
+ if (end_size < prev_end_size) {
+ interval_set<uint64_t> zero_interval;
+ zero_interval.insert(end_size, prev_end_size - end_size);
+ zero_interval.intersection_of(read_interval);
+ if (!zero_interval.empty()) {
+ auto it = zero_interval.begin();
+ auto offset = it.get_start() - e.offset;
+ m_zero_interval[end_src_snap_id].insert(dst_object_offset + offset,
+ it.get_len());
+ ldout(m_cct, 20) << "extent " << e.offset << "~" << e.length
+ << " intersects truncation " << end_size << "~"
+ << prev_end_size - end_size << ", inserting zero "
+ << dst_object_offset + offset << "~"
+ << it.get_len() << dendl;
+ }
+ }
+
+ // limit read interval to diff
+ read_interval.intersection_of(diff);
+
+ ldout(m_cct, 20) << "src_object_extent: " << e.offset << "~" << e.length
+ << ", dst_object_offset=" << dst_object_offset
+ << ", read: " << read_interval << dendl;
+
+ ceph_assert(exists || read_interval.empty());
+
+ for (auto it = read_interval.begin(); it != read_interval.end();
+ it++) {
+ ceph_assert(it.get_start() >= e.offset);
+ auto offset = it.get_start() - e.offset;
+ ldout(m_cct, 20) << "read/write op: " << it.get_start() << "~"
+ << it.get_len() << " dst: "
+ << dst_object_offset + offset << dendl;
+ m_read_ops[{end_src_snap_id, clone_end_snap_id}]
+ .emplace_back(COPY_OP_TYPE_WRITE, it.get_start(),
+ dst_object_offset + offset, it.get_len());
+ }
+ }
+ }
+
+ prev_end_size = end_size;
+ prev_exists = exists;
+ if (hide_parent && prev_exists && prev_end_size == 0) {
+ // hide parent
+ prev_end_size = m_src_image_ctx->layout.object_size;
+ }
+ start_src_snap_id = end_src_snap_id;
+ }
+
+ for (auto &it : m_read_ops) {
+ m_read_snaps.push_back(it.first);
+ }
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::compute_read_from_parent_ops(
+ io::Extents *parent_image_extents) {
+ assert(m_src_image_ctx->snap_lock.is_locked());
+ assert(m_src_image_ctx->parent_lock.is_locked());
+
+ m_read_ops = {};
+ m_zero_interval = {};
+ parent_image_extents->clear();
+
+ if (m_src_image_ctx->parent == nullptr) {
+ ldout(m_cct, 20) << "no parent" << dendl;
+ return;
+ }
+
+ size_t noent_count = 0;
+ for (auto &it : m_src_object_extents) {
+ if (it.second.noent) {
+ noent_count++;
+ }
+ }
+
+ if (noent_count == 0) {
+ ldout(m_cct, 20) << "no extents need read from parent" << dendl;
+ return;
+ }
+
+ if (noent_count == m_src_object_extents.size() && !m_flatten) {
+ ldout(m_cct, 20) << "reading all extents skipped when no flatten"
+ << dendl;
+ return;
+ }
+
+ ldout(m_cct, 20) << dendl;
+
+ auto src_snap_seq = m_snap_map.begin()->first;
+
+ uint64_t parent_overlap;
+ int r = m_src_image_ctx->get_parent_overlap(src_snap_seq, &parent_overlap);
+ if (r < 0) {
+ ldout(m_cct, 5) << "failed getting parent overlap for snap_id: "
+ << src_snap_seq << ": " << cpp_strerror(r) << dendl;
+ return;
+ }
+ if (parent_overlap == 0) {
+ ldout(m_cct, 20) << "no parent overlap" << dendl;
+ return;
+ }
+
+ for (auto &it : m_src_object_extents) {
+ auto dst_object_offset = it.first;
+ auto &e = it.second;
+
+ if (!e.noent) {
+ continue;
+ }
+
+ std::vector<std::pair<uint64_t, uint64_t>> image_extents;
+ Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, e.object_no,
+ e.offset, e.length, image_extents);
+
+ uint64_t overlap = m_src_image_ctx->prune_parent_extents(image_extents,
+ parent_overlap);
+ if (overlap == 0) {
+ ldout(m_cct, 20) << "no parent overlap for object_no " << e.object_no
+ << " extent " << e.offset << "~" << e.length << dendl;
+ continue;
+ }
+
+ ldout(m_cct, 20) << "object_no " << e.object_no << " extent " << e.offset
+ << "~" << e.length << " overlap " << parent_overlap
+ << " parent extents " << image_extents << dendl;
+
+ ceph_assert(image_extents.size() == 1);
+
+ auto src_image_offset = image_extents.begin()->first;
+ auto length = image_extents.begin()->second;
+ m_read_ops[{src_snap_seq, 0}].emplace_back(COPY_OP_TYPE_WRITE, e.offset,
+ dst_object_offset, length);
+ m_read_ops[{src_snap_seq, 0}].rbegin()->src_extent_map[e.offset] = length;
+ parent_image_extents->emplace_back(src_image_offset, length);
+ }
+
+ if (!parent_image_extents->empty()) {
+ m_dst_object_state[src_snap_seq] = OBJECT_EXISTS;
+ }
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::merge_write_ops() {
+ ldout(m_cct, 20) << dendl;
+
+ for (auto &it : m_zero_interval) {
+ m_dst_zero_interval[it.first].insert(it.second);
+ }
+
+ for (auto &it : m_read_ops) {
+ auto src_snap_seq = it.first.first;
+ auto &copy_ops = it.second;
+ for (auto &copy_op : copy_ops) {
+ uint64_t src_offset = copy_op.src_offset;
+ uint64_t dst_offset = copy_op.dst_offset;
+ for (auto &e : copy_op.src_extent_map) {
+ uint64_t zero_len = e.first - src_offset;
+ if (zero_len > 0) {
+ ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq
+ << ", inserting zero " << dst_offset << "~"
+ << zero_len << dendl;
+ m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len);
+ src_offset += zero_len;
+ dst_offset += zero_len;
+ }
+ copy_op.dst_extent_map[dst_offset] = e.second;
+ src_offset += e.second;
+ dst_offset += e.second;
+ }
+ if (dst_offset < copy_op.dst_offset + copy_op.length) {
+ uint64_t zero_len = copy_op.dst_offset + copy_op.length - dst_offset;
+ ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq
+ << ", inserting zero " << dst_offset << "~"
+ << zero_len << dendl;
+ m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len);
+ } else {
+ ceph_assert(dst_offset == copy_op.dst_offset + copy_op.length);
+ }
+ m_write_ops[src_snap_seq].emplace_back(std::move(copy_op));
+ }
+ }
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::compute_zero_ops() {
+ ldout(m_cct, 20) << dendl;
+
+ bool fast_diff = m_dst_image_ctx->test_features(RBD_FEATURE_FAST_DIFF);
+ uint64_t prev_end_size = 0;
+
+ m_src_image_ctx->parent_lock.get_read();
+ bool hide_parent = (m_src_image_ctx->parent != nullptr);
+ m_src_image_ctx->parent_lock.put_read();
+
+ for (auto &it : m_dst_zero_interval) {
+ auto src_snap_seq = it.first;
+ auto &zero_interval = it.second;
+
+ auto snap_map_it = m_snap_map.find(src_snap_seq);
+ ceph_assert(snap_map_it != m_snap_map.end());
+ auto dst_snap_seq = snap_map_it->second.front();
+
+ auto dst_may_exist_it = m_dst_object_may_exist.find(dst_snap_seq);
+ ceph_assert(dst_may_exist_it != m_dst_object_may_exist.end());
+ if (!dst_may_exist_it->second && prev_end_size > 0) {
+ ldout(m_cct, 5) << "object DNE for snap_id: " << dst_snap_seq << dendl;
+ m_write_ops[src_snap_seq].emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0);
+ prev_end_size = 0;
+ continue;
+ }
+
+ if (hide_parent) {
+ RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock);
+ RWLock::RLocker parent_locker(m_dst_image_ctx->parent_lock);
+ uint64_t parent_overlap = 0;
+ int r = m_dst_image_ctx->get_parent_overlap(dst_snap_seq, &parent_overlap);
+ if (r < 0) {
+ ldout(m_cct, 5) << "failed getting parent overlap for snap_id: "
+ << dst_snap_seq << ": " << cpp_strerror(r) << dendl;
+ }
+ if (parent_overlap == 0) {
+ ldout(m_cct, 20) << "no parent overlap" << dendl;
+ hide_parent = false;
+ } else {
+ std::vector<std::pair<uint64_t, uint64_t>> image_extents;
+ Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout,
+ m_dst_object_number, 0,
+ m_dst_image_ctx->layout.object_size,
+ image_extents);
+ uint64_t overlap = m_dst_image_ctx->prune_parent_extents(image_extents,
+ parent_overlap);
+ if (overlap == 0) {
+ ldout(m_cct, 20) << "no parent overlap" << dendl;
+ hide_parent = false;
+ } else if (src_snap_seq == m_dst_zero_interval.begin()->first) {
+ for (auto e : image_extents) {
+ prev_end_size += e.second;
+ }
+ ceph_assert(prev_end_size <= m_dst_image_ctx->layout.object_size);
+ }
+ }
+ }
+
+ uint64_t end_size = prev_end_size;
+
+ // update end_size if there are writes into higher offsets
+ auto iter = m_write_ops.find(src_snap_seq);
+ if (iter != m_write_ops.end()) {
+ for (auto &copy_op : iter->second) {
+ for (auto &e : copy_op.dst_extent_map) {
+ end_size = std::max(end_size, e.first + e.second);
+ }
+ }
+ }
+
+ for (auto z = zero_interval.begin(); z != zero_interval.end(); z++) {
+ if (z.get_start() + z.get_len() >= end_size) {
+ // zero interval at the object end
+ if (z.get_start() == 0 && hide_parent) {
+ m_write_ops[src_snap_seq]
+ .emplace_back(COPY_OP_TYPE_REMOVE_TRUNC, 0, 0, 0);
+ ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE_TRUNC" << dendl;
+ } else if (z.get_start() < prev_end_size) {
+ if (z.get_start() == 0) {
+ m_write_ops[src_snap_seq]
+ .emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0);
+ ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE" << dendl;
+ } else {
+ m_write_ops[src_snap_seq]
+ .emplace_back(COPY_OP_TYPE_TRUNC, 0, z.get_start(), 0);
+ ldout(m_cct, 20) << "COPY_OP_TYPE_TRUNC " << z.get_start() << dendl;
+ }
+ }
+ end_size = std::min(end_size, z.get_start());
+ } else {
+ // zero interval inside the object
+ m_write_ops[src_snap_seq]
+ .emplace_back(COPY_OP_TYPE_ZERO, 0, z.get_start(), z.get_len());
+ ldout(m_cct, 20) << "COPY_OP_TYPE_ZERO " << z.get_start() << "~"
+ << z.get_len() << dendl;
+ }
+ }
+ ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", end_size="
+ << end_size << dendl;
+ if (end_size > 0 || hide_parent) {
+ m_dst_object_state[src_snap_seq] = OBJECT_EXISTS;
+ if (fast_diff && end_size == prev_end_size &&
+ m_write_ops[src_snap_seq].empty()) {
+ m_dst_object_state[src_snap_seq] = OBJECT_EXISTS_CLEAN;
+ }
+ }
+ prev_end_size = end_size;
+ }
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::finish(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ // ensure IoCtxs are closed prior to proceeding
+ auto on_finish = m_on_finish;
+
+ m_src_async_op->finish_op();
+ delete m_src_async_op;
+ delete this;
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void ObjectCopyRequest<I>::compute_dst_object_may_exist() {
+ RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock);
+
+ auto snap_ids = m_dst_image_ctx->snaps;
+ snap_ids.push_back(CEPH_NOSNAP);
+
+ for (auto snap_id : snap_ids) {
+ m_dst_object_may_exist[snap_id] =
+ (m_dst_object_number < m_dst_image_ctx->get_object_count(snap_id));
+ }
+}
+
+} // namespace deep_copy
+} // namespace librbd
+
+template class librbd::deep_copy::ObjectCopyRequest<librbd::ImageCtx>;
diff --git a/src/librbd/deep_copy/ObjectCopyRequest.h b/src/librbd/deep_copy/ObjectCopyRequest.h
new file mode 100644
index 00000000..a0445f3b
--- /dev/null
+++ b/src/librbd/deep_copy/ObjectCopyRequest.h
@@ -0,0 +1,210 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_DEEP_COPY_OBJECT_COPY_REQUEST_H
+#define CEPH_LIBRBD_DEEP_COPY_OBJECT_COPY_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/interval_set.h"
+#include "include/rados/librados.hpp"
+#include "common/snap_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/deep_copy/Types.h"
+#include "librbd/io/Types.h"
+#include <list>
+#include <map>
+#include <string>
+
+class Context;
+class RWLock;
+
+namespace librbd {
+
+namespace io { class AsyncOperation; }
+
+namespace deep_copy {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ObjectCopyRequest {
+public:
+ static ObjectCopyRequest* create(ImageCtxT *src_image_ctx,
+ ImageCtxT *dst_image_ctx,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t dst_snap_id_start,
+ const SnapMap &snap_map,
+ uint64_t object_number, bool flatten,
+ Context *on_finish) {
+ return new ObjectCopyRequest(src_image_ctx, dst_image_ctx,
+ src_snap_id_start, dst_snap_id_start, snap_map,
+ object_number, flatten, on_finish);
+ }
+
+ ObjectCopyRequest(ImageCtxT *src_image_ctx, ImageCtxT *dst_image_ctx,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t dst_snap_id_start, const SnapMap &snap_map,
+ uint64_t object_number, bool flatten, Context *on_finish);
+
+ void send();
+
+ // testing support
+ inline librados::IoCtx &get_src_io_ctx() {
+ return m_src_io_ctx;
+ }
+ inline librados::IoCtx &get_dst_io_ctx() {
+ return m_dst_io_ctx;
+ }
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * | /----------------------\
+ * | | |
+ * v v | (repeat for each src object)
+ * LIST_SNAPS < * * * |
+ * | * (-ENOENT and snap set stale)
+ * | * * * * * * |
+ * | * /-----------\ |
+ * | * | | (repeat for each snapshot)
+ * v * v | |
+ * READ_OBJECT ---------/ |
+ * | | |
+ * | \----------------------/
+ * v
+ * READ_FROM_PARENT (skip if not needed)
+ * |
+ * | /-----------\
+ * | | | (repeat for each snapshot)
+ * v v |
+ * WRITE_OBJECT --------/
+ * |
+ * | /-----------\
+ * | | | (repeat for each snapshot)
+ * v v |
+ * UPDATE_OBJECT_MAP ---/ (skip if object
+ * | map disabled)
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ struct SrcObjectExtent {
+ uint64_t object_no = 0;
+ uint64_t offset = 0;
+ uint64_t length = 0;
+ bool noent = false;
+
+ SrcObjectExtent() {
+ }
+ SrcObjectExtent(uint64_t object_no, uint64_t offset, uint64_t length)
+ : object_no(object_no), offset(offset), length(length) {
+ }
+ };
+
+ typedef std::map<uint64_t, SrcObjectExtent> SrcObjectExtents;
+
+ enum CopyOpType {
+ COPY_OP_TYPE_WRITE,
+ COPY_OP_TYPE_ZERO,
+ COPY_OP_TYPE_TRUNC,
+ COPY_OP_TYPE_REMOVE,
+ COPY_OP_TYPE_REMOVE_TRUNC,
+ };
+
+ typedef std::map<uint64_t, uint64_t> ExtentMap;
+
+ struct CopyOp {
+ CopyOp(CopyOpType type, uint64_t src_offset, uint64_t dst_offset,
+ uint64_t length)
+ : type(type), src_offset(src_offset), dst_offset(dst_offset),
+ length(length) {
+ }
+
+ CopyOpType type;
+ uint64_t src_offset;
+ uint64_t dst_offset;
+ uint64_t length;
+
+ ExtentMap src_extent_map;
+ ExtentMap dst_extent_map;
+ bufferlist out_bl;
+ };
+
+ typedef std::list<CopyOp> CopyOps;
+ typedef std::pair<librados::snap_t, librados::snap_t> WriteReadSnapIds;
+ typedef std::map<librados::snap_t, uint8_t> SnapObjectStates;
+ typedef std::map<librados::snap_t, std::map<uint64_t, uint64_t>> SnapObjectSizes;
+
+ ImageCtxT *m_src_image_ctx;
+ ImageCtxT *m_dst_image_ctx;
+ CephContext *m_cct;
+ librados::snap_t m_src_snap_id_start;
+ librados::snap_t m_dst_snap_id_start;
+ SnapMap m_snap_map;
+ uint64_t m_dst_object_number;
+ bool m_flatten;
+ Context *m_on_finish;
+
+ decltype(m_src_image_ctx->data_ctx) m_src_io_ctx;
+ decltype(m_dst_image_ctx->data_ctx) m_dst_io_ctx;
+ std::string m_dst_oid;
+
+ std::set<uint64_t> m_src_objects;
+ uint64_t m_src_ono;
+ std::string m_src_oid;
+ SrcObjectExtents m_src_object_extents;
+ librados::snap_set_t m_snap_set;
+ int m_snap_ret = 0;
+ bool m_retry_missing_read = false;
+ librados::snap_set_t m_retry_snap_set;
+ bool m_read_whole_object = false;
+
+ std::map<WriteReadSnapIds, CopyOps> m_read_ops;
+ std::list<WriteReadSnapIds> m_read_snaps;
+ std::map<librados::snap_t, CopyOps> m_write_ops;
+ std::map<librados::snap_t, interval_set<uint64_t>> m_zero_interval;
+ std::map<librados::snap_t, interval_set<uint64_t>> m_dst_zero_interval;
+ std::map<librados::snap_t, uint8_t> m_dst_object_state;
+ std::map<librados::snap_t, bool> m_dst_object_may_exist;
+ bufferlist m_read_from_parent_data;
+
+ io::AsyncOperation* m_src_async_op = nullptr;
+
+ void send_list_snaps();
+ void handle_list_snaps(int r);
+
+ void send_read_object();
+ void handle_read_object(int r);
+
+ void send_read_from_parent();
+ void handle_read_from_parent(int r);
+
+ void send_write_object();
+ void handle_write_object(int r);
+
+ void send_update_object_map();
+ void handle_update_object_map(int r);
+
+ Context *start_lock_op(RWLock &owner_lock, int* r);
+
+ uint64_t src_to_dst_object_offset(uint64_t objectno, uint64_t offset);
+
+ void compute_src_object_extents();
+ void compute_read_ops();
+ void compute_read_from_parent_ops(io::Extents *image_extents);
+ void merge_write_ops();
+ void compute_zero_ops();
+
+ void compute_dst_object_may_exist();
+
+ void finish(int r);
+};
+
+} // namespace deep_copy
+} // namespace librbd
+
+extern template class librbd::deep_copy::ObjectCopyRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_DEEP_COPY_OBJECT_COPY_REQUEST_H
diff --git a/src/librbd/deep_copy/SetHeadRequest.cc b/src/librbd/deep_copy/SetHeadRequest.cc
new file mode 100644
index 00000000..5670ba07
--- /dev/null
+++ b/src/librbd/deep_copy/SetHeadRequest.cc
@@ -0,0 +1,224 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SetHeadRequest.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/Utils.h"
+#include "librbd/image/AttachParentRequest.h"
+#include "librbd/image/DetachParentRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::deep_copy::SetHeadRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace deep_copy {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+SetHeadRequest<I>::SetHeadRequest(I *image_ctx, uint64_t size,
+ const cls::rbd::ParentImageSpec &spec,
+ uint64_t parent_overlap,
+ Context *on_finish)
+ : m_image_ctx(image_ctx), m_size(size), m_parent_spec(spec),
+ m_parent_overlap(parent_overlap), m_on_finish(on_finish),
+ m_cct(image_ctx->cct) {
+ ceph_assert(m_parent_overlap <= m_size);
+}
+
+template <typename I>
+void SetHeadRequest<I>::send() {
+ send_set_size();
+}
+
+template <typename I>
+void SetHeadRequest<I>::send_set_size() {
+ m_image_ctx->snap_lock.get_read();
+ if (m_image_ctx->size == m_size) {
+ m_image_ctx->snap_lock.put_read();
+ send_detach_parent();
+ return;
+ }
+ m_image_ctx->snap_lock.put_read();
+
+ ldout(m_cct, 20) << dendl;
+
+ // Change the image size on disk so that the snapshot picks up
+ // the expected size. We can do this because the last snapshot
+ // we process is the sync snapshot which was created to match the
+ // image size. We also don't need to worry about trimming because
+ // we track the highest possible object number within the sync record
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::set_size(&op, m_size);
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_set_size(r);
+ finish_op_ctx->complete(0);
+ });
+ librados::AioCompletion *comp = create_rados_callback(ctx);
+ r = m_image_ctx->md_ctx.aio_operate(m_image_ctx->header_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+void SetHeadRequest<I>::handle_set_size(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to update image size: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ {
+ // adjust in-memory image size now that it's updated on disk
+ RWLock::WLocker snap_locker(m_image_ctx->snap_lock);
+ if (m_image_ctx->size > m_size) {
+ RWLock::WLocker parent_locker(m_image_ctx->parent_lock);
+ if (m_image_ctx->parent_md.spec.pool_id != -1 &&
+ m_image_ctx->parent_md.overlap > m_size) {
+ m_image_ctx->parent_md.overlap = m_size;
+ }
+ }
+ m_image_ctx->size = m_size;
+ }
+
+ send_detach_parent();
+}
+
+template <typename I>
+void SetHeadRequest<I>::send_detach_parent() {
+ m_image_ctx->parent_lock.get_read();
+ if (m_image_ctx->parent_md.spec.pool_id == -1 ||
+ (m_image_ctx->parent_md.spec == m_parent_spec &&
+ m_image_ctx->parent_md.overlap == m_parent_overlap)) {
+ m_image_ctx->parent_lock.put_read();
+ send_attach_parent();
+ return;
+ }
+ m_image_ctx->parent_lock.put_read();
+
+ ldout(m_cct, 20) << dendl;
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_detach_parent(r);
+ finish_op_ctx->complete(0);
+ });
+ auto req = image::DetachParentRequest<I>::create(*m_image_ctx, ctx);
+ req->send();
+}
+
+template <typename I>
+void SetHeadRequest<I>::handle_detach_parent(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to remove parent: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ {
+ // adjust in-memory parent now that it's updated on disk
+ RWLock::WLocker parent_locker(m_image_ctx->parent_lock);
+ m_image_ctx->parent_md.spec = {};
+ m_image_ctx->parent_md.overlap = 0;
+ }
+
+ send_attach_parent();
+}
+
+template <typename I>
+void SetHeadRequest<I>::send_attach_parent() {
+ m_image_ctx->parent_lock.get_read();
+ if (m_image_ctx->parent_md.spec == m_parent_spec &&
+ m_image_ctx->parent_md.overlap == m_parent_overlap) {
+ m_image_ctx->parent_lock.put_read();
+ finish(0);
+ return;
+ }
+ m_image_ctx->parent_lock.put_read();
+
+ ldout(m_cct, 20) << dendl;
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_attach_parent(r);
+ finish_op_ctx->complete(0);
+ });
+ auto req = image::AttachParentRequest<I>::create(
+ *m_image_ctx, m_parent_spec, m_parent_overlap, false, ctx);
+ req->send();
+}
+
+template <typename I>
+void SetHeadRequest<I>::handle_attach_parent(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to attach parent: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ {
+ // adjust in-memory parent now that it's updated on disk
+ RWLock::WLocker parent_locker(m_image_ctx->parent_lock);
+ m_image_ctx->parent_md.spec = m_parent_spec;
+ m_image_ctx->parent_md.overlap = m_parent_overlap;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+Context *SetHeadRequest<I>::start_lock_op(int* r) {
+ RWLock::RLocker owner_locker(m_image_ctx->owner_lock);
+ if (m_image_ctx->exclusive_lock == nullptr) {
+ return new FunctionContext([](int r) {});
+ }
+ return m_image_ctx->exclusive_lock->start_op(r);
+}
+
+template <typename I>
+void SetHeadRequest<I>::finish(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace deep_copy
+} // namespace librbd
+
+template class librbd::deep_copy::SetHeadRequest<librbd::ImageCtx>;
diff --git a/src/librbd/deep_copy/SetHeadRequest.h b/src/librbd/deep_copy/SetHeadRequest.h
new file mode 100644
index 00000000..9a17c9fd
--- /dev/null
+++ b/src/librbd/deep_copy/SetHeadRequest.h
@@ -0,0 +1,87 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_DEEP_COPY_SET_HEAD_REQUEST_H
+#define CEPH_LIBRBD_DEEP_COPY_SET_HEAD_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/snap_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Types.h"
+#include <map>
+#include <set>
+#include <string>
+#include <tuple>
+
+class Context;
+
+namespace librbd {
+namespace deep_copy {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SetHeadRequest {
+public:
+ static SetHeadRequest* create(ImageCtxT *image_ctx, uint64_t size,
+ const cls::rbd::ParentImageSpec &parent_spec,
+ uint64_t parent_overlap,
+ Context *on_finish) {
+ return new SetHeadRequest(image_ctx, size, parent_spec, parent_overlap,
+ on_finish);
+ }
+
+ SetHeadRequest(ImageCtxT *image_ctx, uint64_t size,
+ const cls::rbd::ParentImageSpec &parent_spec,
+ uint64_t parent_overlap, Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v (skip if not needed)
+ * SET_SIZE
+ * |
+ * v (skip if not needed)
+ * DETACH_PARENT
+ * |
+ * v (skip if not needed)
+ * ATTACH_PARENT
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT *m_image_ctx;
+ uint64_t m_size;
+ cls::rbd::ParentImageSpec m_parent_spec;
+ uint64_t m_parent_overlap;
+ Context *m_on_finish;
+
+ CephContext *m_cct;
+
+ void send_set_size();
+ void handle_set_size(int r);
+
+ void send_detach_parent();
+ void handle_detach_parent(int r);
+
+ void send_attach_parent();
+ void handle_attach_parent(int r);
+
+ Context *start_lock_op(int* r);
+
+ void finish(int r);
+};
+
+} // namespace deep_copy
+} // namespace librbd
+
+extern template class librbd::deep_copy::SetHeadRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_DEEP_COPY_SET_HEAD_REQUEST_H
diff --git a/src/librbd/deep_copy/SnapshotCopyRequest.cc b/src/librbd/deep_copy/SnapshotCopyRequest.cc
new file mode 100644
index 00000000..d1375a42
--- /dev/null
+++ b/src/librbd/deep_copy/SnapshotCopyRequest.cc
@@ -0,0 +1,730 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SnapshotCopyRequest.h"
+#include "SetHeadRequest.h"
+#include "SnapshotCreateRequest.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "osdc/Striper.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::deep_copy::SnapshotCopyRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace deep_copy {
+
+namespace {
+
+template <typename I>
+const std::string &get_snapshot_name(I *image_ctx, librados::snap_t snap_id) {
+ auto snap_it = std::find_if(image_ctx->snap_ids.begin(),
+ image_ctx->snap_ids.end(),
+ [snap_id](
+ const std::pair<
+ std::pair<cls::rbd::SnapshotNamespace,
+ std::string>,
+ librados::snap_t> &pair) {
+ return pair.second == snap_id;
+ });
+ ceph_assert(snap_it != image_ctx->snap_ids.end());
+ return snap_it->first.second;
+}
+
+} // anonymous namespace
+
+using librbd::util::create_context_callback;
+using librbd::util::unique_lock_name;
+
+template <typename I>
+SnapshotCopyRequest<I>::SnapshotCopyRequest(I *src_image_ctx,
+ I *dst_image_ctx,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t src_snap_id_end,
+ librados::snap_t dst_snap_id_start,
+ bool flatten, ContextWQ *work_queue,
+ SnapSeqs *snap_seqs,
+ Context *on_finish)
+ : RefCountedObject(dst_image_ctx->cct, 1), m_src_image_ctx(src_image_ctx),
+ m_dst_image_ctx(dst_image_ctx), m_src_snap_id_start(src_snap_id_start),
+ m_src_snap_id_end(src_snap_id_end), m_dst_snap_id_start(dst_snap_id_start),
+ m_flatten(flatten), m_work_queue(work_queue), m_snap_seqs_result(snap_seqs),
+ m_snap_seqs(*snap_seqs), m_on_finish(on_finish), m_cct(dst_image_ctx->cct),
+ m_lock(unique_lock_name("SnapshotCopyRequest::m_lock", this)) {
+ ceph_assert((m_src_snap_id_start == 0 && m_dst_snap_id_start == 0) ||
+ (m_src_snap_id_start > 0 && m_dst_snap_id_start > 0));
+
+ // snap ids ordered from oldest to newest
+ m_src_image_ctx->snap_lock.get_read();
+ m_src_snap_ids.insert(src_image_ctx->snaps.begin(),
+ src_image_ctx->snaps.end());
+ m_src_image_ctx->snap_lock.put_read();
+
+ m_dst_image_ctx->snap_lock.get_read();
+ m_dst_snap_ids.insert(dst_image_ctx->snaps.begin(),
+ dst_image_ctx->snaps.end());
+ m_dst_image_ctx->snap_lock.put_read();
+
+ if (m_src_snap_id_end != CEPH_NOSNAP) {
+ m_src_snap_ids.erase(m_src_snap_ids.upper_bound(m_src_snap_id_end),
+ m_src_snap_ids.end());
+ }
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::send() {
+ cls::rbd::ParentImageSpec src_parent_spec;
+ int r = validate_parent(m_src_image_ctx, &src_parent_spec);
+ if (r < 0) {
+ lderr(m_cct) << "source image parent spec mismatch" << dendl;
+ error(r);
+ return;
+ }
+
+ r = validate_parent(m_dst_image_ctx, &m_dst_parent_spec);
+ if (r < 0) {
+ lderr(m_cct) << "destination image parent spec mismatch" << dendl;
+ error(r);
+ return;
+ }
+
+ send_snap_unprotect();
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::cancel() {
+ Mutex::Locker locker(m_lock);
+
+ ldout(m_cct, 20) << dendl;
+ m_canceled = true;
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::send_snap_unprotect() {
+
+ SnapIdSet::iterator snap_id_it = m_dst_snap_ids.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_id_it = m_dst_snap_ids.upper_bound(m_prev_snap_id);
+ } else if (m_dst_snap_id_start > 0) {
+ snap_id_it = m_dst_snap_ids.upper_bound(m_dst_snap_id_start);
+ }
+
+ for (; snap_id_it != m_dst_snap_ids.end(); ++snap_id_it) {
+ librados::snap_t dst_snap_id = *snap_id_it;
+
+ m_dst_image_ctx->snap_lock.get_read();
+
+ bool dst_unprotected;
+ int r = m_dst_image_ctx->is_snap_unprotected(dst_snap_id, &dst_unprotected);
+ if (r < 0) {
+ lderr(m_cct) << "failed to retrieve destination snap unprotect status: "
+ << cpp_strerror(r) << dendl;
+ m_dst_image_ctx->snap_lock.put_read();
+ finish(r);
+ return;
+ }
+ m_dst_image_ctx->snap_lock.put_read();
+
+ if (dst_unprotected) {
+ // snap is already unprotected -- check next snap
+ continue;
+ }
+
+ // if destination snapshot is protected and (1) it isn't in our mapping
+ // table, or (2) the source snapshot isn't protected, unprotect it
+ auto snap_seq_it = std::find_if(
+ m_snap_seqs.begin(), m_snap_seqs.end(),
+ [dst_snap_id](const SnapSeqs::value_type& pair) {
+ return pair.second == dst_snap_id;
+ });
+
+ if (snap_seq_it != m_snap_seqs.end()) {
+ m_src_image_ctx->snap_lock.get_read();
+ bool src_unprotected;
+ r = m_src_image_ctx->is_snap_unprotected(snap_seq_it->first,
+ &src_unprotected);
+ ldout(m_cct, 20) << "m_src_image_ctx->is_snap_unprotected("
+ << snap_seq_it->first << "): r=" << r
+ << ", src_unprotected=" << src_unprotected << dendl;
+ if (r == -ENOENT) {
+ src_unprotected = true;
+ r = 0;
+ }
+ if (r < 0) {
+ lderr(m_cct) << "failed to retrieve source snap unprotect status: "
+ << cpp_strerror(r) << dendl;
+ m_src_image_ctx->snap_lock.put_read();
+ finish(r);
+ return;
+ }
+ m_src_image_ctx->snap_lock.put_read();
+
+ if (src_unprotected) {
+ // source is unprotected -- unprotect destination snap
+ break;
+ }
+ } else {
+ // source snapshot doesn't exist -- unprotect destination snap
+ break;
+ }
+ }
+
+ if (snap_id_it == m_dst_snap_ids.end()) {
+ // no destination snapshots to unprotect
+ m_prev_snap_id = CEPH_NOSNAP;
+ send_snap_remove();
+ return;
+ }
+
+ m_prev_snap_id = *snap_id_it;
+ m_snap_name = get_snapshot_name(m_dst_image_ctx, m_prev_snap_id);
+
+ ldout(m_cct, 20) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_snap_unprotect(r);
+ finish_op_ctx->complete(0);
+ });
+ RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock);
+ m_dst_image_ctx->operations->execute_snap_unprotect(
+ cls::rbd::UserSnapshotNamespace(), m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::handle_snap_unprotect(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to unprotect snapshot '" << m_snap_name << "': "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ {
+ // avoid the need to refresh to delete the newly unprotected snapshot
+ RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock);
+ auto snap_info_it = m_dst_image_ctx->snap_info.find(m_prev_snap_id);
+ if (snap_info_it != m_dst_image_ctx->snap_info.end()) {
+ snap_info_it->second.protection_status =
+ RBD_PROTECTION_STATUS_UNPROTECTED;
+ }
+ }
+
+ if (handle_cancellation()) {
+ return;
+ }
+
+ send_snap_unprotect();
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::send_snap_remove() {
+ SnapIdSet::iterator snap_id_it = m_dst_snap_ids.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_id_it = m_dst_snap_ids.upper_bound(m_prev_snap_id);
+ } else if (m_dst_snap_id_start > 0) {
+ snap_id_it = m_dst_snap_ids.upper_bound(m_dst_snap_id_start);
+ }
+
+ for (; snap_id_it != m_dst_snap_ids.end(); ++snap_id_it) {
+ librados::snap_t dst_snap_id = *snap_id_it;
+
+ cls::rbd::SnapshotNamespace snap_namespace;
+ m_dst_image_ctx->snap_lock.get_read();
+ int r = m_dst_image_ctx->get_snap_namespace(dst_snap_id, &snap_namespace);
+ m_dst_image_ctx->snap_lock.put_read();
+ if (r < 0) {
+ lderr(m_cct) << "failed to retrieve destination snap namespace: "
+ << m_snap_name << dendl;
+ finish(r);
+ return;
+ }
+
+ if (boost::get<cls::rbd::UserSnapshotNamespace>(&snap_namespace) ==
+ nullptr) {
+ continue;
+ }
+
+ // if the destination snapshot isn't in our mapping table, remove it
+ auto snap_seq_it = std::find_if(
+ m_snap_seqs.begin(), m_snap_seqs.end(),
+ [dst_snap_id](const SnapSeqs::value_type& pair) {
+ return pair.second == dst_snap_id;
+ });
+
+ if (snap_seq_it == m_snap_seqs.end()) {
+ break;
+ }
+ }
+
+ if (snap_id_it == m_dst_snap_ids.end()) {
+ // no destination snapshots to delete
+ m_prev_snap_id = CEPH_NOSNAP;
+ send_snap_create();
+ return;
+ }
+
+ m_prev_snap_id = *snap_id_it;
+ m_snap_name = get_snapshot_name(m_dst_image_ctx, m_prev_snap_id);
+
+ ldout(m_cct, 20) << ""
+ << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_snap_remove(r);
+ finish_op_ctx->complete(0);
+ });
+ RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock);
+ m_dst_image_ctx->operations->execute_snap_remove(
+ cls::rbd::UserSnapshotNamespace(), m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::handle_snap_remove(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to remove snapshot '" << m_snap_name << "': "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+ if (handle_cancellation()) {
+ return;
+ }
+
+ send_snap_remove();
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::send_snap_create() {
+ SnapIdSet::iterator snap_id_it = m_src_snap_ids.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_id_it = m_src_snap_ids.upper_bound(m_prev_snap_id);
+ } else if (m_src_snap_id_start > 0) {
+ snap_id_it = m_src_snap_ids.upper_bound(m_src_snap_id_start);
+ }
+
+ for (; snap_id_it != m_src_snap_ids.end(); ++snap_id_it) {
+ librados::snap_t src_snap_id = *snap_id_it;
+
+ cls::rbd::SnapshotNamespace snap_namespace;
+ m_src_image_ctx->snap_lock.get_read();
+ int r = m_src_image_ctx->get_snap_namespace(src_snap_id, &snap_namespace);
+ m_src_image_ctx->snap_lock.put_read();
+ if (r < 0) {
+ lderr(m_cct) << "failed to retrieve source snap namespace: "
+ << m_snap_name << dendl;
+ finish(r);
+ return;
+ }
+
+ if (m_snap_seqs.find(src_snap_id) == m_snap_seqs.end()) {
+ // the source snapshot is not in our mapping table, ...
+ if (boost::get<cls::rbd::UserSnapshotNamespace>(&snap_namespace) !=
+ nullptr) {
+ // ... create it since it's a user snapshot
+ break;
+ } else if (src_snap_id == m_src_snap_id_end) {
+ // ... map it to destination HEAD since it's not a user snapshot that we
+ // will create (e.g. MirrorPrimarySnapshotNamespace)
+ m_snap_seqs[src_snap_id] = CEPH_NOSNAP;
+ }
+ }
+ }
+
+ if (snap_id_it == m_src_snap_ids.end()) {
+ // no source snapshots to create
+ m_prev_snap_id = CEPH_NOSNAP;
+ send_snap_protect();
+ return;
+ }
+
+ m_prev_snap_id = *snap_id_it;
+ m_snap_name = get_snapshot_name(m_src_image_ctx, m_prev_snap_id);
+
+ m_src_image_ctx->snap_lock.get_read();
+ auto snap_info_it = m_src_image_ctx->snap_info.find(m_prev_snap_id);
+ if (snap_info_it == m_src_image_ctx->snap_info.end()) {
+ m_src_image_ctx->snap_lock.put_read();
+ lderr(m_cct) << "failed to retrieve source snap info: " << m_snap_name
+ << dendl;
+ finish(-ENOENT);
+ return;
+ }
+
+ uint64_t size = snap_info_it->second.size;
+ m_snap_namespace = snap_info_it->second.snap_namespace;
+ cls::rbd::ParentImageSpec parent_spec;
+ uint64_t parent_overlap = 0;
+ if (!m_flatten && snap_info_it->second.parent.spec.pool_id != -1) {
+ parent_spec = m_dst_parent_spec;
+ parent_overlap = snap_info_it->second.parent.overlap;
+ }
+ m_src_image_ctx->snap_lock.put_read();
+
+ ldout(m_cct, 20) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << ", "
+ << "size=" << size << ", "
+ << "parent_info=["
+ << "pool_id=" << parent_spec.pool_id << ", "
+ << "image_id=" << parent_spec.image_id << ", "
+ << "snap_id=" << parent_spec.snap_id << ", "
+ << "overlap=" << parent_overlap << "]" << dendl;
+
+ int r;
+ Context *finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_snap_create(r);
+ finish_op_ctx->complete(0);
+ });
+ SnapshotCreateRequest<I> *req = SnapshotCreateRequest<I>::create(
+ m_dst_image_ctx, m_snap_name, m_snap_namespace, size, parent_spec,
+ parent_overlap, ctx);
+ req->send();
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::handle_snap_create(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to create snapshot '" << m_snap_name << "': "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+ if (handle_cancellation()) {
+ return;
+ }
+
+ ceph_assert(m_prev_snap_id != CEPH_NOSNAP);
+
+ auto snap_it = m_dst_image_ctx->snap_ids.find(
+ {cls::rbd::UserSnapshotNamespace(), m_snap_name});
+ ceph_assert(snap_it != m_dst_image_ctx->snap_ids.end());
+ librados::snap_t dst_snap_id = snap_it->second;
+
+ ldout(m_cct, 20) << "mapping source snap id " << m_prev_snap_id << " to "
+ << dst_snap_id << dendl;
+ m_snap_seqs[m_prev_snap_id] = dst_snap_id;
+
+ send_snap_create();
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::send_snap_protect() {
+ SnapIdSet::iterator snap_id_it = m_src_snap_ids.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_id_it = m_src_snap_ids.upper_bound(m_prev_snap_id);
+ } else if (m_src_snap_id_start > 0) {
+ snap_id_it = m_src_snap_ids.upper_bound(m_src_snap_id_start);
+ }
+
+ for (; snap_id_it != m_src_snap_ids.end(); ++snap_id_it) {
+ librados::snap_t src_snap_id = *snap_id_it;
+
+ m_src_image_ctx->snap_lock.get_read();
+
+ bool src_protected;
+ int r = m_src_image_ctx->is_snap_protected(src_snap_id, &src_protected);
+ if (r < 0) {
+ lderr(m_cct) << "failed to retrieve source snap protect status: "
+ << cpp_strerror(r) << dendl;
+ m_src_image_ctx->snap_lock.put_read();
+ finish(r);
+ return;
+ }
+ m_src_image_ctx->snap_lock.put_read();
+
+ if (!src_protected) {
+ // snap is not protected -- check next snap
+ continue;
+ }
+
+ // if destination snapshot is not protected, protect it
+ auto snap_seq_it = m_snap_seqs.find(src_snap_id);
+ ceph_assert(snap_seq_it != m_snap_seqs.end());
+ if (snap_seq_it->second == CEPH_NOSNAP) {
+ // implies src end snapshot is mapped to a non-copyable snapshot
+ ceph_assert(src_snap_id == m_src_snap_id_end);
+ break;
+ }
+
+ m_dst_image_ctx->snap_lock.get_read();
+ bool dst_protected;
+ r = m_dst_image_ctx->is_snap_protected(snap_seq_it->second, &dst_protected);
+ if (r < 0) {
+ lderr(m_cct) << "failed to retrieve destination snap protect status: "
+ << cpp_strerror(r) << dendl;
+ m_dst_image_ctx->snap_lock.put_read();
+ finish(r);
+ return;
+ }
+ m_dst_image_ctx->snap_lock.put_read();
+
+ if (!dst_protected) {
+ break;
+ }
+ }
+
+ if (snap_id_it == m_src_snap_ids.end()) {
+ // no destination snapshots to protect
+ m_prev_snap_id = CEPH_NOSNAP;
+ send_set_head();
+ return;
+ }
+
+ m_prev_snap_id = *snap_id_it;
+ m_snap_name = get_snapshot_name(m_src_image_ctx, m_prev_snap_id);
+
+ ldout(m_cct, 20) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_snap_protect(r);
+ finish_op_ctx->complete(0);
+ });
+ RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock);
+ m_dst_image_ctx->operations->execute_snap_protect(
+ cls::rbd::UserSnapshotNamespace(), m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::handle_snap_protect(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to protect snapshot '" << m_snap_name << "': "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+ if (handle_cancellation()) {
+ return;
+ }
+
+ send_snap_protect();
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::send_set_head() {
+ auto snap_seq_it = m_snap_seqs.find(m_src_snap_id_end);
+ if (m_src_snap_id_end != CEPH_NOSNAP &&
+ (snap_seq_it == m_snap_seqs.end() ||
+ snap_seq_it->second != CEPH_NOSNAP)) {
+ // not copying to src nor dst HEAD revision
+ finish(0);
+ return;
+ }
+
+ ldout(m_cct, 20) << dendl;
+
+ uint64_t size;
+ cls::rbd::ParentImageSpec parent_spec;
+ uint64_t parent_overlap = 0;
+ {
+ RWLock::RLocker src_locker(m_src_image_ctx->snap_lock);
+ auto snap_info_it = m_src_image_ctx->snap_info.find(m_src_snap_id_end);
+ if (snap_info_it != m_src_image_ctx->snap_info.end()) {
+ auto& snap_info = snap_info_it->second;
+ size = snap_info.size;
+ if (!m_flatten) {
+ parent_spec = snap_info.parent.spec;
+ parent_overlap = snap_info.parent.overlap;
+ }
+ } else {
+ size = m_src_image_ctx->size;
+ if (!m_flatten) {
+ parent_spec = m_src_image_ctx->parent_md.spec;
+ parent_overlap = m_src_image_ctx->parent_md.overlap;
+ }
+ }
+ }
+
+ auto ctx = create_context_callback<
+ SnapshotCopyRequest<I>, &SnapshotCopyRequest<I>::handle_set_head>(this);
+ auto req = SetHeadRequest<I>::create(m_dst_image_ctx, size, parent_spec,
+ parent_overlap, ctx);
+ req->send();
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::handle_set_head(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to set head: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (handle_cancellation()) {
+ return;
+ }
+
+ send_resize_object_map();
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::send_resize_object_map() {
+ int r = 0;
+
+ if (m_dst_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP)) {
+ RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock);
+ RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock);
+
+ if (m_dst_image_ctx->object_map != nullptr &&
+ Striper::get_num_objects(m_dst_image_ctx->layout,
+ m_dst_image_ctx->size) !=
+ m_dst_image_ctx->object_map->size()) {
+
+ ldout(m_cct, 20) << dendl;
+
+ auto finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r);
+ if (finish_op_ctx != nullptr) {
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_resize_object_map(r);
+ finish_op_ctx->complete(0);
+ });
+
+ m_dst_image_ctx->object_map->aio_resize(m_dst_image_ctx->size,
+ OBJECT_NONEXISTENT, ctx);
+ return;
+ }
+
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ }
+ }
+
+ finish(r);
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::handle_resize_object_map(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to resize object map: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+bool SnapshotCopyRequest<I>::handle_cancellation() {
+ {
+ Mutex::Locker locker(m_lock);
+ if (!m_canceled) {
+ return false;
+ }
+ }
+ ldout(m_cct, 10) << "snapshot copy canceled" << dendl;
+ finish(-ECANCELED);
+ return true;
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::error(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ m_work_queue->queue(new FunctionContext([this, r](int r1) { finish(r); }));
+}
+
+template <typename I>
+int SnapshotCopyRequest<I>::validate_parent(I *image_ctx,
+ cls::rbd::ParentImageSpec *spec) {
+ RWLock::RLocker owner_locker(image_ctx->owner_lock);
+ RWLock::RLocker snap_locker(image_ctx->snap_lock);
+
+ // ensure source image's parent specs are still consistent
+ *spec = image_ctx->parent_md.spec;
+ for (auto &snap_info_pair : image_ctx->snap_info) {
+ auto &parent_spec = snap_info_pair.second.parent.spec;
+ if (parent_spec.pool_id == -1) {
+ continue;
+ } else if (spec->pool_id == -1) {
+ *spec = parent_spec;
+ continue;
+ }
+
+ if (*spec != parent_spec) {
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+template <typename I>
+Context *SnapshotCopyRequest<I>::start_lock_op(int* r) {
+ RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock);
+ return start_lock_op(m_dst_image_ctx->owner_lock, r);
+}
+
+template <typename I>
+Context *SnapshotCopyRequest<I>::start_lock_op(RWLock &owner_lock, int* r) {
+ ceph_assert(m_dst_image_ctx->owner_lock.is_locked());
+ if (m_dst_image_ctx->exclusive_lock == nullptr) {
+ return new FunctionContext([](int r) {});
+ }
+ return m_dst_image_ctx->exclusive_lock->start_op(r);
+}
+
+template <typename I>
+void SnapshotCopyRequest<I>::finish(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r == 0) {
+ *m_snap_seqs_result = m_snap_seqs;
+ }
+
+ m_on_finish->complete(r);
+ put();
+}
+
+} // namespace deep_copy
+} // namespace librbd
+
+template class librbd::deep_copy::SnapshotCopyRequest<librbd::ImageCtx>;
diff --git a/src/librbd/deep_copy/SnapshotCopyRequest.h b/src/librbd/deep_copy/SnapshotCopyRequest.h
new file mode 100644
index 00000000..12068c9a
--- /dev/null
+++ b/src/librbd/deep_copy/SnapshotCopyRequest.h
@@ -0,0 +1,148 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_COPY_REQUEST_H
+#define CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_COPY_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/RefCountedObj.h"
+#include "common/snap_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Types.h"
+#include <map>
+#include <set>
+#include <string>
+#include <tuple>
+
+class Context;
+
+namespace librbd {
+namespace deep_copy {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SnapshotCopyRequest : public RefCountedObject {
+public:
+ static SnapshotCopyRequest* create(ImageCtxT *src_image_ctx,
+ ImageCtxT *dst_image_ctx,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t src_snap_id_end,
+ librados::snap_t dst_snap_id_start,
+ bool flatten, ContextWQ *work_queue,
+ SnapSeqs *snap_seqs, Context *on_finish) {
+ return new SnapshotCopyRequest(src_image_ctx, dst_image_ctx,
+ src_snap_id_start, src_snap_id_end,
+ dst_snap_id_start, flatten, work_queue,
+ snap_seqs, on_finish);
+ }
+
+ SnapshotCopyRequest(ImageCtxT *src_image_ctx, ImageCtxT *dst_image_ctx,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t src_snap_id_end,
+ librados::snap_t dst_snap_id_start,
+ bool flatten, ContextWQ *work_queue, SnapSeqs *snap_seqs,
+ Context *on_finish);
+
+ void send();
+ void cancel();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * | /-----------\
+ * | | |
+ * v v | (repeat as needed)
+ * UNPROTECT_SNAP ----/
+ * |
+ * | /-----------\
+ * | | |
+ * v v | (repeat as needed)
+ * REMOVE_SNAP -------/
+ * |
+ * | /-----------\
+ * | | |
+ * v v | (repeat as needed)
+ * CREATE_SNAP -------/
+ * |
+ * | /-----------\
+ * | | |
+ * v v | (repeat as needed)
+ * PROTECT_SNAP ------/
+ * |
+ * v
+ * SET_HEAD (skip if not needed)
+ * |
+ * v
+ * RESIZE_OBJECT_MAP (skip if not needed)
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ typedef std::set<librados::snap_t> SnapIdSet;
+
+ ImageCtxT *m_src_image_ctx;
+ ImageCtxT *m_dst_image_ctx;
+ librados::snap_t m_src_snap_id_start;
+ librados::snap_t m_src_snap_id_end;
+ librados::snap_t m_dst_snap_id_start;
+ bool m_flatten;
+ ContextWQ *m_work_queue;
+ SnapSeqs *m_snap_seqs_result;
+ SnapSeqs m_snap_seqs;
+ Context *m_on_finish;
+
+ CephContext *m_cct;
+ SnapIdSet m_src_snap_ids;
+ SnapIdSet m_dst_snap_ids;
+ librados::snap_t m_prev_snap_id = CEPH_NOSNAP;
+
+ std::string m_snap_name;
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+
+ cls::rbd::ParentImageSpec m_dst_parent_spec;
+
+ Mutex m_lock;
+ bool m_canceled = false;
+
+ void send_snap_unprotect();
+ void handle_snap_unprotect(int r);
+
+ void send_snap_remove();
+ void handle_snap_remove(int r);
+
+ void send_snap_create();
+ void handle_snap_create(int r);
+
+ void send_snap_protect();
+ void handle_snap_protect(int r);
+
+ void send_set_head();
+ void handle_set_head(int r);
+
+ void send_resize_object_map();
+ void handle_resize_object_map(int r);
+
+ bool handle_cancellation();
+
+ void error(int r);
+
+ int validate_parent(ImageCtxT *image_ctx, cls::rbd::ParentImageSpec *spec);
+
+ Context *start_lock_op(int* r);
+ Context *start_lock_op(RWLock &owner_locki, int* r);
+
+ void finish(int r);
+};
+
+} // namespace deep_copy
+} // namespace librbd
+
+extern template class librbd::deep_copy::SnapshotCopyRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_COPY_REQUEST_H
diff --git a/src/librbd/deep_copy/SnapshotCreateRequest.cc b/src/librbd/deep_copy/SnapshotCreateRequest.cc
new file mode 100644
index 00000000..0ff9c832
--- /dev/null
+++ b/src/librbd/deep_copy/SnapshotCreateRequest.cc
@@ -0,0 +1,187 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SetHeadRequest.h"
+#include "SnapshotCreateRequest.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "osdc/Striper.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::deep_copy::SnapshotCreateRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace deep_copy {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+SnapshotCreateRequest<I>::SnapshotCreateRequest(
+ I *dst_image_ctx, const std::string &snap_name,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ uint64_t size, const cls::rbd::ParentImageSpec &spec,
+ uint64_t parent_overlap, Context *on_finish)
+ : m_dst_image_ctx(dst_image_ctx), m_snap_name(snap_name),
+ m_snap_namespace(snap_namespace), m_size(size),
+ m_parent_spec(spec), m_parent_overlap(parent_overlap),
+ m_on_finish(on_finish), m_cct(dst_image_ctx->cct) {
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send() {
+ send_set_head();
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_set_head() {
+ ldout(m_cct, 20) << dendl;
+
+ auto ctx = create_context_callback<
+ SnapshotCreateRequest<I>, &SnapshotCreateRequest<I>::handle_set_head>(this);
+ auto req = SetHeadRequest<I>::create(m_dst_image_ctx, m_size, m_parent_spec,
+ m_parent_overlap, ctx);
+ req->send();
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::handle_set_head(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to set head: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_create_snap();
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_create_snap() {
+ ldout(m_cct, 20) << "snap_name=" << m_snap_name << dendl;
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_create_snap(r);
+ finish_op_ctx->complete(0);
+ });
+ RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock);
+ m_dst_image_ctx->operations->execute_snap_create(m_snap_namespace,
+ m_snap_name.c_str(),
+ ctx,
+ 0U, true);
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::handle_create_snap(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to create snapshot '" << m_snap_name << "': "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_create_object_map();
+}
+template <typename I>
+void SnapshotCreateRequest<I>::send_create_object_map() {
+
+ if (!m_dst_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP)) {
+ finish(0);
+ return;
+ }
+
+ m_dst_image_ctx->snap_lock.get_read();
+ auto snap_it = m_dst_image_ctx->snap_ids.find(
+ {cls::rbd::UserSnapshotNamespace(), m_snap_name});
+ if (snap_it == m_dst_image_ctx->snap_ids.end()) {
+ lderr(m_cct) << "failed to locate snap: " << m_snap_name << dendl;
+ m_dst_image_ctx->snap_lock.put_read();
+ finish(-ENOENT);
+ return;
+ }
+ librados::snap_t local_snap_id = snap_it->second;
+ m_dst_image_ctx->snap_lock.put_read();
+
+ std::string object_map_oid(librbd::ObjectMap<>::object_map_name(
+ m_dst_image_ctx->id, local_snap_id));
+ uint64_t object_count = Striper::get_num_objects(m_dst_image_ctx->layout,
+ m_size);
+ ldout(m_cct, 20) << "object_map_oid=" << object_map_oid << ", "
+ << "object_count=" << object_count << dendl;
+
+ // initialize an empty object map of the correct size (object sync
+ // will populate the object map)
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::object_map_resize(&op, object_count, OBJECT_NONEXISTENT);
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ finish(r);
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_create_object_map(r);
+ finish_op_ctx->complete(0);
+ });
+ librados::AioCompletion *comp = create_rados_callback(ctx);
+ r = m_dst_image_ctx->md_ctx.aio_operate(object_map_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::handle_create_object_map(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to create object map: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::start_lock_op(int* r) {
+ RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock);
+ if (m_dst_image_ctx->exclusive_lock == nullptr) {
+ return new FunctionContext([](int r) {});
+ }
+ return m_dst_image_ctx->exclusive_lock->start_op(r);
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::finish(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace deep_copy
+} // namespace librbd
+
+template class librbd::deep_copy::SnapshotCreateRequest<librbd::ImageCtx>;
diff --git a/src/librbd/deep_copy/SnapshotCreateRequest.h b/src/librbd/deep_copy/SnapshotCreateRequest.h
new file mode 100644
index 00000000..8c228063
--- /dev/null
+++ b/src/librbd/deep_copy/SnapshotCreateRequest.h
@@ -0,0 +1,95 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_CREATE_REQUEST_H
+#define CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_CREATE_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/snap_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Types.h"
+#include <map>
+#include <set>
+#include <string>
+#include <tuple>
+
+class Context;
+
+namespace librbd {
+namespace deep_copy {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SnapshotCreateRequest {
+public:
+ static SnapshotCreateRequest* create(ImageCtxT *dst_image_ctx,
+ const std::string &snap_name,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ uint64_t size,
+ const cls::rbd::ParentImageSpec &parent_spec,
+ uint64_t parent_overlap,
+ Context *on_finish) {
+ return new SnapshotCreateRequest(dst_image_ctx, snap_name, snap_namespace, size,
+ parent_spec, parent_overlap, on_finish);
+ }
+
+ SnapshotCreateRequest(ImageCtxT *dst_image_ctx,
+ const std::string &snap_name,
+ const cls::rbd::SnapshotNamespace &snap_namespace,
+ uint64_t size,
+ const cls::rbd::ParentImageSpec &parent_spec,
+ uint64_t parent_overlap, Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * SET_HEAD
+ * |
+ * v
+ * CREATE_SNAP
+ * |
+ * v (skip if not needed)
+ * CREATE_OBJECT_MAP
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT *m_dst_image_ctx;
+ std::string m_snap_name;
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+ uint64_t m_size;
+ cls::rbd::ParentImageSpec m_parent_spec;
+ uint64_t m_parent_overlap;
+ Context *m_on_finish;
+
+ CephContext *m_cct;
+
+ void send_set_head();
+ void handle_set_head(int r);
+
+ void send_create_snap();
+ void handle_create_snap(int r);
+
+ void send_create_object_map();
+ void handle_create_object_map(int r);
+
+ Context *start_lock_op(int* r);
+
+ void finish(int r);
+};
+
+} // namespace deep_copy
+} // namespace librbd
+
+extern template class librbd::deep_copy::SnapshotCreateRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_CREATE_REQUEST_H
diff --git a/src/librbd/deep_copy/Types.h b/src/librbd/deep_copy/Types.h
new file mode 100644
index 00000000..10d3c7c1
--- /dev/null
+++ b/src/librbd/deep_copy/Types.h
@@ -0,0 +1,22 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_DEEP_COPY_TYPES_H
+#define CEPH_LIBRBD_DEEP_COPY_TYPES_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include <boost/optional.hpp>
+
+namespace librbd {
+namespace deep_copy {
+
+typedef std::vector<librados::snap_t> SnapIds;
+typedef std::map<librados::snap_t, SnapIds> SnapMap;
+
+typedef boost::optional<uint64_t> ObjectNumber;
+
+} // namespace deep_copy
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_DEEP_COPY_TYPES_H
diff --git a/src/librbd/deep_copy/Utils.cc b/src/librbd/deep_copy/Utils.cc
new file mode 100644
index 00000000..c2dd2502
--- /dev/null
+++ b/src/librbd/deep_copy/Utils.cc
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "Utils.h"
+#include <set>
+
+namespace librbd {
+namespace deep_copy {
+namespace util {
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::deep_copy::util::" << __func__ << ": "
+
+void compute_snap_map(CephContext* cct,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t src_snap_id_end,
+ const SnapIds& dst_snap_ids,
+ const SnapSeqs &snap_seqs,
+ SnapMap *snap_map) {
+ std::set<librados::snap_t> ordered_dst_snap_ids{
+ dst_snap_ids.begin(), dst_snap_ids.end()};
+ auto dst_snap_id_it = ordered_dst_snap_ids.begin();
+
+ SnapIds snap_ids;
+ for (auto &it : snap_seqs) {
+ // ensure all dst snap ids are included in the mapping table since
+ // deep copy will skip non-user snapshots
+ while (dst_snap_id_it != ordered_dst_snap_ids.end()) {
+ if (*dst_snap_id_it < it.second) {
+ snap_ids.insert(snap_ids.begin(), *dst_snap_id_it);
+ } else if (*dst_snap_id_it > it.second) {
+ break;
+ }
+ ++dst_snap_id_it;
+ }
+
+ // we should only have the HEAD revision in the the last snap seq
+ ceph_assert(snap_ids.empty() || snap_ids[0] != CEPH_NOSNAP);
+ snap_ids.insert(snap_ids.begin(), it.second);
+
+ if (it.first < src_snap_id_start) {
+ continue;
+ } else if (it.first > src_snap_id_end) {
+ break;
+ }
+
+ (*snap_map)[it.first] = snap_ids;
+ }
+
+ ldout(cct, 10) << "src_snap_id_start=" << src_snap_id_start << ", "
+ << "src_snap_id_end=" << src_snap_id_end << ", "
+ << "dst_snap_ids=" << dst_snap_ids << ", "
+ << "snap_seqs=" << snap_seqs << ", "
+ << "snap_map=" << *snap_map << dendl;
+}
+
+} // namespace util
+} // namespace deep_copy
+} // namespace librbd
diff --git a/src/librbd/deep_copy/Utils.h b/src/librbd/deep_copy/Utils.h
new file mode 100644
index 00000000..0681548d
--- /dev/null
+++ b/src/librbd/deep_copy/Utils.h
@@ -0,0 +1,30 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_DEEP_COPY_UTILS_H
+#define CEPH_LIBRBD_DEEP_COPY_UTILS_H
+
+#include "include/rados/librados.hpp"
+#include "librbd/Types.h"
+#include "librbd/deep_copy/Types.h"
+
+#include <boost/optional.hpp>
+
+struct CephContext;
+
+namespace librbd {
+namespace deep_copy {
+namespace util {
+
+void compute_snap_map(CephContext* cct,
+ librados::snap_t src_snap_id_start,
+ librados::snap_t src_snap_id_end,
+ const SnapIds& dst_snap_ids,
+ const SnapSeqs &snap_seqs,
+ SnapMap *snap_map);
+
+} // namespace util
+} // namespace deep_copy
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_DEEP_COPY_UTILS_H