diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/librbd/deep_copy | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/librbd/deep_copy')
-rw-r--r-- | src/librbd/deep_copy/ImageCopyRequest.cc | 187 | ||||
-rw-r--r-- | src/librbd/deep_copy/ImageCopyRequest.h | 109 | ||||
-rw-r--r-- | src/librbd/deep_copy/MetadataCopyRequest.cc | 123 | ||||
-rw-r--r-- | src/librbd/deep_copy/MetadataCopyRequest.h | 77 | ||||
-rw-r--r-- | src/librbd/deep_copy/ObjectCopyRequest.cc | 998 | ||||
-rw-r--r-- | src/librbd/deep_copy/ObjectCopyRequest.h | 210 | ||||
-rw-r--r-- | src/librbd/deep_copy/SetHeadRequest.cc | 224 | ||||
-rw-r--r-- | src/librbd/deep_copy/SetHeadRequest.h | 87 | ||||
-rw-r--r-- | src/librbd/deep_copy/SnapshotCopyRequest.cc | 730 | ||||
-rw-r--r-- | src/librbd/deep_copy/SnapshotCopyRequest.h | 148 | ||||
-rw-r--r-- | src/librbd/deep_copy/SnapshotCreateRequest.cc | 187 | ||||
-rw-r--r-- | src/librbd/deep_copy/SnapshotCreateRequest.h | 95 | ||||
-rw-r--r-- | src/librbd/deep_copy/Types.h | 22 | ||||
-rw-r--r-- | src/librbd/deep_copy/Utils.cc | 61 | ||||
-rw-r--r-- | src/librbd/deep_copy/Utils.h | 30 |
15 files changed, 3288 insertions, 0 deletions
diff --git a/src/librbd/deep_copy/ImageCopyRequest.cc b/src/librbd/deep_copy/ImageCopyRequest.cc new file mode 100644 index 00000000..62e5409c --- /dev/null +++ b/src/librbd/deep_copy/ImageCopyRequest.cc @@ -0,0 +1,187 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ImageCopyRequest.h" +#include "ObjectCopyRequest.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "librbd/deep_copy/Utils.h" +#include "librbd/image/CloseRequest.h" +#include "librbd/image/OpenRequest.h" +#include "osdc/Striper.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::deep_copy::ImageCopyRequest: " \ + << this << " " << __func__ << ": " + +namespace librbd { +namespace deep_copy { + +using librbd::util::create_context_callback; +using librbd::util::unique_lock_name; + +template <typename I> +ImageCopyRequest<I>::ImageCopyRequest(I *src_image_ctx, I *dst_image_ctx, + librados::snap_t src_snap_id_start, + librados::snap_t src_snap_id_end, + librados::snap_t dst_snap_id_start, + bool flatten, + const ObjectNumber &object_number, + const SnapSeqs &snap_seqs, + ProgressContext *prog_ctx, + Context *on_finish) + : RefCountedObject(dst_image_ctx->cct, 1), m_src_image_ctx(src_image_ctx), + m_dst_image_ctx(dst_image_ctx), m_src_snap_id_start(src_snap_id_start), + m_src_snap_id_end(src_snap_id_end), m_dst_snap_id_start(dst_snap_id_start), + m_flatten(flatten), m_object_number(object_number), m_snap_seqs(snap_seqs), + m_prog_ctx(prog_ctx), m_on_finish(on_finish), m_cct(dst_image_ctx->cct), + m_lock(unique_lock_name("ImageCopyRequest::m_lock", this)) { +} + +template <typename I> +void ImageCopyRequest<I>::send() { + m_dst_image_ctx->snap_lock.get_read(); + util::compute_snap_map(m_dst_image_ctx->cct, m_src_snap_id_start, + m_src_snap_id_end, m_dst_image_ctx->snaps, m_snap_seqs, + &m_snap_map); + m_dst_image_ctx->snap_lock.put_read(); + + if (m_snap_map.empty()) { + lderr(m_cct) << "failed to map snapshots within boundary" << dendl; + finish(-EINVAL); + return; + } + + send_object_copies(); +} + +template <typename I> +void ImageCopyRequest<I>::cancel() { + Mutex::Locker locker(m_lock); + + ldout(m_cct, 20) << dendl; + m_canceled = true; +} + +template <typename I> +void ImageCopyRequest<I>::send_object_copies() { + m_object_no = 0; + if (m_object_number) { + m_object_no = *m_object_number + 1; + } + + uint64_t size; + { + RWLock::RLocker snap_locker(m_src_image_ctx->snap_lock); + size = m_src_image_ctx->get_image_size(CEPH_NOSNAP); + for (auto snap_id : m_src_image_ctx->snaps) { + size = std::max(size, m_src_image_ctx->get_image_size(snap_id)); + } + } + m_end_object_no = Striper::get_num_objects(m_dst_image_ctx->layout, size); + + ldout(m_cct, 20) << "start_object=" << m_object_no << ", " + << "end_object=" << m_end_object_no << dendl; + + bool complete; + { + Mutex::Locker locker(m_lock); + for (uint64_t i = 0; + i < m_src_image_ctx->config.template get_val<uint64_t>("rbd_concurrent_management_ops"); + ++i) { + send_next_object_copy(); + if (m_ret_val < 0 && m_current_ops == 0) { + break; + } + } + complete = (m_current_ops == 0) && !m_updating_progress; + } + + if (complete) { + finish(m_ret_val); + } +} + +template <typename I> +void ImageCopyRequest<I>::send_next_object_copy() { + ceph_assert(m_lock.is_locked()); + + if (m_canceled && m_ret_val == 0) { + ldout(m_cct, 10) << "image copy canceled" << dendl; + m_ret_val = -ECANCELED; + } + + if (m_ret_val < 0 || m_object_no >= m_end_object_no) { + return; + } + + uint64_t ono = m_object_no++; + + ldout(m_cct, 20) << "object_num=" << ono << dendl; + + ++m_current_ops; + + Context *ctx = new FunctionContext( + [this, ono](int r) { + handle_object_copy(ono, r); + }); + ObjectCopyRequest<I> *req = ObjectCopyRequest<I>::create( + m_src_image_ctx, m_dst_image_ctx, m_src_snap_id_start, m_dst_snap_id_start, + m_snap_map, ono, m_flatten, ctx); + req->send(); +} + +template <typename I> +void ImageCopyRequest<I>::handle_object_copy(uint64_t object_no, int r) { + ldout(m_cct, 20) << "object_no=" << object_no << ", r=" << r << dendl; + + bool complete; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_current_ops > 0); + --m_current_ops; + + if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "object copy failed: " << cpp_strerror(r) << dendl; + if (m_ret_val == 0) { + m_ret_val = r; + } + } else { + m_copied_objects.push(object_no); + while (!m_updating_progress && !m_copied_objects.empty() && + m_copied_objects.top() == + (m_object_number ? *m_object_number + 1 : 0)) { + m_object_number = m_copied_objects.top(); + m_copied_objects.pop(); + uint64_t progress_object_no = *m_object_number + 1; + m_updating_progress = true; + m_lock.Unlock(); + m_prog_ctx->update_progress(progress_object_no, m_end_object_no); + m_lock.Lock(); + ceph_assert(m_updating_progress); + m_updating_progress = false; + } + } + + send_next_object_copy(); + complete = (m_current_ops == 0) && !m_updating_progress; + } + + if (complete) { + finish(m_ret_val); + } +} + +template <typename I> +void ImageCopyRequest<I>::finish(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + m_on_finish->complete(r); + put(); +} + +} // namespace deep_copy +} // namespace librbd + +template class librbd::deep_copy::ImageCopyRequest<librbd::ImageCtx>; diff --git a/src/librbd/deep_copy/ImageCopyRequest.h b/src/librbd/deep_copy/ImageCopyRequest.h new file mode 100644 index 00000000..189cb237 --- /dev/null +++ b/src/librbd/deep_copy/ImageCopyRequest.h @@ -0,0 +1,109 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_DEEP_COPY_IMAGE_DEEP_COPY_REQUEST_H +#define CEPH_LIBRBD_DEEP_COPY_IMAGE_DEEP_COPY_REQUEST_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "common/Mutex.h" +#include "common/RefCountedObj.h" +#include "librbd/Types.h" +#include "librbd/deep_copy/Types.h" +#include <functional> +#include <map> +#include <queue> +#include <vector> +#include <boost/optional.hpp> + +class Context; + +namespace librbd { + +class ImageCtx; +class ProgressContext; + +namespace deep_copy { + +template <typename ImageCtxT = ImageCtx> +class ImageCopyRequest : public RefCountedObject { +public: + static ImageCopyRequest* create(ImageCtxT *src_image_ctx, + ImageCtxT *dst_image_ctx, + librados::snap_t src_snap_id_start, + librados::snap_t src_snap_id_end, + librados::snap_t dst_snap_id_start, + bool flatten, + const ObjectNumber &object_number, + const SnapSeqs &snap_seqs, + ProgressContext *prog_ctx, + Context *on_finish) { + return new ImageCopyRequest(src_image_ctx, dst_image_ctx, src_snap_id_start, + src_snap_id_end, dst_snap_id_start, flatten, + object_number, snap_seqs, prog_ctx, on_finish); + } + + ImageCopyRequest(ImageCtxT *src_image_ctx, ImageCtxT *dst_image_ctx, + librados::snap_t src_snap_id_start, + librados::snap_t src_snap_id_end, + librados::snap_t dst_snap_id_start, + bool flatten, const ObjectNumber &object_number, + const SnapSeqs &snap_seqs, ProgressContext *prog_ctx, + Context *on_finish); + + void send(); + void cancel(); + +private: + /** + * @verbatim + * + * <start> + * | . . . . . + * | . . (parallel execution of + * v v . multiple objects at once) + * COPY_OBJECT . . . . + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_src_image_ctx; + ImageCtxT *m_dst_image_ctx; + librados::snap_t m_src_snap_id_start; + librados::snap_t m_src_snap_id_end; + librados::snap_t m_dst_snap_id_start; + bool m_flatten; + ObjectNumber m_object_number; + SnapSeqs m_snap_seqs; + ProgressContext *m_prog_ctx; + Context *m_on_finish; + + CephContext *m_cct; + Mutex m_lock; + bool m_canceled = false; + + uint64_t m_object_no = 0; + uint64_t m_end_object_no = 0; + uint64_t m_current_ops = 0; + std::priority_queue< + uint64_t, std::vector<uint64_t>, std::greater<uint64_t>> m_copied_objects; + bool m_updating_progress = false; + SnapMap m_snap_map; + int m_ret_val = 0; + + void send_object_copies(); + void send_next_object_copy(); + void handle_object_copy(uint64_t object_no, int r); + + void finish(int r); +}; + +} // namespace deep_copy +} // namespace librbd + +extern template class librbd::deep_copy::ImageCopyRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_DEEP_COPY_IMAGE_DEEP_COPY_REQUEST_H diff --git a/src/librbd/deep_copy/MetadataCopyRequest.cc b/src/librbd/deep_copy/MetadataCopyRequest.cc new file mode 100644 index 00000000..cbce9a19 --- /dev/null +++ b/src/librbd/deep_copy/MetadataCopyRequest.cc @@ -0,0 +1,123 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "MetadataCopyRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::deep_copy::MetadataCopyRequest: " \ + << this << " " << __func__ << ": " + +namespace librbd { +namespace deep_copy { + +namespace { + +const uint64_t MAX_METADATA_ITEMS = 128; + +} // anonymous namespace + +using librbd::util::create_rados_callback; + +template <typename I> +MetadataCopyRequest<I>::MetadataCopyRequest(I *src_image_ctx, I *dst_image_ctx, + Context *on_finish) + : m_src_image_ctx(src_image_ctx), m_dst_image_ctx(dst_image_ctx), + m_on_finish(on_finish), m_cct(dst_image_ctx->cct) { +} + +template <typename I> +void MetadataCopyRequest<I>::send() { + list_src_metadata(); +} + +template <typename I> +void MetadataCopyRequest<I>::list_src_metadata() { + ldout(m_cct, 20) << "start_key=" << m_last_metadata_key << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::metadata_list_start(&op, m_last_metadata_key, MAX_METADATA_ITEMS); + + librados::AioCompletion *aio_comp = create_rados_callback< + MetadataCopyRequest<I>, + &MetadataCopyRequest<I>::handle_list_src_metadata>(this); + m_out_bl.clear(); + m_src_image_ctx->md_ctx.aio_operate(m_src_image_ctx->header_oid, + aio_comp, &op, &m_out_bl); + aio_comp->release(); +} + +template <typename I> +void MetadataCopyRequest<I>::handle_list_src_metadata(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + Metadata metadata; + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::metadata_list_finish(&it, &metadata); + } + + if (r < 0) { + lderr(m_cct) << "failed to retrieve metadata: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (metadata.empty()) { + finish(0); + return; + } + + m_last_metadata_key = metadata.rbegin()->first; + m_more_metadata = (metadata.size() >= MAX_METADATA_ITEMS); + set_dst_metadata(metadata); +} + +template <typename I> +void MetadataCopyRequest<I>::set_dst_metadata(const Metadata& metadata) { + ldout(m_cct, 20) << "count=" << metadata.size() << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::metadata_set(&op, metadata); + + librados::AioCompletion *aio_comp = create_rados_callback< + MetadataCopyRequest<I>, + &MetadataCopyRequest<I>::handle_set_dst_metadata>(this); + m_dst_image_ctx->md_ctx.aio_operate(m_dst_image_ctx->header_oid, aio_comp, + &op); + aio_comp->release(); +} + +template <typename I> +void MetadataCopyRequest<I>::handle_set_dst_metadata(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to set metadata: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (m_more_metadata) { + list_src_metadata(); + return; + } + + finish(0); +} + +template <typename I> +void MetadataCopyRequest<I>::finish(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + m_on_finish->complete(r); + delete this; +} + +} // namespace deep_copy +} // namespace librbd + +template class librbd::deep_copy::MetadataCopyRequest<librbd::ImageCtx>; diff --git a/src/librbd/deep_copy/MetadataCopyRequest.h b/src/librbd/deep_copy/MetadataCopyRequest.h new file mode 100644 index 00000000..5bd69d8b --- /dev/null +++ b/src/librbd/deep_copy/MetadataCopyRequest.h @@ -0,0 +1,77 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_DEEP_COPY_METADATA_COPY_REQUEST_H +#define CEPH_LIBRBD_DEEP_COPY_METADATA_COPY_REQUEST_H + +#include "include/int_types.h" +#include "include/buffer.h" +#include "include/rados/librados.hpp" +#include "librbd/ImageCtx.h" +#include <map> +#include <string> + +class Context; + +namespace librbd { +namespace deep_copy { + +template <typename ImageCtxT = librbd::ImageCtx> +class MetadataCopyRequest { +public: + static MetadataCopyRequest* create(ImageCtxT *src_image_ctx, + ImageCtxT *dst_image_ctx, + Context *on_finish) { + return new MetadataCopyRequest(src_image_ctx, dst_image_ctx, on_finish); + } + + MetadataCopyRequest(ImageCtxT *src_image_ctx, ImageCtxT *dst_image_ctx, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * LIST_SRC_METADATA <------\ + * | | (repeat if additional + * v | metadata) + * SET_DST_METADATA --------/ + * | + * v + * <finish> + * + * @endverbatim + */ + typedef std::map<std::string, bufferlist> Metadata; + + ImageCtxT *m_src_image_ctx; + ImageCtxT *m_dst_image_ctx; + Context *m_on_finish; + + CephContext *m_cct; + bufferlist m_out_bl; + + std::string m_last_metadata_key; + bool m_more_metadata = false; + + void list_src_metadata(); + void handle_list_src_metadata(int r); + + void set_dst_metadata(const Metadata& metadata); + void handle_set_dst_metadata(int r); + + void finish(int r); + +}; + +} // namespace deep_copy +} // namespace librbd + +extern template class librbd::deep_copy::MetadataCopyRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_DEEP_COPY_METADATA_COPY_REQUEST_H diff --git a/src/librbd/deep_copy/ObjectCopyRequest.cc b/src/librbd/deep_copy/ObjectCopyRequest.cc new file mode 100644 index 00000000..2400757a --- /dev/null +++ b/src/librbd/deep_copy/ObjectCopyRequest.cc @@ -0,0 +1,998 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ObjectCopyRequest.h" +#include "common/errno.h" +#include "librados/snap_set_diff.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/AsyncOperation.h" +#include "librbd/io/ImageRequest.h" +#include "librbd/io/ReadResult.h" +#include "osdc/Striper.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::deep_copy::ObjectCopyRequest: " \ + << this << " " << __func__ << ": " + +namespace librados { + +inline bool operator==(const clone_info_t& rhs, const clone_info_t& lhs) { + return (rhs.cloneid == lhs.cloneid && + rhs.snaps == lhs.snaps && + rhs.overlap == lhs.overlap && + rhs.size == lhs.size); +} + +inline bool operator==(const snap_set_t& rhs, const snap_set_t& lhs) { + return (rhs.clones == lhs.clones && + rhs.seq == lhs.seq); +} + +} // namespace librados + +namespace librbd { +namespace deep_copy { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +ObjectCopyRequest<I>::ObjectCopyRequest(I *src_image_ctx, + I *dst_image_ctx, + librados::snap_t src_snap_id_start, + librados::snap_t dst_snap_id_start, + const SnapMap &snap_map, + uint64_t dst_object_number, + bool flatten, Context *on_finish) + : m_src_image_ctx(src_image_ctx), + m_dst_image_ctx(dst_image_ctx), m_cct(dst_image_ctx->cct), + m_src_snap_id_start(src_snap_id_start), + m_dst_snap_id_start(dst_snap_id_start), m_snap_map(snap_map), + m_dst_object_number(dst_object_number), m_flatten(flatten), + m_on_finish(on_finish) { + ceph_assert(src_image_ctx->data_ctx.is_valid()); + ceph_assert(dst_image_ctx->data_ctx.is_valid()); + ceph_assert(!m_snap_map.empty()); + + m_src_async_op = new io::AsyncOperation(); + m_src_async_op->start_op(*util::get_image_ctx(m_src_image_ctx)); + + m_src_io_ctx.dup(m_src_image_ctx->data_ctx); + m_dst_io_ctx.dup(m_dst_image_ctx->data_ctx); + + m_dst_oid = m_dst_image_ctx->get_object_name(dst_object_number); + + ldout(m_cct, 20) << "dst_oid=" << m_dst_oid << dendl; + + compute_src_object_extents(); +} + +template <typename I> +void ObjectCopyRequest<I>::send() { + send_list_snaps(); +} + +template <typename I> +void ObjectCopyRequest<I>::send_list_snaps() { + ceph_assert(!m_src_objects.empty()); + m_src_ono = *m_src_objects.begin(); + m_src_oid = m_src_image_ctx->get_object_name(m_src_ono); + + ldout(m_cct, 20) << "src_oid=" << m_src_oid << dendl; + + librados::AioCompletion *rados_completion = create_rados_callback< + ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_list_snaps>(this); + + librados::ObjectReadOperation op; + m_snap_set = {}; + m_snap_ret = 0; + op.list_snaps(&m_snap_set, &m_snap_ret); + + m_src_io_ctx.snap_set_read(CEPH_SNAPDIR); + int r = m_src_io_ctx.aio_operate(m_src_oid, rados_completion, &op, + nullptr); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +void ObjectCopyRequest<I>::handle_list_snaps(int r) { + if (r == 0 && m_snap_ret < 0) { + r = m_snap_ret; + } + + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + lderr(m_cct) << "failed to list snaps: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (m_retry_missing_read) { + if (m_snap_set == m_retry_snap_set) { + lderr(m_cct) << "read encountered missing object using up-to-date snap set" + << dendl; + finish(-ENOENT); + return; + } + + ldout(m_cct, 20) << "retrying using updated snap set" << dendl; + m_retry_missing_read = false; + m_retry_snap_set = {}; + } + + if (r == -ENOENT) { + for (auto &it : m_src_object_extents) { + auto &e = it.second; + if (e.object_no == m_src_ono) { + e.noent = true; + } + } + m_read_ops = {}; + m_read_snaps = {}; + m_zero_interval = {}; + } else { + compute_read_ops(); + } + send_read_object(); +} + +template <typename I> +void ObjectCopyRequest<I>::send_read_object() { + + if (m_read_snaps.empty()) { + // all snapshots have been read + merge_write_ops(); + + ceph_assert(!m_src_objects.empty()); + m_src_objects.erase(m_src_objects.begin()); + + if (!m_src_objects.empty()) { + send_list_snaps(); + return; + } + + // all objects have been read + send_read_from_parent(); + return; + } + + auto index = *m_read_snaps.begin(); + auto src_snap_seq = index.second; + + bool read_required = false; + librados::ObjectReadOperation op; + + for (auto ©_op : m_read_ops[index]) { + if (!read_required) { + // map the copy op start snap id back to the necessary read snap id + m_src_io_ctx.snap_set_read(src_snap_seq); + + ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << dendl; + read_required = true; + } + ldout(m_cct, 20) << "read op: " << copy_op.src_offset << "~" + << copy_op.length << dendl; + op.sparse_read(copy_op.src_offset, copy_op.length, ©_op.src_extent_map, + ©_op.out_bl, nullptr); + op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | + LIBRADOS_OP_FLAG_FADVISE_NOCACHE); + } + + if (!read_required) { + // nothing written to this object for this snapshot (must be trunc/remove) + handle_read_object(0); + return; + } + + auto ctx = create_context_callback< + ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_object>(this); + auto comp = create_rados_callback(ctx); + + ldout(m_cct, 20) << "read " << m_src_oid << dendl; + + int r = m_src_io_ctx.aio_operate(m_src_oid, comp, &op, nullptr); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +void ObjectCopyRequest<I>::handle_read_object(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r == -ENOENT) { + m_retry_snap_set = m_snap_set; + m_retry_missing_read = true; + + ldout(m_cct, 5) << "object missing potentially due to removed snapshot" + << dendl; + send_list_snaps(); + return; + } + + if (r < 0) { + lderr(m_cct) << "failed to read from source object: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + ceph_assert(!m_read_snaps.empty()); + m_read_snaps.erase(m_read_snaps.begin()); + + send_read_object(); +} + +template <typename I> +void ObjectCopyRequest<I>::send_read_from_parent() { + m_src_image_ctx->snap_lock.get_read(); + m_src_image_ctx->parent_lock.get_read(); + io::Extents image_extents; + compute_read_from_parent_ops(&image_extents); + m_src_image_ctx->snap_lock.put_read(); + + if (image_extents.empty()) { + m_src_image_ctx->parent_lock.put_read(); + handle_read_from_parent(0); + return; + } + + ldout(m_cct, 20) << dendl; + + ceph_assert(m_src_image_ctx->parent != nullptr); + + auto ctx = create_context_callback< + ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_from_parent>(this); + auto comp = io::AioCompletion::create_and_start( + ctx, util::get_image_ctx(m_src_image_ctx->parent), io::AIO_TYPE_READ); + ldout(m_cct, 20) << "completion " << comp << ", extents " << image_extents + << dendl; + + auto src_image_ctx = m_src_image_ctx; + io::ImageRequest<I>::aio_read(src_image_ctx->parent, comp, + std::move(image_extents), + io::ReadResult{&m_read_from_parent_data}, 0, + ZTracer::Trace()); + src_image_ctx->parent_lock.put_read(); +} + +template <typename I> +void ObjectCopyRequest<I>::handle_read_from_parent(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to read from parent: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (!m_read_ops.empty()) { + ceph_assert(m_read_ops.size() == 1); + auto src_snap_seq = m_read_ops.begin()->first.first; + auto ©_ops = m_read_ops.begin()->second; + uint64_t offset = 0; + for (auto it = copy_ops.begin(); it != copy_ops.end(); ) { + it->out_bl.substr_of(m_read_from_parent_data, offset, it->length); + offset += it->length; + if (it->out_bl.is_zero()) { + m_zero_interval[src_snap_seq].insert(it->dst_offset, it->length); + it = copy_ops.erase(it); + } else { + it++; + } + } + merge_write_ops(); + } + + compute_dst_object_may_exist(); + compute_zero_ops(); + + if (m_write_ops.empty()) { + // nothing to copy + finish(-ENOENT); + return; + } + + send_write_object(); + return; +} + +template <typename I> +void ObjectCopyRequest<I>::send_write_object() { + ceph_assert(!m_write_ops.empty()); + auto& copy_ops = m_write_ops.begin()->second; + + // retrieve the destination snap context for the op + SnapIds dst_snap_ids; + librados::snap_t dst_snap_seq = 0; + librados::snap_t src_snap_seq = m_write_ops.begin()->first; + if (src_snap_seq != 0) { + auto snap_map_it = m_snap_map.find(src_snap_seq); + ceph_assert(snap_map_it != m_snap_map.end()); + + auto dst_snap_id = snap_map_it->second.front(); + auto dst_may_exist_it = m_dst_object_may_exist.find(dst_snap_id); + ceph_assert(dst_may_exist_it != m_dst_object_may_exist.end()); + if (!dst_may_exist_it->second && !copy_ops.empty()) { + // if the object cannot exist, the only valid op is to remove it + ceph_assert(copy_ops.size() == 1U); + ceph_assert(copy_ops.begin()->type == COPY_OP_TYPE_REMOVE); + } + + // write snapshot context should be before actual snapshot + ceph_assert(!snap_map_it->second.empty()); + auto dst_snap_ids_it = snap_map_it->second.begin(); + ++dst_snap_ids_it; + + dst_snap_ids = SnapIds{dst_snap_ids_it, snap_map_it->second.end()}; + if (!dst_snap_ids.empty()) { + dst_snap_seq = dst_snap_ids.front(); + } + ceph_assert(dst_snap_seq != CEPH_NOSNAP); + } + + ldout(m_cct, 20) << "dst_snap_seq=" << dst_snap_seq << ", " + << "dst_snaps=" << dst_snap_ids << dendl; + + librados::ObjectWriteOperation op; + uint64_t buffer_offset; + + if (!m_dst_image_ctx->migration_info.empty()) { + cls_client::assert_snapc_seq(&op, dst_snap_seq, + cls::rbd::ASSERT_SNAPC_SEQ_GT_SNAPSET_SEQ); + } + + for (auto ©_op : copy_ops) { + switch (copy_op.type) { + case COPY_OP_TYPE_WRITE: + buffer_offset = 0; + for (auto &e : copy_op.dst_extent_map) { + ldout(m_cct, 20) << "write op: " << e.first << "~" << e.second + << dendl; + bufferlist tmpbl; + tmpbl.substr_of(copy_op.out_bl, buffer_offset, e.second); + op.write(e.first, tmpbl); + op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | + LIBRADOS_OP_FLAG_FADVISE_NOCACHE); + buffer_offset += e.second; + } + break; + case COPY_OP_TYPE_ZERO: + ldout(m_cct, 20) << "zero op: " << copy_op.dst_offset << "~" + << copy_op.length << dendl; + op.zero(copy_op.dst_offset, copy_op.length); + break; + case COPY_OP_TYPE_REMOVE_TRUNC: + ldout(m_cct, 20) << "create op" << dendl; + op.create(false); + // fall through + case COPY_OP_TYPE_TRUNC: + ldout(m_cct, 20) << "trunc op: " << copy_op.dst_offset << dendl; + op.truncate(copy_op.dst_offset); + break; + case COPY_OP_TYPE_REMOVE: + ldout(m_cct, 20) << "remove op" << dendl; + op.remove(); + break; + default: + ceph_abort(); + } + } + + if (op.size() == (m_dst_image_ctx->migration_info.empty() ? 0 : 1)) { + handle_write_object(0); + return; + } + + int r; + Context *finish_op_ctx; + { + RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock); + finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r); + } + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_write_object(r); + finish_op_ctx->complete(0); + }); + librados::AioCompletion *comp = create_rados_callback(ctx); + r = m_dst_io_ctx.aio_operate(m_dst_oid, comp, &op, dst_snap_seq, dst_snap_ids, + nullptr); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +void ObjectCopyRequest<I>::handle_write_object(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r == -ENOENT) { + r = 0; + } else if (r == -ERANGE) { + ldout(m_cct, 10) << "concurrent deep copy" << dendl; + r = 0; + } + if (r < 0) { + lderr(m_cct) << "failed to write to destination object: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + m_write_ops.erase(m_write_ops.begin()); + if (!m_write_ops.empty()) { + send_write_object(); + return; + } + + send_update_object_map(); +} + +template <typename I> +void ObjectCopyRequest<I>::send_update_object_map() { + if (!m_dst_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP) || + m_dst_object_state.empty()) { + finish(0); + return; + } + + m_dst_image_ctx->owner_lock.get_read(); + m_dst_image_ctx->snap_lock.get_read(); + if (m_dst_image_ctx->object_map == nullptr) { + // possible that exclusive lock was lost in background + lderr(m_cct) << "object map is not initialized" << dendl; + + m_dst_image_ctx->snap_lock.put_read(); + m_dst_image_ctx->owner_lock.put_read(); + finish(-EINVAL); + return; + } + + auto &dst_object_state = *m_dst_object_state.begin(); + auto it = m_snap_map.find(dst_object_state.first); + ceph_assert(it != m_snap_map.end()); + auto dst_snap_id = it->second.front(); + auto object_state = dst_object_state.second; + m_dst_object_state.erase(m_dst_object_state.begin()); + + ldout(m_cct, 20) << "dst_snap_id=" << dst_snap_id << ", object_state=" + << static_cast<uint32_t>(object_state) << dendl; + + int r; + auto finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + m_dst_image_ctx->snap_lock.put_read(); + m_dst_image_ctx->owner_lock.put_read(); + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_update_object_map(r); + finish_op_ctx->complete(0); + }); + + auto dst_image_ctx = m_dst_image_ctx; + dst_image_ctx->object_map_lock.get_write(); + bool sent = dst_image_ctx->object_map->template aio_update< + Context, &Context::complete>(dst_snap_id, m_dst_object_number, object_state, + {}, {}, false, ctx); + + // NOTE: state machine might complete before we reach here + dst_image_ctx->object_map_lock.put_write(); + dst_image_ctx->snap_lock.put_read(); + dst_image_ctx->owner_lock.put_read(); + if (!sent) { + ceph_assert(dst_snap_id == CEPH_NOSNAP); + ctx->complete(0); + } +} + +template <typename I> +void ObjectCopyRequest<I>::handle_update_object_map(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to update object map: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (!m_dst_object_state.empty()) { + send_update_object_map(); + return; + } + finish(0); +} + +template <typename I> +Context *ObjectCopyRequest<I>::start_lock_op(RWLock &owner_lock, int* r) { + ceph_assert(m_dst_image_ctx->owner_lock.is_locked()); + if (m_dst_image_ctx->exclusive_lock == nullptr) { + return new FunctionContext([](int r) {}); + } + return m_dst_image_ctx->exclusive_lock->start_op(r); +} + +template <typename I> +uint64_t ObjectCopyRequest<I>::src_to_dst_object_offset(uint64_t objectno, + uint64_t offset) { + std::vector<std::pair<uint64_t, uint64_t>> image_extents; + Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, objectno, offset, 1, + image_extents); + ceph_assert(image_extents.size() == 1); + auto dst_object_offset = image_extents.begin()->first; + + std::map<object_t, std::vector<ObjectExtent>> dst_object_extents; + Striper::file_to_extents(m_cct, m_dst_image_ctx->format_string, + &m_dst_image_ctx->layout, dst_object_offset, 1, 0, + dst_object_extents); + ceph_assert(dst_object_extents.size() == 1); + ceph_assert(dst_object_extents.begin()->second.size() == 1); + auto &e = *dst_object_extents.begin()->second.begin(); + ceph_assert(e.objectno == m_dst_object_number); + + return e.offset; +} + +template <typename I> +void ObjectCopyRequest<I>::compute_src_object_extents() { + std::vector<std::pair<uint64_t, uint64_t>> image_extents; + Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout, m_dst_object_number, + 0, m_dst_image_ctx->layout.object_size, image_extents); + + size_t total = 0; + for (auto &e : image_extents) { + std::map<object_t, std::vector<ObjectExtent>> src_object_extents; + Striper::file_to_extents(m_cct, m_src_image_ctx->format_string, + &m_src_image_ctx->layout, e.first, e.second, 0, + src_object_extents); + auto stripe_unit = std::min(m_src_image_ctx->layout.stripe_unit, + m_dst_image_ctx->layout.stripe_unit); + for (auto &p : src_object_extents) { + for (auto &s : p.second) { + m_src_objects.insert(s.objectno); + total += s.length; + while (s.length > 0) { + ceph_assert(s.length >= stripe_unit); + auto dst_object_offset = src_to_dst_object_offset(s.objectno, s.offset); + m_src_object_extents[dst_object_offset] = {s.objectno, s.offset, + stripe_unit}; + s.offset += stripe_unit; + s.length -= stripe_unit; + } + } + } + } + + ceph_assert(total == m_dst_image_ctx->layout.object_size); + + ldout(m_cct, 20) << m_src_object_extents.size() << " src extents" << dendl; +} + +template <typename I> +void ObjectCopyRequest<I>::compute_read_ops() { + m_read_ops = {}; + m_read_snaps = {}; + m_zero_interval = {}; + + m_src_image_ctx->parent_lock.get_read(); + bool hide_parent = (m_src_image_ctx->parent != nullptr); + m_src_image_ctx->parent_lock.put_read(); + + librados::snap_t src_copy_point_snap_id = m_snap_map.rbegin()->first; + bool prev_exists = (hide_parent || m_src_snap_id_start > 0); + uint64_t prev_end_size = prev_exists ? + m_src_image_ctx->layout.object_size : 0; + librados::snap_t start_src_snap_id = m_src_snap_id_start; + + for (auto &pair : m_snap_map) { + ceph_assert(!pair.second.empty()); + librados::snap_t end_src_snap_id = pair.first; + librados::snap_t end_dst_snap_id = pair.second.front(); + + interval_set<uint64_t> diff; + uint64_t end_size; + bool exists; + librados::snap_t clone_end_snap_id; + calc_snap_set_diff(m_cct, m_snap_set, start_src_snap_id, + end_src_snap_id, &diff, &end_size, &exists, + &clone_end_snap_id, &m_read_whole_object); + + if (m_read_whole_object) { + ldout(m_cct, 1) << "need to read full object" << dendl; + diff.insert(0, m_src_image_ctx->layout.object_size); + exists = true; + end_size = m_src_image_ctx->layout.object_size; + clone_end_snap_id = end_src_snap_id; + } else if (!exists) { + end_size = 0; + if (hide_parent && end_src_snap_id == m_snap_map.begin()->first && + m_snap_set.clones.empty()) { + ldout(m_cct, 20) << "no clones for existing object" << dendl; + exists = true; + diff.insert(0, m_src_image_ctx->layout.object_size); + clone_end_snap_id = end_src_snap_id; + } + } + + ldout(m_cct, 20) << "start_src_snap_id=" << start_src_snap_id << ", " + << "end_src_snap_id=" << end_src_snap_id << ", " + << "clone_end_snap_id=" << clone_end_snap_id << ", " + << "end_dst_snap_id=" << end_dst_snap_id << ", " + << "diff=" << diff << ", " + << "end_size=" << end_size << ", " + << "exists=" << exists << dendl; + + m_zero_interval[end_src_snap_id] = {}; + + if (exists || prev_exists) { + // clip diff to size of object (in case it was truncated) + if (end_size < prev_end_size) { + interval_set<uint64_t> trunc; + trunc.insert(end_size, prev_end_size); + trunc.intersection_of(diff); + diff.subtract(trunc); + ldout(m_cct, 20) << "clearing truncate diff: " << trunc << dendl; + } + + if (exists) { + // reads should be issued against the newest (existing) snapshot within + // the associated snapshot object clone. writes should be issued + // against the oldest snapshot in the snap_map. + ceph_assert(clone_end_snap_id >= end_src_snap_id); + if (clone_end_snap_id > src_copy_point_snap_id) { + // do not read past the copy point snapshot + clone_end_snap_id = src_copy_point_snap_id; + } + } + + for (auto &it : m_src_object_extents) { + auto dst_object_offset = it.first; + auto &e = it.second; + + if (e.object_no != m_src_ono) { + continue; + } + + interval_set<uint64_t> read_interval; + read_interval.insert(e.offset, e.length); + + if (end_size < prev_end_size) { + interval_set<uint64_t> zero_interval; + zero_interval.insert(end_size, prev_end_size - end_size); + zero_interval.intersection_of(read_interval); + if (!zero_interval.empty()) { + auto it = zero_interval.begin(); + auto offset = it.get_start() - e.offset; + m_zero_interval[end_src_snap_id].insert(dst_object_offset + offset, + it.get_len()); + ldout(m_cct, 20) << "extent " << e.offset << "~" << e.length + << " intersects truncation " << end_size << "~" + << prev_end_size - end_size << ", inserting zero " + << dst_object_offset + offset << "~" + << it.get_len() << dendl; + } + } + + // limit read interval to diff + read_interval.intersection_of(diff); + + ldout(m_cct, 20) << "src_object_extent: " << e.offset << "~" << e.length + << ", dst_object_offset=" << dst_object_offset + << ", read: " << read_interval << dendl; + + ceph_assert(exists || read_interval.empty()); + + for (auto it = read_interval.begin(); it != read_interval.end(); + it++) { + ceph_assert(it.get_start() >= e.offset); + auto offset = it.get_start() - e.offset; + ldout(m_cct, 20) << "read/write op: " << it.get_start() << "~" + << it.get_len() << " dst: " + << dst_object_offset + offset << dendl; + m_read_ops[{end_src_snap_id, clone_end_snap_id}] + .emplace_back(COPY_OP_TYPE_WRITE, it.get_start(), + dst_object_offset + offset, it.get_len()); + } + } + } + + prev_end_size = end_size; + prev_exists = exists; + if (hide_parent && prev_exists && prev_end_size == 0) { + // hide parent + prev_end_size = m_src_image_ctx->layout.object_size; + } + start_src_snap_id = end_src_snap_id; + } + + for (auto &it : m_read_ops) { + m_read_snaps.push_back(it.first); + } +} + +template <typename I> +void ObjectCopyRequest<I>::compute_read_from_parent_ops( + io::Extents *parent_image_extents) { + assert(m_src_image_ctx->snap_lock.is_locked()); + assert(m_src_image_ctx->parent_lock.is_locked()); + + m_read_ops = {}; + m_zero_interval = {}; + parent_image_extents->clear(); + + if (m_src_image_ctx->parent == nullptr) { + ldout(m_cct, 20) << "no parent" << dendl; + return; + } + + size_t noent_count = 0; + for (auto &it : m_src_object_extents) { + if (it.second.noent) { + noent_count++; + } + } + + if (noent_count == 0) { + ldout(m_cct, 20) << "no extents need read from parent" << dendl; + return; + } + + if (noent_count == m_src_object_extents.size() && !m_flatten) { + ldout(m_cct, 20) << "reading all extents skipped when no flatten" + << dendl; + return; + } + + ldout(m_cct, 20) << dendl; + + auto src_snap_seq = m_snap_map.begin()->first; + + uint64_t parent_overlap; + int r = m_src_image_ctx->get_parent_overlap(src_snap_seq, &parent_overlap); + if (r < 0) { + ldout(m_cct, 5) << "failed getting parent overlap for snap_id: " + << src_snap_seq << ": " << cpp_strerror(r) << dendl; + return; + } + if (parent_overlap == 0) { + ldout(m_cct, 20) << "no parent overlap" << dendl; + return; + } + + for (auto &it : m_src_object_extents) { + auto dst_object_offset = it.first; + auto &e = it.second; + + if (!e.noent) { + continue; + } + + std::vector<std::pair<uint64_t, uint64_t>> image_extents; + Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, e.object_no, + e.offset, e.length, image_extents); + + uint64_t overlap = m_src_image_ctx->prune_parent_extents(image_extents, + parent_overlap); + if (overlap == 0) { + ldout(m_cct, 20) << "no parent overlap for object_no " << e.object_no + << " extent " << e.offset << "~" << e.length << dendl; + continue; + } + + ldout(m_cct, 20) << "object_no " << e.object_no << " extent " << e.offset + << "~" << e.length << " overlap " << parent_overlap + << " parent extents " << image_extents << dendl; + + ceph_assert(image_extents.size() == 1); + + auto src_image_offset = image_extents.begin()->first; + auto length = image_extents.begin()->second; + m_read_ops[{src_snap_seq, 0}].emplace_back(COPY_OP_TYPE_WRITE, e.offset, + dst_object_offset, length); + m_read_ops[{src_snap_seq, 0}].rbegin()->src_extent_map[e.offset] = length; + parent_image_extents->emplace_back(src_image_offset, length); + } + + if (!parent_image_extents->empty()) { + m_dst_object_state[src_snap_seq] = OBJECT_EXISTS; + } +} + +template <typename I> +void ObjectCopyRequest<I>::merge_write_ops() { + ldout(m_cct, 20) << dendl; + + for (auto &it : m_zero_interval) { + m_dst_zero_interval[it.first].insert(it.second); + } + + for (auto &it : m_read_ops) { + auto src_snap_seq = it.first.first; + auto ©_ops = it.second; + for (auto ©_op : copy_ops) { + uint64_t src_offset = copy_op.src_offset; + uint64_t dst_offset = copy_op.dst_offset; + for (auto &e : copy_op.src_extent_map) { + uint64_t zero_len = e.first - src_offset; + if (zero_len > 0) { + ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq + << ", inserting zero " << dst_offset << "~" + << zero_len << dendl; + m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len); + src_offset += zero_len; + dst_offset += zero_len; + } + copy_op.dst_extent_map[dst_offset] = e.second; + src_offset += e.second; + dst_offset += e.second; + } + if (dst_offset < copy_op.dst_offset + copy_op.length) { + uint64_t zero_len = copy_op.dst_offset + copy_op.length - dst_offset; + ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq + << ", inserting zero " << dst_offset << "~" + << zero_len << dendl; + m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len); + } else { + ceph_assert(dst_offset == copy_op.dst_offset + copy_op.length); + } + m_write_ops[src_snap_seq].emplace_back(std::move(copy_op)); + } + } +} + +template <typename I> +void ObjectCopyRequest<I>::compute_zero_ops() { + ldout(m_cct, 20) << dendl; + + bool fast_diff = m_dst_image_ctx->test_features(RBD_FEATURE_FAST_DIFF); + uint64_t prev_end_size = 0; + + m_src_image_ctx->parent_lock.get_read(); + bool hide_parent = (m_src_image_ctx->parent != nullptr); + m_src_image_ctx->parent_lock.put_read(); + + for (auto &it : m_dst_zero_interval) { + auto src_snap_seq = it.first; + auto &zero_interval = it.second; + + auto snap_map_it = m_snap_map.find(src_snap_seq); + ceph_assert(snap_map_it != m_snap_map.end()); + auto dst_snap_seq = snap_map_it->second.front(); + + auto dst_may_exist_it = m_dst_object_may_exist.find(dst_snap_seq); + ceph_assert(dst_may_exist_it != m_dst_object_may_exist.end()); + if (!dst_may_exist_it->second && prev_end_size > 0) { + ldout(m_cct, 5) << "object DNE for snap_id: " << dst_snap_seq << dendl; + m_write_ops[src_snap_seq].emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0); + prev_end_size = 0; + continue; + } + + if (hide_parent) { + RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock); + RWLock::RLocker parent_locker(m_dst_image_ctx->parent_lock); + uint64_t parent_overlap = 0; + int r = m_dst_image_ctx->get_parent_overlap(dst_snap_seq, &parent_overlap); + if (r < 0) { + ldout(m_cct, 5) << "failed getting parent overlap for snap_id: " + << dst_snap_seq << ": " << cpp_strerror(r) << dendl; + } + if (parent_overlap == 0) { + ldout(m_cct, 20) << "no parent overlap" << dendl; + hide_parent = false; + } else { + std::vector<std::pair<uint64_t, uint64_t>> image_extents; + Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout, + m_dst_object_number, 0, + m_dst_image_ctx->layout.object_size, + image_extents); + uint64_t overlap = m_dst_image_ctx->prune_parent_extents(image_extents, + parent_overlap); + if (overlap == 0) { + ldout(m_cct, 20) << "no parent overlap" << dendl; + hide_parent = false; + } else if (src_snap_seq == m_dst_zero_interval.begin()->first) { + for (auto e : image_extents) { + prev_end_size += e.second; + } + ceph_assert(prev_end_size <= m_dst_image_ctx->layout.object_size); + } + } + } + + uint64_t end_size = prev_end_size; + + // update end_size if there are writes into higher offsets + auto iter = m_write_ops.find(src_snap_seq); + if (iter != m_write_ops.end()) { + for (auto ©_op : iter->second) { + for (auto &e : copy_op.dst_extent_map) { + end_size = std::max(end_size, e.first + e.second); + } + } + } + + for (auto z = zero_interval.begin(); z != zero_interval.end(); z++) { + if (z.get_start() + z.get_len() >= end_size) { + // zero interval at the object end + if (z.get_start() == 0 && hide_parent) { + m_write_ops[src_snap_seq] + .emplace_back(COPY_OP_TYPE_REMOVE_TRUNC, 0, 0, 0); + ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE_TRUNC" << dendl; + } else if (z.get_start() < prev_end_size) { + if (z.get_start() == 0) { + m_write_ops[src_snap_seq] + .emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0); + ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE" << dendl; + } else { + m_write_ops[src_snap_seq] + .emplace_back(COPY_OP_TYPE_TRUNC, 0, z.get_start(), 0); + ldout(m_cct, 20) << "COPY_OP_TYPE_TRUNC " << z.get_start() << dendl; + } + } + end_size = std::min(end_size, z.get_start()); + } else { + // zero interval inside the object + m_write_ops[src_snap_seq] + .emplace_back(COPY_OP_TYPE_ZERO, 0, z.get_start(), z.get_len()); + ldout(m_cct, 20) << "COPY_OP_TYPE_ZERO " << z.get_start() << "~" + << z.get_len() << dendl; + } + } + ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", end_size=" + << end_size << dendl; + if (end_size > 0 || hide_parent) { + m_dst_object_state[src_snap_seq] = OBJECT_EXISTS; + if (fast_diff && end_size == prev_end_size && + m_write_ops[src_snap_seq].empty()) { + m_dst_object_state[src_snap_seq] = OBJECT_EXISTS_CLEAN; + } + } + prev_end_size = end_size; + } +} + +template <typename I> +void ObjectCopyRequest<I>::finish(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + // ensure IoCtxs are closed prior to proceeding + auto on_finish = m_on_finish; + + m_src_async_op->finish_op(); + delete m_src_async_op; + delete this; + + on_finish->complete(r); +} + +template <typename I> +void ObjectCopyRequest<I>::compute_dst_object_may_exist() { + RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock); + + auto snap_ids = m_dst_image_ctx->snaps; + snap_ids.push_back(CEPH_NOSNAP); + + for (auto snap_id : snap_ids) { + m_dst_object_may_exist[snap_id] = + (m_dst_object_number < m_dst_image_ctx->get_object_count(snap_id)); + } +} + +} // namespace deep_copy +} // namespace librbd + +template class librbd::deep_copy::ObjectCopyRequest<librbd::ImageCtx>; diff --git a/src/librbd/deep_copy/ObjectCopyRequest.h b/src/librbd/deep_copy/ObjectCopyRequest.h new file mode 100644 index 00000000..a0445f3b --- /dev/null +++ b/src/librbd/deep_copy/ObjectCopyRequest.h @@ -0,0 +1,210 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_DEEP_COPY_OBJECT_COPY_REQUEST_H +#define CEPH_LIBRBD_DEEP_COPY_OBJECT_COPY_REQUEST_H + +#include "include/int_types.h" +#include "include/interval_set.h" +#include "include/rados/librados.hpp" +#include "common/snap_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/deep_copy/Types.h" +#include "librbd/io/Types.h" +#include <list> +#include <map> +#include <string> + +class Context; +class RWLock; + +namespace librbd { + +namespace io { class AsyncOperation; } + +namespace deep_copy { + +template <typename ImageCtxT = librbd::ImageCtx> +class ObjectCopyRequest { +public: + static ObjectCopyRequest* create(ImageCtxT *src_image_ctx, + ImageCtxT *dst_image_ctx, + librados::snap_t src_snap_id_start, + librados::snap_t dst_snap_id_start, + const SnapMap &snap_map, + uint64_t object_number, bool flatten, + Context *on_finish) { + return new ObjectCopyRequest(src_image_ctx, dst_image_ctx, + src_snap_id_start, dst_snap_id_start, snap_map, + object_number, flatten, on_finish); + } + + ObjectCopyRequest(ImageCtxT *src_image_ctx, ImageCtxT *dst_image_ctx, + librados::snap_t src_snap_id_start, + librados::snap_t dst_snap_id_start, const SnapMap &snap_map, + uint64_t object_number, bool flatten, Context *on_finish); + + void send(); + + // testing support + inline librados::IoCtx &get_src_io_ctx() { + return m_src_io_ctx; + } + inline librados::IoCtx &get_dst_io_ctx() { + return m_dst_io_ctx; + } + +private: + /** + * @verbatim + * + * <start> + * | /----------------------\ + * | | | + * v v | (repeat for each src object) + * LIST_SNAPS < * * * | + * | * (-ENOENT and snap set stale) + * | * * * * * * | + * | * /-----------\ | + * | * | | (repeat for each snapshot) + * v * v | | + * READ_OBJECT ---------/ | + * | | | + * | \----------------------/ + * v + * READ_FROM_PARENT (skip if not needed) + * | + * | /-----------\ + * | | | (repeat for each snapshot) + * v v | + * WRITE_OBJECT --------/ + * | + * | /-----------\ + * | | | (repeat for each snapshot) + * v v | + * UPDATE_OBJECT_MAP ---/ (skip if object + * | map disabled) + * v + * <finish> + * + * @endverbatim + */ + + struct SrcObjectExtent { + uint64_t object_no = 0; + uint64_t offset = 0; + uint64_t length = 0; + bool noent = false; + + SrcObjectExtent() { + } + SrcObjectExtent(uint64_t object_no, uint64_t offset, uint64_t length) + : object_no(object_no), offset(offset), length(length) { + } + }; + + typedef std::map<uint64_t, SrcObjectExtent> SrcObjectExtents; + + enum CopyOpType { + COPY_OP_TYPE_WRITE, + COPY_OP_TYPE_ZERO, + COPY_OP_TYPE_TRUNC, + COPY_OP_TYPE_REMOVE, + COPY_OP_TYPE_REMOVE_TRUNC, + }; + + typedef std::map<uint64_t, uint64_t> ExtentMap; + + struct CopyOp { + CopyOp(CopyOpType type, uint64_t src_offset, uint64_t dst_offset, + uint64_t length) + : type(type), src_offset(src_offset), dst_offset(dst_offset), + length(length) { + } + + CopyOpType type; + uint64_t src_offset; + uint64_t dst_offset; + uint64_t length; + + ExtentMap src_extent_map; + ExtentMap dst_extent_map; + bufferlist out_bl; + }; + + typedef std::list<CopyOp> CopyOps; + typedef std::pair<librados::snap_t, librados::snap_t> WriteReadSnapIds; + typedef std::map<librados::snap_t, uint8_t> SnapObjectStates; + typedef std::map<librados::snap_t, std::map<uint64_t, uint64_t>> SnapObjectSizes; + + ImageCtxT *m_src_image_ctx; + ImageCtxT *m_dst_image_ctx; + CephContext *m_cct; + librados::snap_t m_src_snap_id_start; + librados::snap_t m_dst_snap_id_start; + SnapMap m_snap_map; + uint64_t m_dst_object_number; + bool m_flatten; + Context *m_on_finish; + + decltype(m_src_image_ctx->data_ctx) m_src_io_ctx; + decltype(m_dst_image_ctx->data_ctx) m_dst_io_ctx; + std::string m_dst_oid; + + std::set<uint64_t> m_src_objects; + uint64_t m_src_ono; + std::string m_src_oid; + SrcObjectExtents m_src_object_extents; + librados::snap_set_t m_snap_set; + int m_snap_ret = 0; + bool m_retry_missing_read = false; + librados::snap_set_t m_retry_snap_set; + bool m_read_whole_object = false; + + std::map<WriteReadSnapIds, CopyOps> m_read_ops; + std::list<WriteReadSnapIds> m_read_snaps; + std::map<librados::snap_t, CopyOps> m_write_ops; + std::map<librados::snap_t, interval_set<uint64_t>> m_zero_interval; + std::map<librados::snap_t, interval_set<uint64_t>> m_dst_zero_interval; + std::map<librados::snap_t, uint8_t> m_dst_object_state; + std::map<librados::snap_t, bool> m_dst_object_may_exist; + bufferlist m_read_from_parent_data; + + io::AsyncOperation* m_src_async_op = nullptr; + + void send_list_snaps(); + void handle_list_snaps(int r); + + void send_read_object(); + void handle_read_object(int r); + + void send_read_from_parent(); + void handle_read_from_parent(int r); + + void send_write_object(); + void handle_write_object(int r); + + void send_update_object_map(); + void handle_update_object_map(int r); + + Context *start_lock_op(RWLock &owner_lock, int* r); + + uint64_t src_to_dst_object_offset(uint64_t objectno, uint64_t offset); + + void compute_src_object_extents(); + void compute_read_ops(); + void compute_read_from_parent_ops(io::Extents *image_extents); + void merge_write_ops(); + void compute_zero_ops(); + + void compute_dst_object_may_exist(); + + void finish(int r); +}; + +} // namespace deep_copy +} // namespace librbd + +extern template class librbd::deep_copy::ObjectCopyRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_DEEP_COPY_OBJECT_COPY_REQUEST_H diff --git a/src/librbd/deep_copy/SetHeadRequest.cc b/src/librbd/deep_copy/SetHeadRequest.cc new file mode 100644 index 00000000..5670ba07 --- /dev/null +++ b/src/librbd/deep_copy/SetHeadRequest.cc @@ -0,0 +1,224 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SetHeadRequest.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/Utils.h" +#include "librbd/image/AttachParentRequest.h" +#include "librbd/image/DetachParentRequest.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::deep_copy::SetHeadRequest: " \ + << this << " " << __func__ << ": " + +namespace librbd { +namespace deep_copy { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +SetHeadRequest<I>::SetHeadRequest(I *image_ctx, uint64_t size, + const cls::rbd::ParentImageSpec &spec, + uint64_t parent_overlap, + Context *on_finish) + : m_image_ctx(image_ctx), m_size(size), m_parent_spec(spec), + m_parent_overlap(parent_overlap), m_on_finish(on_finish), + m_cct(image_ctx->cct) { + ceph_assert(m_parent_overlap <= m_size); +} + +template <typename I> +void SetHeadRequest<I>::send() { + send_set_size(); +} + +template <typename I> +void SetHeadRequest<I>::send_set_size() { + m_image_ctx->snap_lock.get_read(); + if (m_image_ctx->size == m_size) { + m_image_ctx->snap_lock.put_read(); + send_detach_parent(); + return; + } + m_image_ctx->snap_lock.put_read(); + + ldout(m_cct, 20) << dendl; + + // Change the image size on disk so that the snapshot picks up + // the expected size. We can do this because the last snapshot + // we process is the sync snapshot which was created to match the + // image size. We also don't need to worry about trimming because + // we track the highest possible object number within the sync record + librados::ObjectWriteOperation op; + librbd::cls_client::set_size(&op, m_size); + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_set_size(r); + finish_op_ctx->complete(0); + }); + librados::AioCompletion *comp = create_rados_callback(ctx); + r = m_image_ctx->md_ctx.aio_operate(m_image_ctx->header_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +void SetHeadRequest<I>::handle_set_size(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to update image size: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + { + // adjust in-memory image size now that it's updated on disk + RWLock::WLocker snap_locker(m_image_ctx->snap_lock); + if (m_image_ctx->size > m_size) { + RWLock::WLocker parent_locker(m_image_ctx->parent_lock); + if (m_image_ctx->parent_md.spec.pool_id != -1 && + m_image_ctx->parent_md.overlap > m_size) { + m_image_ctx->parent_md.overlap = m_size; + } + } + m_image_ctx->size = m_size; + } + + send_detach_parent(); +} + +template <typename I> +void SetHeadRequest<I>::send_detach_parent() { + m_image_ctx->parent_lock.get_read(); + if (m_image_ctx->parent_md.spec.pool_id == -1 || + (m_image_ctx->parent_md.spec == m_parent_spec && + m_image_ctx->parent_md.overlap == m_parent_overlap)) { + m_image_ctx->parent_lock.put_read(); + send_attach_parent(); + return; + } + m_image_ctx->parent_lock.put_read(); + + ldout(m_cct, 20) << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_detach_parent(r); + finish_op_ctx->complete(0); + }); + auto req = image::DetachParentRequest<I>::create(*m_image_ctx, ctx); + req->send(); +} + +template <typename I> +void SetHeadRequest<I>::handle_detach_parent(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to remove parent: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + { + // adjust in-memory parent now that it's updated on disk + RWLock::WLocker parent_locker(m_image_ctx->parent_lock); + m_image_ctx->parent_md.spec = {}; + m_image_ctx->parent_md.overlap = 0; + } + + send_attach_parent(); +} + +template <typename I> +void SetHeadRequest<I>::send_attach_parent() { + m_image_ctx->parent_lock.get_read(); + if (m_image_ctx->parent_md.spec == m_parent_spec && + m_image_ctx->parent_md.overlap == m_parent_overlap) { + m_image_ctx->parent_lock.put_read(); + finish(0); + return; + } + m_image_ctx->parent_lock.put_read(); + + ldout(m_cct, 20) << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_attach_parent(r); + finish_op_ctx->complete(0); + }); + auto req = image::AttachParentRequest<I>::create( + *m_image_ctx, m_parent_spec, m_parent_overlap, false, ctx); + req->send(); +} + +template <typename I> +void SetHeadRequest<I>::handle_attach_parent(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to attach parent: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + { + // adjust in-memory parent now that it's updated on disk + RWLock::WLocker parent_locker(m_image_ctx->parent_lock); + m_image_ctx->parent_md.spec = m_parent_spec; + m_image_ctx->parent_md.overlap = m_parent_overlap; + } + + finish(0); +} + +template <typename I> +Context *SetHeadRequest<I>::start_lock_op(int* r) { + RWLock::RLocker owner_locker(m_image_ctx->owner_lock); + if (m_image_ctx->exclusive_lock == nullptr) { + return new FunctionContext([](int r) {}); + } + return m_image_ctx->exclusive_lock->start_op(r); +} + +template <typename I> +void SetHeadRequest<I>::finish(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace deep_copy +} // namespace librbd + +template class librbd::deep_copy::SetHeadRequest<librbd::ImageCtx>; diff --git a/src/librbd/deep_copy/SetHeadRequest.h b/src/librbd/deep_copy/SetHeadRequest.h new file mode 100644 index 00000000..9a17c9fd --- /dev/null +++ b/src/librbd/deep_copy/SetHeadRequest.h @@ -0,0 +1,87 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_DEEP_COPY_SET_HEAD_REQUEST_H +#define CEPH_LIBRBD_DEEP_COPY_SET_HEAD_REQUEST_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "common/snap_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/Types.h" +#include <map> +#include <set> +#include <string> +#include <tuple> + +class Context; + +namespace librbd { +namespace deep_copy { + +template <typename ImageCtxT = librbd::ImageCtx> +class SetHeadRequest { +public: + static SetHeadRequest* create(ImageCtxT *image_ctx, uint64_t size, + const cls::rbd::ParentImageSpec &parent_spec, + uint64_t parent_overlap, + Context *on_finish) { + return new SetHeadRequest(image_ctx, size, parent_spec, parent_overlap, + on_finish); + } + + SetHeadRequest(ImageCtxT *image_ctx, uint64_t size, + const cls::rbd::ParentImageSpec &parent_spec, + uint64_t parent_overlap, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v (skip if not needed) + * SET_SIZE + * | + * v (skip if not needed) + * DETACH_PARENT + * | + * v (skip if not needed) + * ATTACH_PARENT + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_image_ctx; + uint64_t m_size; + cls::rbd::ParentImageSpec m_parent_spec; + uint64_t m_parent_overlap; + Context *m_on_finish; + + CephContext *m_cct; + + void send_set_size(); + void handle_set_size(int r); + + void send_detach_parent(); + void handle_detach_parent(int r); + + void send_attach_parent(); + void handle_attach_parent(int r); + + Context *start_lock_op(int* r); + + void finish(int r); +}; + +} // namespace deep_copy +} // namespace librbd + +extern template class librbd::deep_copy::SetHeadRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_DEEP_COPY_SET_HEAD_REQUEST_H diff --git a/src/librbd/deep_copy/SnapshotCopyRequest.cc b/src/librbd/deep_copy/SnapshotCopyRequest.cc new file mode 100644 index 00000000..d1375a42 --- /dev/null +++ b/src/librbd/deep_copy/SnapshotCopyRequest.cc @@ -0,0 +1,730 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SnapshotCopyRequest.h" +#include "SetHeadRequest.h" +#include "SnapshotCreateRequest.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ObjectMap.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "osdc/Striper.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::deep_copy::SnapshotCopyRequest: " \ + << this << " " << __func__ << ": " + +namespace librbd { +namespace deep_copy { + +namespace { + +template <typename I> +const std::string &get_snapshot_name(I *image_ctx, librados::snap_t snap_id) { + auto snap_it = std::find_if(image_ctx->snap_ids.begin(), + image_ctx->snap_ids.end(), + [snap_id]( + const std::pair< + std::pair<cls::rbd::SnapshotNamespace, + std::string>, + librados::snap_t> &pair) { + return pair.second == snap_id; + }); + ceph_assert(snap_it != image_ctx->snap_ids.end()); + return snap_it->first.second; +} + +} // anonymous namespace + +using librbd::util::create_context_callback; +using librbd::util::unique_lock_name; + +template <typename I> +SnapshotCopyRequest<I>::SnapshotCopyRequest(I *src_image_ctx, + I *dst_image_ctx, + librados::snap_t src_snap_id_start, + librados::snap_t src_snap_id_end, + librados::snap_t dst_snap_id_start, + bool flatten, ContextWQ *work_queue, + SnapSeqs *snap_seqs, + Context *on_finish) + : RefCountedObject(dst_image_ctx->cct, 1), m_src_image_ctx(src_image_ctx), + m_dst_image_ctx(dst_image_ctx), m_src_snap_id_start(src_snap_id_start), + m_src_snap_id_end(src_snap_id_end), m_dst_snap_id_start(dst_snap_id_start), + m_flatten(flatten), m_work_queue(work_queue), m_snap_seqs_result(snap_seqs), + m_snap_seqs(*snap_seqs), m_on_finish(on_finish), m_cct(dst_image_ctx->cct), + m_lock(unique_lock_name("SnapshotCopyRequest::m_lock", this)) { + ceph_assert((m_src_snap_id_start == 0 && m_dst_snap_id_start == 0) || + (m_src_snap_id_start > 0 && m_dst_snap_id_start > 0)); + + // snap ids ordered from oldest to newest + m_src_image_ctx->snap_lock.get_read(); + m_src_snap_ids.insert(src_image_ctx->snaps.begin(), + src_image_ctx->snaps.end()); + m_src_image_ctx->snap_lock.put_read(); + + m_dst_image_ctx->snap_lock.get_read(); + m_dst_snap_ids.insert(dst_image_ctx->snaps.begin(), + dst_image_ctx->snaps.end()); + m_dst_image_ctx->snap_lock.put_read(); + + if (m_src_snap_id_end != CEPH_NOSNAP) { + m_src_snap_ids.erase(m_src_snap_ids.upper_bound(m_src_snap_id_end), + m_src_snap_ids.end()); + } +} + +template <typename I> +void SnapshotCopyRequest<I>::send() { + cls::rbd::ParentImageSpec src_parent_spec; + int r = validate_parent(m_src_image_ctx, &src_parent_spec); + if (r < 0) { + lderr(m_cct) << "source image parent spec mismatch" << dendl; + error(r); + return; + } + + r = validate_parent(m_dst_image_ctx, &m_dst_parent_spec); + if (r < 0) { + lderr(m_cct) << "destination image parent spec mismatch" << dendl; + error(r); + return; + } + + send_snap_unprotect(); +} + +template <typename I> +void SnapshotCopyRequest<I>::cancel() { + Mutex::Locker locker(m_lock); + + ldout(m_cct, 20) << dendl; + m_canceled = true; +} + +template <typename I> +void SnapshotCopyRequest<I>::send_snap_unprotect() { + + SnapIdSet::iterator snap_id_it = m_dst_snap_ids.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_id_it = m_dst_snap_ids.upper_bound(m_prev_snap_id); + } else if (m_dst_snap_id_start > 0) { + snap_id_it = m_dst_snap_ids.upper_bound(m_dst_snap_id_start); + } + + for (; snap_id_it != m_dst_snap_ids.end(); ++snap_id_it) { + librados::snap_t dst_snap_id = *snap_id_it; + + m_dst_image_ctx->snap_lock.get_read(); + + bool dst_unprotected; + int r = m_dst_image_ctx->is_snap_unprotected(dst_snap_id, &dst_unprotected); + if (r < 0) { + lderr(m_cct) << "failed to retrieve destination snap unprotect status: " + << cpp_strerror(r) << dendl; + m_dst_image_ctx->snap_lock.put_read(); + finish(r); + return; + } + m_dst_image_ctx->snap_lock.put_read(); + + if (dst_unprotected) { + // snap is already unprotected -- check next snap + continue; + } + + // if destination snapshot is protected and (1) it isn't in our mapping + // table, or (2) the source snapshot isn't protected, unprotect it + auto snap_seq_it = std::find_if( + m_snap_seqs.begin(), m_snap_seqs.end(), + [dst_snap_id](const SnapSeqs::value_type& pair) { + return pair.second == dst_snap_id; + }); + + if (snap_seq_it != m_snap_seqs.end()) { + m_src_image_ctx->snap_lock.get_read(); + bool src_unprotected; + r = m_src_image_ctx->is_snap_unprotected(snap_seq_it->first, + &src_unprotected); + ldout(m_cct, 20) << "m_src_image_ctx->is_snap_unprotected(" + << snap_seq_it->first << "): r=" << r + << ", src_unprotected=" << src_unprotected << dendl; + if (r == -ENOENT) { + src_unprotected = true; + r = 0; + } + if (r < 0) { + lderr(m_cct) << "failed to retrieve source snap unprotect status: " + << cpp_strerror(r) << dendl; + m_src_image_ctx->snap_lock.put_read(); + finish(r); + return; + } + m_src_image_ctx->snap_lock.put_read(); + + if (src_unprotected) { + // source is unprotected -- unprotect destination snap + break; + } + } else { + // source snapshot doesn't exist -- unprotect destination snap + break; + } + } + + if (snap_id_it == m_dst_snap_ids.end()) { + // no destination snapshots to unprotect + m_prev_snap_id = CEPH_NOSNAP; + send_snap_remove(); + return; + } + + m_prev_snap_id = *snap_id_it; + m_snap_name = get_snapshot_name(m_dst_image_ctx, m_prev_snap_id); + + ldout(m_cct, 20) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_snap_unprotect(r); + finish_op_ctx->complete(0); + }); + RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock); + m_dst_image_ctx->operations->execute_snap_unprotect( + cls::rbd::UserSnapshotNamespace(), m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotCopyRequest<I>::handle_snap_unprotect(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to unprotect snapshot '" << m_snap_name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + { + // avoid the need to refresh to delete the newly unprotected snapshot + RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock); + auto snap_info_it = m_dst_image_ctx->snap_info.find(m_prev_snap_id); + if (snap_info_it != m_dst_image_ctx->snap_info.end()) { + snap_info_it->second.protection_status = + RBD_PROTECTION_STATUS_UNPROTECTED; + } + } + + if (handle_cancellation()) { + return; + } + + send_snap_unprotect(); +} + +template <typename I> +void SnapshotCopyRequest<I>::send_snap_remove() { + SnapIdSet::iterator snap_id_it = m_dst_snap_ids.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_id_it = m_dst_snap_ids.upper_bound(m_prev_snap_id); + } else if (m_dst_snap_id_start > 0) { + snap_id_it = m_dst_snap_ids.upper_bound(m_dst_snap_id_start); + } + + for (; snap_id_it != m_dst_snap_ids.end(); ++snap_id_it) { + librados::snap_t dst_snap_id = *snap_id_it; + + cls::rbd::SnapshotNamespace snap_namespace; + m_dst_image_ctx->snap_lock.get_read(); + int r = m_dst_image_ctx->get_snap_namespace(dst_snap_id, &snap_namespace); + m_dst_image_ctx->snap_lock.put_read(); + if (r < 0) { + lderr(m_cct) << "failed to retrieve destination snap namespace: " + << m_snap_name << dendl; + finish(r); + return; + } + + if (boost::get<cls::rbd::UserSnapshotNamespace>(&snap_namespace) == + nullptr) { + continue; + } + + // if the destination snapshot isn't in our mapping table, remove it + auto snap_seq_it = std::find_if( + m_snap_seqs.begin(), m_snap_seqs.end(), + [dst_snap_id](const SnapSeqs::value_type& pair) { + return pair.second == dst_snap_id; + }); + + if (snap_seq_it == m_snap_seqs.end()) { + break; + } + } + + if (snap_id_it == m_dst_snap_ids.end()) { + // no destination snapshots to delete + m_prev_snap_id = CEPH_NOSNAP; + send_snap_create(); + return; + } + + m_prev_snap_id = *snap_id_it; + m_snap_name = get_snapshot_name(m_dst_image_ctx, m_prev_snap_id); + + ldout(m_cct, 20) << "" + << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_snap_remove(r); + finish_op_ctx->complete(0); + }); + RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock); + m_dst_image_ctx->operations->execute_snap_remove( + cls::rbd::UserSnapshotNamespace(), m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotCopyRequest<I>::handle_snap_remove(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to remove snapshot '" << m_snap_name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + if (handle_cancellation()) { + return; + } + + send_snap_remove(); +} + +template <typename I> +void SnapshotCopyRequest<I>::send_snap_create() { + SnapIdSet::iterator snap_id_it = m_src_snap_ids.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_id_it = m_src_snap_ids.upper_bound(m_prev_snap_id); + } else if (m_src_snap_id_start > 0) { + snap_id_it = m_src_snap_ids.upper_bound(m_src_snap_id_start); + } + + for (; snap_id_it != m_src_snap_ids.end(); ++snap_id_it) { + librados::snap_t src_snap_id = *snap_id_it; + + cls::rbd::SnapshotNamespace snap_namespace; + m_src_image_ctx->snap_lock.get_read(); + int r = m_src_image_ctx->get_snap_namespace(src_snap_id, &snap_namespace); + m_src_image_ctx->snap_lock.put_read(); + if (r < 0) { + lderr(m_cct) << "failed to retrieve source snap namespace: " + << m_snap_name << dendl; + finish(r); + return; + } + + if (m_snap_seqs.find(src_snap_id) == m_snap_seqs.end()) { + // the source snapshot is not in our mapping table, ... + if (boost::get<cls::rbd::UserSnapshotNamespace>(&snap_namespace) != + nullptr) { + // ... create it since it's a user snapshot + break; + } else if (src_snap_id == m_src_snap_id_end) { + // ... map it to destination HEAD since it's not a user snapshot that we + // will create (e.g. MirrorPrimarySnapshotNamespace) + m_snap_seqs[src_snap_id] = CEPH_NOSNAP; + } + } + } + + if (snap_id_it == m_src_snap_ids.end()) { + // no source snapshots to create + m_prev_snap_id = CEPH_NOSNAP; + send_snap_protect(); + return; + } + + m_prev_snap_id = *snap_id_it; + m_snap_name = get_snapshot_name(m_src_image_ctx, m_prev_snap_id); + + m_src_image_ctx->snap_lock.get_read(); + auto snap_info_it = m_src_image_ctx->snap_info.find(m_prev_snap_id); + if (snap_info_it == m_src_image_ctx->snap_info.end()) { + m_src_image_ctx->snap_lock.put_read(); + lderr(m_cct) << "failed to retrieve source snap info: " << m_snap_name + << dendl; + finish(-ENOENT); + return; + } + + uint64_t size = snap_info_it->second.size; + m_snap_namespace = snap_info_it->second.snap_namespace; + cls::rbd::ParentImageSpec parent_spec; + uint64_t parent_overlap = 0; + if (!m_flatten && snap_info_it->second.parent.spec.pool_id != -1) { + parent_spec = m_dst_parent_spec; + parent_overlap = snap_info_it->second.parent.overlap; + } + m_src_image_ctx->snap_lock.put_read(); + + ldout(m_cct, 20) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << ", " + << "size=" << size << ", " + << "parent_info=[" + << "pool_id=" << parent_spec.pool_id << ", " + << "image_id=" << parent_spec.image_id << ", " + << "snap_id=" << parent_spec.snap_id << ", " + << "overlap=" << parent_overlap << "]" << dendl; + + int r; + Context *finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_snap_create(r); + finish_op_ctx->complete(0); + }); + SnapshotCreateRequest<I> *req = SnapshotCreateRequest<I>::create( + m_dst_image_ctx, m_snap_name, m_snap_namespace, size, parent_spec, + parent_overlap, ctx); + req->send(); +} + +template <typename I> +void SnapshotCopyRequest<I>::handle_snap_create(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to create snapshot '" << m_snap_name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + if (handle_cancellation()) { + return; + } + + ceph_assert(m_prev_snap_id != CEPH_NOSNAP); + + auto snap_it = m_dst_image_ctx->snap_ids.find( + {cls::rbd::UserSnapshotNamespace(), m_snap_name}); + ceph_assert(snap_it != m_dst_image_ctx->snap_ids.end()); + librados::snap_t dst_snap_id = snap_it->second; + + ldout(m_cct, 20) << "mapping source snap id " << m_prev_snap_id << " to " + << dst_snap_id << dendl; + m_snap_seqs[m_prev_snap_id] = dst_snap_id; + + send_snap_create(); +} + +template <typename I> +void SnapshotCopyRequest<I>::send_snap_protect() { + SnapIdSet::iterator snap_id_it = m_src_snap_ids.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_id_it = m_src_snap_ids.upper_bound(m_prev_snap_id); + } else if (m_src_snap_id_start > 0) { + snap_id_it = m_src_snap_ids.upper_bound(m_src_snap_id_start); + } + + for (; snap_id_it != m_src_snap_ids.end(); ++snap_id_it) { + librados::snap_t src_snap_id = *snap_id_it; + + m_src_image_ctx->snap_lock.get_read(); + + bool src_protected; + int r = m_src_image_ctx->is_snap_protected(src_snap_id, &src_protected); + if (r < 0) { + lderr(m_cct) << "failed to retrieve source snap protect status: " + << cpp_strerror(r) << dendl; + m_src_image_ctx->snap_lock.put_read(); + finish(r); + return; + } + m_src_image_ctx->snap_lock.put_read(); + + if (!src_protected) { + // snap is not protected -- check next snap + continue; + } + + // if destination snapshot is not protected, protect it + auto snap_seq_it = m_snap_seqs.find(src_snap_id); + ceph_assert(snap_seq_it != m_snap_seqs.end()); + if (snap_seq_it->second == CEPH_NOSNAP) { + // implies src end snapshot is mapped to a non-copyable snapshot + ceph_assert(src_snap_id == m_src_snap_id_end); + break; + } + + m_dst_image_ctx->snap_lock.get_read(); + bool dst_protected; + r = m_dst_image_ctx->is_snap_protected(snap_seq_it->second, &dst_protected); + if (r < 0) { + lderr(m_cct) << "failed to retrieve destination snap protect status: " + << cpp_strerror(r) << dendl; + m_dst_image_ctx->snap_lock.put_read(); + finish(r); + return; + } + m_dst_image_ctx->snap_lock.put_read(); + + if (!dst_protected) { + break; + } + } + + if (snap_id_it == m_src_snap_ids.end()) { + // no destination snapshots to protect + m_prev_snap_id = CEPH_NOSNAP; + send_set_head(); + return; + } + + m_prev_snap_id = *snap_id_it; + m_snap_name = get_snapshot_name(m_src_image_ctx, m_prev_snap_id); + + ldout(m_cct, 20) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_snap_protect(r); + finish_op_ctx->complete(0); + }); + RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock); + m_dst_image_ctx->operations->execute_snap_protect( + cls::rbd::UserSnapshotNamespace(), m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotCopyRequest<I>::handle_snap_protect(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to protect snapshot '" << m_snap_name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + if (handle_cancellation()) { + return; + } + + send_snap_protect(); +} + +template <typename I> +void SnapshotCopyRequest<I>::send_set_head() { + auto snap_seq_it = m_snap_seqs.find(m_src_snap_id_end); + if (m_src_snap_id_end != CEPH_NOSNAP && + (snap_seq_it == m_snap_seqs.end() || + snap_seq_it->second != CEPH_NOSNAP)) { + // not copying to src nor dst HEAD revision + finish(0); + return; + } + + ldout(m_cct, 20) << dendl; + + uint64_t size; + cls::rbd::ParentImageSpec parent_spec; + uint64_t parent_overlap = 0; + { + RWLock::RLocker src_locker(m_src_image_ctx->snap_lock); + auto snap_info_it = m_src_image_ctx->snap_info.find(m_src_snap_id_end); + if (snap_info_it != m_src_image_ctx->snap_info.end()) { + auto& snap_info = snap_info_it->second; + size = snap_info.size; + if (!m_flatten) { + parent_spec = snap_info.parent.spec; + parent_overlap = snap_info.parent.overlap; + } + } else { + size = m_src_image_ctx->size; + if (!m_flatten) { + parent_spec = m_src_image_ctx->parent_md.spec; + parent_overlap = m_src_image_ctx->parent_md.overlap; + } + } + } + + auto ctx = create_context_callback< + SnapshotCopyRequest<I>, &SnapshotCopyRequest<I>::handle_set_head>(this); + auto req = SetHeadRequest<I>::create(m_dst_image_ctx, size, parent_spec, + parent_overlap, ctx); + req->send(); +} + +template <typename I> +void SnapshotCopyRequest<I>::handle_set_head(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to set head: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (handle_cancellation()) { + return; + } + + send_resize_object_map(); +} + +template <typename I> +void SnapshotCopyRequest<I>::send_resize_object_map() { + int r = 0; + + if (m_dst_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP)) { + RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock); + RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock); + + if (m_dst_image_ctx->object_map != nullptr && + Striper::get_num_objects(m_dst_image_ctx->layout, + m_dst_image_ctx->size) != + m_dst_image_ctx->object_map->size()) { + + ldout(m_cct, 20) << dendl; + + auto finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r); + if (finish_op_ctx != nullptr) { + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_resize_object_map(r); + finish_op_ctx->complete(0); + }); + + m_dst_image_ctx->object_map->aio_resize(m_dst_image_ctx->size, + OBJECT_NONEXISTENT, ctx); + return; + } + + lderr(m_cct) << "lost exclusive lock" << dendl; + } + } + + finish(r); +} + +template <typename I> +void SnapshotCopyRequest<I>::handle_resize_object_map(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to resize object map: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +bool SnapshotCopyRequest<I>::handle_cancellation() { + { + Mutex::Locker locker(m_lock); + if (!m_canceled) { + return false; + } + } + ldout(m_cct, 10) << "snapshot copy canceled" << dendl; + finish(-ECANCELED); + return true; +} + +template <typename I> +void SnapshotCopyRequest<I>::error(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + m_work_queue->queue(new FunctionContext([this, r](int r1) { finish(r); })); +} + +template <typename I> +int SnapshotCopyRequest<I>::validate_parent(I *image_ctx, + cls::rbd::ParentImageSpec *spec) { + RWLock::RLocker owner_locker(image_ctx->owner_lock); + RWLock::RLocker snap_locker(image_ctx->snap_lock); + + // ensure source image's parent specs are still consistent + *spec = image_ctx->parent_md.spec; + for (auto &snap_info_pair : image_ctx->snap_info) { + auto &parent_spec = snap_info_pair.second.parent.spec; + if (parent_spec.pool_id == -1) { + continue; + } else if (spec->pool_id == -1) { + *spec = parent_spec; + continue; + } + + if (*spec != parent_spec) { + return -EINVAL; + } + } + return 0; +} + +template <typename I> +Context *SnapshotCopyRequest<I>::start_lock_op(int* r) { + RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock); + return start_lock_op(m_dst_image_ctx->owner_lock, r); +} + +template <typename I> +Context *SnapshotCopyRequest<I>::start_lock_op(RWLock &owner_lock, int* r) { + ceph_assert(m_dst_image_ctx->owner_lock.is_locked()); + if (m_dst_image_ctx->exclusive_lock == nullptr) { + return new FunctionContext([](int r) {}); + } + return m_dst_image_ctx->exclusive_lock->start_op(r); +} + +template <typename I> +void SnapshotCopyRequest<I>::finish(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r == 0) { + *m_snap_seqs_result = m_snap_seqs; + } + + m_on_finish->complete(r); + put(); +} + +} // namespace deep_copy +} // namespace librbd + +template class librbd::deep_copy::SnapshotCopyRequest<librbd::ImageCtx>; diff --git a/src/librbd/deep_copy/SnapshotCopyRequest.h b/src/librbd/deep_copy/SnapshotCopyRequest.h new file mode 100644 index 00000000..12068c9a --- /dev/null +++ b/src/librbd/deep_copy/SnapshotCopyRequest.h @@ -0,0 +1,148 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_COPY_REQUEST_H +#define CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_COPY_REQUEST_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "common/RefCountedObj.h" +#include "common/snap_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/Types.h" +#include <map> +#include <set> +#include <string> +#include <tuple> + +class Context; + +namespace librbd { +namespace deep_copy { + +template <typename ImageCtxT = librbd::ImageCtx> +class SnapshotCopyRequest : public RefCountedObject { +public: + static SnapshotCopyRequest* create(ImageCtxT *src_image_ctx, + ImageCtxT *dst_image_ctx, + librados::snap_t src_snap_id_start, + librados::snap_t src_snap_id_end, + librados::snap_t dst_snap_id_start, + bool flatten, ContextWQ *work_queue, + SnapSeqs *snap_seqs, Context *on_finish) { + return new SnapshotCopyRequest(src_image_ctx, dst_image_ctx, + src_snap_id_start, src_snap_id_end, + dst_snap_id_start, flatten, work_queue, + snap_seqs, on_finish); + } + + SnapshotCopyRequest(ImageCtxT *src_image_ctx, ImageCtxT *dst_image_ctx, + librados::snap_t src_snap_id_start, + librados::snap_t src_snap_id_end, + librados::snap_t dst_snap_id_start, + bool flatten, ContextWQ *work_queue, SnapSeqs *snap_seqs, + Context *on_finish); + + void send(); + void cancel(); + +private: + /** + * @verbatim + * + * <start> + * | + * | /-----------\ + * | | | + * v v | (repeat as needed) + * UNPROTECT_SNAP ----/ + * | + * | /-----------\ + * | | | + * v v | (repeat as needed) + * REMOVE_SNAP -------/ + * | + * | /-----------\ + * | | | + * v v | (repeat as needed) + * CREATE_SNAP -------/ + * | + * | /-----------\ + * | | | + * v v | (repeat as needed) + * PROTECT_SNAP ------/ + * | + * v + * SET_HEAD (skip if not needed) + * | + * v + * RESIZE_OBJECT_MAP (skip if not needed) + * | + * v + * <finish> + * + * @endverbatim + */ + + typedef std::set<librados::snap_t> SnapIdSet; + + ImageCtxT *m_src_image_ctx; + ImageCtxT *m_dst_image_ctx; + librados::snap_t m_src_snap_id_start; + librados::snap_t m_src_snap_id_end; + librados::snap_t m_dst_snap_id_start; + bool m_flatten; + ContextWQ *m_work_queue; + SnapSeqs *m_snap_seqs_result; + SnapSeqs m_snap_seqs; + Context *m_on_finish; + + CephContext *m_cct; + SnapIdSet m_src_snap_ids; + SnapIdSet m_dst_snap_ids; + librados::snap_t m_prev_snap_id = CEPH_NOSNAP; + + std::string m_snap_name; + cls::rbd::SnapshotNamespace m_snap_namespace; + + cls::rbd::ParentImageSpec m_dst_parent_spec; + + Mutex m_lock; + bool m_canceled = false; + + void send_snap_unprotect(); + void handle_snap_unprotect(int r); + + void send_snap_remove(); + void handle_snap_remove(int r); + + void send_snap_create(); + void handle_snap_create(int r); + + void send_snap_protect(); + void handle_snap_protect(int r); + + void send_set_head(); + void handle_set_head(int r); + + void send_resize_object_map(); + void handle_resize_object_map(int r); + + bool handle_cancellation(); + + void error(int r); + + int validate_parent(ImageCtxT *image_ctx, cls::rbd::ParentImageSpec *spec); + + Context *start_lock_op(int* r); + Context *start_lock_op(RWLock &owner_locki, int* r); + + void finish(int r); +}; + +} // namespace deep_copy +} // namespace librbd + +extern template class librbd::deep_copy::SnapshotCopyRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_COPY_REQUEST_H diff --git a/src/librbd/deep_copy/SnapshotCreateRequest.cc b/src/librbd/deep_copy/SnapshotCreateRequest.cc new file mode 100644 index 00000000..0ff9c832 --- /dev/null +++ b/src/librbd/deep_copy/SnapshotCreateRequest.cc @@ -0,0 +1,187 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SetHeadRequest.h" +#include "SnapshotCreateRequest.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ObjectMap.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "osdc/Striper.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::deep_copy::SnapshotCreateRequest: " \ + << this << " " << __func__ << ": " + +namespace librbd { +namespace deep_copy { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +SnapshotCreateRequest<I>::SnapshotCreateRequest( + I *dst_image_ctx, const std::string &snap_name, + const cls::rbd::SnapshotNamespace &snap_namespace, + uint64_t size, const cls::rbd::ParentImageSpec &spec, + uint64_t parent_overlap, Context *on_finish) + : m_dst_image_ctx(dst_image_ctx), m_snap_name(snap_name), + m_snap_namespace(snap_namespace), m_size(size), + m_parent_spec(spec), m_parent_overlap(parent_overlap), + m_on_finish(on_finish), m_cct(dst_image_ctx->cct) { +} + +template <typename I> +void SnapshotCreateRequest<I>::send() { + send_set_head(); +} + +template <typename I> +void SnapshotCreateRequest<I>::send_set_head() { + ldout(m_cct, 20) << dendl; + + auto ctx = create_context_callback< + SnapshotCreateRequest<I>, &SnapshotCreateRequest<I>::handle_set_head>(this); + auto req = SetHeadRequest<I>::create(m_dst_image_ctx, m_size, m_parent_spec, + m_parent_overlap, ctx); + req->send(); +} + +template <typename I> +void SnapshotCreateRequest<I>::handle_set_head(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to set head: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_snap(); +} + +template <typename I> +void SnapshotCreateRequest<I>::send_create_snap() { + ldout(m_cct, 20) << "snap_name=" << m_snap_name << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_create_snap(r); + finish_op_ctx->complete(0); + }); + RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock); + m_dst_image_ctx->operations->execute_snap_create(m_snap_namespace, + m_snap_name.c_str(), + ctx, + 0U, true); +} + +template <typename I> +void SnapshotCreateRequest<I>::handle_create_snap(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to create snapshot '" << m_snap_name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_object_map(); +} +template <typename I> +void SnapshotCreateRequest<I>::send_create_object_map() { + + if (!m_dst_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP)) { + finish(0); + return; + } + + m_dst_image_ctx->snap_lock.get_read(); + auto snap_it = m_dst_image_ctx->snap_ids.find( + {cls::rbd::UserSnapshotNamespace(), m_snap_name}); + if (snap_it == m_dst_image_ctx->snap_ids.end()) { + lderr(m_cct) << "failed to locate snap: " << m_snap_name << dendl; + m_dst_image_ctx->snap_lock.put_read(); + finish(-ENOENT); + return; + } + librados::snap_t local_snap_id = snap_it->second; + m_dst_image_ctx->snap_lock.put_read(); + + std::string object_map_oid(librbd::ObjectMap<>::object_map_name( + m_dst_image_ctx->id, local_snap_id)); + uint64_t object_count = Striper::get_num_objects(m_dst_image_ctx->layout, + m_size); + ldout(m_cct, 20) << "object_map_oid=" << object_map_oid << ", " + << "object_count=" << object_count << dendl; + + // initialize an empty object map of the correct size (object sync + // will populate the object map) + librados::ObjectWriteOperation op; + librbd::cls_client::object_map_resize(&op, object_count, OBJECT_NONEXISTENT); + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + lderr(m_cct) << "lost exclusive lock" << dendl; + finish(r); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_create_object_map(r); + finish_op_ctx->complete(0); + }); + librados::AioCompletion *comp = create_rados_callback(ctx); + r = m_dst_image_ctx->md_ctx.aio_operate(object_map_oid, comp, &op); + ceph_assert(r == 0); + comp->release(); +} + +template <typename I> +void SnapshotCreateRequest<I>::handle_create_object_map(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to create object map: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +Context *SnapshotCreateRequest<I>::start_lock_op(int* r) { + RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock); + if (m_dst_image_ctx->exclusive_lock == nullptr) { + return new FunctionContext([](int r) {}); + } + return m_dst_image_ctx->exclusive_lock->start_op(r); +} + +template <typename I> +void SnapshotCreateRequest<I>::finish(int r) { + ldout(m_cct, 20) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace deep_copy +} // namespace librbd + +template class librbd::deep_copy::SnapshotCreateRequest<librbd::ImageCtx>; diff --git a/src/librbd/deep_copy/SnapshotCreateRequest.h b/src/librbd/deep_copy/SnapshotCreateRequest.h new file mode 100644 index 00000000..8c228063 --- /dev/null +++ b/src/librbd/deep_copy/SnapshotCreateRequest.h @@ -0,0 +1,95 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_CREATE_REQUEST_H +#define CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_CREATE_REQUEST_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "common/snap_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/Types.h" +#include <map> +#include <set> +#include <string> +#include <tuple> + +class Context; + +namespace librbd { +namespace deep_copy { + +template <typename ImageCtxT = librbd::ImageCtx> +class SnapshotCreateRequest { +public: + static SnapshotCreateRequest* create(ImageCtxT *dst_image_ctx, + const std::string &snap_name, + const cls::rbd::SnapshotNamespace &snap_namespace, + uint64_t size, + const cls::rbd::ParentImageSpec &parent_spec, + uint64_t parent_overlap, + Context *on_finish) { + return new SnapshotCreateRequest(dst_image_ctx, snap_name, snap_namespace, size, + parent_spec, parent_overlap, on_finish); + } + + SnapshotCreateRequest(ImageCtxT *dst_image_ctx, + const std::string &snap_name, + const cls::rbd::SnapshotNamespace &snap_namespace, + uint64_t size, + const cls::rbd::ParentImageSpec &parent_spec, + uint64_t parent_overlap, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * SET_HEAD + * | + * v + * CREATE_SNAP + * | + * v (skip if not needed) + * CREATE_OBJECT_MAP + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_dst_image_ctx; + std::string m_snap_name; + cls::rbd::SnapshotNamespace m_snap_namespace; + uint64_t m_size; + cls::rbd::ParentImageSpec m_parent_spec; + uint64_t m_parent_overlap; + Context *m_on_finish; + + CephContext *m_cct; + + void send_set_head(); + void handle_set_head(int r); + + void send_create_snap(); + void handle_create_snap(int r); + + void send_create_object_map(); + void handle_create_object_map(int r); + + Context *start_lock_op(int* r); + + void finish(int r); +}; + +} // namespace deep_copy +} // namespace librbd + +extern template class librbd::deep_copy::SnapshotCreateRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_DEEP_COPY_SNAPSHOT_CREATE_REQUEST_H diff --git a/src/librbd/deep_copy/Types.h b/src/librbd/deep_copy/Types.h new file mode 100644 index 00000000..10d3c7c1 --- /dev/null +++ b/src/librbd/deep_copy/Types.h @@ -0,0 +1,22 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_DEEP_COPY_TYPES_H +#define CEPH_LIBRBD_DEEP_COPY_TYPES_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include <boost/optional.hpp> + +namespace librbd { +namespace deep_copy { + +typedef std::vector<librados::snap_t> SnapIds; +typedef std::map<librados::snap_t, SnapIds> SnapMap; + +typedef boost::optional<uint64_t> ObjectNumber; + +} // namespace deep_copy +} // namespace librbd + +#endif // CEPH_LIBRBD_DEEP_COPY_TYPES_H diff --git a/src/librbd/deep_copy/Utils.cc b/src/librbd/deep_copy/Utils.cc new file mode 100644 index 00000000..c2dd2502 --- /dev/null +++ b/src/librbd/deep_copy/Utils.cc @@ -0,0 +1,61 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "Utils.h" +#include <set> + +namespace librbd { +namespace deep_copy { +namespace util { + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::deep_copy::util::" << __func__ << ": " + +void compute_snap_map(CephContext* cct, + librados::snap_t src_snap_id_start, + librados::snap_t src_snap_id_end, + const SnapIds& dst_snap_ids, + const SnapSeqs &snap_seqs, + SnapMap *snap_map) { + std::set<librados::snap_t> ordered_dst_snap_ids{ + dst_snap_ids.begin(), dst_snap_ids.end()}; + auto dst_snap_id_it = ordered_dst_snap_ids.begin(); + + SnapIds snap_ids; + for (auto &it : snap_seqs) { + // ensure all dst snap ids are included in the mapping table since + // deep copy will skip non-user snapshots + while (dst_snap_id_it != ordered_dst_snap_ids.end()) { + if (*dst_snap_id_it < it.second) { + snap_ids.insert(snap_ids.begin(), *dst_snap_id_it); + } else if (*dst_snap_id_it > it.second) { + break; + } + ++dst_snap_id_it; + } + + // we should only have the HEAD revision in the the last snap seq + ceph_assert(snap_ids.empty() || snap_ids[0] != CEPH_NOSNAP); + snap_ids.insert(snap_ids.begin(), it.second); + + if (it.first < src_snap_id_start) { + continue; + } else if (it.first > src_snap_id_end) { + break; + } + + (*snap_map)[it.first] = snap_ids; + } + + ldout(cct, 10) << "src_snap_id_start=" << src_snap_id_start << ", " + << "src_snap_id_end=" << src_snap_id_end << ", " + << "dst_snap_ids=" << dst_snap_ids << ", " + << "snap_seqs=" << snap_seqs << ", " + << "snap_map=" << *snap_map << dendl; +} + +} // namespace util +} // namespace deep_copy +} // namespace librbd diff --git a/src/librbd/deep_copy/Utils.h b/src/librbd/deep_copy/Utils.h new file mode 100644 index 00000000..0681548d --- /dev/null +++ b/src/librbd/deep_copy/Utils.h @@ -0,0 +1,30 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_DEEP_COPY_UTILS_H +#define CEPH_LIBRBD_DEEP_COPY_UTILS_H + +#include "include/rados/librados.hpp" +#include "librbd/Types.h" +#include "librbd/deep_copy/Types.h" + +#include <boost/optional.hpp> + +struct CephContext; + +namespace librbd { +namespace deep_copy { +namespace util { + +void compute_snap_map(CephContext* cct, + librados::snap_t src_snap_id_start, + librados::snap_t src_snap_id_end, + const SnapIds& dst_snap_ids, + const SnapSeqs &snap_seqs, + SnapMap *snap_map); + +} // namespace util +} // namespace deep_copy +} // namespace librbd + +#endif // CEPH_LIBRBD_DEEP_COPY_UTILS_H |