// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #include "librbd/api/DiffIterate.h" #include "librbd/ImageCtx.h" #include "librbd/ImageState.h" #include "librbd/ObjectMap.h" #include "librbd/Utils.h" #include "librbd/internal.h" #include "librbd/io/AioCompletion.h" #include "librbd/io/ImageDispatchSpec.h" #include "librbd/io/ImageRequestWQ.h" #include "include/rados/librados.hpp" #include "include/interval_set.h" #include "common/errno.h" #include "common/Throttle.h" #include "osdc/Striper.h" #include "librados/snap_set_diff.h" #include #include #include #include #define dout_subsys ceph_subsys_rbd #undef dout_prefix #define dout_prefix *_dout << "librbd::DiffIterate: " namespace librbd { namespace api { namespace { enum ObjectDiffState { OBJECT_DIFF_STATE_NONE = 0, OBJECT_DIFF_STATE_UPDATED = 1, OBJECT_DIFF_STATE_HOLE = 2 }; struct DiffContext { DiffIterate<>::Callback callback; void *callback_arg; bool whole_object; uint64_t from_snap_id; uint64_t end_snap_id; interval_set parent_diff; OrderedThrottle throttle; template DiffContext(I &image_ctx, DiffIterate<>::Callback callback, void *callback_arg, bool _whole_object, uint64_t _from_snap_id, uint64_t _end_snap_id) : callback(callback), callback_arg(callback_arg), whole_object(_whole_object), from_snap_id(_from_snap_id), end_snap_id(_end_snap_id), throttle(image_ctx.config.template get_val("rbd_concurrent_management_ops"), true) { } }; class C_DiffObject : public Context { public: template C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx, DiffContext &diff_context, const std::string &oid, uint64_t offset, const std::vector &object_extents) : m_cct(image_ctx.cct), m_head_ctx(head_ctx), m_diff_context(diff_context), m_oid(oid), m_offset(offset), m_object_extents(object_extents), m_snap_ret(0) { } void send() { C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this); librados::AioCompletion *rados_completion = util::create_rados_callback(ctx); librados::ObjectReadOperation op; op.list_snaps(&m_snap_set, &m_snap_ret); int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL); ceph_assert(r == 0); rados_completion->release(); } protected: typedef boost::tuple Diff; typedef std::list Diffs; void finish(int r) override { CephContext *cct = m_cct; if (r == 0 && m_snap_ret < 0) { r = m_snap_ret; } Diffs diffs; if (r == 0) { ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl; compute_diffs(&diffs); } else if (r == -ENOENT) { ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)" << dendl; r = 0; compute_parent_overlap(&diffs); } else { ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: " << cpp_strerror(r) << dendl; } if (r == 0) { for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) { r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(), m_diff_context.callback_arg); if (r < 0) { break; } } } m_diff_context.throttle.end_op(r); } private: CephContext *m_cct; librados::IoCtx &m_head_ctx; DiffContext &m_diff_context; std::string m_oid; uint64_t m_offset; std::vector m_object_extents; librados::snap_set_t m_snap_set; int m_snap_ret; void compute_diffs(Diffs *diffs) { CephContext *cct = m_cct; // calc diff from from_snap_id -> to_snap_id interval_set diff; uint64_t end_size; bool end_exists; librados::snap_t clone_end_snap_id; bool whole_object; calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id, m_diff_context.end_snap_id, &diff, &end_size, &end_exists, &clone_end_snap_id, &whole_object); if (whole_object) { ldout(cct, 1) << "object " << m_oid << ": need to provide full object" << dendl; } ldout(cct, 20) << " diff " << diff << " end_exists=" << end_exists << dendl; if (diff.empty() && !whole_object) { if (m_diff_context.from_snap_id == 0 && !end_exists) { compute_parent_overlap(diffs); } return; } else if (m_diff_context.whole_object || whole_object) { // provide the full object extents to the callback for (vector::iterator q = m_object_extents.begin(); q != m_object_extents.end(); ++q) { diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length, end_exists)); } return; } for (vector::iterator q = m_object_extents.begin(); q != m_object_extents.end(); ++q) { ldout(cct, 20) << "diff_iterate object " << m_oid << " extent " << q->offset << "~" << q->length << " from " << q->buffer_extents << dendl; uint64_t opos = q->offset; for (vector >::iterator r = q->buffer_extents.begin(); r != q->buffer_extents.end(); ++r) { interval_set overlap; // object extents overlap.insert(opos, r->second); overlap.intersection_of(diff); ldout(cct, 20) << " opos " << opos << " buf " << r->first << "~" << r->second << " overlap " << overlap << dendl; for (interval_set::iterator s = overlap.begin(); s != overlap.end(); ++s) { uint64_t su_off = s.get_start() - opos; uint64_t logical_off = m_offset + r->first + su_off; ldout(cct, 20) << " overlap extent " << s.get_start() << "~" << s.get_len() << " logical " << logical_off << "~" << s.get_len() << dendl; diffs->push_back(boost::make_tuple(logical_off, s.get_len(), end_exists)); } opos += r->second; } ceph_assert(opos == q->offset + q->length); } } void compute_parent_overlap(Diffs *diffs) { if (m_diff_context.from_snap_id == 0 && !m_diff_context.parent_diff.empty()) { // report parent diff instead for (vector::iterator q = m_object_extents.begin(); q != m_object_extents.end(); ++q) { for (vector >::iterator r = q->buffer_extents.begin(); r != q->buffer_extents.end(); ++r) { interval_set o; o.insert(m_offset + r->first, r->second); o.intersection_of(m_diff_context.parent_diff); ldout(m_cct, 20) << " reporting parent overlap " << o << dendl; for (interval_set::iterator s = o.begin(); s != o.end(); ++s) { diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(), true)); } } } } } }; int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) { // it's possible for a discard to create a hole in the parent image -- ignore if (exists) { interval_set *diff = static_cast *>(arg); diff->insert(off, len); } return 0; } } // anonymous namespace template int DiffIterate::diff_iterate(I *ictx, const cls::rbd::SnapshotNamespace& from_snap_namespace, const char *fromsnapname, uint64_t off, uint64_t len, bool include_parent, bool whole_object, int (*cb)(uint64_t, size_t, int, void *), void *arg) { ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off << " len = " << len << dendl; if (!ictx->data_ctx.is_valid()) { return -ENODEV; } // ensure previous writes are visible to listsnaps C_SaferCond flush_ctx; { RWLock::RLocker owner_locker(ictx->owner_lock); auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx, io::AIO_TYPE_FLUSH); auto req = io::ImageDispatchSpec::create_flush_request( *ictx, aio_comp, io::FLUSH_SOURCE_INTERNAL, {}); req->send(); delete req; } int r = flush_ctx.wait(); if (r < 0) { return r; } r = ictx->state->refresh_if_required(); if (r < 0) { return r; } ictx->snap_lock.get_read(); r = clip_io(ictx, off, &len); ictx->snap_lock.put_read(); if (r < 0) { return r; } DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len, include_parent, whole_object, cb, arg); r = command.execute(); return r; } template int DiffIterate::execute() { CephContext* cct = m_image_ctx.cct; ceph_assert(m_image_ctx.data_ctx.is_valid()); librados::IoCtx head_ctx; librados::snap_t from_snap_id = 0; librados::snap_t end_snap_id; uint64_t from_size = 0; uint64_t end_size; { RWLock::RLocker md_locker(m_image_ctx.md_lock); RWLock::RLocker snap_locker(m_image_ctx.snap_lock); head_ctx.dup(m_image_ctx.data_ctx); if (m_from_snap_name) { from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name); from_size = m_image_ctx.get_image_size(from_snap_id); } end_snap_id = m_image_ctx.snap_id; end_size = m_image_ctx.get_image_size(end_snap_id); } if (from_snap_id == CEPH_NOSNAP) { return -ENOENT; } if (from_snap_id == end_snap_id) { // no diff. return 0; } if (from_snap_id >= end_snap_id) { return -EINVAL; } int r; bool fast_diff_enabled = false; BitVector<2> object_diff_state; { RWLock::RLocker snap_locker(m_image_ctx.snap_lock); if (m_whole_object && (m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) { r = diff_object_map(from_snap_id, end_snap_id, &object_diff_state); if (r < 0) { ldout(cct, 5) << "fast diff disabled" << dendl; } else { ldout(cct, 5) << "fast diff enabled" << dendl; fast_diff_enabled = true; } } } // we must list snaps via the head, not end snap head_ctx.snap_set_read(CEPH_SNAPDIR); ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to " << end_snap_id << " size from " << from_size << " to " << end_size << dendl; // check parent overlap only if we are comparing to the beginning of time DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg, m_whole_object, from_snap_id, end_snap_id); if (m_include_parent && from_snap_id == 0) { RWLock::RLocker l(m_image_ctx.snap_lock); RWLock::RLocker l2(m_image_ctx.parent_lock); uint64_t overlap = 0; m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap); r = 0; if (m_image_ctx.parent && overlap > 0) { ldout(cct, 10) << " first getting parent diff" << dendl; DiffIterate diff_parent(*m_image_ctx.parent, {}, nullptr, 0, overlap, m_include_parent, m_whole_object, &simple_diff_cb, &diff_context.parent_diff); r = diff_parent.execute(); } if (r < 0) { return r; } } uint64_t period = m_image_ctx.get_stripe_period(); uint64_t off = m_offset; uint64_t left = m_length; while (left > 0) { uint64_t period_off = off - (off % period); uint64_t read_len = min(period_off + period - off, left); // map to extents map > object_extents; Striper::file_to_extents(cct, m_image_ctx.format_string, &m_image_ctx.layout, off, read_len, 0, object_extents, 0); // get snap info for each object for (map >::iterator p = object_extents.begin(); p != object_extents.end(); ++p) { ldout(cct, 20) << "object " << p->first << dendl; if (fast_diff_enabled) { const uint64_t object_no = p->second.front().objectno; if (object_diff_state[object_no] == OBJECT_DIFF_STATE_NONE && from_snap_id == 0 && !diff_context.parent_diff.empty()) { // no data in child object -- report parent diff instead for (auto& oe : p->second) { for (auto& be : oe.buffer_extents) { interval_set o; o.insert(off + be.first, be.second); o.intersection_of(diff_context.parent_diff); ldout(cct, 20) << " reporting parent overlap " << o << dendl; for (auto e = o.begin(); e != o.end(); ++e) { r = m_callback(e.get_start(), e.get_len(), true, m_callback_arg); if (r < 0) { return r; } } } } } else if (object_diff_state[object_no] != OBJECT_DIFF_STATE_NONE) { bool updated = (object_diff_state[object_no] == OBJECT_DIFF_STATE_UPDATED); for (std::vector::iterator q = p->second.begin(); q != p->second.end(); ++q) { r = m_callback(off + q->offset, q->length, updated, m_callback_arg); if (r < 0) { return r; } } } } else { C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx, diff_context, p->first.name, off, p->second); diff_object->send(); if (diff_context.throttle.pending_error()) { r = diff_context.throttle.wait_for_ret(); return r; } } } left -= read_len; off += read_len; } r = diff_context.throttle.wait_for_ret(); if (r < 0) { return r; } return 0; } template int DiffIterate::diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id, BitVector<2>* object_diff_state) { ceph_assert(m_image_ctx.snap_lock.is_locked()); CephContext* cct = m_image_ctx.cct; bool diff_from_start = (from_snap_id == 0); if (from_snap_id == 0) { if (!m_image_ctx.snaps.empty()) { from_snap_id = m_image_ctx.snaps.back(); } else { from_snap_id = CEPH_NOSNAP; } } object_diff_state->clear(); uint64_t current_snap_id = from_snap_id; uint64_t next_snap_id = to_snap_id; BitVector<2> prev_object_map; bool prev_object_map_valid = false; while (true) { uint64_t current_size = m_image_ctx.size; if (current_snap_id != CEPH_NOSNAP) { std::map::const_iterator snap_it = m_image_ctx.snap_info.find(current_snap_id); ceph_assert(snap_it != m_image_ctx.snap_info.end()); current_size = snap_it->second.size; ++snap_it; if (snap_it != m_image_ctx.snap_info.end()) { next_snap_id = snap_it->first; } else { next_snap_id = CEPH_NOSNAP; } } uint64_t flags; int r = m_image_ctx.get_flags(from_snap_id, &flags); if (r < 0) { lderr(cct) << "diff_object_map: failed to retrieve image flags" << dendl; return r; } if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) { ldout(cct, 1) << "diff_object_map: cannot perform fast diff on invalid " << "object map" << dendl; return -EINVAL; } BitVector<2> object_map; std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, current_snap_id)); r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, &object_map); if (r < 0) { lderr(cct) << "diff_object_map: failed to load object map " << oid << dendl; return r; } ldout(cct, 20) << "diff_object_map: loaded object map " << oid << dendl; uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout, current_size); if (object_map.size() < num_objs) { ldout(cct, 1) << "diff_object_map: object map too small: " << object_map.size() << " < " << num_objs << dendl; return -EINVAL; } object_map.resize(num_objs); object_diff_state->resize(object_map.size()); uint64_t overlap = std::min(object_map.size(), prev_object_map.size()); auto it = object_map.begin(); auto overlap_end_it = it + overlap; auto pre_it = prev_object_map.begin(); auto diff_it = object_diff_state->begin(); uint64_t i = 0; for (; it != overlap_end_it; ++it, ++pre_it, ++diff_it, ++i) { ldout(cct, 20) << __func__ << ": object state: " << i << " " << static_cast(*pre_it) << "->" << static_cast(*it) << dendl; if (*it == OBJECT_NONEXISTENT) { if (*pre_it != OBJECT_NONEXISTENT) { *diff_it = OBJECT_DIFF_STATE_HOLE; } } else if (*it == OBJECT_EXISTS || (*pre_it != *it && !(*pre_it == OBJECT_EXISTS && *it == OBJECT_EXISTS_CLEAN))) { *diff_it = OBJECT_DIFF_STATE_UPDATED; } } ldout(cct, 20) << "diff_object_map: computed overlap diffs" << dendl; auto end_it = object_map.end(); if (object_map.size() > prev_object_map.size() && (diff_from_start || prev_object_map_valid)) { for (; it != end_it; ++it,++diff_it, ++i) { ldout(cct, 20) << __func__ << ": object state: " << i << " " << "->" << static_cast(*it) << dendl; if (*it == OBJECT_NONEXISTENT) { *diff_it = OBJECT_DIFF_STATE_NONE; } else { *diff_it = OBJECT_DIFF_STATE_UPDATED; } } } ldout(cct, 20) << "diff_object_map: computed resize diffs" << dendl; if (current_snap_id == next_snap_id || next_snap_id > to_snap_id) { break; } current_snap_id = next_snap_id; prev_object_map = object_map; prev_object_map_valid = true; } return 0; } } // namespace api } // namespace librbd template class librbd::api::DiffIterate;