summaryrefslogtreecommitdiffstats
path: root/src/librbd/api/DiffIterate.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/librbd/api/DiffIterate.cc')
-rw-r--r--src/librbd/api/DiffIterate.cc554
1 files changed, 554 insertions, 0 deletions
diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc
new file mode 100644
index 00000000..f96264e3
--- /dev/null
+++ b/src/librbd/api/DiffIterate.cc
@@ -0,0 +1,554 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/api/DiffIterate.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+#include "librbd/internal.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+#include "librbd/io/ImageRequestWQ.h"
+#include "include/rados/librados.hpp"
+#include "include/interval_set.h"
+#include "common/errno.h"
+#include "common/Throttle.h"
+#include "osdc/Striper.h"
+#include "librados/snap_set_diff.h"
+#include <boost/tuple/tuple.hpp>
+#include <list>
+#include <map>
+#include <vector>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::DiffIterate: "
+
+namespace librbd {
+namespace api {
+
+namespace {
+
+enum ObjectDiffState {
+ OBJECT_DIFF_STATE_NONE = 0,
+ OBJECT_DIFF_STATE_UPDATED = 1,
+ OBJECT_DIFF_STATE_HOLE = 2
+};
+
+struct DiffContext {
+ DiffIterate<>::Callback callback;
+ void *callback_arg;
+ bool whole_object;
+ uint64_t from_snap_id;
+ uint64_t end_snap_id;
+ interval_set<uint64_t> parent_diff;
+ OrderedThrottle throttle;
+
+ template <typename I>
+ DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
+ void *callback_arg, bool _whole_object, uint64_t _from_snap_id,
+ uint64_t _end_snap_id)
+ : callback(callback), callback_arg(callback_arg),
+ whole_object(_whole_object), from_snap_id(_from_snap_id),
+ end_snap_id(_end_snap_id),
+ throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) {
+ }
+};
+
+class C_DiffObject : public Context {
+public:
+ template <typename I>
+ C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx,
+ DiffContext &diff_context, const std::string &oid,
+ uint64_t offset, const std::vector<ObjectExtent> &object_extents)
+ : m_cct(image_ctx.cct), m_head_ctx(head_ctx),
+ m_diff_context(diff_context), m_oid(oid), m_offset(offset),
+ m_object_extents(object_extents), m_snap_ret(0) {
+ }
+
+ void send() {
+ C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this);
+ librados::AioCompletion *rados_completion =
+ util::create_rados_callback(ctx);
+
+ librados::ObjectReadOperation op;
+ op.list_snaps(&m_snap_set, &m_snap_ret);
+
+ int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL);
+ ceph_assert(r == 0);
+ rados_completion->release();
+ }
+
+protected:
+ typedef boost::tuple<uint64_t, size_t, bool> Diff;
+ typedef std::list<Diff> Diffs;
+
+ void finish(int r) override {
+ CephContext *cct = m_cct;
+ if (r == 0 && m_snap_ret < 0) {
+ r = m_snap_ret;
+ }
+
+ Diffs diffs;
+ if (r == 0) {
+ ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl;
+ compute_diffs(&diffs);
+ } else if (r == -ENOENT) {
+ ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)"
+ << dendl;
+ r = 0;
+ compute_parent_overlap(&diffs);
+ } else {
+ ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ if (r == 0) {
+ for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
+ r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
+ m_diff_context.callback_arg);
+ if (r < 0) {
+ break;
+ }
+ }
+ }
+ m_diff_context.throttle.end_op(r);
+ }
+
+private:
+ CephContext *m_cct;
+ librados::IoCtx &m_head_ctx;
+ DiffContext &m_diff_context;
+ std::string m_oid;
+ uint64_t m_offset;
+ std::vector<ObjectExtent> m_object_extents;
+
+ librados::snap_set_t m_snap_set;
+ int m_snap_ret;
+
+ void compute_diffs(Diffs *diffs) {
+ CephContext *cct = m_cct;
+
+ // calc diff from from_snap_id -> to_snap_id
+ interval_set<uint64_t> diff;
+ uint64_t end_size;
+ bool end_exists;
+ librados::snap_t clone_end_snap_id;
+ bool whole_object;
+ calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id,
+ m_diff_context.end_snap_id, &diff, &end_size,
+ &end_exists, &clone_end_snap_id, &whole_object);
+ if (whole_object) {
+ ldout(cct, 1) << "object " << m_oid << ": need to provide full object"
+ << dendl;
+ }
+ ldout(cct, 20) << " diff " << diff << " end_exists=" << end_exists
+ << dendl;
+ if (diff.empty() && !whole_object) {
+ if (m_diff_context.from_snap_id == 0 && !end_exists) {
+ compute_parent_overlap(diffs);
+ }
+ return;
+ } else if (m_diff_context.whole_object || whole_object) {
+ // provide the full object extents to the callback
+ for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
+ q != m_object_extents.end(); ++q) {
+ diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length,
+ end_exists));
+ }
+ return;
+ }
+
+ for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
+ q != m_object_extents.end(); ++q) {
+ ldout(cct, 20) << "diff_iterate object " << m_oid << " extent "
+ << q->offset << "~" << q->length << " from "
+ << q->buffer_extents << dendl;
+ uint64_t opos = q->offset;
+ for (vector<pair<uint64_t,uint64_t> >::iterator r =
+ q->buffer_extents.begin();
+ r != q->buffer_extents.end(); ++r) {
+ interval_set<uint64_t> overlap; // object extents
+ overlap.insert(opos, r->second);
+ overlap.intersection_of(diff);
+ ldout(cct, 20) << " opos " << opos
+ << " buf " << r->first << "~" << r->second
+ << " overlap " << overlap << dendl;
+ for (interval_set<uint64_t>::iterator s = overlap.begin();
+ s != overlap.end(); ++s) {
+ uint64_t su_off = s.get_start() - opos;
+ uint64_t logical_off = m_offset + r->first + su_off;
+ ldout(cct, 20) << " overlap extent " << s.get_start() << "~"
+ << s.get_len() << " logical " << logical_off << "~"
+ << s.get_len() << dendl;
+ diffs->push_back(boost::make_tuple(logical_off, s.get_len(),
+ end_exists));
+ }
+ opos += r->second;
+ }
+ ceph_assert(opos == q->offset + q->length);
+ }
+ }
+
+ void compute_parent_overlap(Diffs *diffs) {
+ if (m_diff_context.from_snap_id == 0 &&
+ !m_diff_context.parent_diff.empty()) {
+ // report parent diff instead
+ for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
+ q != m_object_extents.end(); ++q) {
+ for (vector<pair<uint64_t,uint64_t> >::iterator r =
+ q->buffer_extents.begin();
+ r != q->buffer_extents.end(); ++r) {
+ interval_set<uint64_t> o;
+ o.insert(m_offset + r->first, r->second);
+ o.intersection_of(m_diff_context.parent_diff);
+ ldout(m_cct, 20) << " reporting parent overlap " << o << dendl;
+ for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
+ ++s) {
+ diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(),
+ true));
+ }
+ }
+ }
+ }
+ }
+};
+
+int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
+ // it's possible for a discard to create a hole in the parent image -- ignore
+ if (exists) {
+ interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
+ diff->insert(off, len);
+ }
+ return 0;
+}
+
+} // anonymous namespace
+
+template <typename I>
+int DiffIterate<I>::diff_iterate(I *ictx,
+ const cls::rbd::SnapshotNamespace& from_snap_namespace,
+ const char *fromsnapname,
+ uint64_t off, uint64_t len,
+ bool include_parent, bool whole_object,
+ int (*cb)(uint64_t, size_t, int, void *),
+ void *arg)
+{
+ ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
+ << " len = " << len << dendl;
+
+ if (!ictx->data_ctx.is_valid()) {
+ return -ENODEV;
+ }
+
+ // ensure previous writes are visible to listsnaps
+ C_SaferCond flush_ctx;
+ {
+ RWLock::RLocker owner_locker(ictx->owner_lock);
+ auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx,
+ io::AIO_TYPE_FLUSH);
+ auto req = io::ImageDispatchSpec<I>::create_flush_request(
+ *ictx, aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
+ req->send();
+ delete req;
+ }
+ int r = flush_ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+
+ r = ictx->state->refresh_if_required();
+ if (r < 0) {
+ return r;
+ }
+
+ ictx->snap_lock.get_read();
+ r = clip_io(ictx, off, &len);
+ ictx->snap_lock.put_read();
+ if (r < 0) {
+ return r;
+ }
+
+ DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
+ include_parent, whole_object, cb, arg);
+ r = command.execute();
+ return r;
+}
+
+template <typename I>
+int DiffIterate<I>::execute() {
+ CephContext* cct = m_image_ctx.cct;
+
+ ceph_assert(m_image_ctx.data_ctx.is_valid());
+
+ librados::IoCtx head_ctx;
+ librados::snap_t from_snap_id = 0;
+ librados::snap_t end_snap_id;
+ uint64_t from_size = 0;
+ uint64_t end_size;
+ {
+ RWLock::RLocker md_locker(m_image_ctx.md_lock);
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ head_ctx.dup(m_image_ctx.data_ctx);
+ if (m_from_snap_name) {
+ from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name);
+ from_size = m_image_ctx.get_image_size(from_snap_id);
+ }
+ end_snap_id = m_image_ctx.snap_id;
+ end_size = m_image_ctx.get_image_size(end_snap_id);
+ }
+
+ if (from_snap_id == CEPH_NOSNAP) {
+ return -ENOENT;
+ }
+ if (from_snap_id == end_snap_id) {
+ // no diff.
+ return 0;
+ }
+ if (from_snap_id >= end_snap_id) {
+ return -EINVAL;
+ }
+
+ int r;
+ bool fast_diff_enabled = false;
+ BitVector<2> object_diff_state;
+ {
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ if (m_whole_object && (m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) {
+ r = diff_object_map(from_snap_id, end_snap_id, &object_diff_state);
+ if (r < 0) {
+ ldout(cct, 5) << "fast diff disabled" << dendl;
+ } else {
+ ldout(cct, 5) << "fast diff enabled" << dendl;
+ fast_diff_enabled = true;
+ }
+ }
+ }
+
+ // we must list snaps via the head, not end snap
+ head_ctx.snap_set_read(CEPH_SNAPDIR);
+
+ ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
+ << end_snap_id << " size from " << from_size
+ << " to " << end_size << dendl;
+
+ // check parent overlap only if we are comparing to the beginning of time
+ DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
+ m_whole_object, from_snap_id, end_snap_id);
+ if (m_include_parent && from_snap_id == 0) {
+ RWLock::RLocker l(m_image_ctx.snap_lock);
+ RWLock::RLocker l2(m_image_ctx.parent_lock);
+ uint64_t overlap = 0;
+ m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap);
+ r = 0;
+ if (m_image_ctx.parent && overlap > 0) {
+ ldout(cct, 10) << " first getting parent diff" << dendl;
+ DiffIterate diff_parent(*m_image_ctx.parent, {},
+ nullptr, 0, overlap,
+ m_include_parent, m_whole_object,
+ &simple_diff_cb,
+ &diff_context.parent_diff);
+ r = diff_parent.execute();
+ }
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ uint64_t period = m_image_ctx.get_stripe_period();
+ uint64_t off = m_offset;
+ uint64_t left = m_length;
+
+ while (left > 0) {
+ uint64_t period_off = off - (off % period);
+ uint64_t read_len = min(period_off + period - off, left);
+
+ // map to extents
+ map<object_t,vector<ObjectExtent> > object_extents;
+ Striper::file_to_extents(cct, m_image_ctx.format_string,
+ &m_image_ctx.layout, off, read_len, 0,
+ object_extents, 0);
+
+ // get snap info for each object
+ for (map<object_t,vector<ObjectExtent> >::iterator p =
+ object_extents.begin();
+ p != object_extents.end(); ++p) {
+ ldout(cct, 20) << "object " << p->first << dendl;
+
+ if (fast_diff_enabled) {
+ const uint64_t object_no = p->second.front().objectno;
+ if (object_diff_state[object_no] == OBJECT_DIFF_STATE_NONE &&
+ from_snap_id == 0 && !diff_context.parent_diff.empty()) {
+ // no data in child object -- report parent diff instead
+ for (auto& oe : p->second) {
+ for (auto& be : oe.buffer_extents) {
+ interval_set<uint64_t> o;
+ o.insert(off + be.first, be.second);
+ o.intersection_of(diff_context.parent_diff);
+ ldout(cct, 20) << " reporting parent overlap " << o << dendl;
+ for (auto e = o.begin(); e != o.end(); ++e) {
+ r = m_callback(e.get_start(), e.get_len(), true,
+ m_callback_arg);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+ }
+ } else if (object_diff_state[object_no] != OBJECT_DIFF_STATE_NONE) {
+ bool updated = (object_diff_state[object_no] ==
+ OBJECT_DIFF_STATE_UPDATED);
+ for (std::vector<ObjectExtent>::iterator q = p->second.begin();
+ q != p->second.end(); ++q) {
+ r = m_callback(off + q->offset, q->length, updated, m_callback_arg);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+ } else {
+ C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx,
+ diff_context,
+ p->first.name, off,
+ p->second);
+ diff_object->send();
+
+ if (diff_context.throttle.pending_error()) {
+ r = diff_context.throttle.wait_for_ret();
+ return r;
+ }
+ }
+ }
+
+ left -= read_len;
+ off += read_len;
+ }
+
+ r = diff_context.throttle.wait_for_ret();
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+template <typename I>
+int DiffIterate<I>::diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,
+ BitVector<2>* object_diff_state) {
+ ceph_assert(m_image_ctx.snap_lock.is_locked());
+ CephContext* cct = m_image_ctx.cct;
+
+ bool diff_from_start = (from_snap_id == 0);
+ if (from_snap_id == 0) {
+ if (!m_image_ctx.snaps.empty()) {
+ from_snap_id = m_image_ctx.snaps.back();
+ } else {
+ from_snap_id = CEPH_NOSNAP;
+ }
+ }
+
+ object_diff_state->clear();
+ uint64_t current_snap_id = from_snap_id;
+ uint64_t next_snap_id = to_snap_id;
+ BitVector<2> prev_object_map;
+ bool prev_object_map_valid = false;
+ while (true) {
+ uint64_t current_size = m_image_ctx.size;
+ if (current_snap_id != CEPH_NOSNAP) {
+ std::map<librados::snap_t, SnapInfo>::const_iterator snap_it =
+ m_image_ctx.snap_info.find(current_snap_id);
+ ceph_assert(snap_it != m_image_ctx.snap_info.end());
+ current_size = snap_it->second.size;
+
+ ++snap_it;
+ if (snap_it != m_image_ctx.snap_info.end()) {
+ next_snap_id = snap_it->first;
+ } else {
+ next_snap_id = CEPH_NOSNAP;
+ }
+ }
+
+ uint64_t flags;
+ int r = m_image_ctx.get_flags(from_snap_id, &flags);
+ if (r < 0) {
+ lderr(cct) << "diff_object_map: failed to retrieve image flags" << dendl;
+ return r;
+ }
+ if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
+ ldout(cct, 1) << "diff_object_map: cannot perform fast diff on invalid "
+ << "object map" << dendl;
+ return -EINVAL;
+ }
+
+ BitVector<2> object_map;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id,
+ current_snap_id));
+ r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, &object_map);
+ if (r < 0) {
+ lderr(cct) << "diff_object_map: failed to load object map " << oid
+ << dendl;
+ return r;
+ }
+ ldout(cct, 20) << "diff_object_map: loaded object map " << oid << dendl;
+
+ uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout,
+ current_size);
+ if (object_map.size() < num_objs) {
+ ldout(cct, 1) << "diff_object_map: object map too small: "
+ << object_map.size() << " < " << num_objs << dendl;
+ return -EINVAL;
+ }
+ object_map.resize(num_objs);
+ object_diff_state->resize(object_map.size());
+
+ uint64_t overlap = std::min(object_map.size(), prev_object_map.size());
+ auto it = object_map.begin();
+ auto overlap_end_it = it + overlap;
+ auto pre_it = prev_object_map.begin();
+ auto diff_it = object_diff_state->begin();
+ uint64_t i = 0;
+ for (; it != overlap_end_it; ++it, ++pre_it, ++diff_it, ++i) {
+ ldout(cct, 20) << __func__ << ": object state: " << i << " "
+ << static_cast<uint32_t>(*pre_it)
+ << "->" << static_cast<uint32_t>(*it) << dendl;
+ if (*it == OBJECT_NONEXISTENT) {
+ if (*pre_it != OBJECT_NONEXISTENT) {
+ *diff_it = OBJECT_DIFF_STATE_HOLE;
+ }
+ } else if (*it == OBJECT_EXISTS ||
+ (*pre_it != *it &&
+ !(*pre_it == OBJECT_EXISTS &&
+ *it == OBJECT_EXISTS_CLEAN))) {
+ *diff_it = OBJECT_DIFF_STATE_UPDATED;
+ }
+ }
+ ldout(cct, 20) << "diff_object_map: computed overlap diffs" << dendl;
+ auto end_it = object_map.end();
+ if (object_map.size() > prev_object_map.size() &&
+ (diff_from_start || prev_object_map_valid)) {
+ for (; it != end_it; ++it,++diff_it, ++i) {
+ ldout(cct, 20) << __func__ << ": object state: " << i << " "
+ << "->" << static_cast<uint32_t>(*it) << dendl;
+ if (*it == OBJECT_NONEXISTENT) {
+ *diff_it = OBJECT_DIFF_STATE_NONE;
+ } else {
+ *diff_it = OBJECT_DIFF_STATE_UPDATED;
+ }
+ }
+ }
+ ldout(cct, 20) << "diff_object_map: computed resize diffs" << dendl;
+
+ if (current_snap_id == next_snap_id || next_snap_id > to_snap_id) {
+ break;
+ }
+ current_snap_id = next_snap_id;
+ prev_object_map = object_map;
+ prev_object_map_valid = true;
+ }
+ return 0;
+}
+
+} // namespace api
+} // namespace librbd
+
+template class librbd::api::DiffIterate<librbd::ImageCtx>;