summaryrefslogtreecommitdiffstats
path: root/src/librbd/operation/SparsifyRequest.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/librbd/operation/SparsifyRequest.cc')
-rw-r--r--src/librbd/operation/SparsifyRequest.cc519
1 files changed, 519 insertions, 0 deletions
diff --git a/src/librbd/operation/SparsifyRequest.cc b/src/librbd/operation/SparsifyRequest.cc
new file mode 100644
index 00000000..53049f88
--- /dev/null
+++ b/src/librbd/operation/SparsifyRequest.cc
@@ -0,0 +1,519 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/SparsifyRequest.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "include/err.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Types.h"
+#include "librbd/io/ObjectRequest.h"
+#include "osdc/Striper.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+
+namespace librbd {
+namespace operation {
+
+namespace {
+
+bool may_be_trimmed(const std::map<uint64_t,uint64_t> &extent_map,
+ const bufferlist &bl, size_t sparse_size,
+ uint64_t *new_end_ptr) {
+ if (extent_map.empty()) {
+ *new_end_ptr = 0;
+ return true;
+ }
+
+ uint64_t end = extent_map.rbegin()->first + extent_map.rbegin()->second;
+ uint64_t new_end = end;
+ uint64_t bl_off = bl.length();
+
+ for (auto it = extent_map.rbegin(); it != extent_map.rend(); it++) {
+ auto off = it->first;
+ auto len = it->second;
+
+ new_end = p2roundup<uint64_t>(off + len, sparse_size);
+
+ uint64_t extent_left = len;
+ uint64_t sub_len = len % sparse_size;
+ if (sub_len == 0) {
+ sub_len = sparse_size;
+ }
+ while (extent_left > 0) {
+ ceph_assert(bl_off >= sub_len);
+ bl_off -= sub_len;
+ bufferlist sub_bl;
+ sub_bl.substr_of(bl, bl_off, sub_len);
+ if (!sub_bl.is_zero()) {
+ break;
+ }
+ new_end -= sparse_size;
+ extent_left -= sub_len;
+ sub_len = sparse_size;
+ }
+ if (extent_left > 0) {
+ break;
+ }
+ }
+
+ if (new_end < end) {
+ *new_end_ptr = new_end;
+ return true;
+ }
+
+ return false;
+}
+
+} // anonymous namespace
+
+using util::create_context_callback;
+using util::create_rados_callback;
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::operation::SparsifyObject: " << this \
+ << " " << m_oid << " " << __func__ << ": "
+
+template <typename I>
+class C_SparsifyObject : public C_AsyncObjectThrottle<I> {
+public:
+
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v (not supported)
+ * SPARSIFY * * * * * * * * * * * * > READ < * * * * * * * * * * (concurrent
+ * | | * update is
+ * | (object map disabled) | (can trim) * detected)
+ * |------------------------\ V *
+ * | | PRE UPDATE OBJECT MAP *
+ * | (object map enabled) | | (if needed) *
+ * v | V *
+ * PRE UPDATE OBJECT MAP | TRIM * * * * * * * * * * *
+ * | | |
+ * v | V
+ * CHECK EXISTS | POST UPDATE OBJECT MAP
+ * | | | (if needed)
+ * v | |
+ * POST UPDATE OBJECT MAP | |
+ * | | |
+ * v | |
+ * <finish> <------------------/<-------/
+ *
+ * @endverbatim
+ *
+ */
+
+ C_SparsifyObject(AsyncObjectThrottle<I> &throttle, I *image_ctx,
+ uint64_t object_no, size_t sparse_size)
+ : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_cct(image_ctx->cct),
+ m_object_no(object_no), m_sparse_size(sparse_size),
+ m_oid(image_ctx->get_object_name(object_no)) {
+ }
+
+ int send() override {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(image_ctx.owner_lock.is_locked());
+
+ ldout(m_cct, 20) << dendl;
+
+ if (!image_ctx.data_ctx.is_valid()) {
+ lderr(m_cct) << "missing data pool" << dendl;
+ return -ENODEV;
+ }
+
+ if (image_ctx.exclusive_lock != nullptr &&
+ !image_ctx.exclusive_lock->is_lock_owner()) {
+ ldout(m_cct, 1) << "lost exclusive lock during sparsify" << dendl;
+ return -ERESTART;
+ }
+
+ {
+ RWLock::RLocker snap_locker(image_ctx.snap_lock);
+ if (image_ctx.object_map != nullptr &&
+ !image_ctx.object_map->object_may_exist(m_object_no)) {
+ // can skip because the object does not exist
+ return 1;
+ }
+
+ RWLock::RLocker parent_locker(image_ctx.parent_lock);
+ uint64_t overlap_objects = 0;
+ uint64_t overlap;
+ int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &overlap);
+ if (r == 0 && overlap > 0) {
+ overlap_objects = Striper::get_num_objects(image_ctx.layout, overlap);
+ }
+ m_remove_empty = (m_object_no >= overlap_objects);
+ }
+
+ send_sparsify();
+ return 0;
+ }
+
+ void send_sparsify() {
+ I &image_ctx = this->m_image_ctx;
+ ldout(m_cct, 20) << dendl;
+
+ librados::ObjectWriteOperation op;
+ cls_client::sparsify(&op, m_sparse_size, m_remove_empty);
+ auto comp = create_rados_callback<
+ C_SparsifyObject, &C_SparsifyObject::handle_sparsify>(this);
+ int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+
+ void handle_sparsify(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r == -EOPNOTSUPP) {
+ m_trying_trim = true;
+ send_read();
+ return;
+ }
+
+ if (r == -ENOENT) {
+ finish_op(0);
+ return;
+ }
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to sparsify: " << cpp_strerror(r) << dendl;
+ finish_op(r);
+ return;
+ }
+
+ send_pre_update_object_map();
+ }
+
+ void send_pre_update_object_map() {
+ I &image_ctx = this->m_image_ctx;
+
+ if (m_trying_trim) {
+ if (!m_remove_empty || m_new_end != 0 ||
+ !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
+ send_trim();
+ return;
+ }
+ } else if (!m_remove_empty ||
+ !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
+ finish_op(0);
+ return;
+ }
+
+ ldout(m_cct, 20) << dendl;
+
+ image_ctx.owner_lock.get_read();
+ image_ctx.snap_lock.get_read();
+ if (image_ctx.object_map == nullptr) {
+ // possible that exclusive lock was lost in background
+ lderr(m_cct) << "object map is not initialized" << dendl;
+
+ image_ctx.snap_lock.put_read();
+ image_ctx.owner_lock.put_read();
+ finish_op(-EINVAL);
+ return;
+ }
+
+ int r;
+ m_finish_op_ctx = image_ctx.exclusive_lock->start_op(&r);
+ if (m_finish_op_ctx == nullptr) {
+ lderr(m_cct) << "lost exclusive lock" << dendl;
+ image_ctx.snap_lock.put_read();
+ image_ctx.owner_lock.put_read();
+ finish_op(r);
+ return;
+ }
+
+ auto ctx = create_context_callback<
+ C_SparsifyObject<I>,
+ &C_SparsifyObject<I>::handle_pre_update_object_map>(this);
+
+ image_ctx.object_map_lock.get_write();
+ bool sent = image_ctx.object_map->template aio_update<
+ Context, &Context::complete>(CEPH_NOSNAP, m_object_no, OBJECT_PENDING,
+ OBJECT_EXISTS, {}, false, ctx);
+
+ // NOTE: state machine might complete before we reach here
+ image_ctx.object_map_lock.put_write();
+ image_ctx.snap_lock.put_read();
+ image_ctx.owner_lock.put_read();
+ if (!sent) {
+ finish_op(0);
+ }
+ }
+
+ void handle_pre_update_object_map(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to update object map: " << cpp_strerror(r)
+ << dendl;
+ finish_op(r);
+ return;
+ }
+
+ if (m_trying_trim) {
+ send_trim();
+ } else {
+ send_check_exists();
+ }
+ }
+
+ void send_check_exists() {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << dendl;
+
+ librados::ObjectReadOperation op;
+ op.stat(NULL, NULL, NULL);
+ m_bl.clear();
+ auto comp = create_rados_callback<
+ C_SparsifyObject, &C_SparsifyObject::handle_check_exists>(this);
+ int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+
+ void handle_check_exists(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "stat failed: " << cpp_strerror(r) << dendl;
+ finish_op(r);
+ return;
+ }
+
+ send_post_update_object_map(r == 0);
+ }
+
+ void send_post_update_object_map(bool exists) {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << dendl;
+
+ auto ctx = create_context_callback<
+ C_SparsifyObject<I>,
+ &C_SparsifyObject<I>::handle_post_update_object_map>(this);
+ bool sent;
+ {
+ RWLock::RLocker owner_locker(image_ctx.owner_lock);
+ RWLock::RLocker snap_locker(image_ctx.snap_lock);
+
+ assert(image_ctx.exclusive_lock->is_lock_owner());
+ assert(image_ctx.object_map != nullptr);
+
+ RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
+
+ sent = image_ctx.object_map->template aio_update<
+ Context, &Context::complete>(CEPH_NOSNAP, m_object_no,
+ exists ? OBJECT_EXISTS : OBJECT_NONEXISTENT,
+ OBJECT_PENDING, {}, false, ctx);
+ }
+ if (!sent) {
+ ctx->complete(0);
+ }
+ }
+
+ void handle_post_update_object_map(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(m_cct) << "failed to update object map: " << cpp_strerror(r)
+ << dendl;
+ finish_op(r);
+ return;
+ }
+
+ finish_op(0);
+ }
+
+ void send_read() {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << dendl;
+
+ librados::ObjectReadOperation op;
+ m_bl.clear();
+ op.sparse_read(0, image_ctx.layout.object_size, &m_extent_map, &m_bl,
+ nullptr);
+ auto comp = create_rados_callback<
+ C_SparsifyObject, &C_SparsifyObject::handle_read>(this);
+ int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+
+ void handle_read(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ if (r == -ENOENT) {
+ r = 0;
+ } else {
+ lderr(m_cct) << "failed to read object: " << cpp_strerror(r) << dendl;
+ }
+ finish_op(r);
+ return;
+ }
+
+ if (!may_be_trimmed(m_extent_map, m_bl, m_sparse_size, &m_new_end)) {
+ finish_op(0);
+ return;
+ }
+
+ send_pre_update_object_map();
+ }
+
+ void send_trim() {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << dendl;
+
+ ceph_assert(m_new_end < image_ctx.layout.object_size);
+
+ librados::ObjectWriteOperation op;
+ m_bl.clear();
+ m_bl.append_zero(image_ctx.layout.object_size - m_new_end);
+ op.cmpext(m_new_end, m_bl, nullptr);
+ if (m_new_end == 0 && m_remove_empty) {
+ op.remove();
+ } else {
+ op.truncate(m_new_end);
+ }
+
+ auto comp = create_rados_callback<
+ C_SparsifyObject, &C_SparsifyObject::handle_trim>(this);
+ int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op);
+ ceph_assert(r == 0);
+ comp->release();
+ }
+
+ void handle_trim(int r) {
+ I &image_ctx = this->m_image_ctx;
+
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (r <= -MAX_ERRNO) {
+ m_finish_op_ctx->complete(0);
+ m_finish_op_ctx = nullptr;
+ send_read();
+ return;
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ lderr(m_cct) << "failed to trim: " << cpp_strerror(r) << dendl;
+ finish_op(r);
+ return;
+ }
+
+ if (!m_remove_empty || m_new_end != 0 ||
+ !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
+ finish_op(0);
+ return;
+ }
+
+ send_post_update_object_map(false);
+ }
+
+ void finish_op(int r) {
+ ldout(m_cct, 20) << "r=" << r << dendl;
+
+ if (m_finish_op_ctx != nullptr) {
+ m_finish_op_ctx->complete(0);
+ }
+ this->complete(r);
+ }
+
+private:
+ CephContext *m_cct;
+ uint64_t m_object_no;
+ size_t m_sparse_size;
+ std::string m_oid;
+
+ bool m_remove_empty = false;
+ bool m_trying_trim = false;
+ bufferlist m_bl;
+ std::map<uint64_t,uint64_t> m_extent_map;
+ uint64_t m_new_end = 0;
+ Context *m_finish_op_ctx = nullptr;
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::operation::SparsifyRequest: " << this \
+ << " " << __func__ << ": "
+
+template <typename I>
+bool SparsifyRequest<I>::should_complete(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+ if (r < 0) {
+ lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
+ }
+ return true;
+}
+
+template <typename I>
+void SparsifyRequest<I>::send_op() {
+ sparsify_objects();
+}
+
+template <typename I>
+void SparsifyRequest<I>::sparsify_objects() {
+ I &image_ctx = this->m_image_ctx;
+ ceph_assert(image_ctx.owner_lock.is_locked());
+
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << dendl;
+
+ assert(image_ctx.owner_lock.is_locked());
+
+ uint64_t objects = 0;
+ {
+ RWLock::RLocker snap_locker(image_ctx.snap_lock);
+ objects = image_ctx.get_object_count(CEPH_NOSNAP);
+ }
+
+ auto ctx = create_context_callback<
+ SparsifyRequest<I>,
+ &SparsifyRequest<I>::handle_sparsify_objects>(this);
+ typename AsyncObjectThrottle<I>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_SparsifyObject<I> >(),
+ boost::lambda::_1, &image_ctx, boost::lambda::_2, m_sparse_size));
+ AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
+ this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, objects);
+ throttle->start_ops(
+ image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
+}
+
+template <typename I>
+void SparsifyRequest<I>::handle_sparsify_objects(int r) {
+ I &image_ctx = this->m_image_ctx;
+ CephContext *cct = image_ctx.cct;
+ ldout(cct, 5) << "r=" << r << dendl;
+
+ if (r == -ERESTART) {
+ ldout(cct, 5) << "sparsify operation interrupted" << dendl;
+ this->complete(r);
+ return;
+ } else if (r < 0) {
+ lderr(cct) << "sparsify encountered an error: " << cpp_strerror(r) << dendl;
+ this->complete(r);
+ return;
+ }
+
+ this->complete(0);
+}
+
+} // namespace operation
+} // namespace librbd
+
+template class librbd::operation::SparsifyRequest<librbd::ImageCtx>;