diff options
Diffstat (limited to 'src/librbd/object_map')
27 files changed, 2794 insertions, 0 deletions
diff --git a/src/librbd/object_map/CreateRequest.cc b/src/librbd/object_map/CreateRequest.cc new file mode 100644 index 000000000..d26f929fa --- /dev/null +++ b/src/librbd/object_map/CreateRequest.cc @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/CreateRequest.h" +#include "include/ceph_assert.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "osdc/Striper.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::CreateRequest: " + +namespace librbd { +namespace object_map { + +using util::create_context_callback; +using util::create_rados_callback; + +template <typename I> +CreateRequest<I>::CreateRequest(I *image_ctx, Context *on_finish) + : m_image_ctx(image_ctx), m_on_finish(on_finish) { +} + +template <typename I> +void CreateRequest<I>::send() { + CephContext *cct = m_image_ctx->cct; + + uint64_t max_size = m_image_ctx->size; + + { + std::unique_lock image_locker{m_image_ctx->image_lock}; + m_snap_ids.push_back(CEPH_NOSNAP); + for (auto it : m_image_ctx->snap_info) { + max_size = std::max(max_size, it.second.size); + m_snap_ids.push_back(it.first); + } + + if (ObjectMap<>::is_compatible(m_image_ctx->layout, max_size)) { + send_object_map_resize(); + return; + } + } + + lderr(cct) << "image size not compatible with object map" << dendl; + m_on_finish->complete(-EINVAL); +} + +template <typename I> +void CreateRequest<I>::send_object_map_resize() { + CephContext *cct = m_image_ctx->cct; + ldout(cct, 20) << __func__ << dendl; + + Context *ctx = create_context_callback< + CreateRequest<I>, &CreateRequest<I>::handle_object_map_resize>(this); + C_Gather *gather_ctx = new C_Gather(cct, ctx); + + for (auto snap_id : m_snap_ids) { + librados::ObjectWriteOperation op; + uint64_t snap_size = m_image_ctx->get_image_size(snap_id); + + cls_client::object_map_resize(&op, Striper::get_num_objects( + m_image_ctx->layout, snap_size), + OBJECT_NONEXISTENT); + + std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id, snap_id)); + librados::AioCompletion *comp = create_rados_callback(gather_ctx->new_sub()); + int r = m_image_ctx->md_ctx.aio_operate(oid, comp, &op); + ceph_assert(r == 0); + comp->release(); + } + gather_ctx->activate(); +} + +template <typename I> +Context *CreateRequest<I>::handle_object_map_resize(int *result) { + CephContext *cct = m_image_ctx->cct; + ldout(cct, 20) << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "object map resize failed: " << cpp_strerror(*result) + << dendl; + } + return m_on_finish; +} + +} // namespace object_map +} // namespace librbd + +template class librbd::object_map::CreateRequest<librbd::ImageCtx>; diff --git a/src/librbd/object_map/CreateRequest.h b/src/librbd/object_map/CreateRequest.h new file mode 100644 index 000000000..33984cda1 --- /dev/null +++ b/src/librbd/object_map/CreateRequest.h @@ -0,0 +1,59 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_CREATE_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_CREATE_REQUEST_H + +#include "include/buffer.h" +#include <map> +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +template <typename ImageCtxT = ImageCtx> +class CreateRequest { +public: + static CreateRequest *create(ImageCtxT *image_ctx, Context *on_finish) { + return new CreateRequest(image_ctx, on_finish); + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | . . . + * v v . + * OBJECT_MAP_RESIZE . (for every snapshot) + * | . . + * v . . . + * <finis> + * + * @endverbatim + */ + + CreateRequest(ImageCtxT *image_ctx, Context *on_finish); + + ImageCtxT *m_image_ctx; + Context *m_on_finish; + + std::vector<uint64_t> m_snap_ids; + + void send_object_map_resize(); + Context *handle_object_map_resize(int *result); +}; + +} // namespace object_map +} // namespace librbd + +extern template class librbd::object_map::CreateRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OBJECT_MAP_CREATE_REQUEST_H diff --git a/src/librbd/object_map/DiffRequest.cc b/src/librbd/object_map/DiffRequest.cc new file mode 100644 index 000000000..566e98ac0 --- /dev/null +++ b/src/librbd/object_map/DiffRequest.cc @@ -0,0 +1,258 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/DiffRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "osdc/Striper.h" +#include <string> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::DiffRequest: " \ + << this << " " << __func__ << ": " + +namespace librbd { +namespace object_map { + +using util::create_rados_callback; + +template <typename I> +void DiffRequest<I>::send() { + auto cct = m_image_ctx->cct; + + if (m_snap_id_start == CEPH_NOSNAP || m_snap_id_start > m_snap_id_end) { + lderr(cct) << "invalid start/end snap ids: " + << "snap_id_start=" << m_snap_id_start << ", " + << "snap_id_end=" << m_snap_id_end << dendl; + finish(-EINVAL); + return; + } else if (m_snap_id_start == m_snap_id_end) { + // no delta between the same snapshot + finish(0); + return; + } + + m_object_diff_state->clear(); + + // collect all the snap ids in the provided range (inclusive) + if (m_snap_id_start != 0) { + m_snap_ids.insert(m_snap_id_start); + } + + std::shared_lock image_locker{m_image_ctx->image_lock}; + auto snap_info_it = m_image_ctx->snap_info.upper_bound(m_snap_id_start); + auto snap_info_it_end = m_image_ctx->snap_info.lower_bound(m_snap_id_end); + for (; snap_info_it != snap_info_it_end; ++snap_info_it) { + m_snap_ids.insert(snap_info_it->first); + } + m_snap_ids.insert(m_snap_id_end); + + load_object_map(&image_locker); +} + +template <typename I> +void DiffRequest<I>::load_object_map( + std::shared_lock<ceph::shared_mutex>* image_locker) { + ceph_assert(ceph_mutex_is_locked(m_image_ctx->image_lock)); + + if (m_snap_ids.empty()) { + image_locker->unlock(); + + finish(0); + return; + } + + m_current_snap_id = *m_snap_ids.begin(); + m_snap_ids.erase(m_current_snap_id); + + auto cct = m_image_ctx->cct; + ldout(cct, 10) << "snap_id=" << m_current_snap_id << dendl; + + if ((m_image_ctx->features & RBD_FEATURE_FAST_DIFF) == 0) { + image_locker->unlock(); + + ldout(cct, 10) << "fast-diff feature not enabled" << dendl; + finish(-EINVAL); + return; + } + + // ignore ENOENT with intermediate snapshots since deleted + // snaps will get merged with later snapshots + m_ignore_enoent = (m_current_snap_id != m_snap_id_start && + m_current_snap_id != m_snap_id_end); + + if (m_current_snap_id == CEPH_NOSNAP) { + m_current_size = m_image_ctx->size; + } else { + auto snap_it = m_image_ctx->snap_info.find(m_current_snap_id); + if (snap_it == m_image_ctx->snap_info.end()) { + ldout(cct, 10) << "snapshot " << m_current_snap_id << " does not exist" + << dendl; + if (!m_ignore_enoent) { + image_locker->unlock(); + + finish(-ENOENT); + return; + } + + load_object_map(image_locker); + return; + } + + m_current_size = snap_it->second.size; + } + + uint64_t flags = 0; + int r = m_image_ctx->get_flags(m_current_snap_id, &flags); + if (r < 0) { + image_locker->unlock(); + + lderr(cct) << "failed to retrieve image flags: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + image_locker->unlock(); + + if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) { + ldout(cct, 1) << "cannot perform fast diff on invalid object map" + << dendl; + finish(-EINVAL); + return; + } + + std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id, + m_current_snap_id)); + + librados::ObjectReadOperation op; + cls_client::object_map_load_start(&op); + + m_out_bl.clear(); + auto aio_comp = create_rados_callback< + DiffRequest<I>, &DiffRequest<I>::handle_load_object_map>(this); + r = m_image_ctx->md_ctx.aio_operate(oid, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void DiffRequest<I>::handle_load_object_map(int r) { + auto cct = m_image_ctx->cct; + ldout(cct, 10) << "r=" << r << dendl; + + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = cls_client::object_map_load_finish(&bl_it, &m_object_map); + } + + std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id, + m_current_snap_id)); + if (r == -ENOENT && m_ignore_enoent) { + ldout(cct, 10) << "object map " << oid << " does not exist" << dendl; + + std::shared_lock image_locker{m_image_ctx->image_lock}; + load_object_map(&image_locker); + return; + } else if (r < 0) { + lderr(cct) << "failed to load object map: " << oid << dendl; + finish(r); + return; + } + ldout(cct, 20) << "loaded object map " << oid << dendl; + + uint64_t num_objs = Striper::get_num_objects(m_image_ctx->layout, + m_current_size); + if (m_object_map.size() < num_objs) { + ldout(cct, 1) << "object map too small: " + << m_object_map.size() << " < " << num_objs << dendl; + finish(-EINVAL); + return; + } else { + m_object_map.resize(num_objs); + } + + size_t prev_object_diff_state_size = m_object_diff_state->size(); + if (prev_object_diff_state_size < num_objs) { + // the diff state should be the largest of all snapshots in the set + m_object_diff_state->resize(num_objs); + } + if (m_object_map.size() < m_object_diff_state->size()) { + // the image was shrunk so expanding the object map will flag end objects + // as non-existent and they will be compared against the previous object + // diff state + m_object_map.resize(m_object_diff_state->size()); + } + + uint64_t overlap = std::min(m_object_map.size(), prev_object_diff_state_size); + auto it = m_object_map.begin(); + auto overlap_end_it = it + overlap; + auto diff_it = m_object_diff_state->begin(); + uint64_t i = 0; + for (; it != overlap_end_it; ++it, ++diff_it, ++i) { + uint8_t object_map_state = *it; + uint8_t prev_object_diff_state = *diff_it; + if (object_map_state == OBJECT_EXISTS || + object_map_state == OBJECT_PENDING || + (object_map_state == OBJECT_EXISTS_CLEAN && + prev_object_diff_state != DIFF_STATE_DATA && + prev_object_diff_state != DIFF_STATE_DATA_UPDATED)) { + *diff_it = DIFF_STATE_DATA_UPDATED; + } else if (object_map_state == OBJECT_NONEXISTENT && + prev_object_diff_state != DIFF_STATE_HOLE && + prev_object_diff_state != DIFF_STATE_HOLE_UPDATED) { + *diff_it = DIFF_STATE_HOLE_UPDATED; + } + + ldout(cct, 20) << "object state: " << i << " " + << static_cast<uint32_t>(prev_object_diff_state) + << "->" << static_cast<uint32_t>(*diff_it) << " (" + << static_cast<uint32_t>(object_map_state) << ")" + << dendl; + } + ldout(cct, 20) << "computed overlap diffs" << dendl; + + bool diff_from_start = (m_snap_id_start == 0); + auto end_it = m_object_map.end(); + if (m_object_map.size() > prev_object_diff_state_size) { + for (; it != end_it; ++it,++diff_it, ++i) { + uint8_t object_map_state = *it; + if (object_map_state == OBJECT_NONEXISTENT) { + *diff_it = DIFF_STATE_HOLE; + } else if (diff_from_start || + (m_object_diff_state_valid && + object_map_state != OBJECT_EXISTS_CLEAN)) { + *diff_it = DIFF_STATE_DATA_UPDATED; + } else { + *diff_it = DIFF_STATE_DATA; + } + + ldout(cct, 20) << "object state: " << i << " " + << "->" << static_cast<uint32_t>(*diff_it) << " (" + << static_cast<uint32_t>(*it) << ")" << dendl; + } + } + ldout(cct, 20) << "computed resize diffs" << dendl; + + m_object_diff_state_valid = true; + + std::shared_lock image_locker{m_image_ctx->image_lock}; + load_object_map(&image_locker); +} + +template <typename I> +void DiffRequest<I>::finish(int r) { + auto cct = m_image_ctx->cct; + ldout(cct, 10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace object_map +} // namespace librbd + +template class librbd::object_map::DiffRequest<librbd::ImageCtx>; diff --git a/src/librbd/object_map/DiffRequest.h b/src/librbd/object_map/DiffRequest.h new file mode 100644 index 000000000..e83a1629e --- /dev/null +++ b/src/librbd/object_map/DiffRequest.h @@ -0,0 +1,87 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_DIFF_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_DIFF_REQUEST_H + +#include "include/int_types.h" +#include "common/bit_vector.hpp" +#include "common/ceph_mutex.h" +#include "librbd/object_map/Types.h" +#include <set> + +struct Context; + +namespace librbd { + +struct ImageCtx; + +namespace object_map { + +template <typename ImageCtxT> +class DiffRequest { +public: + static DiffRequest* create(ImageCtxT* image_ctx, uint64_t snap_id_start, + uint64_t snap_id_end, + BitVector<2>* object_diff_state, + Context* on_finish) { + return new DiffRequest(image_ctx, snap_id_start, snap_id_end, + object_diff_state, on_finish); + } + + DiffRequest(ImageCtxT* image_ctx, uint64_t snap_id_start, + uint64_t snap_id_end, BitVector<2>* object_diff_state, + Context* on_finish) + : m_image_ctx(image_ctx), m_snap_id_start(snap_id_start), + m_snap_id_end(snap_id_end), m_object_diff_state(object_diff_state), + m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * | /---------\ + * | | | + * v v | + * LOAD_OBJECT_MAP ---/ + * | + * v + * <finish> + * + * @endverbatim + */ + ImageCtxT* m_image_ctx; + uint64_t m_snap_id_start; + uint64_t m_snap_id_end; + BitVector<2>* m_object_diff_state; + Context* m_on_finish; + + std::set<uint64_t> m_snap_ids; + uint64_t m_current_snap_id = 0; + bool m_ignore_enoent = false; + + uint64_t m_current_size = 0; + + BitVector<2> m_object_map; + bool m_object_diff_state_valid = false; + + bufferlist m_out_bl; + + void load_object_map(std::shared_lock<ceph::shared_mutex>* image_locker); + void handle_load_object_map(int r); + + void finish(int r); + +}; + +} // namespace object_map +} // namespace librbd + +extern template class librbd::object_map::DiffRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OBJECT_MAP_DIFF_REQUEST_H diff --git a/src/librbd/object_map/InvalidateRequest.cc b/src/librbd/object_map/InvalidateRequest.cc new file mode 100644 index 000000000..bf2db9660 --- /dev/null +++ b/src/librbd/object_map/InvalidateRequest.cc @@ -0,0 +1,83 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/InvalidateRequest.h" +#include "common/dout.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::InvalidateRequest: " + +namespace librbd { +namespace object_map { + +template <typename I> +InvalidateRequest<I>* InvalidateRequest<I>::create(I &image_ctx, + uint64_t snap_id, bool force, + Context *on_finish) { + return new InvalidateRequest<I>(image_ctx, snap_id, force, on_finish); +} + +template <typename I> +void InvalidateRequest<I>::send() { + I &image_ctx = this->m_image_ctx; + ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock)); + ceph_assert(ceph_mutex_is_wlocked(image_ctx.image_lock)); + + uint64_t snap_flags; + int r = image_ctx.get_flags(m_snap_id, &snap_flags); + if (r < 0 || ((snap_flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0)) { + this->async_complete(r); + return; + } + + CephContext *cct = image_ctx.cct; + lderr(cct) << this << " invalidating object map in-memory" << dendl; + + // update in-memory flags + uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID; + if ((image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) { + flags |= RBD_FLAG_FAST_DIFF_INVALID; + } + + r = image_ctx.update_flags(m_snap_id, flags, true); + if (r < 0) { + this->async_complete(r); + return; + } + + // do not update on-disk flags if not image owner + if (image_ctx.image_watcher == nullptr || + (!m_force && m_snap_id == CEPH_NOSNAP && + image_ctx.exclusive_lock != nullptr && + !image_ctx.exclusive_lock->is_lock_owner())) { + this->async_complete(-EROFS); + return; + } + + lderr(cct) << this << " invalidating object map on-disk" << dendl; + librados::ObjectWriteOperation op; + cls_client::set_flags(&op, m_snap_id, flags, flags); + + librados::AioCompletion *rados_completion = + this->create_callback_completion(); + r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, rados_completion, + &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +bool InvalidateRequest<I>::should_complete(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + lderr(cct) << this << " " << __func__ << ": r=" << r << dendl; + return true; +} + +} // namespace object_map +} // namespace librbd + +template class librbd::object_map::InvalidateRequest<librbd::ImageCtx>; diff --git a/src/librbd/object_map/InvalidateRequest.h b/src/librbd/object_map/InvalidateRequest.h new file mode 100644 index 000000000..ce15bb2d3 --- /dev/null +++ b/src/librbd/object_map/InvalidateRequest.h @@ -0,0 +1,45 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_INVALIDATE_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_INVALIDATE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/AsyncRequest.h" + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +template <typename ImageCtxT = ImageCtx> +class InvalidateRequest : public AsyncRequest<ImageCtxT> { +public: + static InvalidateRequest* create(ImageCtxT &image_ctx, uint64_t snap_id, + bool force, Context *on_finish); + + InvalidateRequest(ImageCtxT &image_ctx, uint64_t snap_id, bool force, + Context *on_finish) + : AsyncRequest<ImageCtxT>(image_ctx, on_finish), + m_snap_id(snap_id), m_force(force) { + } + + void send() override; + +protected: + bool should_complete(int r) override; + +private: + uint64_t m_snap_id; + bool m_force; +}; + +} // namespace object_map +} // namespace librbd + +extern template class librbd::object_map::InvalidateRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OBJECT_MAP_INVALIDATE_REQUEST_H diff --git a/src/librbd/object_map/LockRequest.cc b/src/librbd/object_map/LockRequest.cc new file mode 100644 index 000000000..b9dc3c42e --- /dev/null +++ b/src/librbd/object_map/LockRequest.cc @@ -0,0 +1,157 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/LockRequest.h" +#include "cls/lock/cls_lock_client.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::LockRequest: " + +namespace librbd { +namespace object_map { + +using util::create_rados_callback; + +template <typename I> +LockRequest<I>::LockRequest(I &image_ctx, Context *on_finish) + : m_image_ctx(image_ctx), m_on_finish(on_finish), m_broke_lock(false) { +} + +template <typename I> +void LockRequest<I>::send() { + send_lock(); +} + +template <typename I> +void LockRequest<I>::send_lock() { + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl; + + librados::ObjectWriteOperation op; + rados::cls::lock::lock(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", "", "", + utime_t(), 0); + + using klass = LockRequest<I>; + librados::AioCompletion *rados_completion = + create_rados_callback<klass, &klass::handle_lock>(this); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +Context *LockRequest<I>::handle_lock(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl; + + if (*ret_val == 0) { + return m_on_finish; + } else if (*ret_val == -EEXIST) { + // already locked by myself + *ret_val = 0; + return m_on_finish; + } else if (m_broke_lock || *ret_val != -EBUSY) { + lderr(cct) << "failed to lock object map: " << cpp_strerror(*ret_val) + << dendl; + *ret_val = 0; + return m_on_finish; + } + + send_get_lock_info(); + return nullptr; +} + +template <typename I> +void LockRequest<I>::send_get_lock_info() { + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl; + + librados::ObjectReadOperation op; + rados::cls::lock::get_lock_info_start(&op, RBD_LOCK_NAME); + + using klass = LockRequest<I>; + librados::AioCompletion *rados_completion = + create_rados_callback<klass, &klass::handle_get_lock_info>(this); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op, &m_out_bl); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +Context *LockRequest<I>::handle_get_lock_info(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl; + + if (*ret_val == -ENOENT) { + send_lock(); + return nullptr; + } + + ClsLockType lock_type; + std::string lock_tag; + if (*ret_val == 0) { + auto it = m_out_bl.cbegin(); + *ret_val = rados::cls::lock::get_lock_info_finish(&it, &m_lockers, + &lock_type, &lock_tag); + } + if (*ret_val < 0) { + lderr(cct) << "failed to list object map locks: " << cpp_strerror(*ret_val) + << dendl; + *ret_val = 0; + return m_on_finish; + } + + send_break_locks(); + return nullptr; +} + +template <typename I> +void LockRequest<I>::send_break_locks() { + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << ", " + << "num_lockers=" << m_lockers.size() << dendl; + + librados::ObjectWriteOperation op; + for (auto &locker : m_lockers) { + rados::cls::lock::break_lock(&op, RBD_LOCK_NAME, locker.first.cookie, + locker.first.locker); + } + + using klass = LockRequest<I>; + librados::AioCompletion *rados_completion = + create_rados_callback<klass, &klass::handle_break_locks>(this); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +Context *LockRequest<I>::handle_break_locks(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl; + + m_broke_lock = true; + if (*ret_val == 0 || *ret_val == -ENOENT) { + send_lock(); + return nullptr; + } + + lderr(cct) << "failed to break object map lock: " << cpp_strerror(*ret_val) + << dendl; + *ret_val = 0; + return m_on_finish; +} + +} // namespace object_map +} // namespace librbd + +template class librbd::object_map::LockRequest<librbd::ImageCtx>; diff --git a/src/librbd/object_map/LockRequest.h b/src/librbd/object_map/LockRequest.h new file mode 100644 index 000000000..0333548e6 --- /dev/null +++ b/src/librbd/object_map/LockRequest.h @@ -0,0 +1,75 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_LOCK_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_LOCK_REQUEST_H + +#include "include/buffer.h" +#include "cls/lock/cls_lock_types.h" +#include <map> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +template <typename ImageCtxT = ImageCtx> +class LockRequest { +public: + static LockRequest* create(ImageCtxT &image_ctx, Context *on_finish) { + return new LockRequest(image_ctx, on_finish); + } + LockRequest(ImageCtxT &image_ctx, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> /------------------------------------- BREAK_LOCKS * * * + * | | ^ * + * | | | * + * | | | * + * | v (EBUSY && !broke_lock) | * + * \---------> LOCK_OBJECT_MAP * * * * * * * * * * * > GET_LOCK_INFO * * + * | * ^ * * + * | * * * * + * | * * (ENOENT) * * + * | * * * * * * * * * * * * * * * * * * + * | * * + * | * (other errors) * + * | * * + * v v (other errors) * + * <finish> < * * * * * * * * * * * * * * * * * * * * * * * * + * + * @endverbatim + */ + + ImageCtxT &m_image_ctx; + Context *m_on_finish; + + bool m_broke_lock; + std::map<rados::cls::lock::locker_id_t, + rados::cls::lock::locker_info_t> m_lockers; + bufferlist m_out_bl; + + void send_lock(); + Context *handle_lock(int *ret_val); + + void send_get_lock_info(); + Context *handle_get_lock_info(int *ret_val); + + void send_break_locks(); + Context *handle_break_locks(int *ret_val); +}; + +} // namespace object_map +} // namespace librbd + +extern template class librbd::object_map::LockRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OBJECT_MAP_LOCK_REQUEST_H diff --git a/src/librbd/object_map/RefreshRequest.cc b/src/librbd/object_map/RefreshRequest.cc new file mode 100644 index 000000000..0f6b81923 --- /dev/null +++ b/src/librbd/object_map/RefreshRequest.cc @@ -0,0 +1,311 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/RefreshRequest.h" +#include "cls/lock/cls_lock_client.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/object_map/InvalidateRequest.h" +#include "librbd/object_map/LockRequest.h" +#include "librbd/object_map/ResizeRequest.h" +#include "librbd/Utils.h" +#include "osdc/Striper.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::RefreshRequest: " + +namespace librbd { + +using util::create_context_callback; +using util::create_rados_callback; + +namespace object_map { + +template <typename I> +RefreshRequest<I>::RefreshRequest(I &image_ctx, ceph::shared_mutex* object_map_lock, + ceph::BitVector<2> *object_map, + uint64_t snap_id, Context *on_finish) + : m_image_ctx(image_ctx), m_object_map_lock(object_map_lock), + m_object_map(object_map), m_snap_id(snap_id), m_on_finish(on_finish), + m_object_count(0), m_truncate_on_disk_object_map(false) { +} + +template <typename I> +void RefreshRequest<I>::send() { + { + std::shared_lock image_locker{m_image_ctx.image_lock}; + m_object_count = Striper::get_num_objects( + m_image_ctx.layout, m_image_ctx.get_image_size(m_snap_id)); + } + + + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": " + << "object_count=" << m_object_count << dendl; + send_lock(); +} + +template <typename I> +void RefreshRequest<I>::apply() { + uint64_t num_objs; + { + std::shared_lock image_locker{m_image_ctx.image_lock}; + num_objs = Striper::get_num_objects( + m_image_ctx.layout, m_image_ctx.get_image_size(m_snap_id)); + } + ceph_assert(m_on_disk_object_map.size() >= num_objs); + + std::unique_lock object_map_locker{*m_object_map_lock}; + *m_object_map = m_on_disk_object_map; +} + +template <typename I> +void RefreshRequest<I>::send_lock() { + CephContext *cct = m_image_ctx.cct; + if (m_object_count > cls::rbd::MAX_OBJECT_MAP_OBJECT_COUNT) { + send_invalidate_and_close(); + return; + } else if (m_snap_id != CEPH_NOSNAP) { + send_load(); + return; + } + + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl; + + using klass = RefreshRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_lock>(this); + + LockRequest<I> *req = LockRequest<I>::create(m_image_ctx, ctx); + req->send(); +} + +template <typename I> +Context *RefreshRequest<I>::handle_lock(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + + ceph_assert(*ret_val == 0); + send_load(); + return nullptr; +} + +template <typename I> +void RefreshRequest<I>::send_load() { + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl; + + librados::ObjectReadOperation op; + cls_client::object_map_load_start(&op); + + using klass = RefreshRequest<I>; + m_out_bl.clear(); + librados::AioCompletion *rados_completion = + create_rados_callback<klass, &klass::handle_load>(this); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op, &m_out_bl); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +Context *RefreshRequest<I>::handle_load(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl; + + if (*ret_val == 0) { + auto bl_it = m_out_bl.cbegin(); + *ret_val = cls_client::object_map_load_finish(&bl_it, + &m_on_disk_object_map); + } + + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + if (*ret_val == -EINVAL) { + // object map is corrupt on-disk -- clear it and properly size it + // so future IO can keep the object map in sync + lderr(cct) << "object map corrupt on-disk: " << oid << dendl; + m_truncate_on_disk_object_map = true; + send_resize_invalidate(); + return nullptr; + } else if (*ret_val < 0) { + lderr(cct) << "failed to load object map: " << oid << dendl; + if (*ret_val == -ETIMEDOUT && + !cct->_conf.get_val<bool>("rbd_invalidate_object_map_on_timeout")) { + return m_on_finish; + } + + send_invalidate(); + return nullptr; + } + + if (m_on_disk_object_map.size() < m_object_count) { + lderr(cct) << "object map smaller than current object count: " + << m_on_disk_object_map.size() << " != " + << m_object_count << dendl; + send_resize_invalidate(); + return nullptr; + } + + ldout(cct, 20) << "refreshed object map: num_objs=" + << m_on_disk_object_map.size() << dendl; + if (m_on_disk_object_map.size() > m_object_count) { + // resize op might have been interrupted + ldout(cct, 1) << "object map larger than current object count: " + << m_on_disk_object_map.size() << " != " + << m_object_count << dendl; + } + + apply(); + return m_on_finish; +} + +template <typename I> +void RefreshRequest<I>::send_invalidate() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + + m_on_disk_object_map.clear(); + object_map::ResizeRequest::resize(&m_on_disk_object_map, m_object_count, + OBJECT_EXISTS); + + using klass = RefreshRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_invalidate>(this); + InvalidateRequest<I> *req = InvalidateRequest<I>::create( + m_image_ctx, m_snap_id, true, ctx); + + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + std::unique_lock image_locker{m_image_ctx.image_lock}; + req->send(); +} + +template <typename I> +Context *RefreshRequest<I>::handle_invalidate(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl; + + if (*ret_val < 0) { + lderr(cct) << "failed to invalidate object map: " << cpp_strerror(*ret_val) + << dendl; + } + + apply(); + return m_on_finish; +} + +template <typename I> +void RefreshRequest<I>::send_resize_invalidate() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + + m_on_disk_object_map.clear(); + object_map::ResizeRequest::resize(&m_on_disk_object_map, m_object_count, + OBJECT_EXISTS); + + using klass = RefreshRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_resize_invalidate>(this); + InvalidateRequest<I> *req = InvalidateRequest<I>::create( + m_image_ctx, m_snap_id, true, ctx); + + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + std::unique_lock image_locker{m_image_ctx.image_lock}; + req->send(); +} + +template <typename I> +Context *RefreshRequest<I>::handle_resize_invalidate(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl; + + if (*ret_val < 0) { + lderr(cct) << "failed to invalidate object map: " << cpp_strerror(*ret_val) + << dendl; + apply(); + return m_on_finish; + } + + send_resize(); + return nullptr; +} + +template <typename I> +void RefreshRequest<I>::send_resize() { + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl; + + librados::ObjectWriteOperation op; + if (m_snap_id == CEPH_NOSNAP) { + rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", ""); + } + if (m_truncate_on_disk_object_map) { + op.truncate(0); + } + cls_client::object_map_resize(&op, m_object_count, OBJECT_NONEXISTENT); + + using klass = RefreshRequest<I>; + librados::AioCompletion *rados_completion = + create_rados_callback<klass, &klass::handle_resize>(this); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +Context *RefreshRequest<I>::handle_resize(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl; + + if (*ret_val < 0) { + lderr(cct) << "failed to adjust object map size: " << cpp_strerror(*ret_val) + << dendl; + *ret_val = 0; + } + + apply(); + return m_on_finish; +} + +template <typename I> +void RefreshRequest<I>::send_invalidate_and_close() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + + using klass = RefreshRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_invalidate_and_close>(this); + InvalidateRequest<I> *req = InvalidateRequest<I>::create( + m_image_ctx, m_snap_id, false, ctx); + + lderr(cct) << "object map too large: " << m_object_count << dendl; + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + std::unique_lock image_locker{m_image_ctx.image_lock}; + req->send(); +} + +template <typename I> +Context *RefreshRequest<I>::handle_invalidate_and_close(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl; + + if (*ret_val < 0) { + lderr(cct) << "failed to invalidate object map: " << cpp_strerror(*ret_val) + << dendl; + } else { + *ret_val = -EFBIG; + } + + std::unique_lock object_map_locker{*m_object_map_lock}; + m_object_map->clear(); + return m_on_finish; +} + +} // namespace object_map +} // namespace librbd + +template class librbd::object_map::RefreshRequest<librbd::ImageCtx>; diff --git a/src/librbd/object_map/RefreshRequest.h b/src/librbd/object_map/RefreshRequest.h new file mode 100644 index 000000000..0bca85079 --- /dev/null +++ b/src/librbd/object_map/RefreshRequest.h @@ -0,0 +1,102 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_REFRESH_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_REFRESH_REQUEST_H + +#include "include/int_types.h" +#include "include/buffer.h" +#include "common/bit_vector.hpp" +#include "common/ceph_mutex.h" + +class Context; +class RWLock; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +template <typename ImageCtxT = ImageCtx> +class RefreshRequest { +public: + static RefreshRequest *create(ImageCtxT &image_ctx, + ceph::shared_mutex* object_map_lock, + ceph::BitVector<2> *object_map, + uint64_t snap_id, Context *on_finish) { + return new RefreshRequest(image_ctx, object_map_lock, object_map, snap_id, + on_finish); + } + + RefreshRequest(ImageCtxT &image_ctx, ceph::shared_mutex* object_map_lock, + ceph::BitVector<2> *object_map, uint64_t snap_id, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> -----> LOCK (skip if snapshot) + * * | + * * v (other errors) + * * LOAD * * * * * * * > INVALIDATE ------------\ + * * | * | + * * | * (-EINVAL or too small) | + * * | * * * * * * > INVALIDATE_AND_RESIZE | + * * | | * | + * * | | * | + * * | v * | + * * | RESIZE * | + * * | | * | + * * | | * * * * * * * | + * * | | * | + * * | v v | + * * \--------------------> LOCK <-------------/ + * * | + * v v + * INVALIDATE_AND_CLOSE ---------------> <finish> + * + * @endverbatim + */ + + ImageCtxT &m_image_ctx; + ceph::shared_mutex* m_object_map_lock; + ceph::BitVector<2> *m_object_map; + uint64_t m_snap_id; + Context *m_on_finish; + + uint64_t m_object_count; + ceph::BitVector<2> m_on_disk_object_map; + bool m_truncate_on_disk_object_map; + bufferlist m_out_bl; + + void send_lock(); + Context *handle_lock(int *ret_val); + + void send_load(); + Context *handle_load(int *ret_val); + + void send_invalidate(); + Context *handle_invalidate(int *ret_val); + + void send_resize_invalidate(); + Context *handle_resize_invalidate(int *ret_val); + + void send_resize(); + Context *handle_resize(int *ret_val); + + void send_invalidate_and_close(); + Context *handle_invalidate_and_close(int *ret_val); + + void apply(); +}; + +} // namespace object_map +} // namespace librbd + +extern template class librbd::object_map::RefreshRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OBJECT_MAP_REFRESH_REQUEST_H diff --git a/src/librbd/object_map/RemoveRequest.cc b/src/librbd/object_map/RemoveRequest.cc new file mode 100644 index 000000000..a718d81fc --- /dev/null +++ b/src/librbd/object_map/RemoveRequest.cc @@ -0,0 +1,88 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/RemoveRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "include/ceph_assert.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::RemoveRequest: " + +namespace librbd { +namespace object_map { + +using util::create_rados_callback; + +template <typename I> +RemoveRequest<I>::RemoveRequest(I *image_ctx, Context *on_finish) + : m_image_ctx(image_ctx), m_on_finish(on_finish) { +} + +template <typename I> +void RemoveRequest<I>::send() { + send_remove_object_map(); +} + +template <typename I> +void RemoveRequest<I>::send_remove_object_map() { + CephContext *cct = m_image_ctx->cct; + ldout(cct, 20) << __func__ << dendl; + + std::unique_lock image_locker{m_image_ctx->image_lock}; + std::vector<uint64_t> snap_ids; + snap_ids.push_back(CEPH_NOSNAP); + for (auto it : m_image_ctx->snap_info) { + snap_ids.push_back(it.first); + } + + std::lock_guard locker{m_lock}; + ceph_assert(m_ref_counter == 0); + + for (auto snap_id : snap_ids) { + m_ref_counter++; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id, snap_id)); + using klass = RemoveRequest<I>; + librados::AioCompletion *comp = + create_rados_callback<klass, &klass::handle_remove_object_map>(this); + + int r = m_image_ctx->md_ctx.aio_remove(oid, comp); + ceph_assert(r == 0); + comp->release(); + } +} + +template <typename I> +Context *RemoveRequest<I>::handle_remove_object_map(int *result) { + CephContext *cct = m_image_ctx->cct; + ldout(cct, 20) << __func__ << ": r=" << *result << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_ref_counter > 0); + m_ref_counter--; + + if (*result < 0 && *result != -ENOENT) { + lderr(cct) << "failed to remove object map: " << cpp_strerror(*result) + << dendl; + m_error_result = *result; + } + if (m_ref_counter > 0) { + return nullptr; + } + } + if (m_error_result < 0) { + *result = m_error_result; + } + return m_on_finish; +} + +} // namespace object_map +} // namespace librbd + +template class librbd::object_map::RemoveRequest<librbd::ImageCtx>; diff --git a/src/librbd/object_map/RemoveRequest.h b/src/librbd/object_map/RemoveRequest.h new file mode 100644 index 000000000..ce82e603c --- /dev/null +++ b/src/librbd/object_map/RemoveRequest.h @@ -0,0 +1,63 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_REMOVE_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_REMOVE_REQUEST_H + +#include "include/buffer.h" +#include "common/ceph_mutex.h" +#include <map> +#include <string> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +template <typename ImageCtxT = ImageCtx> +class RemoveRequest { +public: + static RemoveRequest *create(ImageCtxT *image_ctx, Context *on_finish) { + return new RemoveRequest(image_ctx, on_finish); + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | . . . + * v v . + * REMOVE_OBJECT_MAP . (for every snapshot) + * | . . + * v . . . + * <finis> + * + * @endverbatim + */ + + RemoveRequest(ImageCtxT *image_ctx, Context *on_finish); + + ImageCtxT *m_image_ctx; + Context *m_on_finish; + + int m_error_result = 0; + int m_ref_counter = 0; + mutable ceph::mutex m_lock = + ceph::make_mutex("object_map::RemoveRequest::m_lock"); + + void send_remove_object_map(); + Context *handle_remove_object_map(int *result); +}; + +} // namespace object_map +} // namespace librbd + +extern template class librbd::object_map::RemoveRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OBJECT_MAP_REMOVE_REQUEST_H diff --git a/src/librbd/object_map/Request.cc b/src/librbd/object_map/Request.cc new file mode 100644 index 000000000..1e1aab2ae --- /dev/null +++ b/src/librbd/object_map/Request.cc @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/Request.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/RWLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/object_map/InvalidateRequest.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::Request: " + +namespace librbd { +namespace object_map { + +bool Request::should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << this << " should_complete: r=" << r << dendl; + + switch (m_state) + { + case STATE_REQUEST: + if (r == -ETIMEDOUT && + !cct->_conf.get_val<bool>("rbd_invalidate_object_map_on_timeout")) { + m_state = STATE_TIMEOUT; + return true; + } else if (r < 0) { + lderr(cct) << "failed to update object map: " << cpp_strerror(r) + << dendl; + return invalidate(); + } + + finish_request(); + return true; + + case STATE_INVALIDATE: + ldout(cct, 20) << "INVALIDATE" << dendl; + if (r < 0) { + lderr(cct) << "failed to invalidate object map: " << cpp_strerror(r) + << dendl; + } + return true; + + default: + lderr(cct) << "invalid state: " << m_state << dendl; + ceph_abort(); + break; + } + return false; +} + +bool Request::invalidate() { + bool flags_set; + int r = m_image_ctx.test_flags(m_snap_id, RBD_FLAG_OBJECT_MAP_INVALID, + &flags_set); + if (r < 0 || flags_set) { + return true; + } + + m_state = STATE_INVALIDATE; + + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + std::unique_lock image_locker{m_image_ctx.image_lock}; + InvalidateRequest<> *req = new InvalidateRequest<>(m_image_ctx, m_snap_id, + true, + create_callback_context()); + req->send(); + return false; +} + +} // namespace object_map +} // namespace librbd diff --git a/src/librbd/object_map/Request.h b/src/librbd/object_map/Request.h new file mode 100644 index 000000000..7e9bfb88d --- /dev/null +++ b/src/librbd/object_map/Request.h @@ -0,0 +1,66 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_REQUEST_H + +#include "include/int_types.h" +#include "librbd/AsyncRequest.h" + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +class Request : public AsyncRequest<> { +public: + Request(ImageCtx &image_ctx, uint64_t snap_id, Context *on_finish) + : AsyncRequest(image_ctx, on_finish), m_snap_id(snap_id), + m_state(STATE_REQUEST) + { + } + + void send() override = 0; + +protected: + const uint64_t m_snap_id; + + bool should_complete(int r) override; + int filter_return_code(int r) const override { + if (m_state == STATE_REQUEST) { + // never propagate an error back to the caller + return 0; + } + return r; + } + virtual void finish_request() { + } + +private: + /** + * STATE_TIMEOUT --------\ + * ^ | + * | v + * <start> ---> STATE_REQUEST ---> <finish> + * | ^ + * v | + * STATE_INVALIDATE -------/ + */ + enum State { + STATE_REQUEST, + STATE_TIMEOUT, + STATE_INVALIDATE + }; + + State m_state; + + bool invalidate(); +}; + +} // namespace object_map +} // namespace librbd + +#endif // CEPH_LIBRBD_OBJECT_MAP_REQUEST_H diff --git a/src/librbd/object_map/ResizeRequest.cc b/src/librbd/object_map/ResizeRequest.cc new file mode 100644 index 000000000..91a3140ed --- /dev/null +++ b/src/librbd/object_map/ResizeRequest.cc @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/ResizeRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "osdc/Striper.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "cls/lock/cls_lock_client.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::ResizeRequest: " + +namespace librbd { +namespace object_map { + +void ResizeRequest::resize(ceph::BitVector<2> *object_map, uint64_t num_objs, + uint8_t default_state) { + size_t orig_object_map_size = object_map->size(); + object_map->resize(num_objs); + if (num_objs > orig_object_map_size) { + auto it = object_map->begin() + orig_object_map_size; + auto end_it = object_map->begin() + num_objs; + for (;it != end_it; ++it) { + *it = default_state; + } + } +} + +void ResizeRequest::send() { + CephContext *cct = m_image_ctx.cct; + + std::unique_lock l{*m_object_map_lock}; + m_num_objs = Striper::get_num_objects(m_image_ctx.layout, m_new_size); + + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + ldout(cct, 5) << this << " resizing on-disk object map: " + << "ictx=" << &m_image_ctx << ", " + << "oid=" << oid << ", num_objs=" << m_num_objs << dendl; + + librados::ObjectWriteOperation op; + if (m_snap_id == CEPH_NOSNAP) { + rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", ""); + } + cls_client::object_map_resize(&op, m_num_objs, m_default_object_state); + + librados::AioCompletion *rados_completion = create_callback_completion(); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +void ResizeRequest::finish_request() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " resizing in-memory object map: " + << m_num_objs << dendl; + + std::unique_lock object_map_locker{*m_object_map_lock}; + resize(m_object_map, m_num_objs, m_default_object_state); +} + +} // namespace object_map +} // namespace librbd diff --git a/src/librbd/object_map/ResizeRequest.h b/src/librbd/object_map/ResizeRequest.h new file mode 100644 index 000000000..dccdef133 --- /dev/null +++ b/src/librbd/object_map/ResizeRequest.h @@ -0,0 +1,52 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_RESIZE_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_RESIZE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/object_map/Request.h" +#include "common/bit_vector.hpp" + +class Context; +class RWLock; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +class ResizeRequest : public Request { +public: + ResizeRequest(ImageCtx &image_ctx, ceph::shared_mutex *object_map_lock, + ceph::BitVector<2> *object_map, uint64_t snap_id, + uint64_t new_size, uint8_t default_object_state, + Context *on_finish) + : Request(image_ctx, snap_id, on_finish), + m_object_map_lock(object_map_lock), m_object_map(object_map), + m_num_objs(0), m_new_size(new_size), + m_default_object_state(default_object_state) + { + } + + static void resize(ceph::BitVector<2> *object_map, uint64_t num_objs, + uint8_t default_state); + + void send() override; + +protected: + void finish_request() override; + +private: + ceph::shared_mutex* m_object_map_lock; + ceph::BitVector<2> *m_object_map; + uint64_t m_num_objs; + uint64_t m_new_size; + uint8_t m_default_object_state; +}; + +} // namespace object_map +} // namespace librbd + +#endif // CEPH_LIBRBD_OBJECT_MAP_RESIZE_REQUEST_H diff --git a/src/librbd/object_map/SnapshotCreateRequest.cc b/src/librbd/object_map/SnapshotCreateRequest.cc new file mode 100644 index 000000000..3b2e7ee82 --- /dev/null +++ b/src/librbd/object_map/SnapshotCreateRequest.cc @@ -0,0 +1,147 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/SnapshotCreateRequest.h" +#include "common/dout.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "cls/lock/cls_lock_client.h" +#include <iostream> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::SnapshotCreateRequest: " + +namespace librbd { +namespace object_map { + +namespace { + +std::ostream& operator<<(std::ostream& os, + const SnapshotCreateRequest::State& state) { + switch(state) { + case SnapshotCreateRequest::STATE_READ_MAP: + os << "READ_MAP"; + break; + case SnapshotCreateRequest::STATE_WRITE_MAP: + os << "WRITE_MAP"; + break; + case SnapshotCreateRequest::STATE_ADD_SNAPSHOT: + os << "ADD_SNAPSHOT"; + break; + default: + os << "UNKNOWN (" << static_cast<uint32_t>(state) << ")"; + break; + } + return os; +} + +} // anonymous namespace + +void SnapshotCreateRequest::send() { + send_read_map(); +} + +bool SnapshotCreateRequest::should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", " + << "r=" << r << dendl; + if (r < 0 && m_ret_val == 0) { + m_ret_val = r; + } + if (m_ret_val < 0) { + // pass errors down to base class to invalidate the object map + return Request::should_complete(r); + } + + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + bool finished = false; + switch (m_state) { + case STATE_READ_MAP: + send_write_map(); + break; + case STATE_WRITE_MAP: + finished = send_add_snapshot(); + break; + case STATE_ADD_SNAPSHOT: + update_object_map(); + finished = true; + break; + default: + ceph_abort(); + break; + } + return finished; +} + +void SnapshotCreateRequest::send_read_map() { + ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock)); + + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + ldout(cct, 5) << this << " " << __func__ << ": oid=" << oid << dendl; + m_state = STATE_READ_MAP; + + // IO is blocked due to the snapshot creation -- consistent to read from disk + librados::ObjectReadOperation op; + op.read(0, 0, NULL, NULL); + + librados::AioCompletion *rados_completion = create_callback_completion(); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op, + &m_read_bl); + ceph_assert(r == 0); + rados_completion->release(); +} + +void SnapshotCreateRequest::send_write_map() { + CephContext *cct = m_image_ctx.cct; + std::string snap_oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + ldout(cct, 5) << this << " " << __func__ << ": snap_oid=" << snap_oid + << dendl; + m_state = STATE_WRITE_MAP; + + librados::ObjectWriteOperation op; + op.write_full(m_read_bl); + + librados::AioCompletion *rados_completion = create_callback_completion(); + int r = m_image_ctx.md_ctx.aio_operate(snap_oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +bool SnapshotCreateRequest::send_add_snapshot() { + std::shared_lock image_locker{m_image_ctx.image_lock}; + if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) == 0) { + return true; + } + + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + ldout(cct, 5) << this << " " << __func__ << ": oid=" << oid << dendl; + m_state = STATE_ADD_SNAPSHOT; + + librados::ObjectWriteOperation op; + rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", ""); + cls_client::object_map_snap_add(&op); + + librados::AioCompletion *rados_completion = create_callback_completion(); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); + return false; +} + +void SnapshotCreateRequest::update_object_map() { + std::unique_lock object_map_locker{*m_object_map_lock}; + + auto it = m_object_map.begin(); + auto end_it = m_object_map.end(); + for (; it != end_it; ++it) { + if (*it == OBJECT_EXISTS) { + *it = OBJECT_EXISTS_CLEAN; + } + } +} + +} // namespace object_map +} // namespace librbd diff --git a/src/librbd/object_map/SnapshotCreateRequest.h b/src/librbd/object_map/SnapshotCreateRequest.h new file mode 100644 index 000000000..3074d059d --- /dev/null +++ b/src/librbd/object_map/SnapshotCreateRequest.h @@ -0,0 +1,80 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_CREATE_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_CREATE_REQUEST_H + +#include "include/int_types.h" +#include "common/bit_vector.hpp" +#include "librbd/object_map/Request.h" + +class Context; +class RWLock; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +class SnapshotCreateRequest : public Request { +public: + /** + * Snapshot create goes through the following state machine: + * + * @verbatim + * + * <start> + * | + * v + * STATE_READ_MAP + * | + * v (skip) + * STATE_WRITE_MAP . . . . . . . + * | . + * v v + * STATE_ADD_SNAPSHOT ---> <finish> + * + * @endverbatim + * + * The _ADD_SNAPSHOT state is skipped if the FAST_DIFF feature isn't enabled. + */ + enum State { + STATE_READ_MAP, + STATE_WRITE_MAP, + STATE_ADD_SNAPSHOT + }; + + SnapshotCreateRequest(ImageCtx &image_ctx, ceph::shared_mutex* object_map_lock, + ceph::BitVector<2> *object_map, uint64_t snap_id, + Context *on_finish) + : Request(image_ctx, snap_id, on_finish), + m_object_map_lock(object_map_lock), m_object_map(*object_map), + m_ret_val(0) { + } + + void send() override; + +protected: + bool should_complete(int r) override; + +private: + ceph::shared_mutex* m_object_map_lock; + ceph::BitVector<2> &m_object_map; + + State m_state = STATE_READ_MAP; + bufferlist m_read_bl; + int m_ret_val; + + void send_read_map(); + void send_write_map(); + bool send_add_snapshot(); + + void update_object_map(); + +}; + +} // namespace object_map +} // namespace librbd + +#endif // CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_CREATE_REQUEST_H diff --git a/src/librbd/object_map/SnapshotRemoveRequest.cc b/src/librbd/object_map/SnapshotRemoveRequest.cc new file mode 100644 index 000000000..1c2ffc753 --- /dev/null +++ b/src/librbd/object_map/SnapshotRemoveRequest.cc @@ -0,0 +1,227 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/SnapshotRemoveRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/object_map/InvalidateRequest.h" +#include "cls/lock/cls_lock_client.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::SnapshotRemoveRequest: " \ + << this << " " << __func__ << ": " + +namespace librbd { +namespace object_map { + +void SnapshotRemoveRequest::send() { + ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); + ceph_assert(ceph_mutex_is_wlocked(m_image_ctx.image_lock)); + + if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) { + int r = m_image_ctx.get_flags(m_snap_id, &m_flags); + ceph_assert(r == 0); + + compute_next_snap_id(); + load_map(); + } else { + remove_map(); + } +} + +void SnapshotRemoveRequest::load_map() { + CephContext *cct = m_image_ctx.cct; + std::string snap_oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + ldout(cct, 5) << "snap_oid=" << snap_oid << dendl; + + librados::ObjectReadOperation op; + cls_client::object_map_load_start(&op); + + auto rados_completion = librbd::util::create_rados_callback< + SnapshotRemoveRequest, &SnapshotRemoveRequest::handle_load_map>(this); + int r = m_image_ctx.md_ctx.aio_operate(snap_oid, rados_completion, &op, + &m_out_bl); + ceph_assert(r == 0); + rados_completion->release(); +} + +void SnapshotRemoveRequest::handle_load_map(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = cls_client::object_map_load_finish(&it, &m_snap_object_map); + } + if (r == -ENOENT) { + // implies we have already deleted this snapshot and handled the + // necessary fast-diff cleanup + complete(0); + return; + } else if (r < 0) { + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + lderr(cct) << "failed to load object map " << oid << ": " + << cpp_strerror(r) << dendl; + + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + std::unique_lock image_locker{m_image_ctx.image_lock}; + invalidate_next_map(); + return; + } + + remove_snapshot(); +} + +void SnapshotRemoveRequest::remove_snapshot() { + if ((m_flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0) { + // snapshot object map exists on disk but is invalid. cannot clean fast-diff + // on next snapshot if current snapshot was invalid. + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + std::unique_lock image_locker{m_image_ctx.image_lock}; + invalidate_next_map(); + return; + } + + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_next_snap_id)); + ldout(cct, 5) << "oid=" << oid << dendl; + + librados::ObjectWriteOperation op; + if (m_next_snap_id == CEPH_NOSNAP) { + rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", ""); + } + cls_client::object_map_snap_remove(&op, m_snap_object_map); + + auto rados_completion = librbd::util::create_rados_callback< + SnapshotRemoveRequest, + &SnapshotRemoveRequest::handle_remove_snapshot>(this); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +void SnapshotRemoveRequest::handle_remove_snapshot(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + if (r < 0 && r != -ENOENT) { + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, + m_next_snap_id)); + lderr(cct) << "failed to remove object map snapshot " << oid << ": " + << cpp_strerror(r) << dendl; + + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + std::unique_lock image_locker{m_image_ctx.image_lock}; + invalidate_next_map(); + return; + } + + std::shared_lock image_locker{m_image_ctx.image_lock}; + update_object_map(); + remove_map(); +} + +void SnapshotRemoveRequest::invalidate_next_map() { + ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); + ceph_assert(ceph_mutex_is_wlocked(m_image_ctx.image_lock)); + + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << dendl; + + auto ctx = librbd::util::create_context_callback< + SnapshotRemoveRequest, + &SnapshotRemoveRequest::handle_invalidate_next_map>(this); + InvalidateRequest<> *req = new InvalidateRequest<>(m_image_ctx, + m_next_snap_id, true, ctx); + req->send(); +} + +void SnapshotRemoveRequest::handle_invalidate_next_map(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0) { + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, + m_next_snap_id)); + lderr(cct) << "failed to invalidate object map " << oid << ": " + << cpp_strerror(r) << dendl; + complete(r); + return; + } + + remove_map(); +} + +void SnapshotRemoveRequest::remove_map() { + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + ldout(cct, 5) << "oid=" << oid << dendl; + + librados::ObjectWriteOperation op; + op.remove(); + + auto rados_completion = librbd::util::create_rados_callback< + SnapshotRemoveRequest, &SnapshotRemoveRequest::handle_remove_map>(this); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +void SnapshotRemoveRequest::handle_remove_map(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + lderr(cct) << "failed to remove object map " << oid << ": " + << cpp_strerror(r) << dendl; + complete(r); + return; + } + + complete(0); +} + +void SnapshotRemoveRequest::compute_next_snap_id() { + ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock)); + + m_next_snap_id = CEPH_NOSNAP; + std::map<librados::snap_t, SnapInfo>::const_iterator it = + m_image_ctx.snap_info.find(m_snap_id); + ceph_assert(it != m_image_ctx.snap_info.end()); + + ++it; + if (it != m_image_ctx.snap_info.end()) { + m_next_snap_id = it->first; + } +} + +void SnapshotRemoveRequest::update_object_map() { + assert(ceph_mutex_is_locked(m_image_ctx.image_lock)); + std::unique_lock object_map_locker{*m_object_map_lock}; + if (m_next_snap_id == m_image_ctx.snap_id && m_next_snap_id == CEPH_NOSNAP) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << dendl; + + auto it = m_object_map.begin(); + auto end_it = m_object_map.end(); + auto snap_it = m_snap_object_map.begin(); + uint64_t i = 0; + for (; it != end_it; ++it) { + if (*it == OBJECT_EXISTS_CLEAN && + (i >= m_snap_object_map.size() || + *snap_it == OBJECT_EXISTS)) { + *it = OBJECT_EXISTS; + } + if (i < m_snap_object_map.size()) { + ++snap_it; + } + ++i; + } + } +} + +} // namespace object_map +} // namespace librbd diff --git a/src/librbd/object_map/SnapshotRemoveRequest.h b/src/librbd/object_map/SnapshotRemoveRequest.h new file mode 100644 index 000000000..1e9c75d81 --- /dev/null +++ b/src/librbd/object_map/SnapshotRemoveRequest.h @@ -0,0 +1,88 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_REMOVE_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_REMOVE_REQUEST_H + +#include "include/int_types.h" +#include "include/buffer.h" +#include "common/bit_vector.hpp" +#include "librbd/AsyncRequest.h" + +namespace librbd { +namespace object_map { + +class SnapshotRemoveRequest : public AsyncRequest<> { +public: + /** + * Snapshot rollback goes through the following state machine: + * + * @verbatim + * + * <start> -----------> STATE_LOAD_MAP ----\ + * . * | + * . * (error) | + * . (invalid object map) v | + * . . . > STATE_INVALIDATE_NEXT_MAP | + * . | | + * . | | + * . (fast diff disabled) v v + * . . . . . . . . . . > STATE_REMOVE_MAP + * | + * v + * <finish> + * + * @endverbatim + * + * The _LOAD_MAP state is skipped if the fast diff feature is disabled. + * If the fast diff feature is enabled and the snapshot is flagged as + * invalid, the next snapshot / HEAD object mapis flagged as invalid; + * otherwise, the state machine proceeds to remove the object map. + */ + + SnapshotRemoveRequest(ImageCtx &image_ctx, ceph::shared_mutex* object_map_lock, + ceph::BitVector<2> *object_map, uint64_t snap_id, + Context *on_finish) + : AsyncRequest(image_ctx, on_finish), + m_object_map_lock(object_map_lock), m_object_map(*object_map), + m_snap_id(snap_id), m_next_snap_id(CEPH_NOSNAP) { + } + + void send() override; + +protected: + bool should_complete(int r) override { + return true; + } + +private: + ceph::shared_mutex* m_object_map_lock; + ceph::BitVector<2> &m_object_map; + uint64_t m_snap_id; + uint64_t m_next_snap_id; + + uint64_t m_flags = 0; + + ceph::BitVector<2> m_snap_object_map; + bufferlist m_out_bl; + + void load_map(); + void handle_load_map(int r); + + void remove_snapshot(); + void handle_remove_snapshot(int r); + + void invalidate_next_map(); + void handle_invalidate_next_map(int r); + + void remove_map(); + void handle_remove_map(int r); + + void compute_next_snap_id(); + void update_object_map(); +}; + +} // namespace object_map +} // namespace librbd + +#endif // CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_REMOVE_REQUEST_H diff --git a/src/librbd/object_map/SnapshotRollbackRequest.cc b/src/librbd/object_map/SnapshotRollbackRequest.cc new file mode 100644 index 000000000..7c2f441cc --- /dev/null +++ b/src/librbd/object_map/SnapshotRollbackRequest.cc @@ -0,0 +1,131 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/SnapshotRollbackRequest.h" +#include "common/dout.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/object_map/InvalidateRequest.h" +#include "cls/lock/cls_lock_client.h" +#include <iostream> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::SnapshotRollbackRequest: " + +namespace librbd { +namespace object_map { + +namespace { + +std::ostream& operator<<(std::ostream& os, + const SnapshotRollbackRequest::State& state) { + switch(state) { + case SnapshotRollbackRequest::STATE_READ_MAP: + os << "READ_MAP"; + break; + case SnapshotRollbackRequest::STATE_INVALIDATE_MAP: + os << "INVALIDATE_MAP"; + break; + case SnapshotRollbackRequest::STATE_WRITE_MAP: + os << "WRITE_MAP"; + break; + default: + os << "UNKNOWN (" << static_cast<uint32_t>(state) << ")"; + break; + } + return os; +} + +} // anonymous namespace + +void SnapshotRollbackRequest::send() { + send_read_map(); +} + +bool SnapshotRollbackRequest::should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", " + << "r=" << r << dendl; + if (r < 0 && m_ret_val == 0) { + m_ret_val = r; + } + + bool finished = false; + switch (m_state) { + case STATE_READ_MAP: + if (r < 0) { + // invalidate the snapshot object map + send_invalidate_map(); + } else { + send_write_map(); + } + break; + case STATE_INVALIDATE_MAP: + // invalidate the HEAD object map as well + finished = Request::should_complete(m_ret_val); + break; + case STATE_WRITE_MAP: + finished = Request::should_complete(r); + break; + default: + ceph_abort(); + break; + } + return finished; +} + +void SnapshotRollbackRequest::send_read_map() { + std::string snap_oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": snap_oid=" << snap_oid + << dendl; + m_state = STATE_READ_MAP; + + librados::ObjectReadOperation op; + op.read(0, 0, NULL, NULL); + + librados::AioCompletion *rados_completion = create_callback_completion(); + int r = m_image_ctx.md_ctx.aio_operate(snap_oid, rados_completion, &op, + &m_read_bl); + ceph_assert(r == 0); + rados_completion->release(); +} + +void SnapshotRollbackRequest::send_write_map() { + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + + CephContext *cct = m_image_ctx.cct; + std::string snap_oid(ObjectMap<>::object_map_name(m_image_ctx.id, + CEPH_NOSNAP)); + ldout(cct, 5) << this << " " << __func__ << ": snap_oid=" << snap_oid + << dendl; + m_state = STATE_WRITE_MAP; + + librados::ObjectWriteOperation op; + rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", ""); + op.write_full(m_read_bl); + + librados::AioCompletion *rados_completion = create_callback_completion(); + int r = m_image_ctx.md_ctx.aio_operate(snap_oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +void SnapshotRollbackRequest::send_invalidate_map() { + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + std::unique_lock image_locker{m_image_ctx.image_lock}; + + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + m_state = STATE_INVALIDATE_MAP; + + InvalidateRequest<> *req = new InvalidateRequest<>(m_image_ctx, m_snap_id, + false, + create_callback_context()); + req->send(); +} + +} // namespace object_map +} // namespace librbd diff --git a/src/librbd/object_map/SnapshotRollbackRequest.h b/src/librbd/object_map/SnapshotRollbackRequest.h new file mode 100644 index 000000000..e26b1e0a3 --- /dev/null +++ b/src/librbd/object_map/SnapshotRollbackRequest.h @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_ROLLBACK_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_ROLLBACK_REQUEST_H + +#include "include/int_types.h" +#include "librbd/object_map/Request.h" + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +class SnapshotRollbackRequest : public Request { +public: + /** + * Snapshot rollback goes through the following state machine: + * + * @verbatim + * + * <start> + * | + * v (error) + * STATE_READ_MAP * * * * > STATE_INVALIDATE_MAP + * | | + * v v + * STATE_WRITE_MAP -------> <finish> + * + * @endverbatim + * + * If an error occurs within the READ_MAP state, the associated snapshot's + * object map will be flagged as invalid. Otherwise, an error from any state + * will result in the HEAD object map being flagged as invalid via the base + * class. + */ + enum State { + STATE_READ_MAP, + STATE_INVALIDATE_MAP, + STATE_WRITE_MAP + }; + + SnapshotRollbackRequest(ImageCtx &image_ctx, uint64_t snap_id, + Context *on_finish) + : Request(image_ctx, CEPH_NOSNAP, on_finish), + m_snap_id(snap_id), m_ret_val(0) { + ceph_assert(snap_id != CEPH_NOSNAP); + } + + void send() override; + +protected: + bool should_complete(int r) override; + +private: + State m_state = STATE_READ_MAP; + uint64_t m_snap_id; + int m_ret_val; + + bufferlist m_read_bl; + + void send_read_map(); + void send_invalidate_map(); + void send_write_map(); + +}; + +} // namespace object_map +} // namespace librbd + +#endif // CEPH_LIBRBD_OBJECT_MAP_SNAPSHOT_ROLLBACK_REQUEST_H diff --git a/src/librbd/object_map/Types.h b/src/librbd/object_map/Types.h new file mode 100644 index 000000000..0ce91bd96 --- /dev/null +++ b/src/librbd/object_map/Types.h @@ -0,0 +1,20 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_TYPES_H +#define CEPH_LIBRBD_OBJECT_MAP_TYPES_H + +namespace librbd { +namespace object_map { + +enum DiffState { + DIFF_STATE_HOLE = 0, /* unchanged hole */ + DIFF_STATE_DATA = 1, /* unchanged data */ + DIFF_STATE_HOLE_UPDATED = 2, /* new hole */ + DIFF_STATE_DATA_UPDATED = 3 /* new data */ +}; + +} // namespace object_map +} // namespace librbd + +#endif // CEPH_LIBRBD_OBJECT_MAP_TYPES_H diff --git a/src/librbd/object_map/UnlockRequest.cc b/src/librbd/object_map/UnlockRequest.cc new file mode 100644 index 000000000..0220ec900 --- /dev/null +++ b/src/librbd/object_map/UnlockRequest.cc @@ -0,0 +1,66 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/UnlockRequest.h" +#include "cls/lock/cls_lock_client.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::UnlockRequest: " + +namespace librbd { +namespace object_map { + +using util::create_rados_callback; + +template <typename I> +UnlockRequest<I>::UnlockRequest(I &image_ctx, Context *on_finish) + : m_image_ctx(image_ctx), m_on_finish(on_finish) { +} + +template <typename I> +void UnlockRequest<I>::send() { + send_unlock(); +} + +template <typename I> +void UnlockRequest<I>::send_unlock() { + CephContext *cct = m_image_ctx.cct; + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + ldout(cct, 10) << this << " " << __func__ << ": oid=" << oid << dendl; + + librados::ObjectWriteOperation op; + rados::cls::lock::unlock(&op, RBD_LOCK_NAME, ""); + + using klass = UnlockRequest<I>; + librados::AioCompletion *rados_completion = + create_rados_callback<klass, &klass::handle_unlock>(this); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +Context *UnlockRequest<I>::handle_unlock(int *ret_val) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *ret_val << dendl; + + if (*ret_val < 0 && *ret_val != -ENOENT) { + lderr(m_image_ctx.cct) << "failed to release object map lock: " + << cpp_strerror(*ret_val) << dendl; + + } + + *ret_val = 0; + return m_on_finish; +} + +} // namespace object_map +} // namespace librbd + +template class librbd::object_map::UnlockRequest<librbd::ImageCtx>; diff --git a/src/librbd/object_map/UnlockRequest.h b/src/librbd/object_map/UnlockRequest.h new file mode 100644 index 000000000..ae1d9e934 --- /dev/null +++ b/src/librbd/object_map/UnlockRequest.h @@ -0,0 +1,47 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_UNLOCK_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_UNLOCK_REQUEST_H + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +template <typename ImageCtxT = ImageCtx> +class UnlockRequest { +public: + static UnlockRequest *create(ImageCtxT &image_ctx, Context *on_finish) { + return new UnlockRequest(image_ctx, on_finish); + } + + UnlockRequest(ImageCtxT &image_ctx, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> ----> UNLOCK ----> <finish> + * + * @endverbatim + */ + + ImageCtxT &m_image_ctx; + Context *m_on_finish; + + void send_unlock(); + Context* handle_unlock(int *ret_val); +}; + +} // namespace object_map +} // namespace librbd + +extern template class librbd::object_map::UnlockRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OBJECT_MAP_UNLOCK_REQUEST_H diff --git a/src/librbd/object_map/UpdateRequest.cc b/src/librbd/object_map/UpdateRequest.cc new file mode 100644 index 000000000..30a1f2121 --- /dev/null +++ b/src/librbd/object_map/UpdateRequest.cc @@ -0,0 +1,129 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/object_map/UpdateRequest.h" +#include "include/rbd/object_map_types.h" +#include "include/stringify.h" +#include "common/dout.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "cls/lock/cls_lock_client.h" +#include <string> + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::object_map::UpdateRequest: " << this \ + << " " << __func__ << ": " + +namespace librbd { +namespace object_map { + +namespace { + +// keep aligned to bit_vector 4K block sizes +const uint64_t MAX_OBJECTS_PER_UPDATE = 256 * (1 << 10); + +} + +template <typename I> +void UpdateRequest<I>::send() { + update_object_map(); +} + +template <typename I> +void UpdateRequest<I>::update_object_map() { + ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock)); + ceph_assert(ceph_mutex_is_locked(*m_object_map_lock)); + CephContext *cct = m_image_ctx.cct; + + // break very large requests into manageable batches + m_update_end_object_no = std::min( + m_end_object_no, m_update_start_object_no + MAX_OBJECTS_PER_UPDATE); + + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); + ldout(cct, 20) << "ictx=" << &m_image_ctx << ", oid=" << oid << ", " + << "[" << m_update_start_object_no << "," + << m_update_end_object_no << ") = " + << (m_current_state ? + stringify(static_cast<uint32_t>(*m_current_state)) : "") + << "->" << static_cast<uint32_t>(m_new_state) + << dendl; + + librados::ObjectWriteOperation op; + if (m_snap_id == CEPH_NOSNAP) { + rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, ClsLockType::EXCLUSIVE, "", ""); + } + cls_client::object_map_update(&op, m_update_start_object_no, + m_update_end_object_no, m_new_state, + m_current_state); + + auto rados_completion = librbd::util::create_rados_callback< + UpdateRequest<I>, &UpdateRequest<I>::handle_update_object_map>(this); + std::vector<librados::snap_t> snaps; + int r = m_image_ctx.md_ctx.aio_operate( + oid, rados_completion, &op, 0, snaps, + (m_trace.valid() ? m_trace.get_info() : nullptr)); + ceph_assert(r == 0); + rados_completion->release(); +} + +template <typename I> +void UpdateRequest<I>::handle_update_object_map(int r) { + ldout(m_image_ctx.cct, 20) << "r=" << r << dendl; + + if (r == -ENOENT && m_ignore_enoent) { + r = 0; + } + if (r < 0 && m_ret_val == 0) { + m_ret_val = r; + } + + { + std::shared_lock image_locker{m_image_ctx.image_lock}; + std::unique_lock object_map_locker{*m_object_map_lock}; + update_in_memory_object_map(); + + if (m_update_end_object_no < m_end_object_no) { + m_update_start_object_no = m_update_end_object_no; + update_object_map(); + return; + } + } + + // no more batch updates to send + complete(m_ret_val); +} + +template <typename I> +void UpdateRequest<I>::update_in_memory_object_map() { + ceph_assert(ceph_mutex_is_locked(m_image_ctx.image_lock)); + ceph_assert(ceph_mutex_is_locked(*m_object_map_lock)); + + // rebuilding the object map might update on-disk only + if (m_snap_id == m_image_ctx.snap_id) { + ldout(m_image_ctx.cct, 20) << dendl; + + auto it = m_object_map.begin() + + std::min(m_update_start_object_no, m_object_map.size()); + auto end_it = m_object_map.begin() + + std::min(m_update_end_object_no, m_object_map.size()); + for (; it != end_it; ++it) { + auto state_ref = *it; + uint8_t state = state_ref; + if (!m_current_state || state == *m_current_state || + (*m_current_state == OBJECT_EXISTS && state == OBJECT_EXISTS_CLEAN)) { + state_ref = m_new_state; + } + } + } +} + +template <typename I> +void UpdateRequest<I>::finish_request() { +} + +} // namespace object_map +} // namespace librbd + +template class librbd::object_map::UpdateRequest<librbd::ImageCtx>; diff --git a/src/librbd/object_map/UpdateRequest.h b/src/librbd/object_map/UpdateRequest.h new file mode 100644 index 000000000..b5a72d591 --- /dev/null +++ b/src/librbd/object_map/UpdateRequest.h @@ -0,0 +1,106 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_OBJECT_MAP_UPDATE_REQUEST_H +#define CEPH_LIBRBD_OBJECT_MAP_UPDATE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/object_map/Request.h" +#include "common/bit_vector.hpp" +#include "common/zipkin_trace.h" +#include "librbd/Utils.h" +#include <boost/optional.hpp> + +class Context; + +namespace librbd { + +class ImageCtx; + +namespace object_map { + +template <typename ImageCtxT = librbd::ImageCtx> +class UpdateRequest : public Request { +public: + static UpdateRequest *create(ImageCtx &image_ctx, + ceph::shared_mutex* object_map_lock, + ceph::BitVector<2> *object_map, + uint64_t snap_id, uint64_t start_object_no, + uint64_t end_object_no, uint8_t new_state, + const boost::optional<uint8_t> ¤t_state, + const ZTracer::Trace &parent_trace, + bool ignore_enoent, Context *on_finish) { + return new UpdateRequest(image_ctx, object_map_lock, object_map, snap_id, + start_object_no, end_object_no, new_state, + current_state, parent_trace, ignore_enoent, + on_finish); + } + + UpdateRequest(ImageCtx &image_ctx, ceph::shared_mutex* object_map_lock, + ceph::BitVector<2> *object_map, uint64_t snap_id, + uint64_t start_object_no, uint64_t end_object_no, + uint8_t new_state, + const boost::optional<uint8_t> ¤t_state, + const ZTracer::Trace &parent_trace, bool ignore_enoent, + Context *on_finish) + : Request(image_ctx, snap_id, on_finish), + m_object_map_lock(object_map_lock), m_object_map(*object_map), + m_start_object_no(start_object_no), m_end_object_no(end_object_no), + m_update_start_object_no(start_object_no), m_new_state(new_state), + m_current_state(current_state), + m_trace(util::create_trace(image_ctx, "update object map", parent_trace)), + m_ignore_enoent(ignore_enoent) + { + m_trace.event("start"); + } + virtual ~UpdateRequest() { + m_trace.event("finish"); + } + + void send() override; + +protected: + void finish_request() override; + +private: + /** + * @verbatim + * + * <start> + * | + * |/------------------\ + * v | (repeat in batches) + * UPDATE_OBJECT_MAP -----/ + * | + * v + * <finish> + * + * @endverbatim + */ + + ceph::shared_mutex* m_object_map_lock; + ceph::BitVector<2> &m_object_map; + uint64_t m_start_object_no; + uint64_t m_end_object_no; + uint64_t m_update_start_object_no; + uint64_t m_update_end_object_no = 0; + uint8_t m_new_state; + boost::optional<uint8_t> m_current_state; + ZTracer::Trace m_trace; + bool m_ignore_enoent; + + int m_ret_val = 0; + + void update_object_map(); + void handle_update_object_map(int r); + + void update_in_memory_object_map(); + +}; + +} // namespace object_map +} // namespace librbd + +extern template class librbd::object_map::UpdateRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_OBJECT_MAP_UPDATE_REQUEST_H |