// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #include "librbd/io/CopyupRequest.h" #include "common/ceph_context.h" #include "common/dout.h" #include "common/errno.h" #include "common/Mutex.h" #include "common/WorkQueue.h" #include "librbd/AsyncObjectThrottle.h" #include "librbd/ExclusiveLock.h" #include "librbd/ImageCtx.h" #include "librbd/ObjectMap.h" #include "librbd/Utils.h" #include "librbd/deep_copy/ObjectCopyRequest.h" #include "librbd/io/AioCompletion.h" #include "librbd/io/ImageRequest.h" #include "librbd/io/ObjectRequest.h" #include "librbd/io/ReadResult.h" #include #include #include #define dout_subsys ceph_subsys_rbd #undef dout_prefix #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \ << " " << __func__ << ": " namespace librbd { namespace io { namespace { template class C_UpdateObjectMap : public C_AsyncObjectThrottle { public: C_UpdateObjectMap(AsyncObjectThrottle &throttle, I *image_ctx, uint64_t object_no, uint8_t head_object_map_state, const std::vector *snap_ids, bool first_snap_is_clean, const ZTracer::Trace &trace, size_t snap_id_idx) : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no), m_head_object_map_state(head_object_map_state), m_snap_ids(*snap_ids), m_first_snap_is_clean(first_snap_is_clean), m_trace(trace), m_snap_id_idx(snap_id_idx) { } int send() override { auto& image_ctx = this->m_image_ctx; ceph_assert(image_ctx.owner_lock.is_locked()); if (image_ctx.exclusive_lock == nullptr) { return 1; } ceph_assert(image_ctx.exclusive_lock->is_lock_owner()); RWLock::RLocker snap_locker(image_ctx.snap_lock); if (image_ctx.object_map == nullptr) { return 1; } uint64_t snap_id = m_snap_ids[m_snap_id_idx]; if (snap_id == CEPH_NOSNAP) { return update_head(); } else { return update_snapshot(snap_id); } } int update_head() { auto& image_ctx = this->m_image_ctx; RWLock::WLocker object_map_locker(image_ctx.object_map_lock); bool sent = image_ctx.object_map->template aio_update( CEPH_NOSNAP, m_object_no, m_head_object_map_state, {}, m_trace, false, this); return (sent ? 0 : 1); } int update_snapshot(uint64_t snap_id) { auto& image_ctx = this->m_image_ctx; uint8_t state = OBJECT_EXISTS; if (image_ctx.test_features(RBD_FEATURE_FAST_DIFF, image_ctx.snap_lock) && (m_snap_id_idx > 0 || m_first_snap_is_clean)) { // first snapshot should be exists+dirty since it contains // the copyup data -- later snapshots inherit the data. state = OBJECT_EXISTS_CLEAN; } RWLock::RLocker object_map_locker(image_ctx.object_map_lock); bool sent = image_ctx.object_map->template aio_update( snap_id, m_object_no, state, {}, m_trace, true, this); ceph_assert(sent); return 0; } private: uint64_t m_object_no; uint8_t m_head_object_map_state; const std::vector &m_snap_ids; bool m_first_snap_is_clean; const ZTracer::Trace &m_trace; size_t m_snap_id_idx; }; } // anonymous namespace template CopyupRequest::CopyupRequest(I *ictx, const std::string &oid, uint64_t objectno, Extents &&image_extents, const ZTracer::Trace &parent_trace) : m_image_ctx(ictx), m_oid(oid), m_object_no(objectno), m_image_extents(image_extents), m_trace(util::create_trace(*m_image_ctx, "copy-up", parent_trace)), m_lock("CopyupRequest", false, false) { ceph_assert(m_image_ctx->data_ctx.is_valid()); m_async_op.start_op(*util::get_image_ctx(m_image_ctx)); } template CopyupRequest::~CopyupRequest() { ceph_assert(m_pending_requests.empty()); m_async_op.finish_op(); } template void CopyupRequest::append_request(AbstractObjectWriteRequest *req) { Mutex::Locker locker(m_lock); auto cct = m_image_ctx->cct; ldout(cct, 20) << "oid=" << m_oid << ", " << "object_request=" << req << ", " << "append=" << m_append_request_permitted << dendl; if (m_append_request_permitted) { m_pending_requests.push_back(req); } else { m_restart_requests.push_back(req); } } template void CopyupRequest::send() { read_from_parent(); } template void CopyupRequest::read_from_parent() { auto cct = m_image_ctx->cct; RWLock::RLocker snap_locker(m_image_ctx->snap_lock); RWLock::RLocker parent_locker(m_image_ctx->parent_lock); if (m_image_ctx->parent == nullptr) { ldout(cct, 5) << "parent detached" << dendl; m_image_ctx->op_work_queue->queue( util::create_context_callback< CopyupRequest, &CopyupRequest::handle_read_from_parent>(this), -ENOENT); return; } else if (is_deep_copy()) { deep_copy(); return; } auto comp = AioCompletion::create_and_start< CopyupRequest, &CopyupRequest::handle_read_from_parent>( this, util::get_image_ctx(m_image_ctx->parent), AIO_TYPE_READ); ldout(cct, 20) << "oid=" << m_oid << ", " << "completion=" << comp << ", " << "extents=" << m_image_extents << dendl; ImageRequest::aio_read(m_image_ctx->parent, comp, std::move(m_image_extents), ReadResult{&m_copyup_data}, 0, m_trace); } template void CopyupRequest::handle_read_from_parent(int r) { auto cct = m_image_ctx->cct; ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl; m_image_ctx->snap_lock.get_read(); m_lock.Lock(); m_copyup_is_zero = m_copyup_data.is_zero(); m_copyup_required = is_copyup_required(); disable_append_requests(); if (r < 0 && r != -ENOENT) { m_lock.Unlock(); m_image_ctx->snap_lock.put_read(); lderr(cct) << "error reading from parent: " << cpp_strerror(r) << dendl; finish(r); return; } if (!m_copyup_required) { m_lock.Unlock(); m_image_ctx->snap_lock.put_read(); ldout(cct, 20) << "no-op, skipping" << dendl; finish(0); return; } // copyup() will affect snapshots only if parent data is not all // zeros. if (!m_copyup_is_zero) { m_snap_ids.insert(m_snap_ids.end(), m_image_ctx->snaps.rbegin(), m_image_ctx->snaps.rend()); } m_lock.Unlock(); m_image_ctx->snap_lock.put_read(); update_object_maps(); } template void CopyupRequest::deep_copy() { auto cct = m_image_ctx->cct; ceph_assert(m_image_ctx->snap_lock.is_locked()); ceph_assert(m_image_ctx->parent_lock.is_locked()); ceph_assert(m_image_ctx->parent != nullptr); m_lock.Lock(); m_flatten = is_copyup_required() ? true : m_image_ctx->migration_info.flatten; m_lock.Unlock(); ldout(cct, 20) << "oid=" << m_oid << ", flatten=" << m_flatten << dendl; auto ctx = util::create_context_callback< CopyupRequest, &CopyupRequest::handle_deep_copy>(this); auto req = deep_copy::ObjectCopyRequest::create( m_image_ctx->parent, m_image_ctx, 0, 0, m_image_ctx->migration_info.snap_map, m_object_no, m_flatten, ctx); req->send(); } template void CopyupRequest::handle_deep_copy(int r) { auto cct = m_image_ctx->cct; ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl; m_image_ctx->snap_lock.get_read(); m_lock.Lock(); m_copyup_required = is_copyup_required(); if (r == -ENOENT && !m_flatten && m_copyup_required) { m_lock.Unlock(); m_image_ctx->snap_lock.put_read(); ldout(cct, 10) << "restart deep-copy with flatten" << dendl; send(); return; } disable_append_requests(); if (r < 0 && r != -ENOENT) { m_lock.Unlock(); m_image_ctx->snap_lock.put_read(); lderr(cct) << "error encountered during deep-copy: " << cpp_strerror(r) << dendl; finish(r); return; } if (!m_copyup_required && !is_update_object_map_required(r)) { m_lock.Unlock(); m_image_ctx->snap_lock.put_read(); if (r == -ENOENT) { r = 0; } ldout(cct, 20) << "skipping" << dendl; finish(r); return; } // For deep-copy, copyup() will never affect snapshots. However, // this state machine is responsible for updating object maps for // snapshots that have been created on destination image after // migration started. if (r != -ENOENT) { compute_deep_copy_snap_ids(); } m_lock.Unlock(); m_image_ctx->snap_lock.put_read(); update_object_maps(); } template void CopyupRequest::update_object_maps() { RWLock::RLocker owner_locker(m_image_ctx->owner_lock); RWLock::RLocker snap_locker(m_image_ctx->snap_lock); if (m_image_ctx->object_map == nullptr) { snap_locker.unlock(); owner_locker.unlock(); copyup(); return; } auto cct = m_image_ctx->cct; ldout(cct, 20) << "oid=" << m_oid << dendl; bool copy_on_read = m_pending_requests.empty(); uint8_t head_object_map_state = OBJECT_EXISTS; if (copy_on_read && !m_snap_ids.empty() && m_image_ctx->test_features(RBD_FEATURE_FAST_DIFF, m_image_ctx->snap_lock)) { // HEAD is non-dirty since data is tied to first snapshot head_object_map_state = OBJECT_EXISTS_CLEAN; } auto r_it = m_pending_requests.rbegin(); if (r_it != m_pending_requests.rend()) { // last write-op determines the final object map state head_object_map_state = (*r_it)->get_pre_write_object_map_state(); } RWLock::WLocker object_map_locker(m_image_ctx->object_map_lock); if ((*m_image_ctx->object_map)[m_object_no] != head_object_map_state) { // (maybe) need to update the HEAD object map state m_snap_ids.push_back(CEPH_NOSNAP); } object_map_locker.unlock(); snap_locker.unlock(); ceph_assert(m_image_ctx->exclusive_lock->is_lock_owner()); typename AsyncObjectThrottle::ContextFactory context_factory( boost::lambda::bind(boost::lambda::new_ptr>(), boost::lambda::_1, m_image_ctx, m_object_no, head_object_map_state, &m_snap_ids, m_first_snap_is_clean, m_trace, boost::lambda::_2)); auto ctx = util::create_context_callback< CopyupRequest, &CopyupRequest::handle_update_object_maps>(this); auto throttle = new AsyncObjectThrottle( nullptr, *m_image_ctx, context_factory, ctx, nullptr, 0, m_snap_ids.size()); throttle->start_ops( m_image_ctx->config.template get_val("rbd_concurrent_management_ops")); } template void CopyupRequest::handle_update_object_maps(int r) { auto cct = m_image_ctx->cct; ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl; if (r < 0) { lderr(m_image_ctx->cct) << "failed to update object map: " << cpp_strerror(r) << dendl; finish(r); return; } copyup(); } template void CopyupRequest::copyup() { auto cct = m_image_ctx->cct; m_image_ctx->snap_lock.get_read(); auto snapc = m_image_ctx->snapc; m_image_ctx->snap_lock.put_read(); m_lock.Lock(); if (!m_copyup_required) { m_lock.Unlock(); ldout(cct, 20) << "skipping copyup" << dendl; finish(0); return; } ldout(cct, 20) << "oid=" << m_oid << dendl; bool copy_on_read = m_pending_requests.empty(); bool deep_copyup = !snapc.snaps.empty() && !m_copyup_is_zero; if (m_copyup_is_zero) { m_copyup_data.clear(); } int r; librados::ObjectWriteOperation copyup_op; if (copy_on_read || deep_copyup) { copyup_op.exec("rbd", "copyup", m_copyup_data); ObjectRequest::add_write_hint(*m_image_ctx, ©up_op); ++m_pending_copyups; } librados::ObjectWriteOperation write_op; if (!copy_on_read) { if (!deep_copyup) { write_op.exec("rbd", "copyup", m_copyup_data); ObjectRequest::add_write_hint(*m_image_ctx, &write_op); } // merge all pending write ops into this single RADOS op for (auto req : m_pending_requests) { ldout(cct, 20) << "add_copyup_ops " << req << dendl; req->add_copyup_ops(&write_op); } if (write_op.size() > 0) { ++m_pending_copyups; } } m_lock.Unlock(); // issue librados ops at the end to simplify test cases std::vector snaps; if (copyup_op.size() > 0) { // send only the copyup request with a blank snapshot context so that // all snapshots are detected from the parent for this object. If // this is a CoW request, a second request will be created for the // actual modification. ldout(cct, 20) << "copyup with empty snapshot context" << dendl; auto comp = util::create_rados_callback< CopyupRequest, &CopyupRequest::handle_copyup>(this); r = m_image_ctx->data_ctx.aio_operate( m_oid, comp, ©up_op, 0, snaps, (m_trace.valid() ? m_trace.get_info() : nullptr)); ceph_assert(r == 0); comp->release(); } if (write_op.size() > 0) { // compare-and-write doesn't add any write ops (copyup+cmpext+write // can't be executed in the same RADOS op because, unless the object // was already present in the clone, cmpext wouldn't see it) ldout(cct, 20) << (!deep_copyup && write_op.size() > 2 ? "copyup + ops" : !deep_copyup ? "copyup" : "ops") << " with current snapshot context" << dendl; snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); auto comp = util::create_rados_callback< CopyupRequest, &CopyupRequest::handle_copyup>(this); r = m_image_ctx->data_ctx.aio_operate( m_oid, comp, &write_op, snapc.seq, snaps, (m_trace.valid() ? m_trace.get_info() : nullptr)); ceph_assert(r == 0); comp->release(); } } template void CopyupRequest::handle_copyup(int r) { auto cct = m_image_ctx->cct; unsigned pending_copyups; { Mutex::Locker locker(m_lock); ceph_assert(m_pending_copyups > 0); pending_copyups = --m_pending_copyups; } ldout(cct, 20) << "oid=" << m_oid << ", " << "r=" << r << ", " << "pending=" << pending_copyups << dendl; if (r < 0 && r != -ENOENT) { lderr(cct) << "failed to copyup object: " << cpp_strerror(r) << dendl; complete_requests(false, r); } if (pending_copyups == 0) { finish(0); } } template void CopyupRequest::finish(int r) { auto cct = m_image_ctx->cct; ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl; complete_requests(true, r); delete this; } template void CopyupRequest::complete_requests(bool override_restart_retval, int r) { auto cct = m_image_ctx->cct; remove_from_list(); while (!m_pending_requests.empty()) { auto it = m_pending_requests.begin(); auto req = *it; ldout(cct, 20) << "completing request " << req << dendl; req->handle_copyup(r); m_pending_requests.erase(it); } if (override_restart_retval) { r = -ERESTART; } while (!m_restart_requests.empty()) { auto it = m_restart_requests.begin(); auto req = *it; ldout(cct, 20) << "restarting request " << req << dendl; req->handle_copyup(r); m_restart_requests.erase(it); } } template void CopyupRequest::disable_append_requests() { ceph_assert(m_lock.is_locked()); m_append_request_permitted = false; } template void CopyupRequest::remove_from_list() { Mutex::Locker copyup_list_locker(m_image_ctx->copyup_list_lock); auto it = m_image_ctx->copyup_list.find(m_object_no); if (it != m_image_ctx->copyup_list.end()) { m_image_ctx->copyup_list.erase(it); } } template bool CopyupRequest::is_copyup_required() { ceph_assert(m_lock.is_locked()); bool copy_on_read = m_pending_requests.empty(); if (copy_on_read) { // always force a copyup if CoR enabled return true; } if (!m_copyup_is_zero) { return true; } for (auto req : m_pending_requests) { if (!req->is_empty_write_op()) { return true; } } return false; } template bool CopyupRequest::is_deep_copy() const { ceph_assert(m_image_ctx->snap_lock.is_locked()); return !m_image_ctx->migration_info.empty(); } template bool CopyupRequest::is_update_object_map_required(int r) { ceph_assert(m_image_ctx->snap_lock.is_locked()); if (r < 0) { return false; } if (m_image_ctx->object_map == nullptr) { return false; } if (m_image_ctx->migration_info.empty()) { // migration might have completed while IO was in-flight, // assume worst-case and perform an object map update return true; } auto it = m_image_ctx->migration_info.snap_map.find(CEPH_NOSNAP); ceph_assert(it != m_image_ctx->migration_info.snap_map.end()); return it->second[0] != CEPH_NOSNAP; } template void CopyupRequest::compute_deep_copy_snap_ids() { ceph_assert(m_image_ctx->snap_lock.is_locked()); // don't copy ids for the snaps updated by object deep copy or // that don't overlap std::set deep_copied; for (auto &it : m_image_ctx->migration_info.snap_map) { if (it.first != CEPH_NOSNAP) { deep_copied.insert(it.second.front()); } } RWLock::RLocker parent_locker(m_image_ctx->parent_lock); std::copy_if(m_image_ctx->snaps.rbegin(), m_image_ctx->snaps.rend(), std::back_inserter(m_snap_ids), [this, cct=m_image_ctx->cct, &deep_copied](uint64_t snap_id) { if (deep_copied.count(snap_id)) { m_first_snap_is_clean = true; return false; } uint64_t parent_overlap = 0; int r = m_image_ctx->get_parent_overlap(snap_id, &parent_overlap); if (r < 0) { ldout(cct, 5) << "failed getting parent overlap for snap_id: " << snap_id << ": " << cpp_strerror(r) << dendl; } if (parent_overlap == 0) { return false; } std::vector> extents; Striper::extent_to_file(cct, &m_image_ctx->layout, m_object_no, 0, m_image_ctx->layout.object_size, extents); auto overlap = m_image_ctx->prune_parent_extents( extents, parent_overlap); return overlap > 0; }); } } // namespace io } // namespace librbd template class librbd::io::CopyupRequest;