// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #include "include/compat.h" #include "BootstrapRequest.h" #include "CloseImageRequest.h" #include "CreateImageRequest.h" #include "IsPrimaryRequest.h" #include "OpenImageRequest.h" #include "OpenLocalImageRequest.h" #include "common/debug.h" #include "common/dout.h" #include "common/errno.h" #include "common/WorkQueue.h" #include "cls/rbd/cls_rbd_client.h" #include "journal/Journaler.h" #include "librbd/ImageCtx.h" #include "librbd/ImageState.h" #include "librbd/internal.h" #include "librbd/Journal.h" #include "librbd/Utils.h" #include "librbd/journal/Types.h" #include "tools/rbd_mirror/ProgressContext.h" #include "tools/rbd_mirror/ImageSync.h" #include "tools/rbd_mirror/Threads.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rbd_mirror #undef dout_prefix #define dout_prefix *_dout << "rbd::mirror::image_replayer::BootstrapRequest: " \ << this << " " << __func__ << ": " namespace rbd { namespace mirror { namespace image_replayer { using librbd::util::create_context_callback; using librbd::util::create_rados_callback; using librbd::util::unique_lock_name; template BootstrapRequest::BootstrapRequest( Threads* threads, librados::IoCtx &local_io_ctx, librados::IoCtx &remote_io_ctx, InstanceWatcher *instance_watcher, I **local_image_ctx, const std::string &local_image_id, const std::string &remote_image_id, const std::string &global_image_id, const std::string &local_mirror_uuid, const std::string &remote_mirror_uuid, Journaler *journaler, cls::journal::ClientState *client_state, MirrorPeerClientMeta *client_meta, Context *on_finish, bool *do_resync, rbd::mirror::ProgressContext *progress_ctx) : BaseRequest("rbd::mirror::image_replayer::BootstrapRequest", reinterpret_cast(local_io_ctx.cct()), on_finish), m_threads(threads), m_local_io_ctx(local_io_ctx), m_remote_io_ctx(remote_io_ctx), m_instance_watcher(instance_watcher), m_local_image_ctx(local_image_ctx), m_local_image_id(local_image_id), m_remote_image_id(remote_image_id), m_global_image_id(global_image_id), m_local_mirror_uuid(local_mirror_uuid), m_remote_mirror_uuid(remote_mirror_uuid), m_journaler(journaler), m_client_state(client_state), m_client_meta(client_meta), m_progress_ctx(progress_ctx), m_do_resync(do_resync), m_lock(unique_lock_name("BootstrapRequest::m_lock", this)) { dout(10) << dendl; } template BootstrapRequest::~BootstrapRequest() { ceph_assert(m_remote_image_ctx == nullptr); } template bool BootstrapRequest::is_syncing() const { Mutex::Locker locker(m_lock); return (m_image_sync != nullptr); } template void BootstrapRequest::send() { *m_do_resync = false; get_remote_tag_class(); } template void BootstrapRequest::cancel() { dout(10) << dendl; Mutex::Locker locker(m_lock); m_canceled = true; if (m_image_sync != nullptr) { m_image_sync->cancel(); } } template void BootstrapRequest::get_remote_tag_class() { dout(15) << dendl; update_progress("GET_REMOTE_TAG_CLASS"); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_get_remote_tag_class>( this); m_journaler->get_client(librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx); } template void BootstrapRequest::handle_get_remote_tag_class(int r) { dout(15) << "r=" << r << dendl; if (r < 0) { derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl; finish(r); return; } librbd::journal::ClientData client_data; auto it = m_client.data.cbegin(); try { decode(client_data, it); } catch (const buffer::error &err) { derr << "failed to decode remote client meta data: " << err.what() << dendl; finish(-EBADMSG); return; } librbd::journal::ImageClientMeta *client_meta = boost::get(&client_data.client_meta); if (client_meta == nullptr) { derr << "unknown remote client registration" << dendl; finish(-EINVAL); return; } m_remote_tag_class = client_meta->tag_class; dout(10) << "remote tag class=" << m_remote_tag_class << dendl; open_remote_image(); } template void BootstrapRequest::open_remote_image() { dout(15) << "remote_image_id=" << m_remote_image_id << dendl; update_progress("OPEN_REMOTE_IMAGE"); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_open_remote_image>( this); OpenImageRequest *request = OpenImageRequest::create( m_remote_io_ctx, &m_remote_image_ctx, m_remote_image_id, false, ctx); request->send(); } template void BootstrapRequest::handle_open_remote_image(int r) { dout(15) << "r=" << r << dendl; if (r < 0) { derr << "failed to open remote image: " << cpp_strerror(r) << dendl; ceph_assert(m_remote_image_ctx == nullptr); finish(r); return; } is_primary(); } template void BootstrapRequest::is_primary() { dout(15) << dendl; update_progress("OPEN_REMOTE_IMAGE"); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_is_primary>( this); IsPrimaryRequest *request = IsPrimaryRequest::create(m_remote_image_ctx, &m_primary, ctx); request->send(); } template void BootstrapRequest::handle_is_primary(int r) { dout(15) << "r=" << r << dendl; if (r == -ENOENT) { dout(5) << "remote image is not mirrored" << dendl; m_ret_val = -EREMOTEIO; close_remote_image(); return; } else if (r < 0) { derr << "error querying remote image primary status: " << cpp_strerror(r) << dendl; m_ret_val = r; close_remote_image(); return; } if (!m_primary) { if (m_local_image_id.empty()) { // no local image and remote isn't primary -- don't sync it dout(5) << "remote image is not primary -- not syncing" << dendl; m_ret_val = -EREMOTEIO; close_remote_image(); return; } else if (m_client_meta->state != librbd::journal::MIRROR_PEER_STATE_REPLAYING) { // ensure we attempt to re-sync to remote if it's re-promoted dout(5) << "remote image is not primary -- sync interrupted" << dendl; m_ret_val = -EREMOTEIO; update_client_state(); return; } } if (!m_client_meta->image_id.empty()) { // have an image id -- use that to open the image since a deletion (resync) // will leave the old image id registered in the peer m_local_image_id = m_client_meta->image_id; } if (m_local_image_id.empty()) { // prepare to create local image update_client_image(); return; } open_local_image(); } template void BootstrapRequest::update_client_state() { dout(15) << dendl; update_progress("UPDATE_CLIENT_STATE"); librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta); client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; librbd::journal::ClientData client_data(client_meta); bufferlist data_bl; encode(client_data, data_bl); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_update_client_state>( this); m_journaler->update_client(data_bl, ctx); } template void BootstrapRequest::handle_update_client_state(int r) { dout(15) << "r=" << r << dendl; if (r < 0) { derr << "failed to update client: " << cpp_strerror(r) << dendl; } else { m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; } close_remote_image(); } template void BootstrapRequest::open_local_image() { dout(15) << "local_image_id=" << m_local_image_id << dendl; update_progress("OPEN_LOCAL_IMAGE"); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_open_local_image>( this); OpenLocalImageRequest *request = OpenLocalImageRequest::create( m_local_io_ctx, m_local_image_ctx, m_local_image_id, m_threads->work_queue, ctx); request->send(); } template void BootstrapRequest::handle_open_local_image(int r) { dout(15) << "r=" << r << dendl; if (r == -ENOENT) { ceph_assert(*m_local_image_ctx == nullptr); dout(10) << "local image missing" << dendl; unregister_client(); return; } else if (r == -EREMOTEIO) { ceph_assert(*m_local_image_ctx == nullptr); dout(10) << "local image is primary -- skipping image replay" << dendl; m_ret_val = r; close_remote_image(); return; } else if (r < 0) { ceph_assert(*m_local_image_ctx == nullptr); derr << "failed to open local image: " << cpp_strerror(r) << dendl; m_ret_val = r; close_remote_image(); return; } I *local_image_ctx = (*m_local_image_ctx); { local_image_ctx->snap_lock.get_read(); if (local_image_ctx->journal == nullptr) { local_image_ctx->snap_lock.put_read(); derr << "local image does not support journaling" << dendl; m_ret_val = -EINVAL; close_local_image(); return; } r = (*m_local_image_ctx)->journal->is_resync_requested(m_do_resync); if (r < 0) { local_image_ctx->snap_lock.put_read(); derr << "failed to check if a resync was requested" << dendl; m_ret_val = r; close_local_image(); return; } m_local_tag_tid = local_image_ctx->journal->get_tag_tid(); m_local_tag_data = local_image_ctx->journal->get_tag_data(); dout(10) << "local tag=" << m_local_tag_tid << ", " << "local tag data=" << m_local_tag_data << dendl; local_image_ctx->snap_lock.put_read(); } if (m_local_tag_data.mirror_uuid != m_remote_mirror_uuid && !m_primary) { // if the local mirror is not linked to the (now) non-primary image, // stop the replay. Otherwise, we ignore that the remote is non-primary // so that we can replay the demotion dout(5) << "remote image is not primary -- skipping image replay" << dendl; m_ret_val = -EREMOTEIO; close_local_image(); return; } if (*m_do_resync) { close_remote_image(); return; } if (*m_client_state == cls::journal::CLIENT_STATE_DISCONNECTED) { dout(10) << "client flagged disconnected -- skipping bootstrap" << dendl; // The caller is expected to detect disconnect initializing remote journal. m_ret_val = 0; close_remote_image(); return; } get_remote_tags(); } template void BootstrapRequest::unregister_client() { dout(15) << dendl; update_progress("UNREGISTER_CLIENT"); m_local_image_id = ""; Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_unregister_client>( this); m_journaler->unregister_client(ctx); } template void BootstrapRequest::handle_unregister_client(int r) { dout(15) << "r=" << r << dendl; if (r < 0) { derr << "failed to unregister with remote journal: " << cpp_strerror(r) << dendl; m_ret_val = r; close_remote_image(); return; } *m_client_meta = librbd::journal::MirrorPeerClientMeta(""); register_client(); } template void BootstrapRequest::register_client() { dout(15) << dendl; update_progress("REGISTER_CLIENT"); ceph_assert(m_local_image_id.empty()); librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta; mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; librbd::journal::ClientData client_data{mirror_peer_client_meta}; bufferlist client_data_bl; encode(client_data, client_data_bl); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_register_client>( this); m_journaler->register_client(client_data_bl, ctx); } template void BootstrapRequest::handle_register_client(int r) { dout(15) << "r=" << r << dendl; if (r < 0) { derr << "failed to register with remote journal: " << cpp_strerror(r) << dendl; m_ret_val = r; close_remote_image(); return; } *m_client_state = cls::journal::CLIENT_STATE_CONNECTED; *m_client_meta = librbd::journal::MirrorPeerClientMeta(); m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; is_primary(); } template void BootstrapRequest::update_client_image() { ceph_assert(m_local_image_id.empty()); assert(m_local_image_id.empty()); m_local_image_id = librbd::util::generate_image_id(m_local_io_ctx); dout(15) << "local_image_id=" << m_local_image_id << dendl; update_progress("UPDATE_CLIENT_IMAGE"); librbd::journal::MirrorPeerClientMeta client_meta{m_local_image_id}; client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING; librbd::journal::ClientData client_data(client_meta); bufferlist data_bl; encode(client_data, data_bl); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_update_client_image>( this); m_journaler->update_client(data_bl, ctx); } template void BootstrapRequest::handle_update_client_image(int r) { dout(15) << "r=" << r << dendl; if (r < 0) { derr << "failed to update client: " << cpp_strerror(r) << dendl; m_ret_val = r; close_remote_image(); return; } if (m_canceled) { dout(10) << "request canceled" << dendl; m_ret_val = -ECANCELED; close_remote_image(); return; } *m_client_meta = {m_local_image_id}; m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_SYNCING; create_local_image(); } template void BootstrapRequest::create_local_image() { dout(15) << "local_image_id=" << m_local_image_id << dendl; update_progress("CREATE_LOCAL_IMAGE"); m_remote_image_ctx->snap_lock.get_read(); std::string image_name = m_remote_image_ctx->name; m_remote_image_ctx->snap_lock.put_read(); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_create_local_image>( this); CreateImageRequest *request = CreateImageRequest::create( m_threads, m_local_io_ctx, m_global_image_id, m_remote_mirror_uuid, image_name, m_local_image_id, m_remote_image_ctx, ctx); request->send(); } template void BootstrapRequest::handle_create_local_image(int r) { dout(15) << "r=" << r << dendl; if (r == -EBADF) { dout(5) << "image id " << m_local_image_id << " already in-use" << dendl; m_local_image_id = ""; update_client_image(); return; } else if (r < 0) { if (r == -ENOENT) { dout(10) << "parent image does not exist" << dendl; } else { derr << "failed to create local image: " << cpp_strerror(r) << dendl; } m_ret_val = r; close_remote_image(); return; } open_local_image(); } template void BootstrapRequest::get_remote_tags() { if (m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_SYNCING) { // optimization -- no need to compare remote tags if we just created // the image locally or sync was interrupted image_sync(); return; } dout(15) << dendl; update_progress("GET_REMOTE_TAGS"); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_get_remote_tags>(this); m_journaler->get_tags(m_remote_tag_class, &m_remote_tags, ctx); } template void BootstrapRequest::handle_get_remote_tags(int r) { dout(15) << "r=" << r << dendl; if (r < 0) { derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl; m_ret_val = r; close_local_image(); return; } if (m_canceled) { dout(10) << "request canceled" << dendl; m_ret_val = -ECANCELED; close_local_image(); return; } // At this point, the local image was existing, non-primary, and replaying; // and the remote image is primary. Attempt to link the local image's most // recent tag to the remote image's tag chain. bool remote_tag_data_valid = false; librbd::journal::TagData remote_tag_data; boost::optional remote_orphan_tag_tid = boost::make_optional(false, 0U); bool reconnect_orphan = false; // decode the remote tags for (auto &remote_tag : m_remote_tags) { if (m_local_tag_data.predecessor.commit_valid && m_local_tag_data.predecessor.mirror_uuid == m_remote_mirror_uuid && m_local_tag_data.predecessor.tag_tid > remote_tag.tid) { dout(15) << "skipping processed predecessor remote tag " << remote_tag.tid << dendl; continue; } try { auto it = remote_tag.data.cbegin(); decode(remote_tag_data, it); remote_tag_data_valid = true; } catch (const buffer::error &err) { derr << "failed to decode remote tag " << remote_tag.tid << ": " << err.what() << dendl; m_ret_val = -EBADMSG; close_local_image(); return; } dout(10) << "decoded remote tag " << remote_tag.tid << ": " << remote_tag_data << dendl; if (!m_local_tag_data.predecessor.commit_valid) { // newly synced local image (no predecessor) replays from the first tag if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) { dout(15) << "skipping non-primary remote tag" << dendl; continue; } dout(10) << "using initial primary remote tag" << dendl; break; } if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) { // demotion last available local epoch if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid && remote_tag_data.predecessor.commit_valid && remote_tag_data.predecessor.tag_tid == m_local_tag_data.predecessor.tag_tid) { // demotion matches remote epoch if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid && m_local_tag_data.predecessor.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { // local demoted and remote has matching event dout(15) << "found matching local demotion tag" << dendl; remote_orphan_tag_tid = remote_tag.tid; continue; } if (m_local_tag_data.predecessor.mirror_uuid == m_remote_mirror_uuid && remote_tag_data.predecessor.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { // remote demoted and local has matching event dout(15) << "found matching remote demotion tag" << dendl; remote_orphan_tag_tid = remote_tag.tid; continue; } } if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID && remote_tag_data.predecessor.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID && remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid && remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) { // remote promotion tag chained to remote/local demotion tag dout(15) << "found chained remote promotion tag" << dendl; reconnect_orphan = true; break; } // promotion must follow demotion remote_orphan_tag_tid = boost::none; } } if (remote_tag_data_valid && m_local_tag_data.mirror_uuid == m_remote_mirror_uuid) { dout(10) << "local image is in clean replay state" << dendl; } else if (reconnect_orphan) { dout(10) << "remote image was demoted/promoted" << dendl; } else { derr << "split-brain detected -- skipping image replay" << dendl; m_ret_val = -EEXIST; close_local_image(); return; } image_sync(); } template void BootstrapRequest::image_sync() { if (m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_REPLAYING) { // clean replay state -- no image sync required close_remote_image(); return; } { Mutex::Locker locker(m_lock); if (m_canceled) { m_ret_val = -ECANCELED; } else { dout(15) << dendl; ceph_assert(m_image_sync == nullptr); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_image_sync>(this); m_image_sync = ImageSync::create( *m_local_image_ctx, m_remote_image_ctx, m_threads->timer, &m_threads->timer_lock, m_local_mirror_uuid, m_journaler, m_client_meta, m_threads->work_queue, m_instance_watcher, ctx, m_progress_ctx); m_image_sync->get(); m_lock.Unlock(); update_progress("IMAGE_SYNC"); m_lock.Lock(); m_image_sync->send(); return; } } dout(10) << "request canceled" << dendl; close_remote_image(); } template void BootstrapRequest::handle_image_sync(int r) { dout(15) << "r=" << r << dendl; { Mutex::Locker locker(m_lock); m_image_sync->put(); m_image_sync = nullptr; if (m_canceled) { dout(10) << "request canceled" << dendl; m_ret_val = -ECANCELED; } if (r < 0) { derr << "failed to sync remote image: " << cpp_strerror(r) << dendl; m_ret_val = r; } } close_remote_image(); } template void BootstrapRequest::close_local_image() { dout(15) << dendl; update_progress("CLOSE_LOCAL_IMAGE"); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_close_local_image>( this); CloseImageRequest *request = CloseImageRequest::create( m_local_image_ctx, ctx); request->send(); } template void BootstrapRequest::handle_close_local_image(int r) { dout(15) << "r=" << r << dendl; if (r < 0) { derr << "error encountered closing local image: " << cpp_strerror(r) << dendl; } close_remote_image(); } template void BootstrapRequest::close_remote_image() { dout(15) << dendl; update_progress("CLOSE_REMOTE_IMAGE"); Context *ctx = create_context_callback< BootstrapRequest, &BootstrapRequest::handle_close_remote_image>( this); CloseImageRequest *request = CloseImageRequest::create( &m_remote_image_ctx, ctx); request->send(); } template void BootstrapRequest::handle_close_remote_image(int r) { dout(15) << "r=" << r << dendl; if (r < 0) { derr << "error encountered closing remote image: " << cpp_strerror(r) << dendl; } finish(m_ret_val); } template void BootstrapRequest::update_progress(const std::string &description) { dout(15) << description << dendl; if (m_progress_ctx) { m_progress_ctx->update_progress(description); } } } // namespace image_replayer } // namespace mirror } // namespace rbd template class rbd::mirror::image_replayer::BootstrapRequest;