diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/tools/rbd_mirror | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/tools/rbd_mirror')
136 files changed, 30106 insertions, 0 deletions
diff --git a/src/tools/rbd_mirror/BaseRequest.h b/src/tools/rbd_mirror/BaseRequest.h new file mode 100644 index 000000000..0da98651d --- /dev/null +++ b/src/tools/rbd_mirror/BaseRequest.h @@ -0,0 +1,33 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_BASE_REQUEST_H +#define CEPH_RBD_MIRROR_BASE_REQUEST_H + +#include "include/Context.h" + +namespace rbd { +namespace mirror { + +class BaseRequest { +public: + BaseRequest(Context *on_finish) : m_on_finish(on_finish) { + } + virtual ~BaseRequest() {} + + virtual void send() = 0; + +protected: + virtual void finish(int r) { + m_on_finish->complete(r); + delete this; + } + +private: + Context *m_on_finish; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_BASE_REQUEST_H diff --git a/src/tools/rbd_mirror/CMakeLists.txt b/src/tools/rbd_mirror/CMakeLists.txt new file mode 100644 index 000000000..43a6f03fe --- /dev/null +++ b/src/tools/rbd_mirror/CMakeLists.txt @@ -0,0 +1,91 @@ +add_library(rbd_mirror_types STATIC + image_map/Types.cc + instance_watcher/Types.cc + leader_watcher/Types.cc) + +set(rbd_mirror_internal + ClusterWatcher.cc + ImageDeleter.cc + ImageMap.cc + ImageReplayer.cc + ImageSync.cc + InstanceReplayer.cc + InstanceWatcher.cc + Instances.cc + LeaderWatcher.cc + Mirror.cc + MirrorStatusUpdater.cc + MirrorStatusWatcher.cc + NamespaceReplayer.cc + PoolMetaCache.cc + PoolReplayer.cc + PoolWatcher.cc + RemotePoolPoller.cc + ServiceDaemon.cc + Threads.cc + Throttler.cc + Types.cc + image_deleter/SnapshotPurgeRequest.cc + image_deleter/TrashMoveRequest.cc + image_deleter/TrashRemoveRequest.cc + image_deleter/TrashWatcher.cc + image_map/LoadRequest.cc + image_map/Policy.cc + image_map/SimplePolicy.cc + image_map/StateTransition.cc + image_map/UpdateRequest.cc + image_replayer/BootstrapRequest.cc + image_replayer/CloseImageRequest.cc + image_replayer/CreateImageRequest.cc + 
image_replayer/GetMirrorImageIdRequest.cc + image_replayer/OpenImageRequest.cc + image_replayer/OpenLocalImageRequest.cc + image_replayer/PrepareLocalImageRequest.cc + image_replayer/PrepareRemoteImageRequest.cc + image_replayer/StateBuilder.cc + image_replayer/TimeRollingMean.cc + image_replayer/Utils.cc + image_replayer/journal/CreateLocalImageRequest.cc + image_replayer/journal/EventPreprocessor.cc + image_replayer/journal/PrepareReplayRequest.cc + image_replayer/journal/Replayer.cc + image_replayer/journal/ReplayStatusFormatter.cc + image_replayer/journal/StateBuilder.cc + image_replayer/journal/SyncPointHandler.cc + image_replayer/snapshot/ApplyImageStateRequest.cc + image_replayer/snapshot/CreateLocalImageRequest.cc + image_replayer/snapshot/PrepareReplayRequest.cc + image_replayer/snapshot/Replayer.cc + image_replayer/snapshot/StateBuilder.cc + image_replayer/snapshot/Utils.cc + image_sync/SyncPointCreateRequest.cc + image_sync/SyncPointPruneRequest.cc + image_sync/Utils.cc + pool_watcher/RefreshImagesRequest.cc + service_daemon/Types.cc) + +add_library(rbd_mirror_internal STATIC + ${rbd_mirror_internal} + $<TARGET_OBJECTS:common_prioritycache_obj>) + +add_executable(rbd-mirror + main.cc) +target_link_libraries(rbd-mirror + rbd_mirror_internal + rbd_mirror_types + rbd_api + rbd_internal + rbd_types + journal + libneorados + librados + osdc + cls_rbd_client + cls_lock_client + cls_journal_client + global + heap_profiler + ${ALLOC_LIBS} + OpenSSL::SSL) +install(TARGETS rbd-mirror + DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/src/tools/rbd_mirror/CancelableRequest.h b/src/tools/rbd_mirror/CancelableRequest.h new file mode 100644 index 000000000..26e8dcb5b --- /dev/null +++ b/src/tools/rbd_mirror/CancelableRequest.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H +#define CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H + +#include 
"common/RefCountedObj.h" +#include "include/Context.h" + +namespace rbd { +namespace mirror { + +class CancelableRequest : public RefCountedObject { +public: + CancelableRequest(const std::string& name, CephContext *cct, + Context *on_finish) + : RefCountedObject(cct), m_name(name), m_cct(cct), + m_on_finish(on_finish) { + } + + virtual void send() = 0; + virtual void cancel() {} + +protected: + virtual void finish(int r) { + if (m_cct) { + lsubdout(m_cct, rbd_mirror, 20) << m_name << "::finish: r=" << r << dendl; + } + if (m_on_finish) { + m_on_finish->complete(r); + } + put(); + } + +private: + const std::string m_name; + CephContext *m_cct; + Context *m_on_finish; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H diff --git a/src/tools/rbd_mirror/ClusterWatcher.cc b/src/tools/rbd_mirror/ClusterWatcher.cc new file mode 100644 index 000000000..8bafb336e --- /dev/null +++ b/src/tools/rbd_mirror/ClusterWatcher.cc @@ -0,0 +1,252 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ClusterWatcher.h" +#include "include/stringify.h" +#include "common/ceph_json.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/internal.h" +#include "librbd/api/Mirror.h" +#include "tools/rbd_mirror/ServiceDaemon.h" +#include "json_spirit/json_spirit.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ClusterWatcher:" << this << " " \ + << __func__ << ": " + +using std::list; +using std::map; +using std::pair; +using std::set; +using std::string; +using std::unique_ptr; +using std::vector; + +using librados::Rados; +using librados::IoCtx; + +namespace rbd { +namespace mirror { + +ClusterWatcher::ClusterWatcher(RadosRef cluster, ceph::mutex &lock, + ServiceDaemon<librbd::ImageCtx>* service_daemon) + : m_cluster(cluster), 
m_lock(lock), m_service_daemon(service_daemon)
+{
+}
+
+// Returns the pool -> peers map captured by the last refresh_pools() call.
+// The caller must already hold the lock passed to the constructor.
+const ClusterWatcher::PoolPeers& ClusterWatcher::get_pool_peers() const
+{
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  return m_pool_peers;
+}
+
+// Returns a copy of the site name captured by the last successful
+// read_site_name(). The caller must already hold the lock.
+std::string ClusterWatcher::get_site_name() const {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  return m_site_name;
+}
+
+// Re-reads the pool peers and the site name without holding m_lock, then
+// publishes the results under m_lock. The site name is only replaced when
+// it was read successfully.
+void ClusterWatcher::refresh_pools()
+{
+  dout(20) << "enter" << dendl;
+
+  PoolPeers pool_peers;
+  read_pool_peers(&pool_peers);
+
+  std::string site_name;
+  int r = read_site_name(&site_name);
+
+  std::lock_guard l{m_lock};
+  m_pool_peers = pool_peers;
+
+  if (r >= 0) {
+    m_site_name = site_name;
+  }
+
+  // TODO: perhaps use a workqueue instead, once we get notifications
+  // about config changes for existing pools
+}
+
+/**
+ * Walk every pool in the cluster and collect the mirroring peers of each
+ * pool that has mirroring enabled.
+ *
+ * Pools that are cache tiers, that vanished while being inspected, or that
+ * have mirroring disabled are skipped. Every other pool is registered with
+ * the service daemon; query failures are reported as service daemon callouts
+ * and the pool is left out of @p pool_peers. Pools that were registered by
+ * an earlier call but were not seen this time are unregistered at the end.
+ *
+ * @param pool_peers output map, filled with pool id -> peer set
+ */
+void ClusterWatcher::read_pool_peers(PoolPeers *pool_peers)
+{
+  int r = m_cluster->wait_for_latest_osdmap();
+  if (r < 0) {
+    derr << "error waiting for OSD map: " << cpp_strerror(r) << dendl;
+    return;
+  }
+
+  list<pair<int64_t, string> > pools;
+  r = m_cluster->pool_list2(pools);
+  if (r < 0) {
+    derr << "error listing pools: " << cpp_strerror(r) << dendl;
+    return;
+  }
+
+  std::set<int64_t> service_pool_ids;
+  for (auto& kv : pools) {
+    int64_t pool_id = kv.first;
+    auto& pool_name = kv.second;
+    int64_t base_tier;
+    r = m_cluster->pool_get_base_tier(pool_id, &base_tier);
+    if (r == -ENOENT) {
+      dout(10) << "pool " << pool_name << " no longer exists" << dendl;
+      continue;
+    } else if (r < 0) {
+      // include the error code so the failure can be diagnosed from the log
+      derr << "Error retrieving base tier for pool " << pool_name << ": "
+           << cpp_strerror(r) << dendl;
+      continue;
+    }
+    if (pool_id != base_tier) {
+      // pool is a cache; skip it
+      continue;
+    }
+
+    IoCtx ioctx;
+    r = m_cluster->ioctx_create2(pool_id, ioctx);
+    if (r == -ENOENT) {
+      dout(10) << "pool " << pool_id << " no longer exists" << dendl;
+      continue;
+    } else if (r < 0) {
+      derr << "Error accessing pool " << pool_name << ": " << cpp_strerror(r)
+           << dendl;
+      continue;
+    }
+
+    cls::rbd::MirrorMode mirror_mode_internal;
+    r = librbd::cls_client::mirror_mode_get(&ioctx, &mirror_mode_internal);
+    if (r == 0 && mirror_mode_internal == cls::rbd::MIRROR_MODE_DISABLED) {
+      dout(10) << "mirroring is disabled for pool " << pool_name << dendl;
+      continue;
+    }
+
+    // from here on the pool is tracked by the service daemon, even if the
+    // mirror mode query above failed
+    service_pool_ids.insert(pool_id);
+    if (m_service_pools.find(pool_id) == m_service_pools.end()) {
+      m_service_pools[pool_id] = {};
+      m_service_daemon->add_pool(pool_id, pool_name);
+    }
+
+    if (r == -EPERM) {
+      dout(10) << "access denied querying pool " << pool_name << dendl;
+      m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+        pool_id, m_service_pools[pool_id],
+        service_daemon::CALLOUT_LEVEL_WARNING, "access denied");
+      continue;
+    } else if (r < 0) {
+      derr << "could not tell whether mirroring was enabled for " << pool_name
+           << " : " << cpp_strerror(r) << dendl;
+      m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+        pool_id, m_service_pools[pool_id],
+        service_daemon::CALLOUT_LEVEL_WARNING, "mirroring mode query failed");
+      continue;
+    }
+
+    vector<librbd::mirror_peer_site_t> configs;
+    r = librbd::api::Mirror<>::peer_site_list(ioctx, &configs);
+    if (r < 0) {
+      derr << "error reading mirroring config for pool " << pool_name << ": "
+           << cpp_strerror(r) << dendl;
+      m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+        pool_id, m_service_pools[pool_id],
+        service_daemon::CALLOUT_LEVEL_ERROR, "mirroring peer list failed");
+      continue;
+    }
+
+    // TX-only peers are dropped; every other direction is kept
+    std::vector<PeerSpec> peers;
+    peers.reserve(configs.size());
+    for (auto& peer : configs) {
+      if (peer.direction != RBD_MIRROR_PEER_DIRECTION_TX) {
+        peers.push_back(peer);
+      }
+    }
+
+    for (auto& peer : peers) {
+      r = resolve_peer_site_config_keys(pool_id, pool_name, &peer);
+      if (r < 0) {
+        break;
+      }
+    }
+
+    // NOTE(review): this also clears a callout that
+    // resolve_peer_site_config_keys() may have just raised for this pool --
+    // confirm that is intended.
+    if (m_service_pools[pool_id] != service_daemon::CALLOUT_ID_NONE) {
+      m_service_daemon->remove_callout(pool_id, m_service_pools[pool_id]);
+      m_service_pools[pool_id] = service_daemon::CALLOUT_ID_NONE;
+    }
+
+    pool_peers->emplace(pool_id, Peers{peers.begin(), peers.end()});
+  }
+
+  // unregister pools that were tracked before but were not seen this time
+  for (auto it = m_service_pools.begin(); it != m_service_pools.end(); ) {
+    if (service_pool_ids.count(it->first) == 0) {
+      m_service_daemon->remove_pool(it->first);
+      it = m_service_pools.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+// Reads the mirror site name through librbd into *site_name and returns
+// librbd's result code.
+int ClusterWatcher::read_site_name(std::string* site_name) {
+  dout(10) << dendl;
+
+  librbd::RBD rbd;
+  return rbd.mirror_site_name_get(*m_cluster, site_name);
+}
+
+int ClusterWatcher::resolve_peer_site_config_keys(int64_t pool_id,
+                                                  const std::string& pool_name,
+                                                  PeerSpec* peer) {
+  dout(10) << "retrieving config-key: pool_id=" << pool_id << ", "
+           << "pool_name=" << pool_name << ", "
+           << "peer_uuid=" << peer->uuid << dendl;
+
+  std::string cmd =
+    "{"
+      "\"prefix\": \"config-key get\", "
+      "\"key\": \"" RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) +
+        "/" + peer->uuid + "\""
+    "}";
+
+  bufferlist in_bl;
+  bufferlist out_bl;
+  int r = m_cluster->mon_command(cmd, in_bl, &out_bl, nullptr);
+  if (r == -ENOENT || out_bl.length() == 0) {
+    return 0;
+  } else if (r < 0) {
+    derr << "error reading mirroring peer config for pool " << pool_name << ": "
+         << cpp_strerror(r) << dendl;
+    m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+      pool_id, m_service_pools[pool_id],
+      service_daemon::CALLOUT_LEVEL_WARNING,
+      "mirroring peer config-key query failed");
+    return r;
+  }
+
+  bool json_valid = false;
+  json_spirit::mValue json_root;
+  if(json_spirit::read(out_bl.to_str(), json_root)) {
+    try {
+      auto& json_obj = json_root.get_obj();
+      if (json_obj.count("mon_host")) {
+        peer->mon_host = json_obj["mon_host"].get_str();
+      }
+      if (json_obj.count("key")) {
+        peer->key = json_obj["key"].get_str();
+      }
+      json_valid = true;
+    } catch (std::runtime_error&) {
+    }
+  }
+
+  if (!json_valid) {
+    derr << "error parsing mirroring peer config for pool " << pool_name << ", "
+         << "peer " << peer->uuid << dendl;
+    m_service_pools[pool_id]
= m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, + "mirroring peer config-key decode failed"); + } + + return 0; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/ClusterWatcher.h b/src/tools/rbd_mirror/ClusterWatcher.h new file mode 100644 index 000000000..93356fec6 --- /dev/null +++ b/src/tools/rbd_mirror/ClusterWatcher.h @@ -0,0 +1,73 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_CLUSTER_WATCHER_H +#define CEPH_RBD_MIRROR_CLUSTER_WATCHER_H + +#include <map> +#include <memory> +#include <set> + +#include "common/ceph_context.h" +#include "common/ceph_mutex.h" +#include "common/Timer.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <unordered_map> + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ServiceDaemon; + +/** + * Tracks mirroring configuration for pools in a single + * cluster. 
+ */ +class ClusterWatcher { +public: + struct PeerSpecCompare { + bool operator()(const PeerSpec& lhs, const PeerSpec& rhs) const { + return (lhs.uuid < rhs.uuid); + } + }; + typedef std::set<PeerSpec, PeerSpecCompare> Peers; + typedef std::map<int64_t, Peers> PoolPeers; + + ClusterWatcher(RadosRef cluster, ceph::mutex &lock, + ServiceDaemon<librbd::ImageCtx>* service_daemon); + ~ClusterWatcher() = default; + ClusterWatcher(const ClusterWatcher&) = delete; + ClusterWatcher& operator=(const ClusterWatcher&) = delete; + + // Caller controls frequency of calls + void refresh_pools(); + const PoolPeers& get_pool_peers() const; + std::string get_site_name() const; + +private: + typedef std::unordered_map<int64_t, service_daemon::CalloutId> ServicePools; + + RadosRef m_cluster; + ceph::mutex &m_lock; + ServiceDaemon<librbd::ImageCtx>* m_service_daemon; + + ServicePools m_service_pools; + PoolPeers m_pool_peers; + std::string m_site_name; + + void read_pool_peers(PoolPeers *pool_peers); + + int read_site_name(std::string* site_name); + + int resolve_peer_site_config_keys( + int64_t pool_id, const std::string& pool_name, PeerSpec* peer); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_CLUSTER_WATCHER_H diff --git a/src/tools/rbd_mirror/ImageDeleter.cc b/src/tools/rbd_mirror/ImageDeleter.cc new file mode 100644 index 000000000..ba137e6fd --- /dev/null +++ b/src/tools/rbd_mirror/ImageDeleter.cc @@ -0,0 +1,549 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include "include/rados/librados.hpp" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "global/global_context.h" +#include "librbd/internal.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/asio/ContextWQ.h" +#include "cls/rbd/cls_rbd_client.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/Utils.h" +#include "ImageDeleter.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/Throttler.h" +#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h" +#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h" +#include "tools/rbd_mirror/image_deleter/TrashWatcher.h" +#include <map> +#include <sstream> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +using std::string; +using std::stringstream; +using std::vector; +using std::pair; +using std::make_pair; + +using librados::IoCtx; +using namespace librbd; + +namespace rbd { +namespace mirror { + +using librbd::util::create_async_context_callback; + +namespace { + +class ImageDeleterAdminSocketCommand { +public: + virtual ~ImageDeleterAdminSocketCommand() {} + virtual int call(Formatter *f) = 0; +}; + +template <typename I> +class StatusCommand : public ImageDeleterAdminSocketCommand { +public: + explicit StatusCommand(ImageDeleter<I> *image_del) : image_del(image_del) {} + + int call(Formatter *f) override { + image_del->print_status(f); + return 0; + } + +private: + ImageDeleter<I> *image_del; +}; + +} // anonymous namespace + +template <typename I> +class ImageDeleterAdminSocketHook : public AdminSocketHook { +public: + ImageDeleterAdminSocketHook(CephContext *cct, const std::string& pool_name, + ImageDeleter<I> *image_del) : + admin_socket(cct->get_admin_socket()) { + + std::string command; + int r; + + command = "rbd mirror deletion status " + pool_name; + r = 
admin_socket->register_command(command, this, + "get status for image deleter"); + if (r == 0) { + commands[command] = new StatusCommand<I>(image_del); + } + + } + + ~ImageDeleterAdminSocketHook() override { + (void)admin_socket->unregister_commands(this); + for (Commands::const_iterator i = commands.begin(); i != commands.end(); + ++i) { + delete i->second; + } + } + + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& errss, + bufferlist& out) override { + Commands::const_iterator i = commands.find(command); + ceph_assert(i != commands.end()); + return i->second->call(f); + } + +private: + typedef std::map<std::string, ImageDeleterAdminSocketCommand*, + std::less<>> Commands; + AdminSocket *admin_socket; + Commands commands; +}; + +template <typename I> +ImageDeleter<I>::ImageDeleter( + librados::IoCtx& local_io_ctx, Threads<librbd::ImageCtx>* threads, + Throttler<librbd::ImageCtx>* image_deletion_throttler, + ServiceDaemon<librbd::ImageCtx>* service_daemon) + : m_local_io_ctx(local_io_ctx), m_threads(threads), + m_image_deletion_throttler(image_deletion_throttler), + m_service_daemon(service_daemon), m_trash_listener(this), + m_lock(ceph::make_mutex( + librbd::util::unique_lock_name("rbd::mirror::ImageDeleter::m_lock", + this))) { +} + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << " " \ + << __func__ << ": " + +template <typename I> +void ImageDeleter<I>::trash_move(librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + bool resync, + librbd::asio::ContextWQ* work_queue, + Context* on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "resync=" << resync << dendl; + + auto req = rbd::mirror::image_deleter::TrashMoveRequest<>::create( + local_io_ctx, global_image_id, resync, work_queue, on_finish); + req->send(); +} + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << this << " " \ + << 
__func__ << ": "
+
+// Registers the admin socket hook for this pool and starts the trash
+// watcher; on_finish is handed to the watcher's init().
+template <typename I>
+void ImageDeleter<I>::init(Context* on_finish) {
+  dout(10) << dendl;
+
+  m_asok_hook = new ImageDeleterAdminSocketHook<I>(
+    g_ceph_context, m_local_io_ctx.get_pool_name(), this);
+
+  m_trash_watcher = image_deleter::TrashWatcher<I>::create(m_local_io_ctx,
+                                                           m_threads,
+                                                           m_trash_listener);
+  m_trash_watcher->init(on_finish);
+}
+
+// Starts the shut down chain: drop the admin socket hook, drain the
+// throttler with -ESTALE, then shut down the trash watcher.
+template <typename I>
+void ImageDeleter<I>::shut_down(Context* on_finish) {
+  dout(10) << dendl;
+
+  delete m_asok_hook;
+  m_asok_hook = nullptr;
+
+  m_image_deletion_throttler->drain(m_local_io_ctx.get_namespace(),
+                                    -ESTALE);
+
+  shut_down_trash_watcher(on_finish);
+}
+
+// Shuts down and destroys the trash watcher, then continues with
+// wait_for_ops().
+template <typename I>
+void ImageDeleter<I>::shut_down_trash_watcher(Context* on_finish) {
+  dout(10) << dendl;
+  ceph_assert(m_trash_watcher);
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+      delete m_trash_watcher;
+      m_trash_watcher = nullptr;
+
+      wait_for_ops(on_finish);
+    });
+  m_trash_watcher->shut_down(ctx);
+}
+
+// Marks the deleter as stopped, cancels the retry timer and waits for the
+// tracked async ops before cancelling the remaining deletions.
+template <typename I>
+void ImageDeleter<I>::wait_for_ops(Context* on_finish) {
+  {
+    std::scoped_lock locker{m_threads->timer_lock, m_lock};
+    m_running = false;
+    cancel_retry_timer();
+  }
+
+  auto ctx = new LambdaContext([this, on_finish](int) {
+      cancel_all_deletions(on_finish);
+    });
+  m_async_op_tracker.wait_for_ops(ctx);
+}
+
+// Drains the throttler with -ECANCELED, completes every waiter of a queued
+// or retry-queued image with -ECANCELED and empties both queues.
+template <typename I>
+void ImageDeleter<I>::cancel_all_deletions(Context* on_finish) {
+  m_image_deletion_throttler->drain(m_local_io_ctx.get_namespace(),
+                                    -ECANCELED);
+  {
+    std::lock_guard locker{m_lock};
+    // wake up any external state machines waiting on deletions
+    ceph_assert(m_in_flight_delete_queue.empty());
+    for (auto& queue : {&m_delete_queue, &m_retry_delete_queue}) {
+      for (auto& info : *queue) {
+        notify_on_delete(info->image_id, -ECANCELED);
+      }
+      queue->clear();
+    }
+  }
+  on_finish->complete(0);
+}
+
+// Registers on_finish to be completed (via the work queue) when image_id is
+// deleted. With scheduled_only set, an image that is in none of the queues
+// completes immediately with 0. A previously registered waiter for the same
+// image is completed with -ESTALE.
+template <typename I>
+void ImageDeleter<I>::wait_for_deletion(const std::string& image_id,
+                                        bool scheduled_only,
+                                        Context* on_finish) {
+  dout(5) << "image_id=" << image_id << dendl;
+
+  on_finish = new LambdaContext([this, on_finish](int r) {
+      m_threads->work_queue->queue(on_finish, r);
+    });
+
+  std::lock_guard locker{m_lock};
+  auto del_info = find_delete_info(image_id);
+  if (!del_info && scheduled_only) {
+    // image not scheduled for deletion
+    on_finish->complete(0);
+    return;
+  }
+
+  notify_on_delete(image_id, -ESTALE);
+  m_on_delete_contexts[image_id] = on_finish;
+}
+
+// Completes any waiter of the image with r and releases the caller's
+// reference. Acquires m_lock itself, so it must be called with m_lock
+// released.
+template <typename I>
+void ImageDeleter<I>::complete_active_delete(DeleteInfoRef* delete_info,
+                                             int r) {
+  // delete_info is a pointer to a shared_ptr: dereference twice so the
+  // DeleteInfo (not the raw pointer value) is logged
+  dout(20) << "info=" << **delete_info << ", r=" << r << dendl;
+  std::lock_guard locker{m_lock};
+  notify_on_delete((*delete_info)->image_id, r);
+  delete_info->reset();
+}
+
+/**
+ * Handle a failed image removal.
+ *
+ * A blocklisted error is not retried: the deletion is completed with the
+ * error. Any other error completes the current waiter with error_code,
+ * bumps the retry counter and appends the image to the retry queue with a
+ * retry time of now + retry_delay.
+ *
+ * Must be called with neither the timer lock nor m_lock held.
+ *
+ * @param delete_info  reference to the image's delete record
+ * @param error_code   negative errno of the failed removal
+ * @param retry_delay  delay passed to ceph::make_timespan() for the retry
+ */
+template <typename I>
+void ImageDeleter<I>::enqueue_failed_delete(DeleteInfoRef* delete_info,
+                                            int error_code,
+                                            double retry_delay) {
+  dout(20) << "info=" << **delete_info << ", r=" << error_code << dendl;
+  if (error_code == -EBLOCKLISTED) {
+    // m_lock must NOT be held here: complete_active_delete() acquires it
+    // itself, so locking it first would make this thread lock it twice
+    derr << "blocklisted while deleting local image" << dendl;
+    complete_active_delete(delete_info, error_code);
+    return;
+  }
+
+  std::scoped_lock locker{m_threads->timer_lock, m_lock};
+  auto& delete_info_ref = *delete_info;
+  notify_on_delete(delete_info_ref->image_id, error_code);
+  delete_info_ref->error_code = error_code;
+  ++delete_info_ref->retries;
+  delete_info_ref->retry_time = (clock_t::now() +
+                                 ceph::make_timespan(retry_delay));
+  m_retry_delete_queue.push_back(delete_info_ref);
+
+  schedule_retry_timer();
+}
+
+template <typename I>
+typename ImageDeleter<I>::DeleteInfoRef
+ImageDeleter<I>::find_delete_info(const std::string &image_id) {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  DeleteQueue delete_queues[] = {m_in_flight_delete_queue,
+                                 m_retry_delete_queue,
+                                 m_delete_queue};
+
+  DeleteInfo delete_info{image_id};
+  for (auto& queue : delete_queues) {
+    auto it = std::find_if(queue.begin(), queue.end(),
+                           [&delete_info](const DeleteInfoRef& ref) {
+                             return
delete_info == *ref; + }); + if (it != queue.end()) { + return *it; + } + } + return {}; +} + +template <typename I> +void ImageDeleter<I>::print_status(Formatter *f) { + dout(20) << dendl; + + f->open_object_section("image_deleter_status"); + f->open_array_section("delete_images_queue"); + + std::lock_guard l{m_lock}; + for (const auto& image : m_delete_queue) { + image->print_status(f); + } + + f->close_section(); + f->open_array_section("failed_deletes_queue"); + for (const auto& image : m_retry_delete_queue) { + image->print_status(f, true); + } + + f->close_section(); + f->close_section(); +} + +template <typename I> +vector<string> ImageDeleter<I>::get_delete_queue_items() { + vector<string> items; + + std::lock_guard l{m_lock}; + for (const auto& del_info : m_delete_queue) { + items.push_back(del_info->image_id); + } + + return items; +} + +template <typename I> +vector<pair<string, int> > ImageDeleter<I>::get_failed_queue_items() { + vector<pair<string, int> > items; + + std::lock_guard l{m_lock}; + for (const auto& del_info : m_retry_delete_queue) { + items.push_back(make_pair(del_info->image_id, + del_info->error_code)); + } + + return items; +} + +template <typename I> +void ImageDeleter<I>::remove_images() { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + while (m_running && !m_delete_queue.empty()) { + + DeleteInfoRef delete_info = m_delete_queue.front(); + m_delete_queue.pop_front(); + + ceph_assert(delete_info); + + auto on_start = create_async_context_callback( + m_threads->work_queue, new LambdaContext( + [this, delete_info](int r) { + if (r < 0) { + notify_on_delete(delete_info->image_id, r); + return; + } + remove_image(delete_info); + })); + + m_image_deletion_throttler->start_op(m_local_io_ctx.get_namespace(), + delete_info->image_id, on_start); + } +} + +template <typename I> +void ImageDeleter<I>::remove_image(DeleteInfoRef delete_info) { + dout(10) << "info=" << *delete_info << dendl; + + std::lock_guard locker{m_lock}; + + 
m_in_flight_delete_queue.push_back(delete_info); + m_async_op_tracker.start_op(); + + auto ctx = new LambdaContext([this, delete_info](int r) { + handle_remove_image(delete_info, r); + m_async_op_tracker.finish_op(); + }); + + auto req = image_deleter::TrashRemoveRequest<I>::create( + m_local_io_ctx, delete_info->image_id, &delete_info->error_result, + m_threads->work_queue, ctx); + req->send(); +} + +template <typename I> +void ImageDeleter<I>::handle_remove_image(DeleteInfoRef delete_info, + int r) { + dout(10) << "info=" << *delete_info << ", r=" << r << dendl; + + m_image_deletion_throttler->finish_op(m_local_io_ctx.get_namespace(), + delete_info->image_id); + { + std::lock_guard locker{m_lock}; + ceph_assert(ceph_mutex_is_locked(m_lock)); + auto it = std::find(m_in_flight_delete_queue.begin(), + m_in_flight_delete_queue.end(), delete_info); + ceph_assert(it != m_in_flight_delete_queue.end()); + m_in_flight_delete_queue.erase(it); + } + + if (r < 0) { + if (delete_info->error_result == image_deleter::ERROR_RESULT_COMPLETE) { + complete_active_delete(&delete_info, r); + } else if (delete_info->error_result == + image_deleter::ERROR_RESULT_RETRY_IMMEDIATELY) { + enqueue_failed_delete(&delete_info, r, m_busy_interval); + } else { + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + double failed_interval = cct->_conf.get_val<double>( + "rbd_mirror_delete_retry_interval"); + enqueue_failed_delete(&delete_info, r, failed_interval); + } + } else { + complete_active_delete(&delete_info, 0); + } + + // process the next queued image to delete + remove_images(); +} + +template <typename I> +void ImageDeleter<I>::schedule_retry_timer() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + if (!m_running || m_timer_ctx != nullptr || m_retry_delete_queue.empty()) { + return; + } + + dout(10) << dendl; + auto &delete_info = m_retry_delete_queue.front(); + m_timer_ctx = new LambdaContext([this](int r) { + 
handle_retry_timer(); + }); + m_threads->timer->add_event_at(delete_info->retry_time, m_timer_ctx); +} + +template <typename I> +void ImageDeleter<I>::cancel_retry_timer() { + dout(10) << dendl; + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + if (m_timer_ctx != nullptr) { + bool canceled = m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + ceph_assert(canceled); + } +} + +template <typename I> +void ImageDeleter<I>::handle_retry_timer() { + dout(10) << dendl; + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + std::lock_guard locker{m_lock}; + + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + ceph_assert(m_running); + ceph_assert(!m_retry_delete_queue.empty()); + + // move all ready-to-ready items back to main queue + auto now = clock_t::now(); + while (!m_retry_delete_queue.empty()) { + auto &delete_info = m_retry_delete_queue.front(); + if (delete_info->retry_time > now) { + break; + } + + m_delete_queue.push_back(delete_info); + m_retry_delete_queue.pop_front(); + } + + // schedule wake up for any future retries + schedule_retry_timer(); + + // start (concurrent) removal of images + m_async_op_tracker.start_op(); + auto ctx = new LambdaContext([this](int r) { + remove_images(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageDeleter<I>::handle_trash_image(const std::string& image_id, + const ImageDeleter<I>::clock_t::time_point& deferment_end_time) { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + auto del_info = find_delete_info(image_id); + if (del_info != nullptr) { + dout(20) << "image " << image_id << " " + << "was already scheduled for deletion" << dendl; + return; + } + + dout(10) << "image_id=" << image_id << ", " + << "deferment_end_time=" << utime_t{deferment_end_time} << dendl; + + del_info.reset(new DeleteInfo(image_id)); + del_info->retry_time = deferment_end_time; + 
m_retry_delete_queue.push_back(del_info); + + schedule_retry_timer(); +} + +template <typename I> +void ImageDeleter<I>::notify_on_delete(const std::string& image_id, + int r) { + dout(10) << "image_id=" << image_id << ", r=" << r << dendl; + auto it = m_on_delete_contexts.find(image_id); + if (it == m_on_delete_contexts.end()) { + return; + } + + it->second->complete(r); + m_on_delete_contexts.erase(it); +} + +template <typename I> +void ImageDeleter<I>::DeleteInfo::print_status(Formatter *f, + bool print_failure_info) { + f->open_object_section("delete_info"); + f->dump_string("image_id", image_id); + if (print_failure_info) { + f->dump_string("error_code", cpp_strerror(error_code)); + f->dump_int("retries", retries); + } + f->close_section(); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageDeleter<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageDeleter.h b/src/tools/rbd_mirror/ImageDeleter.h new file mode 100644 index 000000000..5fe79496b --- /dev/null +++ b/src/tools/rbd_mirror/ImageDeleter.h @@ -0,0 +1,189 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_H + +#include "include/utime.h" +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "common/Timer.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_deleter/Types.h" +#include <atomic> +#include <deque> +#include <iosfwd> +#include <map> +#include <memory> +#include <vector> + +class AdminSocketHook; +class Context; +namespace librbd { +struct ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> class ServiceDaemon; +template <typename> class Threads; +template <typename> class Throttler; + +namespace image_deleter { template <typename> struct TrashWatcher; } + +/** + * Manage deletion of non-primary images. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class ImageDeleter { +public: + static ImageDeleter* create( + librados::IoCtx& local_io_ctx, Threads<librbd::ImageCtx>* threads, + Throttler<librbd::ImageCtx>* image_deletion_throttler, + ServiceDaemon<librbd::ImageCtx>* service_daemon) { + return new ImageDeleter(local_io_ctx, threads, image_deletion_throttler, + service_daemon); + } + + ImageDeleter(librados::IoCtx& local_io_ctx, + Threads<librbd::ImageCtx>* threads, + Throttler<librbd::ImageCtx>* image_deletion_throttler, + ServiceDaemon<librbd::ImageCtx>* service_daemon); + + ImageDeleter(const ImageDeleter&) = delete; + ImageDeleter& operator=(const ImageDeleter&) = delete; + + static void trash_move(librados::IoCtx& local_io_ctx, + const std::string& global_image_id, bool resync, + librbd::asio::ContextWQ* work_queue, + Context* on_finish); + + void init(Context* on_finish); + void shut_down(Context* on_finish); + + void print_status(Formatter *f); + + // for testing purposes + void wait_for_deletion(const std::string &image_id, + bool scheduled_only, Context* on_finish); + + std::vector<std::string> get_delete_queue_items(); + 
std::vector<std::pair<std::string, int> > get_failed_queue_items(); + + inline void set_busy_timer_interval(double interval) { + m_busy_interval = interval; + } + +private: + using clock_t = ceph::real_clock; + struct TrashListener : public image_deleter::TrashListener { + ImageDeleter *image_deleter; + + TrashListener(ImageDeleter *image_deleter) : image_deleter(image_deleter) { + } + + void handle_trash_image(const std::string& image_id, + const ceph::real_clock::time_point& deferment_end_time) override { + image_deleter->handle_trash_image(image_id, deferment_end_time); + } + }; + + struct DeleteInfo { + std::string image_id; + + image_deleter::ErrorResult error_result = {}; + int error_code = 0; + clock_t::time_point retry_time; + int retries = 0; + + DeleteInfo(const std::string& image_id) + : image_id(image_id) { + } + + inline bool operator==(const DeleteInfo& delete_info) const { + return (image_id == delete_info.image_id); + } + + friend std::ostream& operator<<(std::ostream& os, DeleteInfo& delete_info) { + os << "[image_id=" << delete_info.image_id << "]"; + return os; + } + + void print_status(Formatter *f, + bool print_failure_info=false); + }; + typedef std::shared_ptr<DeleteInfo> DeleteInfoRef; + typedef std::deque<DeleteInfoRef> DeleteQueue; + typedef std::map<std::string, Context*> OnDeleteContexts; + + librados::IoCtx& m_local_io_ctx; + Threads<librbd::ImageCtx>* m_threads; + Throttler<librbd::ImageCtx>* m_image_deletion_throttler; + ServiceDaemon<librbd::ImageCtx>* m_service_daemon; + + image_deleter::TrashWatcher<ImageCtxT>* m_trash_watcher = nullptr; + TrashListener m_trash_listener; + + std::atomic<unsigned> m_running { 1 }; + + double m_busy_interval = 1; + + AsyncOpTracker m_async_op_tracker; + + ceph::mutex m_lock; + DeleteQueue m_delete_queue; + DeleteQueue m_retry_delete_queue; + DeleteQueue m_in_flight_delete_queue; + + OnDeleteContexts m_on_delete_contexts; + + AdminSocketHook *m_asok_hook = nullptr; + + Context *m_timer_ctx = nullptr; 
+ + bool process_image_delete(); + + void complete_active_delete(DeleteInfoRef* delete_info, int r); + void enqueue_failed_delete(DeleteInfoRef* delete_info, int error_code, + double retry_delay); + + DeleteInfoRef find_delete_info(const std::string &image_id); + + void remove_images(); + void remove_image(DeleteInfoRef delete_info); + void handle_remove_image(DeleteInfoRef delete_info, int r); + + void schedule_retry_timer(); + void cancel_retry_timer(); + void handle_retry_timer(); + + void handle_trash_image(const std::string& image_id, + const clock_t::time_point& deferment_end_time); + + void shut_down_trash_watcher(Context* on_finish); + void wait_for_ops(Context* on_finish); + void cancel_all_deletions(Context* on_finish); + + void notify_on_delete(const std::string& image_id, int r); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageDeleter<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_H diff --git a/src/tools/rbd_mirror/ImageMap.cc b/src/tools/rbd_mirror/ImageMap.cc new file mode 100644 index 000000000..bd005b466 --- /dev/null +++ b/src/tools/rbd_mirror/ImageMap.cc @@ -0,0 +1,604 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" + +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "tools/rbd_mirror/Threads.h" + +#include "ImageMap.h" +#include "image_map/LoadRequest.h" +#include "image_map/SimplePolicy.h" +#include "image_map/UpdateRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageMap: " << this << " " \ + << __func__ << ": " + +using namespace std; + +namespace rbd { +namespace mirror { + +using ::operator<<; +using image_map::Policy; + +using librbd::util::unique_lock_name; +using librbd::util::create_async_context_callback; + 
+template <typename I> +struct ImageMap<I>::C_NotifyInstance : public Context { + ImageMap* image_map; + std::string global_image_id; + bool acquire_release; + + C_NotifyInstance(ImageMap* image_map, const std::string& global_image_id, + bool acquire_release) + : image_map(image_map), global_image_id(global_image_id), + acquire_release(acquire_release) { + image_map->start_async_op(); + } + + void finish(int r) override { + if (acquire_release) { + image_map->handle_peer_ack(global_image_id, r); + } else { + image_map->handle_peer_ack_remove(global_image_id, r); + } + image_map->finish_async_op(); + } +}; + +template <typename I> +ImageMap<I>::ImageMap(librados::IoCtx &ioctx, Threads<I> *threads, + const std::string& instance_id, + image_map::Listener &listener) + : m_ioctx(ioctx), m_threads(threads), m_instance_id(instance_id), + m_listener(listener), + m_lock(ceph::make_mutex( + unique_lock_name("rbd::mirror::ImageMap::m_lock", this))) { +} + +template <typename I> +ImageMap<I>::~ImageMap() { + ceph_assert(m_async_op_tracker.empty()); + ceph_assert(m_timer_task == nullptr); + ceph_assert(m_rebalance_task == nullptr); +} + +template <typename I> +void ImageMap<I>::continue_action(const std::set<std::string> &global_image_ids, + int r) { + dout(20) << dendl; + + { + std::lock_guard locker{m_lock}; + if (m_shutting_down) { + return; + } + + for (auto const &global_image_id : global_image_ids) { + bool schedule = m_policy->finish_action(global_image_id, r); + if (schedule) { + schedule_action(global_image_id); + } + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_update_request( + const Updates &updates, + const std::set<std::string> &remove_global_image_ids, int r) { + dout(20) << "r=" << r << dendl; + + std::set<std::string> global_image_ids; + + global_image_ids.insert(remove_global_image_ids.begin(), + remove_global_image_ids.end()); + for (auto const &update : updates) { + global_image_ids.insert(update.global_image_id); + 
} + + continue_action(global_image_ids, r); +} + +template <typename I> +void ImageMap<I>::update_image_mapping(Updates&& map_updates, + std::set<std::string>&& map_removals) { + if (map_updates.empty() && map_removals.empty()) { + return; + } + + dout(5) << "updates=[" << map_updates << "], " + << "removes=[" << map_removals << "]" << dendl; + + Context *on_finish = new LambdaContext( + [this, map_updates, map_removals](int r) { + handle_update_request(map_updates, map_removals, r); + finish_async_op(); + }); + on_finish = create_async_context_callback(m_threads->work_queue, on_finish); + + // empty meta policy for now.. + image_map::PolicyMetaNone policy_meta; + + bufferlist bl; + encode(image_map::PolicyData(policy_meta), bl); + + // prepare update map + std::map<std::string, cls::rbd::MirrorImageMap> update_mapping; + for (auto const &update : map_updates) { + update_mapping.emplace( + update.global_image_id, cls::rbd::MirrorImageMap(update.instance_id, + update.mapped_time, bl)); + } + + start_async_op(); + image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create( + m_ioctx, std::move(update_mapping), std::move(map_removals), on_finish); + req->send(); +} + +template <typename I> +void ImageMap<I>::process_updates() { + dout(20) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_timer_task == nullptr); + + Updates map_updates; + std::set<std::string> map_removals; + Updates acquire_updates; + Updates release_updates; + + // gather updates by advancing the state machine + m_lock.lock(); + for (auto const &global_image_id : m_global_image_ids) { + image_map::ActionType action_type = + m_policy->start_action(global_image_id); + image_map::LookupInfo info = m_policy->lookup(global_image_id); + + dout(15) << "global_image_id=" << global_image_id << ", " + << "action=" << action_type << ", " + << "instance=" << info.instance_id << dendl; + switch (action_type) { + case image_map::ACTION_TYPE_NONE: + continue; + case 
image_map::ACTION_TYPE_MAP_UPDATE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + map_updates.emplace_back(global_image_id, info.instance_id, + info.mapped_time); + break; + case image_map::ACTION_TYPE_MAP_REMOVE: + map_removals.emplace(global_image_id); + break; + case image_map::ACTION_TYPE_ACQUIRE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + acquire_updates.emplace_back(global_image_id, info.instance_id); + break; + case image_map::ACTION_TYPE_RELEASE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + release_updates.emplace_back(global_image_id, info.instance_id); + break; + } + } + m_global_image_ids.clear(); + m_lock.unlock(); + + // notify listener (acquire, release) and update on-disk map. note + // that its safe to process this outside m_lock as we still hold + // timer lock. + notify_listener_acquire_release_images(acquire_updates, release_updates); + update_image_mapping(std::move(map_updates), std::move(map_removals)); +} + +template <typename I> +void ImageMap<I>::schedule_update_task() { + std::lock_guard timer_lock{m_threads->timer_lock}; + schedule_update_task(m_threads->timer_lock); +} + +template <typename I> +void ImageMap<I>::schedule_update_task(const ceph::mutex &timer_lock) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + + schedule_rebalance_task(); + + if (m_timer_task != nullptr) { + return; + } + + { + std::lock_guard locker{m_lock}; + if (m_global_image_ids.empty()) { + return; + } + } + + m_timer_task = new LambdaContext([this](int r) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + m_timer_task = nullptr; + + process_updates(); + }); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + double after = cct->_conf.get_val<double>("rbd_mirror_image_policy_update_throttle_interval"); + + dout(20) << "scheduling image check update (" << m_timer_task << ")" + << " after " << after << " second(s)" << dendl; + 
m_threads->timer->add_event_after(after, m_timer_task); +} + +template <typename I> +void ImageMap<I>::rebalance() { + ceph_assert(m_rebalance_task == nullptr); + + { + std::lock_guard locker{m_lock}; + if (m_async_op_tracker.empty() && m_global_image_ids.empty()){ + dout(20) << "starting rebalance" << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->add_instances({}, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + } + + schedule_update_task(m_threads->timer_lock); +} + +template <typename I> +void ImageMap<I>::schedule_rebalance_task() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + + // fetch the updated value of idle timeout for (re)scheduling + double resched_after = cct->_conf.get_val<double>( + "rbd_mirror_image_policy_rebalance_timeout"); + if (!resched_after) { + return; + } + + // cancel existing rebalance task if any before scheduling + if (m_rebalance_task != nullptr) { + m_threads->timer->cancel_event(m_rebalance_task); + } + + m_rebalance_task = new LambdaContext([this](int _) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + m_rebalance_task = nullptr; + + rebalance(); + }); + + dout(20) << "scheduling rebalance (" << m_rebalance_task << ")" + << " after " << resched_after << " second(s)" << dendl; + m_threads->timer->add_event_after(resched_after, m_rebalance_task); +} + +template <typename I> +void ImageMap<I>::schedule_action(const std::string &global_image_id) { + dout(20) << "global_image_id=" << global_image_id << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + m_global_image_ids.emplace(global_image_id); +} + +template <typename I> +void ImageMap<I>::notify_listener_acquire_release_images( + const Updates &acquire, const Updates &release) { + if (acquire.empty() && release.empty()) { + return; + } + + dout(5) << "acquire=[" << 
acquire << "], " + << "release=[" << release << "]" << dendl; + + for (auto const &update : acquire) { + m_listener.acquire_image( + update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, true))); + } + + for (auto const &update : release) { + m_listener.release_image( + update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, true))); + } +} + +template <typename I> +void ImageMap<I>::notify_listener_remove_images(const std::string &peer_uuid, + const Updates &remove) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "remove=[" << remove << "]" << dendl; + + for (auto const &update : remove) { + m_listener.remove_image( + peer_uuid, update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, false))); + } +} + +template <typename I> +void ImageMap<I>::handle_load(const std::map<std::string, + cls::rbd::MirrorImageMap> &image_mapping) { + dout(20) << dendl; + + { + std::lock_guard locker{m_lock}; + m_policy->init(image_mapping); + + for (auto& pair : image_mapping) { + schedule_action(pair.first); + } + } + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_peer_ack_remove(const std::string &global_image_id, + int r) { + std::lock_guard locker{m_lock}; + dout(5) << "global_image_id=" << global_image_id << dendl; + + if (r < 0) { + derr << "failed to remove global_image_id=" << global_image_id << dendl; + } + + auto peer_it = m_peer_map.find(global_image_id); + if (peer_it == m_peer_map.end()) { + return; + } + + m_peer_map.erase(peer_it); +} + +template <typename I> +void ImageMap<I>::update_images_added( + const std::string &peer_uuid, + const std::set<std::string> &global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << 
"global_image_ids=[" << global_image_ids << "]" << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + for (auto const &global_image_id : global_image_ids) { + auto result = m_peer_map[global_image_id].insert(peer_uuid); + if (result.second && m_peer_map[global_image_id].size() == 1) { + if (m_policy->add_image(global_image_id)) { + schedule_action(global_image_id); + } + } + } +} + +template <typename I> +void ImageMap<I>::update_images_removed( + const std::string &peer_uuid, + const std::set<std::string> &global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "global_image_ids=[" << global_image_ids << "]" << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Updates to_remove; + for (auto const &global_image_id : global_image_ids) { + image_map::LookupInfo info = m_policy->lookup(global_image_id); + bool image_mapped = (info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + + bool image_removed = image_mapped; + bool peer_removed = false; + auto peer_it = m_peer_map.find(global_image_id); + if (peer_it != m_peer_map.end()) { + auto& peer_set = peer_it->second; + peer_removed = peer_set.erase(peer_uuid); + image_removed = peer_removed && peer_set.empty(); + } + + if (image_mapped && peer_removed && !peer_uuid.empty()) { + // peer image has been deleted + to_remove.emplace_back(global_image_id, info.instance_id); + } + + if (image_removed) { + // local and peer images have been deleted + if (m_policy->remove_image(global_image_id)) { + schedule_action(global_image_id); + } + } + } + + if (!to_remove.empty()) { + // removal notification will be notified instantly. 
this is safe + // even after scheduling action for images as we still hold m_lock + notify_listener_remove_images(peer_uuid, to_remove); + } +} + +template <typename I> +void ImageMap<I>::update_instances_added( + const std::vector<std::string> &instance_ids) { + { + std::lock_guard locker{m_lock}; + if (m_shutting_down) { + return; + } + + std::vector<std::string> filtered_instance_ids; + filter_instance_ids(instance_ids, &filtered_instance_ids, false); + if (filtered_instance_ids.empty()) { + return; + } + + dout(20) << "instance_ids=" << filtered_instance_ids << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->add_instances(filtered_instance_ids, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::update_instances_removed( + const std::vector<std::string> &instance_ids) { + { + std::lock_guard locker{m_lock}; + if (m_shutting_down) { + return; + } + + std::vector<std::string> filtered_instance_ids; + filter_instance_ids(instance_ids, &filtered_instance_ids, true); + if (filtered_instance_ids.empty()) { + return; + } + + dout(20) << "instance_ids=" << filtered_instance_ids << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->remove_instances(filtered_instance_ids, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::update_images(const std::string &peer_uuid, + std::set<std::string> &&added_global_image_ids, + std::set<std::string> &&removed_global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " << "added_count=" + << added_global_image_ids.size() << ", " << "removed_count=" + << removed_global_image_ids.size() << dendl; + + { + std::lock_guard locker{m_lock}; + if (m_shutting_down) { + return; + } + + 
if (!removed_global_image_ids.empty()) { + update_images_removed(peer_uuid, removed_global_image_ids); + } + if (!added_global_image_ids.empty()) { + update_images_added(peer_uuid, added_global_image_ids); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_peer_ack(const std::string &global_image_id, int r) { + dout (20) << "global_image_id=" << global_image_id << ", r=" << r + << dendl; + + continue_action({global_image_id}, r); +} + +template <typename I> +void ImageMap<I>::init(Context *on_finish) { + dout(20) << dendl; + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type"); + + if (policy_type == "none" || policy_type == "simple") { + m_policy.reset(image_map::SimplePolicy::create(m_ioctx)); + } else { + ceph_abort(); // not really needed as such, but catch it. + } + + dout(20) << "mapping policy=" << policy_type << dendl; + + start_async_op(); + C_LoadMap *ctx = new C_LoadMap(this, on_finish); + image_map::LoadRequest<I> *req = image_map::LoadRequest<I>::create( + m_ioctx, &ctx->image_mapping, ctx); + req->send(); +} + +template <typename I> +void ImageMap<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + { + std::lock_guard timer_lock{m_threads->timer_lock}; + + { + std::lock_guard locker{m_lock}; + ceph_assert(!m_shutting_down); + + m_shutting_down = true; + m_policy.reset(); + } + + if (m_timer_task != nullptr) { + m_threads->timer->cancel_event(m_timer_task); + m_timer_task = nullptr; + } + if (m_rebalance_task != nullptr) { + m_threads->timer->cancel_event(m_rebalance_task); + m_rebalance_task = nullptr; + } + } + + wait_for_async_ops(on_finish); +} + +template <typename I> +void ImageMap<I>::filter_instance_ids( + const std::vector<std::string> &instance_ids, + std::vector<std::string> *filtered_instance_ids, bool removal) const { + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + 
std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type"); + + if (policy_type != "none") { + *filtered_instance_ids = instance_ids; + return; + } + + if (removal) { + // propagate removals for external instances + for (auto& instance_id : instance_ids) { + if (instance_id != m_instance_id) { + filtered_instance_ids->push_back(instance_id); + } + } + } else if (std::find(instance_ids.begin(), instance_ids.end(), + m_instance_id) != instance_ids.end()) { + // propagate addition only for local instance + filtered_instance_ids->push_back(m_instance_id); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageMap<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageMap.h b/src/tools/rbd_mirror/ImageMap.h new file mode 100644 index 000000000..9dd61ee0d --- /dev/null +++ b/src/tools/rbd_mirror/ImageMap.h @@ -0,0 +1,175 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_H + +#include <vector> + +#include "common/ceph_mutex.h" +#include "include/Context.h" +#include "common/AsyncOpTracker.h" +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +#include "image_map/Policy.h" +#include "image_map/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class ImageMap { +public: + static ImageMap *create(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads, + const std::string& instance_id, + image_map::Listener &listener) { + return new ImageMap(ioctx, threads, instance_id, listener); + } + + ~ImageMap(); + + // init (load) the instance map from disk + void init(Context *on_finish); + + // shut down map operations + void shut_down(Context *on_finish); + + // update (add/remove) images + void update_images(const std::string &peer_uuid, + 
std::set<std::string> &&added_global_image_ids, + std::set<std::string> &&removed_global_image_ids); + + // add/remove instances + void update_instances_added(const std::vector<std::string> &instances); + void update_instances_removed(const std::vector<std::string> &instances); + +private: + struct C_NotifyInstance; + + ImageMap(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads, + const std::string& instance_id, image_map::Listener &listener); + + struct Update { + std::string global_image_id; + std::string instance_id; + utime_t mapped_time; + + Update(const std::string &global_image_id, const std::string &instance_id, + utime_t mapped_time) + : global_image_id(global_image_id), + instance_id(instance_id), + mapped_time(mapped_time) { + } + Update(const std::string &global_image_id, const std::string &instance_id) + : Update(global_image_id, instance_id, ceph_clock_now()) { + } + + friend std::ostream& operator<<(std::ostream& os, + const Update& update) { + os << "{global_image_id=" << update.global_image_id << ", " + << "instance_id=" << update.instance_id << "}"; + return os; + } + + }; + typedef std::list<Update> Updates; + + // Lock ordering: m_threads->timer_lock, m_lock + + librados::IoCtx &m_ioctx; + Threads<ImageCtxT> *m_threads; + std::string m_instance_id; + image_map::Listener &m_listener; + + std::unique_ptr<image_map::Policy> m_policy; // our mapping policy + + Context *m_timer_task = nullptr; + ceph::mutex m_lock; + bool m_shutting_down = false; + AsyncOpTracker m_async_op_tracker; + + // global_image_id -> registered peers ("" == local, remote otherwise) + std::map<std::string, std::set<std::string> > m_peer_map; + + std::set<std::string> m_global_image_ids; + + Context *m_rebalance_task = nullptr; + + struct C_LoadMap : Context { + ImageMap *image_map; + Context *on_finish; + + std::map<std::string, cls::rbd::MirrorImageMap> image_mapping; + + C_LoadMap(ImageMap *image_map, Context *on_finish) + : image_map(image_map), + on_finish(on_finish) { + 
} + + void finish(int r) override { + if (r == 0) { + image_map->handle_load(image_mapping); + } + + image_map->finish_async_op(); + on_finish->complete(r); + } + }; + + // async op-tracker helper routines + void start_async_op() { + m_async_op_tracker.start_op(); + } + void finish_async_op() { + m_async_op_tracker.finish_op(); + } + void wait_for_async_ops(Context *on_finish) { + m_async_op_tracker.wait_for_ops(on_finish); + } + + void handle_peer_ack(const std::string &global_image_id, int r); + void handle_peer_ack_remove(const std::string &global_image_id, int r); + + void handle_load(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping); + void handle_update_request(const Updates &updates, + const std::set<std::string> &remove_global_image_ids, int r); + + // continue (retry or resume depending on state machine) processing + // current action. + void continue_action(const std::set<std::string> &global_image_ids, int r); + + // schedule an image for update + void schedule_action(const std::string &global_image_id); + + void schedule_update_task(); + void schedule_update_task(const ceph::mutex &timer_lock); + void process_updates(); + void update_image_mapping(Updates&& map_updates, + std::set<std::string>&& map_removals); + + void rebalance(); + void schedule_rebalance_task(); + + void notify_listener_acquire_release_images(const Updates &acquire, const Updates &release); + void notify_listener_remove_images(const std::string &peer_uuid, const Updates &remove); + + void update_images_added(const std::string &peer_uuid, + const std::set<std::string> &global_image_ids); + void update_images_removed(const std::string &peer_uuid, + const std::set<std::string> &global_image_ids); + + void filter_instance_ids(const std::vector<std::string> &instance_ids, + std::vector<std::string> *filtered_instance_ids, + bool removal) const; + +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_H diff --git 
a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc new file mode 100644 index 000000000..1e88c3262 --- /dev/null +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -0,0 +1,1201 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/Timer.h" +#include "global/global_context.h" +#include "journal/Journaler.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "ImageDeleter.h" +#include "ImageReplayer.h" +#include "MirrorStatusUpdater.h" +#include "Threads.h" +#include "tools/rbd_mirror/image_replayer/BootstrapRequest.h" +#include "tools/rbd_mirror/image_replayer/ReplayerListener.h" +#include "tools/rbd_mirror/image_replayer/StateBuilder.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_replayer/journal/Replayer.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" +#include <map> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::" << *this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_context_callback; + +template <typename I> +std::ostream &operator<<(std::ostream &os, + const typename ImageReplayer<I>::State &state); + +namespace { + +template <typename I> +class ImageReplayerAdminSocketCommand { +public: + ImageReplayerAdminSocketCommand(const std::string &desc, + ImageReplayer<I> *replayer) + : desc(desc), replayer(replayer) { + } + virtual ~ImageReplayerAdminSocketCommand() {} + 
virtual int call(Formatter *f) = 0; + + std::string desc; + ImageReplayer<I> *replayer; + bool registered = false; +}; + +template <typename I> +class StatusCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StatusCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->print_status(f); + return 0; + } +}; + +template <typename I> +class StartCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StartCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->start(nullptr, true); + return 0; + } +}; + +template <typename I> +class StopCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StopCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->stop(nullptr, true); + return 0; + } +}; + +template <typename I> +class RestartCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit RestartCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->restart(); + return 0; + } +}; + +template <typename I> +class FlushCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit FlushCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->flush(); + return 0; + } +}; + +template <typename I> +class ImageReplayerAdminSocketHook : public AdminSocketHook { +public: + ImageReplayerAdminSocketHook(CephContext *cct, const std::string &name, + ImageReplayer<I> *replayer) + : 
admin_socket(cct->get_admin_socket()), + commands{{"rbd mirror flush " + name, + new FlushCommand<I>("flush rbd mirror " + name, replayer)}, + {"rbd mirror restart " + name, + new RestartCommand<I>("restart rbd mirror " + name, replayer)}, + {"rbd mirror start " + name, + new StartCommand<I>("start rbd mirror " + name, replayer)}, + {"rbd mirror status " + name, + new StatusCommand<I>("get status for rbd mirror " + name, replayer)}, + {"rbd mirror stop " + name, + new StopCommand<I>("stop rbd mirror " + name, replayer)}} { + } + + int register_commands() { + for (auto &it : commands) { + int r = admin_socket->register_command(it.first, this, + it.second->desc); + if (r < 0) { + return r; + } + it.second->registered = true; + } + return 0; + } + + ~ImageReplayerAdminSocketHook() override { + admin_socket->unregister_commands(this); + for (auto &it : commands) { + delete it.second; + } + commands.clear(); + } + + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& errss, + bufferlist& out) override { + auto i = commands.find(command); + ceph_assert(i != commands.end()); + return i->second->call(f); + } + +private: + typedef std::map<std::string, ImageReplayerAdminSocketCommand<I>*, + std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +} // anonymous namespace + +template <typename I> +void ImageReplayer<I>::BootstrapProgressContext::update_progress( + const std::string &description, bool flush) +{ + const std::string desc = "bootstrapping, " + description; + replayer->set_state_description(0, desc); + if (flush) { + replayer->update_mirror_image_status(false, boost::none); + } +} + +template <typename I> +struct ImageReplayer<I>::ReplayerListener + : public image_replayer::ReplayerListener { + ImageReplayer<I>* image_replayer; + + ReplayerListener(ImageReplayer<I>* image_replayer) + : image_replayer(image_replayer) { + } + + void handle_notification() override { + 
image_replayer->handle_replayer_notification(); + } +}; + +template <typename I> +ImageReplayer<I>::ImageReplayer( + librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid, + const std::string &global_image_id, Threads<I> *threads, + InstanceWatcher<I> *instance_watcher, + MirrorStatusUpdater<I>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) : + m_local_io_ctx(local_io_ctx), m_local_mirror_uuid(local_mirror_uuid), + m_global_image_id(global_image_id), m_threads(threads), + m_instance_watcher(instance_watcher), + m_local_status_updater(local_status_updater), + m_cache_manager_handler(cache_manager_handler), + m_pool_meta_cache(pool_meta_cache), + m_local_image_name(global_image_id), + m_lock(ceph::make_mutex("rbd::mirror::ImageReplayer " + + stringify(local_io_ctx.get_id()) + " " + global_image_id)), + m_progress_cxt(this), + m_replayer_listener(new ReplayerListener(this)) +{ + // Register asok commands using a temporary "remote_pool_name/global_image_id" + // name. When the image name becomes known on start the asok commands will be + // re-registered using "remote_pool_name/remote_image_name" name. 
+ + m_image_spec = image_replayer::util::compute_image_spec( + local_io_ctx, global_image_id); + register_admin_socket_hook(); +} + +template <typename I> +ImageReplayer<I>::~ImageReplayer() +{ + unregister_admin_socket_hook(); + ceph_assert(m_state_builder == nullptr); + ceph_assert(m_on_start_finish == nullptr); + ceph_assert(m_on_stop_contexts.empty()); + ceph_assert(m_bootstrap_request == nullptr); + ceph_assert(m_update_status_task == nullptr); + delete m_replayer_listener; +} + +template <typename I> +image_replayer::HealthState ImageReplayer<I>::get_health_state() const { + std::lock_guard locker{m_lock}; + + if (!m_mirror_image_status_state) { + return image_replayer::HEALTH_STATE_OK; + } else if (*m_mirror_image_status_state == + cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING || + *m_mirror_image_status_state == + cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN) { + return image_replayer::HEALTH_STATE_WARNING; + } + return image_replayer::HEALTH_STATE_ERROR; +} + +template <typename I> +void ImageReplayer<I>::add_peer(const Peer<I>& peer) { + dout(10) << "peer=" << peer << dendl; + + std::lock_guard locker{m_lock}; + auto it = m_peers.find(peer); + if (it == m_peers.end()) { + m_peers.insert(peer); + } +} + +template <typename I> +void ImageReplayer<I>::set_state_description(int r, const std::string &desc) { + dout(10) << "r=" << r << ", desc=" << desc << dendl; + + std::lock_guard l{m_lock}; + m_last_r = r; + m_state_desc = desc; +} + +template <typename I> +void ImageReplayer<I>::start(Context *on_finish, bool manual, bool restart) +{ + dout(10) << "on_finish=" << on_finish << dendl; + + int r = 0; + { + std::lock_guard locker{m_lock}; + if (!is_stopped_()) { + derr << "already running" << dendl; + r = -EINVAL; + } else if (m_manual_stop && !manual) { + dout(5) << "stopped manually, ignoring start without manual flag" + << dendl; + r = -EPERM; + } else if (restart && !m_restart_requested) { + dout(10) << "canceled restart" << dendl; + r = -ECANCELED; + } else { 
+ m_state = STATE_STARTING; + m_last_r = 0; + m_state_desc.clear(); + m_manual_stop = false; + m_delete_requested = false; + m_restart_requested = false; + m_status_removed = false; + + if (on_finish != nullptr) { + ceph_assert(m_on_start_finish == nullptr); + m_on_start_finish = on_finish; + } + ceph_assert(m_on_stop_contexts.empty()); + } + } + + if (r < 0) { + if (on_finish) { + on_finish->complete(r); + } + return; + } + + bootstrap(); +} + +template <typename I> +void ImageReplayer<I>::bootstrap() { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + if (m_peers.empty()) { + locker.unlock(); + + dout(5) << "no peer clusters" << dendl; + on_start_fail(-ENOENT, "no peer clusters"); + return; + } + + // TODO need to support multiple remote images + ceph_assert(!m_peers.empty()); + m_remote_image_peer = *m_peers.begin(); + + ceph_assert(m_state_builder == nullptr); + auto ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_bootstrap>(this); + auto request = image_replayer::BootstrapRequest<I>::create( + m_threads, m_local_io_ctx, m_remote_image_peer.io_ctx, m_instance_watcher, + m_global_image_id, m_local_mirror_uuid, + m_remote_image_peer.remote_pool_meta, m_cache_manager_handler, + m_pool_meta_cache, &m_progress_cxt, &m_state_builder, &m_resync_requested, + ctx); + + request->get(); + m_bootstrap_request = request; + + // proceed even if stop was requested to allow for m_delete_requested + // to get set; cancel() would prevent BootstrapRequest from going into + // image sync + if (m_stop_requested) { + request->cancel(); + } + locker.unlock(); + + update_mirror_image_status(false, boost::none); + request->send(); +} + +template <typename I> +void ImageReplayer<I>::handle_bootstrap(int r) { + dout(10) << "r=" << r << dendl; + { + std::lock_guard locker{m_lock}; + m_bootstrap_request->put(); + m_bootstrap_request = nullptr; + } + + // set m_delete_requested early to ensure that in case remote + // image no longer exists local image 
gets deleted even if start + // is interrupted + if (r == -ENOLINK) { + dout(5) << "remote image no longer exists" << dendl; + m_delete_requested = true; + } + + if (on_start_interrupted()) { + return; + } else if (r == -ENOMSG) { + dout(5) << "local image is primary" << dendl; + on_start_fail(0, "local image is primary"); + return; + } else if (r == -EREMOTEIO) { + dout(5) << "remote image is not primary" << dendl; + on_start_fail(-EREMOTEIO, "remote image is not primary"); + return; + } else if (r == -EEXIST) { + on_start_fail(r, "split-brain detected"); + return; + } else if (r == -ENOLINK) { + on_start_fail(0, "remote image no longer exists"); + return; + } else if (r == -ERESTART) { + on_start_fail(r, "image in transient state, try again"); + return; + } else if (r < 0) { + on_start_fail(r, "error bootstrapping replay"); + return; + } else if (m_resync_requested) { + on_start_fail(0, "resync requested"); + return; + } + + start_replay(); +} + +template <typename I> +void ImageReplayer<I>::start_replay() { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + ceph_assert(m_replayer == nullptr); + m_replayer = m_state_builder->create_replayer(m_threads, m_instance_watcher, + m_local_mirror_uuid, + m_pool_meta_cache, + m_replayer_listener); + + auto ctx = create_context_callback< + ImageReplayer<I>, &ImageReplayer<I>::handle_start_replay>(this); + m_replayer->init(ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_start_replay(int r) { + dout(10) << "r=" << r << dendl; + + if (on_start_interrupted()) { + return; + } else if (r < 0) { + std::string error_description = m_replayer->get_error_description(); + if (r == -ENOTCONN && m_replayer->is_resync_requested()) { + std::unique_lock locker{m_lock}; + m_resync_requested = true; + } + + // shut down not required if init failed + m_replayer->destroy(); + m_replayer = nullptr; + + derr << "error starting replay: " << cpp_strerror(r) << dendl; + on_start_fail(r, error_description); + return; + } + + 
Context *on_finish = nullptr; + { + std::unique_lock locker{m_lock}; + ceph_assert(m_state == STATE_STARTING); + m_state = STATE_REPLAYING; + std::swap(m_on_start_finish, on_finish); + + std::unique_lock timer_locker{m_threads->timer_lock}; + schedule_update_mirror_image_replay_status(); + } + + update_mirror_image_status(true, boost::none); + if (on_replay_interrupted()) { + if (on_finish != nullptr) { + on_finish->complete(r); + } + return; + } + + dout(10) << "start succeeded" << dendl; + if (on_finish != nullptr) { + dout(10) << "on finish complete, r=" << r << dendl; + on_finish->complete(r); + } +} + +template <typename I> +void ImageReplayer<I>::on_start_fail(int r, const std::string &desc) +{ + dout(10) << "r=" << r << ", desc=" << desc << dendl; + Context *ctx = new LambdaContext([this, r, desc](int _r) { + { + std::lock_guard locker{m_lock}; + ceph_assert(m_state == STATE_STARTING); + m_state = STATE_STOPPING; + if (r < 0 && r != -ECANCELED && r != -EREMOTEIO && r != -ENOENT) { + derr << "start failed: " << cpp_strerror(r) << dendl; + } else { + dout(10) << "start canceled" << dendl; + } + } + + set_state_description(r, desc); + update_mirror_image_status(false, boost::none); + shut_down(r); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +bool ImageReplayer<I>::on_start_interrupted() { + std::lock_guard locker{m_lock}; + return on_start_interrupted(m_lock); +} + +template <typename I> +bool ImageReplayer<I>::on_start_interrupted(ceph::mutex& lock) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_state == STATE_STARTING); + if (!m_stop_requested) { + return false; + } + + on_start_fail(-ECANCELED, ""); + return true; +} + +template <typename I> +void ImageReplayer<I>::stop(Context *on_finish, bool manual, bool restart) +{ + dout(10) << "on_finish=" << on_finish << ", manual=" << manual + << ", restart=" << restart << dendl; + + image_replayer::BootstrapRequest<I> *bootstrap_request = nullptr; + bool shut_down_replay 
= false; + bool is_stopped = false; + { + std::lock_guard locker{m_lock}; + + if (!is_running_()) { + if (manual && !m_manual_stop) { + dout(10) << "marking manual" << dendl; + m_manual_stop = true; + } + if (!restart && m_restart_requested) { + dout(10) << "canceling restart" << dendl; + m_restart_requested = false; + } + if (is_stopped_()) { + dout(10) << "already stopped" << dendl; + is_stopped = true; + } else { + dout(10) << "joining in-flight stop" << dendl; + if (on_finish != nullptr) { + m_on_stop_contexts.push_back(on_finish); + } + } + } else { + if (m_state == STATE_STARTING) { + dout(10) << "canceling start" << dendl; + if (m_bootstrap_request != nullptr) { + bootstrap_request = m_bootstrap_request; + bootstrap_request->get(); + } + } else { + dout(10) << "interrupting replay" << dendl; + shut_down_replay = true; + } + + ceph_assert(m_on_stop_contexts.empty()); + if (on_finish != nullptr) { + m_on_stop_contexts.push_back(on_finish); + } + m_stop_requested = true; + m_manual_stop = manual; + } + } + + if (is_stopped) { + if (on_finish) { + on_finish->complete(-EINVAL); + } + return; + } + + // avoid holding lock since bootstrap request will update status + if (bootstrap_request != nullptr) { + dout(10) << "canceling bootstrap" << dendl; + bootstrap_request->cancel(); + bootstrap_request->put(); + } + + if (shut_down_replay) { + on_stop_journal_replay(); + } +} + +template <typename I> +void ImageReplayer<I>::on_stop_journal_replay(int r, const std::string &desc) +{ + dout(10) << dendl; + + { + std::lock_guard locker{m_lock}; + if (m_state != STATE_REPLAYING) { + // might be invoked multiple times while stopping + return; + } + + m_stop_requested = true; + m_state = STATE_STOPPING; + } + + cancel_update_mirror_image_replay_status(); + set_state_description(r, desc); + update_mirror_image_status(true, boost::none); + shut_down(0); +} + +template <typename I> +void ImageReplayer<I>::restart(Context *on_finish) +{ + { + std::lock_guard locker{m_lock}; + 
m_restart_requested = true; + } + + auto ctx = new LambdaContext( + [this, on_finish](int r) { + if (r < 0) { + // Try start anyway. + } + start(on_finish, true, true); + }); + stop(ctx, false, true); +} + +template <typename I> +void ImageReplayer<I>::flush() +{ + C_SaferCond ctx; + + { + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING) { + return; + } + + dout(10) << dendl; + ceph_assert(m_replayer != nullptr); + m_replayer->flush(&ctx); + } + + int r = ctx.wait(); + if (r >= 0) { + update_mirror_image_status(false, boost::none); + } +} + +template <typename I> +bool ImageReplayer<I>::on_replay_interrupted() +{ + bool shut_down; + { + std::lock_guard locker{m_lock}; + shut_down = m_stop_requested; + } + + if (shut_down) { + on_stop_journal_replay(); + } + return shut_down; +} + +template <typename I> +void ImageReplayer<I>::print_status(Formatter *f) +{ + dout(10) << dendl; + + std::lock_guard l{m_lock}; + + f->open_object_section("image_replayer"); + f->dump_string("name", m_image_spec); + f->dump_string("state", to_string(m_state)); + f->close_section(); +} + +template <typename I> +void ImageReplayer<I>::schedule_update_mirror_image_replay_status() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock)); + if (m_state != STATE_REPLAYING) { + return; + } + + dout(10) << dendl; + + // periodically update the replaying status even if nothing changes + // so that we can adjust our performance stats + ceph_assert(m_update_status_task == nullptr); + m_update_status_task = create_context_callback< + ImageReplayer<I>, + &ImageReplayer<I>::handle_update_mirror_image_replay_status>(this); + m_threads->timer->add_event_after(10, m_update_status_task); +} + +template <typename I> +void ImageReplayer<I>::handle_update_mirror_image_replay_status(int r) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock)); + + ceph_assert(m_update_status_task != nullptr); + 
m_update_status_task = nullptr; + + auto ctx = new LambdaContext([this](int) { + update_mirror_image_status(false, boost::none); + + std::unique_lock locker{m_lock}; + std::unique_lock timer_locker{m_threads->timer_lock}; + + schedule_update_mirror_image_replay_status(); + m_in_flight_op_tracker.finish_op(); + }); + + m_in_flight_op_tracker.start_op(); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageReplayer<I>::cancel_update_mirror_image_replay_status() { + std::unique_lock timer_locker{m_threads->timer_lock}; + if (m_update_status_task != nullptr) { + dout(10) << dendl; + + if (m_threads->timer->cancel_event(m_update_status_task)) { + m_update_status_task = nullptr; + } + } +} + +template <typename I> +void ImageReplayer<I>::update_mirror_image_status( + bool force, const OptionalState &opt_state) { + dout(15) << "force=" << force << ", " + << "state=" << opt_state << dendl; + + { + std::lock_guard locker{m_lock}; + if (!force && !is_stopped_() && !is_running_()) { + dout(15) << "shut down in-progress: ignoring update" << dendl; + return; + } + } + + m_in_flight_op_tracker.start_op(); + auto ctx = new LambdaContext( + [this, force, opt_state](int r) { + set_mirror_image_status_update(force, opt_state); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageReplayer<I>::set_mirror_image_status_update( + bool force, const OptionalState &opt_state) { + dout(15) << "force=" << force << ", " + << "state=" << opt_state << dendl; + + reregister_admin_socket_hook(); + + State state; + std::string state_desc; + int last_r; + bool stopping_replay; + + auto mirror_image_status_state = boost::make_optional( + false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN); + image_replayer::BootstrapRequest<I>* bootstrap_request = nullptr; + { + std::lock_guard locker{m_lock}; + state = m_state; + state_desc = m_state_desc; + mirror_image_status_state = m_mirror_image_status_state; + last_r = m_last_r; + stopping_replay = 
(m_replayer != nullptr); + + if (m_bootstrap_request != nullptr) { + bootstrap_request = m_bootstrap_request; + bootstrap_request->get(); + } + } + + bool syncing = false; + if (bootstrap_request != nullptr) { + syncing = bootstrap_request->is_syncing(); + bootstrap_request->put(); + bootstrap_request = nullptr; + } + + if (opt_state) { + state = *opt_state; + } + + cls::rbd::MirrorImageSiteStatus status; + status.up = true; + switch (state) { + case STATE_STARTING: + if (syncing) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING; + status.description = state_desc.empty() ? "syncing" : state_desc; + mirror_image_status_state = status.state; + } else { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY; + status.description = "starting replay"; + } + break; + case STATE_REPLAYING: + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING; + { + std::string desc; + auto on_req_finish = new LambdaContext( + [this, force](int r) { + dout(15) << "replay status ready: r=" << r << dendl; + if (r >= 0) { + set_mirror_image_status_update(force, boost::none); + } else if (r == -EAGAIN) { + m_in_flight_op_tracker.finish_op(); + } + }); + + ceph_assert(m_replayer != nullptr); + if (!m_replayer->get_replay_status(&desc, on_req_finish)) { + dout(15) << "waiting for replay status" << dendl; + return; + } + + status.description = "replaying, " + desc; + mirror_image_status_state = boost::make_optional( + false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN); + } + break; + case STATE_STOPPING: + if (stopping_replay) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY; + status.description = state_desc.empty() ? 
"stopping replay" : state_desc; + break; + } + // FALLTHROUGH + case STATE_STOPPED: + if (last_r == -EREMOTEIO) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN; + status.description = state_desc; + mirror_image_status_state = status.state; + } else if (last_r < 0 && last_r != -ECANCELED) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR; + status.description = state_desc; + mirror_image_status_state = status.state; + } else { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPED; + status.description = state_desc.empty() ? "stopped" : state_desc; + mirror_image_status_state = boost::none; + } + break; + default: + ceph_assert(!"invalid state"); + } + + { + std::lock_guard locker{m_lock}; + m_mirror_image_status_state = mirror_image_status_state; + } + + // prevent the status from ping-ponging when failed replays are restarted + if (mirror_image_status_state && + *mirror_image_status_state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR) { + status.state = *mirror_image_status_state; + } + + dout(15) << "status=" << status << dendl; + m_local_status_updater->set_mirror_image_status(m_global_image_id, status, + force); + if (m_remote_image_peer.mirror_status_updater != nullptr) { + m_remote_image_peer.mirror_status_updater->set_mirror_image_status( + m_global_image_id, status, force); + } + + m_in_flight_op_tracker.finish_op(); +} + +template <typename I> +void ImageReplayer<I>::shut_down(int r) { + dout(10) << "r=" << r << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_state == STATE_STOPPING); + } + + if (!m_in_flight_op_tracker.empty()) { + dout(15) << "waiting for in-flight operations to complete" << dendl; + m_in_flight_op_tracker.wait_for_ops(new LambdaContext([this, r](int) { + shut_down(r); + })); + return; + } + + // chain the shut down sequence (reverse order) + Context *ctx = new LambdaContext( + [this, r](int _r) { + update_mirror_image_status(true, STATE_STOPPED); + handle_shut_down(r); + }); + + // destruct 
the state builder + if (m_state_builder != nullptr) { + ctx = new LambdaContext([this, ctx](int r) { + m_state_builder->close(ctx); + }); + } + + // close the replayer + if (m_replayer != nullptr) { + ctx = new LambdaContext([this, ctx](int r) { + m_replayer->destroy(); + m_replayer = nullptr; + ctx->complete(0); + }); + ctx = new LambdaContext([this, ctx](int r) { + m_replayer->shut_down(ctx); + }); + } + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageReplayer<I>::handle_shut_down(int r) { + bool resync_requested = false; + bool delete_requested = false; + bool unregister_asok_hook = false; + { + std::lock_guard locker{m_lock}; + + if (m_delete_requested && m_state_builder != nullptr && + !m_state_builder->local_image_id.empty()) { + ceph_assert(m_state_builder->remote_image_id.empty()); + dout(0) << "remote image no longer exists: scheduling deletion" << dendl; + unregister_asok_hook = true; + std::swap(delete_requested, m_delete_requested); + m_delete_in_progress = true; + } + + std::swap(resync_requested, m_resync_requested); + if (!delete_requested && !resync_requested && m_last_r == -ENOENT && + ((m_state_builder == nullptr) || + (m_state_builder->local_image_id.empty() && + m_state_builder->remote_image_id.empty()))) { + dout(0) << "mirror image no longer exists" << dendl; + unregister_asok_hook = true; + m_finished = true; + } + } + + if (unregister_asok_hook) { + unregister_admin_socket_hook(); + } + + if (delete_requested || resync_requested) { + dout(5) << "moving image to trash" << dendl; + auto ctx = new LambdaContext([this, r](int) { + handle_shut_down(r); + }); + ImageDeleter<I>::trash_move(m_local_io_ctx, m_global_image_id, + resync_requested, m_threads->work_queue, ctx); + return; + } + + if (!m_in_flight_op_tracker.empty()) { + dout(15) << "waiting for in-flight operations to complete" << dendl; + m_in_flight_op_tracker.wait_for_ops(new LambdaContext([this, r](int) { + handle_shut_down(r); + })); + return; + } + + 
if (!m_status_removed) { + auto ctx = new LambdaContext([this, r](int) { + m_status_removed = true; + handle_shut_down(r); + }); + remove_image_status(m_delete_in_progress, ctx); + return; + } + + if (m_state_builder != nullptr) { + m_state_builder->destroy(); + m_state_builder = nullptr; + } + + dout(10) << "stop complete" << dendl; + Context *on_start = nullptr; + Contexts on_stop_contexts; + { + std::lock_guard locker{m_lock}; + std::swap(on_start, m_on_start_finish); + on_stop_contexts = std::move(m_on_stop_contexts); + m_stop_requested = false; + ceph_assert(m_state == STATE_STOPPING); + m_state = STATE_STOPPED; + } + + if (on_start != nullptr) { + dout(10) << "on start finish complete, r=" << r << dendl; + on_start->complete(r); + r = 0; + } + for (auto ctx : on_stop_contexts) { + dout(10) << "on stop finish " << ctx << " complete, r=" << r << dendl; + ctx->complete(r); + } +} + +template <typename I> +void ImageReplayer<I>::handle_replayer_notification() { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING) { + // might be attempting to shut down + return; + } + + { + // detect a rename of the local image + ceph_assert(m_state_builder != nullptr && + m_state_builder->local_image_ctx != nullptr); + std::shared_lock image_locker{m_state_builder->local_image_ctx->image_lock}; + if (m_local_image_name != m_state_builder->local_image_ctx->name) { + // will re-register with new name after next status update + dout(10) << "image renamed" << dendl; + m_local_image_name = m_state_builder->local_image_ctx->name; + } + } + + // replayer cannot be shut down while notification is in-flight + ceph_assert(m_replayer != nullptr); + locker.unlock(); + + if (m_replayer->is_resync_requested()) { + dout(10) << "resync requested" << dendl; + m_resync_requested = true; + on_stop_journal_replay(0, "resync requested"); + return; + } + + if (!m_replayer->is_replaying()) { + auto error_code = m_replayer->get_error_code(); + auto 
error_description = m_replayer->get_error_description(); + dout(10) << "replay interrupted: " + << "r=" << error_code << ", " + << "error=" << error_description << dendl; + on_stop_journal_replay(error_code, error_description); + return; + } + + update_mirror_image_status(false, {}); +} + +template <typename I> +std::string ImageReplayer<I>::to_string(const State state) { + switch (state) { + case ImageReplayer<I>::STATE_STARTING: + return "Starting"; + case ImageReplayer<I>::STATE_REPLAYING: + return "Replaying"; + case ImageReplayer<I>::STATE_STOPPING: + return "Stopping"; + case ImageReplayer<I>::STATE_STOPPED: + return "Stopped"; + default: + break; + } + return "Unknown(" + stringify(state) + ")"; +} + +template <typename I> +void ImageReplayer<I>::register_admin_socket_hook() { + ImageReplayerAdminSocketHook<I> *asok_hook; + { + std::lock_guard locker{m_lock}; + if (m_asok_hook != nullptr) { + return; + } + + dout(15) << "registered asok hook: " << m_image_spec << dendl; + asok_hook = new ImageReplayerAdminSocketHook<I>( + g_ceph_context, m_image_spec, this); + int r = asok_hook->register_commands(); + if (r == 0) { + m_asok_hook = asok_hook; + return; + } + derr << "error registering admin socket commands" << dendl; + } + delete asok_hook; +} + +template <typename I> +void ImageReplayer<I>::unregister_admin_socket_hook() { + dout(15) << dendl; + + AdminSocketHook *asok_hook = nullptr; + { + std::lock_guard locker{m_lock}; + std::swap(asok_hook, m_asok_hook); + } + delete asok_hook; +} + +template <typename I> +void ImageReplayer<I>::reregister_admin_socket_hook() { + std::unique_lock locker{m_lock}; + if (m_state == STATE_STARTING && m_bootstrap_request != nullptr) { + m_local_image_name = m_bootstrap_request->get_local_image_name(); + } + + auto image_spec = image_replayer::util::compute_image_spec( + m_local_io_ctx, m_local_image_name); + if (m_asok_hook != nullptr && m_image_spec == image_spec) { + return; + } + + dout(15) << "old_image_spec=" << 
m_image_spec << ", " + << "new_image_spec=" << image_spec << dendl; + m_image_spec = image_spec; + + if (m_state == STATE_STOPPING || m_state == STATE_STOPPED) { + // no need to re-register if stopping + return; + } + locker.unlock(); + + unregister_admin_socket_hook(); + register_admin_socket_hook(); +} + +template <typename I> +void ImageReplayer<I>::remove_image_status(bool force, Context *on_finish) +{ + auto ctx = new LambdaContext([this, force, on_finish](int) { + remove_image_status_remote(force, on_finish); + }); + + if (m_local_status_updater->exists(m_global_image_id)) { + dout(15) << "removing local mirror image status" << dendl; + if (force) { + m_local_status_updater->remove_mirror_image_status( + m_global_image_id, true, ctx); + } else { + m_local_status_updater->remove_refresh_mirror_image_status( + m_global_image_id, ctx); + } + return; + } + + ctx->complete(0); +} + +template <typename I> +void ImageReplayer<I>::remove_image_status_remote(bool force, Context *on_finish) +{ + if (m_remote_image_peer.mirror_status_updater != nullptr && + m_remote_image_peer.mirror_status_updater->exists(m_global_image_id)) { + dout(15) << "removing remote mirror image status" << dendl; + if (force) { + m_remote_image_peer.mirror_status_updater->remove_mirror_image_status( + m_global_image_id, true, on_finish); + } else { + m_remote_image_peer.mirror_status_updater->remove_refresh_mirror_image_status( + m_global_image_id, on_finish); + } + return; + } + if (on_finish) { + on_finish->complete(0); + } +} + +template <typename I> +std::ostream &operator<<(std::ostream &os, const ImageReplayer<I> &replayer) +{ + os << "ImageReplayer: " << &replayer << " [" << replayer.get_local_pool_id() + << "/" << replayer.get_global_image_id() << "]"; + return os; +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h new file mode 100644 
index 000000000..432fdf225 --- /dev/null +++ b/src/tools/rbd_mirror/ImageReplayer.h @@ -0,0 +1,273 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_H + +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include "ProgressContext.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_replayer/Types.h" +#include <boost/optional.hpp> +#include <string> + +class AdminSocketHook; + +namespace journal { struct CacheManagerHandler; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct InstanceWatcher; +template <typename> struct MirrorStatusUpdater; +struct PoolMetaCache; +template <typename> struct Threads; + +namespace image_replayer { + +class Replayer; +template <typename> class BootstrapRequest; +template <typename> class StateBuilder; + +} // namespace image_replayer + +/** + * Replays changes from a remote cluster for a single image. 
+ */ +template <typename ImageCtxT = librbd::ImageCtx> +class ImageReplayer { +public: + static ImageReplayer *create( + librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid, + const std::string &global_image_id, Threads<ImageCtxT> *threads, + InstanceWatcher<ImageCtxT> *instance_watcher, + MirrorStatusUpdater<ImageCtxT>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) { + return new ImageReplayer(local_io_ctx, local_mirror_uuid, global_image_id, + threads, instance_watcher, local_status_updater, + cache_manager_handler, pool_meta_cache); + } + void destroy() { + delete this; + } + + ImageReplayer(librados::IoCtx &local_io_ctx, + const std::string &local_mirror_uuid, + const std::string &global_image_id, + Threads<ImageCtxT> *threads, + InstanceWatcher<ImageCtxT> *instance_watcher, + MirrorStatusUpdater<ImageCtxT>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache); + virtual ~ImageReplayer(); + ImageReplayer(const ImageReplayer&) = delete; + ImageReplayer& operator=(const ImageReplayer&) = delete; + + bool is_stopped() { std::lock_guard l{m_lock}; return is_stopped_(); } + bool is_running() { std::lock_guard l{m_lock}; return is_running_(); } + bool is_replaying() { std::lock_guard l{m_lock}; return is_replaying_(); } + + std::string get_name() { std::lock_guard l{m_lock}; return m_image_spec; }; + void set_state_description(int r, const std::string &desc); + + // TODO temporary until policy handles release of image replayers + inline bool is_finished() const { + std::lock_guard locker{m_lock}; + return m_finished; + } + inline void set_finished(bool finished) { + std::lock_guard locker{m_lock}; + m_finished = finished; + } + + inline bool is_blocklisted() const { + std::lock_guard locker{m_lock}; + return (m_last_r == -EBLOCKLISTED); + } + + image_replayer::HealthState get_health_state() const; + + void add_peer(const 
Peer<ImageCtxT>& peer); + + inline int64_t get_local_pool_id() const { + return m_local_io_ctx.get_id(); + } + inline const std::string& get_global_image_id() const { + return m_global_image_id; + } + + void start(Context *on_finish, bool manual = false, bool restart = false); + void stop(Context *on_finish, bool manual = false, bool restart = false); + void restart(Context *on_finish = nullptr); + void flush(); + + void print_status(Formatter *f); + +protected: + /** + * @verbatim + * (error) + * <uninitialized> <------------------------------------ FAIL + * | ^ + * v * + * <starting> * + * | * + * v (error) * + * BOOTSTRAP_IMAGE * * * * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * START_REPLAY * * * * * * * * * * * * * * * * * * * * * * + * | + * v + * REPLAYING + * | + * v + * JOURNAL_REPLAY_SHUT_DOWN + * | + * v + * LOCAL_IMAGE_CLOSE + * | + * v + * <stopped> + * + * @endverbatim + */ + + void on_start_fail(int r, const std::string &desc); + bool on_start_interrupted(); + bool on_start_interrupted(ceph::mutex& lock); + + void on_stop_journal_replay(int r = 0, const std::string &desc = ""); + + bool on_replay_interrupted(); + +private: + typedef std::set<Peer<ImageCtxT>> Peers; + typedef std::list<Context *> Contexts; + + enum State { + STATE_UNKNOWN, + STATE_STARTING, + STATE_REPLAYING, + STATE_STOPPING, + STATE_STOPPED, + }; + + struct ReplayerListener; + + typedef boost::optional<State> OptionalState; + typedef boost::optional<cls::rbd::MirrorImageStatusState> + OptionalMirrorImageStatusState; + + class BootstrapProgressContext : public ProgressContext { + public: + BootstrapProgressContext(ImageReplayer<ImageCtxT> *replayer) : + replayer(replayer) { + } + + void update_progress(const std::string &description, + bool flush = true) override; + + private: + ImageReplayer<ImageCtxT> *replayer; + }; + + librados::IoCtx &m_local_io_ctx; + std::string m_local_mirror_uuid; + std::string m_global_image_id; + Threads<ImageCtxT> *m_threads; + 
InstanceWatcher<ImageCtxT> *m_instance_watcher; + MirrorStatusUpdater<ImageCtxT>* m_local_status_updater; + journal::CacheManagerHandler *m_cache_manager_handler; + PoolMetaCache* m_pool_meta_cache; + + Peers m_peers; + Peer<ImageCtxT> m_remote_image_peer; + + std::string m_local_image_name; + std::string m_image_spec; + + mutable ceph::mutex m_lock; + State m_state = STATE_STOPPED; + std::string m_state_desc; + + OptionalMirrorImageStatusState m_mirror_image_status_state = + boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN); + int m_last_r = 0; + + BootstrapProgressContext m_progress_cxt; + + bool m_finished = false; + bool m_delete_in_progress = false; + bool m_delete_requested = false; + bool m_resync_requested = false; + bool m_restart_requested = false; + + bool m_status_removed = false; + + image_replayer::StateBuilder<ImageCtxT>* m_state_builder = nullptr; + image_replayer::Replayer* m_replayer = nullptr; + ReplayerListener* m_replayer_listener = nullptr; + + Context *m_on_start_finish = nullptr; + Contexts m_on_stop_contexts; + bool m_stop_requested = false; + bool m_manual_stop = false; + + AdminSocketHook *m_asok_hook = nullptr; + + image_replayer::BootstrapRequest<ImageCtxT> *m_bootstrap_request = nullptr; + + AsyncOpTracker m_in_flight_op_tracker; + + Context* m_update_status_task = nullptr; + + static std::string to_string(const State state); + + bool is_stopped_() const { + return m_state == STATE_STOPPED; + } + bool is_running_() const { + return !is_stopped_() && m_state != STATE_STOPPING && !m_stop_requested; + } + bool is_replaying_() const { + return (m_state == STATE_REPLAYING); + } + + void schedule_update_mirror_image_replay_status(); + void handle_update_mirror_image_replay_status(int r); + void cancel_update_mirror_image_replay_status(); + + void update_mirror_image_status(bool force, const OptionalState &state); + void set_mirror_image_status_update(bool force, const OptionalState &state); + + void shut_down(int r); + 
void handle_shut_down(int r); + + void bootstrap(); + void handle_bootstrap(int r); + + void start_replay(); + void handle_start_replay(int r); + + void handle_replayer_notification(); + + void register_admin_socket_hook(); + void unregister_admin_socket_hook(); + void reregister_admin_socket_hook(); + void remove_image_status(bool force, Context *on_finish); + void remove_image_status_remote(bool force, Context *on_finish); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageReplayer<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_H diff --git a/src/tools/rbd_mirror/ImageSync.cc b/src/tools/rbd_mirror/ImageSync.cc new file mode 100644 index 000000000..43d0c6663 --- /dev/null +++ b/src/tools/rbd_mirror/ImageSync.cc @@ -0,0 +1,469 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ImageSync.h" +#include "InstanceWatcher.h" +#include "ProgressContext.h" +#include "common/debug.h" +#include "common/Timer.h" +#include "common/errno.h" +#include "librbd/DeepCopyRequest.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include "librbd/internal.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/deep_copy/Handler.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_sync/SyncPointCreateRequest.h" +#include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.h" +#include "tools/rbd_mirror/image_sync/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageSync: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { + +using namespace image_sync; +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::unique_lock_name; + +template <typename I> +class ImageSync<I>::ImageCopyProgressHandler + : public 
librbd::deep_copy::NoOpHandler { +public: + ImageCopyProgressHandler(ImageSync *image_sync) : image_sync(image_sync) { + } + + int update_progress(uint64_t object_no, uint64_t object_count) override { + image_sync->handle_copy_image_update_progress(object_no, object_count); + return 0; + } + + ImageSync *image_sync; +}; + +template <typename I> +ImageSync<I>::ImageSync( + Threads<I>* threads, + I *local_image_ctx, + I *remote_image_ctx, + const std::string &local_mirror_uuid, + image_sync::SyncPointHandler* sync_point_handler, + InstanceWatcher<I> *instance_watcher, + ProgressContext *progress_ctx, + Context *on_finish) + : CancelableRequest("rbd::mirror::ImageSync", local_image_ctx->cct, + on_finish), + m_threads(threads), + m_local_image_ctx(local_image_ctx), + m_remote_image_ctx(remote_image_ctx), + m_local_mirror_uuid(local_mirror_uuid), + m_sync_point_handler(sync_point_handler), + m_instance_watcher(instance_watcher), + m_progress_ctx(progress_ctx), + m_lock(ceph::make_mutex(unique_lock_name("ImageSync::m_lock", this))), + m_update_sync_point_interval( + m_local_image_ctx->cct->_conf.template get_val<double>( + "rbd_mirror_sync_point_update_age")) { +} + +template <typename I> +ImageSync<I>::~ImageSync() { + ceph_assert(m_image_copy_request == nullptr); + ceph_assert(m_image_copy_prog_handler == nullptr); + ceph_assert(m_update_sync_ctx == nullptr); +} + +template <typename I> +void ImageSync<I>::send() { + send_notify_sync_request(); +} + +template <typename I> +void ImageSync<I>::cancel() { + std::lock_guard locker{m_lock}; + + dout(10) << dendl; + + m_canceled = true; + + if (m_instance_watcher->cancel_sync_request(m_local_image_ctx->id)) { + return; + } + + if (m_image_copy_request != nullptr) { + m_image_copy_request->cancel(); + } +} + +template <typename I> +void ImageSync<I>::send_notify_sync_request() { + update_progress("NOTIFY_SYNC_REQUEST"); + + dout(10) << dendl; + + m_lock.lock(); + if (m_canceled) { + m_lock.unlock(); + 
CancelableRequest::finish(-ECANCELED); + return; + } + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_notify_sync_request>(this)); + m_instance_watcher->notify_sync_request(m_local_image_ctx->id, ctx); + m_lock.unlock(); +} + +template <typename I> +void ImageSync<I>::handle_notify_sync_request(int r) { + dout(10) << ": r=" << r << dendl; + + m_lock.lock(); + if (r == 0 && m_canceled) { + r = -ECANCELED; + } + m_lock.unlock(); + + if (r < 0) { + CancelableRequest::finish(r); + return; + } + + send_prune_catch_up_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_prune_catch_up_sync_point() { + update_progress("PRUNE_CATCH_UP_SYNC_POINT"); + + if (m_sync_point_handler->get_sync_points().empty()) { + send_create_sync_point(); + return; + } + + dout(10) << dendl; + + // prune will remove sync points with missing snapshots and + // ensure we have a maximum of one sync point (in case we + // restarted) + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_prune_catch_up_sync_point>(this); + SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create( + m_remote_image_ctx, false, m_sync_point_handler, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_prune_catch_up_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to prune catch-up sync point: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_create_sync_point() { + update_progress("CREATE_SYNC_POINT"); + + // TODO: when support for disconnecting laggy clients is added, + // re-connect and create catch-up sync point + if (!m_sync_point_handler->get_sync_points().empty()) { + send_copy_image(); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageSync<I>, 
&ImageSync<I>::handle_create_sync_point>(this); + SyncPointCreateRequest<I> *request = SyncPointCreateRequest<I>::create( + m_remote_image_ctx, m_local_mirror_uuid, m_sync_point_handler, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_create_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to create sync point: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_copy_image(); +} + +template <typename I> +void ImageSync<I>::send_copy_image() { + librados::snap_t snap_id_start = 0; + librados::snap_t snap_id_end; + librbd::deep_copy::ObjectNumber object_number; + int r = 0; + + m_snap_seqs_copy = m_sync_point_handler->get_snap_seqs(); + m_sync_points_copy = m_sync_point_handler->get_sync_points(); + ceph_assert(!m_sync_points_copy.empty()); + auto &sync_point = m_sync_points_copy.front(); + + { + std::shared_lock image_locker{m_remote_image_ctx->image_lock}; + snap_id_end = m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name); + if (snap_id_end == CEPH_NOSNAP) { + derr << ": failed to locate snapshot: " << sync_point.snap_name << dendl; + r = -ENOENT; + } else if (!sync_point.from_snap_name.empty()) { + snap_id_start = m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.from_snap_name); + if (snap_id_start == CEPH_NOSNAP) { + derr << ": failed to locate from snapshot: " + << sync_point.from_snap_name << dendl; + r = -ENOENT; + } + } + object_number = sync_point.object_number; + } + if (r < 0) { + finish(r); + return; + } + + m_lock.lock(); + if (m_canceled) { + m_lock.unlock(); + finish(-ECANCELED); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_copy_image>(this); + m_image_copy_prog_handler = new ImageCopyProgressHandler(this); + m_image_copy_request = librbd::DeepCopyRequest<I>::create( + m_remote_image_ctx, m_local_image_ctx, 
snap_id_start, snap_id_end, + 0, false, object_number, m_threads->work_queue, &m_snap_seqs_copy, + m_image_copy_prog_handler, ctx); + m_image_copy_request->get(); + m_lock.unlock(); + + update_progress("COPY_IMAGE"); + + m_image_copy_request->send(); +} + +template <typename I> +void ImageSync<I>::handle_copy_image(int r) { + dout(10) << ": r=" << r << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + m_image_copy_request->put(); + m_image_copy_request = nullptr; + delete m_image_copy_prog_handler; + m_image_copy_prog_handler = nullptr; + if (r == 0 && m_canceled) { + r = -ECANCELED; + } + + if (m_update_sync_ctx != nullptr) { + m_threads->timer->cancel_event(m_update_sync_ctx); + m_update_sync_ctx = nullptr; + } + + if (m_updating_sync_point) { + m_ret_val = r; + return; + } + } + + if (r == -ECANCELED) { + dout(10) << ": image copy canceled" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << ": failed to copy image: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_flush_sync_point(); +} + +template <typename I> +void ImageSync<I>::handle_copy_image_update_progress(uint64_t object_no, + uint64_t object_count) { + int percent = 100 * object_no / object_count; + update_progress("COPY_IMAGE " + stringify(percent) + "%"); + + std::lock_guard locker{m_lock}; + m_image_copy_object_no = object_no; + m_image_copy_object_count = object_count; + + if (m_update_sync_ctx == nullptr && !m_updating_sync_point) { + send_update_sync_point(); + } +} + +template <typename I> +void ImageSync<I>::send_update_sync_point() { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + m_update_sync_ctx = nullptr; + + if (m_canceled) { + return; + } + + ceph_assert(!m_sync_points_copy.empty()); + auto sync_point = &m_sync_points_copy.front(); + + if (sync_point->object_number && + (m_image_copy_object_no - 1) == sync_point->object_number.get()) { + // update sync point did not progress since last sync + return; + } + + m_updating_sync_point = 
true; + + if (m_image_copy_object_no > 0) { + sync_point->object_number = m_image_copy_object_no - 1; + } + + auto ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_update_sync_point>(this); + m_sync_point_handler->update_sync_points(m_snap_seqs_copy, + m_sync_points_copy, false, ctx); +} + +template <typename I> +void ImageSync<I>::handle_update_sync_point(int r) { + CephContext *cct = m_local_image_ctx->cct; + ldout(cct, 20) << ": r=" << r << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + m_updating_sync_point = false; + + if (m_image_copy_request != nullptr) { + m_update_sync_ctx = new LambdaContext( + [this](int r) { + std::lock_guard locker{m_lock}; + this->send_update_sync_point(); + }); + m_threads->timer->add_event_after( + m_update_sync_point_interval, m_update_sync_ctx); + return; + } + } + + send_flush_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_flush_sync_point() { + if (m_ret_val < 0) { + finish(m_ret_val); + return; + } + + update_progress("FLUSH_SYNC_POINT"); + + ceph_assert(!m_sync_points_copy.empty()); + auto sync_point = &m_sync_points_copy.front(); + + if (m_image_copy_object_no > 0) { + sync_point->object_number = m_image_copy_object_no - 1; + } else { + sync_point->object_number = boost::none; + } + + auto ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_flush_sync_point>(this); + m_sync_point_handler->update_sync_points(m_snap_seqs_copy, + m_sync_points_copy, false, ctx); +} + +template <typename I> +void ImageSync<I>::handle_flush_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_prune_sync_points(); +} + +template <typename I> +void ImageSync<I>::send_prune_sync_points() { + dout(10) << dendl; + + update_progress("PRUNE_SYNC_POINTS"); + + Context *ctx = create_context_callback< + ImageSync<I>, 
&ImageSync<I>::handle_prune_sync_points>(this); + SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create( + m_remote_image_ctx, true, m_sync_point_handler, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_prune_sync_points(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to prune sync point: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (!m_sync_point_handler->get_sync_points().empty()) { + send_copy_image(); + return; + } + + finish(0); +} + +template <typename I> +void ImageSync<I>::update_progress(const std::string &description) { + dout(20) << ": " << description << dendl; + + if (m_progress_ctx) { + m_progress_ctx->update_progress("IMAGE_SYNC/" + description); + } +} + +template <typename I> +void ImageSync<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_instance_watcher->notify_sync_complete(m_local_image_ctx->id); + CancelableRequest::finish(r); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageSync<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageSync.h b/src/tools/rbd_mirror/ImageSync.h new file mode 100644 index 000000000..b3389ce18 --- /dev/null +++ b/src/tools/rbd_mirror/ImageSync.h @@ -0,0 +1,151 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_H +#define RBD_MIRROR_IMAGE_SYNC_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/Types.h" +#include "common/ceph_mutex.h" +#include "tools/rbd_mirror/CancelableRequest.h" +#include "tools/rbd_mirror/image_sync/Types.h" + +class Context; +namespace journal { class Journaler; } +namespace librbd { template <typename> class DeepCopyRequest; } + +namespace rbd { +namespace mirror { + +class ProgressContext; +template <typename> class InstanceWatcher; +template <typename> class Threads; + +namespace image_sync { struct SyncPointHandler; } + 
+template <typename ImageCtxT = librbd::ImageCtx> +class ImageSync : public CancelableRequest { +public: + static ImageSync* create( + Threads<ImageCtxT>* threads, + ImageCtxT *local_image_ctx, + ImageCtxT *remote_image_ctx, + const std::string &local_mirror_uuid, + image_sync::SyncPointHandler* sync_point_handler, + InstanceWatcher<ImageCtxT> *instance_watcher, + ProgressContext *progress_ctx, + Context *on_finish) { + return new ImageSync(threads, local_image_ctx, remote_image_ctx, + local_mirror_uuid, sync_point_handler, + instance_watcher, progress_ctx, on_finish); + } + + ImageSync( + Threads<ImageCtxT>* threads, + ImageCtxT *local_image_ctx, + ImageCtxT *remote_image_ctx, + const std::string &local_mirror_uuid, + image_sync::SyncPointHandler* sync_point_handler, + InstanceWatcher<ImageCtxT> *instance_watcher, + ProgressContext *progress_ctx, + Context *on_finish); + ~ImageSync() override; + + void send() override; + void cancel() override; + +protected: + void finish(int r) override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * NOTIFY_SYNC_REQUEST + * | + * v + * PRUNE_CATCH_UP_SYNC_POINT + * | + * v + * CREATE_SYNC_POINT (skip if already exists and + * | not disconnected) + * v + * COPY_IMAGE . . . . . . . . . . . . . . + * | . + * v . + * FLUSH_SYNC_POINT . + * | . (image sync canceled) + * v . + * PRUNE_SYNC_POINTS . + * | . + * v . + * <finish> < . . . . . . . . . . . . . . 
+ * + * @endverbatim + */ + + class ImageCopyProgressHandler; + + Threads<ImageCtxT>* m_threads; + ImageCtxT *m_local_image_ctx; + ImageCtxT *m_remote_image_ctx; + std::string m_local_mirror_uuid; + image_sync::SyncPointHandler* m_sync_point_handler; + InstanceWatcher<ImageCtxT> *m_instance_watcher; + ProgressContext *m_progress_ctx; + + ceph::mutex m_lock; + bool m_canceled = false; + + librbd::DeepCopyRequest<ImageCtxT> *m_image_copy_request = nullptr; + ImageCopyProgressHandler *m_image_copy_prog_handler = nullptr; + + bool m_updating_sync_point = false; + Context *m_update_sync_ctx = nullptr; + double m_update_sync_point_interval; + uint64_t m_image_copy_object_no = 0; + uint64_t m_image_copy_object_count = 0; + + librbd::SnapSeqs m_snap_seqs_copy; + image_sync::SyncPoints m_sync_points_copy; + + int m_ret_val = 0; + + void send_notify_sync_request(); + void handle_notify_sync_request(int r); + + void send_prune_catch_up_sync_point(); + void handle_prune_catch_up_sync_point(int r); + + void send_create_sync_point(); + void handle_create_sync_point(int r); + + void send_update_max_object_count(); + void handle_update_max_object_count(int r); + + void send_copy_image(); + void handle_copy_image(int r); + void handle_copy_image_update_progress(uint64_t object_no, + uint64_t object_count); + void send_update_sync_point(); + void handle_update_sync_point(int r); + + void send_flush_sync_point(); + void handle_flush_sync_point(int r); + + void send_prune_sync_points(); + void handle_prune_sync_points(int r); + + void update_progress(const std::string &description); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageSync<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_H diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc new file mode 100644 index 000000000..e625bf365 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceReplayer.cc @@ -0,0 +1,543 @@ +// -*- mode:C++; tab-width:8; 
c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/stringify.h" +#include "common/Cond.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "ImageReplayer.h" +#include "InstanceReplayer.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceReplayer: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +namespace { + +const std::string SERVICE_DAEMON_ASSIGNED_COUNT_KEY("image_assigned_count"); +const std::string SERVICE_DAEMON_WARNING_COUNT_KEY("image_warning_count"); +const std::string SERVICE_DAEMON_ERROR_COUNT_KEY("image_error_count"); + +} // anonymous namespace + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +template <typename I> +InstanceReplayer<I>::InstanceReplayer( + librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid, + Threads<I> *threads, ServiceDaemon<I>* service_daemon, + MirrorStatusUpdater<I>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) + : m_local_io_ctx(local_io_ctx), m_local_mirror_uuid(local_mirror_uuid), + m_threads(threads), m_service_daemon(service_daemon), + m_local_status_updater(local_status_updater), + m_cache_manager_handler(cache_manager_handler), + m_pool_meta_cache(pool_meta_cache), + m_lock(ceph::make_mutex("rbd::mirror::InstanceReplayer " + + stringify(local_io_ctx.get_id()))) { +} + +template <typename I> +InstanceReplayer<I>::~InstanceReplayer() { + ceph_assert(m_image_state_check_task == nullptr); + ceph_assert(m_async_op_tracker.empty()); + ceph_assert(m_image_replayers.empty()); +} + +template <typename I> +bool InstanceReplayer<I>::is_blocklisted() const { + 
std::lock_guard locker{m_lock}; + return m_blocklisted; +} + +template <typename I> +int InstanceReplayer<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void InstanceReplayer<I>::init(Context *on_finish) { + dout(10) << dendl; + + Context *ctx = new LambdaContext( + [this, on_finish] (int r) { + { + std::lock_guard timer_locker{m_threads->timer_lock}; + schedule_image_state_check_task(); + } + on_finish->complete(0); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void InstanceReplayer<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_shut_down == nullptr); + m_on_shut_down = on_finish; + + Context *ctx = new LambdaContext( + [this] (int r) { + cancel_image_state_check_task(); + wait_for_ops(); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::add_peer(const Peer<I>& peer) { + dout(10) << "peer=" << peer << dendl; + + std::lock_guard locker{m_lock}; + auto result = m_peers.insert(peer).second; + ceph_assert(result); +} + +template <typename I> +void InstanceReplayer<I>::release_all(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + C_Gather *gather_ctx = new C_Gather(g_ceph_context, on_finish); + for (auto it = m_image_replayers.begin(); it != m_image_replayers.end(); + it = m_image_replayers.erase(it)) { + auto image_replayer = it->second; + auto ctx = gather_ctx->new_sub(); + ctx = new LambdaContext( + [image_replayer, ctx] (int r) { + image_replayer->destroy(); + ctx->complete(0); + }); + stop_image_replayer(image_replayer, ctx); + } + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> 
*instance_watcher, + const std::string &global_image_id, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it == m_image_replayers.end()) { + auto image_replayer = ImageReplayer<I>::create( + m_local_io_ctx, m_local_mirror_uuid, global_image_id, + m_threads, instance_watcher, m_local_status_updater, + m_cache_manager_handler, m_pool_meta_cache); + + dout(10) << global_image_id << ": creating replayer " << image_replayer + << dendl; + + it = m_image_replayers.insert(std::make_pair(global_image_id, + image_replayer)).first; + + // TODO only a single peer is currently supported + ceph_assert(m_peers.size() == 1); + auto peer = *m_peers.begin(); + image_replayer->add_peer(peer); + start_image_replayer(image_replayer); + } else { + // A duplicate acquire notification implies (1) connection hiccup or + // (2) new leader election. 
For the second case, restart the replayer to + // detect if the image has been deleted while the leader was offline + auto& image_replayer = it->second; + image_replayer->set_finished(false); + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); + } + + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void InstanceReplayer<I>::release_image(const std::string &global_image_id, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it == m_image_replayers.end()) { + dout(5) << global_image_id << ": not found" << dendl; + m_threads->work_queue->queue(on_finish, 0); + return; + } + + auto image_replayer = it->second; + m_image_replayers.erase(it); + + on_finish = new LambdaContext( + [image_replayer, on_finish] (int r) { + image_replayer->destroy(); + on_finish->complete(0); + }); + stop_image_replayer(image_replayer, on_finish); +} + +template <typename I> +void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it != m_image_replayers.end()) { + // TODO only a single peer is currently supported, therefore + // we can just interrupt the current image replayer and + // it will eventually detect that the peer image is missing and + // determine if a delete propagation is required. 
+ auto image_replayer = it->second; + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); + } + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void InstanceReplayer<I>::print_status(Formatter *f) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + f->open_array_section("image_replayers"); + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->print_status(f); + } + f->close_section(); +} + +template <typename I> +void InstanceReplayer<I>::start() +{ + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + m_manual_stop = false; + + auto cct = static_cast<CephContext *>(m_local_io_ctx.cct()); + auto gather_ctx = new C_Gather( + cct, new C_TrackedOp(m_async_op_tracker, nullptr)); + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->start(gather_ctx->new_sub(), true); + } + + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::stop() +{ + stop(nullptr); +} + +template <typename I> +void InstanceReplayer<I>::stop(Context *on_finish) +{ + dout(10) << dendl; + + if (on_finish == nullptr) { + on_finish = new C_TrackedOp(m_async_op_tracker, on_finish); + } else { + on_finish = new LambdaContext( + [this, on_finish] (int r) { + m_async_op_tracker.wait_for_ops(on_finish); + }); + } + + auto cct = static_cast<CephContext *>(m_local_io_ctx.cct()); + auto gather_ctx = new C_Gather(cct, on_finish); + { + std::lock_guard locker{m_lock}; + + m_manual_stop = true; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->stop(gather_ctx->new_sub(), true); + } + } + + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::restart() +{ + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + m_manual_stop = false; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, 
nullptr)); + } +} + +template <typename I> +void InstanceReplayer<I>::flush() +{ + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->flush(); + } +} + +template <typename I> +void InstanceReplayer<I>::start_image_replayer( + ImageReplayer<I> *image_replayer) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + std::string global_image_id = image_replayer->get_global_image_id(); + if (!image_replayer->is_stopped()) { + return; + } else if (image_replayer->is_blocklisted()) { + derr << "global_image_id=" << global_image_id << ": blocklisted detected " + << "during image replay" << dendl; + m_blocklisted = true; + return; + } else if (image_replayer->is_finished()) { + // TODO temporary until policy integrated + dout(5) << "removing image replayer for global_image_id=" + << global_image_id << dendl; + m_image_replayers.erase(image_replayer->get_global_image_id()); + image_replayer->destroy(); + return; + } else if (m_manual_stop) { + return; + } + + dout(10) << "global_image_id=" << global_image_id << dendl; + image_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr), false); +} + +template <typename I> +void InstanceReplayer<I>::queue_start_image_replayers() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + InstanceReplayer, &InstanceReplayer<I>::start_image_replayers>(this); + m_async_op_tracker.start_op(); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::start_image_replayers(int r) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + if (m_on_shut_down != nullptr) { + m_async_op_tracker.finish_op(); + return; + } + + uint64_t image_count = 0; + uint64_t warning_count = 0; + uint64_t error_count = 0; + for (auto it = m_image_replayers.begin(); + it != m_image_replayers.end();) { + auto current_it(it); + ++it; + + ++image_count; + auto health_state = current_it->second->get_health_state(); 
+ if (health_state == image_replayer::HEALTH_STATE_WARNING) { + ++warning_count; + } else if (health_state == image_replayer::HEALTH_STATE_ERROR) { + ++error_count; + } + + start_image_replayer(current_it->second); + } + + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_ASSIGNED_COUNT_KEY, image_count); + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_WARNING_COUNT_KEY, warning_count); + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_ERROR_COUNT_KEY, error_count); + + m_async_op_tracker.finish_op(); +} + +template <typename I> +void InstanceReplayer<I>::stop_image_replayer(ImageReplayer<I> *image_replayer, + Context *on_finish) { + dout(10) << image_replayer << " global_image_id=" + << image_replayer->get_global_image_id() << ", on_finish=" + << on_finish << dendl; + + if (image_replayer->is_stopped()) { + m_threads->work_queue->queue(on_finish, 0); + return; + } + + m_async_op_tracker.start_op(); + Context *ctx = create_async_context_callback( + m_threads->work_queue, new LambdaContext( + [this, image_replayer, on_finish] (int r) { + stop_image_replayer(image_replayer, on_finish); + m_async_op_tracker.finish_op(); + })); + + if (image_replayer->is_running()) { + image_replayer->stop(ctx, false); + } else { + int after = 1; + dout(10) << "scheduling image replayer " << image_replayer << " stop after " + << after << " sec (task " << ctx << ")" << dendl; + ctx = new LambdaContext( + [this, after, ctx] (int r) { + std::lock_guard timer_locker{m_threads->timer_lock}; + m_threads->timer->add_event_after(after, ctx); + }); + m_threads->work_queue->queue(ctx, 0); + } +} + +template <typename I> +void InstanceReplayer<I>::wait_for_ops() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + InstanceReplayer, 
&InstanceReplayer<I>::handle_wait_for_ops>(this); + + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void InstanceReplayer<I>::handle_wait_for_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + std::lock_guard locker{m_lock}; + stop_image_replayers(); +} + +template <typename I> +void InstanceReplayer<I>::stop_image_replayers() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<InstanceReplayer<I>, + &InstanceReplayer<I>::handle_stop_image_replayers>(this)); + + C_Gather *gather_ctx = new C_Gather(g_ceph_context, ctx); + for (auto &it : m_image_replayers) { + stop_image_replayer(it.second, gather_ctx->new_sub()); + } + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::handle_stop_image_replayers(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + + for (auto &it : m_image_replayers) { + ceph_assert(it.second->is_stopped()); + it.second->destroy(); + } + m_image_replayers.clear(); + + ceph_assert(m_on_shut_down != nullptr); + std::swap(on_finish, m_on_shut_down); + } + on_finish->complete(r); +} + +template <typename I> +void InstanceReplayer<I>::cancel_image_state_check_task() { + std::lock_guard timer_locker{m_threads->timer_lock}; + + if (m_image_state_check_task == nullptr) { + return; + } + + dout(10) << m_image_state_check_task << dendl; + bool canceled = m_threads->timer->cancel_event(m_image_state_check_task); + ceph_assert(canceled); + m_image_state_check_task = nullptr; +} + +template <typename I> +void InstanceReplayer<I>::schedule_image_state_check_task() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_image_state_check_task == nullptr); + + m_image_state_check_task = new LambdaContext( + [this](int r) { + 
ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + m_image_state_check_task = nullptr; + schedule_image_state_check_task(); + queue_start_image_replayers(); + }); + + auto cct = static_cast<CephContext *>(m_local_io_ctx.cct()); + int after = cct->_conf.get_val<uint64_t>( + "rbd_mirror_image_state_check_interval"); + + dout(10) << "scheduling image state check after " << after << " sec (task " + << m_image_state_check_task << ")" << dendl; + m_threads->timer->add_event_after(after, m_image_state_check_task); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/InstanceReplayer.h b/src/tools/rbd_mirror/InstanceReplayer.h new file mode 100644 index 000000000..7a5c79723 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceReplayer.h @@ -0,0 +1,138 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_INSTANCE_REPLAYER_H +#define RBD_MIRROR_INSTANCE_REPLAYER_H + +#include <map> +#include <sstream> + +#include "common/AsyncOpTracker.h" +#include "common/Formatter.h" +#include "common/ceph_mutex.h" +#include "tools/rbd_mirror/Types.h" + +namespace journal { struct CacheManagerHandler; } + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ImageReplayer; +template <typename> class InstanceWatcher; +template <typename> class MirrorStatusUpdater; +struct PoolMetaCache; +template <typename> class ServiceDaemon; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class InstanceReplayer { +public: + static InstanceReplayer* create( + librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid, + Threads<ImageCtxT> *threads, ServiceDaemon<ImageCtxT> *service_daemon, + MirrorStatusUpdater<ImageCtxT>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) { 
+ return new InstanceReplayer(local_io_ctx, local_mirror_uuid, threads, + service_daemon, local_status_updater, + cache_manager_handler, pool_meta_cache); + } + void destroy() { + delete this; + } + + InstanceReplayer(librados::IoCtx &local_io_ctx, + const std::string &local_mirror_uuid, + Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT> *service_daemon, + MirrorStatusUpdater<ImageCtxT>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache); + ~InstanceReplayer(); + + bool is_blocklisted() const; + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void add_peer(const Peer<ImageCtxT>& peer); + + void acquire_image(InstanceWatcher<ImageCtxT> *instance_watcher, + const std::string &global_image_id, Context *on_finish); + void release_image(const std::string &global_image_id, Context *on_finish); + void remove_peer_image(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish); + + void release_all(Context *on_finish); + + void print_status(Formatter *f); + void start(); + void stop(); + void restart(); + void flush(); + + void stop(Context *on_finish); + +private: + /** + * @verbatim + * + * <uninitialized> <-------------------\ + * | (init) | (repeat for each + * v STOP_IMAGE_REPLAYER ---\ image replayer) + * SCHEDULE_IMAGE_STATE_CHECK_TASK ^ ^ | + * | | | | + * v (shut_down) | \---------/ + * <initialized> -----------------> WAIT_FOR_OPS + * + * @endverbatim + */ + + typedef std::set<Peer<ImageCtxT>> Peers; + + librados::IoCtx &m_local_io_ctx; + std::string m_local_mirror_uuid; + Threads<ImageCtxT> *m_threads; + ServiceDaemon<ImageCtxT> *m_service_daemon; + MirrorStatusUpdater<ImageCtxT>* m_local_status_updater; + journal::CacheManagerHandler *m_cache_manager_handler; + PoolMetaCache* m_pool_meta_cache; + + mutable ceph::mutex m_lock; + AsyncOpTracker m_async_op_tracker; + std::map<std::string, 
ImageReplayer<ImageCtxT> *> m_image_replayers; + Peers m_peers; + Context *m_image_state_check_task = nullptr; + Context *m_on_shut_down = nullptr; + bool m_manual_stop = false; + bool m_blocklisted = false; + + void wait_for_ops(); + void handle_wait_for_ops(int r); + + void start_image_replayer(ImageReplayer<ImageCtxT> *image_replayer); + void queue_start_image_replayers(); + void start_image_replayers(int r); + + void stop_image_replayer(ImageReplayer<ImageCtxT> *image_replayer, + Context *on_finish); + + void stop_image_replayers(); + void handle_stop_image_replayers(int r); + + void schedule_image_state_check_task(); + void cancel_image_state_check_task(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>; + +#endif // RBD_MIRROR_INSTANCE_REPLAYER_H diff --git a/src/tools/rbd_mirror/InstanceWatcher.cc b/src/tools/rbd_mirror/InstanceWatcher.cc new file mode 100644 index 000000000..7b531064d --- /dev/null +++ b/src/tools/rbd_mirror/InstanceWatcher.cc @@ -0,0 +1,1290 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "InstanceWatcher.h" +#include "include/stringify.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/AsioEngine.h" +#include "librbd/ManagedLock.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "InstanceReplayer.h" +#include "Throttler.h" +#include "common/Cond.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " + +namespace rbd { +namespace mirror { + +using namespace instance_watcher; + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; +using librbd::util::unique_lock_name; + +namespace { + +struct C_GetInstances : public 
Context { + std::vector<std::string> *instance_ids; + Context *on_finish; + bufferlist out_bl; + + C_GetInstances(std::vector<std::string> *instance_ids, Context *on_finish) + : instance_ids(instance_ids), on_finish(on_finish) { + } + + void finish(int r) override { + dout(10) << "C_GetInstances: " << this << " " << __func__ << ": r=" << r + << dendl; + + if (r == 0) { + auto it = out_bl.cbegin(); + r = librbd::cls_client::mirror_instances_list_finish(&it, instance_ids); + } else if (r == -ENOENT) { + r = 0; + } + on_finish->complete(r); + } +}; + +template <typename I> +struct C_RemoveInstanceRequest : public Context { + InstanceWatcher<I> instance_watcher; + Context *on_finish; + + C_RemoveInstanceRequest(librados::IoCtx &io_ctx, + librbd::AsioEngine& asio_engine, + const std::string &instance_id, Context *on_finish) + : instance_watcher(io_ctx, asio_engine, nullptr, nullptr, instance_id), + on_finish(on_finish) { + } + + void send() { + dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << dendl; + + instance_watcher.remove(this); + } + + void finish(int r) override { + dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + ceph_assert(r == 0); + + on_finish->complete(r); + } +}; + +} // anonymous namespace + +template <typename I> +struct InstanceWatcher<I>::C_NotifyInstanceRequest : public Context { + InstanceWatcher<I> *instance_watcher; + std::string instance_id; + uint64_t request_id; + bufferlist bl; + Context *on_finish; + bool send_to_leader; + std::unique_ptr<librbd::watcher::Notifier> notifier; + librbd::watcher::NotifyResponse response; + bool canceling = false; + + C_NotifyInstanceRequest(InstanceWatcher<I> *instance_watcher, + const std::string &instance_id, uint64_t request_id, + bufferlist &&bl, Context *on_finish) + : instance_watcher(instance_watcher), instance_id(instance_id), + request_id(request_id), bl(bl), on_finish(on_finish), + send_to_leader(instance_id.empty()) { + dout(10) << 
"C_NotifyInstanceRequest: " << this << " " << __func__ + << ": instance_watcher=" << instance_watcher << ", instance_id=" + << instance_id << ", request_id=" << request_id << dendl; + + ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock)); + + if (!send_to_leader) { + ceph_assert((!instance_id.empty())); + notifier.reset(new librbd::watcher::Notifier( + instance_watcher->m_work_queue, + instance_watcher->m_ioctx, + RBD_MIRROR_INSTANCE_PREFIX + instance_id)); + } + + instance_watcher->m_notify_op_tracker.start_op(); + auto result = instance_watcher->m_notify_ops.insert( + std::make_pair(instance_id, this)).second; + ceph_assert(result); + } + + void send() { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl; + + ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock)); + + if (canceling) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": canceling" << dendl; + instance_watcher->m_work_queue->queue(this, -ECANCELED); + return; + } + + if (send_to_leader) { + if (instance_watcher->m_leader_instance_id.empty()) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": suspending" << dendl; + instance_watcher->suspend_notify_request(this); + return; + } + + if (instance_watcher->m_leader_instance_id != instance_id) { + auto count = instance_watcher->m_notify_ops.erase( + std::make_pair(instance_id, this)); + ceph_assert(count > 0); + + instance_id = instance_watcher->m_leader_instance_id; + + auto result = instance_watcher->m_notify_ops.insert( + std::make_pair(instance_id, this)).second; + ceph_assert(result); + + notifier.reset(new librbd::watcher::Notifier( + instance_watcher->m_work_queue, + instance_watcher->m_ioctx, + RBD_MIRROR_INSTANCE_PREFIX + instance_id)); + } + } + + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": sending to " << instance_id << dendl; + notifier->notify(bl, &response, this); + } + + void cancel() { + dout(10) << 
"C_NotifyInstanceRequest: " << this << " " << __func__ << dendl; + + ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock)); + + canceling = true; + instance_watcher->unsuspend_notify_request(this); + } + + void finish(int r) override { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + + if (r == 0 || r == -ETIMEDOUT) { + bool found = false; + for (auto &it : response.acks) { + auto &bl = it.second; + if (it.second.length() == 0) { + dout(5) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": no payload in ack, ignoring" << dendl; + continue; + } + try { + auto iter = bl.cbegin(); + NotifyAckPayload ack; + decode(ack, iter); + if (ack.instance_id != instance_watcher->get_instance_id()) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": ack instance_id (" << ack.instance_id << ") " + << "does not match, ignoring" << dendl; + continue; + } + if (ack.request_id != request_id) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": ack request_id (" << ack.request_id << ") " + << "does not match, ignoring" << dendl; + continue; + } + r = ack.ret_val; + found = true; + break; + } catch (const buffer::error &err) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": failed to decode ack: " << err.what() << dendl; + continue; + } + } + + if (!found) { + if (r == -ETIMEDOUT) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": resending after timeout" << dendl; + std::lock_guard locker{instance_watcher->m_lock}; + send(); + return; + } else { + r = -EINVAL; + } + } else { + if (r == -ESTALE && send_to_leader) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": resending due to leader change" << dendl; + std::lock_guard locker{instance_watcher->m_lock}; + send(); + return; + } + } + } + + on_finish->complete(r); + + { + std::lock_guard locker{instance_watcher->m_lock}; + auto result = 
instance_watcher->m_notify_ops.erase( + std::make_pair(instance_id, this)); + ceph_assert(result > 0); + instance_watcher->m_notify_op_tracker.finish_op(); + } + + delete this; + } + + void complete(int r) override { + finish(r); + } +}; + +template <typename I> +struct InstanceWatcher<I>::C_SyncRequest : public Context { + InstanceWatcher<I> *instance_watcher; + std::string sync_id; + Context *on_start; + Context *on_complete = nullptr; + C_NotifyInstanceRequest *req = nullptr; + + C_SyncRequest(InstanceWatcher<I> *instance_watcher, + const std::string &sync_id, Context *on_start) + : instance_watcher(instance_watcher), sync_id(sync_id), + on_start(on_start) { + dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": sync_id=" + << sync_id << dendl; + } + + void finish(int r) override { + dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + + if (on_start != nullptr) { + instance_watcher->handle_notify_sync_request(this, r); + } else { + instance_watcher->handle_notify_sync_complete(this, r); + delete this; + } + } + + // called twice + void complete(int r) override { + finish(r); + } +}; + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " \ + << this << " " << __func__ << ": " +template <typename I> +void InstanceWatcher<I>::get_instances(librados::IoCtx &io_ctx, + std::vector<std::string> *instance_ids, + Context *on_finish) { + librados::ObjectReadOperation op; + librbd::cls_client::mirror_instances_list_start(&op); + C_GetInstances *ctx = new C_GetInstances(instance_ids, on_finish); + librados::AioCompletion *aio_comp = create_rados_callback(ctx); + + int r = io_ctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &ctx->out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::remove_instance(librados::IoCtx &io_ctx, + librbd::AsioEngine& asio_engine, + const std::string &instance_id, + Context *on_finish) { + auto req = new 
C_RemoveInstanceRequest<I>(io_ctx, asio_engine, instance_id, + on_finish); + req->send(); +} + +template <typename I> +InstanceWatcher<I> *InstanceWatcher<I>::create( + librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine, + InstanceReplayer<I> *instance_replayer, + Throttler<I> *image_sync_throttler) { + return new InstanceWatcher<I>(io_ctx, asio_engine, instance_replayer, + image_sync_throttler, + stringify(io_ctx.get_instance_id())); +} + +template <typename I> +InstanceWatcher<I>::InstanceWatcher(librados::IoCtx &io_ctx, + librbd::AsioEngine& asio_engine, + InstanceReplayer<I> *instance_replayer, + Throttler<I> *image_sync_throttler, + const std::string &instance_id) + : Watcher(io_ctx, asio_engine.get_work_queue(), + RBD_MIRROR_INSTANCE_PREFIX + instance_id), + m_instance_replayer(instance_replayer), + m_image_sync_throttler(image_sync_throttler), m_instance_id(instance_id), + m_lock(ceph::make_mutex( + unique_lock_name("rbd::mirror::InstanceWatcher::m_lock", this))), + m_instance_lock(librbd::ManagedLock<I>::create( + m_ioctx, asio_engine, m_oid, this, librbd::managed_lock::EXCLUSIVE, true, + m_cct->_conf.get_val<uint64_t>("rbd_blocklist_expire_seconds"))) { +} + +template <typename I> +InstanceWatcher<I>::~InstanceWatcher() { + ceph_assert(m_requests.empty()); + ceph_assert(m_notify_ops.empty()); + ceph_assert(m_notify_op_tracker.empty()); + ceph_assert(m_suspended_ops.empty()); + ceph_assert(m_inflight_sync_reqs.empty()); + m_instance_lock->destroy(); +} + +template <typename I> +int InstanceWatcher<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void InstanceWatcher<I>::init(Context *on_finish) { + dout(10) << "instance_id=" << m_instance_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + register_instance(); +} + +template <typename I> +void InstanceWatcher<I>::shut_down() { + C_SaferCond shut_down_ctx; 
+ shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void InstanceWatcher<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + release_lock(); +} + +template <typename I> +void InstanceWatcher<I>::remove(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + get_instance_locker(); +} + +template <typename I> +void InstanceWatcher<I>::notify_image_acquire( + const std::string &instance_id, const std::string &global_image_id, + Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", global_image_id=" + << global_image_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{ImageAcquirePayload{request_id, global_image_id}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_image_release( + const std::string &instance_id, const std::string &global_image_id, + Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", global_image_id=" + << global_image_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{ImageReleasePayload{request_id, global_image_id}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_peer_image_removed( + const std::string &instance_id, const std::string &global_image_id, + const std::string &peer_mirror_uuid, Context 
*on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", " + << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{PeerImageRemovedPayload{request_id, global_image_id, + peer_mirror_uuid}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_request(const std::string &sync_id, + Context *on_sync_start) { + dout(10) << "sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_inflight_sync_reqs.count(sync_id) == 0); + + uint64_t request_id = ++m_request_seq; + + bufferlist bl; + encode(NotifyMessage{SyncRequestPayload{request_id, sync_id}}, bl); + + auto sync_ctx = new C_SyncRequest(this, sync_id, on_sync_start); + sync_ctx->req = new C_NotifyInstanceRequest(this, "", request_id, + std::move(bl), sync_ctx); + + m_inflight_sync_reqs[sync_id] = sync_ctx; + sync_ctx->req->send(); +} + +template <typename I> +bool InstanceWatcher<I>::cancel_sync_request(const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + auto it = m_inflight_sync_reqs.find(sync_id); + if (it == m_inflight_sync_reqs.end()) { + return false; + } + + auto sync_ctx = it->second; + + if (sync_ctx->on_start == nullptr) { + return false; + } + + ceph_assert(sync_ctx->req != nullptr); + sync_ctx->req->cancel(); + return true; +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_start(const std::string &instance_id, + const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + uint64_t request_id = ++m_request_seq; + + bufferlist bl; + encode(NotifyMessage{SyncStartPayload{request_id, sync_id}}, bl); + 
+ auto ctx = new LambdaContext( + [this, sync_id] (int r) { + dout(10) << "finish: sync_id=" << sync_id << ", r=" << r << dendl; + std::lock_guard locker{m_lock}; + if (r != -ESTALE && is_leader()) { + m_image_sync_throttler->finish_op(m_ioctx.get_namespace(), sync_id); + } + }); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), ctx); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_complete(const std::string &sync_id) { + std::lock_guard locker{m_lock}; + notify_sync_complete(m_lock, sync_id); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_complete(const ceph::mutex&, + const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + auto it = m_inflight_sync_reqs.find(sync_id); + ceph_assert(it != m_inflight_sync_reqs.end()); + + auto sync_ctx = it->second; + ceph_assert(sync_ctx->req == nullptr); + + m_inflight_sync_reqs.erase(it); + m_work_queue->queue(sync_ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify_sync_request(C_SyncRequest *sync_ctx, + int r) { + dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl; + + Context *on_start = nullptr; + { + std::lock_guard locker{m_lock}; + ceph_assert(sync_ctx->req != nullptr); + ceph_assert(sync_ctx->on_start != nullptr); + + if (sync_ctx->req->canceling) { + r = -ECANCELED; + } + + std::swap(sync_ctx->on_start, on_start); + sync_ctx->req = nullptr; + + if (r == -ECANCELED) { + notify_sync_complete(m_lock, sync_ctx->sync_id); + } + } + + on_start->complete(r == -ECANCELED ? 
r : 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify_sync_complete(C_SyncRequest *sync_ctx, + int r) { + dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl; + + if (sync_ctx->on_complete != nullptr) { + sync_ctx->on_complete->complete(r); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_acquire_leader() { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + m_leader_instance_id = m_instance_id; + unsuspend_notify_requests(); +} + +template <typename I> +void InstanceWatcher<I>::handle_release_leader() { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + m_leader_instance_id.clear(); + + m_image_sync_throttler->drain(m_ioctx.get_namespace(), -ESTALE); +} + +template <typename I> +void InstanceWatcher<I>::handle_update_leader( + const std::string &leader_instance_id) { + dout(10) << "leader_instance_id=" << leader_instance_id << dendl; + + std::lock_guard locker{m_lock}; + + m_leader_instance_id = leader_instance_id; + + if (!m_leader_instance_id.empty()) { + unsuspend_notify_requests(); + } +} + +template <typename I> +void InstanceWatcher<I>::cancel_notify_requests( + const std::string &instance_id) { + dout(10) << "instance_id=" << instance_id << dendl; + + std::lock_guard locker{m_lock}; + + for (auto op : m_notify_ops) { + if (op.first == instance_id && !op.second->send_to_leader) { + op.second->cancel(); + } + } +} + +template <typename I> +void InstanceWatcher<I>::register_instance() { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_instances_add(&op, m_instance_id); + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_instance>(this); + + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_register_instance(int r) { + 
dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + + if (r == 0) { + create_instance_object(); + return; + } + + derr << "error registering instance: " << cpp_strerror(r) << dendl; + + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + + +template <typename I> +void InstanceWatcher<I>::create_instance_object() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + librados::ObjectWriteOperation op; + op.create(true); + + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, + &InstanceWatcher<I>::handle_create_instance_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_create_instance_object(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error creating " << m_oid << " object: " << cpp_strerror(r) + << dendl; + + m_ret_val = r; + unregister_instance(); + return; + } + + register_watch(); +} + +template <typename I> +void InstanceWatcher<I>::register_watch() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_watch>(this)); + + librbd::Watcher::register_watch(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_register_watch(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error registering instance watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + + m_ret_val = r; + remove_instance_object(); + return; + } + + acquire_lock(); +} + +template <typename I> +void InstanceWatcher<I>::acquire_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( 
+ m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_acquire_lock>(this)); + + m_instance_lock->acquire_lock(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + + if (r < 0) { + + derr << "error acquiring instance lock: " << cpp_strerror(r) << dendl; + + m_ret_val = r; + unregister_watch(); + return; + } + + std::swap(on_finish, m_on_finish); + } + + on_finish->complete(r); +} + +template <typename I> +void InstanceWatcher<I>::release_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_release_lock>(this)); + + m_instance_lock->shut_down(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_release_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error releasing instance lock: " << cpp_strerror(r) << dendl; + } + + unregister_watch(); +} + +template <typename I> +void InstanceWatcher<I>::unregister_watch() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_watch>(this)); + + librbd::Watcher::unregister_watch(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_unregister_watch(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering instance watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + remove_instance_object(); +} + +template <typename I> +void InstanceWatcher<I>::remove_instance_object() { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + dout(10) << dendl; + + 
librados::ObjectWriteOperation op; + op.remove(); + + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, + &InstanceWatcher<I>::handle_remove_instance_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_remove_instance_object(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + r = 0; + } + + if (r < 0) { + derr << "error removing " << m_oid << " object: " << cpp_strerror(r) + << dendl; + } + + std::lock_guard locker{m_lock}; + unregister_instance(); +} + +template <typename I> +void InstanceWatcher<I>::unregister_instance() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_instances_remove(&op, m_instance_id); + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_instance>(this); + + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_unregister_instance(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering instance: " << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + wait_for_notify_ops(); +} + +template <typename I> +void InstanceWatcher<I>::wait_for_notify_ops() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + for (auto op : m_notify_ops) { + op.second->cancel(); + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_wait_for_notify_ops>(this)); + + m_notify_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_wait_for_notify_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish 
= nullptr; + { + std::lock_guard locker{m_lock}; + + ceph_assert(m_notify_ops.empty()); + + std::swap(on_finish, m_on_finish); + r = m_ret_val; + } + on_finish->complete(r); +} + +template <typename I> +void InstanceWatcher<I>::get_instance_locker() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_get_instance_locker>(this)); + + m_instance_lock->get_locker(&m_instance_locker, ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_get_instance_locker(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + if (r != -ENOENT) { + derr << "error retrieving instance locker: " << cpp_strerror(r) << dendl; + } + remove_instance_object(); + return; + } + + break_instance_lock(); +} + +template <typename I> +void InstanceWatcher<I>::break_instance_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_break_instance_lock>(this)); + + m_instance_lock->break_lock(m_instance_locker, true, ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_break_instance_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + if (r != -ENOENT) { + derr << "error breaking instance lock: " << cpp_strerror(r) << dendl; + } + remove_instance_object(); + return; + } + + remove_instance_object(); +} + +template <typename I> +void InstanceWatcher<I>::suspend_notify_request(C_NotifyInstanceRequest *req) { + dout(10) << req << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + auto result = m_suspended_ops.insert(req).second; + ceph_assert(result); +} + +template <typename I> +bool InstanceWatcher<I>::unsuspend_notify_request( + C_NotifyInstanceRequest *req) { + 
dout(10) << req << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + auto result = m_suspended_ops.erase(req); + if (result == 0) { + return false; + } + + req->send(); + return true; +} + +template <typename I> +void InstanceWatcher<I>::unsuspend_notify_requests() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + std::set<C_NotifyInstanceRequest *> suspended_ops; + std::swap(m_suspended_ops, suspended_ops); + + for (auto op : suspended_ops) { + op->send(); + } +} + +template <typename I> +Context *InstanceWatcher<I>::prepare_request(const std::string &instance_id, + uint64_t request_id, + C_NotifyAck *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id + << dendl; + + std::lock_guard locker{m_lock}; + + Context *ctx = nullptr; + Request request(instance_id, request_id); + auto it = m_requests.find(request); + + if (it != m_requests.end()) { + dout(10) << "duplicate for in-progress request" << dendl; + delete it->on_notify_ack; + m_requests.erase(it); + } else { + ctx = create_async_context_callback( + m_work_queue, new LambdaContext( + [this, instance_id, request_id] (int r) { + complete_request(instance_id, request_id, r); + })); + } + + request.on_notify_ack = on_notify_ack; + m_requests.insert(request); + return ctx; +} + +template <typename I> +void InstanceWatcher<I>::complete_request(const std::string &instance_id, + uint64_t request_id, int r) { + dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id + << dendl; + + C_NotifyAck *on_notify_ack; + { + std::lock_guard locker{m_lock}; + Request request(instance_id, request_id); + auto it = m_requests.find(request); + ceph_assert(it != m_requests.end()); + on_notify_ack = it->on_notify_ack; + m_requests.erase(it); + } + + encode(NotifyAckPayload(instance_id, request_id, r), on_notify_ack->out); + on_notify_ack->complete(0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify(uint64_t notify_id, uint64_t 
handle, + uint64_t notifier_id, bufferlist &bl) { + dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", " + << "notifier_id=" << notifier_id << dendl; + + auto ctx = new C_NotifyAck(this, notify_id, handle); + + NotifyMessage notify_message; + try { + auto iter = bl.cbegin(); + decode(notify_message, iter); + } catch (const buffer::error &err) { + derr << "error decoding image notification: " << err.what() << dendl; + ctx->complete(0); + return; + } + + apply_visitor(HandlePayloadVisitor(this, stringify(notifier_id), ctx), + notify_message.payload); +} + +template <typename I> +void InstanceWatcher<I>::handle_image_acquire( + const std::string &global_image_id, Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + auto ctx = new LambdaContext( + [this, global_image_id, on_finish] (int r) { + m_instance_replayer->acquire_image(this, global_image_id, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_image_release( + const std::string &global_image_id, Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + auto ctx = new LambdaContext( + [this, global_image_id, on_finish] (int r) { + m_instance_replayer->release_image(global_image_id, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_peer_image_removed( + const std::string &global_image_id, const std::string &peer_mirror_uuid, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + auto ctx = new LambdaContext( + [this, peer_mirror_uuid, global_image_id, on_finish] (int r) { + m_instance_replayer->remove_peer_image(global_image_id, + peer_mirror_uuid, on_finish); + 
m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_sync_request(const std::string &instance_id, + const std::string &sync_id, + Context *on_finish) { + dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + if (!is_leader()) { + dout(10) << "sync request for non-leader" << dendl; + m_work_queue->queue(on_finish, -ESTALE); + return; + } + + Context *on_start = create_async_context_callback( + m_work_queue, new LambdaContext( + [this, instance_id, sync_id, on_finish] (int r) { + dout(10) << "handle_sync_request: finish: instance_id=" << instance_id + << ", sync_id=" << sync_id << ", r=" << r << dendl; + if (r == 0) { + notify_sync_start(instance_id, sync_id); + } + if (r == -ENOENT) { + r = 0; + } + on_finish->complete(r); + })); + m_image_sync_throttler->start_op(m_ioctx.get_namespace(), sync_id, on_start); +} + +template <typename I> +void InstanceWatcher<I>::handle_sync_start(const std::string &instance_id, + const std::string &sync_id, + Context *on_finish) { + dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + auto it = m_inflight_sync_reqs.find(sync_id); + if (it == m_inflight_sync_reqs.end()) { + dout(5) << "not found" << dendl; + m_work_queue->queue(on_finish, 0); + return; + } + + auto sync_ctx = it->second; + + if (sync_ctx->on_complete != nullptr) { + dout(5) << "duplicate request" << dendl; + m_work_queue->queue(sync_ctx->on_complete, -ESTALE); + } + + sync_ctx->on_complete = on_finish; +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const ImageAcquirePayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "image_acquire: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = 
prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_image_acquire(payload.global_image_id, on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const ImageReleasePayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "image_release: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_image_release(payload.global_image_id, on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const PeerImageRemovedPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "remove_peer_image: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_peer_image_removed(payload.global_image_id, payload.peer_mirror_uuid, + on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const SyncRequestPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "sync_request: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish == nullptr) { + return; + } + + handle_sync_request(instance_id, payload.sync_id, on_finish); +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const SyncStartPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "sync_start: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); 
+ if (on_finish == nullptr) { + return; + } + + handle_sync_start(instance_id, payload.sync_id, on_finish); +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const UnknownPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(5) << "unknown: instance_id=" << instance_id << dendl; + + on_notify_ack->complete(0); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::InstanceWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/InstanceWatcher.h b/src/tools/rbd_mirror/InstanceWatcher.h new file mode 100644 index 000000000..08e40b40b --- /dev/null +++ b/src/tools/rbd_mirror/InstanceWatcher.h @@ -0,0 +1,269 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCE_WATCHER_H +#define CEPH_RBD_MIRROR_INSTANCE_WATCHER_H + +#include <map> +#include <memory> +#include <set> +#include <string> +#include <vector> + +#include "common/AsyncOpTracker.h" +#include "librbd/Watcher.h" +#include "librbd/managed_lock/Types.h" +#include "tools/rbd_mirror/instance_watcher/Types.h" + +namespace librbd { + +class AsioEngine; +class ImageCtx; +template <typename> class ManagedLock; + +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> class InstanceReplayer; +template <typename> class Throttler; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class InstanceWatcher : protected librbd::Watcher { + using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning +public: + static void get_instances(librados::IoCtx &io_ctx, + std::vector<std::string> *instance_ids, + Context *on_finish); + static void remove_instance(librados::IoCtx &io_ctx, + librbd::AsioEngine& asio_engine, + const std::string &instance_id, + Context *on_finish); + + static InstanceWatcher *create( + librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine, + 
InstanceReplayer<ImageCtxT> *instance_replayer, + Throttler<ImageCtxT> *image_sync_throttler); + void destroy() { + delete this; + } + + InstanceWatcher(librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine, + InstanceReplayer<ImageCtxT> *instance_replayer, + Throttler<ImageCtxT> *image_sync_throttler, + const std::string &instance_id); + ~InstanceWatcher() override; + + inline std::string &get_instance_id() { + return m_instance_id; + } + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + void remove(Context *on_finish); + + void notify_image_acquire(const std::string &instance_id, + const std::string &global_image_id, + Context *on_notify_ack); + void notify_image_release(const std::string &instance_id, + const std::string &global_image_id, + Context *on_notify_ack); + void notify_peer_image_removed(const std::string &instance_id, + const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_notify_ack); + + void notify_sync_request(const std::string &sync_id, Context *on_sync_start); + bool cancel_sync_request(const std::string &sync_id); + void notify_sync_complete(const std::string &sync_id); + + void cancel_notify_requests(const std::string &instance_id); + + void handle_acquire_leader(); + void handle_release_leader(); + void handle_update_leader(const std::string &leader_instance_id); + +private: + /** + * @verbatim + * + * BREAK_INSTANCE_LOCK -------\ + * ^ | + * | (error) | + * GET_INSTANCE_LOCKER * * *>| + * ^ (remove) | + * | | + * <uninitialized> <----------------+---- WAIT_FOR_NOTIFY_OPS + * | (init) ^ | ^ + * v (error) * | | + * REGISTER_INSTANCE * * * * * *|* *> UNREGISTER_INSTANCE + * | * | ^ + * v (error) * v | + * CREATE_INSTANCE_OBJECT * * * * * *> REMOVE_INSTANCE_OBJECT + * | * ^ + * v (error) * | + * REGISTER_WATCH * * * * * * * * * *> UNREGISTER_WATCH + * | * ^ + * v (error) * | + * ACQUIRE_LOCK * * * * * * * * * * * RELEASE_LOCK + * | ^ + * v 
(shut_down) | + * <watching> -------------------------------/ + * + * @endverbatim + */ + + struct C_NotifyInstanceRequest; + struct C_SyncRequest; + + typedef std::pair<std::string, std::string> Id; + + struct HandlePayloadVisitor : public boost::static_visitor<void> { + InstanceWatcher *instance_watcher; + std::string instance_id; + C_NotifyAck *on_notify_ack; + + HandlePayloadVisitor(InstanceWatcher *instance_watcher, + const std::string &instance_id, + C_NotifyAck *on_notify_ack) + : instance_watcher(instance_watcher), instance_id(instance_id), + on_notify_ack(on_notify_ack) { + } + + template <typename Payload> + inline void operator()(const Payload &payload) const { + instance_watcher->handle_payload(instance_id, payload, on_notify_ack); + } + }; + + struct Request { + std::string instance_id; + uint64_t request_id; + C_NotifyAck *on_notify_ack = nullptr; + + Request(const std::string &instance_id, uint64_t request_id) + : instance_id(instance_id), request_id(request_id) { + } + + inline bool operator<(const Request &rhs) const { + return instance_id < rhs.instance_id || + (instance_id == rhs.instance_id && request_id < rhs.request_id); + } + }; + + Threads<ImageCtxT> *m_threads; + InstanceReplayer<ImageCtxT> *m_instance_replayer; + Throttler<ImageCtxT> *m_image_sync_throttler; + std::string m_instance_id; + + mutable ceph::mutex m_lock; + librbd::ManagedLock<ImageCtxT> *m_instance_lock; + Context *m_on_finish = nullptr; + int m_ret_val = 0; + std::string m_leader_instance_id; + librbd::managed_lock::Locker m_instance_locker; + std::set<std::pair<std::string, C_NotifyInstanceRequest *>> m_notify_ops; + AsyncOpTracker m_notify_op_tracker; + uint64_t m_request_seq = 0; + std::set<Request> m_requests; + std::set<C_NotifyInstanceRequest *> m_suspended_ops; + std::map<std::string, C_SyncRequest *> m_inflight_sync_reqs; + + inline bool is_leader() const { + return m_leader_instance_id == m_instance_id; + } + + void register_instance(); + void 
handle_register_instance(int r); + + void create_instance_object(); + void handle_create_instance_object(int r); + + void register_watch(); + void handle_register_watch(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void release_lock(); + void handle_release_lock(int r); + + void unregister_watch(); + void handle_unregister_watch(int r); + + void remove_instance_object(); + void handle_remove_instance_object(int r); + + void unregister_instance(); + void handle_unregister_instance(int r); + + void wait_for_notify_ops(); + void handle_wait_for_notify_ops(int r); + + void get_instance_locker(); + void handle_get_instance_locker(int r); + + void break_instance_lock(); + void handle_break_instance_lock(int r); + + void suspend_notify_request(C_NotifyInstanceRequest *req); + bool unsuspend_notify_request(C_NotifyInstanceRequest *req); + void unsuspend_notify_requests(); + + void notify_sync_complete(const ceph::mutex& lock, const std::string &sync_id); + void handle_notify_sync_request(C_SyncRequest *sync_ctx, int r); + void handle_notify_sync_complete(C_SyncRequest *sync_ctx, int r); + + void notify_sync_start(const std::string &instance_id, + const std::string &sync_id); + + Context *prepare_request(const std::string &instance_id, uint64_t request_id, + C_NotifyAck *on_notify_ack); + void complete_request(const std::string &instance_id, uint64_t request_id, + int r); + + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; + + void handle_image_acquire(const std::string &global_image_id, + Context *on_finish); + void handle_image_release(const std::string &global_image_id, + Context *on_finish); + void handle_peer_image_removed(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish); + + void handle_sync_request(const std::string &instance_id, + const std::string &sync_id, Context *on_finish); + void handle_sync_start(const std::string &instance_id, + 
const std::string &sync_id, Context *on_finish); + + void handle_payload(const std::string &instance_id, + const instance_watcher::ImageAcquirePayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::ImageReleasePayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::PeerImageRemovedPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::SyncRequestPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::SyncStartPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::UnknownPayload &payload, + C_NotifyAck *on_notify_ack); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCE_WATCHER_H diff --git a/src/tools/rbd_mirror/Instances.cc b/src/tools/rbd_mirror/Instances.cc new file mode 100644 index 000000000..ca291bb5f --- /dev/null +++ b/src/tools/rbd_mirror/Instances.cc @@ -0,0 +1,356 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/stringify.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "InstanceWatcher.h" +#include "Instances.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Instances: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +Instances<I>::Instances(Threads<I> *threads, 
librados::IoCtx &ioctx, + const std::string& instance_id, + instances::Listener& listener) : + m_threads(threads), m_ioctx(ioctx), m_instance_id(instance_id), + m_listener(listener), m_cct(reinterpret_cast<CephContext *>(ioctx.cct())), + m_lock(ceph::make_mutex("rbd::mirror::Instances " + ioctx.get_pool_name())) { +} + +template <typename I> +Instances<I>::~Instances() { +} + +template <typename I> +void Instances<I>::init(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + get_instances(); +} + +template <typename I> +void Instances<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + Context *ctx = new LambdaContext( + [this](int r) { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + cancel_remove_task(); + wait_for_ops(); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Instances<I>::unblock_listener() { + dout(5) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_listener_blocked); + m_listener_blocked = false; + + InstanceIds added_instance_ids; + for (auto& pair : m_instances) { + if (pair.second.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(pair.first); + } + } + + if (!added_instance_ids.empty()) { + m_threads->work_queue->queue( + new C_NotifyInstancesAdded(this, added_instance_ids), 0); + } +} + +template <typename I> +void Instances<I>::acked(const InstanceIds& instance_ids) { + dout(10) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + if (m_on_finish != nullptr) { + dout(5) << "received on shut down, ignoring" << dendl; + return; + } + + Context *ctx = new C_HandleAcked(this, instance_ids); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Instances<I>::handle_acked(const InstanceIds& instance_ids) { + dout(5) << 
"instance_ids=" << instance_ids << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (m_on_finish != nullptr) { + dout(5) << "handled on shut down, ignoring" << dendl; + return; + } + + InstanceIds added_instance_ids; + auto time = clock_t::now(); + for (auto& instance_id : instance_ids) { + auto &instance = m_instances.insert( + std::make_pair(instance_id, Instance{})).first->second; + instance.acked_time = time; + if (instance.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(instance_id); + } + } + + schedule_remove_task(time); + if (!m_listener_blocked && !added_instance_ids.empty()) { + m_threads->work_queue->queue( + new C_NotifyInstancesAdded(this, added_instance_ids), 0); + } +} + +template <typename I> +void Instances<I>::notify_instances_added(const InstanceIds& instance_ids) { + std::unique_lock locker{m_lock}; + InstanceIds added_instance_ids; + for (auto& instance_id : instance_ids) { + auto it = m_instances.find(instance_id); + if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(instance_id); + } + } + + if (added_instance_ids.empty()) { + return; + } + + dout(5) << "instance_ids=" << added_instance_ids << dendl; + locker.unlock(); + m_listener.handle_added(added_instance_ids); + locker.lock(); + + for (auto& instance_id : added_instance_ids) { + auto it = m_instances.find(instance_id); + if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) { + it->second.state = INSTANCE_STATE_IDLE; + } + } +} + +template <typename I> +void Instances<I>::notify_instances_removed(const InstanceIds& instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + m_listener.handle_removed(instance_ids); + + std::lock_guard locker{m_lock}; + for (auto& instance_id : instance_ids) { + m_instances.erase(instance_id); + } +} + +template <typename I> +void Instances<I>::list(std::vector<std::string> *instance_ids) { + dout(20) << dendl; + + 
std::lock_guard locker{m_lock}; + + for (auto it : m_instances) { + instance_ids->push_back(it.first); + } +} + + +template <typename I> +void Instances<I>::get_instances() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_context_callback< + Instances, &Instances<I>::handle_get_instances>(this); + + InstanceWatcher<I>::get_instances(m_ioctx, &m_instance_ids, ctx); +} + +template <typename I> +void Instances<I>::handle_get_instances(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + std::swap(on_finish, m_on_finish); + } + + if (r < 0) { + derr << "error retrieving instances: " << cpp_strerror(r) << dendl; + } else { + handle_acked(m_instance_ids); + } + on_finish->complete(r); +} + +template <typename I> +void Instances<I>::wait_for_ops() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Instances, &Instances<I>::handle_wait_for_ops>(this)); + + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Instances<I>::handle_wait_for_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void Instances<I>::remove_instances(const Instances<I>::clock_t::time_point& time) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + InstanceIds instance_ids; + for (auto& instance_pair : m_instances) { + if (instance_pair.first == m_instance_id) { + continue; + } + auto& instance = instance_pair.second; + if (instance.state != INSTANCE_STATE_REMOVING && + instance.acked_time <= time) { + instance.state = INSTANCE_STATE_REMOVING; + instance_ids.push_back(instance_pair.first); + } + } + ceph_assert(!instance_ids.empty()); + + dout(10) << "instance_ids=" << 
instance_ids << dendl; + Context* ctx = new LambdaContext([this, instance_ids](int r) { + handle_remove_instances(r, instance_ids); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + auto gather_ctx = new C_Gather(m_cct, ctx); + for (auto& instance_id : instance_ids) { + InstanceWatcher<I>::remove_instance(m_ioctx, *m_threads->asio_engine, + instance_id, gather_ctx->new_sub()); + } + + m_async_op_tracker.start_op(); + gather_ctx->activate(); +} + +template <typename I> +void Instances<I>::handle_remove_instances( + int r, const InstanceIds& instance_ids) { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + dout(10) << "r=" << r << ", instance_ids=" << instance_ids << dendl; + ceph_assert(r == 0); + + // fire removed notification now that instances have been blocklisted + m_threads->work_queue->queue( + new C_NotifyInstancesRemoved(this, instance_ids), 0); + + // reschedule the timer for the next batch + schedule_remove_task(clock_t::now()); + m_async_op_tracker.finish_op(); +} + +template <typename I> +void Instances<I>::cancel_remove_task() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + if (m_timer_task == nullptr) { + return; + } + + dout(10) << dendl; + + bool canceled = m_threads->timer->cancel_event(m_timer_task); + ceph_assert(canceled); + m_timer_task = nullptr; +} + +template <typename I> +void Instances<I>::schedule_remove_task(const Instances<I>::clock_t::time_point& time) { + cancel_remove_task(); + if (m_on_finish != nullptr) { + dout(10) << "received on shut down, ignoring" << dendl; + return; + } + + int after = m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_heartbeat_interval") * + (1 + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats") + + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_acquire_attempts_before_break")); + + bool schedule = false; + auto oldest_time = time; + for (auto& instance : m_instances) { + if 
(instance.first == m_instance_id) { + continue; + } + if (instance.second.state == INSTANCE_STATE_REMOVING) { + // removal is already in-flight + continue; + } + + oldest_time = std::min(oldest_time, instance.second.acked_time); + schedule = true; + } + + if (!schedule) { + return; + } + + dout(10) << dendl; + + // schedule a time to fire when the oldest instance should be removed + m_timer_task = new LambdaContext( + [this, oldest_time](int r) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + std::lock_guard locker{m_lock}; + m_timer_task = nullptr; + + remove_instances(oldest_time); + }); + + oldest_time += ceph::make_timespan(after); + m_threads->timer->add_event_at(oldest_time, m_timer_task); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Instances<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Instances.h b/src/tools/rbd_mirror/Instances.h new file mode 100644 index 000000000..e6e104b73 --- /dev/null +++ b/src/tools/rbd_mirror/Instances.h @@ -0,0 +1,168 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCES_H +#define CEPH_RBD_MIRROR_INSTANCES_H + +#include <map> +#include <vector> + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "librbd/Watcher.h" +#include "tools/rbd_mirror/instances/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class Instances { +public: + typedef std::vector<std::string> InstanceIds; + + static Instances *create(Threads<ImageCtxT> *threads, + librados::IoCtx &ioctx, + const std::string& instance_id, + instances::Listener& listener) { + return new Instances(threads, ioctx, instance_id, listener); + } + void destroy() { + delete this; + } + + Instances(Threads<ImageCtxT> 
*threads, librados::IoCtx &ioctx, + const std::string& instance_id, instances::Listener& listener); + virtual ~Instances(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void unblock_listener(); + + void acked(const InstanceIds& instance_ids); + + void list(std::vector<std::string> *instance_ids); + +private: + /** + * @verbatim + * + * <uninitialized> <---------------------\ + * | (init) ^ | + * v (error) * | + * GET_INSTANCES * * * * * WAIT_FOR_OPS + * | ^ + * v (shut_down) | + * <initialized> ------------------------/ + * . + * . (remove_instance) + * v + * REMOVE_INSTANCE + * + * @endverbatim + */ + + enum InstanceState { + INSTANCE_STATE_ADDING, + INSTANCE_STATE_IDLE, + INSTANCE_STATE_REMOVING + }; + + using clock_t = ceph::real_clock; + struct Instance { + clock_t::time_point acked_time{}; + InstanceState state = INSTANCE_STATE_ADDING; + }; + + struct C_NotifyBase : public Context { + Instances *instances; + InstanceIds instance_ids; + + C_NotifyBase(Instances *instances, const InstanceIds& instance_ids) + : instances(instances), instance_ids(instance_ids) { + instances->m_async_op_tracker.start_op(); + } + + void finish(int r) override { + execute(); + instances->m_async_op_tracker.finish_op(); + } + + virtual void execute() = 0; + }; + + struct C_HandleAcked : public C_NotifyBase { + C_HandleAcked(Instances *instances, const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->handle_acked(this->instance_ids); + } + }; + + struct C_NotifyInstancesAdded : public C_NotifyBase { + C_NotifyInstancesAdded(Instances *instances, + const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->notify_instances_added(this->instance_ids); + } + }; + + struct C_NotifyInstancesRemoved : public C_NotifyBase { + C_NotifyInstancesRemoved(Instances *instances, + const InstanceIds& instance_ids) + : 
C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->notify_instances_removed(this->instance_ids); + } + }; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_ioctx; + std::string m_instance_id; + instances::Listener& m_listener; + CephContext *m_cct; + + ceph::mutex m_lock; + InstanceIds m_instance_ids; + std::map<std::string, Instance> m_instances; + Context *m_on_finish = nullptr; + AsyncOpTracker m_async_op_tracker; + + Context *m_timer_task = nullptr; + + bool m_listener_blocked = true; + + void handle_acked(const InstanceIds& instance_ids); + void notify_instances_added(const InstanceIds& instance_ids); + void notify_instances_removed(const InstanceIds& instance_ids); + + void get_instances(); + void handle_get_instances(int r); + + void wait_for_ops(); + void handle_wait_for_ops(int r); + + void remove_instances(const clock_t::time_point& time); + void handle_remove_instances(int r, const InstanceIds& instance_ids); + + void cancel_remove_task(); + void schedule_remove_task(const clock_t::time_point& time); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCES_H diff --git a/src/tools/rbd_mirror/LeaderWatcher.cc b/src/tools/rbd_mirror/LeaderWatcher.cc new file mode 100644 index 000000000..8f12af14c --- /dev/null +++ b/src/tools/rbd_mirror/LeaderWatcher.cc @@ -0,0 +1,1069 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "LeaderWatcher.h" +#include "common/Cond.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "include/stringify.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/watcher/Types.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::LeaderWatcher: " \ + << this << " " << 
__func__ << ": " +namespace rbd { +namespace mirror { + +using namespace leader_watcher; + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +LeaderWatcher<I>::LeaderWatcher(Threads<I> *threads, librados::IoCtx &io_ctx, + leader_watcher::Listener *listener) + : Watcher(io_ctx, threads->work_queue, RBD_MIRROR_LEADER), + m_threads(threads), m_listener(listener), m_instances_listener(this), + m_lock(ceph::make_mutex("rbd::mirror::LeaderWatcher " + + io_ctx.get_pool_name())), + m_notifier_id(librados::Rados(io_ctx).get_instance_id()), + m_instance_id(stringify(m_notifier_id)), + m_leader_lock(new LeaderLock(m_ioctx, *m_threads->asio_engine, m_oid, this, + true, m_cct->_conf.get_val<uint64_t>( + "rbd_blocklist_expire_seconds"))) { +} + +template <typename I> +LeaderWatcher<I>::~LeaderWatcher() { + ceph_assert(m_instances == nullptr); + ceph_assert(m_timer_task == nullptr); + + delete m_leader_lock; +} + +template <typename I> +std::string LeaderWatcher<I>::get_instance_id() { + return m_instance_id; +} + +template <typename I> +int LeaderWatcher<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void LeaderWatcher<I>::init(Context *on_finish) { + dout(10) << "notifier_id=" << m_notifier_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + create_leader_object(); +} + +template <typename I> +void LeaderWatcher<I>::create_leader_object() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + librados::ObjectWriteOperation op; + op.create(false); + + librados::AioCompletion *aio_comp = create_rados_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_create_leader_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void 
LeaderWatcher<I>::handle_create_leader_object(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + + if (r == 0) { + register_watch(); + return; + } + + derr << "error creating " << m_oid << " object: " << cpp_strerror(r) + << dendl; + + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::register_watch() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_register_watch>(this)); + + librbd::Watcher::register_watch(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_register_watch(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard timer_locker(m_threads->timer_lock); + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error registering leader watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + } else { + schedule_acquire_leader_lock(0); + } + + ceph_assert(m_on_finish != nullptr); + std::swap(on_finish, m_on_finish); + } + + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void LeaderWatcher<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + ceph_assert(m_on_shut_down_finish == nullptr); + m_on_shut_down_finish = on_finish; + cancel_timer_task(); + shut_down_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::shut_down_leader_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, 
&LeaderWatcher<I>::handle_shut_down_leader_lock>(this)); + + m_leader_lock->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error shutting down leader lock: " << cpp_strerror(r) << dendl; + } + + unregister_watch(); +} + +template <typename I> +void LeaderWatcher<I>::unregister_watch() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_unregister_watch>(this)); + + librbd::Watcher::unregister_watch(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_unregister_watch(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering leader watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + } + wait_for_tasks(); +} + +template <typename I> +void LeaderWatcher<I>::wait_for_tasks() { + dout(10) << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + schedule_timer_task("wait for tasks", 0, false, + &LeaderWatcher<I>::handle_wait_for_tasks, true); +} + +template <typename I> +void LeaderWatcher<I>::handle_wait_for_tasks() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_on_shut_down_finish != nullptr); + + ceph_assert(!m_timer_op_tracker.empty()); + m_timer_op_tracker.finish_op(); + + auto ctx = new LambdaContext([this](int r) { + Context *on_finish; + { + // ensure lock isn't held when completing shut down + std::lock_guard locker{m_lock}; + ceph_assert(m_on_shut_down_finish != nullptr); + on_finish = m_on_shut_down_finish; + } + on_finish->complete(0); + }); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +bool LeaderWatcher<I>::is_blocklisted() const { + std::lock_guard 
locker{m_lock}; + return m_blocklisted; +} + +template <typename I> +bool LeaderWatcher<I>::is_leader() const { + std::lock_guard locker{m_lock}; + return is_leader(m_lock); +} + +template <typename I> +bool LeaderWatcher<I>::is_leader(ceph::mutex &lock) const { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + bool leader = m_leader_lock->is_leader(); + dout(10) << leader << dendl; + return leader; +} + +template <typename I> +bool LeaderWatcher<I>::is_releasing_leader() const { + std::lock_guard locker{m_lock}; + return is_releasing_leader(m_lock); +} + +template <typename I> +bool LeaderWatcher<I>::is_releasing_leader(ceph::mutex &lock) const { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + bool releasing = m_leader_lock->is_releasing_leader(); + dout(10) << releasing << dendl; + return releasing; +} + +template <typename I> +bool LeaderWatcher<I>::get_leader_instance_id(std::string *instance_id) const { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + if (is_leader(m_lock) || is_releasing_leader(m_lock)) { + *instance_id = m_instance_id; + return true; + } + + if (!m_locker.cookie.empty()) { + *instance_id = stringify(m_locker.entity.num()); + return true; + } + + return false; +} + +template <typename I> +void LeaderWatcher<I>::release_leader() { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + if (!is_leader(m_lock)) { + return; + } + + release_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::list_instances(std::vector<std::string> *instance_ids) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + instance_ids->clear(); + if (m_instances != nullptr) { + m_instances->list(instance_ids); + } +} + +template <typename I> +void LeaderWatcher<I>::cancel_timer_task() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + if (m_timer_task == nullptr) { + return; + } + + dout(10) << m_timer_task << dendl; + bool canceled = 
m_threads->timer->cancel_event(m_timer_task); + ceph_assert(canceled); + m_timer_task = nullptr; +} + +template <typename I> +void LeaderWatcher<I>::schedule_timer_task(const std::string &name, + int delay_factor, bool leader, + TimerCallback timer_callback, + bool shutting_down) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + if (!shutting_down && m_on_shut_down_finish != nullptr) { + return; + } + + cancel_timer_task(); + + m_timer_task = new LambdaContext( + [this, leader, timer_callback](int r) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + m_timer_task = nullptr; + + if (m_timer_op_tracker.empty()) { + std::lock_guard locker{m_lock}; + execute_timer_task(leader, timer_callback); + return; + } + + // old timer task is still running -- do not start next + // task until the previous task completes + if (m_timer_gate == nullptr) { + m_timer_gate = new C_TimerGate(this); + m_timer_op_tracker.wait_for_ops(m_timer_gate); + } + m_timer_gate->leader = leader; + m_timer_gate->timer_callback = timer_callback; + }); + + int after = delay_factor * m_cct->_conf.get_val<uint64_t>( + "rbd_mirror_leader_heartbeat_interval"); + + dout(10) << "scheduling " << name << " after " << after << " sec (task " + << m_timer_task << ")" << dendl; + m_threads->timer->add_event_after(after, m_timer_task); +} + +template <typename I> +void LeaderWatcher<I>::execute_timer_task(bool leader, + TimerCallback timer_callback) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_timer_op_tracker.empty()); + + if (is_leader(m_lock) != leader) { + return; + } + + m_timer_op_tracker.start_op(); + (this->*timer_callback)(); +} + +template <typename I> +void LeaderWatcher<I>::handle_post_acquire_leader_lock(int r, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + if (r == -EAGAIN) { + dout(10) << "already 
locked" << dendl; + } else { + derr << "error acquiring leader lock: " << cpp_strerror(r) << dendl; + } + on_finish->complete(r); + return; + } + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + init_instances(); +} + +template <typename I> +void LeaderWatcher<I>::handle_pre_release_leader_lock(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + notify_listener(); +} + +template <typename I> +void LeaderWatcher<I>::handle_post_release_leader_lock(int r, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + on_finish->complete(r); + return; + } + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + notify_lock_released(); +} + +template <typename I> +void LeaderWatcher<I>::break_leader_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_locker.cookie.empty()) { + get_locker(); + return; + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_break_leader_lock>(this)); + + m_leader_lock->break_lock(m_locker, true, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_break_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (r < 0 && r != -ENOENT) { + derr << "error breaking leader lock: " << cpp_strerror(r) << dendl; + schedule_acquire_leader_lock(1); + m_timer_op_tracker.finish_op(); + return; + } + + m_locker = {}; + 
m_acquire_attempts = 0; + acquire_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::schedule_get_locker(bool reset_leader, + uint32_t delay_factor) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + if (reset_leader) { + m_locker = {}; + m_acquire_attempts = 0; + } + + schedule_timer_task("get locker", delay_factor, false, + &LeaderWatcher<I>::get_locker, false); +} + +template <typename I> +void LeaderWatcher<I>::get_locker() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_timer_op_tracker.empty()); + + C_GetLocker *get_locker_ctx = new C_GetLocker(this); + Context *ctx = create_async_context_callback(m_work_queue, get_locker_ctx); + + m_leader_lock->get_locker(&get_locker_ctx->locker, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_get_locker(int r, + librbd::managed_lock::Locker& locker) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock l{m_threads->timer_lock, m_lock}; + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (is_leader(m_lock)) { + m_locker = {}; + m_timer_op_tracker.finish_op(); + return; + } + + if (r == -ENOENT) { + m_locker = {}; + m_acquire_attempts = 0; + acquire_leader_lock(); + return; + } else if (r < 0) { + derr << "error retrieving leader locker: " << cpp_strerror(r) << dendl; + schedule_get_locker(true, 1); + m_timer_op_tracker.finish_op(); + return; + } + + bool notify_listener = false; + if (m_locker != locker) { + m_locker = locker; + notify_listener = true; + if (m_acquire_attempts > 1) { + dout(10) << "new lock owner detected -- resetting heartbeat counter" + << dendl; + m_acquire_attempts = 0; + } + } + + if (m_acquire_attempts >= m_cct->_conf.get_val<uint64_t>( + 
"rbd_mirror_leader_max_acquire_attempts_before_break")) { + dout(0) << "breaking leader lock after " << m_acquire_attempts << " " + << "failed attempts to acquire" << dendl; + break_leader_lock(); + return; + } + + schedule_acquire_leader_lock(1); + + if (!notify_listener) { + m_timer_op_tracker.finish_op(); + return; + } + + auto ctx = new LambdaContext( + [this](int r) { + std::string instance_id; + if (get_leader_instance_id(&instance_id)) { + m_listener->update_leader_handler(instance_id); + } + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + m_timer_op_tracker.finish_op(); + }); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void LeaderWatcher<I>::schedule_acquire_leader_lock(uint32_t delay_factor) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + schedule_timer_task("acquire leader lock", + delay_factor * + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats"), + false, &LeaderWatcher<I>::acquire_leader_lock, false); +} + +template <typename I> +void LeaderWatcher<I>::acquire_leader_lock() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_timer_op_tracker.empty()); + + ++m_acquire_attempts; + dout(10) << "acquire_attempts=" << m_acquire_attempts << dendl; + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_acquire_leader_lock>(this)); + m_leader_lock->try_acquire_lock(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_acquire_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (r < 0) { + if (r == -EAGAIN) { + 
dout(10) << "already locked" << dendl; + } else { + derr << "error acquiring lock: " << cpp_strerror(r) << dendl; + } + + get_locker(); + return; + } + + m_locker = {}; + m_acquire_attempts = 0; + + if (m_ret_val) { + dout(5) << "releasing due to error on notify" << dendl; + release_leader_lock(); + m_timer_op_tracker.finish_op(); + return; + } + + notify_heartbeat(); +} + +template <typename I> +void LeaderWatcher<I>::release_leader_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_release_leader_lock>(this)); + + m_leader_lock->release_lock(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_release_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + if (r < 0) { + derr << "error releasing lock: " << cpp_strerror(r) << dendl; + return; + } + + schedule_acquire_leader_lock(1); +} + +template <typename I> +void LeaderWatcher<I>::init_instances() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_instances == nullptr); + + m_instances = Instances<I>::create(m_threads, m_ioctx, m_instance_id, + m_instances_listener); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_instances>(this); + + m_instances->init(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_init_instances(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + if (r < 0) { + std::lock_guard locker{m_lock}; + derr << "error initializing instances: " << cpp_strerror(r) << dendl; + m_instances->destroy(); + m_instances = nullptr; + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } else { + std::lock_guard locker{m_lock}; + notify_listener(); + return; + } + + on_finish->complete(r); +} + +template <typename I> +void 
LeaderWatcher<I>::shut_down_instances() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_instances != nullptr); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback<LeaderWatcher<I>, + &LeaderWatcher<I>::handle_shut_down_instances>(this)); + + m_instances->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_instances(int r) { + dout(10) << "r=" << r << dendl; + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + + m_instances->destroy(); + m_instances = nullptr; + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::notify_listener() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_listener>(this)); + + if (is_leader(m_lock)) { + ctx = new LambdaContext( + [this, ctx](int r) { + m_listener->post_acquire_handler(ctx); + }); + } else { + ctx = new LambdaContext( + [this, ctx](int r) { + m_listener->pre_release_handler(ctx); + }); + } + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_listener(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error notifying listener: " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + + if (is_leader(m_lock)) { + notify_lock_acquired(); + } else { + shut_down_instances(); + } +} + +template <typename I> +void LeaderWatcher<I>::notify_lock_acquired() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_acquired>(this); + + bufferlist bl; + encode(NotifyMessage{LockAcquiredPayload{}}, bl); + + 
send_notify(bl, nullptr, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_lock_acquired(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying leader lock acquired: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + } + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + + if (m_ret_val == 0) { + // listener should be ready for instance add/remove events now + m_instances->unblock_listener(); + } + } + on_finish->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::notify_lock_released() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_released>(this); + + bufferlist bl; + encode(NotifyMessage{LockReleasedPayload{}}, bl); + + send_notify(bl, nullptr, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_lock_released(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying leader lock released: " << cpp_strerror(r) + << dendl; + } + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::notify_heartbeat() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_timer_op_tracker.empty()); + + if (!is_leader(m_lock)) { + dout(5) << "not leader, canceling" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_heartbeat>(this); + + bufferlist bl; + encode(NotifyMessage{HeartbeatPayload{}}, bl); + + m_heartbeat_response.acks.clear(); + 
send_notify(bl, &m_heartbeat_response, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_heartbeat(int r) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + ceph_assert(!m_timer_op_tracker.empty()); + + m_timer_op_tracker.finish_op(); + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + return; + } else if (!is_leader(m_lock)) { + return; + } + + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying heartbeat: " << cpp_strerror(r) + << ", releasing leader" << dendl; + release_leader_lock(); + return; + } + + dout(10) << m_heartbeat_response.acks.size() << " acks received, " + << m_heartbeat_response.timeouts.size() << " timed out" << dendl; + + std::vector<std::string> instance_ids; + for (auto &it: m_heartbeat_response.acks) { + uint64_t notifier_id = it.first.gid; + instance_ids.push_back(stringify(notifier_id)); + } + if (!instance_ids.empty()) { + m_instances->acked(instance_ids); + } + + schedule_timer_task("heartbeat", 1, true, + &LeaderWatcher<I>::notify_heartbeat, false); +} + +template <typename I> +void LeaderWatcher<I>::handle_heartbeat(Context *on_notify_ack) { + dout(10) << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (is_leader(m_lock)) { + dout(5) << "got another leader heartbeat, ignoring" << dendl; + } else if (!m_locker.cookie.empty()) { + cancel_timer_task(); + m_acquire_attempts = 0; + schedule_acquire_leader_lock(1); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_lock_acquired(Context *on_notify_ack) { + dout(10) << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (is_leader(m_lock)) { + dout(5) << "got another leader lock_acquired, ignoring" << dendl; + } else { + cancel_timer_task(); + schedule_get_locker(true, 0); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void 
LeaderWatcher<I>::handle_lock_released(Context *on_notify_ack) { + dout(10) << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (is_leader(m_lock)) { + dout(5) << "got another leader lock_released, ignoring" << dendl; + } else { + cancel_timer_task(); + schedule_get_locker(true, 0); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) { + dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", " + << "notifier_id=" << notifier_id << dendl; + + Context *ctx = new C_NotifyAck(this, notify_id, handle); + + if (notifier_id == m_notifier_id) { + dout(10) << "our own notification, ignoring" << dendl; + ctx->complete(0); + return; + } + + NotifyMessage notify_message; + try { + auto iter = bl.cbegin(); + decode(notify_message, iter); + } catch (const buffer::error &err) { + derr << "error decoding image notification: " << err.what() << dendl; + ctx->complete(0); + return; + } + + apply_visitor(HandlePayloadVisitor(this, ctx), notify_message.payload); +} + +template <typename I> +void LeaderWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLOCKLISTED) { + dout(1) << "blocklisted detected" << dendl; + m_blocklisted = true; + return; + } + + m_leader_lock->reacquire_lock(nullptr); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const HeartbeatPayload &payload, + Context *on_notify_ack) { + dout(10) << "heartbeat" << dendl; + + handle_heartbeat(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const LockAcquiredPayload &payload, + Context *on_notify_ack) { + dout(10) << "lock_acquired" << dendl; + + handle_lock_acquired(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const LockReleasedPayload &payload, + Context *on_notify_ack) { + dout(10) << "lock_released" << dendl; + + 
handle_lock_released(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const UnknownPayload &payload, + Context *on_notify_ack) { + dout(10) << "unknown" << dendl; + + on_notify_ack->complete(0); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::LeaderWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/LeaderWatcher.h b/src/tools/rbd_mirror/LeaderWatcher.h new file mode 100644 index 000000000..58f23148f --- /dev/null +++ b/src/tools/rbd_mirror/LeaderWatcher.h @@ -0,0 +1,313 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_LEADER_WATCHER_H +#define CEPH_RBD_MIRROR_LEADER_WATCHER_H + +#include <list> +#include <memory> +#include <string> + +#include "common/AsyncOpTracker.h" +#include "librbd/ManagedLock.h" +#include "librbd/Watcher.h" +#include "librbd/managed_lock/Types.h" +#include "librbd/watcher/Types.h" +#include "Instances.h" +#include "tools/rbd_mirror/instances/Types.h" +#include "tools/rbd_mirror/leader_watcher/Types.h" + +namespace librbd { +class ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class LeaderWatcher : protected librbd::Watcher { + using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning +public: + static LeaderWatcher* create(Threads<ImageCtxT> *threads, + librados::IoCtx &io_ctx, + leader_watcher::Listener *listener) { + return new LeaderWatcher(threads, io_ctx, listener); + } + + LeaderWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &io_ctx, + leader_watcher::Listener *listener); + ~LeaderWatcher() override; + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + bool is_blocklisted() const; + bool is_leader() const; + bool is_releasing_leader() const; + 
bool get_leader_instance_id(std::string *instance_id) const; + void release_leader(); + void list_instances(std::vector<std::string> *instance_ids); + + std::string get_instance_id(); + +private: + /** + * @verbatim + * + * <uninitialized> <------------------------------ WAIT_FOR_TASKS + * | (init) ^ ^ + * v * | + * CREATE_OBJECT * * * * * (error) UNREGISTER_WATCH + * | * ^ + * v * | + * REGISTER_WATCH * * * * * SHUT_DOWN_LEADER_LOCK + * | ^ + * | (no leader heartbeat and acquire failed) | + * | BREAK_LOCK <-------------------------------------\ | + * | | (no leader heartbeat) | | (shut down) + * | | /----------------------------------------\ | | + * | | | (lock_released received) | | + * | | | /-------------------------------------\ | | + * | | | | (lock_acquired or | | | + * | | | | heartbeat received) | | | + * | | | | (ENOENT) /-----------\ | | | + * | | | | * * * * * * * * * * | | | | | + * v v v v v (error) * v | | | | + * ACQUIRE_LEADER_LOCK * * * * *> GET_LOCKER ---> <secondary> + * | * ^ + * ....|...................*.................... .....|..................... + * . v * . . | post_release . + * .INIT_INSTANCES * * * * * . .NOTIFY_LOCK_RELEASED . + * . | . .....^..................... + * . v . | + * .NOTIFY_LISTENER . RELEASE_LEADER_LOCK + * . | . ^ + * . v . .....|..................... + * .NOTIFY_LOCK_ACQUIRED . . | . + * . | post_acquire . .SHUT_DOWN_INSTANCES . + * ....|........................................ . ^ . + * v . | . + * <leader> -----------------------------------> .NOTIFY_LISTENER . + * (shut_down, release_leader, . pre_release . + * notify error) ........................... 
+ * @endverbatim + */ + + struct InstancesListener : public instances::Listener { + LeaderWatcher* leader_watcher; + + InstancesListener(LeaderWatcher* leader_watcher) + : leader_watcher(leader_watcher) { + } + + void handle_added(const InstanceIds& instance_ids) override { + leader_watcher->m_listener->handle_instances_added(instance_ids); + } + + void handle_removed(const InstanceIds& instance_ids) override { + leader_watcher->m_listener->handle_instances_removed(instance_ids); + } + }; + + class LeaderLock : public librbd::ManagedLock<ImageCtxT> { + public: + typedef librbd::ManagedLock<ImageCtxT> Parent; + + LeaderLock(librados::IoCtx& ioctx, librbd::AsioEngine& asio_engine, + const std::string& oid, LeaderWatcher *watcher, + bool blocklist_on_break_lock, + uint32_t blocklist_expire_seconds) + : Parent(ioctx, asio_engine, oid, watcher, + librbd::managed_lock::EXCLUSIVE, blocklist_on_break_lock, + blocklist_expire_seconds), + watcher(watcher) { + } + + bool is_leader() const { + std::lock_guard locker{Parent::m_lock}; + return Parent::is_state_post_acquiring() || Parent::is_state_locked(); + } + + bool is_releasing_leader() const { + std::lock_guard locker{Parent::m_lock}; + return Parent::is_state_pre_releasing(); + } + + protected: + void post_acquire_lock_handler(int r, Context *on_finish) { + if (r == 0) { + // lock is owned at this point + std::lock_guard locker{Parent::m_lock}; + Parent::set_state_post_acquiring(); + } + watcher->handle_post_acquire_leader_lock(r, on_finish); + } + void pre_release_lock_handler(bool shutting_down, + Context *on_finish) { + watcher->handle_pre_release_leader_lock(on_finish); + } + void post_release_lock_handler(bool shutting_down, int r, + Context *on_finish) { + watcher->handle_post_release_leader_lock(r, on_finish); + } + private: + LeaderWatcher *watcher; + }; + + struct HandlePayloadVisitor : public boost::static_visitor<void> { + LeaderWatcher *leader_watcher; + Context *on_notify_ack; + + 
HandlePayloadVisitor(LeaderWatcher *leader_watcher, Context *on_notify_ack) + : leader_watcher(leader_watcher), on_notify_ack(on_notify_ack) { + } + + template <typename Payload> + inline void operator()(const Payload &payload) const { + leader_watcher->handle_payload(payload, on_notify_ack); + } + }; + + struct C_GetLocker : public Context { + LeaderWatcher *leader_watcher; + librbd::managed_lock::Locker locker; + + C_GetLocker(LeaderWatcher *leader_watcher) + : leader_watcher(leader_watcher) { + } + + void finish(int r) override { + leader_watcher->handle_get_locker(r, locker); + } + }; + + typedef void (LeaderWatcher<ImageCtxT>::*TimerCallback)(); + + struct C_TimerGate : public Context { + LeaderWatcher *leader_watcher; + + bool leader = false; + TimerCallback timer_callback = nullptr; + + C_TimerGate(LeaderWatcher *leader_watcher) + : leader_watcher(leader_watcher) { + } + + void finish(int r) override { + leader_watcher->m_timer_gate = nullptr; + leader_watcher->execute_timer_task(leader, timer_callback); + } + }; + + Threads<ImageCtxT> *m_threads; + leader_watcher::Listener *m_listener; + + InstancesListener m_instances_listener; + mutable ceph::mutex m_lock; + uint64_t m_notifier_id; + std::string m_instance_id; + LeaderLock *m_leader_lock; + Context *m_on_finish = nullptr; + Context *m_on_shut_down_finish = nullptr; + uint64_t m_acquire_attempts = 0; + int m_ret_val = 0; + Instances<ImageCtxT> *m_instances = nullptr; + librbd::managed_lock::Locker m_locker; + + bool m_blocklisted = false; + + AsyncOpTracker m_timer_op_tracker; + Context *m_timer_task = nullptr; + C_TimerGate *m_timer_gate = nullptr; + + librbd::watcher::NotifyResponse m_heartbeat_response; + + bool is_leader(ceph::mutex &m_lock) const; + bool is_releasing_leader(ceph::mutex &m_lock) const; + + void cancel_timer_task(); + void schedule_timer_task(const std::string &name, + int delay_factor, bool leader, + TimerCallback callback, bool shutting_down); + void execute_timer_task(bool leader, 
TimerCallback timer_callback); + + void create_leader_object(); + void handle_create_leader_object(int r); + + void register_watch(); + void handle_register_watch(int r); + + void shut_down_leader_lock(); + void handle_shut_down_leader_lock(int r); + + void unregister_watch(); + void handle_unregister_watch(int r); + + void wait_for_tasks(); + void handle_wait_for_tasks(); + + void break_leader_lock(); + void handle_break_leader_lock(int r); + + void schedule_get_locker(bool reset_leader, uint32_t delay_factor); + void get_locker(); + void handle_get_locker(int r, librbd::managed_lock::Locker& locker); + + void schedule_acquire_leader_lock(uint32_t delay_factor); + void acquire_leader_lock(); + void handle_acquire_leader_lock(int r); + + void release_leader_lock(); + void handle_release_leader_lock(int r); + + void init_instances(); + void handle_init_instances(int r); + + void shut_down_instances(); + void handle_shut_down_instances(int r); + + void notify_listener(); + void handle_notify_listener(int r); + + void notify_lock_acquired(); + void handle_notify_lock_acquired(int r); + + void notify_lock_released(); + void handle_notify_lock_released(int r); + + void notify_heartbeat(); + void handle_notify_heartbeat(int r); + + void handle_post_acquire_leader_lock(int r, Context *on_finish); + void handle_pre_release_leader_lock(Context *on_finish); + void handle_post_release_leader_lock(int r, Context *on_finish); + + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; + + void handle_rewatch_complete(int r) override; + + void handle_heartbeat(Context *on_ack); + void handle_lock_acquired(Context *on_ack); + void handle_lock_released(Context *on_ack); + + void handle_payload(const leader_watcher::HeartbeatPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::LockAcquiredPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::LockReleasedPayload 
&payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::UnknownPayload &payload, + Context *on_notify_ack); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_LEADER_WATCHER_H diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc new file mode 100644 index 000000000..e87009281 --- /dev/null +++ b/src/tools/rbd_mirror/Mirror.cc @@ -0,0 +1,763 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <signal.h> + +#include <boost/range/adaptor/map.hpp> + +#include "common/Formatter.h" +#include "common/PriorityCache.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Types.h" +#include "librbd/ImageCtx.h" +#include "perfglue/heap_profiler.h" +#include "Mirror.h" +#include "PoolMetaCache.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +using std::list; +using std::map; +using std::set; +using std::string; +using std::unique_ptr; +using std::vector; + +using librados::Rados; +using librados::IoCtx; +using librbd::mirror_peer_t; + +namespace rbd { +namespace mirror { + +namespace { + +class MirrorAdminSocketCommand { +public: + virtual ~MirrorAdminSocketCommand() {} + virtual int call(Formatter *f) = 0; +}; + +class StatusCommand : public MirrorAdminSocketCommand { +public: + explicit StatusCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->print_status(f); + return 0; + } + +private: + Mirror *mirror; +}; + +class StartCommand : public MirrorAdminSocketCommand { +public: + explicit StartCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->start(); + return 0; + } + +private: + Mirror *mirror; +}; + +class StopCommand : public MirrorAdminSocketCommand { +public: + explicit StopCommand(Mirror *mirror) : mirror(mirror) 
{} + + int call(Formatter *f) override { + mirror->stop(); + return 0; + } + +private: + Mirror *mirror; +}; + +class RestartCommand : public MirrorAdminSocketCommand { +public: + explicit RestartCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->restart(); + return 0; + } + +private: + Mirror *mirror; +}; + +class FlushCommand : public MirrorAdminSocketCommand { +public: + explicit FlushCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->flush(); + return 0; + } + +private: + Mirror *mirror; +}; + +class LeaderReleaseCommand : public MirrorAdminSocketCommand { +public: + explicit LeaderReleaseCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->release_leader(); + return 0; + } + +private: + Mirror *mirror; +}; + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PriCache: " << this << " " \ + << m_name << " " << __func__ << ": " + +struct PriCache : public PriorityCache::PriCache { + std::string m_name; + int64_t m_base_cache_max_size; + int64_t m_extra_cache_max_size; + + PriorityCache::Priority m_base_cache_pri = PriorityCache::Priority::PRI10; + PriorityCache::Priority m_extra_cache_pri = PriorityCache::Priority::PRI10; + int64_t m_base_cache_bytes = 0; + int64_t m_extra_cache_bytes = 0; + int64_t m_committed_bytes = 0; + double m_cache_ratio = 0; + + PriCache(const std::string &name, uint64_t min_size, uint64_t max_size) + : m_name(name), m_base_cache_max_size(min_size), + m_extra_cache_max_size(max_size - min_size) { + ceph_assert(max_size >= min_size); + } + + void prioritize() { + if (m_base_cache_pri == PriorityCache::Priority::PRI0) { + return; + } + auto pri = static_cast<uint8_t>(m_base_cache_pri); + m_base_cache_pri = static_cast<PriorityCache::Priority>(--pri); + + dout(30) << m_base_cache_pri << dendl; + } + + int64_t request_cache_bytes(PriorityCache::Priority pri, + uint64_t total_cache) const override { + int64_t 
cache_bytes = 0; + + if (pri == m_base_cache_pri) { + cache_bytes += m_base_cache_max_size; + } + if (pri == m_extra_cache_pri) { + cache_bytes += m_extra_cache_max_size; + } + + dout(30) << cache_bytes << dendl; + + return cache_bytes; + } + + int64_t get_cache_bytes(PriorityCache::Priority pri) const override { + int64_t cache_bytes = 0; + + if (pri == m_base_cache_pri) { + cache_bytes += m_base_cache_bytes; + } + if (pri == m_extra_cache_pri) { + cache_bytes += m_extra_cache_bytes; + } + + dout(30) << "pri=" << pri << " " << cache_bytes << dendl; + + return cache_bytes; + } + + int64_t get_cache_bytes() const override { + auto cache_bytes = m_base_cache_bytes + m_extra_cache_bytes; + + dout(30) << m_base_cache_bytes << "+" << m_extra_cache_bytes << "=" + << cache_bytes << dendl; + + return cache_bytes; + } + + void set_cache_bytes(PriorityCache::Priority pri, int64_t bytes) override { + ceph_assert(bytes >= 0); + ceph_assert(pri == m_base_cache_pri || pri == m_extra_cache_pri || + bytes == 0); + + dout(30) << "pri=" << pri << " " << bytes << dendl; + + if (pri == m_base_cache_pri) { + m_base_cache_bytes = std::min(m_base_cache_max_size, bytes); + bytes -= std::min(m_base_cache_bytes, bytes); + } + + if (pri == m_extra_cache_pri) { + m_extra_cache_bytes = bytes; + } + } + + void add_cache_bytes(PriorityCache::Priority pri, int64_t bytes) override { + ceph_assert(bytes >= 0); + ceph_assert(pri == m_base_cache_pri || pri == m_extra_cache_pri); + + dout(30) << "pri=" << pri << " " << bytes << dendl; + + if (pri == m_base_cache_pri) { + ceph_assert(m_base_cache_bytes <= m_base_cache_max_size); + + auto chunk = std::min(m_base_cache_max_size - m_base_cache_bytes, bytes); + m_base_cache_bytes += chunk; + bytes -= chunk; + } + + if (pri == m_extra_cache_pri) { + m_extra_cache_bytes += bytes; + } + } + + int64_t commit_cache_size(uint64_t total_cache) override { + m_committed_bytes = p2roundup<int64_t>(get_cache_bytes(), 4096); + + dout(30) << m_committed_bytes << dendl; 
+ + return m_committed_bytes; + } + + int64_t get_committed_size() const override { + dout(30) << m_committed_bytes << dendl; + + return m_committed_bytes; + } + + double get_cache_ratio() const override { + dout(30) << m_cache_ratio << dendl; + + return m_cache_ratio; + } + + void set_cache_ratio(double ratio) override { + dout(30) << m_cache_ratio << dendl; + + m_cache_ratio = ratio; + } + + void shift_bins() override { + } + + void import_bins(const std::vector<uint64_t> &intervals) override { + } + + void set_bins(PriorityCache::Priority pri, uint64_t end_interval) override { + } + + uint64_t get_bins(PriorityCache::Priority pri) const override { + return 0; + } + + std::string get_cache_name() const override { + return m_name; + } +}; + +} // anonymous namespace + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Mirror: " << this << " " \ + << __func__ << ": " + +class MirrorAdminSocketHook : public AdminSocketHook { +public: + MirrorAdminSocketHook(CephContext *cct, Mirror *mirror) : + admin_socket(cct->get_admin_socket()) { + std::string command; + int r; + + command = "rbd mirror status"; + r = admin_socket->register_command(command, this, + "get status for rbd mirror"); + if (r == 0) { + commands[command] = new StatusCommand(mirror); + } + + command = "rbd mirror start"; + r = admin_socket->register_command(command, this, + "start rbd mirror"); + if (r == 0) { + commands[command] = new StartCommand(mirror); + } + + command = "rbd mirror stop"; + r = admin_socket->register_command(command, this, + "stop rbd mirror"); + if (r == 0) { + commands[command] = new StopCommand(mirror); + } + + command = "rbd mirror restart"; + r = admin_socket->register_command(command, this, + "restart rbd mirror"); + if (r == 0) { + commands[command] = new RestartCommand(mirror); + } + + command = "rbd mirror flush"; + r = admin_socket->register_command(command, this, + "flush rbd mirror"); + if (r == 0) { + commands[command] = new FlushCommand(mirror); + } + + 
command = "rbd mirror leader release"; + r = admin_socket->register_command(command, this, + "release rbd mirror leader"); + if (r == 0) { + commands[command] = new LeaderReleaseCommand(mirror); + } + } + + ~MirrorAdminSocketHook() override { + (void)admin_socket->unregister_commands(this); + for (Commands::const_iterator i = commands.begin(); i != commands.end(); + ++i) { + delete i->second; + } + } + + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& errss, + bufferlist& out) override { + Commands::const_iterator i = commands.find(command); + ceph_assert(i != commands.end()); + return i->second->call(f); + } + +private: + typedef std::map<std::string, MirrorAdminSocketCommand*, std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +class CacheManagerHandler : public journal::CacheManagerHandler { +public: + CacheManagerHandler(CephContext *cct) + : m_cct(cct) { + + if (!m_cct->_conf.get_val<bool>("rbd_mirror_memory_autotune")) { + return; + } + + uint64_t base = m_cct->_conf.get_val<Option::size_t>( + "rbd_mirror_memory_base"); + double fragmentation = m_cct->_conf.get_val<double>( + "rbd_mirror_memory_expected_fragmentation"); + uint64_t target = m_cct->_conf.get_val<Option::size_t>( + "rbd_mirror_memory_target"); + uint64_t min = m_cct->_conf.get_val<Option::size_t>( + "rbd_mirror_memory_cache_min"); + uint64_t max = min; + + // When setting the maximum amount of memory to use for cache, first + // assume some base amount of memory for the daemon and then fudge in + // some overhead for fragmentation that scales with cache usage. 
+ uint64_t ltarget = (1.0 - fragmentation) * target; + if (ltarget > base + min) { + max = ltarget - base; + } + + m_next_balance = ceph_clock_now(); + m_next_resize = ceph_clock_now(); + + m_cache_manager = std::make_unique<PriorityCache::Manager>( + m_cct, min, max, target, false); + } + + ~CacheManagerHandler() { + std::lock_guard locker{m_lock}; + + ceph_assert(m_caches.empty()); + } + + void register_cache(const std::string &cache_name, + uint64_t min_size, uint64_t max_size, + journal::CacheRebalanceHandler* handler) override { + if (!m_cache_manager) { + handler->handle_cache_rebalanced(max_size); + return; + } + + dout(20) << cache_name << " min_size=" << min_size << " max_size=" + << max_size << " handler=" << handler << dendl; + + std::lock_guard locker{m_lock}; + + auto p = m_caches.insert( + {cache_name, {cache_name, min_size, max_size, handler}}); + ceph_assert(p.second == true); + + m_cache_manager->insert(cache_name, p.first->second.pri_cache, false); + m_next_balance = ceph_clock_now(); + } + + void unregister_cache(const std::string &cache_name) override { + if (!m_cache_manager) { + return; + } + + dout(20) << cache_name << dendl; + + std::lock_guard locker{m_lock}; + + auto it = m_caches.find(cache_name); + ceph_assert(it != m_caches.end()); + + m_cache_manager->erase(cache_name); + m_caches.erase(it); + m_next_balance = ceph_clock_now(); + } + + void run_cache_manager() { + if (!m_cache_manager) { + return; + } + + std::lock_guard locker{m_lock}; + + // Before we trim, check and see if it's time to rebalance/resize. 
+ auto autotune_interval = m_cct->_conf.get_val<double>( + "rbd_mirror_memory_cache_autotune_interval"); + auto resize_interval = m_cct->_conf.get_val<double>( + "rbd_mirror_memory_cache_resize_interval"); + + utime_t now = ceph_clock_now(); + + if (autotune_interval > 0 && m_next_balance <= now) { + dout(20) << "balance" << dendl; + m_cache_manager->balance(); + + for (auto &it : m_caches) { + auto pri_cache = static_cast<PriCache *>(it.second.pri_cache.get()); + auto new_cache_bytes = pri_cache->get_cache_bytes(); + it.second.handler->handle_cache_rebalanced(new_cache_bytes); + pri_cache->prioritize(); + } + + m_next_balance = ceph_clock_now(); + m_next_balance += autotune_interval; + } + + if (resize_interval > 0 && m_next_resize < now) { + if (ceph_using_tcmalloc()) { + dout(20) << "tune memory" << dendl; + m_cache_manager->tune_memory(); + } + + m_next_resize = ceph_clock_now(); + m_next_resize += resize_interval; + } + } + +private: + struct Cache { + std::shared_ptr<PriorityCache::PriCache> pri_cache; + journal::CacheRebalanceHandler *handler; + + Cache(const std::string name, uint64_t min_size, uint64_t max_size, + journal::CacheRebalanceHandler *handler) + : pri_cache(new PriCache(name, min_size, max_size)), handler(handler) { + } + }; + + CephContext *m_cct; + + mutable ceph::mutex m_lock = + ceph::make_mutex("rbd::mirror::CacheManagerHandler"); + std::unique_ptr<PriorityCache::Manager> m_cache_manager; + std::map<std::string, Cache> m_caches; + + utime_t m_next_balance; + utime_t m_next_resize; +}; + +Mirror::Mirror(CephContext *cct, const std::vector<const char*> &args) : + m_cct(cct), + m_args(args), + m_local(new librados::Rados()), + m_cache_manager_handler(new CacheManagerHandler(cct)), + m_pool_meta_cache(new PoolMetaCache(cct)), + m_asok_hook(new MirrorAdminSocketHook(cct, this)) { +} + +Mirror::~Mirror() +{ + delete m_asok_hook; +} + +void Mirror::handle_signal(int signum) +{ + dout(20) << signum << dendl; + + std::lock_guard l{m_lock}; + + 
switch (signum) { + case SIGHUP: + for (auto &it : m_pool_replayers) { + it.second->reopen_logs(); + } + g_ceph_context->reopen_logs(); + break; + + case SIGINT: + case SIGTERM: + m_stopping = true; + m_cond.notify_all(); + break; + + default: + ceph_abort_msgf("unexpected signal %d", signum); + } +} + +int Mirror::init() +{ + int r = m_local->init_with_context(m_cct); + if (r < 0) { + derr << "could not initialize rados handle" << dendl; + return r; + } + + r = m_local->connect(); + if (r < 0) { + derr << "error connecting to local cluster" << dendl; + return r; + } + + m_threads = &(m_cct->lookup_or_create_singleton_object< + Threads<librbd::ImageCtx>>("rbd_mirror::threads", false, m_local)); + m_service_daemon.reset(new ServiceDaemon<>(m_cct, m_local, m_threads)); + + r = m_service_daemon->init(); + if (r < 0) { + derr << "error registering service daemon: " << cpp_strerror(r) << dendl; + return r; + } + + m_local_cluster_watcher.reset(new ClusterWatcher(m_local, m_lock, + m_service_daemon.get())); + return r; +} + +void Mirror::run() +{ + dout(20) << "enter" << dendl; + + using namespace std::chrono_literals; + utime_t next_refresh_pools = ceph_clock_now(); + + while (!m_stopping) { + utime_t now = ceph_clock_now(); + bool refresh_pools = next_refresh_pools <= now; + if (refresh_pools) { + m_local_cluster_watcher->refresh_pools(); + next_refresh_pools = ceph_clock_now(); + next_refresh_pools += m_cct->_conf.get_val<uint64_t>( + "rbd_mirror_pool_replayers_refresh_interval"); + } + std::unique_lock l{m_lock}; + if (!m_manual_stop) { + if (refresh_pools) { + update_pool_replayers(m_local_cluster_watcher->get_pool_peers(), + m_local_cluster_watcher->get_site_name()); + } + m_cache_manager_handler->run_cache_manager(); + } + m_cond.wait_for(l, 1s); + } + + // stop all pool replayers in parallel + std::lock_guard locker{m_lock}; + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->stop(false); + } + dout(20) << "return" << dendl; +} + +void 
Mirror::print_status(Formatter *f) +{ + dout(20) << "enter" << dendl; + + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + f->open_object_section("mirror_status"); + f->open_array_section("pool_replayers"); + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->print_status(f); + } + f->close_section(); + f->close_section(); +} + +void Mirror::start() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->start(); + } +} + +void Mirror::stop() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + m_manual_stop = true; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->stop(true); + } +} + +void Mirror::restart() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->restart(); + } +} + +void Mirror::flush() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping || m_manual_stop) { + return; + } + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->flush(); + } +} + +void Mirror::release_leader() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->release_leader(); + } +} + +void Mirror::update_pool_replayers(const PoolPeers &pool_peers, + const std::string& site_name) +{ + dout(20) << "enter" << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + // remove stale pool replayers before creating new pool replayers + for (auto it = m_pool_replayers.begin(); it != m_pool_replayers.end();) { + auto &peer = it->first.second; + auto pool_peer_it = pool_peers.find(it->first.first); + if (pool_peer_it == 
pool_peers.end() || + pool_peer_it->second.find(peer) == pool_peer_it->second.end()) { + dout(20) << "removing pool replayer for " << peer << dendl; + // TODO: make async + it->second->shut_down(); + it = m_pool_replayers.erase(it); + } else { + ++it; + } + } + + for (auto &kv : pool_peers) { + for (auto &peer : kv.second) { + PoolPeer pool_peer(kv.first, peer); + + auto pool_replayers_it = m_pool_replayers.find(pool_peer); + if (pool_replayers_it != m_pool_replayers.end()) { + auto& pool_replayer = pool_replayers_it->second; + if (!m_site_name.empty() && !site_name.empty() && + m_site_name != site_name) { + dout(0) << "restarting pool replayer for " << peer << " due to " + << "updated site name" << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(site_name); + } else if (pool_replayer->is_blocklisted()) { + derr << "restarting blocklisted pool replayer for " << peer << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(site_name); + } else if (!pool_replayer->is_running()) { + derr << "restarting failed pool replayer for " << peer << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(site_name); + } + } else { + dout(20) << "starting pool replayer for " << peer << dendl; + unique_ptr<PoolReplayer<>> pool_replayer( + new PoolReplayer<>(m_threads, m_service_daemon.get(), + m_cache_manager_handler.get(), + m_pool_meta_cache.get(), kv.first, peer, + m_args)); + + // TODO: make async + pool_replayer->init(site_name); + m_pool_replayers.emplace(pool_peer, std::move(pool_replayer)); + } + } + + // TODO currently only support a single peer + } + + m_site_name = site_name; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/Mirror.h b/src/tools/rbd_mirror/Mirror.h new file mode 100644 index 000000000..f92a63b68 --- /dev/null +++ b/src/tools/rbd_mirror/Mirror.h @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// 
vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_H +#define CEPH_RBD_MIRROR_H + +#include "common/ceph_context.h" +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" +#include "include/utime.h" +#include "ClusterWatcher.h" +#include "PoolReplayer.h" +#include "tools/rbd_mirror/Types.h" + +#include <set> +#include <map> +#include <memory> +#include <atomic> + +namespace journal { class CacheManagerHandler; } + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct ServiceDaemon; +template <typename> struct Threads; +class CacheManagerHandler; +class MirrorAdminSocketHook; +class PoolMetaCache; + +/** + * Contains the main loop and overall state for rbd-mirror. + * + * Sets up mirroring, and coordinates between noticing config + * changes and applying them. + */ +class Mirror { +public: + Mirror(CephContext *cct, const std::vector<const char*> &args); + Mirror(const Mirror&) = delete; + Mirror& operator=(const Mirror&) = delete; + ~Mirror(); + + int init(); + void run(); + void handle_signal(int signum); + + void print_status(Formatter *f); + void start(); + void stop(); + void restart(); + void flush(); + void release_leader(); + +private: + typedef ClusterWatcher::PoolPeers PoolPeers; + typedef std::pair<int64_t, PeerSpec> PoolPeer; + + void update_pool_replayers(const PoolPeers &pool_peers, + const std::string& site_name); + + void create_cache_manager(); + void run_cache_manager(utime_t *next_run_interval); + + CephContext *m_cct; + std::vector<const char*> m_args; + Threads<librbd::ImageCtx> *m_threads = nullptr; + ceph::mutex m_lock = ceph::make_mutex("rbd::mirror::Mirror"); + ceph::condition_variable m_cond; + RadosRef m_local; + std::unique_ptr<ServiceDaemon<librbd::ImageCtx>> m_service_daemon; + + // monitor local cluster for config changes in peers + std::unique_ptr<ClusterWatcher> m_local_cluster_watcher; + std::unique_ptr<CacheManagerHandler> m_cache_manager_handler; + 
std::unique_ptr<PoolMetaCache> m_pool_meta_cache; + std::map<PoolPeer, std::unique_ptr<PoolReplayer<>>> m_pool_replayers; + std::atomic<bool> m_stopping = { false }; + bool m_manual_stop = false; + MirrorAdminSocketHook *m_asok_hook; + std::string m_site_name; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_H diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.cc b/src/tools/rbd_mirror/MirrorStatusUpdater.cc new file mode 100644 index 000000000..257cb1df2 --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusUpdater.cc @@ -0,0 +1,397 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/MirrorStatusUpdater.h" +#include "include/Context.h" +#include "include/stringify.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "tools/rbd_mirror/MirrorStatusWatcher.h" +#include "tools/rbd_mirror/Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::MirrorStatusUpdater " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +static const double UPDATE_INTERVAL_SECONDS = 30; +static const uint32_t MAX_UPDATES_PER_OP = 100; + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +MirrorStatusUpdater<I>::MirrorStatusUpdater( + librados::IoCtx& io_ctx, Threads<I> *threads, + const std::string& local_mirror_uuid) + : m_io_ctx(io_ctx), m_threads(threads), + m_local_mirror_uuid(local_mirror_uuid), + m_lock(ceph::make_mutex("rbd::mirror::MirrorStatusUpdater " + + stringify(m_io_ctx.get_id()))) { + dout(10) << "local_mirror_uuid=" << local_mirror_uuid << ", " + << "pool_id=" << m_io_ctx.get_id() << dendl; +} + +template <typename I> 
+MirrorStatusUpdater<I>::~MirrorStatusUpdater() { + ceph_assert(!m_initialized); + delete m_mirror_status_watcher; +} + +template <typename I> +void MirrorStatusUpdater<I>::init(Context* on_finish) { + dout(10) << dendl; + + ceph_assert(!m_initialized); + m_initialized = true; + + { + std::lock_guard timer_locker{m_threads->timer_lock}; + schedule_timer_task(); + } + + init_mirror_status_watcher(on_finish); +} + +template <typename I> +void MirrorStatusUpdater<I>::init_mirror_status_watcher(Context* on_finish) { + dout(10) << dendl; + + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_init_mirror_status_watcher(r, on_finish); + }); + m_mirror_status_watcher = MirrorStatusWatcher<I>::create( + m_io_ctx, m_threads->work_queue); + m_mirror_status_watcher->init(ctx); +} + +template <typename I> +void MirrorStatusUpdater<I>::handle_init_mirror_status_watcher( + int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to init mirror status watcher: " << cpp_strerror(r) + << dendl; + + delete m_mirror_status_watcher; + m_mirror_status_watcher = nullptr; + + on_finish = new LambdaContext([r, on_finish](int) { + on_finish->complete(r); + }); + shut_down(on_finish); + return; + } + + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void MirrorStatusUpdater<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + { + std::lock_guard timer_locker{m_threads->timer_lock}; + ceph_assert(m_timer_task != nullptr); + m_threads->timer->cancel_event(m_timer_task); + } + + { + std::unique_lock locker(m_lock); + ceph_assert(m_initialized); + m_initialized = false; + } + + shut_down_mirror_status_watcher(on_finish); +} + +template <typename I> +void MirrorStatusUpdater<I>::shut_down_mirror_status_watcher( + Context* on_finish) { + if (m_mirror_status_watcher == nullptr) { + finalize_shutdown(0, on_finish); + return; + } + + dout(10) << dendl; + + auto ctx = new LambdaContext([this, on_finish](int r) { + 
handle_shut_down_mirror_status_watcher(r, on_finish); + }); + m_mirror_status_watcher->shut_down(ctx); +} + +template <typename I> +void MirrorStatusUpdater<I>::handle_shut_down_mirror_status_watcher( + int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to shut down mirror status watcher: " << cpp_strerror(r) + << dendl; + } + + finalize_shutdown(r, on_finish); +} + +template <typename I> +void MirrorStatusUpdater<I>::finalize_shutdown(int r, Context* on_finish) { + dout(10) << dendl; + + { + std::unique_lock locker(m_lock); + if (m_update_in_progress) { + if (r < 0) { + on_finish = new LambdaContext([r, on_finish](int) { + on_finish->complete(r); + }); + } + + m_update_on_finish_ctxs.push_back(on_finish); + return; + } + } + + m_threads->work_queue->queue(on_finish, r); +} + +template <typename I> +bool MirrorStatusUpdater<I>::exists(const std::string& global_image_id) { + dout(15) << "global_image_id=" << global_image_id << dendl; + + std::unique_lock locker(m_lock); + return (m_global_image_status.count(global_image_id) > 0); +} + +template <typename I> +void MirrorStatusUpdater<I>::set_mirror_image_status( + const std::string& global_image_id, + const cls::rbd::MirrorImageSiteStatus& mirror_image_site_status, + bool immediate_update) { + dout(15) << "global_image_id=" << global_image_id << ", " + << "mirror_image_site_status=" << mirror_image_site_status << dendl; + + std::unique_lock locker(m_lock); + + m_global_image_status[global_image_id] = mirror_image_site_status; + if (immediate_update) { + m_update_global_image_ids.insert(global_image_id); + queue_update_task(std::move(locker)); + } +} + +template <typename I> +void MirrorStatusUpdater<I>::remove_refresh_mirror_image_status( + const std::string& global_image_id, + Context* on_finish) { + if (try_remove_mirror_image_status(global_image_id, false, false, + on_finish)) { + m_threads->work_queue->queue(on_finish, 0); + } +} + +template <typename I> +void 
MirrorStatusUpdater<I>::remove_mirror_image_status( + const std::string& global_image_id, bool immediate_update, + Context* on_finish) { + if (try_remove_mirror_image_status(global_image_id, true, immediate_update, + on_finish)) { + m_threads->work_queue->queue(on_finish, 0); + } +} + +template <typename I> +bool MirrorStatusUpdater<I>::try_remove_mirror_image_status( + const std::string& global_image_id, bool queue_update, + bool immediate_update, Context* on_finish) { + dout(15) << "global_image_id=" << global_image_id << ", " + << "queue_update=" << queue_update << ", " + << "immediate_update=" << immediate_update << dendl; + + std::unique_lock locker(m_lock); + if ((m_update_in_flight && + m_updating_global_image_ids.count(global_image_id) > 0) || + ((m_update_in_progress || m_update_requested) && + m_update_global_image_ids.count(global_image_id) > 0)) { + // if update is scheduled/in-progress, wait for it to complete + on_finish = new LambdaContext( + [this, global_image_id, queue_update, immediate_update, + on_finish](int r) { + if (try_remove_mirror_image_status(global_image_id, queue_update, + immediate_update, on_finish)) { + on_finish->complete(0); + } + }); + m_update_on_finish_ctxs.push_back(on_finish); + return false; + } + + m_global_image_status.erase(global_image_id); + if (queue_update) { + m_update_global_image_ids.insert(global_image_id); + if (immediate_update) { + queue_update_task(std::move(locker)); + } + } + + return true; +} + +template <typename I> +void MirrorStatusUpdater<I>::schedule_timer_task() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_timer_task == nullptr); + m_timer_task = create_context_callback< + MirrorStatusUpdater<I>, + &MirrorStatusUpdater<I>::handle_timer_task>(this); + m_threads->timer->add_event_after(UPDATE_INTERVAL_SECONDS, m_timer_task); +} + +template <typename I> +void MirrorStatusUpdater<I>::handle_timer_task(int r) { + dout(10) << dendl; + + 
ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_timer_task != nullptr); + m_timer_task = nullptr; + schedule_timer_task(); + + std::unique_lock locker(m_lock); + for (auto& pair : m_global_image_status) { + m_update_global_image_ids.insert(pair.first); + } + + queue_update_task(std::move(locker)); +} + +template <typename I> +void MirrorStatusUpdater<I>::queue_update_task( + std::unique_lock<ceph::mutex>&& locker) { + if (!m_initialized) { + return; + } + + if (m_update_in_progress) { + if (m_update_in_flight) { + dout(10) << "deferring update due to in-flight ops" << dendl; + m_update_requested = true; + } + return; + } + + m_update_in_progress = true; + ceph_assert(!m_update_in_flight); + ceph_assert(!m_update_requested); + locker.unlock(); + + dout(10) << dendl; + auto ctx = create_context_callback< + MirrorStatusUpdater<I>, + &MirrorStatusUpdater<I>::update_task>(this); + m_threads->work_queue->queue(ctx); +} + +template <typename I> +void MirrorStatusUpdater<I>::update_task(int r) { + dout(10) << dendl; + + std::unique_lock locker(m_lock); + ceph_assert(m_update_in_progress); + ceph_assert(!m_update_in_flight); + m_update_in_flight = true; + + std::swap(m_updating_global_image_ids, m_update_global_image_ids); + auto updating_global_image_ids = m_updating_global_image_ids; + auto global_image_status = m_global_image_status; + locker.unlock(); + + Context* ctx = create_context_callback< + MirrorStatusUpdater<I>, + &MirrorStatusUpdater<I>::handle_update_task>(this); + if (updating_global_image_ids.empty()) { + ctx->complete(0); + return; + } + + auto gather = new C_Gather(g_ceph_context, ctx); + + auto it = updating_global_image_ids.begin(); + while (it != updating_global_image_ids.end()) { + librados::ObjectWriteOperation op; + uint32_t op_count = 0; + + while (it != updating_global_image_ids.end() && + op_count < MAX_UPDATES_PER_OP) { + auto& global_image_id = *it; + ++it; + + auto status_it = global_image_status.find(global_image_id); 
+ if (status_it == global_image_status.end()) { + librbd::cls_client::mirror_image_status_remove(&op, global_image_id); + ++op_count; + continue; + } + + status_it->second.mirror_uuid = m_local_mirror_uuid; + librbd::cls_client::mirror_image_status_set(&op, global_image_id, + status_it->second); + ++op_count; + } + + auto aio_comp = create_rados_callback(gather->new_sub()); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); + } + + gather->activate(); +} + +template <typename I> +void MirrorStatusUpdater<I>::handle_update_task(int r) { + dout(10) << dendl; + if (r < 0) { + derr << "failed to update mirror image statuses: " << cpp_strerror(r) + << dendl; + } + + std::unique_lock locker(m_lock); + + Contexts on_finish_ctxs; + std::swap(on_finish_ctxs, m_update_on_finish_ctxs); + + ceph_assert(m_update_in_progress); + m_update_in_progress = false; + + ceph_assert(m_update_in_flight); + m_update_in_flight = false; + + m_updating_global_image_ids.clear(); + + if (m_update_requested) { + m_update_requested = false; + queue_update_task(std::move(locker)); + } else { + locker.unlock(); + } + + for (auto on_finish : on_finish_ctxs) { + on_finish->complete(0); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::MirrorStatusUpdater<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.h b/src/tools/rbd_mirror/MirrorStatusUpdater.h new file mode 100644 index 000000000..783b818fc --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusUpdater.h @@ -0,0 +1,119 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H +#define CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H + +#include "include/rados/librados.hpp" +#include "common/ceph_mutex.h" +#include "cls/rbd/cls_rbd_types.h" +#include <list> +#include <map> +#include <set> +#include <string> + +struct Context; +namespace librbd { class 
ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct MirrorStatusWatcher; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class MirrorStatusUpdater { +public: + + static MirrorStatusUpdater* create(librados::IoCtx& io_ctx, + Threads<ImageCtxT> *threads, + const std::string& local_mirror_uuid) { + return new MirrorStatusUpdater(io_ctx, threads, local_mirror_uuid); + } + + MirrorStatusUpdater(librados::IoCtx& io_ctx, Threads<ImageCtxT> *threads, + const std::string& local_mirror_uuid); + ~MirrorStatusUpdater(); + + void init(Context* on_finish); + void shut_down(Context* on_finish); + + bool exists(const std::string& global_image_id); + void set_mirror_image_status( + const std::string& global_image_id, + const cls::rbd::MirrorImageSiteStatus& mirror_image_site_status, + bool immediate_update); + void remove_mirror_image_status(const std::string& global_image_id, + bool immediate_update, Context* on_finish); + void remove_refresh_mirror_image_status(const std::string& global_image_id, + Context* on_finish); + +private: + /** + * @verbatim + * + * <uninitialized> <----------------------\ + * | (init) ^ (error) | + * v * | + * INIT_STATUS_WATCHER * * * * * | + * | | + * | SHUT_DOWN_STATUS_WATCHER + * | ^ + * | | + * | (shutdown) | + * <initialized> -------------------------/ + * + * @endverbatim + */ + typedef std::list<Context*> Contexts; + typedef std::set<std::string> GlobalImageIds; + typedef std::map<std::string, cls::rbd::MirrorImageSiteStatus> + GlobalImageStatus; + + librados::IoCtx m_io_ctx; + Threads<ImageCtxT>* m_threads; + std::string m_local_mirror_uuid; + + Context* m_timer_task = nullptr; + + ceph::mutex m_lock; + + bool m_initialized = false; + + MirrorStatusWatcher<ImageCtxT>* m_mirror_status_watcher = nullptr; + + GlobalImageIds m_update_global_image_ids; + GlobalImageStatus m_global_image_status; + + bool m_update_in_progress = false; + bool m_update_in_flight = false; + bool 
m_update_requested = false; + Contexts m_update_on_finish_ctxs; + GlobalImageIds m_updating_global_image_ids; + + bool try_remove_mirror_image_status(const std::string& global_image_id, + bool queue_update, bool immediate_update, + Context* on_finish); + + void init_mirror_status_watcher(Context* on_finish); + void handle_init_mirror_status_watcher(int r, Context* on_finish); + + void shut_down_mirror_status_watcher(Context* on_finish); + void handle_shut_down_mirror_status_watcher(int r, Context* on_finish); + void finalize_shutdown(int r, Context* on_finish); + + void schedule_timer_task(); + void handle_timer_task(int r); + + void queue_update_task(std::unique_lock<ceph::mutex>&& locker); + void update_task(int r); + void handle_update_task(int r); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::MirrorStatusUpdater<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.cc b/src/tools/rbd_mirror/MirrorStatusWatcher.cc new file mode 100644 index 000000000..3e1564c5b --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusWatcher.cc @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "MirrorStatusWatcher.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::MirrorStatusWatcher: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_rados_callback; + +template <typename I> +MirrorStatusWatcher<I>::MirrorStatusWatcher(librados::IoCtx &io_ctx, + librbd::asio::ContextWQ *work_queue) + : Watcher(io_ctx, work_queue, RBD_MIRRORING) { +} + +template <typename I> +MirrorStatusWatcher<I>::~MirrorStatusWatcher() { +} + +template 
<typename I> +void MirrorStatusWatcher<I>::init(Context *on_finish) { + dout(20) << dendl; + + on_finish = new LambdaContext( + [this, on_finish] (int r) { + if (r < 0) { + derr << "error removing down statuses: " << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + register_watch(on_finish); + }); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_status_remove_down(&op); + librados::AioCompletion *aio_comp = create_rados_callback(on_finish); + + int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void MirrorStatusWatcher<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + unregister_watch(on_finish); +} + +template <typename I> +void MirrorStatusWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, + bufferlist &bl) { + dout(20) << dendl; + + bufferlist out; + acknowledge_notify(notify_id, handle, out); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::MirrorStatusWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.h b/src/tools/rbd_mirror/MirrorStatusWatcher.h new file mode 100644 index 000000000..3335e9e63 --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusWatcher.h @@ -0,0 +1,43 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H +#define CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H + +#include "librbd/Watcher.h" + +namespace librbd { +class ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class MirrorStatusWatcher : protected librbd::Watcher { +public: + static MirrorStatusWatcher *create(librados::IoCtx &io_ctx, + librbd::asio::ContextWQ *work_queue) { + return new MirrorStatusWatcher(io_ctx, work_queue); + } + void destroy() { + 
delete this; + } + + MirrorStatusWatcher(librados::IoCtx &io_ctx, + librbd::asio::ContextWQ *work_queue); + ~MirrorStatusWatcher() override; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + +protected: + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H diff --git a/src/tools/rbd_mirror/NamespaceReplayer.cc b/src/tools/rbd_mirror/NamespaceReplayer.cc new file mode 100644 index 000000000..d305d8472 --- /dev/null +++ b/src/tools/rbd_mirror/NamespaceReplayer.cc @@ -0,0 +1,862 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "NamespaceReplayer.h" +#include "common/Formatter.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" +#include "librbd/api/Mirror.h" +#include "librbd/asio/ContextWQ.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::NamespaceReplayer: " \ + << this << " " << __func__ << ": " + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +namespace rbd { +namespace mirror { + +using ::operator<<; + +namespace { + +const std::string SERVICE_DAEMON_LOCAL_COUNT_KEY("image_local_count"); +const std::string SERVICE_DAEMON_REMOTE_COUNT_KEY("image_remote_count"); + +} // anonymous namespace + +template <typename I> +NamespaceReplayer<I>::NamespaceReplayer( + const std::string &name, + librados::IoCtx &local_io_ctx, librados::IoCtx &remote_io_ctx, + const std::string &local_mirror_uuid, + const std::string& local_mirror_peer_uuid, + const RemotePoolMeta& remote_pool_meta, + Threads<I> *threads, + Throttler<I> 
*image_sync_throttler, + Throttler<I> *image_deletion_throttler, + ServiceDaemon<I> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) : + m_namespace_name(name), + m_local_mirror_uuid(local_mirror_uuid), + m_local_mirror_peer_uuid(local_mirror_peer_uuid), + m_remote_pool_meta(remote_pool_meta), + m_threads(threads), m_image_sync_throttler(image_sync_throttler), + m_image_deletion_throttler(image_deletion_throttler), + m_service_daemon(service_daemon), + m_cache_manager_handler(cache_manager_handler), + m_pool_meta_cache(pool_meta_cache), + m_lock(ceph::make_mutex(librbd::util::unique_lock_name( + "rbd::mirror::NamespaceReplayer " + name, this))), + m_local_pool_watcher_listener(this, true), + m_remote_pool_watcher_listener(this, false), + m_image_map_listener(this) { + dout(10) << name << dendl; + + m_local_io_ctx.dup(local_io_ctx); + m_local_io_ctx.set_namespace(name); + m_remote_io_ctx.dup(remote_io_ctx); + m_remote_io_ctx.set_namespace(name); +} + +template <typename I> +bool NamespaceReplayer<I>::is_blocklisted() const { + std::lock_guard locker{m_lock}; + return m_instance_replayer->is_blocklisted() || + (m_local_pool_watcher && + m_local_pool_watcher->is_blocklisted()) || + (m_remote_pool_watcher && + m_remote_pool_watcher->is_blocklisted()); +} + +template <typename I> +void NamespaceReplayer<I>::init(Context *on_finish) { + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + init_local_status_updater(); +} + + +template <typename I> +void NamespaceReplayer<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + { + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + if (!m_image_map) { + stop_instance_replayer(); + return; + } + } + + auto ctx = new LambdaContext( + [this] (int r) { + std::lock_guard locker{m_lock}; + stop_instance_replayer(); + }); + 
handle_release_leader(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::print_status(Formatter *f) +{ + dout(20) << dendl; + + ceph_assert(f); + + std::lock_guard locker{m_lock}; + + m_instance_replayer->print_status(f); + + if (m_image_deleter) { + f->open_object_section("image_deleter"); + m_image_deleter->print_status(f); + f->close_section(); + } +} + +template <typename I> +void NamespaceReplayer<I>::start() +{ + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + m_instance_replayer->start(); +} + +template <typename I> +void NamespaceReplayer<I>::stop() +{ + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + m_instance_replayer->stop(); +} + +template <typename I> +void NamespaceReplayer<I>::restart() +{ + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + m_instance_replayer->restart(); +} + +template <typename I> +void NamespaceReplayer<I>::flush() +{ + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + m_instance_replayer->flush(); +} + +template <typename I> +void NamespaceReplayer<I>::handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) { + std::lock_guard locker{m_lock}; + + if (!m_image_map) { + dout(20) << "not leader" << dendl; + return; + } + + dout(10) << "mirror_uuid=" << mirror_uuid << ", " + << "added_count=" << added_image_ids.size() << ", " + << "removed_count=" << removed_image_ids.size() << dendl; + + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_LOCAL_COUNT_KEY, m_local_pool_watcher->get_image_count()); + if (m_remote_pool_watcher) { + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_REMOTE_COUNT_KEY, + m_remote_pool_watcher->get_image_count()); + } + + std::set<std::string> added_global_image_ids; + for (auto& image_id : added_image_ids) { + 
added_global_image_ids.insert(image_id.global_id); + } + + std::set<std::string> removed_global_image_ids; + for (auto& image_id : removed_image_ids) { + removed_global_image_ids.insert(image_id.global_id); + } + + m_image_map->update_images(mirror_uuid, + std::move(added_global_image_ids), + std::move(removed_global_image_ids)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_acquire_leader(Context *on_finish) { + dout(10) << dendl; + + m_instance_watcher->handle_acquire_leader(); + + init_image_map(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_release_leader(Context *on_finish) { + dout(10) << dendl; + + m_instance_watcher->handle_release_leader(); + shut_down_image_deleter(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_update_leader( + const std::string &leader_instance_id) { + dout(10) << "leader_instance_id=" << leader_instance_id << dendl; + + m_instance_watcher->handle_update_leader(leader_instance_id); +} + +template <typename I> +void NamespaceReplayer<I>::handle_instances_added( + const std::vector<std::string> &instance_ids) { + dout(10) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + + if (!m_image_map) { + return; + } + + m_image_map->update_instances_added(instance_ids); +} + +template <typename I> +void NamespaceReplayer<I>::handle_instances_removed( + const std::vector<std::string> &instance_ids) { + dout(10) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + + if (!m_image_map) { + return; + } + + m_image_map->update_instances_removed(instance_ids); +} + +template <typename I> +void NamespaceReplayer<I>::init_local_status_updater() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_local_status_updater); + + m_local_status_updater.reset(MirrorStatusUpdater<I>::create( + m_local_io_ctx, m_threads, "")); + auto ctx = create_context_callback< + NamespaceReplayer<I>, + 
&NamespaceReplayer<I>::handle_init_local_status_updater>(this); + + m_local_status_updater->init(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_local_status_updater(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error initializing local mirror status updater: " + << cpp_strerror(r) << dendl; + + m_local_status_updater.reset(); + ceph_assert(m_on_finish != nullptr); + m_threads->work_queue->queue(m_on_finish, r); + m_on_finish = nullptr; + return; + } + + init_remote_status_updater(); +} + +template <typename I> +void NamespaceReplayer<I>::init_remote_status_updater() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_remote_status_updater); + + m_remote_status_updater.reset(MirrorStatusUpdater<I>::create( + m_remote_io_ctx, m_threads, m_local_mirror_uuid)); + auto ctx = create_context_callback< + NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_init_remote_status_updater>(this); + m_remote_status_updater->init(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_remote_status_updater(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error initializing remote mirror status updater: " + << cpp_strerror(r) << dendl; + + m_remote_status_updater.reset(); + m_ret_val = r; + shut_down_local_status_updater(); + return; + } + + init_instance_replayer(); +} + +template <typename I> +void NamespaceReplayer<I>::init_instance_replayer() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_instance_replayer); + + m_instance_replayer.reset(InstanceReplayer<I>::create( + m_local_io_ctx, m_local_mirror_uuid, m_threads, m_service_daemon, + m_local_status_updater.get(), m_cache_manager_handler, + m_pool_meta_cache)); + auto ctx = create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_init_instance_replayer>(this); + + 
m_instance_replayer->init(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_instance_replayer(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error initializing instance replayer: " << cpp_strerror(r) + << dendl; + + m_instance_replayer.reset(); + m_ret_val = r; + shut_down_remote_status_updater(); + return; + } + + m_instance_replayer->add_peer({m_local_mirror_peer_uuid, m_remote_io_ctx, + m_remote_pool_meta, + m_remote_status_updater.get()}); + + init_instance_watcher(); +} + +template <typename I> +void NamespaceReplayer<I>::init_instance_watcher() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_instance_watcher); + + m_instance_watcher.reset(InstanceWatcher<I>::create( + m_local_io_ctx, *m_threads->asio_engine, m_instance_replayer.get(), + m_image_sync_throttler)); + auto ctx = create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_init_instance_watcher>(this); + + m_instance_watcher->init(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_instance_watcher(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error initializing instance watcher: " << cpp_strerror(r) + << dendl; + + m_instance_watcher.reset(); + m_ret_val = r; + shut_down_instance_replayer(); + return; + } + + ceph_assert(m_on_finish != nullptr); + m_threads->work_queue->queue(m_on_finish); + m_on_finish = nullptr; +} + +template <typename I> +void NamespaceReplayer<I>::stop_instance_replayer() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_stop_instance_replayer>(this)); + + m_instance_replayer->stop(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_stop_instance_replayer(int r) { + 
dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error stopping instance replayer: " << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + + shut_down_instance_watcher(); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_instance_watcher() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_instance_watcher); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_shut_down_instance_watcher>(this)); + + m_instance_watcher->shut_down(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_instance_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error shutting instance watcher down: " << cpp_strerror(r) + << dendl; + } + + std::lock_guard locker{m_lock}; + + m_instance_watcher.reset(); + + shut_down_instance_replayer(); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_instance_replayer() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_instance_replayer); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_shut_down_instance_replayer>(this)); + + m_instance_replayer->shut_down(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_instance_replayer(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error shutting instance replayer down: " << cpp_strerror(r) + << dendl; + } + + std::lock_guard locker{m_lock}; + + m_instance_replayer.reset(); + + shut_down_remote_status_updater(); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_remote_status_updater() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_remote_status_updater); + + auto ctx = create_async_context_callback( + m_threads->work_queue, 
create_context_callback< + NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_shut_down_remote_status_updater>(this)); + m_remote_status_updater->shut_down(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_remote_status_updater(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error shutting remote mirror status updater down: " + << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + m_remote_status_updater.reset(); + + shut_down_local_status_updater(); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_local_status_updater() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_local_status_updater); + + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_shut_down_local_status_updater>(this)); + + m_local_status_updater->shut_down(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_local_status_updater(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error shutting local mirror status updater down: " + << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + + m_local_status_updater.reset(); + + ceph_assert(!m_image_map); + ceph_assert(!m_image_deleter); + ceph_assert(!m_local_pool_watcher); + ceph_assert(!m_remote_pool_watcher); + ceph_assert(!m_instance_watcher); + ceph_assert(!m_instance_replayer); + + ceph_assert(m_on_finish != nullptr); + m_threads->work_queue->queue(m_on_finish, m_ret_val); + m_on_finish = nullptr; + m_ret_val = 0; +} + +template <typename I> +void NamespaceReplayer<I>::init_image_map(Context *on_finish) { + dout(10) << dendl; + + auto image_map = ImageMap<I>::create(m_local_io_ctx, m_threads, + m_instance_watcher->get_instance_id(), + m_image_map_listener); + + auto ctx = new LambdaContext( + [this, image_map, on_finish](int r) { + handle_init_image_map(r, image_map, 
on_finish); + }); + image_map->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_image_map(int r, ImageMap<I> *image_map, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to init image map: " << cpp_strerror(r) << dendl; + on_finish = new LambdaContext([image_map, on_finish, r](int) { + delete image_map; + on_finish->complete(r); + }); + image_map->shut_down(on_finish); + return; + } + + ceph_assert(!m_image_map); + m_image_map.reset(image_map); + + init_local_pool_watcher(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::init_local_pool_watcher(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(!m_local_pool_watcher); + m_local_pool_watcher.reset(PoolWatcher<I>::create( + m_threads, m_local_io_ctx, m_local_mirror_uuid, + m_local_pool_watcher_listener)); + + // ensure the initial set of local images is up-to-date + // after acquiring the leader role + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_init_local_pool_watcher(r, on_finish); + }); + m_local_pool_watcher->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_local_pool_watcher( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to retrieve local images: " << cpp_strerror(r) << dendl; + on_finish = new LambdaContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_pool_watchers(on_finish); + return; + } + + init_remote_pool_watcher(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::init_remote_pool_watcher(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(!m_remote_pool_watcher); + m_remote_pool_watcher.reset(PoolWatcher<I>::create( + m_threads, m_remote_io_ctx, m_remote_pool_meta.mirror_uuid, + 
m_remote_pool_watcher_listener)); + + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_init_remote_pool_watcher(r, on_finish); + }); + m_remote_pool_watcher->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_remote_pool_watcher( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r == -ENOENT) { + // Technically nothing to do since the other side doesn't + // have mirroring enabled. Eventually the remote pool watcher will + // detect images (if mirroring is enabled), so no point propagating + // an error which would just busy-spin the state machines. + dout(0) << "remote peer does not have mirroring configured" << dendl; + } else if (r < 0) { + derr << "failed to retrieve remote images: " << cpp_strerror(r) << dendl; + on_finish = new LambdaContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_pool_watchers(on_finish); + return; + } + + init_image_deleter(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::init_image_deleter(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(!m_image_deleter); + + on_finish = new LambdaContext([this, on_finish](int r) { + handle_init_image_deleter(r, on_finish); + }); + m_image_deleter.reset(ImageDeleter<I>::create(m_local_io_ctx, m_threads, + m_image_deletion_throttler, + m_service_daemon)); + m_image_deleter->init(create_async_context_callback( + m_threads->work_queue, on_finish)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_image_deleter( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to init image deleter: " << cpp_strerror(r) << dendl; + on_finish = new LambdaContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_image_deleter(on_finish); + return; + } + + on_finish->complete(0); +} + +template <typename I> +void 
NamespaceReplayer<I>::shut_down_image_deleter(Context* on_finish) { + dout(10) << dendl; + { + std::lock_guard locker{m_lock}; + if (m_image_deleter) { + Context *ctx = new LambdaContext([this, on_finish](int r) { + handle_shut_down_image_deleter(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + m_image_deleter->shut_down(ctx); + return; + } + } + shut_down_pool_watchers(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_image_deleter( + int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_deleter); + m_image_deleter.reset(); + } + + shut_down_pool_watchers(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_pool_watchers(Context *on_finish) { + dout(10) << dendl; + + { + std::lock_guard locker{m_lock}; + if (m_local_pool_watcher) { + Context *ctx = new LambdaContext([this, on_finish](int r) { + handle_shut_down_pool_watchers(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + auto gather_ctx = new C_Gather(g_ceph_context, ctx); + m_local_pool_watcher->shut_down(gather_ctx->new_sub()); + if (m_remote_pool_watcher) { + m_remote_pool_watcher->shut_down(gather_ctx->new_sub()); + } + gather_ctx->activate(); + return; + } + } + + on_finish->complete(0); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_pool_watchers( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_local_pool_watcher); + m_local_pool_watcher.reset(); + + if (m_remote_pool_watcher) { + m_remote_pool_watcher.reset(); + } + } + shut_down_image_map(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_image_map(Context *on_finish) { + dout(5) << dendl; + + std::lock_guard locker{m_lock}; + if (m_image_map) { + on_finish = new LambdaContext( + [this, on_finish](int r) { + 
handle_shut_down_image_map(r, on_finish); + }); + m_image_map->shut_down(create_async_context_callback( + m_threads->work_queue, on_finish)); + return; + } + + m_threads->work_queue->queue(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_image_map(int r, Context *on_finish) { + dout(5) << "r=" << r << dendl; + if (r < 0 && r != -EBLOCKLISTED) { + derr << "failed to shut down image map: " << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + ceph_assert(m_image_map); + m_image_map.reset(); + + m_instance_replayer->release_all(create_async_context_callback( + m_threads->work_queue, on_finish)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_image_acquire(instance_id, global_image_id, + on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_image_release(instance_id, global_image_id, + on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + ceph_assert(!mirror_uuid.empty()); + dout(5) << "mirror_uuid=" << mirror_uuid << ", " + << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_peer_image_removed(instance_id, global_image_id, + mirror_uuid, on_finish); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::NamespaceReplayer<librbd::ImageCtx>; diff 
--git a/src/tools/rbd_mirror/NamespaceReplayer.h b/src/tools/rbd_mirror/NamespaceReplayer.h new file mode 100644 index 000000000..e304b8253 --- /dev/null +++ b/src/tools/rbd_mirror/NamespaceReplayer.h @@ -0,0 +1,308 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H +#define CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H + +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" + +#include "tools/rbd_mirror/ImageDeleter.h" +#include "tools/rbd_mirror/ImageMap.h" +#include "tools/rbd_mirror/InstanceReplayer.h" +#include "tools/rbd_mirror/InstanceWatcher.h" +#include "tools/rbd_mirror/MirrorStatusUpdater.h" +#include "tools/rbd_mirror/PoolWatcher.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_map/Types.h" +#include "tools/rbd_mirror/pool_watcher/Types.h" + +#include <memory> +#include <string> +#include <vector> + +class AdminSocketHook; + +namespace journal { struct CacheManagerHandler; } + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +struct PoolMetaCache; +template <typename> class ServiceDaemon; +template <typename> class Throttler; +template <typename> struct Threads; + +/** + * Controls mirroring for a single namespace of a pool against a single remote peer. 
+ */ +template <typename ImageCtxT = librbd::ImageCtx> +class NamespaceReplayer { +public: + static NamespaceReplayer *create( + const std::string &name, + librados::IoCtx &local_ioctx, + librados::IoCtx &remote_ioctx, + const std::string &local_mirror_uuid, + const std::string &local_mirror_peer_uuid, + const RemotePoolMeta& remote_pool_meta, + Threads<ImageCtxT> *threads, + Throttler<ImageCtxT> *image_sync_throttler, + Throttler<ImageCtxT> *image_deletion_throttler, + ServiceDaemon<ImageCtxT> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) { + return new NamespaceReplayer(name, local_ioctx, remote_ioctx, + local_mirror_uuid, local_mirror_peer_uuid, + remote_pool_meta, threads, + image_sync_throttler, image_deletion_throttler, + service_daemon, cache_manager_handler, + pool_meta_cache); + } + + NamespaceReplayer(const std::string &name, + librados::IoCtx &local_ioctx, + librados::IoCtx &remote_ioctx, + const std::string &local_mirror_uuid, + const std::string& local_mirror_peer_uuid, + const RemotePoolMeta& remote_pool_meta, + Threads<ImageCtxT> *threads, + Throttler<ImageCtxT> *image_sync_throttler, + Throttler<ImageCtxT> *image_deletion_throttler, + ServiceDaemon<ImageCtxT> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache); + NamespaceReplayer(const NamespaceReplayer&) = delete; + NamespaceReplayer& operator=(const NamespaceReplayer&) = delete; + + bool is_blocklisted() const; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void handle_acquire_leader(Context *on_finish); + void handle_release_leader(Context *on_finish); + void handle_update_leader(const std::string &leader_instance_id); + void handle_instances_added(const std::vector<std::string> &instance_ids); + void handle_instances_removed(const std::vector<std::string> &instance_ids); + + void print_status(Formatter *f); + void start(); + void stop(); + void 
restart(); + void flush(); + +private: + /** + * @verbatim + * + * <uninitialized> <------------------------------------\ + * | (init) ^ (error) | + * v * | + * INIT_LOCAL_STATUS_UPDATER * * * * * * * * > SHUT_DOWN_LOCAL_STATUS_UPDATER + * | * (error) ^ + * v * | + * INIT_REMOTE_STATUS_UPDATER * * * * * * * > SHUT_DOWN_REMOTE_STATUS_UPDATER + * | * (error) ^ + * v * | + * INIT_INSTANCE_REPLAYER * * * * * * * * * > SHUT_DOWN_INSTANCE_REPLAYER + * | * ^ + * v * | + * INIT_INSTANCE_WATCHER * * * * * * * * * * SHUT_DOWN_INSTANCE_WATCHER + * | (error) ^ + * | | + * v STOP_INSTANCE_REPLAYER + * | ^ + * | (shut down) | + * | /----------------------------------------------/ + * v | + * <follower> <---------------------------\ + * . | + * . | + * v (leader acquired) | + * INIT_IMAGE_MAP | + * | | + * v | + * INIT_LOCAL_POOL_WATCHER SHUT_DOWN_IMAGE_MAP + * | ^ + * v | + * INIT_REMOTE_POOL_WATCHER SHUT_DOWN_POOL_WATCHERS + * | ^ + * v | + * INIT_IMAGE_DELETER SHUT_DOWN_IMAGE_DELETER + * | ^ + * v . + * <leader> <-----------\ . + * . | . + * . (image update) | . + * . . > NOTIFY_INSTANCE_WATCHER . + * . . + * . (leader lost / shut down) . + * . . . . . . . . . . . . . . . . . . . + * + * @endverbatim + */ + + struct PoolWatcherListener : public pool_watcher::Listener { + NamespaceReplayer *namespace_replayer; + bool local; + + PoolWatcherListener(NamespaceReplayer *namespace_replayer, bool local) + : namespace_replayer(namespace_replayer), local(local) { + } + + void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) override { + namespace_replayer->handle_update((local ? 
"" : mirror_uuid), + std::move(added_image_ids), + std::move(removed_image_ids)); + } + }; + + struct ImageMapListener : public image_map::Listener { + NamespaceReplayer *namespace_replayer; + + ImageMapListener(NamespaceReplayer *namespace_replayer) + : namespace_replayer(namespace_replayer) { + } + + void acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + namespace_replayer->handle_acquire_image(global_image_id, instance_id, + on_finish); + } + + void release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + namespace_replayer->handle_release_image(global_image_id, instance_id, + on_finish); + } + + void remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + namespace_replayer->handle_remove_image(mirror_uuid, global_image_id, + instance_id, on_finish); + } + }; + + void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids); + + int init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, RadosRef *rados_ref, + bool strip_cluster_overrides); + + void init_local_status_updater(); + void handle_init_local_status_updater(int r); + + void init_remote_status_updater(); + void handle_init_remote_status_updater(int r); + + void init_instance_replayer(); + void handle_init_instance_replayer(int r); + + void init_instance_watcher(); + void handle_init_instance_watcher(int r); + + void stop_instance_replayer(); + void handle_stop_instance_replayer(int r); + + void shut_down_instance_watcher(); + void handle_shut_down_instance_watcher(int r); + + void shut_down_instance_replayer(); + void handle_shut_down_instance_replayer(int r); + + void shut_down_remote_status_updater(); + void 
handle_shut_down_remote_status_updater(int r); + + void shut_down_local_status_updater(); + void handle_shut_down_local_status_updater(int r); + + void init_image_map(Context *on_finish); + void handle_init_image_map(int r, ImageMap<ImageCtxT> *image_map, + Context *on_finish); + + void init_local_pool_watcher(Context *on_finish); + void handle_init_local_pool_watcher(int r, Context *on_finish); + + void init_remote_pool_watcher(Context *on_finish); + void handle_init_remote_pool_watcher(int r, Context *on_finish); + + void init_image_deleter(Context* on_finish); + void handle_init_image_deleter(int r, Context* on_finish); + + void shut_down_image_deleter(Context* on_finish); + void handle_shut_down_image_deleter(int r, Context* on_finish); + + void shut_down_pool_watchers(Context *on_finish); + void handle_shut_down_pool_watchers(int r, Context *on_finish); + + void shut_down_image_map(Context *on_finish); + void handle_shut_down_image_map(int r, Context *on_finish); + + void handle_acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + void handle_release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + void handle_remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + + std::string m_namespace_name; + librados::IoCtx m_local_io_ctx; + librados::IoCtx m_remote_io_ctx; + std::string m_local_mirror_uuid; + std::string m_local_mirror_peer_uuid; + RemotePoolMeta m_remote_pool_meta; + Threads<ImageCtxT> *m_threads; + Throttler<ImageCtxT> *m_image_sync_throttler; + Throttler<ImageCtxT> *m_image_deletion_throttler; + ServiceDaemon<ImageCtxT> *m_service_daemon; + journal::CacheManagerHandler *m_cache_manager_handler; + PoolMetaCache* m_pool_meta_cache; + + mutable ceph::mutex m_lock; + + int m_ret_val = 0; + Context *m_on_finish = nullptr; + + 
std::unique_ptr<MirrorStatusUpdater<ImageCtxT>> m_local_status_updater; + std::unique_ptr<MirrorStatusUpdater<ImageCtxT>> m_remote_status_updater; + + PoolWatcherListener m_local_pool_watcher_listener; + std::unique_ptr<PoolWatcher<ImageCtxT>> m_local_pool_watcher; + + PoolWatcherListener m_remote_pool_watcher_listener; + std::unique_ptr<PoolWatcher<ImageCtxT>> m_remote_pool_watcher; + + std::unique_ptr<InstanceReplayer<ImageCtxT>> m_instance_replayer; + std::unique_ptr<ImageDeleter<ImageCtxT>> m_image_deleter; + + ImageMapListener m_image_map_listener; + std::unique_ptr<ImageMap<ImageCtxT>> m_image_map; + + std::unique_ptr<InstanceWatcher<ImageCtxT>> m_instance_watcher; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::NamespaceReplayer<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H diff --git a/src/tools/rbd_mirror/PoolMetaCache.cc b/src/tools/rbd_mirror/PoolMetaCache.cc new file mode 100644 index 000000000..261802a55 --- /dev/null +++ b/src/tools/rbd_mirror/PoolMetaCache.cc @@ -0,0 +1,83 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/dout.h" +#include "tools/rbd_mirror/PoolMetaCache.h" +#include <shared_mutex> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolMetaCache: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +int PoolMetaCache::get_local_pool_meta( + int64_t pool_id, + LocalPoolMeta* local_pool_meta) const { + dout(15) << "pool_id=" << pool_id << dendl; + + std::shared_lock locker{m_lock}; + auto it = m_local_pool_metas.find(pool_id); + if (it == m_local_pool_metas.end()) { + return -ENOENT; + } + + *local_pool_meta = it->second; + return 0; +} + +void PoolMetaCache::set_local_pool_meta( + int64_t pool_id, + const LocalPoolMeta& local_pool_meta) { + dout(15) << 
"pool_id=" << pool_id << ", " + << "local_pool_meta=" << local_pool_meta << dendl; + + std::unique_lock locker(m_lock); + m_local_pool_metas[pool_id] = local_pool_meta; +} + +void PoolMetaCache::remove_local_pool_meta(int64_t pool_id) { + dout(15) << "pool_id=" << pool_id << dendl; + + std::unique_lock locker(m_lock); + m_local_pool_metas.erase(pool_id); +} + +int PoolMetaCache::get_remote_pool_meta( + int64_t pool_id, + RemotePoolMeta* remote_pool_meta) const { + dout(15) << "pool_id=" << pool_id << dendl; + + std::shared_lock locker{m_lock}; + auto it = m_remote_pool_metas.find(pool_id); + if (it == m_remote_pool_metas.end()) { + return -ENOENT; + } + + *remote_pool_meta = it->second; + return 0; +} + +void PoolMetaCache::set_remote_pool_meta( + int64_t pool_id, + const RemotePoolMeta& remote_pool_meta) { + dout(15) << "pool_id=" << pool_id << ", " + << "remote_pool_meta=" << remote_pool_meta << dendl; + + std::unique_lock locker(m_lock); + m_remote_pool_metas[pool_id] = remote_pool_meta; +} + +void PoolMetaCache::remove_remote_pool_meta(int64_t pool_id) { + dout(15) << "pool_id=" << pool_id << dendl; + + std::unique_lock locker(m_lock); + m_remote_pool_metas.erase(pool_id); +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/PoolMetaCache.h b/src/tools/rbd_mirror/PoolMetaCache.h new file mode 100644 index 000000000..f0440120f --- /dev/null +++ b/src/tools/rbd_mirror/PoolMetaCache.h @@ -0,0 +1,47 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_META_CACHE_H +#define CEPH_RBD_MIRROR_POOL_META_CACHE_H + +#include "include/int_types.h" +#include "common/ceph_mutex.h" +#include "tools/rbd_mirror/Types.h" +#include <map> + +namespace rbd { +namespace mirror { + +class PoolMetaCache { +public: + PoolMetaCache(CephContext* cct) + : m_cct(cct) { + } + PoolMetaCache(const PoolMetaCache&) = delete; + PoolMetaCache& operator=(const PoolMetaCache&) = delete; + 
+ int get_local_pool_meta(int64_t pool_id, + LocalPoolMeta* local_pool_meta) const; + void set_local_pool_meta(int64_t pool_id, + const LocalPoolMeta& local_pool_meta); + void remove_local_pool_meta(int64_t pool_id); + + int get_remote_pool_meta(int64_t pool_id, + RemotePoolMeta* remote_pool_meta) const; + void set_remote_pool_meta(int64_t pool_id, + const RemotePoolMeta& remote_pool_meta); + void remove_remote_pool_meta(int64_t pool_id); + +private: + CephContext* m_cct; + + mutable ceph::shared_mutex m_lock = + ceph::make_shared_mutex("rbd::mirror::PoolMetaCache::m_lock"); + std::map<int64_t, LocalPoolMeta> m_local_pool_metas; + std::map<int64_t, RemotePoolMeta> m_remote_pool_metas; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_POOL_META_CACHE_H diff --git a/src/tools/rbd_mirror/PoolReplayer.cc b/src/tools/rbd_mirror/PoolReplayer.cc new file mode 100644 index 000000000..8a04219da --- /dev/null +++ b/src/tools/rbd_mirror/PoolReplayer.cc @@ -0,0 +1,1110 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "PoolReplayer.h" +#include "common/Cond.h" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/ceph_argparse.h" +#include "common/code_environment.h" +#include "common/common_init.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "global/global_context.h" +#include "librbd/api/Config.h" +#include "librbd/api/Namespace.h" +#include "PoolMetaCache.h" +#include "RemotePoolPoller.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolReplayer: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using ::operator<<; + +namespace { + +const std::string SERVICE_DAEMON_INSTANCE_ID_KEY("instance_id"); +const std::string 
SERVICE_DAEMON_LEADER_KEY("leader"); + +const std::vector<std::string> UNIQUE_PEER_CONFIG_KEYS { + {"monmap", "mon_host", "mon_dns_srv_name", "key", "keyfile", "keyring"}}; + +template <typename I> +class PoolReplayerAdminSocketCommand { +public: + PoolReplayerAdminSocketCommand(PoolReplayer<I> *pool_replayer) + : pool_replayer(pool_replayer) { + } + virtual ~PoolReplayerAdminSocketCommand() {} + virtual int call(Formatter *f) = 0; +protected: + PoolReplayer<I> *pool_replayer; +}; + +template <typename I> +class StatusCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StatusCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->print_status(f); + return 0; + } +}; + +template <typename I> +class StartCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StartCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->start(); + return 0; + } +}; + +template <typename I> +class StopCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StopCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->stop(true); + return 0; + } +}; + +template <typename I> +class RestartCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit RestartCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->restart(); + return 0; + } +}; + +template <typename I> +class FlushCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit FlushCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + 
this->pool_replayer->flush(); + return 0; + } +}; + +template <typename I> +class LeaderReleaseCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit LeaderReleaseCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->release_leader(); + return 0; + } +}; + +template <typename I> +class PoolReplayerAdminSocketHook : public AdminSocketHook { +public: + PoolReplayerAdminSocketHook(CephContext *cct, const std::string &name, + PoolReplayer<I> *pool_replayer) + : admin_socket(cct->get_admin_socket()) { + std::string command; + int r; + + command = "rbd mirror status " + name; + r = admin_socket->register_command(command, this, + "get status for rbd mirror " + name); + if (r == 0) { + commands[command] = new StatusCommand<I>(pool_replayer); + } + + command = "rbd mirror start " + name; + r = admin_socket->register_command(command, this, + "start rbd mirror " + name); + if (r == 0) { + commands[command] = new StartCommand<I>(pool_replayer); + } + + command = "rbd mirror stop " + name; + r = admin_socket->register_command(command, this, + "stop rbd mirror " + name); + if (r == 0) { + commands[command] = new StopCommand<I>(pool_replayer); + } + + command = "rbd mirror restart " + name; + r = admin_socket->register_command(command, this, + "restart rbd mirror " + name); + if (r == 0) { + commands[command] = new RestartCommand<I>(pool_replayer); + } + + command = "rbd mirror flush " + name; + r = admin_socket->register_command(command, this, + "flush rbd mirror " + name); + if (r == 0) { + commands[command] = new FlushCommand<I>(pool_replayer); + } + + command = "rbd mirror leader release " + name; + r = admin_socket->register_command(command, this, + "release rbd mirror leader " + name); + if (r == 0) { + commands[command] = new LeaderReleaseCommand<I>(pool_replayer); + } + } + + ~PoolReplayerAdminSocketHook() override { + 
(void)admin_socket->unregister_commands(this); + for (auto i = commands.begin(); i != commands.end(); ++i) { + delete i->second; + } + } + + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& ss, + bufferlist& out) override { + auto i = commands.find(command); + ceph_assert(i != commands.end()); + return i->second->call(f); + } + +private: + typedef std::map<std::string, PoolReplayerAdminSocketCommand<I>*, + std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +} // anonymous namespace + +template <typename I> +struct PoolReplayer<I>::RemotePoolPollerListener + : public remote_pool_poller::Listener { + + PoolReplayer<I>* m_pool_replayer; + + RemotePoolPollerListener(PoolReplayer<I>* pool_replayer) + : m_pool_replayer(pool_replayer) { + } + + void handle_updated(const RemotePoolMeta& remote_pool_meta) override { + m_pool_replayer->handle_remote_pool_meta_updated(remote_pool_meta); + } +}; + +template <typename I> +PoolReplayer<I>::PoolReplayer( + Threads<I> *threads, ServiceDaemon<I> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache, int64_t local_pool_id, + const PeerSpec &peer, const std::vector<const char*> &args) : + m_threads(threads), + m_service_daemon(service_daemon), + m_cache_manager_handler(cache_manager_handler), + m_pool_meta_cache(pool_meta_cache), + m_local_pool_id(local_pool_id), + m_peer(peer), + m_args(args), + m_lock(ceph::make_mutex("rbd::mirror::PoolReplayer " + stringify(peer))), + m_pool_replayer_thread(this), + m_leader_listener(this) { +} + +template <typename I> +PoolReplayer<I>::~PoolReplayer() +{ + shut_down(); + + ceph_assert(m_asok_hook == nullptr); +} + +template <typename I> +bool PoolReplayer<I>::is_blocklisted() const { + std::lock_guard locker{m_lock}; + return m_blocklisted; +} + +template <typename I> +bool PoolReplayer<I>::is_leader() const { + std::lock_guard locker{m_lock}; + return 
m_leader_watcher && m_leader_watcher->is_leader();
}

// Reports whether the replayer thread has been started and no stop has been
// requested. Note that m_stopping is read here without taking m_lock.
template <typename I>
bool PoolReplayer<I>::is_running() const {
  return m_pool_replayer_thread.is_started() && !m_stopping;
}

// Connects to the local and remote clusters, opens the pool on both sides and
// brings up the helper objects (throttlers, remote pool poller, default
// namespace replayer, leader watcher) before starting the replayer thread.
// Most failure paths record a service-daemon callout and return early; there
// is no return value.
template <typename I>
void PoolReplayer<I>::init(const std::string& site_name) {
  std::lock_guard locker{m_lock};

  ceph_assert(!m_pool_replayer_thread.is_started());

  // reset state
  m_stopping = false;
  m_blocklisted = false;
  m_site_name = site_name;

  dout(10) << "replaying for " << m_peer << dendl;
  // local cluster: no mon_host/key override, cluster overrides are kept
  int r = init_rados(g_ceph_context->_conf->cluster,
                     g_ceph_context->_conf->name.to_str(),
                     "", "", "local cluster", &m_local_rados, false);
  if (r < 0) {
    m_callout_id = m_service_daemon->add_or_update_callout(
      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
      "unable to connect to local cluster");
    return;
  }

  // remote peer: uses the peer's mon_host/key and strips cluster overrides
  r = init_rados(m_peer.cluster_name, m_peer.client_name,
                 m_peer.mon_host, m_peer.key,
                 std::string("remote peer ") + stringify(m_peer),
                 &m_remote_rados, true);
  if (r < 0) {
    m_callout_id = m_service_daemon->add_or_update_callout(
      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
      "unable to connect to remote cluster");
    return;
  }

  r = m_local_rados->ioctx_create2(m_local_pool_id, m_local_io_ctx);
  if (r < 0) {
    // NOTE(review): unlike the other failure paths in this function, this one
    // only logs and does not register a callout -- confirm that is intended
    derr << "error accessing local pool " << m_local_pool_id << ": "
         << cpp_strerror(r) << dendl;
    return;
  }

  auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
  librbd::api::Config<I>::apply_pool_overrides(m_local_io_ctx, &cct->_conf);

  r = librbd::cls_client::mirror_uuid_get(&m_local_io_ctx,
                                          &m_local_mirror_uuid);
  if (r < 0) {
    derr << "failed to retrieve local mirror uuid from pool "
         << m_local_io_ctx.get_pool_name() << ": " << cpp_strerror(r) << dendl;
    m_callout_id = m_service_daemon->add_or_update_callout(
      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
      "unable to query local mirror uuid");
    return;
  }

  // the remote pool is looked up by the local pool's name
  r =
m_remote_rados->ioctx_create(m_local_io_ctx.get_pool_name().c_str(),
                                   m_remote_io_ctx);
  if (r < 0) {
    // reported as a warning, whereas the other callouts in init() are errors
    derr << "error accessing remote pool " << m_local_io_ctx.get_pool_name()
         << ": " << cpp_strerror(r) << dendl;
    m_callout_id = m_service_daemon->add_or_update_callout(
      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_WARNING,
      "unable to access remote pool");
    return;
  }

  dout(10) << "connected to " << m_peer << dendl;

  // throttlers are keyed by the name of the config option given here
  m_image_sync_throttler.reset(
      Throttler<I>::create(cct, "rbd_mirror_concurrent_image_syncs"));

  m_image_deletion_throttler.reset(
      Throttler<I>::create(cct, "rbd_mirror_concurrent_image_deletions"));

  m_remote_pool_poller_listener.reset(new RemotePoolPollerListener(this));
  m_remote_pool_poller.reset(RemotePoolPoller<I>::create(
    m_threads, m_remote_io_ctx, m_site_name, m_local_mirror_uuid,
    *m_remote_pool_poller_listener));

  // block until the poller's init completes
  C_SaferCond on_pool_poller_init;
  m_remote_pool_poller->init(&on_pool_poller_init);
  r = on_pool_poller_init.wait();
  if (r < 0) {
    derr << "failed to initialize remote pool poller: " << cpp_strerror(r)
         << dendl;
    m_callout_id = m_service_daemon->add_or_update_callout(
      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
      "unable to initialize remote pool poller");
    m_remote_pool_poller.reset();
    return;
  }
  // a successful poller init is expected to have populated the remote meta
  ceph_assert(!m_remote_pool_meta.mirror_uuid.empty());
  // publish both pool metas in the shared cache, keyed by pool id
  m_pool_meta_cache->set_remote_pool_meta(
    m_remote_io_ctx.get_id(), m_remote_pool_meta);
  m_pool_meta_cache->set_local_pool_meta(
    m_local_io_ctx.get_id(), {m_local_mirror_uuid});

  // the default namespace is represented by the empty name
  m_default_namespace_replayer.reset(NamespaceReplayer<I>::create(
      "", m_local_io_ctx, m_remote_io_ctx, m_local_mirror_uuid, m_peer.uuid,
      m_remote_pool_meta, m_threads, m_image_sync_throttler.get(),
      m_image_deletion_throttler.get(), m_service_daemon,
      m_cache_manager_handler, m_pool_meta_cache));

  // block until the default namespace replayer's init completes
  C_SaferCond on_init;
  m_default_namespace_replayer->init(&on_init);
  r = on_init.wait();
  if (r < 0) {
derr << "error initializing default namespace replayer: " << cpp_strerror(r)
         << dendl;
    m_callout_id = m_service_daemon->add_or_update_callout(
      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
      "unable to initialize default namespace replayer");
    m_default_namespace_replayer.reset();
    return;
  }

  m_leader_watcher.reset(LeaderWatcher<I>::create(m_threads, m_local_io_ctx,
                                                  &m_leader_listener));
  r = m_leader_watcher->init();
  if (r < 0) {
    derr << "error initializing leader watcher: " << cpp_strerror(r) << dendl;
    m_callout_id = m_service_daemon->add_or_update_callout(
      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
      "unable to initialize leader messenger object");
    m_leader_watcher.reset();
    return;
  }

  // every step succeeded: clear any callout left by an earlier failed attempt
  if (m_callout_id != service_daemon::CALLOUT_ID_NONE) {
    m_service_daemon->remove_callout(m_local_pool_id, m_callout_id);
    m_callout_id = service_daemon::CALLOUT_ID_NONE;
  }

  m_service_daemon->add_or_update_attribute(
    m_local_io_ctx.get_id(), SERVICE_DAEMON_INSTANCE_ID_KEY,
    stringify(m_local_io_ctx.get_instance_id()));

  m_pool_replayer_thread.create("pool replayer");
}

// Stops the replayer thread and tears down the helper objects in the reverse
// order of their creation in init(). Each member is checked before use, so
// this also cleans up after an init() that returned early.
template <typename I>
void PoolReplayer<I>::shut_down() {
  {
    // request the stop under the lock and wake the thread waiting on m_cond
    std::lock_guard l{m_lock};
    m_stopping = true;
    m_cond.notify_all();
  }
  if (m_pool_replayer_thread.is_started()) {
    m_pool_replayer_thread.join();
  }

  if (m_leader_watcher) {
    m_leader_watcher->shut_down();
  }
  m_leader_watcher.reset();

  if (m_default_namespace_replayer) {
    // block until the asynchronous shut down completes
    C_SaferCond on_shut_down;
    m_default_namespace_replayer->shut_down(&on_shut_down);
    on_shut_down.wait();
  }
  m_default_namespace_replayer.reset();

  if (m_remote_pool_poller) {
    C_SaferCond ctx;
    m_remote_pool_poller->shut_down(&ctx);
    ctx.wait();

    // drop the cache entries that init() added after the poller came up
    m_pool_meta_cache->remove_remote_pool_meta(m_remote_io_ctx.get_id());
    m_pool_meta_cache->remove_local_pool_meta(m_local_io_ctx.get_id());
  }
  m_remote_pool_poller.reset();
  m_remote_pool_poller_listener.reset();
+ + m_image_sync_throttler.reset(); + m_image_deletion_throttler.reset(); + + m_local_rados.reset(); + m_remote_rados.reset(); +} + +template <typename I> +int PoolReplayer<I>::init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, + RadosRef *rados_ref, + bool strip_cluster_overrides) { + // NOTE: manually bootstrap a CephContext here instead of via + // the librados API to avoid mixing global singletons between + // the librados shared library and the daemon + // TODO: eliminate intermingling of global singletons within Ceph APIs + CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT); + if (client_name.empty() || !iparams.name.from_str(client_name)) { + derr << "error initializing cluster handle for " << description << dendl; + return -EINVAL; + } + + CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + cct->_conf->cluster = cluster_name; + + // librados::Rados::conf_read_file + int r = cct->_conf.parse_config_files(nullptr, nullptr, 0); + if (r < 0 && r != -ENOENT) { + // do not treat this as fatal, it might still be able to connect + derr << "could not read ceph conf for " << description << ": " + << cpp_strerror(r) << dendl; + } + + // preserve cluster-specific config settings before applying environment/cli + // overrides + std::map<std::string, std::string> config_values; + if (strip_cluster_overrides) { + // remote peer connections shouldn't apply cluster-specific + // configuration settings + for (auto& key : UNIQUE_PEER_CONFIG_KEYS) { + config_values[key] = cct->_conf.get_val<std::string>(key); + } + } + + cct->_conf.parse_env(cct->get_module_type()); + + // librados::Rados::conf_parse_env + std::vector<const char*> args; + r = cct->_conf.parse_argv(args); + if (r < 0) { + derr << "could not parse environment for " << description << ":" + << cpp_strerror(r) << dendl; + cct->put(); + 
return r;
  }
  cct->_conf.parse_env(cct->get_module_type());

  // the daemon's own command-line arguments are applied last
  if (!m_args.empty()) {
    // librados::Rados::conf_parse_argv
    args = m_args;
    r = cct->_conf.parse_argv(args);
    if (r < 0) {
      derr << "could not parse command line args for " << description << ": "
           << cpp_strerror(r) << dendl;
      cct->put();
      return r;
    }
  }

  if (strip_cluster_overrides) {
    // remote peer connections shouldn't apply cluster-specific
    // configuration settings
    // (put back every saved value that env/argv parsing changed)
    for (auto& pair : config_values) {
      auto value = cct->_conf.get_val<std::string>(pair.first);
      if (pair.second != value) {
        dout(0) << "reverting global config option override: "
                << pair.first << ": " << value << " -> " << pair.second
                << dendl;
        cct->_conf.set_val_or_die(pair.first, pair.second);
      }
    }
  }

  // only when the daemon itself has an admin socket configured: give this
  // context its own socket path (includes $cluster and $cctid)
  if (!g_ceph_context->_conf->admin_socket.empty()) {
    cct->_conf.set_val_or_die("admin_socket",
                              "$run_dir/$name.$pid.$cluster.$cctid.asok");
  }

  // explicit per-peer settings are applied after all other sources
  if (!mon_host.empty()) {
    r = cct->_conf.set_val("mon_host", mon_host);
    if (r < 0) {
      derr << "failed to set mon_host config for " << description << ": "
           << cpp_strerror(r) << dendl;
      cct->put();
      return r;
    }
  }

  if (!key.empty()) {
    r = cct->_conf.set_val("key", key);
    if (r < 0) {
      derr << "failed to set key config for " << description << ": "
           << cpp_strerror(r) << dendl;
      cct->put();
      return r;
    }
  }

  // disable unnecessary librbd cache
  cct->_conf.set_val_or_die("rbd_cache", "false");
  cct->_conf.apply_changes(nullptr);
  cct->_conf.complain_about_parse_error(cct);

  rados_ref->reset(new librados::Rados());

  r = (*rados_ref)->init_with_context(cct);
  ceph_assert(r == 0);
  // drop the reference taken by common_preinit()
  // NOTE(review): presumably the Rados handle now holds its own -- confirm
  cct->put();

  r = (*rados_ref)->connect();
  if (r < 0) {
    // *rados_ref is left pointing at the unconnected handle on this path
    derr << "error connecting to " << description << ": "
         << cpp_strerror(r) << dendl;
    return r;
  }

  return 0;
}

// Body of the replayer thread: keeps the admin socket hook and the set of
// namespace replayers up to date until a stop is requested or a blocklisting
// is detected.
template <typename I>
void PoolReplayer<I>::run() {
  dout(20) << dendl;

  while (true) {
    std::string asok_hook_name = m_local_io_ctx.get_pool_name() + " " +
m_peer.cluster_name;
    // (re)create the admin socket hook when it is missing or its name changed
    if (m_asok_hook_name != asok_hook_name || m_asok_hook == nullptr) {
      m_asok_hook_name = asok_hook_name;
      delete m_asok_hook;

      m_asok_hook = new PoolReplayerAdminSocketHook<I>(g_ceph_context,
                                                       m_asok_hook_name, this);
    }

    with_namespace_replayers([this]() { update_namespace_replayers(); });

    std::unique_lock locker{m_lock};

    // a blocklisted watcher or replayer stops the whole pool replayer
    if (m_leader_watcher->is_blocklisted() ||
        m_default_namespace_replayer->is_blocklisted()) {
      m_blocklisted = true;
      m_stopping = true;
    }

    for (auto &it : m_namespace_replayers) {
      if (it.second->is_blocklisted()) {
        m_blocklisted = true;
        m_stopping = true;
        break;
      }
    }

    if (m_stopping) {
      break;
    }

    // sleep until the refresh interval elapses or m_cond is notified
    auto seconds = g_ceph_context->_conf.get_val<uint64_t>(
        "rbd_mirror_pool_replayers_refresh_interval");
    m_cond.wait_for(locker, ceph::make_timespan(seconds));
  }

  // shut down namespace replayers
  with_namespace_replayers([this]() { update_namespace_replayers(); });

  delete m_asok_hook;
  m_asok_hook = nullptr;
}

// Reconciles m_namespace_replayers with the namespaces that currently have
// mirroring enabled: replayers for namespaces no longer listed are shut down
// and new ones are created for namespaces without a replayer. Must be called
// with m_lock held; the lock is temporarily released while waiting.
template <typename I>
void PoolReplayer<I>::update_namespace_replayers() {
  dout(20) << dendl;

  ceph_assert(ceph_mutex_is_locked(m_lock));

  // when stopping the set stays empty, so every replayer is shut down below
  std::set<std::string> mirroring_namespaces;
  if (!m_stopping) {
    int r = list_mirroring_namespaces(&mirroring_namespaces);
    if (r < 0) {
      return;
    }
  }

  auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
  // cond fires once every sub-context handed out by gather_ctx has completed
  C_SaferCond cond;
  auto gather_ctx = new C_Gather(cct, &cond);
  for (auto it = m_namespace_replayers.begin();
       it != m_namespace_replayers.end(); ) {
    auto iter = mirroring_namespaces.find(it->first);
    if (iter == mirroring_namespaces.end()) {
      // namespace is no longer mirrored: shut the replayer down and delete it
      // once the shut down completes
      auto namespace_replayer = it->second;
      auto on_shut_down = new LambdaContext(
        [namespace_replayer, ctx=gather_ctx->new_sub()](int r) {
          delete namespace_replayer;
          ctx->complete(r);
        });
      m_service_daemon->remove_namespace(m_local_pool_id, it->first);
      namespace_replayer->shut_down(on_shut_down);
      it = m_namespace_replayers.erase(it);
} else {
      // already has a replayer: drop it from the set so that only namespaces
      // needing a new replayer remain
      mirroring_namespaces.erase(iter);
      it++;
    }
  }

  for (auto &name : mirroring_namespaces) {
    auto namespace_replayer = NamespaceReplayer<I>::create(
      name, m_local_io_ctx, m_remote_io_ctx, m_local_mirror_uuid, m_peer.uuid,
      m_remote_pool_meta, m_threads, m_image_sync_throttler.get(),
      m_image_deletion_throttler.get(), m_service_daemon,
      m_cache_manager_handler, m_pool_meta_cache);
    // the completion takes m_lock itself; on failure it deletes the replayer
    // and removes the name from the set so the later steps skip it
    auto on_init = new LambdaContext(
      [this, namespace_replayer, name, &mirroring_namespaces,
       ctx=gather_ctx->new_sub()](int r) {
        std::lock_guard locker{m_lock};
        if (r < 0) {
          derr << "failed to initialize namespace replayer for namespace "
               << name << ": " << cpp_strerror(r) << dendl;
          delete namespace_replayer;
          mirroring_namespaces.erase(name);
        } else {
          m_namespace_replayers[name] = namespace_replayer;
          m_service_daemon->add_namespace(m_local_pool_id, name);
        }
        ctx->complete(r);
      });
    namespace_replayer->init(on_init);
  }

  gather_ctx->activate();

  // release the lock while waiting: the init completions above acquire it
  m_lock.unlock();
  cond.wait();
  m_lock.lock();

  if (m_leader) {
    // we are the leader: have every new replayer acquire leadership, then
    // tell it about the known instances
    C_SaferCond acquire_cond;
    auto acquire_gather_ctx = new C_Gather(cct, &acquire_cond);

    for (auto &name : mirroring_namespaces) {
      namespace_replayer_acquire_leader(name, acquire_gather_ctx->new_sub());
    }
    acquire_gather_ctx->activate();

    // the acquire-leader failure path also takes m_lock
    m_lock.unlock();
    acquire_cond.wait();
    m_lock.lock();

    std::vector<std::string> instance_ids;
    m_leader_watcher->list_instances(&instance_ids);

    for (auto &name : mirroring_namespaces) {
      auto it = m_namespace_replayers.find(name);
      if (it == m_namespace_replayers.end()) {
        // acquire leader for this namespace replayer failed
        continue;
      }
      it->second->handle_instances_added(instance_ids);
    }
  } else {
    // not the leader: pass the current leader's instance id (if known) to the
    // new replayers
    std::string leader_instance_id;
    if (m_leader_watcher->get_leader_instance_id(&leader_instance_id)) {
      for (auto &name : mirroring_namespaces) {
        m_namespace_replayers[name]->handle_update_leader(leader_instance_id);
      }
    }
  }
}

template <typename I>
int
PoolReplayer<I>::list_mirroring_namespaces( + std::set<std::string> *namespaces) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + std::vector<std::string> names; + + int r = librbd::api::Namespace<I>::list(m_local_io_ctx, &names); + if (r < 0) { + derr << "failed to list namespaces: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto &name : names) { + cls::rbd::MirrorMode mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + int r = librbd::cls_client::mirror_mode_get(&m_local_io_ctx, &mirror_mode); + if (r < 0 && r != -ENOENT) { + derr << "failed to get namespace mirror mode: " << cpp_strerror(r) + << dendl; + if (m_namespace_replayers.count(name) == 0) { + continue; + } + } else if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + dout(10) << "mirroring is disabled for namespace " << name << dendl; + continue; + } + + namespaces->insert(name); + } + + return 0; +} + +template <typename I> +void PoolReplayer<I>::reopen_logs() +{ + std::lock_guard locker{m_lock}; + + if (m_local_rados) { + reinterpret_cast<CephContext *>(m_local_rados->cct())->reopen_logs(); + } + if (m_remote_rados) { + reinterpret_cast<CephContext *>(m_remote_rados->cct())->reopen_logs(); + } +} + +template <typename I> +void PoolReplayer<I>::namespace_replayer_acquire_leader(const std::string &name, + Context *on_finish) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + auto it = m_namespace_replayers.find(name); + ceph_assert(it != m_namespace_replayers.end()); + + on_finish = new LambdaContext( + [this, name, on_finish](int r) { + if (r < 0) { + derr << "failed to handle acquire leader for namespace: " + << name << ": " << cpp_strerror(r) << dendl; + + // remove the namespace replayer -- update_namespace_replayers will + // retry to create it and acquire leader. 
+ + std::lock_guard locker{m_lock}; + + auto namespace_replayer = m_namespace_replayers[name]; + m_namespace_replayers.erase(name); + auto on_shut_down = new LambdaContext( + [namespace_replayer, on_finish](int r) { + delete namespace_replayer; + on_finish->complete(r); + }); + m_service_daemon->remove_namespace(m_local_pool_id, name); + namespace_replayer->shut_down(on_shut_down); + return; + } + on_finish->complete(0); + }); + + it->second->handle_acquire_leader(on_finish); +} + +template <typename I> +void PoolReplayer<I>::print_status(Formatter *f) { + dout(20) << dendl; + + assert(f); + + std::lock_guard l{m_lock}; + + f->open_object_section("pool_replayer_status"); + f->dump_stream("peer") << m_peer; + if (m_local_io_ctx.is_valid()) { + f->dump_string("pool", m_local_io_ctx.get_pool_name()); + f->dump_stream("instance_id") << m_local_io_ctx.get_instance_id(); + } + + std::string state("running"); + if (m_manual_stop) { + state = "stopped (manual)"; + } else if (m_stopping) { + state = "stopped"; + } else if (!is_running()) { + state = "error"; + } + f->dump_string("state", state); + + if (m_leader_watcher) { + std::string leader_instance_id; + m_leader_watcher->get_leader_instance_id(&leader_instance_id); + f->dump_string("leader_instance_id", leader_instance_id); + + bool leader = m_leader_watcher->is_leader(); + f->dump_bool("leader", leader); + if (leader) { + std::vector<std::string> instance_ids; + m_leader_watcher->list_instances(&instance_ids); + f->open_array_section("instances"); + for (auto instance_id : instance_ids) { + f->dump_string("instance_id", instance_id); + } + f->close_section(); // instances + } + } + + if (m_local_rados) { + auto cct = reinterpret_cast<CephContext *>(m_local_rados->cct()); + f->dump_string("local_cluster_admin_socket", + cct->_conf.get_val<std::string>("admin_socket")); + } + if (m_remote_rados) { + auto cct = reinterpret_cast<CephContext *>(m_remote_rados->cct()); + f->dump_string("remote_cluster_admin_socket", + 
cct->_conf.get_val<std::string>("admin_socket")); + } + + if (m_image_sync_throttler) { + f->open_object_section("sync_throttler"); + m_image_sync_throttler->print_status(f); + f->close_section(); // sync_throttler + } + + if (m_image_deletion_throttler) { + f->open_object_section("deletion_throttler"); + m_image_deletion_throttler->print_status(f); + f->close_section(); // deletion_throttler + } + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->print_status(f); + } + + f->open_array_section("namespaces"); + for (auto &it : m_namespace_replayers) { + f->open_object_section("namespace"); + f->dump_string("name", it.first); + it.second->print_status(f); + f->close_section(); // namespace + } + f->close_section(); // namespaces + + f->close_section(); // pool_replayer_status +} + +template <typename I> +void PoolReplayer<I>::start() { + dout(20) << dendl; + + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->start(); + } + for (auto &it : m_namespace_replayers) { + it.second->start(); + } +} + +template <typename I> +void PoolReplayer<I>::stop(bool manual) { + dout(20) << "enter: manual=" << manual << dendl; + + std::lock_guard l{m_lock}; + if (!manual) { + m_stopping = true; + m_cond.notify_all(); + return; + } else if (m_stopping) { + return; + } + + m_manual_stop = true; + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->stop(); + } + for (auto &it : m_namespace_replayers) { + it.second->stop(); + } +} + +template <typename I> +void PoolReplayer<I>::restart() { + dout(20) << dendl; + + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->restart(); + } + for (auto &it : m_namespace_replayers) { + it.second->restart(); + } +} + +template <typename I> +void PoolReplayer<I>::flush() { + dout(20) << dendl; + + std::lock_guard l{m_lock}; 
+ + if (m_stopping || m_manual_stop) { + return; + } + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->flush(); + } + for (auto &it : m_namespace_replayers) { + it.second->flush(); + } +} + +template <typename I> +void PoolReplayer<I>::release_leader() { + dout(20) << dendl; + + std::lock_guard l{m_lock}; + + if (m_stopping || !m_leader_watcher) { + return; + } + + m_leader_watcher->release_leader(); +} + +template <typename I> +void PoolReplayer<I>::handle_post_acquire_leader(Context *on_finish) { + dout(20) << dendl; + + with_namespace_replayers( + [this](Context *on_finish) { + dout(10) << "handle_post_acquire_leader" << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + m_service_daemon->add_or_update_attribute(m_local_pool_id, + SERVICE_DAEMON_LEADER_KEY, + true); + auto ctx = new LambdaContext( + [this, on_finish](int r) { + if (r == 0) { + std::lock_guard locker{m_lock}; + m_leader = true; + } + on_finish->complete(r); + }); + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + auto gather_ctx = new C_Gather(cct, ctx); + + m_default_namespace_replayer->handle_acquire_leader( + gather_ctx->new_sub()); + + for (auto &it : m_namespace_replayers) { + namespace_replayer_acquire_leader(it.first, gather_ctx->new_sub()); + } + + gather_ctx->activate(); + }, on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_pre_release_leader(Context *on_finish) { + dout(20) << dendl; + + with_namespace_replayers( + [this](Context *on_finish) { + dout(10) << "handle_pre_release_leader" << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + m_leader = false; + m_service_daemon->remove_attribute(m_local_pool_id, + SERVICE_DAEMON_LEADER_KEY); + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + auto gather_ctx = new C_Gather(cct, on_finish); + + m_default_namespace_replayer->handle_release_leader( + gather_ctx->new_sub()); + + for (auto &it : m_namespace_replayers) { + 
it.second->handle_release_leader(gather_ctx->new_sub()); + } + + gather_ctx->activate(); + }, on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_update_leader( + const std::string &leader_instance_id) { + dout(10) << "leader_instance_id=" << leader_instance_id << dendl; + + std::lock_guard locker{m_lock}; + + m_default_namespace_replayer->handle_update_leader(leader_instance_id); + + for (auto &it : m_namespace_replayers) { + it.second->handle_update_leader(leader_instance_id); + } +} + +template <typename I> +void PoolReplayer<I>::handle_instances_added( + const std::vector<std::string> &instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + if (!m_leader_watcher->is_leader()) { + return; + } + + m_default_namespace_replayer->handle_instances_added(instance_ids); + + for (auto &it : m_namespace_replayers) { + it.second->handle_instances_added(instance_ids); + } +} + +template <typename I> +void PoolReplayer<I>::handle_instances_removed( + const std::vector<std::string> &instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + if (!m_leader_watcher->is_leader()) { + return; + } + + m_default_namespace_replayer->handle_instances_removed(instance_ids); + + for (auto &it : m_namespace_replayers) { + it.second->handle_instances_removed(instance_ids); + } +} + +template <typename I> +void PoolReplayer<I>::handle_remote_pool_meta_updated( + const RemotePoolMeta& remote_pool_meta) { + dout(5) << "remote_pool_meta=" << remote_pool_meta << dendl; + + if (!m_default_namespace_replayer) { + m_remote_pool_meta = remote_pool_meta; + return; + } + + derr << "remote pool metadata updated unexpectedly" << dendl; + std::unique_lock locker{m_lock}; + m_stopping = true; + m_cond.notify_all(); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::PoolReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/PoolReplayer.h 
b/src/tools/rbd_mirror/PoolReplayer.h new file mode 100644 index 000000000..e0fd75377 --- /dev/null +++ b/src/tools/rbd_mirror/PoolReplayer.h @@ -0,0 +1,288 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_REPLAYER_H +#define CEPH_RBD_MIRROR_POOL_REPLAYER_H + +#include "common/Cond.h" +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" + +#include "tools/rbd_mirror/LeaderWatcher.h" +#include "tools/rbd_mirror/NamespaceReplayer.h" +#include "tools/rbd_mirror/Throttler.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/leader_watcher/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" + +#include <map> +#include <memory> +#include <string> +#include <vector> + +class AdminSocketHook; + +namespace journal { struct CacheManagerHandler; } + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class RemotePoolPoller; +namespace remote_pool_poller { struct Listener; } + +struct PoolMetaCache; +template <typename> class ServiceDaemon; +template <typename> struct Threads; + + +/** + * Controls mirroring for a single remote cluster. 
+ */ +template <typename ImageCtxT = librbd::ImageCtx> +class PoolReplayer { +public: + PoolReplayer(Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache, + int64_t local_pool_id, const PeerSpec &peer, + const std::vector<const char*> &args); + ~PoolReplayer(); + PoolReplayer(const PoolReplayer&) = delete; + PoolReplayer& operator=(const PoolReplayer&) = delete; + + bool is_blocklisted() const; + bool is_leader() const; + bool is_running() const; + + void init(const std::string& site_name); + void shut_down(); + + void run(); + + void print_status(Formatter *f); + void start(); + void stop(bool manual); + void restart(); + void flush(); + void release_leader(); + void reopen_logs(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * <follower> <---------------------\ + * . | + * . (leader acquired) | + * v | + * NOTIFY_NAMESPACE_WATCHERS NOTIFY_NAMESPACE_WATCHERS + * | ^ + * v . + * <leader> . + * . . + * . (leader lost / shut down) . + * . . . . . . . . . . . . . . . . 
+ * + * @endverbatim + */ + + struct RemotePoolPollerListener; + + int init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, RadosRef *rados_ref, + bool strip_cluster_overrides); + + void update_namespace_replayers(); + int list_mirroring_namespaces(std::set<std::string> *namespaces); + + void namespace_replayer_acquire_leader(const std::string &name, + Context *on_finish); + + void handle_post_acquire_leader(Context *on_finish); + void handle_pre_release_leader(Context *on_finish); + + void handle_update_leader(const std::string &leader_instance_id); + + void handle_instances_added(const std::vector<std::string> &instance_ids); + void handle_instances_removed(const std::vector<std::string> &instance_ids); + + // sync version, executed in the caller thread + template <typename L> + void with_namespace_replayers(L &&callback) { + std::lock_guard locker{m_lock}; + + if (m_namespace_replayers_locked) { + ceph_assert(m_on_namespace_replayers_unlocked == nullptr); + C_SaferCond cond; + m_on_namespace_replayers_unlocked = &cond; + m_lock.unlock(); + cond.wait(); + m_lock.lock(); + } else { + m_namespace_replayers_locked = true; + } + + ceph_assert(m_namespace_replayers_locked); + callback(); // may temporary release the lock + ceph_assert(m_namespace_replayers_locked); + + if (m_on_namespace_replayers_unlocked == nullptr) { + m_namespace_replayers_locked = false; + return; + } + + m_threads->work_queue->queue(m_on_namespace_replayers_unlocked); + m_on_namespace_replayers_unlocked = nullptr; + } + + // async version + template <typename L> + void with_namespace_replayers(L &&callback, Context *on_finish) { + std::lock_guard locker{m_lock}; + + on_finish = librbd::util::create_async_context_callback( + m_threads->work_queue, new LambdaContext( + [this, on_finish](int r) { + { + std::lock_guard locker{m_lock}; + ceph_assert(m_namespace_replayers_locked); + + 
m_namespace_replayers_locked = false; + + if (m_on_namespace_replayers_unlocked != nullptr) { + m_namespace_replayers_locked = true; + m_threads->work_queue->queue(m_on_namespace_replayers_unlocked); + m_on_namespace_replayers_unlocked = nullptr; + } + } + on_finish->complete(r); + })); + + auto on_lock = new LambdaContext( + [this, callback, on_finish](int) { + std::lock_guard locker{m_lock}; + ceph_assert(m_namespace_replayers_locked); + + callback(on_finish); + }); + + if (m_namespace_replayers_locked) { + ceph_assert(m_on_namespace_replayers_unlocked == nullptr); + m_on_namespace_replayers_unlocked = on_lock; + return; + } + + m_namespace_replayers_locked = true; + m_threads->work_queue->queue(on_lock); + } + + void handle_remote_pool_meta_updated(const RemotePoolMeta& remote_pool_meta); + + Threads<ImageCtxT> *m_threads; + ServiceDaemon<ImageCtxT> *m_service_daemon; + journal::CacheManagerHandler *m_cache_manager_handler; + PoolMetaCache* m_pool_meta_cache; + int64_t m_local_pool_id = -1; + PeerSpec m_peer; + std::vector<const char*> m_args; + + mutable ceph::mutex m_lock; + ceph::condition_variable m_cond; + std::string m_site_name; + bool m_stopping = false; + bool m_manual_stop = false; + bool m_blocklisted = false; + + RadosRef m_local_rados; + RadosRef m_remote_rados; + + librados::IoCtx m_local_io_ctx; + librados::IoCtx m_remote_io_ctx; + + std::string m_local_mirror_uuid; + + RemotePoolMeta m_remote_pool_meta; + std::unique_ptr<remote_pool_poller::Listener> m_remote_pool_poller_listener; + std::unique_ptr<RemotePoolPoller<ImageCtxT>> m_remote_pool_poller; + + std::unique_ptr<NamespaceReplayer<ImageCtxT>> m_default_namespace_replayer; + std::map<std::string, NamespaceReplayer<ImageCtxT> *> m_namespace_replayers; + + std::string m_asok_hook_name; + AdminSocketHook *m_asok_hook = nullptr; + + service_daemon::CalloutId m_callout_id = service_daemon::CALLOUT_ID_NONE; + + bool m_leader = false; + bool m_namespace_replayers_locked = false; + Context 
*m_on_namespace_replayers_unlocked = nullptr; + + class PoolReplayerThread : public Thread { + PoolReplayer *m_pool_replayer; + public: + PoolReplayerThread(PoolReplayer *pool_replayer) + : m_pool_replayer(pool_replayer) { + } + void *entry() override { + m_pool_replayer->run(); + return 0; + } + } m_pool_replayer_thread; + + class LeaderListener : public leader_watcher::Listener { + public: + LeaderListener(PoolReplayer *pool_replayer) + : m_pool_replayer(pool_replayer) { + } + + protected: + void post_acquire_handler(Context *on_finish) override { + m_pool_replayer->handle_post_acquire_leader(on_finish); + } + + void pre_release_handler(Context *on_finish) override { + m_pool_replayer->handle_pre_release_leader(on_finish); + } + + void update_leader_handler( + const std::string &leader_instance_id) override { + m_pool_replayer->handle_update_leader(leader_instance_id); + } + + void handle_instances_added(const InstanceIds& instance_ids) override { + m_pool_replayer->handle_instances_added(instance_ids); + } + + void handle_instances_removed(const InstanceIds& instance_ids) override { + m_pool_replayer->handle_instances_removed(instance_ids); + } + + private: + PoolReplayer *m_pool_replayer; + } m_leader_listener; + + std::unique_ptr<LeaderWatcher<ImageCtxT>> m_leader_watcher; + std::unique_ptr<Throttler<ImageCtxT>> m_image_sync_throttler; + std::unique_ptr<Throttler<ImageCtxT>> m_image_deletion_throttler; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::PoolReplayer<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_REPLAYER_H diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc new file mode 100644 index 000000000..bec931cf3 --- /dev/null +++ b/src/tools/rbd_mirror/PoolWatcher.cc @@ -0,0 +1,473 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/PoolWatcher.h" +#include "include/rbd_types.h" +#include 
"cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/MirroringWatcher.h" +#include "librbd/Utils.h" +#include "librbd/api/Image.h" +#include "librbd/api/Mirror.h" +#include "librbd/asio/ContextWQ.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolWatcher: " << this << " " \ + << __func__ << ": " + +using std::list; +using std::string; +using std::unique_ptr; +using std::vector; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { + +template <typename I> +class PoolWatcher<I>::MirroringWatcher : public librbd::MirroringWatcher<I> { +public: + using ContextWQ = typename std::decay< + typename std::remove_pointer< + decltype(Threads<I>::work_queue)>::type>::type; + + MirroringWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue, + PoolWatcher *pool_watcher) + : librbd::MirroringWatcher<I>(io_ctx, work_queue), + m_pool_watcher(pool_watcher) { + } + + void handle_rewatch_complete(int r) override { + m_pool_watcher->handle_rewatch_complete(r); + } + + void handle_mode_updated(cls::rbd::MirrorMode mirror_mode) override { + // invalidate all image state and refresh the pool contents + m_pool_watcher->schedule_refresh_images(5); + } + + void handle_image_updated(cls::rbd::MirrorImageState state, + const std::string &image_id, + const std::string &global_image_id) override { + bool enabled = (state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED); + m_pool_watcher->handle_image_updated(image_id, global_image_id, + enabled); + } + +private: + PoolWatcher *m_pool_watcher; +}; + +template <typename I> +PoolWatcher<I>::PoolWatcher(Threads<I> *threads, + librados::IoCtx 
&io_ctx, + const std::string& mirror_uuid, + pool_watcher::Listener &listener) + : m_threads(threads), + m_io_ctx(io_ctx), + m_mirror_uuid(mirror_uuid), + m_listener(listener), + m_lock(ceph::make_mutex(librbd::util::unique_lock_name( + "rbd::mirror::PoolWatcher", this))) { + m_mirroring_watcher = new MirroringWatcher(m_io_ctx, + m_threads->work_queue, this); +} + +template <typename I> +PoolWatcher<I>::~PoolWatcher() { + delete m_mirroring_watcher; +} + +template <typename I> +bool PoolWatcher<I>::is_blocklisted() const { + std::lock_guard locker{m_lock}; + return m_blocklisted; +} + +template <typename I> +void PoolWatcher<I>::init(Context *on_finish) { + dout(5) << dendl; + + { + std::lock_guard locker{m_lock}; + m_on_init_finish = on_finish; + + ceph_assert(!m_refresh_in_progress); + m_refresh_in_progress = true; + } + + // start async updates for mirror image directory + register_watcher(); +} + +template <typename I> +void PoolWatcher<I>::shut_down(Context *on_finish) { + dout(5) << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + ceph_assert(!m_shutting_down); + m_shutting_down = true; + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + } + } + + // in-progress unregister tracked as async op + unregister_watcher(); + + m_async_op_tracker.wait_for_ops(on_finish); +} + +template <typename I> +void PoolWatcher<I>::register_watcher() { + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + } + + // if the watch registration is in-flight, let the watcher + // handle the transition -- only (re-)register if it's not registered + if (!m_mirroring_watcher->is_unregistered()) { + refresh_images(); + return; + } + + // first time registering or the watch failed + dout(5) << dendl; + m_async_op_tracker.start_op(); + + Context *ctx = create_context_callback< + PoolWatcher, &PoolWatcher<I>::handle_register_watcher>(this); + 
m_mirroring_watcher->register_watch(ctx); +} + +template <typename I> +void PoolWatcher<I>::handle_register_watcher(int r) { + dout(5) << "r=" << r << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + if (r < 0) { + m_refresh_in_progress = false; + } + } + + Context *on_init_finish = nullptr; + if (r >= 0) { + refresh_images(); + } else if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted" << dendl; + + std::lock_guard locker{m_lock}; + m_blocklisted = true; + std::swap(on_init_finish, m_on_init_finish); + } else if (r == -ENOENT) { + dout(5) << "mirroring directory does not exist" << dendl; + { + std::lock_guard locker{m_lock}; + std::swap(on_init_finish, m_on_init_finish); + } + + schedule_refresh_images(30); + } else { + derr << "unexpected error registering mirroring directory watch: " + << cpp_strerror(r) << dendl; + schedule_refresh_images(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::unregister_watcher() { + dout(5) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new LambdaContext([this](int r) { + dout(5) << "unregister_watcher: r=" << r << dendl; + if (r < 0) { + derr << "error unregistering watcher for " + << m_mirroring_watcher->get_oid() << " object: " << cpp_strerror(r) + << dendl; + } + m_async_op_tracker.finish_op(); + }); + + m_mirroring_watcher->unregister_watch(ctx); +} + +template <typename I> +void PoolWatcher<I>::refresh_images() { + dout(5) << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + + // clear all pending notification events since we need to perform + // a full image list refresh + m_pending_added_image_ids.clear(); + m_pending_removed_image_ids.clear(); + } + + m_async_op_tracker.start_op(); + m_refresh_image_ids.clear(); + Context *ctx = 
create_context_callback< + PoolWatcher, &PoolWatcher<I>::handle_refresh_images>(this); + auto req = pool_watcher::RefreshImagesRequest<I>::create(m_io_ctx, + &m_refresh_image_ids, + ctx); + req->send(); +} + +template <typename I> +void PoolWatcher<I>::handle_refresh_images(int r) { + dout(5) << "r=" << r << dendl; + + bool deferred_refresh = false; + bool retry_refresh = false; + Context *on_init_finish = nullptr; + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + m_refresh_in_progress = false; + + if (r == -ENOENT) { + dout(5) << "mirroring directory not found" << dendl; + r = 0; + m_refresh_image_ids.clear(); + } + + if (m_deferred_refresh) { + // need to refresh -- skip the notification + deferred_refresh = true; + } else if (r >= 0) { + m_pending_image_ids = std::move(m_refresh_image_ids); + m_image_ids_invalid = false; + std::swap(on_init_finish, m_on_init_finish); + + schedule_listener(); + } else if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted during image refresh" << dendl; + + m_blocklisted = true; + std::swap(on_init_finish, m_on_init_finish); + } else { + retry_refresh = true; + } + } + + if (deferred_refresh) { + dout(5) << "scheduling deferred refresh" << dendl; + schedule_refresh_images(0); + } else if (retry_refresh) { + derr << "failed to retrieve mirroring directory: " << cpp_strerror(r) + << dendl; + schedule_refresh_images(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::schedule_refresh_images(double interval) { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (m_shutting_down || m_refresh_in_progress || m_timer_ctx != nullptr) { + if (m_refresh_in_progress && !m_deferred_refresh) { + dout(5) << "deferring refresh until in-flight refresh completes" << dendl; + m_deferred_refresh = true; + } + return; + } + + m_image_ids_invalid = 
true; + m_timer_ctx = m_threads->timer->add_event_after( + interval, + new LambdaContext([this](int r) { + process_refresh_images(); + })); +} + +template <typename I> +void PoolWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted" << dendl; + + std::lock_guard locker{m_lock}; + m_blocklisted = true; + return; + } else if (r == -ENOENT) { + dout(5) << "mirroring directory deleted" << dendl; + } else if (r < 0) { + derr << "unexpected error re-registering mirroring directory watch: " + << cpp_strerror(r) << dendl; + } + + schedule_refresh_images(5); +} + +template <typename I> +void PoolWatcher<I>::handle_image_updated(const std::string &id, + const std::string &global_image_id, + bool enabled) { + dout(10) << "image_id=" << id << ", " + << "global_image_id=" << global_image_id << ", " + << "enabled=" << enabled << dendl; + + std::lock_guard locker{m_lock}; + ImageId image_id(global_image_id, id); + m_pending_added_image_ids.erase(image_id); + m_pending_removed_image_ids.erase(image_id); + + if (enabled) { + m_pending_added_image_ids.insert(image_id); + schedule_listener(); + } else { + m_pending_removed_image_ids.insert(image_id); + schedule_listener(); + } +} + +template <typename I> +void PoolWatcher<I>::process_refresh_images() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + { + std::lock_guard locker{m_lock}; + ceph_assert(!m_refresh_in_progress); + m_refresh_in_progress = true; + m_deferred_refresh = false; + } + + // execute outside of the timer's lock + m_async_op_tracker.start_op(); + Context *ctx = new LambdaContext([this](int r) { + register_watcher(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void PoolWatcher<I>::schedule_listener() { + ceph_assert(ceph_mutex_is_locked(m_lock)); + m_pending_updates = true; + 
if (m_shutting_down || m_image_ids_invalid || m_notify_listener_in_progress) { + return; + } + + dout(20) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new LambdaContext([this](int r) { + notify_listener(); + m_async_op_tracker.finish_op(); + }); + + m_notify_listener_in_progress = true; + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void PoolWatcher<I>::notify_listener() { + dout(10) << dendl; + + std::string mirror_uuid; + ImageIds added_image_ids; + ImageIds removed_image_ids; + { + std::lock_guard locker{m_lock}; + ceph_assert(m_notify_listener_in_progress); + } + + if (!removed_image_ids.empty()) { + m_listener.handle_update(mirror_uuid, {}, std::move(removed_image_ids)); + removed_image_ids.clear(); + } + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_notify_listener_in_progress); + + // if the watch failed while we didn't own the lock, we are going + // to need to perform a full refresh + if (m_image_ids_invalid) { + m_notify_listener_in_progress = false; + return; + } + + // merge add/remove notifications into pending set (a given image + // can only be in one set or another) + for (auto &image_id : m_pending_removed_image_ids) { + dout(20) << "image_id=" << image_id << dendl; + m_pending_image_ids.erase(image_id); + } + + for (auto &image_id : m_pending_added_image_ids) { + dout(20) << "image_id=" << image_id << dendl; + m_pending_image_ids.erase(image_id); + m_pending_image_ids.insert(image_id); + } + m_pending_added_image_ids.clear(); + + // compute added/removed images + for (auto &image_id : m_image_ids) { + auto it = m_pending_image_ids.find(image_id); + if (it == m_pending_image_ids.end() || it->id != image_id.id) { + removed_image_ids.insert(image_id); + } + } + for (auto &image_id : m_pending_image_ids) { + auto it = m_image_ids.find(image_id); + if (it == m_image_ids.end() || it->id != image_id.id) { + added_image_ids.insert(image_id); + } + } + + m_pending_updates = false; + m_image_ids = 
m_pending_image_ids; + } + + m_listener.handle_update(m_mirror_uuid, std::move(added_image_ids), + std::move(removed_image_ids)); + + { + std::lock_guard locker{m_lock}; + m_notify_listener_in_progress = false; + if (m_pending_updates) { + schedule_listener(); + } + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::PoolWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h new file mode 100644 index 000000000..2905de15f --- /dev/null +++ b/src/tools/rbd_mirror/PoolWatcher.h @@ -0,0 +1,161 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_H + +#include <map> +#include <memory> +#include <set> +#include <string> + +#include "common/AsyncOpTracker.h" +#include "common/ceph_context.h" +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include <boost/functional/hash.hpp> +#include <boost/optional.hpp> +#include "include/ceph_assert.h" +#include "tools/rbd_mirror/pool_watcher/Types.h" + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +/** + * Keeps track of images that have mirroring enabled within all + * pools. 
+ */ +template <typename ImageCtxT = librbd::ImageCtx> +class PoolWatcher { +public: + static PoolWatcher* create(Threads<ImageCtxT> *threads, + librados::IoCtx &io_ctx, + const std::string& mirror_uuid, + pool_watcher::Listener &listener) { + return new PoolWatcher(threads, io_ctx, mirror_uuid, listener); + } + + PoolWatcher(Threads<ImageCtxT> *threads, + librados::IoCtx &io_ctx, + const std::string& mirror_uuid, + pool_watcher::Listener &listener); + ~PoolWatcher(); + PoolWatcher(const PoolWatcher&) = delete; + PoolWatcher& operator=(const PoolWatcher&) = delete; + + bool is_blocklisted() const; + + void init(Context *on_finish = nullptr); + void shut_down(Context *on_finish); + + inline uint64_t get_image_count() const { + std::lock_guard locker{m_lock}; + return m_image_ids.size(); + } + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * REGISTER_WATCHER + * | + * |/--------------------------------\ + * | | + * v | + * REFRESH_IMAGES | + * | | + * |/----------------------------\ | + * | | | + * v | | + * NOTIFY_LISTENER | | + * | | | + * v | | + * IDLE ---\ | | + * | | | | + * | |\---> IMAGE_UPDATED | | + * | | | | | + * | | v | | + * | | GET_IMAGE_NAME --/ | + * | | | + * | \----> WATCH_ERROR ---------/ + * v + * SHUT_DOWN + * | + * v + * UNREGISTER_WATCHER + * | + * v + * <finish> + * + * @endverbatim + */ + class MirroringWatcher; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx m_io_ctx; + std::string m_mirror_uuid; + pool_watcher::Listener &m_listener; + + ImageIds m_refresh_image_ids; + bufferlist m_out_bl; + + mutable ceph::mutex m_lock; + + Context *m_on_init_finish = nullptr; + + ImageIds m_image_ids; + + bool m_pending_updates = false; + bool m_notify_listener_in_progress = false; + ImageIds m_pending_image_ids; + ImageIds m_pending_added_image_ids; + ImageIds m_pending_removed_image_ids; + + MirroringWatcher *m_mirroring_watcher; + + Context *m_timer_ctx = nullptr; + + AsyncOpTracker m_async_op_tracker; + bool 
m_blocklisted = false; + bool m_shutting_down = false; + bool m_image_ids_invalid = true; + bool m_refresh_in_progress = false; + bool m_deferred_refresh = false; + + void register_watcher(); + void handle_register_watcher(int r); + void unregister_watcher(); + + void refresh_images(); + void handle_refresh_images(int r); + + void schedule_refresh_images(double interval); + void process_refresh_images(); + + void handle_rewatch_complete(int r); + void handle_image_updated(const std::string &image_id, + const std::string &global_image_id, + bool enabled); + + void schedule_listener(); + void notify_listener(); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::PoolWatcher<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_H diff --git a/src/tools/rbd_mirror/ProgressContext.h b/src/tools/rbd_mirror/ProgressContext.h new file mode 100644 index 000000000..e4430ee6a --- /dev/null +++ b/src/tools/rbd_mirror/ProgressContext.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_PROGRESS_CONTEXT_H +#define RBD_MIRROR_PROGRESS_CONTEXT_H + +namespace rbd { +namespace mirror { + +class ProgressContext +{ +public: + virtual ~ProgressContext() {} + virtual void update_progress(const std::string &description, + bool flush = true) = 0; +}; + +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_PROGRESS_CONTEXT_H diff --git a/src/tools/rbd_mirror/RemotePoolPoller.cc b/src/tools/rbd_mirror/RemotePoolPoller.cc new file mode 100644 index 000000000..8bfb35d4a --- /dev/null +++ b/src/tools/rbd_mirror/RemotePoolPoller.cc @@ -0,0 +1,267 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "RemotePoolPoller.h" +#include "include/ceph_assert.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "cls/rbd/cls_rbd_client.h" +#include 
"librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::RemotePoolPoller: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +static const double POLL_INTERVAL_SECONDS = 30; + +using librbd::util::create_rados_callback; + +template <typename I> +RemotePoolPoller<I>::~RemotePoolPoller() { + ceph_assert(m_timer_task == nullptr); +} + +template <typename I> +void RemotePoolPoller<I>::init(Context* on_finish) { + dout(10) << dendl; + + ceph_assert(m_state == STATE_INITIALIZING); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + get_mirror_uuid(); +} + +template <typename I> +void RemotePoolPoller<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + std::unique_lock locker(m_threads->timer_lock); + ceph_assert(m_state == STATE_POLLING); + m_state = STATE_SHUTTING_DOWN; + + if (m_timer_task == nullptr) { + // currently executing a poll + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + return; + } + + m_threads->timer->cancel_event(m_timer_task); + m_timer_task = nullptr; + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void RemotePoolPoller<I>::get_mirror_uuid() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_uuid_get_start(&op); + + auto aio_comp = create_rados_callback< + RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_get_mirror_uuid>(this); + m_out_bl.clear(); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void RemotePoolPoller<I>::handle_get_mirror_uuid(int r) { + dout(10) << "r=" << r << dendl; + std::string remote_mirror_uuid; + if (r >= 0) { + auto it = 
m_out_bl.cbegin(); + r = librbd::cls_client::mirror_uuid_get_finish(&it, &remote_mirror_uuid); + if (r >= 0 && remote_mirror_uuid.empty()) { + r = -ENOENT; + } + } + + if (r < 0) { + if (r == -ENOENT) { + dout(5) << "remote mirror uuid missing" << dendl; + } else { + derr << "failed to retrieve remote mirror uuid: " << cpp_strerror(r) + << dendl; + } + + m_remote_pool_meta.mirror_uuid = ""; + } + + // if we have the mirror uuid, we will poll until shut down + if (m_state == STATE_INITIALIZING) { + if (r < 0) { + schedule_task(r); + return; + } + + m_state = STATE_POLLING; + } + + dout(10) << "remote_mirror_uuid=" << remote_mirror_uuid << dendl; + if (m_remote_pool_meta.mirror_uuid != remote_mirror_uuid) { + m_remote_pool_meta.mirror_uuid = remote_mirror_uuid; + m_updated = true; + } + + mirror_peer_ping(); +} + +// Issues an asynchronous mirror_peer_ping write op (carrying m_site_name and +// m_local_mirror_uuid) against the RBD_MIRRORING object via m_remote_io_ctx; +// the result is delivered to handle_mirror_peer_ping(). +template <typename I> +void RemotePoolPoller<I>::mirror_peer_ping() { + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_peer_ping(&op, m_site_name, m_local_mirror_uuid); + + auto aio_comp = create_rados_callback< + RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_mirror_peer_ping>(this); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +// Completion of the peer ping: -EOPNOTSUPP goes straight to notify_listener() +// without listing peers; any other result (success or a logged error) +// continues with mirror_peer_list(). +template <typename I> +void RemotePoolPoller<I>::handle_mirror_peer_ping(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EOPNOTSUPP) { + // older OSD that doesn't support snapshot-based mirroring, so no need + // to query remote peers + dout(10) << "remote peer does not support snapshot-based mirroring" + << dendl; + notify_listener(); + return; + } else if (r < 0) { + // we can still see if we can perform a peer list and find ourselves + derr << "failed to ping remote mirror peer: " << cpp_strerror(r) << dendl; + } + + mirror_peer_list(); +} + +template <typename I> +void RemotePoolPoller<I>::mirror_peer_list() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + 
librbd::cls_client::mirror_peer_list_start(&op); + + auto aio_comp = create_rados_callback< + RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_mirror_peer_list>(this); + m_out_bl.clear(); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void RemotePoolPoller<I>::handle_mirror_peer_list(int r) { + dout(10) << "r=" << r << dendl; + + std::vector<cls::rbd::MirrorPeer> peers; + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_peer_list_finish(&iter, &peers); + } + + if (r < 0) { + derr << "failed to retrieve mirror peers: " << cpp_strerror(r) << dendl; + } + + cls::rbd::MirrorPeer* matched_peer = nullptr; + for (auto& peer : peers) { + if (peer.mirror_peer_direction == cls::rbd::MIRROR_PEER_DIRECTION_RX) { + continue; + } + + if (peer.mirror_uuid == m_local_mirror_uuid) { + matched_peer = &peer; + break; + } else if (peer.site_name == m_site_name) { + // keep searching in case we hit an exact match by fsid + matched_peer = &peer; + } + } + + // older OSDs don't support peer ping so we might fail to find a match, + // which will prevent snapshot mirroring from functioning + std::string remote_mirror_peer_uuid; + if (matched_peer != nullptr) { + remote_mirror_peer_uuid = matched_peer->uuid; + } + + dout(10) << "remote_mirror_peer_uuid=" << remote_mirror_peer_uuid << dendl; + if (m_remote_pool_meta.mirror_peer_uuid != remote_mirror_peer_uuid) { + m_remote_pool_meta.mirror_peer_uuid = remote_mirror_peer_uuid; + m_updated = true; + } + + notify_listener(); +} + +template <typename I> +void RemotePoolPoller<I>::notify_listener() { + bool updated = false; + std::swap(updated, m_updated); + if (updated) { + dout(10) << dendl; + m_listener.handle_updated(m_remote_pool_meta); + } + + schedule_task(0); +} + +template <typename I> +void RemotePoolPoller<I>::schedule_task(int r) { + std::unique_lock locker{m_threads->timer_lock}; + + if (m_state 
== STATE_POLLING) { + dout(10) << dendl; + + ceph_assert(m_timer_task == nullptr); + m_timer_task = new LambdaContext([this](int) { + handle_task(); + }); + + m_threads->timer->add_event_after(POLL_INTERVAL_SECONDS, m_timer_task); + } + + // finish init or shut down callback + if (m_on_finish != nullptr) { + locker.unlock(); + Context* on_finish = nullptr; + std::swap(on_finish, m_on_finish); + on_finish->complete(m_state == STATE_SHUTTING_DOWN ? 0 : r); + } +} + +template <typename I> +void RemotePoolPoller<I>::handle_task() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock)); + m_timer_task = nullptr; + + auto ctx = new LambdaContext([this](int) { + get_mirror_uuid(); + }); + m_threads->work_queue->queue(ctx); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::RemotePoolPoller<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/RemotePoolPoller.h b/src/tools/rbd_mirror/RemotePoolPoller.h new file mode 100644 index 000000000..19d803ca1 --- /dev/null +++ b/src/tools/rbd_mirror/RemotePoolPoller.h @@ -0,0 +1,133 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H +#define CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H + +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace remote_pool_poller { + +struct Listener { + virtual ~Listener() {} + + virtual void handle_updated(const RemotePoolMeta& remote_pool_meta) = 0; +}; + +}; // namespace remote_pool_poller + +template <typename ImageCtxT> +class RemotePoolPoller { +public: + static RemotePoolPoller* create( + Threads<ImageCtxT>* threads, + librados::IoCtx& remote_io_ctx, + const std::string& site_name, + const std::string& local_mirror_uuid, + remote_pool_poller::Listener& 
listener) { + return new RemotePoolPoller(threads, remote_io_ctx, site_name, + local_mirror_uuid, listener); + } + + RemotePoolPoller( + Threads<ImageCtxT>* threads, + librados::IoCtx& remote_io_ctx, + const std::string& site_name, + const std::string& local_mirror_uuid, + remote_pool_poller::Listener& listener) + : m_threads(threads), + m_remote_io_ctx(remote_io_ctx), + m_site_name(site_name), + m_local_mirror_uuid(local_mirror_uuid), + m_listener(listener) { + } + ~RemotePoolPoller(); + + void init(Context* on_finish); + void shut_down(Context* on_finish); + +private: + /** + * Polling state machine. Each pass reads the remote mirror uuid, pings the + * remote peer, lists the remote peers and then notifies the listener if the + * remote pool meta changed. MIRROR_PEER_LIST is bypassed when the ping + * returns -EOPNOTSUPP. + * + * @verbatim + * + * <start> + * | + * |/----------------------------\ + * | | + * v | + * MIRROR_UUID_GET | + * | | + * v | + * MIRROR_PEER_PING | + * | | + * v | + * MIRROR_PEER_LIST | + * | | + * v (skip if no changes) | + * NOTIFY_LISTENER | + * | | + * | (repeat periodically) | + * |\----------------------------/ + * | + * v + * <finish> + * + * @endverbatim + */ + + enum State { + STATE_INITIALIZING, + STATE_POLLING, + STATE_SHUTTING_DOWN + }; + + Threads<ImageCtxT>* m_threads; + librados::IoCtx& m_remote_io_ctx; + std::string m_site_name; + std::string m_local_mirror_uuid; + remote_pool_poller::Listener& m_listener; + + bufferlist m_out_bl; + + RemotePoolMeta m_remote_pool_meta; + bool m_updated = false; + + State m_state = STATE_INITIALIZING; + Context* m_timer_task = nullptr; + Context* m_on_finish = nullptr; + + void get_mirror_uuid(); + void handle_get_mirror_uuid(int r); + + void mirror_peer_ping(); + void handle_mirror_peer_ping(int r); + + void mirror_peer_list(); + void handle_mirror_peer_list(int r); + + void notify_listener(); + + void schedule_task(int r); + void handle_task(); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::RemotePoolPoller<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H diff --git a/src/tools/rbd_mirror/ServiceDaemon.cc 
b/src/tools/rbd_mirror/ServiceDaemon.cc new file mode 100644 index 000000000..f3cabcc87 --- /dev/null +++ b/src/tools/rbd_mirror/ServiceDaemon.cc @@ -0,0 +1,327 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/ServiceDaemon.h" +#include "include/Context.h" +#include "include/stringify.h" +#include "common/ceph_context.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/Timer.h" +#include "tools/rbd_mirror/Threads.h" +#include <sstream> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ServiceDaemon: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +namespace { + +const std::string RBD_MIRROR_AUTH_ID_PREFIX("rbd-mirror."); + +struct AttributeDumpVisitor : public boost::static_visitor<void> { + ceph::Formatter *f; + const std::string& name; + + AttributeDumpVisitor(ceph::Formatter *f, const std::string& name) + : f(f), name(name) { + } + + void operator()(bool val) const { + f->dump_bool(name.c_str(), val); + } + void operator()(uint64_t val) const { + f->dump_unsigned(name.c_str(), val); + } + void operator()(const std::string& val) const { + f->dump_string(name.c_str(), val); + } +}; + +} // anonymous namespace + +using namespace service_daemon; + +template <typename I> +ServiceDaemon<I>::ServiceDaemon(CephContext *cct, RadosRef rados, + Threads<I>* threads) + : m_cct(cct), m_rados(rados), m_threads(threads) { + dout(20) << dendl; +} + +template <typename I> +ServiceDaemon<I>::~ServiceDaemon() { + dout(20) << dendl; + std::lock_guard timer_locker{m_threads->timer_lock}; + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + update_status(); + } +} + +template <typename I> +int ServiceDaemon<I>::init() { + dout(20) << dendl; + + std::string id = 
m_cct->_conf->name.get_id(); + if (id.find(RBD_MIRROR_AUTH_ID_PREFIX) == 0) { + id = id.substr(RBD_MIRROR_AUTH_ID_PREFIX.size()); + } + + std::string instance_id = stringify(m_rados->get_instance_id()); + std::map<std::string, std::string> service_metadata = { + {"id", id}, {"instance_id", instance_id}}; + int r = m_rados->service_daemon_register("rbd-mirror", instance_id, + service_metadata); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +void ServiceDaemon<I>::add_pool(int64_t pool_id, const std::string& pool_name) { + dout(20) << "pool_id=" << pool_id << ", pool_name=" << pool_name << dendl; + + { + std::lock_guard locker{m_lock}; + m_pools.insert({pool_id, {pool_name}}); + } + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::remove_pool(int64_t pool_id) { + dout(20) << "pool_id=" << pool_id << dendl; + { + std::lock_guard locker{m_lock}; + m_pools.erase(pool_id); + } + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::add_namespace(int64_t pool_id, + const std::string& namespace_name) { + dout(20) << "pool_id=" << pool_id << ", namespace=" << namespace_name + << dendl; + + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.ns_attributes[namespace_name]; + + // don't schedule update status as the namespace attributes are empty yet +} + +template <typename I> +void ServiceDaemon<I>::remove_namespace(int64_t pool_id, + const std::string& namespace_name) { + dout(20) << "pool_id=" << pool_id << ", namespace=" << namespace_name + << dendl; + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.ns_attributes.erase(namespace_name); + } + schedule_update_status(); +} + +template <typename I> +uint64_t ServiceDaemon<I>::add_or_update_callout(int64_t pool_id, + uint64_t callout_id, + CalloutLevel callout_level, + 
const std::string& text) { + dout(20) << "pool_id=" << pool_id << ", " + << "callout_id=" << callout_id << ", " + << "callout_level=" << callout_level << ", " + << "text=" << text << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return CALLOUT_ID_NONE; + } + + if (callout_id == CALLOUT_ID_NONE) { + callout_id = ++m_callout_id; + } + pool_it->second.callouts[callout_id] = {callout_level, text}; + } + + schedule_update_status(); + return callout_id; +} + +template <typename I> +void ServiceDaemon<I>::remove_callout(int64_t pool_id, uint64_t callout_id) { + dout(20) << "pool_id=" << pool_id << ", " + << "callout_id=" << callout_id << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.callouts.erase(callout_id); + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::add_or_update_attribute(int64_t pool_id, + const std::string& key, + const AttributeValue& value) { + dout(20) << "pool_id=" << pool_id << ", " + << "key=" << key << ", " + << "value=" << value << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.attributes[key] = value; + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::add_or_update_namespace_attribute( + int64_t pool_id, const std::string& namespace_name, const std::string& key, + const AttributeValue& value) { + if (namespace_name.empty()) { + add_or_update_attribute(pool_id, key, value); + return; + } + + dout(20) << "pool_id=" << pool_id << ", " + << "namespace=" << namespace_name << ", " + << "key=" << key << ", " + << "value=" << value << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + + auto ns_it = 
pool_it->second.ns_attributes.find(namespace_name); + if (ns_it == pool_it->second.ns_attributes.end()) { + return; + } + + ns_it->second[key] = value; + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::remove_attribute(int64_t pool_id, + const std::string& key) { + dout(20) << "pool_id=" << pool_id << ", " + << "key=" << key << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.attributes.erase(key); + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::schedule_update_status() { + std::lock_guard timer_locker{m_threads->timer_lock}; + if (m_timer_ctx != nullptr) { + return; + } + + m_timer_ctx = new LambdaContext([this](int) { + m_timer_ctx = nullptr; + update_status(); + }); + m_threads->timer->add_event_after(1, m_timer_ctx); +} + +template <typename I> +void ServiceDaemon<I>::update_status() { + dout(20) << dendl; + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + + ceph::JSONFormatter f; + { + std::lock_guard locker{m_lock}; + f.open_object_section("pools"); + for (auto& pool_pair : m_pools) { + f.open_object_section(stringify(pool_pair.first).c_str()); + f.dump_string("name", pool_pair.second.name); + f.open_object_section("callouts"); + for (auto& callout : pool_pair.second.callouts) { + f.open_object_section(stringify(callout.first).c_str()); + f.dump_string("level", stringify(callout.second.level).c_str()); + f.dump_string("text", callout.second.text.c_str()); + f.close_section(); + } + f.close_section(); // callouts + + for (auto& attribute : pool_pair.second.attributes) { + AttributeDumpVisitor attribute_dump_visitor(&f, attribute.first); + boost::apply_visitor(attribute_dump_visitor, attribute.second); + } + + if (!pool_pair.second.ns_attributes.empty()) { + f.open_object_section("namespaces"); + for (auto& [ns, attributes] : pool_pair.second.ns_attributes) { + 
f.open_object_section(ns.c_str()); + for (auto& [key, value] : attributes) { + AttributeDumpVisitor attribute_dump_visitor(&f, key); + boost::apply_visitor(attribute_dump_visitor, value); + } + f.close_section(); // namespace + } + f.close_section(); // namespaces + } + f.close_section(); // pool + } + f.close_section(); // pools + } + + std::stringstream ss; + f.flush(ss); + + int r = m_rados->service_daemon_update_status({{"json", ss.str()}}); + if (r < 0) { + derr << "failed to update service daemon status: " << cpp_strerror(r) + << dendl; + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ServiceDaemon.h b/src/tools/rbd_mirror/ServiceDaemon.h new file mode 100644 index 000000000..8b1e0f584 --- /dev/null +++ b/src/tools/rbd_mirror/ServiceDaemon.h @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_H +#define CEPH_RBD_MIRROR_SERVICE_DAEMON_H + +#include "common/ceph_mutex.h" +#include "include/common_fwd.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <map> +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class ServiceDaemon { +public: + ServiceDaemon(CephContext *cct, RadosRef rados, Threads<ImageCtxT>* threads); + ~ServiceDaemon(); + + int init(); + + void add_pool(int64_t pool_id, const std::string& pool_name); + void remove_pool(int64_t pool_id); + + void add_namespace(int64_t pool_id, const std::string& namespace_name); + void remove_namespace(int64_t pool_id, const std::string& namespace_name); + + uint64_t add_or_update_callout(int64_t pool_id, uint64_t callout_id, + service_daemon::CalloutLevel callout_level, + const std::string& text); + void 
remove_callout(int64_t pool_id, uint64_t callout_id); + + void add_or_update_attribute(int64_t pool_id, const std::string& key, + const service_daemon::AttributeValue& value); + void add_or_update_namespace_attribute( + int64_t pool_id, const std::string& namespace_name, + const std::string& key, const service_daemon::AttributeValue& value); + void remove_attribute(int64_t pool_id, const std::string& key); + +private: + struct Callout { + service_daemon::CalloutLevel level; + std::string text; + + Callout() : level(service_daemon::CALLOUT_LEVEL_INFO) { + } + Callout(service_daemon::CalloutLevel level, const std::string& text) + : level(level), text(text) { + } + }; + typedef std::map<uint64_t, Callout> Callouts; + typedef std::map<std::string, service_daemon::AttributeValue> Attributes; + typedef std::map<std::string, Attributes> NamespaceAttributes; + + struct Pool { + std::string name; + Callouts callouts; + Attributes attributes; + NamespaceAttributes ns_attributes; + + Pool(const std::string& name) : name(name) { + } + }; + + typedef std::map<int64_t, Pool> Pools; + + CephContext *m_cct; + RadosRef m_rados; + Threads<ImageCtxT>* m_threads; + + ceph::mutex m_lock = ceph::make_mutex("rbd::mirror::ServiceDaemon"); + Pools m_pools; + uint64_t m_callout_id = service_daemon::CALLOUT_ID_NONE; + + Context* m_timer_ctx = nullptr; + + void schedule_update_status(); + void update_status(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_H diff --git a/src/tools/rbd_mirror/Threads.cc b/src/tools/rbd_mirror/Threads.cc new file mode 100644 index 000000000..b0c762641 --- /dev/null +++ b/src/tools/rbd_mirror/Threads.cc @@ -0,0 +1,38 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/Threads.h" +#include "common/Timer.h" +#include "librbd/AsioEngine.h" +#include "librbd/ImageCtx.h" 
+#include "librbd/asio/ContextWQ.h" + +namespace rbd { +namespace mirror { + +template <typename I> +Threads<I>::Threads(std::shared_ptr<librados::Rados>& rados) { + auto cct = static_cast<CephContext*>(rados->cct()); + asio_engine = new librbd::AsioEngine(rados); + work_queue = asio_engine->get_work_queue(); + + timer = new SafeTimer(cct, timer_lock, true); + timer->init(); +} + +template <typename I> +Threads<I>::~Threads() { + { + std::lock_guard timer_locker{timer_lock}; + timer->shutdown(); + } + delete timer; + + work_queue->drain(); + delete asio_engine; +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Threads<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Threads.h b/src/tools/rbd_mirror/Threads.h new file mode 100644 index 000000000..35c0b0f1c --- /dev/null +++ b/src/tools/rbd_mirror/Threads.h @@ -0,0 +1,45 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_THREADS_H +#define CEPH_RBD_MIRROR_THREADS_H + +#include "include/common_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "common/ceph_mutex.h" +#include "common/Timer.h" +#include <memory> + +class ThreadPool; + +namespace librbd { +struct AsioEngine; +struct ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class Threads { +public: + librbd::AsioEngine* asio_engine = nullptr; + librbd::asio::ContextWQ* work_queue = nullptr; + + SafeTimer *timer = nullptr; + ceph::mutex timer_lock = ceph::make_mutex("Threads::timer_lock"); + + explicit Threads(std::shared_ptr<librados::Rados>& rados); + Threads(const Threads&) = delete; + Threads& operator=(const Threads&) = delete; + + ~Threads(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::Threads<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_THREADS_H diff --git 
a/src/tools/rbd_mirror/Throttler.cc b/src/tools/rbd_mirror/Throttler.cc new file mode 100644 index 000000000..07d6e397e --- /dev/null +++ b/src/tools/rbd_mirror/Throttler.cc @@ -0,0 +1,240 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "Throttler.h" +#include "common/Formatter.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Throttler:: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +template <typename I> +Throttler<I>::Throttler(CephContext *cct, const std::string &config_key) + : m_cct(cct), m_config_key(config_key), + m_config_keys{m_config_key.c_str(), nullptr}, + m_lock(ceph::make_mutex( + librbd::util::unique_lock_name("rbd::mirror::Throttler", this))), + m_max_concurrent_ops(cct->_conf.get_val<uint64_t>(m_config_key)) { + dout(20) << m_config_key << "=" << m_max_concurrent_ops << dendl; + m_cct->_conf.add_observer(this); +} + +template <typename I> +Throttler<I>::~Throttler() { + m_cct->_conf.remove_observer(this); + + std::lock_guard locker{m_lock}; + ceph_assert(m_inflight_ops.empty()); + ceph_assert(m_queue.empty()); +} + +template <typename I> +void Throttler<I>::start_op(const std::string &ns, + const std::string &id_, + Context *on_start) { + Id id{ns, id_}; + + dout(20) << "id=" << id << dendl; + + int r = 0; + { + std::lock_guard locker{m_lock}; + + if (m_inflight_ops.count(id) > 0) { + dout(20) << "duplicate for already started op " << id << dendl; + } else 
if (m_queued_ops.count(id) > 0) { + dout(20) << "duplicate for already queued op " << id << dendl; + std::swap(m_queued_ops[id], on_start); + r = -ENOENT; + } else if (m_max_concurrent_ops == 0 || + m_inflight_ops.size() < m_max_concurrent_ops) { + ceph_assert(m_queue.empty()); + m_inflight_ops.insert(id); + dout(20) << "ready to start op for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]" + << dendl; + } else { + m_queue.push_back(id); + std::swap(m_queued_ops[id], on_start); + dout(20) << "op for " << id << " has been queued" << dendl; + } + } + + if (on_start != nullptr) { + on_start->complete(r); + } +} + +template <typename I> +bool Throttler<I>::cancel_op(const std::string &ns, + const std::string &id_) { + Id id{ns, id_}; + + dout(20) << "id=" << id << dendl; + + Context *on_start = nullptr; + { + std::lock_guard locker{m_lock}; + auto it = m_queued_ops.find(id); + if (it != m_queued_ops.end()) { + dout(20) << "canceled queued op for " << id << dendl; + m_queue.remove(id); + on_start = it->second; + m_queued_ops.erase(it); + } + } + + if (on_start == nullptr) { + return false; + } + + on_start->complete(-ECANCELED); + return true; +} + +template <typename I> +void Throttler<I>::finish_op(const std::string &ns, + const std::string &id_) { + Id id{ns, id_}; + + dout(20) << "id=" << id << dendl; + + if (cancel_op(ns, id_)) { + return; + } + + Context *on_start = nullptr; + { + std::lock_guard locker{m_lock}; + + m_inflight_ops.erase(id); + + if (m_inflight_ops.size() < m_max_concurrent_ops && !m_queue.empty()) { + auto id = m_queue.front(); + auto it = m_queued_ops.find(id); + ceph_assert(it != m_queued_ops.end()); + m_inflight_ops.insert(id); + dout(20) << "ready to start op for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]" + << dendl; + on_start = it->second; + m_queued_ops.erase(it); + m_queue.pop_front(); + } + } + + if (on_start != nullptr) { + on_start->complete(0); + } +} + +template 
<typename I> +void Throttler<I>::drain(const std::string &ns, int r) { + dout(20) << "ns=" << ns << dendl; + + std::map<Id, Context *> queued_ops; + { + std::lock_guard locker{m_lock}; + for (auto it = m_queued_ops.begin(); it != m_queued_ops.end(); ) { + if (it->first.first == ns) { + queued_ops[it->first] = it->second; + m_queue.remove(it->first); + it = m_queued_ops.erase(it); + } else { + it++; + } + } + for (auto it = m_inflight_ops.begin(); it != m_inflight_ops.end(); ) { + if (it->first == ns) { + dout(20) << "inflight_op " << *it << dendl; + it = m_inflight_ops.erase(it); + } else { + it++; + } + } + } + + for (auto &it : queued_ops) { + dout(20) << "queued_op " << it.first << dendl; + it.second->complete(r); + } +} + +template <typename I> +void Throttler<I>::set_max_concurrent_ops(uint32_t max) { + dout(20) << "max=" << max << dendl; + + std::list<Context *> ops; + { + std::lock_guard locker{m_lock}; + m_max_concurrent_ops = max; + + // Start waiting ops in the case of available free slots + while ((m_max_concurrent_ops == 0 || + m_inflight_ops.size() < m_max_concurrent_ops) && + !m_queue.empty()) { + auto id = m_queue.front(); + m_inflight_ops.insert(id); + dout(20) << "ready to start op for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]" + << dendl; + auto it = m_queued_ops.find(id); + ceph_assert(it != m_queued_ops.end()); + ops.push_back(it->second); + m_queued_ops.erase(it); + m_queue.pop_front(); + } + } + + for (const auto& ctx : ops) { + ctx->complete(0); + } +} + +template <typename I> +void Throttler<I>::print_status(ceph::Formatter *f) { + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + f->dump_int("max_parallel_requests", m_max_concurrent_ops); + f->dump_int("running_requests", m_inflight_ops.size()); + f->dump_int("waiting_requests", m_queue.size()); +} + +template <typename I> +const char** Throttler<I>::get_tracked_conf_keys() const { + return m_config_keys; +} + +template <typename I> +void 
Throttler<I>::handle_conf_change(const ConfigProxy& conf, + const std::set<std::string> &changed) { + if (changed.count(m_config_key)) { + set_max_concurrent_ops(conf.get_val<uint64_t>(m_config_key)); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Throttler<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Throttler.h b/src/tools/rbd_mirror/Throttler.h new file mode 100644 index 000000000..32080238a --- /dev/null +++ b/src/tools/rbd_mirror/Throttler.h @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_THROTTLER_H +#define RBD_MIRROR_THROTTLER_H + +#include <list> +#include <map> +#include <set> +#include <sstream> +#include <string> +#include <utility> + +#include "common/ceph_mutex.h" +#include "common/config_obs.h" +#include "include/common_fwd.h" + +class Context; + +namespace ceph { class Formatter; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class Throttler : public md_config_obs_t { +public: + static Throttler *create( + CephContext *cct, + const std::string &config_key) { + return new Throttler(cct, config_key); + } + void destroy() { + delete this; + } + + Throttler(CephContext *cct, + const std::string &config_key); + ~Throttler() override; + + void set_max_concurrent_ops(uint32_t max); + void start_op(const std::string &ns, const std::string &id, + Context *on_start); + bool cancel_op(const std::string &ns, const std::string &id); + void finish_op(const std::string &ns, const std::string &id); + void drain(const std::string &ns, int r); + + void print_status(ceph::Formatter *f); + +private: + typedef std::pair<std::string, std::string> Id; + + CephContext *m_cct; + const std::string m_config_key; + mutable const char* m_config_keys[2]; + + ceph::mutex m_lock; + uint32_t m_max_concurrent_ops; + std::list<Id> m_queue; + std::map<Id, Context *> 
m_queued_ops; + std::set<Id> m_inflight_ops; + + const char **get_tracked_conf_keys() const override; + void handle_conf_change(const ConfigProxy& conf, + const std::set<std::string> &changed) override; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::Throttler<librbd::ImageCtx>; + +#endif // RBD_MIRROR_THROTTLER_H diff --git a/src/tools/rbd_mirror/Types.cc b/src/tools/rbd_mirror/Types.cc new file mode 100644 index 000000000..cd71c73b1 --- /dev/null +++ b/src/tools/rbd_mirror/Types.cc @@ -0,0 +1,32 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/Types.h" + +namespace rbd { +namespace mirror { + +std::ostream &operator<<(std::ostream &os, const ImageId &image_id) { + return os << "global id=" << image_id.global_id << ", " + << "id=" << image_id.id; +} + +std::ostream& operator<<(std::ostream& lhs, + const LocalPoolMeta& rhs) { + return lhs << "mirror_uuid=" << rhs.mirror_uuid; +} + +std::ostream& operator<<(std::ostream& lhs, + const RemotePoolMeta& rhs) { + return lhs << "mirror_uuid=" << rhs.mirror_uuid << ", " + "mirror_peer_uuid=" << rhs.mirror_peer_uuid; +} + +std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer) { + return lhs << "uuid: " << peer.uuid + << " cluster: " << peer.cluster_name + << " client: " << peer.client_name; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h new file mode 100644 index 000000000..9bba58fb1 --- /dev/null +++ b/src/tools/rbd_mirror/Types.h @@ -0,0 +1,171 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_TYPES_H +#define CEPH_RBD_MIRROR_TYPES_H + +#include <iostream> +#include <memory> +#include <set> +#include <string> +#include <vector> + +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" + +namespace rbd { +namespace mirror 
{ + +template <typename> struct MirrorStatusUpdater; + +// Performance counters +enum { + l_rbd_mirror_journal_first = 27000, + l_rbd_mirror_journal_entries, + l_rbd_mirror_journal_replay_bytes, + l_rbd_mirror_journal_replay_latency, + l_rbd_mirror_journal_last, + l_rbd_mirror_snapshot_first, + l_rbd_mirror_snapshot_snapshots, + l_rbd_mirror_snapshot_sync_time, + l_rbd_mirror_snapshot_sync_bytes, + // per-image only counters below + l_rbd_mirror_snapshot_remote_timestamp, + l_rbd_mirror_snapshot_local_timestamp, + l_rbd_mirror_snapshot_last_sync_time, + l_rbd_mirror_snapshot_last_sync_bytes, + l_rbd_mirror_snapshot_last, +}; + +typedef std::shared_ptr<librados::Rados> RadosRef; +typedef std::shared_ptr<librados::IoCtx> IoCtxRef; +typedef std::shared_ptr<librbd::Image> ImageRef; + +struct ImageId { + std::string global_id; + std::string id; + + explicit ImageId(const std::string &global_id) : global_id(global_id) { + } + ImageId(const std::string &global_id, const std::string &id) + : global_id(global_id), id(id) { + } + + inline bool operator==(const ImageId &rhs) const { + return (global_id == rhs.global_id && id == rhs.id); + } + inline bool operator<(const ImageId &rhs) const { + return global_id < rhs.global_id; + } +}; + +std::ostream &operator<<(std::ostream &, const ImageId &image_id); + +typedef std::set<ImageId> ImageIds; + +struct LocalPoolMeta { + LocalPoolMeta() {} + LocalPoolMeta(const std::string& mirror_uuid) + : mirror_uuid(mirror_uuid) { + } + + std::string mirror_uuid; +}; + +std::ostream& operator<<(std::ostream& lhs, + const LocalPoolMeta& local_pool_meta); + +struct RemotePoolMeta { + RemotePoolMeta() {} + RemotePoolMeta(const std::string& mirror_uuid, + const std::string& mirror_peer_uuid) + : mirror_uuid(mirror_uuid), + mirror_peer_uuid(mirror_peer_uuid) { + } + + std::string mirror_uuid; + std::string mirror_peer_uuid; +}; + +std::ostream& operator<<(std::ostream& lhs, + const RemotePoolMeta& remote_pool_meta); + +template <typename I> 
+struct Peer { + std::string uuid; + mutable librados::IoCtx io_ctx; + RemotePoolMeta remote_pool_meta; + MirrorStatusUpdater<I>* mirror_status_updater = nullptr; + + Peer() { + } + Peer(const std::string& uuid, + librados::IoCtx& io_ctx, + const RemotePoolMeta& remote_pool_meta, + MirrorStatusUpdater<I>* mirror_status_updater) + : io_ctx(io_ctx), + remote_pool_meta(remote_pool_meta), + mirror_status_updater(mirror_status_updater) { + } + + inline bool operator<(const Peer &rhs) const { + return uuid < rhs.uuid; + } +}; + +template <typename I> +std::ostream& operator<<(std::ostream& lhs, const Peer<I>& peer) { + return lhs << peer.remote_pool_meta; +} + +struct PeerSpec { + PeerSpec() = default; + PeerSpec(const std::string &uuid, const std::string &cluster_name, + const std::string &client_name) + : uuid(uuid), cluster_name(cluster_name), client_name(client_name) + { + } + PeerSpec(const librbd::mirror_peer_site_t &peer) : + uuid(peer.uuid), + cluster_name(peer.site_name), + client_name(peer.client_name) + { + } + + std::string uuid; + std::string cluster_name; + std::string client_name; + + /// optional config properties + std::string mon_host; + std::string key; + + bool operator==(const PeerSpec& rhs) const { + return (uuid == rhs.uuid && + cluster_name == rhs.cluster_name && + client_name == rhs.client_name && + mon_host == rhs.mon_host && + key == rhs.key); + } + bool operator<(const PeerSpec& rhs) const { + if (uuid != rhs.uuid) { + return uuid < rhs.uuid; + } else if (cluster_name != rhs.cluster_name) { + return cluster_name < rhs.cluster_name; + } else if (client_name != rhs.client_name) { + return client_name < rhs.client_name; + } else if (mon_host < rhs.mon_host) { + return mon_host < rhs.mon_host; + } else { + return key < rhs.key; + } + } +}; + +std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer); + +} // namespace mirror +} // namespace rbd + + +#endif // CEPH_RBD_MIRROR_TYPES_H diff --git 
a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc new file mode 100644 index 000000000..19a98804c --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc @@ -0,0 +1,299 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/journal/Policy.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::SnapshotPurgeRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; + +template <typename I> +void SnapshotPurgeRequest<I>::send() { + open_image(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::open_image() { + dout(10) << dendl; + m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false); + + // ensure non-primary images can be modified + m_image_ctx->read_only_mask &= ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + + { + std::unique_lock image_locker{m_image_ctx->image_lock}; + m_image_ctx->set_journal_policy(new JournalPolicy()); + } + + Context *ctx = create_context_callback< + SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_open_image>( + this); + m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_open_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to open image '" << m_image_id << "': " << cpp_strerror(r) + << dendl; + 
m_image_ctx = nullptr; + + finish(r); + return; + } + + acquire_lock(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::acquire_lock() { + dout(10) << dendl; + + m_image_ctx->owner_lock.lock_shared(); + if (m_image_ctx->exclusive_lock == nullptr) { + m_image_ctx->owner_lock.unlock_shared(); + + start_snap_unprotect(); + return; + } + + m_image_ctx->exclusive_lock->acquire_lock(create_context_callback< + SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_acquire_lock>( + this)); + m_image_ctx->owner_lock.unlock_shared(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + start_snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::start_snap_unprotect() { + dout(10) << dendl; + + { + std::shared_lock image_locker{m_image_ctx->image_lock}; + m_snaps = m_image_ctx->snaps; + } + snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::snap_unprotect() { + if (m_snaps.empty()) { + close_image(); + return; + } + + librados::snap_t snap_id = m_snaps.back(); + m_image_ctx->image_lock.lock_shared(); + int r = m_image_ctx->get_snap_namespace(snap_id, &m_snap_namespace); + if (r < 0) { + m_image_ctx->image_lock.unlock_shared(); + + derr << "failed to get snap namespace: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + r = m_image_ctx->get_snap_name(snap_id, &m_snap_name); + if (r < 0) { + m_image_ctx->image_lock.unlock_shared(); + + derr << "failed to get snap name: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + bool is_protected; + r = m_image_ctx->is_snap_protected(snap_id, &is_protected); + if (r < 0) { + m_image_ctx->image_lock.unlock_shared(); + + derr << "failed to get snap protection status: " << cpp_strerror(r) + << dendl; + 
m_ret_val = r; + close_image(); + return; + } + m_image_ctx->image_lock.unlock_shared(); + + if (!is_protected) { + snap_remove(); + return; + } + + dout(10) << "snap_id=" << snap_id << ", " + << "snap_namespace=" << m_snap_namespace << ", " + << "snap_name=" << m_snap_name << dendl; + + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + derr << "lost exclusive lock" << dendl; + m_ret_val = r; + close_image(); + return; + } + + auto ctx = new LambdaContext([this, finish_op_ctx](int r) { + handle_snap_unprotect(r); + finish_op_ctx->complete(0); + }); + std::shared_lock owner_locker{m_image_ctx->owner_lock}; + m_image_ctx->operations->execute_snap_unprotect( + m_snap_namespace, m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_snap_unprotect(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshot in-use" << dendl; + m_ret_val = r; + close_image(); + return; + } else if (r < 0) { + derr << "failed to unprotect snapshot: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + { + // avoid the need to refresh to delete the newly unprotected snapshot + std::shared_lock image_locker{m_image_ctx->image_lock}; + librados::snap_t snap_id = m_snaps.back(); + auto snap_info_it = m_image_ctx->snap_info.find(snap_id); + if (snap_info_it != m_image_ctx->snap_info.end()) { + snap_info_it->second.protection_status = + RBD_PROTECTION_STATUS_UNPROTECTED; + } + } + + snap_remove(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::snap_remove() { + librados::snap_t snap_id = m_snaps.back(); + dout(10) << "snap_id=" << snap_id << ", " + << "snap_namespace=" << m_snap_namespace << ", " + << "snap_name=" << m_snap_name << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + derr << "lost exclusive lock" << dendl; + m_ret_val = r; + close_image(); + return; + } + + auto ctx = new LambdaContext([this, 
finish_op_ctx](int r) { + handle_snap_remove(r); + finish_op_ctx->complete(0); + }); + std::shared_lock owner_locker{m_image_ctx->owner_lock}; + m_image_ctx->operations->execute_snap_remove( + m_snap_namespace, m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_snap_remove(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshot in-use" << dendl; + m_ret_val = r; + close_image(); + return; + } else if (r < 0) { + derr << "failed to remove snapshot: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + m_snaps.pop_back(); + snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::close_image() { + dout(10) << dendl; + + m_image_ctx->state->close(create_context_callback< + SnapshotPurgeRequest<I>, + &SnapshotPurgeRequest<I>::handle_close_image>(this)); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_close_image(int r) { + dout(10) << "r=" << r << dendl; + + m_image_ctx = nullptr; + + if (r < 0) { + derr << "failed to close: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + finish(0); +} + +template <typename I> +void SnapshotPurgeRequest<I>::finish(int r) { + if (m_ret_val < 0) { + r = m_ret_val; + } + + m_on_finish->complete(r); + delete this; +} + +template <typename I> +Context *SnapshotPurgeRequest<I>::start_lock_op(int* r) { + std::shared_lock owner_locker{m_image_ctx->owner_lock}; + if (m_image_ctx->exclusive_lock == nullptr) { + return new LambdaContext([](int r) {}); + } + return m_image_ctx->exclusive_lock->start_op(r); +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h new file mode 100644 index 000000000..70cae8518 --- /dev/null +++ 
b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h @@ -0,0 +1,105 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H + +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include <string> +#include <vector> + +class Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class SnapshotPurgeRequest { +public: + static SnapshotPurgeRequest* create(librados::IoCtx &io_ctx, + const std::string &image_id, + Context *on_finish) { + return new SnapshotPurgeRequest(io_ctx, image_id, on_finish); + } + + SnapshotPurgeRequest(librados::IoCtx &io_ctx, const std::string &image_id, + Context *on_finish) + : m_io_ctx(io_ctx), m_image_id(image_id), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE + * | + * v + * ACQUIRE_LOCK + * | + * | (repeat for each snapshot) + * |/------------------------\ + * | | + * v (skip if not needed) | + * SNAP_UNPROTECT | + * | | + * v (skip if not needed) | + * SNAP_REMOVE -----------------/ + * | + * v + * CLOSE_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_image_id; + Context *m_on_finish; + + ImageCtxT *m_image_ctx = nullptr; + int m_ret_val = 0; + + std::vector<librados::snap_t> m_snaps; + cls::rbd::SnapshotNamespace m_snap_namespace; + std::string m_snap_name; + + void open_image(); + void handle_open_image(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void start_snap_unprotect(); + void snap_unprotect(); + void handle_snap_unprotect(int r); + + void snap_remove(); + void handle_snap_remove(int r); + + void close_image(); + void handle_close_image(int r); + + void 
finish(int r); + + Context *start_lock_op(int* r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H + diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc new file mode 100644 index 000000000..e53923ef3 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc @@ -0,0 +1,419 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/journal/ResetRequest.h" +#include "librbd/mirror/ImageRemoveRequest.h" +#include "librbd/mirror/GetInfoRequest.h" +#include "librbd/trash/MoveRequest.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashMoveRequest: " \ + << this << " " << __func__ << ": " +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void TrashMoveRequest<I>::send() { + get_mirror_image_id(); +} + +template <typename I> +void TrashMoveRequest<I>::get_mirror_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start(&op, 
m_global_image_id); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_get_mirror_image_id>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_get_mirror_image_id(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish(&bl_it, + &m_image_id); + } + if (r == -ENOENT) { + dout(10) << "image " << m_global_image_id << " is not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "error retrieving local id for image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_mirror_info(); +} + +template <typename I> +void TrashMoveRequest<I>::get_mirror_info() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_get_mirror_info>(this); + auto req = librbd::mirror::GetInfoRequest<I>::create( + m_io_ctx, m_op_work_queue, m_image_id, &m_mirror_image, &m_promotion_state, + &m_primary_mirror_uuid, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_get_mirror_info(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(5) << "image " << m_global_image_id << " is not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "error retrieving image primary info for image " + << m_global_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (m_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) { + dout(10) << "image " << m_global_image_id << " is local primary" << dendl; + finish(-EPERM); + return; + } else if (m_promotion_state == librbd::mirror::PROMOTION_STATE_ORPHAN && + !m_resync) { + dout(10) << "image " << m_global_image_id << " is orphaned" << dendl; + 
finish(-EPERM); + return; + } + + disable_mirror_image(); +} + +template <typename I> +void TrashMoveRequest<I>::disable_mirror_image() { + dout(10) << dendl; + + m_mirror_image.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_set(&op, m_image_id, m_mirror_image); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_disable_mirror_image>(this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_disable_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "local image is not mirrored, aborting deletion." << dendl; + finish(r); + return; + } else if (r == -EEXIST || r == -EINVAL) { + derr << "cannot disable mirroring for image " << m_global_image_id + << ": global_image_id has changed/reused: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "cannot disable mirroring for image " << m_global_image_id + << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + open_image(); +} + +template <typename I> +void TrashMoveRequest<I>::open_image() { + dout(10) << dendl; + + m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false); + + // ensure non-primary images can be modified + m_image_ctx->read_only_mask &= ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + + { + // don't attempt to open the journal + std::unique_lock image_locker{m_image_ctx->image_lock}; + m_image_ctx->set_journal_policy(new JournalPolicy()); + } + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_open_image>(this); + m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_open_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(5) << 
"mirror image does not exist, removing orphaned metadata" << dendl; + m_image_ctx = nullptr; + remove_mirror_image(); + return; + } + + if (r < 0) { + derr << "failed to open image: " << cpp_strerror(r) << dendl; + m_image_ctx = nullptr; + finish(r); + return; + } + + if (m_image_ctx->old_format) { + derr << "cannot move v1 image to trash" << dendl; + m_ret_val = -EINVAL; + close_image(); + return; + } + + reset_journal(); +} + +template <typename I> +void TrashMoveRequest<I>::reset_journal() { + if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + // snapshot-based mirroring doesn't require journal feature + acquire_lock(); + return; + } + + dout(10) << dendl; + + // TODO use Journal thread pool for journal ops until converted to ASIO + ContextWQ* context_wq; + librbd::Journal<>::get_work_queue( + reinterpret_cast<CephContext*>(m_io_ctx.cct()), &context_wq); + + // ensure that if the image is recovered any peers will split-brain + auto ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_reset_journal>(this); + auto req = librbd::journal::ResetRequest<I>::create( + m_io_ctx, m_image_id, librbd::Journal<>::IMAGE_CLIENT_ID, + librbd::Journal<>::LOCAL_MIRROR_UUID, context_wq, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_reset_journal(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to reset journal: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + acquire_lock(); +} + +template <typename I> +void TrashMoveRequest<I>::acquire_lock() { + m_image_ctx->owner_lock.lock_shared(); + if (m_image_ctx->exclusive_lock == nullptr) { + m_image_ctx->owner_lock.unlock_shared(); + + if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + // snapshot-based mirroring doesn't require exclusive-lock + trash_move(); + } else { + derr << "exclusive lock feature not enabled" << dendl; + m_ret_val = -EINVAL; + 
close_image(); + } + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_acquire_lock>(this); + m_image_ctx->exclusive_lock->block_requests(0); + m_image_ctx->exclusive_lock->acquire_lock(ctx); + m_image_ctx->owner_lock.unlock_shared(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + trash_move(); +} + +template <typename I> +void TrashMoveRequest<I>::trash_move() { + dout(10) << dendl; + + utime_t delete_time{ceph_clock_now()}; + utime_t deferment_end_time{delete_time}; + deferment_end_time += + m_image_ctx->config.template get_val<uint64_t>("rbd_mirroring_delete_delay"); + + m_trash_image_spec = { + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING, m_image_ctx->name, delete_time, + deferment_end_time}; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_trash_move>(this); + auto req = librbd::trash::MoveRequest<I>::create( + m_io_ctx, m_image_id, m_trash_image_spec, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_trash_move(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to move image to trash: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + m_moved_to_trash = true; + remove_mirror_image(); +} + +template <typename I> +void TrashMoveRequest<I>::remove_mirror_image() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_remove_mirror_image>(this); + auto req = librbd::mirror::ImageRemoveRequest<I>::create( + m_io_ctx, m_global_image_id, m_image_id, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_remove_mirror_image(int r) { + dout(10) << "r=" << r 
<< dendl; + + if (r == -ENOENT) { + dout(10) << "local image is not mirrored" << dendl; + } else if (r < 0) { + derr << "failed to remove mirror image state for " << m_global_image_id + << ": " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + + close_image(); +} + +template <typename I> +void TrashMoveRequest<I>::close_image() { + dout(10) << dendl; + + if (m_image_ctx == nullptr) { + handle_close_image(0); + return; + } + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_close_image>(this); + m_image_ctx->state->close(ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_close_image(int r) { + dout(10) << "r=" << r << dendl; + + m_image_ctx = nullptr; + + if (r < 0) { + derr << "failed to close image: " << cpp_strerror(r) << dendl; + } + + // don't send notification if we failed + if (!m_moved_to_trash) { + finish(0); + return; + } + + notify_trash_add(); +} + +template <typename I> +void TrashMoveRequest<I>::notify_trash_add() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_notify_trash_add>(this); + librbd::TrashWatcher<I>::notify_image_added(m_io_ctx, m_image_id, + m_trash_image_spec, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_notify_trash_add(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl; + } + + finish(0); +} + +template <typename I> +void TrashMoveRequest<I>::finish(int r) { + if (m_ret_val < 0) { + r = m_ret_val; + } + + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>; + diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h new file mode 100644 index 
000000000..5b3f02519 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h @@ -0,0 +1,142 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/mirror/Types.h" +#include <string> + +struct Context; +namespace librbd { +struct ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashMoveRequest { +public: + static TrashMoveRequest* create(librados::IoCtx& io_ctx, + const std::string& global_image_id, + bool resync, + librbd::asio::ContextWQ* op_work_queue, + Context* on_finish) { + return new TrashMoveRequest(io_ctx, global_image_id, resync, op_work_queue, + on_finish); + } + + TrashMoveRequest(librados::IoCtx& io_ctx, const std::string& global_image_id, + bool resync, librbd::asio::ContextWQ* op_work_queue, + Context* on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), m_resync(resync), + m_op_work_queue(op_work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * GET_MIRROR_IMAGE_ID + * | + * v + * GET_MIRROR_INFO + * | + * v + * DISABLE_MIRROR_IMAGE + * | + * v + * OPEN_IMAGE + * | + * v (skip if not needed) + * RESET_JOURNAL + * | + * v (skip if not needed) + * ACQUIRE_LOCK + * | + * v + * TRASH_MOVE + * | + * v + * REMOVE_MIRROR_IMAGE + * | + * v + * CLOSE_IMAGE + * | + * v + * NOTIFY_TRASH_ADD + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + bool m_resync; + librbd::asio::ContextWQ *m_op_work_queue; + Context *m_on_finish; + + ceph::bufferlist m_out_bl; + 
std::string m_image_id; + cls::rbd::MirrorImage m_mirror_image; + librbd::mirror::PromotionState m_promotion_state; + std::string m_primary_mirror_uuid; + cls::rbd::TrashImageSpec m_trash_image_spec; + ImageCtxT *m_image_ctx = nullptr;; + int m_ret_val = 0; + bool m_moved_to_trash = false; + + void get_mirror_image_id(); + void handle_get_mirror_image_id(int r); + + void get_mirror_info(); + void handle_get_mirror_info(int r); + + void disable_mirror_image(); + void handle_disable_mirror_image(int r); + + void open_image(); + void handle_open_image(int r); + + void reset_journal(); + void handle_reset_journal(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void trash_move(); + void handle_trash_move(int r); + + void remove_mirror_image(); + void handle_remove_mirror_image(int r); + + void close_image(); + void handle_close_image(int r); + + void notify_trash_add(); + void handle_notify_trash_add(int r); + + void finish(int r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc new file mode 100644 index 000000000..4d7c1c9df --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc @@ -0,0 +1,265 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h" +#include "include/ceph_assert.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/trash/RemoveRequest.h" +#include 
"tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashRemoveRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void TrashRemoveRequest<I>::send() { + *m_error_result = ERROR_RESULT_RETRY; + + get_trash_image_spec(); +} + +template <typename I> +void TrashRemoveRequest<I>::get_trash_image_spec() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::trash_get_start(&op, m_image_id); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_get_trash_image_spec>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_get_trash_image_spec(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::trash_get_finish(&bl_it, &m_trash_image_spec); + } + + if (r == -ENOENT || (r >= 0 && m_trash_image_spec.source != + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING)) { + dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl; + finish(0); + return; + } else if (r < 0) { + derr << "error getting image id " << m_image_id << " info from trash: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL && + m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + dout(10) << "image " << m_image_id << " is not in an expected trash state: " + << m_trash_image_spec.state << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(-EBUSY); + return; + } 
+ + set_trash_state(); +} + +template <typename I> +void TrashRemoveRequest<I>::set_trash_state() { + if (m_trash_image_spec.state == cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + get_snap_context(); + return; + } + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::trash_state_set(&op, m_image_id, + cls::rbd::TRASH_IMAGE_STATE_REMOVING, + cls::rbd::TRASH_IMAGE_STATE_NORMAL); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_set_trash_state>(this); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_set_trash_state(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl; + finish(0); + return; + } else if (r < 0 && r != -EOPNOTSUPP) { + derr << "error setting trash image state for image id " << m_image_id + << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_snap_context(); +} + +template <typename I> +void TrashRemoveRequest<I>::get_snap_context() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::get_snapcontext_start(&op); + + std::string header_oid = librbd::util::header_name(m_image_id); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_get_snap_context>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(header_oid, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_get_snap_context(int r) { + dout(10) << "r=" << r << dendl; + + ::SnapContext snapc; + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::get_snapcontext_finish(&bl_it, &snapc); + } + if (r < 0 && r != -ENOENT) { + derr << "error retrieving snapshot context for image " + << m_image_id << ": " << 
cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_has_snapshots = (!snapc.empty()); + purge_snapshots(); +} + +template <typename I> +void TrashRemoveRequest<I>::purge_snapshots() { + if (!m_has_snapshots) { + remove_image(); + return; + } + + dout(10) << dendl; + auto ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_purge_snapshots>(this); + auto req = SnapshotPurgeRequest<I>::create(m_io_ctx, m_image_id, ctx); + req->send(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_purge_snapshots(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshots still in-use" << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(r); + return; + } else if (r < 0) { + derr << "failed to purge image snapshots: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + remove_image(); +} + +template <typename I> +void TrashRemoveRequest<I>::remove_image() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_remove_image>(this); + auto req = librbd::trash::RemoveRequest<I>::create( + m_io_ctx, m_image_id, m_op_work_queue, true, m_progress_ctx, + ctx); + req->send(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_remove_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -ENOTEMPTY) { + // image must have clone v2 snapshot still associated to child + dout(10) << "snapshots still in-use" << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(-EBUSY); + return; + } + + if (r < 0 && r != -ENOENT) { + derr << "error removing image " << m_image_id << " " + << "(" << m_image_id << ") from local pool: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + notify_trash_removed(); +} + +template <typename I> +void TrashRemoveRequest<I>::notify_trash_removed() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashRemoveRequest<I>, + 
// Asynchronous, self-deleting state machine that removes a single image
// (identified by its image id) via the pool's trash object.
//
// Instances are heap-allocated through create(); finish() completes the
// caller's on_finish context and then deletes the request, so the object
// must not be touched after the callback fires.
template <typename ImageCtxT = librbd::ImageCtx>
class TrashRemoveRequest {
public:
  // Allocates a new request.
  //
  // @param io_ctx         IoCtx used for all RADOS operations; stored by
  //                       reference, so it must outlive the request
  // @param image_id       id of the image to remove
  // @param error_result   out-parameter; the handlers visible in this file
  //                       set it to ERROR_RESULT_RETRY_IMMEDIATELY when
  //                       snapshots are still in use
  // @param op_work_queue  work queue handed to librbd's trash RemoveRequest
  // @param on_finish      completed with the final result code
  static TrashRemoveRequest* create(librados::IoCtx &io_ctx,
                                    const std::string &image_id,
                                    ErrorResult *error_result,
                                    librbd::asio::ContextWQ *op_work_queue,
                                    Context *on_finish) {
    return new TrashRemoveRequest(io_ctx, image_id, error_result, op_work_queue,
                                  on_finish);
  }

  TrashRemoveRequest(librados::IoCtx &io_ctx, const std::string &image_id,
                     ErrorResult *error_result,
                     librbd::asio::ContextWQ *op_work_queue,
                     Context *on_finish)
    : m_io_ctx(io_ctx), m_image_id(image_id), m_error_result(error_result),
      m_op_work_queue(op_work_queue), m_on_finish(on_finish) {
  }

  // Starts the state machine.
  void send();

private:
  /*
   * @verbatim
   *
   * <start>
   *    |
   *    v
   * GET_TRASH_IMAGE_SPEC
   *    |
   *    v
   * SET_TRASH_STATE        (skipped if the state is already REMOVING)
   *    |
   *    v
   * GET_SNAP_CONTEXT
   *    |
   *    v
   * PURGE_SNAPSHOTS        (skipped if the image has no snapshots)
   *    |
   *    v
   * TRASH_REMOVE
   *    |
   *    v
   * NOTIFY_TRASH_REMOVE
   *    |
   *    v
   * <finish>
   *
   * @endverbatim
   */

  librados::IoCtx &m_io_ctx;                 // not owned
  std::string m_image_id;
  ErrorResult *m_error_result;               // not owned; written on retryable errors
  librbd::asio::ContextWQ *m_op_work_queue;  // not owned
  Context *m_on_finish;                      // completed exactly once by finish()

  ceph::bufferlist m_out_bl;                 // reply buffer for RADOS read operations
  cls::rbd::TrashImageSpec m_trash_image_spec;  // its state is checked by set_trash_state()
  bool m_has_snapshots = false;              // set from the image's snapshot context
  librbd::NoOpProgressContext m_progress_ctx;   // progress sink passed to the remove request

  void get_trash_image_spec();
  void handle_get_trash_image_spec(int r);

  void set_trash_state();
  void handle_set_trash_state(int r);

  void get_snap_context();
  void handle_get_snap_context(int r);

  void purge_snapshots();
  void handle_purge_snapshots(int r);

  void remove_image();
  void handle_remove_image(int r);

  void notify_trash_removed();
  void handle_notify_trash_removed(int r);

  // Completes m_on_finish with r and deletes this request.
  void finish(int r);

};
"tools/rbd_mirror/image_deleter/TrashWatcher.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashWatcher: " \ + << this << " " << __func__ << ": " + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { +namespace image_deleter { + +namespace { + +const size_t MAX_RETURN = 1024; + +} // anonymous namespace + +template <typename I> +TrashWatcher<I>::TrashWatcher(librados::IoCtx &io_ctx, Threads<I> *threads, + TrashListener& trash_listener) + : librbd::TrashWatcher<I>(io_ctx, threads->work_queue), + m_io_ctx(io_ctx), m_threads(threads), m_trash_listener(trash_listener), + m_lock(ceph::make_mutex(librbd::util::unique_lock_name( + "rbd::mirror::image_deleter::TrashWatcher", this))) { +} + +template <typename I> +void TrashWatcher<I>::init(Context *on_finish) { + dout(5) << dendl; + + { + std::lock_guard locker{m_lock}; + m_on_init_finish = on_finish; + + ceph_assert(!m_trash_list_in_progress); + m_trash_list_in_progress = true; + } + + create_trash(); +} + +template <typename I> +void TrashWatcher<I>::shut_down(Context *on_finish) { + dout(5) << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + ceph_assert(!m_shutting_down); + m_shutting_down = true; + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + } + } + + auto ctx = new LambdaContext([this, on_finish](int r) { + unregister_watcher(on_finish); + }); + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> 
+void TrashWatcher<I>::handle_image_added(const std::string &image_id, + const cls::rbd::TrashImageSpec& spec) { + dout(10) << "image_id=" << image_id << dendl; + + std::lock_guard locker{m_lock}; + add_image(image_id, spec); +} + +template <typename I> +void TrashWatcher<I>::handle_image_removed(const std::string &image_id) { + // ignore removals -- the image deleter will ignore -ENOENTs +} + +template <typename I> +void TrashWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted" << dendl; + return; + } else if (r == -ENOENT) { + dout(5) << "trash directory deleted" << dendl; + } else if (r < 0) { + derr << "unexpected error re-registering trash directory watch: " + << cpp_strerror(r) << dendl; + } + schedule_trash_list(30); +} + +template <typename I> +void TrashWatcher<I>::create_trash() { + dout(20) << dendl; + { + std::lock_guard locker{m_lock}; + ceph_assert(m_trash_list_in_progress); + } + + librados::ObjectWriteOperation op; + op.create(false); + + m_async_op_tracker.start_op(); + auto aio_comp = create_rados_callback< + TrashWatcher<I>, &TrashWatcher<I>::handle_create_trash>(this); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashWatcher<I>::handle_create_trash(int r) { + dout(20) << "r=" << r << dendl; + { + std::lock_guard locker{m_lock}; + ceph_assert(m_trash_list_in_progress); + } + + Context* on_init_finish = nullptr; + if (r == -EBLOCKLISTED || r == -ENOENT) { + if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted" << dendl; + } else { + dout(0) << "detected pool no longer exists" << dendl; + } + + std::lock_guard locker{m_lock}; + std::swap(on_init_finish, m_on_init_finish); + m_trash_list_in_progress = false; + } else if (r < 0 && r != -EEXIST) { + derr << "failed to create trash object: " << cpp_strerror(r) << dendl; + { + std::lock_guard 
locker{m_lock}; + m_trash_list_in_progress = false; + } + + schedule_trash_list(30); + } else { + register_watcher(); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void TrashWatcher<I>::register_watcher() { + { + std::lock_guard locker{m_lock}; + ceph_assert(m_trash_list_in_progress); + } + + // if the watch registration is in-flight, let the watcher + // handle the transition -- only (re-)register if it's not registered + if (!this->is_unregistered()) { + trash_list(true); + return; + } + + // first time registering or the watch failed + dout(5) << dendl; + m_async_op_tracker.start_op(); + + Context *ctx = create_context_callback< + TrashWatcher, &TrashWatcher<I>::handle_register_watcher>(this); + this->register_watch(ctx); +} + +template <typename I> +void TrashWatcher<I>::handle_register_watcher(int r) { + dout(5) << "r=" << r << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_trash_list_in_progress); + if (r < 0) { + m_trash_list_in_progress = false; + } + } + + Context *on_init_finish = nullptr; + if (r >= 0) { + trash_list(true); + } else if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted" << dendl; + + std::lock_guard locker{m_lock}; + std::swap(on_init_finish, m_on_init_finish); + } else { + derr << "unexpected error registering trash directory watch: " + << cpp_strerror(r) << dendl; + schedule_trash_list(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void TrashWatcher<I>::unregister_watcher(Context* on_finish) { + dout(5) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new LambdaContext([this, on_finish](int r) { + handle_unregister_watcher(r, on_finish); + }); + this->unregister_watch(ctx); +} + +template <typename I> +void TrashWatcher<I>::handle_unregister_watcher(int r, Context* on_finish) { + dout(5) << 
"unregister_watcher: r=" << r << dendl; + if (r < 0) { + derr << "error unregistering watcher for trash directory: " + << cpp_strerror(r) << dendl; + } + m_async_op_tracker.finish_op(); + on_finish->complete(0); +} + +template <typename I> +void TrashWatcher<I>::trash_list(bool initial_request) { + if (initial_request) { + m_async_op_tracker.start_op(); + m_last_image_id = ""; + } + + dout(5) << "last_image_id=" << m_last_image_id << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_trash_list_in_progress); + } + + librados::ObjectReadOperation op; + librbd::cls_client::trash_list_start(&op, m_last_image_id, MAX_RETURN); + + librados::AioCompletion *aio_comp = create_rados_callback< + TrashWatcher<I>, &TrashWatcher<I>::handle_trash_list>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashWatcher<I>::handle_trash_list(int r) { + dout(5) << "r=" << r << dendl; + + std::map<std::string, cls::rbd::TrashImageSpec> images; + if (r >= 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::trash_list_finish(&bl_it, &images); + } + + Context *on_init_finish = nullptr; + { + std::lock_guard locker{m_lock}; + ceph_assert(m_trash_list_in_progress); + if (r >= 0) { + for (auto& image : images) { + add_image(image.first, image.second); + } + } else if (r == -ENOENT) { + r = 0; + } + + if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted during trash refresh" << dendl; + m_trash_list_in_progress = false; + std::swap(on_init_finish, m_on_init_finish); + } else if (r >= 0 && images.size() < MAX_RETURN) { + m_trash_list_in_progress = false; + std::swap(on_init_finish, m_on_init_finish); + } else if (r < 0) { + m_trash_list_in_progress = false; + } + } + + if (r >= 0 && images.size() == MAX_RETURN) { + m_last_image_id = images.rbegin()->first; + trash_list(false); + return; + } else if (r < 0 && r != -EBLOCKLISTED) 
{ + derr << "failed to retrieve trash directory: " << cpp_strerror(r) << dendl; + schedule_trash_list(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void TrashWatcher<I>::schedule_trash_list(double interval) { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (m_shutting_down || m_trash_list_in_progress || m_timer_ctx != nullptr) { + if (m_trash_list_in_progress && !m_deferred_trash_list) { + dout(5) << "deferring refresh until in-flight refresh completes" << dendl; + m_deferred_trash_list = true; + } + return; + } + + dout(5) << dendl; + m_timer_ctx = m_threads->timer->add_event_after( + interval, + new LambdaContext([this](int r) { + process_trash_list(); + })); +} + +template <typename I> +void TrashWatcher<I>::process_trash_list() { + dout(5) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + { + std::lock_guard locker{m_lock}; + ceph_assert(!m_trash_list_in_progress); + m_trash_list_in_progress = true; + } + + // execute outside of the timer's lock + m_async_op_tracker.start_op(); + Context *ctx = new LambdaContext([this](int r) { + create_trash(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void TrashWatcher<I>::add_image(const std::string& image_id, + const cls::rbd::TrashImageSpec& spec) { + if (spec.source != cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING) { + return; + } + + ceph_assert(ceph_mutex_is_locked(m_lock)); + auto& deferment_end_time = spec.deferment_end_time; + dout(10) << "image_id=" << image_id << ", " + << "deferment_end_time=" << deferment_end_time << dendl; + + m_async_op_tracker.start_op(); + auto ctx = new LambdaContext([this, image_id, deferment_end_time](int r) { + m_trash_listener.handle_trash_image(image_id, + deferment_end_time.to_real_time()); + m_async_op_tracker.finish_op(); + 
}); + m_threads->work_queue->queue(ctx, 0); +} + +} // namespace image_deleter; +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.h b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h new file mode 100644 index 000000000..e818a102c --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h @@ -0,0 +1,139 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H +#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H + +#include "include/rados/librados.hpp" +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "librbd/TrashWatcher.h" +#include <set> +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_deleter { + +struct TrashListener; + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashWatcher : public librbd::TrashWatcher<ImageCtxT> { +public: + static TrashWatcher* create(librados::IoCtx &io_ctx, + Threads<ImageCtxT> *threads, + TrashListener& trash_listener) { + return new TrashWatcher(io_ctx, threads, trash_listener); + } + + TrashWatcher(librados::IoCtx &io_ctx, Threads<ImageCtxT> *threads, + TrashListener& trash_listener); + TrashWatcher(const TrashWatcher&) = delete; + TrashWatcher& operator=(const TrashWatcher&) = delete; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + +protected: + void handle_image_added(const std::string &image_id, + const cls::rbd::TrashImageSpec& spec) override; + + void handle_image_removed(const std::string &image_id) override; + + void handle_rewatch_complete(int r) override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * CREATE_TRASH + * | + * v + * REGISTER_WATCHER + * | + * 
|/--------------------------------\ + * | | + * |/---------\ | + * | | | + * v | (more images) | + * TRASH_LIST ---/ | + * | | + * |/----------------------------\ | + * | | | + * v | | + * <idle> --\ | | + * | | | | + * | |\---> IMAGE_ADDED -----/ | + * | | | + * | \----> WATCH_ERROR ---------/ + * v + * SHUT_DOWN + * | + * v + * UNREGISTER_WATCHER + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx m_io_ctx; + Threads<ImageCtxT> *m_threads; + TrashListener& m_trash_listener; + + std::string m_last_image_id; + bufferlist m_out_bl; + + mutable ceph::mutex m_lock; + + Context *m_on_init_finish = nullptr; + Context *m_timer_ctx = nullptr; + + AsyncOpTracker m_async_op_tracker; + bool m_trash_list_in_progress = false; + bool m_deferred_trash_list = false; + bool m_shutting_down = false; + + void register_watcher(); + void handle_register_watcher(int r); + + void create_trash(); + void handle_create_trash(int r); + + void unregister_watcher(Context* on_finish); + void handle_unregister_watcher(int r, Context* on_finish); + + void trash_list(bool initial_request); + void handle_trash_list(int r); + + void schedule_trash_list(double interval); + void process_trash_list(); + + void get_mirror_uuid(); + void handle_get_mirror_uuid(int r); + + void add_image(const std::string& image_id, + const cls::rbd::TrashImageSpec& spec); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H diff --git a/src/tools/rbd_mirror/image_deleter/Types.h b/src/tools/rbd_mirror/image_deleter/Types.h new file mode 100644 index 000000000..1c70b7e14 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/Types.h @@ -0,0 +1,54 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H +#define 
CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H + +#include "include/Context.h" +#include "librbd/journal/Policy.h" +#include <string> + +struct utime_t; + +namespace rbd { +namespace mirror { +namespace image_deleter { + +enum ErrorResult { + ERROR_RESULT_COMPLETE, + ERROR_RESULT_RETRY, + ERROR_RESULT_RETRY_IMMEDIATELY +}; + +struct TrashListener { + TrashListener() { + } + TrashListener(const TrashListener&) = delete; + TrashListener& operator=(const TrashListener&) = delete; + + virtual ~TrashListener() { + } + + virtual void handle_trash_image(const std::string& image_id, + const ceph::real_clock::time_point& deferment_end_time) = 0; + +}; + +struct JournalPolicy : public librbd::journal::Policy { + bool append_disabled() const override { + return true; + } + bool journal_disabled() const override { + return true; + } + + void allocate_tag_on_lock(Context *on_finish) override { + on_finish->complete(0); + } +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.cc b/src/tools/rbd_mirror/image_map/LoadRequest.cc new file mode 100644 index 000000000..46564a160 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/LoadRequest.cc @@ -0,0 +1,174 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" + +#include "UpdateRequest.h" +#include "LoadRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::LoadRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_map { + +static const uint32_t MAX_RETURN = 1024; + +using librbd::util::create_rados_callback; +using librbd::util::create_context_callback; + 
+template<typename I> +LoadRequest<I>::LoadRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping, + Context *on_finish) + : m_ioctx(ioctx), + m_image_mapping(image_mapping), + m_on_finish(on_finish) { +} + +template<typename I> +void LoadRequest<I>::send() { + dout(20) << dendl; + + image_map_list(); +} + +template<typename I> +void LoadRequest<I>::image_map_list() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_map_list_start(&op, m_start_after, MAX_RETURN); + + librados::AioCompletion *aio_comp = create_rados_callback< + LoadRequest, &LoadRequest::handle_image_map_list>(this); + + m_out_bl.clear(); + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template<typename I> +void LoadRequest<I>::handle_image_map_list(int r) { + dout(20) << ": r=" << r << dendl; + + std::map<std::string, cls::rbd::MirrorImageMap> image_mapping; + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_map_list_finish(&it, &image_mapping); + } + + if (r < 0) { + derr << ": failed to get image map: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_image_mapping->insert(image_mapping.begin(), image_mapping.end()); + + if (image_mapping.size() == MAX_RETURN) { + m_start_after = image_mapping.rbegin()->first; + image_map_list(); + return; + } + + mirror_image_list(); +} + +template<typename I> +void LoadRequest<I>::mirror_image_list() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + LoadRequest<I>, + &LoadRequest<I>::handle_mirror_image_list>(this); + int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template<typename I> +void 
LoadRequest<I>::handle_mirror_image_list(int r) { + dout(20) << ": r=" << r << dendl; + + std::map<std::string, std::string> ids; + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_list_finish(&it, &ids); + } + + if (r < 0 && r != -ENOENT) { + derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + for (auto &id : ids) { + m_global_image_ids.emplace(id.second); + } + + if (ids.size() == MAX_RETURN) { + m_start_after = ids.rbegin()->first; + mirror_image_list(); + return; + } + + cleanup_image_map(); +} + +template<typename I> +void LoadRequest<I>::cleanup_image_map() { + dout(20) << dendl; + + std::set<std::string> map_removals; + + auto it = m_image_mapping->begin(); + while (it != m_image_mapping->end()) { + if (m_global_image_ids.count(it->first) > 0) { + ++it; + continue; + } + map_removals.emplace(it->first); + it = m_image_mapping->erase(it); + } + + if (map_removals.size() == 0) { + finish(0); + return; + } + + auto ctx = create_context_callback< + LoadRequest<I>, + &LoadRequest<I>::finish>(this); + image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create( + m_ioctx, {}, std::move(map_removals), ctx); + req->send(); +} + +template<typename I> +void LoadRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_map::LoadRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.h b/src/tools/rbd_mirror/image_map/LoadRequest.h new file mode 100644 index 000000000..9b1be9685 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/LoadRequest.h @@ -0,0 +1,77 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H + +#include 
"cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +class Context; + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_map { + +template<typename ImageCtxT = librbd::ImageCtx> +class LoadRequest { +public: + static LoadRequest *create(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping, + Context *on_finish) { + return new LoadRequest(ioctx, image_mapping, on_finish); + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | . . . . . . . . + * v v . MAX_RETURN + * IMAGE_MAP_LIST. . . . . . . + * | + * v + * MIRROR_IMAGE_LIST + * | + * v + * CLEANUP_IMAGE_MAP + * | + * v + * <finish> + * + * @endverbatim + */ + LoadRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping, + Context *on_finish); + + librados::IoCtx &m_ioctx; + std::map<std::string, cls::rbd::MirrorImageMap> *m_image_mapping; + Context *m_on_finish; + + std::set<std::string> m_global_image_ids; + + bufferlist m_out_bl; + std::string m_start_after; + + void image_map_list(); + void handle_image_map_list(int r); + + void mirror_image_list(); + void handle_mirror_image_list(int r); + + void cleanup_image_map(); + + void finish(int r); +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc new file mode 100644 index 000000000..62fbd12dc --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Policy.cc @@ -0,0 +1,407 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "Policy.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << 
this \ + << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_map { + +namespace { + +bool is_instance_action(ActionType action_type) { + switch (action_type) { + case ACTION_TYPE_ACQUIRE: + case ACTION_TYPE_RELEASE: + return true; + case ACTION_TYPE_NONE: + case ACTION_TYPE_MAP_UPDATE: + case ACTION_TYPE_MAP_REMOVE: + break; + } + return false; +} + +} // anonymous namespace + +using ::operator<<; +using librbd::util::unique_lock_name; + +Policy::Policy(librados::IoCtx &ioctx) + : m_ioctx(ioctx), + m_map_lock(ceph::make_shared_mutex( + unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", this))) { + + // map should at least have once instance + std::string instance_id = stringify(ioctx.get_instance_id()); + m_map.emplace(instance_id, std::set<std::string>{}); +} + +void Policy::init( + const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) { + dout(20) << dendl; + + std::unique_lock map_lock{m_map_lock}; + for (auto& it : image_mapping) { + ceph_assert(!it.second.instance_id.empty()); + auto map_result = m_map[it.second.instance_id].emplace(it.first); + ceph_assert(map_result.second); + + auto image_state_result = m_image_states.emplace( + it.first, ImageState{it.second.instance_id, it.second.mapped_time}); + ceph_assert(image_state_result.second); + + // ensure we (re)send image acquire actions to the instance + auto& image_state = image_state_result.first->second; + auto start_action = set_state(&image_state, + StateTransition::STATE_INITIALIZING, false); + ceph_assert(start_action); + } +} + +LookupInfo Policy::lookup(const std::string &global_image_id) { + dout(20) << "global_image_id=" << global_image_id << dendl; + + std::shared_lock map_lock{m_map_lock}; + LookupInfo info; + + auto it = m_image_states.find(global_image_id); + if (it != m_image_states.end()) { + info.instance_id = it->second.instance_id; + info.mapped_time = it->second.mapped_time; + } + return info; +} + +bool Policy::add_image(const 
std::string &global_image_id) { + dout(5) << "global_image_id=" << global_image_id << dendl; + + std::unique_lock map_lock{m_map_lock}; + auto image_state_result = m_image_states.emplace(global_image_id, + ImageState{}); + auto& image_state = image_state_result.first->second; + if (image_state.state == StateTransition::STATE_INITIALIZING) { + // avoid duplicate acquire notifications upon leader startup + return false; + } + + return set_state(&image_state, StateTransition::STATE_ASSOCIATING, false); +} + +bool Policy::remove_image(const std::string &global_image_id) { + dout(5) << "global_image_id=" << global_image_id << dendl; + + std::unique_lock map_lock{m_map_lock}; + auto it = m_image_states.find(global_image_id); + if (it == m_image_states.end()) { + return false; + } + + auto& image_state = it->second; + return set_state(&image_state, StateTransition::STATE_DISSOCIATING, false); +} + +void Policy::add_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + + std::unique_lock map_lock{m_map_lock}; + for (auto& instance : instance_ids) { + ceph_assert(!instance.empty()); + m_map.emplace(instance, std::set<std::string>{}); + } + + // post-failover, remove any dead instances and re-shuffle their images + if (m_initial_update) { + dout(5) << "initial instance update" << dendl; + m_initial_update = false; + + std::set<std::string> alive_instances(instance_ids.begin(), + instance_ids.end()); + InstanceIds dead_instances; + for (auto& map_pair : m_map) { + if (alive_instances.find(map_pair.first) == alive_instances.end()) { + dead_instances.push_back(map_pair.first); + } + } + + if (!dead_instances.empty()) { + remove_instances(m_map_lock, dead_instances, global_image_ids); + } + } + + GlobalImageIds shuffle_global_image_ids; + do_shuffle_add_instances(m_map, m_image_states.size(), &shuffle_global_image_ids); + dout(5) << "shuffling global_image_ids=[" << shuffle_global_image_ids + << 
"]" << dendl; + for (auto& global_image_id : shuffle_global_image_ids) { + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + if (set_state(&image_state, StateTransition::STATE_SHUFFLING, false)) { + global_image_ids->emplace(global_image_id); + } + } +} + +void Policy::remove_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + std::unique_lock map_lock{m_map_lock}; + remove_instances(m_map_lock, instance_ids, global_image_ids); +} + +void Policy::remove_instances(const ceph::shared_mutex& lock, + const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + ceph_assert(ceph_mutex_is_wlocked(m_map_lock)); + dout(5) << "instance_ids=" << instance_ids << dendl; + + for (auto& instance_id : instance_ids) { + auto map_it = m_map.find(instance_id); + if (map_it == m_map.end()) { + continue; + } + + auto& instance_global_image_ids = map_it->second; + if (instance_global_image_ids.empty()) { + m_map.erase(map_it); + continue; + } + + m_dead_instances.insert(instance_id); + dout(5) << "force shuffling: instance_id=" << instance_id << ", " + << "global_image_ids=[" << instance_global_image_ids << "]"<< dendl; + for (auto& global_image_id : instance_global_image_ids) { + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + if (is_state_scheduled(image_state, + StateTransition::STATE_DISSOCIATING)) { + // don't shuffle images that no longer exist + continue; + } + + if (set_state(&image_state, StateTransition::STATE_SHUFFLING, true)) { + global_image_ids->emplace(global_image_id); + } + } + } +} + +ActionType Policy::start_action(const std::string &global_image_id) { + std::unique_lock map_lock{m_map_lock}; + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + auto& transition = image_state.transition; + 
// Records the completion of the action most recently started for an image
// and advances that image's state machine.
//
// @param global_image_id image whose action finished; it must have an
//                        entry in m_image_states
// @param r               result of the action
// @return true when the caller has another action to run for this image:
//         either the failed action must be retried, or the state machine
//         advanced to a transition whose action type is not
//         ACTION_TYPE_NONE
bool Policy::finish_action(const std::string &global_image_id, int r) {
  std::unique_lock map_lock{m_map_lock};

  auto it = m_image_states.find(global_image_id);
  ceph_assert(it != m_image_states.end());

  auto& image_state = it->second;
  auto& transition = image_state.transition;
  dout(5) << "global_image_id=" << global_image_id << ", "
          << "state=" << image_state.state << ", "
          << "action_type=" << transition.action_type << ", "
          << "r=" << r << dendl;

  // retry on failure unless it's an RPC message to an instance that is dead
  if (r < 0 &&
      (!is_instance_action(image_state.transition.action_type) ||
       image_state.instance_id == UNMAPPED_INSTANCE_ID ||
       m_dead_instances.find(image_state.instance_id) ==
         m_dead_instances.end())) {
    return true;
  }

  // remember the policy action of the finished step before transit()
  // replaces the transition
  auto finish_policy_action = transition.finish_policy_action;
  StateTransition::transit(image_state.state, &image_state.transition);
  if (transition.finish_state) {
    // in-progress state machine complete
    ceph_assert(StateTransition::is_idle(*transition.finish_state));
    image_state.state = *transition.finish_state;
    image_state.transition = {};
  }

  if (StateTransition::is_idle(image_state.state) && image_state.next_state) {
    // advance to pending state machine
    bool start_action = set_state(&image_state, *image_state.next_state, false);
    ceph_assert(start_action);
  }

  // image state may get purged in execute_policy_action(), so the return
  // value is computed before that call
  bool pending_action = image_state.transition.action_type != ACTION_TYPE_NONE;
  if (finish_policy_action) {
    execute_policy_action(global_image_id, &image_state, *finish_policy_action);
  }

  return pending_action;
}
m_map[instance_id].erase(global_image_id); + image_state->instance_id = UNMAPPED_INSTANCE_ID; + image_state->mapped_time = {}; + + if (is_dead_instance(instance_id) && m_map[instance_id].empty()) { + dout(5) << "removing dead instance_id=" << instance_id << dendl; + m_map.erase(instance_id); + m_dead_instances.erase(instance_id); + } +} + +bool Policy::is_image_shuffling(const std::string &global_image_id) { + ceph_assert(ceph_mutex_is_locked(m_map_lock)); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + auto& image_state = it->second; + + // avoid attempting to re-shuffle a pending shuffle + auto result = is_state_scheduled(image_state, + StateTransition::STATE_SHUFFLING); + dout(20) << "global_image_id=" << global_image_id << ", " + << "result=" << result << dendl; + return result; +} + +bool Policy::can_shuffle_image(const std::string &global_image_id) { + ceph_assert(ceph_mutex_is_locked(m_map_lock)); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + int migration_throttle = cct->_conf.get_val<uint64_t>( + "rbd_mirror_image_policy_migration_throttle"); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + auto& image_state = it->second; + + utime_t last_shuffled_time = image_state.mapped_time; + + // idle images that haven't been recently remapped can shuffle + utime_t now = ceph_clock_now(); + auto result = (StateTransition::is_idle(image_state.state) && + ((migration_throttle <= 0) || + (now - last_shuffled_time >= migration_throttle))); + dout(10) << "global_image_id=" << global_image_id << ", " + << "migration_throttle=" << migration_throttle << ", " + << "last_shuffled_time=" << last_shuffled_time << ", " + << "result=" << result << dendl; + return result; +} + +bool Policy::set_state(ImageState* image_state, StateTransition::State state, + bool ignore_current_state) { + if (!ignore_current_state && image_state->state == state) { + 
image_state->next_state = boost::none; + return false; + } else if (StateTransition::is_idle(image_state->state)) { + image_state->state = state; + image_state->next_state = boost::none; + + StateTransition::transit(image_state->state, &image_state->transition); + ceph_assert(image_state->transition.action_type != ACTION_TYPE_NONE); + ceph_assert(!image_state->transition.finish_state); + return true; + } + + image_state->next_state = state; + return false; +} + +bool Policy::is_state_scheduled(const ImageState& image_state, + StateTransition::State state) const { + return (image_state.state == state || + (image_state.next_state && *image_state.next_state == state)); +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/Policy.h b/src/tools/rbd_mirror/image_map/Policy.h new file mode 100644 index 000000000..b256e2f1d --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Policy.h @@ -0,0 +1,122 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H + +#include <map> +#include <tuple> +#include <boost/optional.hpp> + +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/image_map/StateTransition.h" +#include "tools/rbd_mirror/image_map/Types.h" + +class Context; + +namespace rbd { +namespace mirror { +namespace image_map { + +class Policy { +public: + Policy(librados::IoCtx &ioctx); + + virtual ~Policy() { + } + + // init -- called during initialization + void init( + const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping); + + // lookup an image from the map + LookupInfo lookup(const std::string &global_image_id); + + // add, remove + bool add_image(const std::string &global_image_id); + bool remove_image(const std::string &global_image_id); + + // shuffle images when instances are added/removed + void 
add_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + void remove_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + + ActionType start_action(const std::string &global_image_id); + bool finish_action(const std::string &global_image_id, int r); + +protected: + typedef std::map<std::string, std::set<std::string> > InstanceToImageMap; + + bool is_dead_instance(const std::string instance_id) { + ceph_assert(ceph_mutex_is_locked(m_map_lock)); + return m_dead_instances.find(instance_id) != m_dead_instances.end(); + } + + bool is_image_shuffling(const std::string &global_image_id); + bool can_shuffle_image(const std::string &global_image_id); + + // map an image (global image id) to an instance + virtual std::string do_map(const InstanceToImageMap& map, + const std::string &global_image_id) = 0; + + // shuffle images when instances are added/removed + virtual void do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) = 0; + +private: + struct ImageState { + std::string instance_id = UNMAPPED_INSTANCE_ID; + utime_t mapped_time; + + ImageState() {} + ImageState(const std::string& instance_id, const utime_t& mapped_time) + : instance_id(instance_id), mapped_time(mapped_time) { + } + + // active state and action + StateTransition::State state = StateTransition::STATE_UNASSOCIATED; + StateTransition::Transition transition; + + // next scheduled state + boost::optional<StateTransition::State> next_state = boost::none; + }; + + typedef std::map<std::string, ImageState> ImageStates; + + librados::IoCtx &m_ioctx; + + ceph::shared_mutex m_map_lock; // protects m_map + InstanceToImageMap m_map; // instance_id -> global_id map + + ImageStates m_image_states; + std::set<std::string> m_dead_instances; + + bool m_initial_update = true; + + void remove_instances(const ceph::shared_mutex& lock, + const InstanceIds &instance_ids, + GlobalImageIds* 
global_image_ids); + + bool set_state(ImageState* image_state, StateTransition::State state, + bool ignore_current_state); + + void execute_policy_action(const std::string& global_image_id, + ImageState* image_state, + StateTransition::PolicyAction policy_action); + + void map(const std::string& global_image_id, ImageState* image_state); + void unmap(const std::string &global_image_id, ImageState* image_state); + + bool is_state_scheduled(const ImageState& image_state, + StateTransition::State state) const; + +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.cc b/src/tools/rbd_mirror/image_map/SimplePolicy.cc new file mode 100644 index 000000000..f26805819 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/SimplePolicy.cc @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "SimplePolicy.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::SimplePolicy: " << this \ + << " " << __func__ << ": " +namespace rbd { +namespace mirror { +namespace image_map { + +SimplePolicy::SimplePolicy(librados::IoCtx &ioctx) + : Policy(ioctx) { +} + +size_t SimplePolicy::calc_images_per_instance(const InstanceToImageMap& map, + size_t image_count) { + size_t nr_instances = 0; + for (auto const &it : map) { + if (!Policy::is_dead_instance(it.first)) { + ++nr_instances; + } + } + ceph_assert(nr_instances > 0); + + size_t images_per_instance = image_count / nr_instances; + if (images_per_instance == 0) { + ++images_per_instance; + } + + return images_per_instance; +} + +void SimplePolicy::do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) { + 
uint64_t images_per_instance = calc_images_per_instance(map, image_count); + dout(5) << "images per instance=" << images_per_instance << dendl; + + for (auto const &instance : map) { + if (instance.second.size() <= images_per_instance) { + continue; + } + + auto it = instance.second.begin(); + uint64_t cut_off = instance.second.size() - images_per_instance; + + while (it != instance.second.end() && cut_off > 0) { + if (Policy::is_image_shuffling(*it)) { + --cut_off; + } else if (Policy::can_shuffle_image(*it)) { + --cut_off; + remap_global_image_ids->emplace(*it); + } + + ++it; + } + } +} + +std::string SimplePolicy::do_map(const InstanceToImageMap& map, + const std::string &global_image_id) { + auto min_it = map.end(); + for (auto it = map.begin(); it != map.end(); ++it) { + ceph_assert(it->second.find(global_image_id) == it->second.end()); + if (Policy::is_dead_instance(it->first)) { + continue; + } else if (min_it == map.end()) { + min_it = it; + } else if (it->second.size() < min_it->second.size()) { + min_it = it; + } + } + + ceph_assert(min_it != map.end()); + dout(20) << "global_image_id=" << global_image_id << " maps to instance_id=" + << min_it->first << dendl; + return min_it->first; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.h b/src/tools/rbd_mirror/image_map/SimplePolicy.h new file mode 100644 index 000000000..ad2071b2c --- /dev/null +++ b/src/tools/rbd_mirror/image_map/SimplePolicy.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H + +#include "Policy.h" + +namespace rbd { +namespace mirror { +namespace image_map { + +class SimplePolicy : public Policy { +public: + static SimplePolicy *create(librados::IoCtx &ioctx) { + return new SimplePolicy(ioctx); + } + +protected: + 
SimplePolicy(librados::IoCtx &ioctx); + + std::string do_map(const InstanceToImageMap& map, + const std::string &global_image_id) override; + + void do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) override; + +private: + size_t calc_images_per_instance(const InstanceToImageMap& map, + size_t image_count); + +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H diff --git a/src/tools/rbd_mirror/image_map/StateTransition.cc b/src/tools/rbd_mirror/image_map/StateTransition.cc new file mode 100644 index 000000000..ec5f07ff9 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/StateTransition.cc @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <ostream> +#include "include/ceph_assert.h" +#include "StateTransition.h" + +namespace rbd { +namespace mirror { +namespace image_map { + +std::ostream &operator<<(std::ostream &os, + const StateTransition::State &state) { + switch(state) { + case StateTransition::STATE_INITIALIZING: + os << "INITIALIZING"; + break; + case StateTransition::STATE_ASSOCIATING: + os << "ASSOCIATING"; + break; + case StateTransition::STATE_ASSOCIATED: + os << "ASSOCIATED"; + break; + case StateTransition::STATE_SHUFFLING: + os << "SHUFFLING"; + break; + case StateTransition::STATE_DISSOCIATING: + os << "DISSOCIATING"; + break; + case StateTransition::STATE_UNASSOCIATED: + os << "UNASSOCIATED"; + break; + } + return os; +} + +std::ostream &operator<<(std::ostream &os, + const StateTransition::PolicyAction &policy_action) { + switch(policy_action) { + case StateTransition::POLICY_ACTION_MAP: + os << "MAP"; + break; + case StateTransition::POLICY_ACTION_UNMAP: + os << "UNMAP"; + break; + case StateTransition::POLICY_ACTION_REMOVE: + os << "REMOVE"; + break; + } + return os; +} + +const StateTransition::TransitionTable 
StateTransition::s_transition_table { + // state current_action Transition + // --------------------------------------------------------------------------- + {{STATE_INITIALIZING, ACTION_TYPE_NONE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_INITIALIZING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}}, + + {{STATE_ASSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_MAP_UPDATE, + {POLICY_ACTION_MAP}, {}, {}}}, + {{STATE_ASSOCIATING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_ASSOCIATING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}}, + + {{STATE_DISSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {}, + {POLICY_ACTION_UNMAP}, {}}}, + {{STATE_DISSOCIATING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_REMOVE, {}, + {POLICY_ACTION_REMOVE}, {}}}, + {{STATE_DISSOCIATING, ACTION_TYPE_MAP_REMOVE}, {ACTION_TYPE_NONE, {}, + {}, {STATE_UNASSOCIATED}}}, + + {{STATE_SHUFFLING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {}, + {POLICY_ACTION_UNMAP}, {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_UPDATE, + {POLICY_ACTION_MAP}, {}, {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}} +}; + +void StateTransition::transit(State state, Transition* transition) { + auto it = s_transition_table.find({state, transition->action_type}); + ceph_assert(it != s_transition_table.end()); + + *transition = it->second; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/StateTransition.h b/src/tools/rbd_mirror/image_map/StateTransition.h new file mode 100644 index 000000000..02a5ce4e9 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/StateTransition.h @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef 
// Static description of the per-image state machine used by the image map
// policy: the set of states, the policy-internal actions and the table that
// maps (state, current action) to the next step.
class StateTransition {
public:
  // States an image can be in. Only STATE_UNASSOCIATED and STATE_ASSOCIATED
  // are resting ("idle") states -- see is_idle().
  enum State {
    STATE_UNASSOCIATED,
    STATE_INITIALIZING,
    STATE_ASSOCIATING,
    STATE_ASSOCIATED,
    STATE_SHUFFLING,
    STATE_DISSOCIATING
  };

  // Bookkeeping actions carried by a Transition as its start/finish hooks.
  enum PolicyAction {
    POLICY_ACTION_MAP,
    POLICY_ACTION_UNMAP,
    POLICY_ACTION_REMOVE
  };

  // One step of a state machine.
  struct Transition {
    // image map action
    ActionType action_type = ACTION_TYPE_NONE;

    // policy internal action
    // (optional hooks: one for when the step starts, one for when it
    // finishes)
    boost::optional<PolicyAction> start_policy_action;
    boost::optional<PolicyAction> finish_policy_action;

    // state machine complete
    // (when set, this is the state to settle in)
    boost::optional<State> finish_state;

    Transition() {
    }
    Transition(ActionType action_type,
               const boost::optional<PolicyAction>& start_policy_action,
               const boost::optional<PolicyAction>& finish_policy_action,
               const boost::optional<State>& finish_state)
      : action_type(action_type), start_policy_action(start_policy_action),
        finish_policy_action(finish_policy_action), finish_state(finish_state) {
    }
  };

  // True for the two resting states (unassociated / associated).
  static bool is_idle(State state) {
    return (state == STATE_UNASSOCIATED || state == STATE_ASSOCIATED);
  }

  // Overwrites *transition with the table entry for
  // (state, transition->action_type).
  static void transit(State state, Transition* transition);

private:
  // (state, action currently recorded in the transition) -> next step
  typedef std::pair<State, ActionType> TransitionKey;
  typedef std::map<TransitionKey, Transition> TransitionTable;

  // image transition table
  static const TransitionTable s_transition_table;
};
// Variant visitor that dumps the held policy-meta alternative: it first
// writes the alternative's TYPE (stringified) under the configured key and
// then lets the alternative dump its own fields.
class DumpVisitor : public boost::static_visitor<void> {
public:
  // @param formatter destination formatter (not owned)
  // @param key       name under which the type is dumped
  explicit DumpVisitor(Formatter *formatter, const std::string &key)
    : m_formatter(formatter), m_key(key) {}

  template <typename T>
  inline void operator()(const T& t) const {
    // NOTE(review): T::TYPE is copied into a local before being passed on --
    // presumably so that no reference is bound to the static member itself
    // (which would require an out-of-class definition); confirm before
    // inlining it.
    auto type = T::TYPE;
    m_formatter->dump_string(m_key.c_str(), stringify(type));
    t.dump(m_formatter);
  }
private:
  ceph::Formatter *m_formatter;
  std::string m_key;
};
PolicyData::encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + boost::apply_visitor(EncodeVisitor(bl), policy_meta); + ENCODE_FINISH(bl); +} + +void PolicyData::decode(bufferlist::const_iterator& it) { + DECODE_START(1, it); + + uint32_t policy_meta_type; + decode(policy_meta_type, it); + + switch (policy_meta_type) { + case POLICY_META_TYPE_NONE: + policy_meta = PolicyMetaNone(); + break; + default: + policy_meta = PolicyMetaUnknown(); + break; + } + + boost::apply_visitor(DecodeVisitor(struct_v, it), policy_meta); + DECODE_FINISH(it); +} + +void PolicyData::dump(Formatter *f) const { + boost::apply_visitor(DumpVisitor(f, "policy_meta_type"), policy_meta); +} + +void PolicyData::generate_test_instances(std::list<PolicyData *> &o) { + o.push_back(new PolicyData(PolicyMetaNone())); +} + +std::ostream &operator<<(std::ostream &os, const ActionType& action_type) { + switch (action_type) { + case ACTION_TYPE_NONE: + os << "NONE"; + break; + case ACTION_TYPE_MAP_UPDATE: + os << "MAP_UPDATE"; + break; + case ACTION_TYPE_MAP_REMOVE: + os << "MAP_REMOVE"; + break; + case ACTION_TYPE_ACQUIRE: + os << "ACQUIRE"; + break; + case ACTION_TYPE_RELEASE: + os << "RELEASE"; + break; + default: + os << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")"; + break; + } + return os; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/Types.h b/src/tools/rbd_mirror/image_map/Types.h new file mode 100644 index 000000000..5a97430f3 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Types.h @@ -0,0 +1,130 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H + +#include <iosfwd> +#include <map> +#include <set> +#include <string> +#include <boost/variant.hpp> + +#include "include/buffer.h" +#include "include/encoding.h" +#include "include/utime.h" +#include 
// Placeholder policy-meta alternative: it is what a default-constructed
// PolicyData holds and what PolicyData::decode() selects for a type tag it
// does not recognize.
struct PolicyMetaUnknown {
  // NOTE(review): -1 is not an enumerator of PolicyMetaType and the enum has
  // no fixed underlying type -- confirm this cast is accepted as a constant
  // expression by all supported compilers.
  static const PolicyMetaType TYPE = static_cast<PolicyMetaType>(-1);

  PolicyMetaUnknown() {
  }

  // An unknown policy meta must never be written out: encoding aborts.
  void encode(bufferlist& bl) const {
    ceph_abort();
  }

  // Nothing is consumed from the iterator.
  void decode(__u8 version, bufferlist::const_iterator& it) {
  }

  // Nothing to dump.
  void dump(Formatter *f) const {
  }
};
const; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; + + static void generate_test_instances(std::list<PolicyData *> &o); +}; + +WRITE_CLASS_ENCODER(PolicyData); + +std::ostream &operator<<(std::ostream &os, const ActionType &action_type); + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.cc b/src/tools/rbd_mirror/image_map/UpdateRequest.cc new file mode 100644 index 000000000..799c5670f --- /dev/null +++ b/src/tools/rbd_mirror/image_map/UpdateRequest.cc @@ -0,0 +1,100 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" + +#include "UpdateRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::UpdateRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_map { + +using librbd::util::create_rados_callback; + +static const uint32_t MAX_UPDATE = 256; + +template <typename I> +UpdateRequest<I>::UpdateRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish) + : m_ioctx(ioctx), + m_update_mapping(update_mapping), + m_remove_global_image_ids(remove_global_image_ids), + m_on_finish(on_finish) { +} + +template <typename I> +void UpdateRequest<I>::send() { + dout(20) << dendl; + + update_image_map(); +} + +template <typename I> +void UpdateRequest<I>::update_image_map() { + dout(20) << dendl; + + if (m_update_mapping.empty() && m_remove_global_image_ids.empty()) { + finish(0); + return; + } + + uint32_t nr_updates 
= 0; + librados::ObjectWriteOperation op; + + auto it1 = m_update_mapping.begin(); + while (it1 != m_update_mapping.end() && nr_updates++ < MAX_UPDATE) { + librbd::cls_client::mirror_image_map_update(&op, it1->first, it1->second); + it1 = m_update_mapping.erase(it1); + } + + auto it2 = m_remove_global_image_ids.begin(); + while (it2 != m_remove_global_image_ids.end() && nr_updates++ < MAX_UPDATE) { + librbd::cls_client::mirror_image_map_remove(&op, *it2); + it2 = m_remove_global_image_ids.erase(it2); + } + + librados::AioCompletion *aio_comp = create_rados_callback< + UpdateRequest, &UpdateRequest::handle_update_image_map>(this); + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void UpdateRequest<I>::handle_update_image_map(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update image map: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + update_image_map(); +} + +template <typename I> +void UpdateRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_map::UpdateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.h b/src/tools/rbd_mirror/image_map/UpdateRequest.h new file mode 100644 index 000000000..841cc6f9b --- /dev/null +++ b/src/tools/rbd_mirror/image_map/UpdateRequest.h @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H + +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +class Context; + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_map { + +template<typename ImageCtxT = 
// Factory: allocates a request that takes ownership of the image-map
// entries to update and of the set of global image ids to remove.
// The returned object is heap-allocated; it deletes itself once it has
// completed `on_finish` (see finish()), so the caller must not delete it.
static UpdateRequest *create(librados::IoCtx &ioctx,
                             std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
                             std::set<std::string> &&remove_global_image_ids, Context *on_finish) {
  return new UpdateRequest(ioctx, std::move(update_mapping), std::move(remove_global_image_ids),
                           on_finish);
}
+#include "librbd/internal.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/BaseRequest.h" +#include "tools/rbd_mirror/ImageSync.h" +#include "tools/rbd_mirror/ProgressContext.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h" +#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" +#include "tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "BootstrapRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; +using librbd::util::unique_lock_name; + +template <typename I> +BootstrapRequest<I>::BootstrapRequest( + Threads<I>* threads, + librados::IoCtx& local_io_ctx, + librados::IoCtx& remote_io_ctx, + InstanceWatcher<I>* instance_watcher, + const std::string& global_image_id, + const std::string& local_mirror_uuid, + const RemotePoolMeta& remote_pool_meta, + ::journal::CacheManagerHandler* cache_manager_handler, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + StateBuilder<I>** state_builder, + bool* do_resync, + Context* on_finish) + : CancelableRequest("rbd::mirror::image_replayer::BootstrapRequest", + reinterpret_cast<CephContext*>(local_io_ctx.cct()), + on_finish), + m_threads(threads), + m_local_io_ctx(local_io_ctx), + m_remote_io_ctx(remote_io_ctx), + m_instance_watcher(instance_watcher), + m_global_image_id(global_image_id), + m_local_mirror_uuid(local_mirror_uuid), + m_remote_pool_meta(remote_pool_meta), + m_cache_manager_handler(cache_manager_handler), + m_pool_meta_cache(pool_meta_cache), + 
m_progress_ctx(progress_ctx), + m_state_builder(state_builder), + m_do_resync(do_resync), + m_lock(ceph::make_mutex(unique_lock_name("BootstrapRequest::m_lock", + this))) { + dout(10) << dendl; +} + +template <typename I> +bool BootstrapRequest<I>::is_syncing() const { + std::lock_guard locker{m_lock}; + return (m_image_sync != nullptr); +} + +template <typename I> +void BootstrapRequest<I>::send() { + *m_do_resync = false; + + prepare_local_image(); +} + +template <typename I> +void BootstrapRequest<I>::cancel() { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + m_canceled = true; + + if (m_image_sync != nullptr) { + m_image_sync->cancel(); + } +} + +template <typename I> +std::string BootstrapRequest<I>::get_local_image_name() const { + std::unique_lock locker{m_lock}; + return m_local_image_name; +} + +template <typename I> +void BootstrapRequest<I>::prepare_local_image() { + dout(10) << dendl; + update_progress("PREPARE_LOCAL_IMAGE"); + + { + std::unique_lock locker{m_lock}; + m_local_image_name = m_global_image_id; + } + + ceph_assert(*m_state_builder == nullptr); + auto ctx = create_context_callback< + BootstrapRequest, &BootstrapRequest<I>::handle_prepare_local_image>(this); + auto req = image_replayer::PrepareLocalImageRequest<I>::create( + m_local_io_ctx, m_global_image_id, &m_prepare_local_image_name, + m_state_builder, m_threads->work_queue, ctx); + req->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_prepare_local_image(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r < 0 || *m_state_builder != nullptr); + if (r == -ENOENT) { + dout(10) << "local image does not exist" << dendl; + } else if (r < 0) { + derr << "error preparing local image for replay: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + // image replayer will detect the name change (if any) at next + // status update + if (r >= 0 && !m_prepare_local_image_name.empty()) { + std::unique_lock locker{m_lock}; + m_local_image_name = 
m_prepare_local_image_name; + } + + prepare_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::prepare_remote_image() { + dout(10) << dendl; + update_progress("PREPARE_REMOTE_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest, &BootstrapRequest<I>::handle_prepare_remote_image>(this); + auto req = image_replayer::PrepareRemoteImageRequest<I>::create( + m_threads, m_local_io_ctx, m_remote_io_ctx, m_global_image_id, + m_local_mirror_uuid, m_remote_pool_meta, m_cache_manager_handler, + m_state_builder, ctx); + req->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_prepare_remote_image(int r) { + dout(10) << "r=" << r << dendl; + + auto state_builder = *m_state_builder; + ceph_assert(state_builder == nullptr || + !state_builder->remote_mirror_uuid.empty()); + + if (state_builder != nullptr && state_builder->is_local_primary()) { + dout(5) << "local image is primary" << dendl; + finish(-ENOMSG); + return; + } else if (r == -ENOENT || state_builder == nullptr) { + dout(10) << "remote image does not exist"; + if (state_builder != nullptr) { + *_dout << ": " + << "local_image_id=" << state_builder->local_image_id << ", " + << "remote_image_id=" << state_builder->remote_image_id << ", " + << "is_linked=" << state_builder->is_linked(); + } + *_dout << dendl; + + // TODO need to support multiple remote images + if (state_builder != nullptr && + state_builder->remote_image_id.empty() && + (state_builder->local_image_id.empty() || + state_builder->is_linked())) { + // both images doesn't exist or local image exists and is non-primary + // and linked to the missing remote image + finish(-ENOLINK); + } else { + finish(-ENOENT); + } + return; + } else if (r < 0) { + derr << "error preparing remote image for replay: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + if (!state_builder->is_remote_primary()) { + ceph_assert(!state_builder->remote_image_id.empty()); + if (state_builder->local_image_id.empty()) { 
+ dout(10) << "local image does not exist and remote image is not primary" + << dendl; + finish(-EREMOTEIO); + return; + } else if (!state_builder->is_linked()) { + dout(10) << "local image is unlinked and remote image is not primary" + << dendl; + finish(-EREMOTEIO); + return; + } + // if the local image is linked to the remote image, we ignore that + // the remote image is not primary so that we can replay demotion + } + + open_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::open_remote_image() { + ceph_assert(*m_state_builder != nullptr); + auto remote_image_id = (*m_state_builder)->remote_image_id; + dout(15) << "remote_image_id=" << remote_image_id << dendl; + + update_progress("OPEN_REMOTE_IMAGE"); + + auto ctx = create_context_callback< + BootstrapRequest<I>, + &BootstrapRequest<I>::handle_open_remote_image>(this); + ceph_assert(*m_state_builder != nullptr); + OpenImageRequest<I> *request = OpenImageRequest<I>::create( + m_remote_io_ctx, &(*m_state_builder)->remote_image_ctx, remote_image_id, + false, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_open_remote_image(int r) { + dout(15) << "r=" << r << dendl; + + ceph_assert(*m_state_builder != nullptr); + if (r < 0) { + derr << "failed to open remote image: " << cpp_strerror(r) << dendl; + ceph_assert((*m_state_builder)->remote_image_ctx == nullptr); + finish(r); + return; + } + + if ((*m_state_builder)->local_image_id.empty()) { + create_local_image(); + return; + } + + open_local_image(); +} + +template <typename I> +void BootstrapRequest<I>::open_local_image() { + ceph_assert(*m_state_builder != nullptr); + auto local_image_id = (*m_state_builder)->local_image_id; + + dout(15) << "local_image_id=" << local_image_id << dendl; + + update_progress("OPEN_LOCAL_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_local_image>( + this); + OpenLocalImageRequest<I> *request = 
OpenLocalImageRequest<I>::create( + m_local_io_ctx, &(*m_state_builder)->local_image_ctx, local_image_id, + m_threads->work_queue, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_open_local_image(int r) { + dout(15) << "r=" << r << dendl; + + ceph_assert(*m_state_builder != nullptr); + auto local_image_ctx = (*m_state_builder)->local_image_ctx; + ceph_assert((r >= 0 && local_image_ctx != nullptr) || + (r < 0 && local_image_ctx == nullptr)); + + if (r == -ENOENT) { + dout(10) << "local image missing" << dendl; + create_local_image(); + return; + } else if (r == -EREMOTEIO) { + dout(10) << "local image is primary -- skipping image replay" << dendl; + m_ret_val = r; + close_remote_image(); + return; + } else if (r < 0) { + derr << "failed to open local image: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + prepare_replay(); +} + +template <typename I> +void BootstrapRequest<I>::prepare_replay() { + dout(10) << dendl; + update_progress("PREPARE_REPLAY"); + + ceph_assert(*m_state_builder != nullptr); + auto ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_prepare_replay>(this); + auto request = (*m_state_builder)->create_prepare_replay_request( + m_local_mirror_uuid, m_progress_ctx, m_do_resync, &m_syncing, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_prepare_replay(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to prepare local replay: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_remote_image(); + return; + } else if (*m_do_resync) { + dout(10) << "local image resync requested" << dendl; + close_remote_image(); + return; + } else if ((*m_state_builder)->is_disconnected()) { + dout(10) << "client flagged disconnected -- skipping bootstrap" << dendl; + // The caller is expected to detect disconnect initializing remote journal. 
+ m_ret_val = 0; + close_remote_image(); + return; + } else if (m_syncing) { + dout(10) << "local image still syncing to remote image" << dendl; + image_sync(); + return; + } + + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::create_local_image() { + dout(10) << dendl; + update_progress("CREATE_LOCAL_IMAGE"); + + ceph_assert(*m_state_builder != nullptr); + auto ctx = create_context_callback< + BootstrapRequest<I>, + &BootstrapRequest<I>::handle_create_local_image>(this); + auto request = (*m_state_builder)->create_local_image_request( + m_threads, m_local_io_ctx, m_global_image_id, m_pool_meta_cache, + m_progress_ctx, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_create_local_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + if (r == -ENOENT) { + dout(10) << "parent image does not exist" << dendl; + } else { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + } + m_ret_val = r; + close_remote_image(); + return; + } + + open_local_image(); +} + +template <typename I> +void BootstrapRequest<I>::image_sync() { + std::unique_lock locker{m_lock}; + if (m_canceled) { + locker.unlock(); + + m_ret_val = -ECANCELED; + dout(10) << "request canceled" << dendl; + close_remote_image(); + return; + } + + dout(15) << dendl; + ceph_assert(m_image_sync == nullptr); + + auto state_builder = *m_state_builder; + auto sync_point_handler = state_builder->create_sync_point_handler(); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_image_sync>(this); + m_image_sync = ImageSync<I>::create( + m_threads, state_builder->local_image_ctx, state_builder->remote_image_ctx, + m_local_mirror_uuid, sync_point_handler, m_instance_watcher, + m_progress_ctx, ctx); + m_image_sync->get(); + locker.unlock(); + + update_progress("IMAGE_SYNC"); + m_image_sync->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_image_sync(int r) { + 
dout(15) << "r=" << r << dendl; + + { + std::lock_guard locker{m_lock}; + m_image_sync->put(); + m_image_sync = nullptr; + + (*m_state_builder)->destroy_sync_point_handler(); + } + + if (r < 0) { + if (r == -ECANCELED) { + dout(10) << "request canceled" << dendl; + } else { + derr << "failed to sync remote image: " << cpp_strerror(r) << dendl; + } + m_ret_val = r; + } + + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::close_remote_image() { + if ((*m_state_builder)->replay_requires_remote_image()) { + finish(m_ret_val); + return; + } + + dout(15) << dendl; + + update_progress("CLOSE_REMOTE_IMAGE"); + + auto ctx = create_context_callback< + BootstrapRequest<I>, + &BootstrapRequest<I>::handle_close_remote_image>(this); + ceph_assert(*m_state_builder != nullptr); + (*m_state_builder)->close_remote_image(ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_close_remote_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "error encountered closing remote image: " << cpp_strerror(r) + << dendl; + } + + finish(m_ret_val); +} + +template <typename I> +void BootstrapRequest<I>::update_progress(const std::string &description) { + dout(15) << description << dendl; + + if (m_progress_ctx) { + m_progress_ctx->update_progress(description); + } +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h new file mode 100644 index 000000000..f5bb8dd8a --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h @@ -0,0 +1,181 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H + +#include "include/int_types.h" 
+#include "include/rados/librados.hpp"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/Types.h"
+#include "tools/rbd_mirror/CancelableRequest.h"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+class Context;
+
+namespace journal { class CacheManagerHandler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+
+template <typename> class ImageSync;
+template <typename> class InstanceWatcher;
+struct PoolMetaCache;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+template <typename> class StateBuilder;
+
+// Cancelable state machine that prepares a local/remote image pair for
+// replay; the individual steps are shown in the diagram below.
+template <typename ImageCtxT = librbd::ImageCtx>
+class BootstrapRequest : public CancelableRequest {
+public:
+  typedef rbd::mirror::ProgressContext ProgressContext;
+
+  // Heap-allocates a request; arguments are forwarded unchanged to the
+  // constructor.
+  static BootstrapRequest* create(
+      Threads<ImageCtxT>* threads,
+      librados::IoCtx& local_io_ctx,
+      librados::IoCtx& remote_io_ctx,
+      InstanceWatcher<ImageCtxT>* instance_watcher,
+      const std::string& global_image_id,
+      const std::string& local_mirror_uuid,
+      const RemotePoolMeta& remote_pool_meta,
+      ::journal::CacheManagerHandler* cache_manager_handler,
+      PoolMetaCache* pool_meta_cache,
+      ProgressContext* progress_ctx,
+      StateBuilder<ImageCtxT>** state_builder,
+      bool* do_resync,
+      Context* on_finish) {
+    return new BootstrapRequest(
+      threads, local_io_ctx, remote_io_ctx, instance_watcher, global_image_id,
+      local_mirror_uuid, remote_pool_meta, cache_manager_handler,
+      pool_meta_cache, progress_ctx, state_builder, do_resync, on_finish);
+  }
+
+  BootstrapRequest(
+      Threads<ImageCtxT>* threads,
+      librados::IoCtx& local_io_ctx,
+      librados::IoCtx& remote_io_ctx,
+      InstanceWatcher<ImageCtxT>* instance_watcher,
+      const std::string& global_image_id,
+      const std::string& local_mirror_uuid,
+      const RemotePoolMeta& remote_pool_meta,
+      ::journal::CacheManagerHandler* cache_manager_handler,
+      PoolMetaCache* pool_meta_cache,
+      ProgressContext* progress_ctx,
+      StateBuilder<ImageCtxT>** state_builder,
+      bool* do_resync,
+      Context* on_finish);
+
+  // true while an image sync request is being tracked (m_image_sync set)
+  bool is_syncing() const;
+
+  void send() override;
+  void cancel() override;
+
+  // returned by value; the name is stored under m_lock
+  std::string get_local_image_name() const;
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v                                               (error)
+   * PREPARE_LOCAL_IMAGE  * * * * * * * * * * * * * * * * * *
+   *    |                                                   *
+   *    v                                     (error)       *
+   * PREPARE_REMOTE_IMAGE * * * * * * * * * * * * * * * * * *
+   *    |                                                   *
+   *    v                                     (error)       *
+   * OPEN_REMOTE_IMAGE  * * * * * * * * * * * * * * * * * * *
+   *    |                                                   *
+   *    |                                                   *
+   *    \----> CREATE_LOCAL_IMAGE * * * * * * * * * * * * * *
+   *    |         |       ^                             *   *
+   *    |         |       .                             *   *
+   *    |         v       . (image DNE)                 *   *
+   *    \----> OPEN_LOCAL_IMAGE * * * * * * * * * * * * * * *
+   *              |                                     *   *
+   *              |                                     *   *
+   *              v                                     *   *
+   *           PREPARE_REPLAY * * * * * * * * * * * * * * * *
+   *              |                                     *   *
+   *              |                                     *   *
+   *              v (skip if not needed)                *   *
+   *           IMAGE_SYNC * * * * * * * * * * * * * * * * * *
+   *              |                                     *   *
+   *              |                                     *   *
+   *    /---------/                                     *   *
+   *    |                                               *   *
+   *    v                                               *   *
+   * CLOSE_REMOTE_IMAGE < * * * * * * * * * * * * * * * * * *
+   *    |                                                   *
+   *    v                                                   *
+   * <finish> < * * * * * * * * * * * * * * * * * * * * * * *
+   *
+   * @endverbatim
+   */
+  // collaborators and identifiers supplied by the caller
+  Threads<ImageCtxT>* m_threads;
+  librados::IoCtx &m_local_io_ctx;
+  librados::IoCtx &m_remote_io_ctx;
+  InstanceWatcher<ImageCtxT> *m_instance_watcher;
+  std::string m_global_image_id;
+  std::string m_local_mirror_uuid;
+  RemotePoolMeta m_remote_pool_meta;
+  ::journal::CacheManagerHandler *m_cache_manager_handler;
+  PoolMetaCache* m_pool_meta_cache;
+  ProgressContext *m_progress_ctx;
+  // output pointers owned by the caller
+  StateBuilder<ImageCtxT>** m_state_builder;
+  bool *m_do_resync;
+
+  // mutable so that the const accessors can lock it
+  mutable ceph::mutex m_lock;
+  bool m_canceled = false;
+
+  // result reported once the remote image has been closed
+  int m_ret_val = 0;
+
+  std::string m_local_image_name;
+  std::string m_prepare_local_image_name;
+
+  bool m_syncing = false;
+  ImageSync<ImageCtxT> *m_image_sync = nullptr;
+
+  void prepare_local_image();
+  void handle_prepare_local_image(int r);
+
+  void prepare_remote_image();
+  void handle_prepare_remote_image(int r);
+
+  void open_remote_image();
+  void handle_open_remote_image(int r);
+
+  void open_local_image();
+  void handle_open_local_image(int r);
+
+  void create_local_image();
+  void handle_create_local_image(int r);
+
+  void prepare_replay();
+  void handle_prepare_replay(int r);
+
+  void image_sync();
+  void handle_image_sync(int r);
+
+  void close_remote_image();
+  void handle_close_remote_image(int r);
+
+  void update_progress(const std::string &description);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc
new file mode 100644
index 000000000..872c8baa9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CloseImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::CloseImageRequest: " \
+                           << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+// Stores the pointer-to-pointer of the image to close and the completion
+// context; nothing is started until send().
+template <typename I>
+CloseImageRequest<I>::CloseImageRequest(I **image_ctx, Context *on_finish)
+  : m_image_ctx(image_ctx), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void CloseImageRequest<I>::send() {
+  close_image();
+}
+
+// CLOSE_IMAGE: asks the image state machine to close the image.
+template <typename I>
+void CloseImageRequest<I>::close_image() {
+  dout(20) << dendl;
+
+  Context *ctx = create_context_callback<
+    CloseImageRequest<I>, &CloseImageRequest<I>::handle_close_image>(this);
+  (*m_image_ctx)->state->close(ctx);
+}
+
+// A close error is only logged: the caller's image pointer is reset and
+// the completion context always receives 0. The request then deletes itself.
+template <typename I>
+void CloseImageRequest<I>::handle_close_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": error encountered while closing image: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  *m_image_ctx = nullptr;
+
+  m_on_finish->complete(0);
+  delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;
+
diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h
new file mode 100644
index 000000000..02481369d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+// Self-deleting one-shot request that closes an image and resets the
+// caller's image pointer.
+template <typename ImageCtxT = librbd::ImageCtx>
+class CloseImageRequest {
+public:
+  // Heap-allocates a request; see handle_close_image() for its lifetime.
+  static CloseImageRequest* create(ImageCtxT **image_ctx, Context *on_finish) {
+    return new CloseImageRequest(image_ctx, on_finish);
+  }
+
+  CloseImageRequest(ImageCtxT **image_ctx, Context *on_finish);
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * CLOSE_IMAGE
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+  ImageCtxT **m_image_ctx;
+  Context *m_on_finish;
+
+  void close_image();
+  void handle_close_image(int r);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
diff --git 
a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc new file mode 100644 index 000000000..641bb03e8 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc @@ -0,0 +1,451 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "CreateImageRequest.h" +#include "CloseImageRequest.h" +#include "OpenImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/internal.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/image/CreateRequest.h" +#include "librbd/image/CloneRequest.h" +#include "tools/rbd_mirror/PoolMetaCache.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_sync/Utils.h" +#include <boost/algorithm/string/predicate.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::CreateImageRequest: " \ + << this << " " << __func__ << ": " + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename I> +CreateImageRequest<I>::CreateImageRequest( + Threads<I>* threads, + librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + I *remote_image_ctx, + PoolMetaCache* pool_meta_cache, + cls::rbd::MirrorImageMode mirror_image_mode, + Context *on_finish) + : m_threads(threads), m_local_io_ctx(local_io_ctx), + m_global_image_id(global_image_id), + 
m_remote_mirror_uuid(remote_mirror_uuid), + m_local_image_name(local_image_name), m_local_image_id(local_image_id), + m_remote_image_ctx(remote_image_ctx), + m_pool_meta_cache(pool_meta_cache), + m_mirror_image_mode(mirror_image_mode), m_on_finish(on_finish) { +} + +template <typename I> +void CreateImageRequest<I>::send() { + int r = validate_parent(); + if (r < 0) { + error(r); + return; + } + + if (m_remote_parent_spec.pool_id == -1) { + create_image(); + } else { + get_parent_global_image_id(); + } +} + +template <typename I> +void CreateImageRequest<I>::create_image() { + dout(10) << dendl; + + using klass = CreateImageRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_create_image>(this); + + std::shared_lock image_locker{m_remote_image_ctx->image_lock}; + + auto& config{ + reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf}; + + librbd::ImageOptions image_options; + populate_image_options(&image_options); + + auto req = librbd::image::CreateRequest<I>::create( + config, m_local_io_ctx, m_local_image_name, m_local_image_id, + m_remote_image_ctx->size, image_options, 0U, m_mirror_image_mode, + m_global_image_id, m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue, + ctx); + req->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_create_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -EBADF) { + dout(5) << "image id " << m_local_image_id << " already in-use" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void CreateImageRequest<I>::get_parent_global_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_start(&op, + m_remote_parent_spec.image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + CreateImageRequest<I>, + 
&CreateImageRequest<I>::handle_get_parent_global_image_id>(this); + m_out_bl.clear(); + int r = m_remote_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_parent_global_image_id(int r) { + dout(10) << "r=" << r << dendl; + if (r == 0) { + cls::rbd::MirrorImage mirror_image; + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image); + if (r == 0) { + m_parent_global_image_id = mirror_image.global_image_id; + dout(15) << "parent_global_image_id=" << m_parent_global_image_id + << dendl; + } + } + + if (r == -ENOENT) { + dout(10) << "parent image " << m_remote_parent_spec.image_id + << " not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve global image id for parent image " + << m_remote_parent_spec.image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_local_parent_image_id(); +} + +template <typename I> +void CreateImageRequest<I>::get_local_parent_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start( + &op, m_parent_global_image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_get_local_parent_image_id>(this); + m_out_bl.clear(); + int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_local_parent_image_id(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish( + &iter, &m_local_parent_spec.image_id); + } + + if (r == -ENOENT) { + dout(10) << "parent image " << m_parent_global_image_id << " not " + << "registered locally" << dendl; 
+ finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve local image id for parent image " + << m_parent_global_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + open_remote_parent_image(); +} + +template <typename I> +void CreateImageRequest<I>::open_remote_parent_image() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_open_remote_parent_image>(this); + OpenImageRequest<I> *request = OpenImageRequest<I>::create( + m_remote_parent_io_ctx, &m_remote_parent_image_ctx, + m_remote_parent_spec.image_id, true, ctx); + request->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_open_remote_parent_image(int r) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to open remote parent image " << m_parent_pool_name << "/" + << m_remote_parent_spec.image_id << dendl; + finish(r); + return; + } + + clone_image(); +} + +template <typename I> +void CreateImageRequest<I>::clone_image() { + dout(10) << dendl; + + LocalPoolMeta local_parent_pool_meta; + int r = m_pool_meta_cache->get_local_pool_meta( + m_local_parent_io_ctx.get_id(), &local_parent_pool_meta); + if (r < 0) { + derr << "failed to retrieve local parent mirror uuid for pool " + << m_local_parent_io_ctx.get_id() << dendl; + m_ret_val = r; + close_remote_parent_image(); + return; + } + + // ensure no image sync snapshots for the local cluster exist in the + // remote image + bool found_parent_snap = false; + bool found_image_sync_snap = false; + std::string snap_name; + cls::rbd::SnapshotNamespace snap_namespace; + { + auto snap_prefix = image_sync::util::get_snapshot_name_prefix( + local_parent_pool_meta.mirror_uuid); + + std::shared_lock remote_image_locker(m_remote_parent_image_ctx->image_lock); + for (auto snap_info : m_remote_parent_image_ctx->snap_info) { + if (snap_info.first == m_remote_parent_spec.snap_id) { + found_parent_snap = true; + snap_name = 
snap_info.second.name; + snap_namespace = snap_info.second.snap_namespace; + } else if (boost::starts_with(snap_info.second.name, snap_prefix)) { + found_image_sync_snap = true; + } + } + } + + if (!found_parent_snap) { + dout(15) << "remote parent image snapshot not found" << dendl; + m_ret_val = -ENOENT; + close_remote_parent_image(); + return; + } else if (found_image_sync_snap) { + dout(15) << "parent image not synced to local cluster" << dendl; + m_ret_val = -ENOENT; + close_remote_parent_image(); + return; + } + + librbd::ImageOptions opts; + populate_image_options(&opts); + + auto& config{ + reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf}; + + using klass = CreateImageRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_clone_image>(this); + + librbd::image::CloneRequest<I> *req = librbd::image::CloneRequest<I>::create( + config, m_local_parent_io_ctx, m_local_parent_spec.image_id, snap_name, + snap_namespace, CEPH_NOSNAP, m_local_io_ctx, m_local_image_name, + m_local_image_id, opts, m_mirror_image_mode, m_global_image_id, + m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue, ctx); + req->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_clone_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -EBADF) { + dout(5) << "image id " << m_local_image_id << " already in-use" << dendl; + m_ret_val = r; + } else if (r < 0) { + derr << "failed to clone image " << m_parent_pool_name << "/" + << m_remote_parent_spec.image_id << " to " + << m_local_image_name << dendl; + m_ret_val = r; + } + + close_remote_parent_image(); +} + +template <typename I> +void CreateImageRequest<I>::close_remote_parent_image() { + dout(10) << dendl; + Context *ctx = create_context_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_close_remote_parent_image>(this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + &m_remote_parent_image_ctx, ctx); + request->send(); +} + +template 
<typename I> +void CreateImageRequest<I>::handle_close_remote_parent_image(int r) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "error encountered closing remote parent image: " + << cpp_strerror(r) << dendl; + } + + finish(m_ret_val); +} + +template <typename I> +void CreateImageRequest<I>::error(int r) { + dout(10) << "r=" << r << dendl; + + m_threads->work_queue->queue(create_context_callback< + CreateImageRequest<I>, &CreateImageRequest<I>::finish>(this), r); +} + +template <typename I> +void CreateImageRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + m_on_finish->complete(r); + delete this; +} + +template <typename I> +int CreateImageRequest<I>::validate_parent() { + std::shared_lock owner_locker{m_remote_image_ctx->owner_lock}; + std::shared_lock image_locker{m_remote_image_ctx->image_lock}; + + m_remote_parent_spec = m_remote_image_ctx->parent_md.spec; + + // scan all remote snapshots for a linked parent + for (auto &snap_info_pair : m_remote_image_ctx->snap_info) { + auto &parent_spec = snap_info_pair.second.parent.spec; + if (parent_spec.pool_id == -1) { + continue; + } else if (m_remote_parent_spec.pool_id == -1) { + m_remote_parent_spec = parent_spec; + continue; + } + + if (m_remote_parent_spec != parent_spec) { + derr << "remote image parent spec mismatch" << dendl; + return -EINVAL; + } + } + + if (m_remote_parent_spec.pool_id == -1) { + return 0; + } + + // map remote parent pool to local parent pool + int r = librbd::util::create_ioctx( + m_remote_image_ctx->md_ctx, "remote parent pool", + m_remote_parent_spec.pool_id, m_remote_parent_spec.pool_namespace, + &m_remote_parent_io_ctx); + if (r < 0) { + derr << "failed to open remote parent pool " << m_remote_parent_spec.pool_id + << ": " << cpp_strerror(r) << dendl; + return r; + } + + m_parent_pool_name = m_remote_parent_io_ctx.get_pool_name(); + + librados::Rados local_rados(m_local_io_ctx); + r = local_rados.ioctx_create(m_parent_pool_name.c_str(), + 
m_local_parent_io_ctx); + if (r < 0) { + derr << "failed to open local parent pool " << m_parent_pool_name << ": " + << cpp_strerror(r) << dendl; + return r; + } + m_local_parent_io_ctx.set_namespace(m_remote_parent_io_ctx.get_namespace()); + + return 0; +} + +template <typename I> +void CreateImageRequest<I>::populate_image_options( + librbd::ImageOptions* image_options) { + image_options->set(RBD_IMAGE_OPTION_FEATURES, + m_remote_image_ctx->features); + image_options->set(RBD_IMAGE_OPTION_ORDER, m_remote_image_ctx->order); + image_options->set(RBD_IMAGE_OPTION_STRIPE_UNIT, + m_remote_image_ctx->stripe_unit); + image_options->set(RBD_IMAGE_OPTION_STRIPE_COUNT, + m_remote_image_ctx->stripe_count); + + // Determine the data pool for the local image as follows: + // 1. If the local pool has a default data pool, use it. + // 2. If the remote image has a data pool different from its metadata pool and + // a pool with the same name exists locally, use it. + // 3. Don't set the data pool explicitly. 
+ std::string data_pool; + librados::Rados local_rados(m_local_io_ctx); + auto default_data_pool = g_ceph_context->_conf.get_val<std::string>("rbd_default_data_pool"); + auto remote_md_pool = m_remote_image_ctx->md_ctx.get_pool_name(); + auto remote_data_pool = m_remote_image_ctx->data_ctx.get_pool_name(); + + if (default_data_pool != "") { + data_pool = default_data_pool; + } else if (remote_data_pool != remote_md_pool) { + if (local_rados.pool_lookup(remote_data_pool.c_str()) >= 0) { + data_pool = remote_data_pool; + } + } + + if (data_pool != "") { + image_options->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool); + } + + if (m_remote_parent_spec.pool_id != -1) { + uint64_t clone_format = 1; + if (m_remote_image_ctx->test_op_features( + RBD_OPERATION_FEATURE_CLONE_CHILD)) { + clone_format = 2; + } + image_options->set(RBD_IMAGE_OPTION_CLONE_FORMAT, clone_format); + } +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h new file mode 100644 index 000000000..2ff7794e8 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h @@ -0,0 +1,144 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "include/types.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/Types.h" +#include <string> + +class Context; +namespace librbd { class ImageCtx; } +namespace librbd { class ImageOptions; } + +namespace rbd { +namespace mirror { + +class PoolMetaCache; +template <typename> struct Threads; + +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> 
+class CreateImageRequest { +public: + static CreateImageRequest *create( + Threads<ImageCtxT> *threads, + librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + ImageCtxT *remote_image_ctx, + PoolMetaCache* pool_meta_cache, + cls::rbd::MirrorImageMode mirror_image_mode, + Context *on_finish) { + return new CreateImageRequest(threads, local_io_ctx, global_image_id, + remote_mirror_uuid, local_image_name, + local_image_id, remote_image_ctx, + pool_meta_cache, mirror_image_mode, + on_finish); + } + + CreateImageRequest( + Threads<ImageCtxT> *threads, librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + ImageCtxT *remote_image_ctx, + PoolMetaCache* pool_meta_cache, + cls::rbd::MirrorImageMode mirror_image_mode, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * | * + * | (non-clone) * + * |\------------> CREATE_IMAGE ---------------------\ * (error) + * | | * + * | (clone) | * + * \-------------> GET_PARENT_GLOBAL_IMAGE_ID * * * | * * * * + * | | * * + * v | * + * GET_LOCAL_PARENT_IMAGE_ID * * * * | * * * * + * | | * * + * v | * + * OPEN_REMOTE_PARENT * * * * * * * | * * * * + * | | * * + * v | * + * CLONE_IMAGE | * + * | | * + * v | * + * CLOSE_REMOTE_PARENT | * + * | v * + * \------------------------> <finish> < * * + * @endverbatim + */ + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_local_io_ctx; + std::string m_global_image_id; + std::string m_remote_mirror_uuid; + std::string m_local_image_name; + std::string m_local_image_id; + ImageCtxT *m_remote_image_ctx; + PoolMetaCache* m_pool_meta_cache; + cls::rbd::MirrorImageMode m_mirror_image_mode; + Context *m_on_finish; + + librados::IoCtx 
m_remote_parent_io_ctx; + ImageCtxT *m_remote_parent_image_ctx = nullptr; + cls::rbd::ParentImageSpec m_remote_parent_spec; + + librados::IoCtx m_local_parent_io_ctx; + cls::rbd::ParentImageSpec m_local_parent_spec; + + bufferlist m_out_bl; + std::string m_parent_global_image_id; + std::string m_parent_pool_name; + int m_ret_val = 0; + + void create_image(); + void handle_create_image(int r); + + void get_parent_global_image_id(); + void handle_get_parent_global_image_id(int r); + + void get_local_parent_image_id(); + void handle_get_local_parent_image_id(int r); + + void open_remote_parent_image(); + void handle_open_remote_parent_image(int r); + + void clone_image(); + void handle_clone_image(int r); + + void close_remote_parent_image(); + void handle_close_remote_parent_image(int r); + + void error(int r); + void finish(int r); + + int validate_parent(); + + void populate_image_options(librbd::ImageOptions* image_options); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc new file mode 100644 index 000000000..74e975373 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc @@ -0,0 +1,85 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << 
"GetMirrorImageIdRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_rados_callback; + +template <typename I> +void GetMirrorImageIdRequest<I>::send() { + dout(20) << dendl; + get_image_id(); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::get_image_id() { + dout(20) << dendl; + + // attempt to cross-reference a image id by the global image id + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + GetMirrorImageIdRequest<I>, + &GetMirrorImageIdRequest<I>::handle_get_image_id>( + this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::handle_get_image_id(int r) { + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish( + &iter, m_image_id); + } + + dout(20) << "r=" << r << ", " + << "image_id=" << *m_image_id << dendl; + + if (r < 0) { + if (r == -ENOENT) { + dout(10) << "global image " << m_global_image_id << " not registered" + << dendl; + } else { + derr << "failed to retrieve image id: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::finish(int r) { + dout(20) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h new file mode 100644 index 000000000..b26645138 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h @@ -0,0 
/**
 * Asynchronous request that resolves a global mirror image id to the image
 * id registered in the pool behind the supplied IoCtx.
 *
 * The request is heap-allocated via create(); its finish() step completes
 * the supplied context and deletes the request object.
 */
template <typename ImageCtxT = librbd::ImageCtx>
class GetMirrorImageIdRequest {
public:
  /**
   * Allocate a new request.
   *
   * @param io_ctx           pool context the lookup is issued against
   *                         (held by reference)
   * @param global_image_id  global image id to resolve (copied)
   * @param image_id         output location for the resolved image id
   * @param on_finish        context completed with the final result
   */
  static GetMirrorImageIdRequest *create(librados::IoCtx &io_ctx,
                                         const std::string &global_image_id,
                                         std::string *image_id,
                                         Context *on_finish) {
    return new GetMirrorImageIdRequest(io_ctx, global_image_id, image_id,
                                       on_finish);
  }

  // Stores the arguments only; no I/O is issued here.
  GetMirrorImageIdRequest(librados::IoCtx &io_ctx,
                          const std::string &global_image_id,
                          std::string *image_id,
                          Context *on_finish)
    : m_io_ctx(io_ctx), m_global_image_id(global_image_id),
      m_image_id(image_id), m_on_finish(on_finish) {
  }

  // Start the state machine.
  void send();

private:
  /**
   * @verbatim
   *
   * <start>
   *    |
   *    v
   * GET_IMAGE_ID
   *    |
   *    v
   * <finish>
   *
   * @endverbatim
   */

  librados::IoCtx &m_io_ctx;      // referenced, not owned
  std::string m_global_image_id;
  std::string *m_image_id;        // caller-provided output
  Context *m_on_finish;

  bufferlist m_out_bl;            // raw reply of the lookup operation

  void get_image_id();
  void handle_get_image_id(int r);

  void finish(int r);

};
-*- +// vim: ts=8 sw=2 smarttab + +#include "OpenImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenImageRequest: " \ + << this << " " << __func__ << " " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +template <typename I> +OpenImageRequest<I>::OpenImageRequest(librados::IoCtx &io_ctx, I **image_ctx, + const std::string &image_id, + bool read_only, Context *on_finish) + : m_io_ctx(io_ctx), m_image_ctx(image_ctx), m_image_id(image_id), + m_read_only(read_only), m_on_finish(on_finish) { +} + +template <typename I> +void OpenImageRequest<I>::send() { + send_open_image(); +} + +template <typename I> +void OpenImageRequest<I>::send_open_image() { + dout(20) << dendl; + + *m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, m_read_only); + + if (!m_read_only) { + // ensure non-primary images can be modified + (*m_image_ctx)->read_only_mask = ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + } + + Context *ctx = create_context_callback< + OpenImageRequest<I>, &OpenImageRequest<I>::handle_open_image>( + this); + (*m_image_ctx)->state->open(0, ctx); +} + +template <typename I> +void OpenImageRequest<I>::handle_open_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to open image '" << m_image_id << "': " + << cpp_strerror(r) << dendl; + *m_image_ctx = nullptr; + } + + finish(r); +} + +template <typename I> +void OpenImageRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class 
/**
 * Asynchronous request that opens an image by id and hands the resulting
 * image context back through a caller-supplied pointer.
 *
 * The request is heap-allocated via create().
 */
template <typename ImageCtxT = librbd::ImageCtx>
class OpenImageRequest {
public:
  /**
   * Allocate a new request.
   *
   * @param io_ctx     pool context the image lives in (held by reference)
   * @param image_ctx  output location for the image context pointer
   * @param image_id   id of the image to open
   * @param read_only  whether the image is opened read-only
   * @param on_finish  context completed with the final result
   */
  static OpenImageRequest* create(librados::IoCtx &io_ctx,
                                  ImageCtxT **image_ctx,
                                  const std::string &image_id,
                                  bool read_only, Context *on_finish) {
    return new OpenImageRequest(io_ctx, image_ctx, image_id, read_only,
                                on_finish);
  }

  OpenImageRequest(librados::IoCtx &io_ctx, ImageCtxT **image_ctx,
                   const std::string &image_id, bool read_only,
                   Context *on_finish);

  // Start the state machine.
  void send();

private:
  /**
   * @verbatim
   *
   * <start>
   *    |
   *    v
   * OPEN_IMAGE
   *    |
   *    v
   * <finish>
   *
   * @endverbatim
   */
  librados::IoCtx &m_io_ctx;   // referenced, not owned
  ImageCtxT **m_image_ctx;     // caller-provided output
  std::string m_image_id;
  bool m_read_only;
  Context *m_on_finish;

  void send_open_image();
  void handle_open_image(int r);

  // NOTE(review): the two close steps below are declared but the state
  // diagram has no CLOSE_IMAGE state and the accompanying .cc in this diff
  // does not define them -- presumably leftovers; confirm before removing.
  void send_close_image(int r);
  void handle_close_image(int r);

  void finish(int r);

};
namespace {

/**
 * Exclusive-lock policy installed on the local image by
 * OpenLocalImageRequest::send_open_image().
 */
template <typename I>
struct MirrorExclusiveLockPolicy : public librbd::exclusive_lock::Policy {
  I *image_ctx;

  MirrorExclusiveLockPolicy(I *image_ctx) : image_ctx(image_ctx) {
  }

  // never request the lock automatically
  bool may_auto_request_lock() override {
    return false;
  }

  /**
   * Called when another client requests the lock.
   *
   * @param force not consulted by this implementation
   * @return 0 (after starting a lock release) when the image has no journal
   *         or the journal is the tag owner, -EROFS otherwise
   */
  int lock_requested(bool force) override {
    int r = -EROFS;
    {
      // inspect the journal state under both locks, then drop them before
      // touching the exclusive lock
      std::shared_lock owner_locker{image_ctx->owner_lock};
      std::shared_lock image_locker{image_ctx->image_lock};
      if (image_ctx->journal == nullptr || image_ctx->journal->is_tag_owner()) {
        r = 0;
      }
    }

    if (r == 0) {
      // if the local image journal has been closed or if it was (force)
      // promoted allow the lock to be released to another client
      image_ctx->exclusive_lock->release_lock(nullptr);
    }
    return r;
  }

  // only trash snapshot removal and force promotion are let through while
  // requests are blocked
  bool accept_blocked_request(
      librbd::exclusive_lock::OperationRequestType request_type) override {
    switch (request_type) {
    case librbd::exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE:
    case librbd::exclusive_lock::OPERATION_REQUEST_TYPE_FORCE_PROMOTION:
      return true;
    default:
      return false;
    }
  }
};

/**
 * Journal policy installed on the local image by
 * OpenLocalImageRequest::send_open_image(): the journal stays enabled but
 * appends are disabled.
 */
struct MirrorJournalPolicy : public librbd::journal::Policy {
  librbd::asio::ContextWQ *work_queue;

  MirrorJournalPolicy(librbd::asio::ContextWQ *work_queue)
    : work_queue(work_queue) {
  }

  bool append_disabled() const override {
    // avoid recording any events to the local journal
    return true;
  }
  bool journal_disabled() const override {
    return false;
  }

  // no tag is allocated here; on_finish is simply queued with result 0
  void allocate_tag_on_lock(Context *on_finish) override {
    // rbd-mirror will manually create tags by copying them from the peer
    work_queue->queue(on_finish, 0);
  }
};

} // anonymous namespace

// Stores the arguments only; nothing is opened until send().
template <typename I>
OpenLocalImageRequest<I>::OpenLocalImageRequest(
    librados::IoCtx &local_io_ctx,
    I **local_image_ctx,
    const std::string &local_image_id,
    librbd::asio::ContextWQ *work_queue,
    Context *on_finish)
  : m_local_io_ctx(local_io_ctx), m_local_image_ctx(local_image_ctx),
    m_local_image_id(local_image_id), m_work_queue(work_queue),
    m_on_finish(on_finish) {
}

// Entry point of the state machine.
template <typename I>
void OpenLocalImageRequest<I>::send() {
  send_open_image();
}

// Create the local image context (writable), install the mirror-specific
// exclusive-lock and journal policies, and start the asynchronous open.
template <typename I>
void OpenLocalImageRequest<I>::send_open_image() {
  dout(20) << dendl;

  *m_local_image_ctx = I::create("", m_local_image_id, nullptr,
                                 m_local_io_ctx, false);

  // ensure non-primary images can be modified
  (*m_local_image_ctx)->read_only_mask =
    ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY;

  {
    // both locks are held while the policies are swapped in
    std::scoped_lock locker{(*m_local_image_ctx)->owner_lock,
                            (*m_local_image_ctx)->image_lock};
    (*m_local_image_ctx)->set_exclusive_lock_policy(
      new MirrorExclusiveLockPolicy<I>(*m_local_image_ctx));
    (*m_local_image_ctx)->set_journal_policy(
      new MirrorJournalPolicy(m_work_queue));
  }

  Context *ctx = create_context_callback<
    OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_open_image>(
      this);
  (*m_local_image_ctx)->state->open(0, ctx);
}

// Open completion: on failure the caller's image pointer is reset and the
// request finishes with r; otherwise the mirror info is queried next.
template <typename I>
void OpenLocalImageRequest<I>::handle_open_image(int r) {
  dout(20) << ": r=" << r << dendl;

  if (r < 0) {
    if (r == -ENOENT) {
      dout(10) << ": local image does not exist" << dendl;
    } else {
      derr << ": failed to open image '" << m_local_image_id << "': "
           << cpp_strerror(r) << dendl;
    }
    *m_local_image_ctx = nullptr;
    finish(r);
    return;
  }

  send_get_mirror_info();
}

// Query mirror image record, promotion state and primary mirror uuid of the
// opened local image.
template <typename I>
void OpenLocalImageRequest<I>::send_get_mirror_info() {
  dout(20) << dendl;

  Context *ctx = create_context_callback<
    OpenLocalImageRequest<I>,
    &OpenLocalImageRequest<I>::handle_get_mirror_info>(
      this);
  auto request = librbd::mirror::GetInfoRequest<I>::create(
    **m_local_image_ctx, &m_mirror_image, &m_promotion_state,
    &m_primary_mirror_uuid, ctx);
  request->send();
}

// Decide whether replay can proceed. The image is closed again with:
//   r           if the query failed (including -ENOENT: not mirrored),
//   -ENOENT     if mirroring is being disabled,
//   -EREMOTEIO  if the local image is primary.
// Otherwise the exclusive lock is acquired next.
template <typename I>
void OpenLocalImageRequest<I>::handle_get_mirror_info(int r) {
  dout(20) << ": r=" << r << dendl;

  if (r == -ENOENT) {
    dout(5) << ": local image is not mirrored" << dendl;
    send_close_image(r);
    return;
  } else if (r < 0) {
    derr << ": error querying local image primary status: " << cpp_strerror(r)
         << dendl;
    send_close_image(r);
    return;
  }

  if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) {
    dout(5) << ": local image mirroring is being disabled" << dendl;
    send_close_image(-ENOENT);
    return;
  }

  // if the local image owns the tag -- don't steal the lock since
  // we aren't going to mirror peer data into this image anyway
  if (m_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) {
    dout(10) << ": local image is primary -- skipping image replay" << dendl;
    send_close_image(-EREMOTEIO);
    return;
  }

  send_lock_image();
}

// Acquire the exclusive lock. An image without exclusive lock support is
// accepted as-is in snapshot mode and rejected with -EINVAL otherwise.
template <typename I>
void OpenLocalImageRequest<I>::send_lock_image() {
  std::shared_lock owner_locker{(*m_local_image_ctx)->owner_lock};
  if ((*m_local_image_ctx)->exclusive_lock == nullptr) {
    // release owner_lock before finishing or closing the image
    owner_locker.unlock();
    if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
      finish(0);
    } else {
      derr << ": image does not support exclusive lock" << dendl;
      send_close_image(-EINVAL);
    }
    return;
  }

  dout(20) << dendl;

  // disallow any proxied maintenance operations before grabbing lock
  (*m_local_image_ctx)->exclusive_lock->block_requests(-EROFS);

  Context *ctx = create_context_callback<
    OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_lock_image>(
      this);

  (*m_local_image_ctx)->exclusive_lock->acquire_lock(ctx);
}

// Lock completion: verify that the lock is really owned before reporting
// success; any failure closes the image again.
template <typename I>
void OpenLocalImageRequest<I>::handle_lock_image(int r) {
  dout(20) << ": r=" << r << dendl;

  if (r < 0) {
    derr << ": failed to lock image '" << m_local_image_id << "': "
         << cpp_strerror(r) << dendl;
    send_close_image(r);
    return;
  }

  {
    std::shared_lock owner_locker{(*m_local_image_ctx)->owner_lock};
    if ((*m_local_image_ctx)->exclusive_lock == nullptr ||
        !(*m_local_image_ctx)->exclusive_lock->is_lock_owner()) {
      derr << ": image is not locked" << dendl;
      // NOTE(review): the close request is started while owner_lock is
      // still held (shared); unlike send_lock_image() it is not released
      // first -- confirm CloseImageRequest::send() tolerates that.
      send_close_image(-EBUSY);
      return;
    }
  }

  finish(0);
}

// Close the local image; the first negative r seen is remembered in
// m_ret_val and reported once the close completes.
template <typename I>
void OpenLocalImageRequest<I>::send_close_image(int r) {
  dout(20) << dendl;

  if (m_ret_val == 0 && r < 0) {
    m_ret_val = r;
  }

  Context *ctx = create_context_callback<
    OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_close_image>(
      this);
  CloseImageRequest<I> *request = CloseImageRequest<I>::create(
    m_local_image_ctx, ctx);
  request->send();
}

// Close completion: the close itself must succeed; the saved error is
// what gets reported to the caller.
template <typename I>
void OpenLocalImageRequest<I>::handle_close_image(int r) {
  dout(20) << dendl;

  ceph_assert(r == 0);
  finish(m_ret_val);
}

// Complete the caller's context with r and destroy this request; no member
// may be touched after this returns.
template <typename I>
void OpenLocalImageRequest<I>::finish(int r) {
  dout(20) << ": r=" << r << dendl;

  m_on_finish->complete(r);
  delete this;
}
+template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h new file mode 100644 index 000000000..9a642bc39 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h @@ -0,0 +1,97 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/mirror/Types.h" +#include <string> + +class Context; +namespace librbd { +class ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class OpenLocalImageRequest { +public: + static OpenLocalImageRequest* create(librados::IoCtx &local_io_ctx, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + librbd::asio::ContextWQ *work_queue, + Context *on_finish) { + return new OpenLocalImageRequest(local_io_ctx, local_image_ctx, + local_image_id, work_queue, on_finish); + } + + OpenLocalImageRequest(librados::IoCtx &local_io_ctx, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + librbd::asio::ContextWQ *work_queue, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE * * * * * * * * + * | * + * v * + * GET_MIRROR_INFO * * * * * + * | * + * v (skip if primary) v + * LOCK_IMAGE * * * > CLOSE_IMAGE + * | | + * v | + * <finish> <---------------/ + * + * @endverbatim + */ + librados::IoCtx &m_local_io_ctx; + ImageCtxT **m_local_image_ctx; + std::string m_local_image_id; + librbd::asio::ContextWQ *m_work_queue; + Context *m_on_finish; + + 
cls::rbd::MirrorImage m_mirror_image; + librbd::mirror::PromotionState m_promotion_state = + librbd::mirror::PROMOTION_STATE_NON_PRIMARY; + std::string m_primary_mirror_uuid; + int m_ret_val = 0; + + void send_open_image(); + void handle_open_image(int r); + + void send_get_mirror_info(); + void handle_get_mirror_info(int r); + + void send_lock_image(); + void handle_lock_image(int r); + + void send_close_image(int r); + void handle_close_image(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc new file mode 100644 index 000000000..b1fef7254 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc @@ -0,0 +1,197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/mirror/GetInfoRequest.h" +#include "tools/rbd_mirror/ImageDeleter.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" +#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "PrepareLocalImageRequest: " << this << " " \ + << __func__ << ": 
" + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void PrepareLocalImageRequest<I>::send() { + dout(10) << dendl; + get_local_image_id(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_local_image_id() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_local_image_id>(this); + auto req = GetMirrorImageIdRequest<I>::create(m_io_ctx, m_global_image_id, + &m_local_image_id, ctx); + req->send(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_local_image_id(int r) { + dout(10) << "r=" << r << ", " + << "local_image_id=" << m_local_image_id << dendl; + + if (r < 0) { + finish(r); + return; + } + + get_local_image_name(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_local_image_name() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::dir_get_name_start(&op, m_local_image_id); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_local_image_name>(this); + int r = m_io_ctx.aio_operate(RBD_DIRECTORY, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_local_image_name(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::dir_get_name_finish(&it, m_local_image_name); + } + + if (r == -ENOENT) { + // proceed we should have a mirror image record if we got this far + dout(10) << "image does not exist for local image id " << m_local_image_id + << dendl; + *m_local_image_name = ""; + } else if (r < 0) { + derr << "failed to retrieve image name: " << cpp_strerror(r) << dendl; + finish(r); + return; + 
} + + get_mirror_info(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_mirror_info() { + dout(10) << dendl; + + auto ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_mirror_info>(this); + auto req = librbd::mirror::GetInfoRequest<I>::create( + m_io_ctx, m_work_queue, m_local_image_id, &m_mirror_image, + &m_promotion_state, &m_primary_mirror_uuid, ctx); + req->send(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_mirror_info(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << "failed to retrieve local mirror image info: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_CREATING) { + dout(5) << "local image is still in creating state, issuing a removal" + << dendl; + move_to_trash(); + return; + } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) { + dout(5) << "local image mirroring is in disabling state" << dendl; + finish(-ERESTART); + return; + } + + switch (m_mirror_image.mode) { + case cls::rbd::MIRROR_IMAGE_MODE_JOURNAL: + // journal-based local image exists + { + auto state_builder = journal::StateBuilder<I>::create(m_global_image_id); + state_builder->local_primary_mirror_uuid = m_primary_mirror_uuid; + *m_state_builder = state_builder; + } + break; + case cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT: + // snapshot-based local image exists + *m_state_builder = snapshot::StateBuilder<I>::create(m_global_image_id); + break; + default: + derr << "unsupported mirror image mode " << m_mirror_image.mode << " " + << "for image " << m_global_image_id << dendl; + finish(-EOPNOTSUPP); + break; + } + + dout(10) << "local_image_id=" << m_local_image_id << ", " + << "local_promotion_state=" << m_promotion_state << ", " + << "local_primary_mirror_uuid=" << m_primary_mirror_uuid << dendl; + (*m_state_builder)->local_image_id = m_local_image_id; + 
(*m_state_builder)->local_promotion_state = m_promotion_state; + finish(0); +} + +template <typename I> +void PrepareLocalImageRequest<I>::move_to_trash() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_move_to_trash>(this); + ImageDeleter<I>::trash_move(m_io_ctx, m_global_image_id, + false, m_work_queue, ctx); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_move_to_trash(int r) { + dout(10) << ": r=" << r << dendl; + + finish(-ENOENT); +} + +template <typename I> +void PrepareLocalImageRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h new file mode 100644 index 000000000..6372169ff --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h @@ -0,0 +1,115 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados_fwd.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/mirror/Types.h" +#include <string> + +struct Context; + +namespace librbd { +struct ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename> class StateBuilder; + +template <typename ImageCtxT = librbd::ImageCtx> +class PrepareLocalImageRequest { +public: + static PrepareLocalImageRequest *create( + librados::IoCtx &io_ctx, + const std::string &global_image_id, + 
std::string *local_image_name, + StateBuilder<ImageCtxT>** state_builder, + librbd::asio::ContextWQ *work_queue, + Context *on_finish) { + return new PrepareLocalImageRequest(io_ctx, global_image_id, + local_image_name, state_builder, + work_queue, on_finish); + } + + PrepareLocalImageRequest( + librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *local_image_name, + StateBuilder<ImageCtxT>** state_builder, + librbd::asio::ContextWQ *work_queue, + Context *on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), + m_local_image_name(local_image_name), m_state_builder(state_builder), + m_work_queue(work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_LOCAL_IMAGE_ID + * | + * v + * GET_LOCAL_IMAGE_NAME + * | + * v + * GET_MIRROR_INFO + * | + * | (if the image mirror state is CREATING) + * v + * TRASH_MOVE + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + std::string *m_local_image_name; + StateBuilder<ImageCtxT>** m_state_builder; + librbd::asio::ContextWQ *m_work_queue; + Context *m_on_finish; + + bufferlist m_out_bl; + std::string m_local_image_id; + cls::rbd::MirrorImage m_mirror_image; + librbd::mirror::PromotionState m_promotion_state; + std::string m_primary_mirror_uuid; + + void get_local_image_id(); + void handle_get_local_image_id(int r); + + void get_local_image_name(); + void handle_get_local_image_name(int r); + + void get_mirror_info(); + void handle_get_mirror_info(int r); + + void move_to_trash(); + void handle_move_to_trash(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc 
b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc new file mode 100644 index 000000000..45a44a300 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc @@ -0,0 +1,283 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "journal/Settings.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/mirror/GetInfoRequest.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" +#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "PrepareRemoteImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void PrepareRemoteImageRequest<I>::send() { + if (*m_state_builder != nullptr) { + (*m_state_builder)->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid; + auto state_builder = dynamic_cast<snapshot::StateBuilder<I>*>(*m_state_builder); + if (state_builder) { + state_builder->remote_mirror_peer_uuid = m_remote_pool_meta.mirror_peer_uuid; + } + } + + get_remote_image_id(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_remote_image_id() { + 
dout(10) << dendl; + + Context *ctx = create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_remote_image_id>(this); + auto req = GetMirrorImageIdRequest<I>::create(m_remote_io_ctx, + m_global_image_id, + &m_remote_image_id, ctx); + req->send(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_remote_image_id(int r) { + dout(10) << "r=" << r << ", " + << "remote_image_id=" << m_remote_image_id << dendl; + + if (r < 0) { + finish(r); + return; + } + + get_mirror_info(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_mirror_info() { + dout(10) << dendl; + + auto ctx = create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_mirror_info>(this); + auto req = librbd::mirror::GetInfoRequest<I>::create( + m_remote_io_ctx, m_threads->work_queue, m_remote_image_id, + &m_mirror_image, &m_promotion_state, &m_primary_mirror_uuid, + ctx); + req->send(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_mirror_info(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "image " << m_global_image_id << " not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve mirror image details for image " + << m_global_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + auto state_builder = *m_state_builder; + if (state_builder != nullptr && + state_builder->get_mirror_image_mode() != m_mirror_image.mode) { + derr << "local and remote mirror image using different mirroring modes " + << "for image " << m_global_image_id << ": split-brain" << dendl; + finish(-EEXIST); + return; + } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) { + dout(5) << "remote image mirroring is being disabled" << dendl; + finish(-ENOENT); + return; + } + + switch (m_mirror_image.mode) { + case cls::rbd::MIRROR_IMAGE_MODE_JOURNAL: + 
get_client(); + break; + case cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT: + finalize_snapshot_state_builder(); + finish(0); + break; + default: + derr << "unsupported mirror image mode " << m_mirror_image.mode << " " + << "for image " << m_global_image_id << dendl; + finish(-EOPNOTSUPP); + break; + } +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_client() { + dout(10) << dendl; + + auto cct = static_cast<CephContext *>(m_local_io_ctx.cct()); + ::journal::Settings journal_settings; + journal_settings.commit_interval = cct->_conf.get_val<double>( + "rbd_mirror_journal_commit_age"); + + // TODO use Journal thread pool for journal ops until converted to ASIO + ContextWQ* context_wq; + librbd::Journal<>::get_work_queue(cct, &context_wq); + + ceph_assert(m_remote_journaler == nullptr); + m_remote_journaler = new Journaler(context_wq, m_threads->timer, + &m_threads->timer_lock, m_remote_io_ctx, + m_remote_image_id, m_local_mirror_uuid, + journal_settings, m_cache_manager_handler); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_client>(this)); + m_remote_journaler->get_client(m_local_mirror_uuid, &m_client, ctx); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_client(int r) { + dout(10) << "r=" << r << dendl; + + MirrorPeerClientMeta client_meta; + if (r == -ENOENT) { + dout(10) << "client not registered" << dendl; + register_client(); + } else if (r < 0) { + derr << "failed to retrieve client: " << cpp_strerror(r) << dendl; + finish(r); + } else if (!util::decode_client_meta(m_client, &client_meta)) { + // require operator intervention since the data is corrupt + finish(-EBADMSG); + } else { + // skip registration if it already exists + finalize_journal_state_builder(m_client.state, client_meta); + finish(0); + } +} + +template <typename I> +void PrepareRemoteImageRequest<I>::register_client() { + 
dout(10) << dendl; + + auto state_builder = *m_state_builder; + librbd::journal::MirrorPeerClientMeta client_meta{ + (state_builder == nullptr ? "" : state_builder->local_image_id)}; + client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + librbd::journal::ClientData client_data{client_meta}; + bufferlist client_data_bl; + encode(client_data, client_data_bl); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_register_client>(this)); + m_remote_journaler->register_client(client_data_bl, ctx); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_register_client(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register with remote journal: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + auto state_builder = *m_state_builder; + librbd::journal::MirrorPeerClientMeta client_meta{ + (state_builder == nullptr ? 
"" : state_builder->local_image_id)}; + client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + finalize_journal_state_builder(cls::journal::CLIENT_STATE_CONNECTED, + client_meta); + finish(0); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::finalize_journal_state_builder( + cls::journal::ClientState client_state, + const MirrorPeerClientMeta& client_meta) { + journal::StateBuilder<I>* state_builder = nullptr; + if (*m_state_builder != nullptr) { + // already verified that it's a matching builder in + // 'handle_get_mirror_info' + state_builder = dynamic_cast<journal::StateBuilder<I>*>(*m_state_builder); + ceph_assert(state_builder != nullptr); + } else { + state_builder = journal::StateBuilder<I>::create(m_global_image_id); + *m_state_builder = state_builder; + } + + state_builder->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid; + state_builder->remote_image_id = m_remote_image_id; + state_builder->remote_promotion_state = m_promotion_state; + state_builder->remote_journaler = m_remote_journaler; + state_builder->remote_client_state = client_state; + state_builder->remote_client_meta = client_meta; +} + +template <typename I> +void PrepareRemoteImageRequest<I>::finalize_snapshot_state_builder() { + snapshot::StateBuilder<I>* state_builder = nullptr; + if (*m_state_builder != nullptr) { + state_builder = dynamic_cast<snapshot::StateBuilder<I>*>(*m_state_builder); + ceph_assert(state_builder != nullptr); + } else { + state_builder = snapshot::StateBuilder<I>::create(m_global_image_id); + *m_state_builder = state_builder; + } + + dout(10) << "remote_mirror_uuid=" << m_remote_pool_meta.mirror_uuid << ", " + << "remote_mirror_peer_uuid=" + << m_remote_pool_meta.mirror_peer_uuid << ", " + << "remote_image_id=" << m_remote_image_id << ", " + << "remote_promotion_state=" << m_promotion_state << dendl; + state_builder->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid; + state_builder->remote_mirror_peer_uuid = 
m_remote_pool_meta.mirror_peer_uuid; + state_builder->remote_image_id = m_remote_image_id; + state_builder->remote_promotion_state = m_promotion_state; +} + +template <typename I> +void PrepareRemoteImageRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + delete m_remote_journaler; + m_remote_journaler = nullptr; + } + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h new file mode 100644 index 000000000..483cfc001 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h @@ -0,0 +1,153 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "cls/journal/cls_journal_types.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include "librbd/mirror/Types.h" +#include "tools/rbd_mirror/Types.h" +#include <string> + +namespace journal { class Journaler; } +namespace journal { struct CacheManagerHandler; } +namespace librbd { struct ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +struct Context; + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_replayer { + +template <typename> class StateBuilder; + +template <typename ImageCtxT = librbd::ImageCtx> +class PrepareRemoteImageRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + 
typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + + static PrepareRemoteImageRequest *create( + Threads<ImageCtxT> *threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const RemotePoolMeta& remote_pool_meta, + ::journal::CacheManagerHandler *cache_manager_handler, + StateBuilder<ImageCtxT>** state_builder, + Context *on_finish) { + return new PrepareRemoteImageRequest(threads, local_io_ctx, remote_io_ctx, + global_image_id, local_mirror_uuid, + remote_pool_meta, + cache_manager_handler, state_builder, + on_finish); + } + + PrepareRemoteImageRequest( + Threads<ImageCtxT> *threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const RemotePoolMeta& remote_pool_meta, + ::journal::CacheManagerHandler *cache_manager_handler, + StateBuilder<ImageCtxT>** state_builder, + Context *on_finish) + : m_threads(threads), + m_local_io_ctx(local_io_ctx), + m_remote_io_ctx(remote_io_ctx), + m_global_image_id(global_image_id), + m_local_mirror_uuid(local_mirror_uuid), + m_remote_pool_meta(remote_pool_meta), + m_cache_manager_handler(cache_manager_handler), + m_state_builder(state_builder), + m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_REMOTE_IMAGE_ID + * | + * v + * GET_REMOTE_MIRROR_INFO + * | + * | (journal) + * \-----------> GET_CLIENT + * | | + * | v (skip if not needed) + * | REGISTER_CLIENT + * | | + * | | + * |/----------------/ + * | + * v + * <finish> + * + * @endverbatim + */ + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_local_io_ctx; + librados::IoCtx &m_remote_io_ctx; + std::string m_global_image_id; + std::string m_local_mirror_uuid; + RemotePoolMeta m_remote_pool_meta; + ::journal::CacheManagerHandler *m_cache_manager_handler; + StateBuilder<ImageCtxT>** 
m_state_builder; + Context *m_on_finish; + + bufferlist m_out_bl; + std::string m_remote_image_id; + cls::rbd::MirrorImage m_mirror_image; + librbd::mirror::PromotionState m_promotion_state = + librbd::mirror::PROMOTION_STATE_UNKNOWN; + std::string m_primary_mirror_uuid; + + // journal-based mirroring + Journaler *m_remote_journaler = nullptr; + cls::journal::Client m_client; + + void get_remote_image_id(); + void handle_get_remote_image_id(int r); + + void get_mirror_info(); + void handle_get_mirror_info(int r); + + void get_client(); + void handle_get_client(int r); + + void register_client(); + void handle_register_client(int r); + + void finalize_journal_state_builder(cls::journal::ClientState client_state, + const MirrorPeerClientMeta& client_meta); + void finalize_snapshot_state_builder(); + + void finish(int r); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/Replayer.h b/src/tools/rbd_mirror/image_replayer/Replayer.h new file mode 100644 index 000000000..f3bfa4da0 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Replayer.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H +#define RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H + +#include <string> + +struct Context; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +struct Replayer { + virtual ~Replayer() {} + + virtual void destroy() = 0; + + virtual void init(Context* on_finish) = 0; + virtual void shut_down(Context* on_finish) = 0; + + virtual void flush(Context* on_finish) = 0; + + virtual bool get_replay_status(std::string* description, + Context* on_finish) = 0; + + virtual bool is_replaying() const = 0; + virtual bool 
is_resync_requested() const = 0; + + virtual int get_error_code() const = 0; + virtual std::string get_error_description() const = 0; +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H diff --git a/src/tools/rbd_mirror/image_replayer/ReplayerListener.h b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h new file mode 100644 index 000000000..f17f401b1 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H +#define RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H + +namespace rbd { +namespace mirror { +namespace image_replayer { + +struct ReplayerListener { + virtual ~ReplayerListener() {} + + virtual void handle_notification() = 0; +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H diff --git a/src/tools/rbd_mirror/image_replayer/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/StateBuilder.cc new file mode 100644 index 000000000..55fb3509d --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/StateBuilder.cc @@ -0,0 +1,138 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "StateBuilder.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h" +#include "tools/rbd_mirror/image_sync/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "StateBuilder: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { 
+namespace image_replayer { + +template <typename I> +StateBuilder<I>::StateBuilder(const std::string& global_image_id) + : global_image_id(global_image_id) { + dout(10) << "global_image_id=" << global_image_id << dendl; +} + +template <typename I> +StateBuilder<I>::~StateBuilder() { + ceph_assert(local_image_ctx == nullptr); + ceph_assert(remote_image_ctx == nullptr); + ceph_assert(m_sync_point_handler == nullptr); +} + +template <typename I> +bool StateBuilder<I>::is_local_primary() const { + if (local_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) { + ceph_assert(!local_image_id.empty()); + return true; + } + return false; +} + +template <typename I> +bool StateBuilder<I>::is_remote_primary() const { + if (remote_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) { + ceph_assert(!remote_image_id.empty()); + return true; + } + return false; +} + +template <typename I> +bool StateBuilder<I>::is_linked() const { + if (local_promotion_state == librbd::mirror::PROMOTION_STATE_NON_PRIMARY) { + ceph_assert(!local_image_id.empty()); + return is_linked_impl(); + } + return false; +} + +template <typename I> +void StateBuilder<I>::close_local_image(Context* on_finish) { + if (local_image_ctx == nullptr) { + on_finish->complete(0); + return; + } + + dout(10) << dendl; + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_close_local_image(r, on_finish); + }); + auto request = image_replayer::CloseImageRequest<I>::create( + &local_image_ctx, ctx); + request->send(); +} + +template <typename I> +void StateBuilder<I>::handle_close_local_image(int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + ceph_assert(local_image_ctx == nullptr); + if (r < 0) { + derr << "failed to close local image for image " << global_image_id << ": " + << cpp_strerror(r) << dendl; + } + + on_finish->complete(r); +} + +template <typename I> +void StateBuilder<I>::close_remote_image(Context* on_finish) { + if (remote_image_ctx == nullptr) { + 
on_finish->complete(0); + return; + } + + dout(10) << dendl; + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_close_remote_image(r, on_finish); + }); + auto request = image_replayer::CloseImageRequest<I>::create( + &remote_image_ctx, ctx); + request->send(); +} + +template <typename I> +void StateBuilder<I>::handle_close_remote_image(int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + ceph_assert(remote_image_ctx == nullptr); + if (r < 0) { + derr << "failed to close remote image for image " << global_image_id << ": " + << cpp_strerror(r) << dendl; + } + + on_finish->complete(r); +} + +template <typename I> +void StateBuilder<I>::destroy_sync_point_handler() { + if (m_sync_point_handler == nullptr) { + return; + } + + dout(15) << dendl; + m_sync_point_handler->destroy(); + m_sync_point_handler = nullptr; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::StateBuilder<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/StateBuilder.h new file mode 100644 index 000000000..51cf8668c --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/StateBuilder.h @@ -0,0 +1,114 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H + +#include "include/rados/librados_fwd.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/mirror/Types.h" + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +struct BaseRequest; +template <typename> class InstanceWatcher; +struct PoolMetaCache; +struct ProgressContext; +template <typename> class Threads; + +namespace image_sync { struct SyncPointHandler; } + +namespace image_replayer { + +struct Replayer; +struct ReplayerListener; + +template <typename ImageCtxT> 
+class StateBuilder { +public: + StateBuilder(const StateBuilder&) = delete; + StateBuilder& operator=(const StateBuilder&) = delete; + + virtual ~StateBuilder(); + + virtual void destroy() { + delete this; + } + + virtual void close(Context* on_finish) = 0; + + virtual bool is_disconnected() const = 0; + + bool is_local_primary() const; + bool is_remote_primary() const; + bool is_linked() const; + + virtual cls::rbd::MirrorImageMode get_mirror_image_mode() const = 0; + + virtual image_sync::SyncPointHandler* create_sync_point_handler() = 0; + void destroy_sync_point_handler(); + + virtual bool replay_requires_remote_image() const = 0; + + void close_remote_image(Context* on_finish); + + virtual BaseRequest* create_local_image_request( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) = 0; + + virtual BaseRequest* create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) = 0; + + virtual Replayer* create_replayer( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) = 0; + + std::string global_image_id; + + std::string local_image_id; + librbd::mirror::PromotionState local_promotion_state = + librbd::mirror::PROMOTION_STATE_UNKNOWN; + ImageCtxT* local_image_ctx = nullptr; + + std::string remote_mirror_uuid; + std::string remote_image_id; + librbd::mirror::PromotionState remote_promotion_state = + librbd::mirror::PROMOTION_STATE_UNKNOWN; + ImageCtxT* remote_image_ctx = nullptr; + +protected: + image_sync::SyncPointHandler* m_sync_point_handler = nullptr; + + StateBuilder(const std::string& global_image_id); + + void close_local_image(Context* on_finish); + +private: + virtual bool 
is_linked_impl() const = 0; + + void handle_close_local_image(int r, Context* on_finish); + void handle_close_remote_image(int r, Context* on_finish); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::StateBuilder<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H diff --git a/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc new file mode 100644 index 000000000..5d9c9aca1 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h" +#include "common/Clock.h" + +namespace rbd { +namespace mirror { +namespace image_replayer { + +void TimeRollingMean::operator()(uint32_t value) { + auto time = ceph_clock_now(); + if (m_last_time.is_zero()) { + m_last_time = time; + } else if (m_last_time.sec() < time.sec()) { + auto sec = m_last_time.sec(); + while (sec++ < time.sec()) { + m_rolling_mean(m_sum); + m_sum = 0; + } + + m_last_time = time; + } + + m_sum += value; +} + +double TimeRollingMean::get_average() const { + return boost::accumulators::rolling_mean(m_rolling_mean); +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h new file mode 100644 index 000000000..139ef893f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h @@ -0,0 +1,40 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H +#define RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H + +#include "include/utime.h" +#include <boost/accumulators/accumulators.hpp> +#include 
<boost/accumulators/statistics/stats.hpp> +#include <boost/accumulators/statistics/rolling_mean.hpp> + +namespace rbd { +namespace mirror { +namespace image_replayer { + +class TimeRollingMean { +public: + + void operator()(uint32_t value); + + double get_average() const; + +private: + typedef boost::accumulators::accumulator_set< + uint64_t, boost::accumulators::stats< + boost::accumulators::tag::rolling_mean>> RollingMean; + + utime_t m_last_time; + uint64_t m_sum = 0; + + RollingMean m_rolling_mean{ + boost::accumulators::tag::rolling_window::window_size = 30}; + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H diff --git a/src/tools/rbd_mirror/image_replayer/Types.h b/src/tools/rbd_mirror/image_replayer/Types.h new file mode 100644 index 000000000..6ab988a76 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Types.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H + +namespace rbd { +namespace mirror { +namespace image_replayer { + +enum HealthState { + HEALTH_STATE_OK, + HEALTH_STATE_WARNING, + HEALTH_STATE_ERROR +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H diff --git a/src/tools/rbd_mirror/image_replayer/Utils.cc b/src/tools/rbd_mirror/image_replayer/Utils.cc new file mode 100644 index 000000000..55162a4e4 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Utils.cc @@ -0,0 +1,61 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "include/rados/librados.hpp" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" + +#define 
dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::util::" \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace util { + +std::string compute_image_spec(librados::IoCtx& io_ctx, + const std::string& image_name) { + std::string name = io_ctx.get_namespace(); + if (!name.empty()) { + name += "/"; + } + + return io_ctx.get_pool_name() + "/" + name + image_name; +} + +bool decode_client_meta(const cls::journal::Client& client, + librbd::journal::MirrorPeerClientMeta* client_meta) { + dout(15) << dendl; + + librbd::journal::ClientData client_data; + auto it = client.data.cbegin(); + try { + decode(client_data, it); + } catch (const buffer::error &err) { + derr << "failed to decode client meta data: " << err.what() << dendl; + return false; + } + + auto local_client_meta = boost::get<librbd::journal::MirrorPeerClientMeta>( + &client_data.client_meta); + if (local_client_meta == nullptr) { + derr << "unknown peer registration" << dendl; + return false; + } + + *client_meta = *local_client_meta; + dout(15) << "client found: client_meta=" << *client_meta << dendl; + return true; +} + +} // namespace util +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + diff --git a/src/tools/rbd_mirror/image_replayer/Utils.h b/src/tools/rbd_mirror/image_replayer/Utils.h new file mode 100644 index 000000000..6c5352cd1 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Utils.h @@ -0,0 +1,29 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_UTILS_H +#define RBD_MIRROR_IMAGE_REPLAYER_UTILS_H + +#include "include/rados/librados_fwd.hpp" +#include <string> + +namespace cls { namespace journal { struct Client; } } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { 
+namespace image_replayer { +namespace util { + +std::string compute_image_spec(librados::IoCtx& io_ctx, + const std::string& image_name); + +bool decode_client_meta(const cls::journal::Client& client, + librbd::journal::MirrorPeerClientMeta* client_meta); + +} // namespace util +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_UTILS_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc new file mode 100644 index 000000000..087cf4f5f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc @@ -0,0 +1,162 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "CreateLocalImageRequest.h" +#include "include/rados/librados.hpp" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/PoolMetaCache.h" +#include "tools/rbd_mirror/ProgressContext.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/CreateImageRequest.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "CreateLocalImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +template <typename I> +void CreateLocalImageRequest<I>::send() { + unregister_client(); +} + +template <typename I> +void 
CreateLocalImageRequest<I>::unregister_client() { + dout(10) << dendl; + update_progress("UNREGISTER_CLIENT"); + + auto ctx = create_context_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_unregister_client>(this); + m_state_builder->remote_journaler->unregister_client(ctx); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_unregister_client(int r) { + dout(10) << "r=" << r << dendl; + if (r < 0 && r != -ENOENT) { + derr << "failed to unregister with remote journal: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + m_state_builder->local_image_id = ""; + m_state_builder->remote_client_meta = {}; + register_client(); +} + +template <typename I> +void CreateLocalImageRequest<I>::register_client() { + ceph_assert(m_state_builder->local_image_id.empty()); + m_state_builder->local_image_id = + librbd::util::generate_image_id<I>(m_local_io_ctx); + dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl; + update_progress("REGISTER_CLIENT"); + + librbd::journal::MirrorPeerClientMeta client_meta{ + m_state_builder->local_image_id}; + client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING; + + librbd::journal::ClientData client_data{client_meta}; + bufferlist client_data_bl; + encode(client_data, client_data_bl); + + auto ctx = create_context_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_register_client>(this); + m_state_builder->remote_journaler->register_client(client_data_bl, ctx); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_register_client(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register with remote journal: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + m_state_builder->remote_client_state = cls::journal::CLIENT_STATE_CONNECTED; + m_state_builder->remote_client_meta = {m_state_builder->local_image_id}; + m_state_builder->remote_client_meta.state = + 
librbd::journal::MIRROR_PEER_STATE_SYNCING; + + create_local_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::create_local_image() { + dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl; + update_progress("CREATE_LOCAL_IMAGE"); + + m_remote_image_ctx->image_lock.lock_shared(); + std::string image_name = m_remote_image_ctx->name; + m_remote_image_ctx->image_lock.unlock_shared(); + + auto ctx = create_context_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_create_local_image>(this); + auto request = CreateImageRequest<I>::create( + m_threads, m_local_io_ctx, m_global_image_id, + m_state_builder->remote_mirror_uuid, image_name, + m_state_builder->local_image_id, m_remote_image_ctx, + m_pool_meta_cache, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, ctx); + request->send(); +} +template <typename I> +void CreateLocalImageRequest<I>::handle_create_local_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBADF) { + dout(5) << "image id " << m_state_builder->local_image_id << " " + << "already in-use" << dendl; + unregister_client(); + return; + } else if (r < 0) { + if (r == -ENOENT) { + dout(10) << "parent image does not exist" << dendl; + } else { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void CreateLocalImageRequest<I>::update_progress( + const std::string& description) { + dout(15) << description << dendl; + if (m_progress_ctx != nullptr) { + m_progress_ctx->update_progress(description); + } +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::CreateLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h new file mode 100644 index 000000000..fc776ecc3 
--- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h @@ -0,0 +1,116 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H + +#include "include/rados/librados_fwd.hpp" +#include "tools/rbd_mirror/BaseRequest.h" +#include <string> + +struct Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +class PoolMetaCache; +class ProgressContext; +template <typename> struct Threads; + +namespace image_replayer { +namespace journal { + +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class CreateLocalImageRequest : public BaseRequest { +public: + typedef rbd::mirror::ProgressContext ProgressContext; + + static CreateLocalImageRequest* create( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + ImageCtxT* remote_image_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + Context* on_finish) { + return new CreateLocalImageRequest(threads, local_io_ctx, remote_image_ctx, + global_image_id, pool_meta_cache, + progress_ctx, state_builder, on_finish); + } + + CreateLocalImageRequest( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + ImageCtxT* remote_image_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + Context* on_finish) + : BaseRequest(on_finish), + m_threads(threads), + m_local_io_ctx(local_io_ctx), + m_remote_image_ctx(remote_image_ctx), + m_global_image_id(global_image_id), + m_pool_meta_cache(pool_meta_cache), + m_progress_ctx(progress_ctx), + m_state_builder(state_builder) { + } + + void send(); + +private: + /** + * 
@verbatim + * + * <start> + * | + * v + * UNREGISTER_CLIENT < * * * * * * * * + * | * + * v * + * REGISTER_CLIENT * + * | * + * v (id exists) * + * CREATE_LOCAL_IMAGE * * * * * * * * * + * | + * v + * <finish> + * + * @endverbatim + */ + + Threads<ImageCtxT>* m_threads; + librados::IoCtx& m_local_io_ctx; + ImageCtxT* m_remote_image_ctx; + std::string m_global_image_id; + PoolMetaCache* m_pool_meta_cache; + ProgressContext* m_progress_ctx; + StateBuilder<ImageCtxT>* m_state_builder; + + void unregister_client(); + void handle_unregister_client(int r); + + void register_client(); + void handle_register_client(int r); + + void create_local_image(); + void handle_create_local_image(int r); + + void update_progress(const std::string& description); + +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::CreateLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc new file mode 100644 index 000000000..f5d49048e --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc @@ -0,0 +1,206 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "EventPreprocessor.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/journal/Types.h" +#include <boost/variant.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "EventPreprocessor: " << this << " " << 
__func__ \ + << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +using librbd::util::create_context_callback; + +template <typename I> +EventPreprocessor<I>::EventPreprocessor(I &local_image_ctx, + Journaler &remote_journaler, + const std::string &local_mirror_uuid, + MirrorPeerClientMeta *client_meta, + librbd::asio::ContextWQ *work_queue) + : m_local_image_ctx(local_image_ctx), m_remote_journaler(remote_journaler), + m_local_mirror_uuid(local_mirror_uuid), m_client_meta(client_meta), + m_work_queue(work_queue) { +} + +template <typename I> +EventPreprocessor<I>::~EventPreprocessor() { + ceph_assert(!m_in_progress); +} + +template <typename I> +bool EventPreprocessor<I>::is_required(const EventEntry &event_entry) { + SnapSeqs snap_seqs(m_client_meta->snap_seqs); + return (prune_snap_map(&snap_seqs) || + event_entry.get_event_type() == + librbd::journal::EVENT_TYPE_SNAP_RENAME); +} + +template <typename I> +void EventPreprocessor<I>::preprocess(EventEntry *event_entry, + Context *on_finish) { + ceph_assert(!m_in_progress); + m_in_progress = true; + m_event_entry = event_entry; + m_on_finish = on_finish; + + refresh_image(); +} + +template <typename I> +void EventPreprocessor<I>::refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + EventPreprocessor<I>, &EventPreprocessor<I>::handle_refresh_image>(this); + m_local_image_ctx.state->refresh(ctx); +} + +template <typename I> +void EventPreprocessor<I>::handle_refresh_image(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + derr << "error encountered during image refresh: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + preprocess_event(); +} + +template <typename I> +void EventPreprocessor<I>::preprocess_event() { + dout(20) << dendl; + + m_snap_seqs = m_client_meta->snap_seqs; + m_snap_seqs_updated = prune_snap_map(&m_snap_seqs); + + int r = boost::apply_visitor(PreprocessEventVisitor(this), + 
m_event_entry->event); + if (r < 0) { + finish(r); + return; + } + + update_client(); +} + +template <typename I> +int EventPreprocessor<I>::preprocess_snap_rename( + librbd::journal::SnapRenameEvent &event) { + dout(20) << "remote_snap_id=" << event.snap_id << ", " + << "src_snap_name=" << event.src_snap_name << ", " + << "dest_snap_name=" << event.dst_snap_name << dendl; + + auto snap_seq_it = m_snap_seqs.find(event.snap_id); + if (snap_seq_it != m_snap_seqs.end()) { + dout(20) << "remapping remote snap id " << snap_seq_it->first << " " + << "to local snap id " << snap_seq_it->second << dendl; + event.snap_id = snap_seq_it->second; + return 0; + } + + auto snap_id_it = m_local_image_ctx.snap_ids.find({cls::rbd::UserSnapshotNamespace(), + event.src_snap_name}); + if (snap_id_it == m_local_image_ctx.snap_ids.end()) { + dout(20) << "cannot map remote snapshot '" << event.src_snap_name << "' " + << "to local snapshot" << dendl; + event.snap_id = CEPH_NOSNAP; + return -ENOENT; + } + + dout(20) << "mapping remote snap id " << event.snap_id << " " + << "to local snap id " << snap_id_it->second << dendl; + m_snap_seqs_updated = true; + m_snap_seqs[event.snap_id] = snap_id_it->second; + event.snap_id = snap_id_it->second; + return 0; +} + +template <typename I> +void EventPreprocessor<I>::update_client() { + if (!m_snap_seqs_updated) { + finish(0); + return; + } + + dout(20) << dendl; + librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta); + client_meta.snap_seqs = m_snap_seqs; + + librbd::journal::ClientData client_data(client_meta); + bufferlist data_bl; + encode(client_data, data_bl); + + Context *ctx = create_context_callback< + EventPreprocessor<I>, &EventPreprocessor<I>::handle_update_client>( + this); + m_remote_journaler.update_client(data_bl, ctx); +} + +template <typename I> +void EventPreprocessor<I>::handle_update_client(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update mirror peer journal client: " + << 
cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_client_meta->snap_seqs = m_snap_seqs; + finish(0); +} + +template <typename I> +bool EventPreprocessor<I>::prune_snap_map(SnapSeqs *snap_seqs) { + bool pruned = false; + + std::shared_lock image_locker{m_local_image_ctx.image_lock}; + for (auto it = snap_seqs->begin(); it != snap_seqs->end(); ) { + auto current_it(it++); + if (m_local_image_ctx.snap_info.count(current_it->second) == 0) { + snap_seqs->erase(current_it); + pruned = true; + } + } + return pruned; +} + +template <typename I> +void EventPreprocessor<I>::finish(int r) { + dout(20) << "r=" << r << dendl; + + Context *on_finish = m_on_finish; + m_on_finish = nullptr; + m_event_entry = nullptr; + m_in_progress = false; + m_snap_seqs_updated = false; + m_work_queue->queue(on_finish, r); +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::EventPreprocessor<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h new file mode 100644 index 000000000..12f70eb93 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h @@ -0,0 +1,127 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H +#define RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H + +#include "include/int_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <map> +#include <string> +#include <boost/variant/static_visitor.hpp> + +struct Context; +namespace journal { class Journaler; } +namespace librbd { +class ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename ImageCtxT = 
librbd::ImageCtx> +class EventPreprocessor { +public: + using Journaler = typename librbd::journal::TypeTraits<ImageCtxT>::Journaler; + using EventEntry = librbd::journal::EventEntry; + using MirrorPeerClientMeta = librbd::journal::MirrorPeerClientMeta; + + static EventPreprocessor *create(ImageCtxT &local_image_ctx, + Journaler &remote_journaler, + const std::string &local_mirror_uuid, + MirrorPeerClientMeta *client_meta, + librbd::asio::ContextWQ *work_queue) { + return new EventPreprocessor(local_image_ctx, remote_journaler, + local_mirror_uuid, client_meta, work_queue); + } + + static void destroy(EventPreprocessor* processor) { + delete processor; + } + + EventPreprocessor(ImageCtxT &local_image_ctx, Journaler &remote_journaler, + const std::string &local_mirror_uuid, + MirrorPeerClientMeta *client_meta, + librbd::asio::ContextWQ *work_queue); + ~EventPreprocessor(); + + bool is_required(const EventEntry &event_entry); + void preprocess(EventEntry *event_entry, Context *on_finish); + +private: + /** + * @verbatim + * + * <start> + * | + * v (skip if not required) + * REFRESH_IMAGE + * | + * v (skip if not required) + * PREPROCESS_EVENT + * | + * v (skip if not required) + * UPDATE_CLIENT + * + * @endverbatim + */ + + typedef std::map<uint64_t, uint64_t> SnapSeqs; + + class PreprocessEventVisitor : public boost::static_visitor<int> { + public: + EventPreprocessor *event_preprocessor; + + PreprocessEventVisitor(EventPreprocessor *event_preprocessor) + : event_preprocessor(event_preprocessor) { + } + + template <typename T> + inline int operator()(T&) const { + return 0; + } + inline int operator()(librbd::journal::SnapRenameEvent &event) const { + return event_preprocessor->preprocess_snap_rename(event); + } + }; + + ImageCtxT &m_local_image_ctx; + Journaler &m_remote_journaler; + std::string m_local_mirror_uuid; + MirrorPeerClientMeta *m_client_meta; + librbd::asio::ContextWQ *m_work_queue; + + bool m_in_progress = false; + EventEntry *m_event_entry = nullptr; 
/**
 * Entry point of the request.
 *
 * Resets both output flags, then:
 *  - fails with -EEXIST if the local image id differs from the image id
 *    registered in the remote client meta;
 *  - fails with -EINVAL if the local image has no journal;
 *  - propagates the error if the resync check fails;
 *  - finishes with 0 if a resync was requested (flag set by the journal);
 *  - finishes with 0 and *m_syncing = true if the remote still flags the
 *    peer as syncing and the local tag is owned by the remote mirror;
 *  - otherwise continues with update_client_state().
 */
template <typename I>
void PrepareReplayRequest<I>::send() {
  *m_resync_requested = false;
  *m_syncing = false;

  if (m_state_builder->local_image_id !=
        m_state_builder->remote_client_meta.image_id) {
    // somehow our local image has a different image id than the image id
    // registered in the remote image
    derr << "split-brain detected: local_image_id="
         << m_state_builder->local_image_id << ", "
         << "registered local_image_id="
         << m_state_builder->remote_client_meta.image_id << dendl;
    finish(-EEXIST);
    return;
  }

  // the journal pointer and its tag state are read under the shared image
  // lock; the lock is released before every finish() call below
  std::shared_lock image_locker(m_state_builder->local_image_ctx->image_lock);
  if (m_state_builder->local_image_ctx->journal == nullptr) {
    image_locker.unlock();

    derr << "local image does not support journaling" << dendl;
    finish(-EINVAL);
    return;
  }

  int r = m_state_builder->local_image_ctx->journal->is_resync_requested(
    m_resync_requested);
  if (r < 0) {
    image_locker.unlock();

    derr << "failed to check if a resync was requested" << dendl;
    finish(r);
    return;
  }

  // snapshot the local tag state for the later comparison against the
  // remote tag chain
  m_local_tag_tid = m_state_builder->local_image_ctx->journal->get_tag_tid();
  m_local_tag_data = m_state_builder->local_image_ctx->journal->get_tag_data();
  dout(10) << "local tag=" << m_local_tag_tid << ", "
           << "local tag data=" << m_local_tag_data << dendl;
  image_locker.unlock();

  if (*m_resync_requested) {
    finish(0);
    return;
  } else if (m_state_builder->remote_client_meta.state ==
               librbd::journal::MIRROR_PEER_STATE_SYNCING &&
             m_local_tag_data.mirror_uuid ==
               m_state_builder->remote_mirror_uuid) {
    // if the initial sync hasn't completed, we cannot replay
    *m_syncing = true;
    finish(0);
    return;
  }

  update_client_state();
}

/**
 * Flips the remote client registration from SYNCING to REPLAYING when the
 * remote still flags the peer as syncing but the local tag is no longer
 * owned by the remote mirror. In every other case this step is skipped and
 * the request proceeds directly to get_remote_tag_class().
 */
template <typename I>
void PrepareReplayRequest<I>::update_client_state() {
  if (m_state_builder->remote_client_meta.state !=
        librbd::journal::MIRROR_PEER_STATE_SYNCING ||
      m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
    get_remote_tag_class();
    return;
  }

  // our local image is not primary, is flagged as syncing on the remote side,
  // but is no longer tied to the remote -- this implies we were forced
  // promoted and then demoted at some point
  dout(15) << dendl;
  update_progress("UPDATE_CLIENT_STATE");

  // work on a copy: the cached meta is only updated once the remote update
  // has succeeded (see handle_update_client_state)
  auto client_meta = m_state_builder->remote_client_meta;
  client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;

  librbd::journal::ClientData client_data(client_meta);
  bufferlist data_bl;
  encode(client_data, data_bl);

  auto ctx = create_context_callback<
    PrepareReplayRequest<I>,
    &PrepareReplayRequest<I>::handle_update_client_state>(this);
  m_state_builder->remote_journaler->update_client(data_bl, ctx);
}

/**
 * Completion of the remote client update: on success the cached remote
 * client meta is switched to REPLAYING and the request continues with
 * get_remote_tag_class(); on failure the request finishes with r.
 */
template <typename I>
void PrepareReplayRequest<I>::handle_update_client_state(int r) {
  dout(15) << "r=" << r << dendl;
  if (r < 0) {
    derr << "failed to update client: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  m_state_builder->remote_client_meta.state =
    librbd::journal::MIRROR_PEER_STATE_REPLAYING;
  get_remote_tag_class();
}

/**
 * Fetches the remote journal's image client registration into m_client; the
 * tag class is extracted from it in handle_get_remote_tag_class().
 */
template <typename I>
void PrepareReplayRequest<I>::get_remote_tag_class() {
  dout(10) << dendl;
  update_progress("GET_REMOTE_TAG_CLASS");

  auto ctx = create_context_callback<
    PrepareReplayRequest<I>,
    &PrepareReplayRequest<I>::handle_get_remote_tag_class>(this);
  m_state_builder->remote_journaler->get_client(
    librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx);
}

/**
 * Decodes the fetched client registration and stores its tag class.
 *
 * Finishes with r on retrieval failure, -EBADMSG if the payload cannot be
 * decoded and -EINVAL if the registration is not an ImageClientMeta;
 * otherwise continues with get_remote_tags().
 */
template <typename I>
void PrepareReplayRequest<I>::handle_get_remote_tag_class(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  librbd::journal::ClientData client_data;
  auto it = m_client.data.cbegin();
  try {
    decode(client_data, it);
  } catch (const buffer::error &err) {
    derr << "failed to decode remote client meta data: " << err.what()
         << dendl;
    finish(-EBADMSG);
    return;
  }

  // pointer form of boost::get yields nullptr when the variant holds a
  // different client meta type
  librbd::journal::ImageClientMeta *client_meta =
    boost::get<librbd::journal::ImageClientMeta>(&client_data.client_meta);
  if (client_meta == nullptr) {
    derr << "unknown remote client registration" << dendl;
    finish(-EINVAL);
    return;
  }

  m_remote_tag_class = client_meta->tag_class;
  dout(10) << "remote tag class=" << m_remote_tag_class << dendl;

  get_remote_tags();
}

/**
 * Fetches the remote journal's tags of class m_remote_tag_class into
 * m_remote_tags; they are evaluated in handle_get_remote_tags().
 */
template <typename I>
void PrepareReplayRequest<I>::get_remote_tags() {
  dout(10) << dendl;
  update_progress("GET_REMOTE_TAGS");

  auto ctx = create_context_callback<
    PrepareReplayRequest<I>,
    &PrepareReplayRequest<I>::handle_get_remote_tags>(this);
  m_state_builder->remote_journaler->get_tags(m_remote_tag_class,
                                              &m_remote_tags, ctx);
}

/**
 * Walks the fetched remote tags and decides whether the local image can be
 * attached to the remote tag chain.
 *
 * Finishes with 0 when either at least one remote tag was decoded and the
 * local tag is owned by the remote mirror, or a remote promotion tag chained
 * to a matching demotion tag was found. Finishes with -EEXIST (split-brain)
 * otherwise, r on retrieval failure and -EBADMSG on a tag decode failure.
 */
template <typename I>
void PrepareReplayRequest<I>::handle_get_remote_tags(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  // At this point, the local image was existing, non-primary, and replaying;
  // and the remote image is primary.  Attempt to link the local image's most
  // recent tag to the remote image's tag chain.
  bool remote_tag_data_valid = false;
  librbd::journal::TagData remote_tag_data;
  // starts out empty; set while a matching demotion tag is the most recent
  // candidate and cleared again by any other tag
  boost::optional<uint64_t> remote_orphan_tag_tid =
    boost::make_optional<uint64_t>(false, 0U);
  bool reconnect_orphan = false;

  // decode the remote tags
  for (auto &remote_tag : m_remote_tags) {
    if (m_local_tag_data.predecessor.commit_valid &&
        m_local_tag_data.predecessor.mirror_uuid ==
          m_state_builder->remote_mirror_uuid &&
        m_local_tag_data.predecessor.tag_tid > remote_tag.tid) {
      dout(10) << "skipping processed predecessor remote tag "
               << remote_tag.tid << dendl;
      continue;
    }

    try {
      auto it = remote_tag.data.cbegin();
      decode(remote_tag_data, it);
      remote_tag_data_valid = true;
    } catch (const buffer::error &err) {
      derr << "failed to decode remote tag " << remote_tag.tid << ": "
           << err.what() << dendl;
      finish(-EBADMSG);
      return;
    }

    dout(10) << "decoded remote tag " << remote_tag.tid << ": "
             << remote_tag_data << dendl;

    if (!m_local_tag_data.predecessor.commit_valid) {
      // newly synced local image (no predecessor) replays from the first tag
      if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) {
        dout(10) << "skipping non-primary remote tag" << dendl;
        continue;
      }

      dout(10) << "using initial primary remote tag" << dendl;
      break;
    }

    if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
      // demotion last available local epoch

      if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid &&
          remote_tag_data.predecessor.commit_valid &&
          remote_tag_data.predecessor.tag_tid ==
            m_local_tag_data.predecessor.tag_tid) {
        // demotion matches remote epoch

        if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid &&
            m_local_tag_data.predecessor.mirror_uuid ==
              librbd::Journal<>::LOCAL_MIRROR_UUID) {
          // local demoted and remote has matching event
          dout(10) << "found matching local demotion tag" << dendl;
          remote_orphan_tag_tid = remote_tag.tid;
          continue;
        }

        if (m_local_tag_data.predecessor.mirror_uuid ==
              m_state_builder->remote_mirror_uuid &&
            remote_tag_data.predecessor.mirror_uuid ==
              librbd::Journal<>::LOCAL_MIRROR_UUID) {
          // remote demoted and local has matching event
          dout(10) << "found matching remote demotion tag" << dendl;
          remote_orphan_tag_tid = remote_tag.tid;
          continue;
        }
      }

      if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID &&
          remote_tag_data.predecessor.mirror_uuid ==
            librbd::Journal<>::ORPHAN_MIRROR_UUID &&
          remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid &&
          remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) {
        // remote promotion tag chained to remote/local demotion tag
        dout(10) << "found chained remote promotion tag" << dendl;
        reconnect_orphan = true;
        break;
      }

      // promotion must follow demotion
      remote_orphan_tag_tid = boost::none;
    }
  }

  if (remote_tag_data_valid &&
      m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
    dout(10) << "local image is in clean replay state" << dendl;
  } else if (reconnect_orphan) {
    dout(10) << "remote image was demoted/promoted" << dendl;
  } else {
    derr << "split-brain detected -- skipping image replay" << dendl;
    finish(-EEXIST);
    return;
  }

  finish(0);
}

/**
 * Logs the state name and forwards it to the progress context, if one was
 * supplied.
 */
template <typename I>
void PrepareReplayRequest<I>::update_progress(const std::string &description) {
  dout(10) << description << dendl;

  if (m_progress_ctx != nullptr) {
    m_progress_ctx->update_progress(description);
  }
}
on_finish) { + return new PrepareReplayRequest( + local_mirror_uuid, progress_ctx, state_builder, resync_requested, + syncing, on_finish); + } + + PrepareReplayRequest( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + bool* resync_requested, + bool* syncing, + Context* on_finish) + : BaseRequest(on_finish), + m_local_mirror_uuid(local_mirror_uuid), + m_progress_ctx(progress_ctx), + m_state_builder(state_builder), + m_resync_requested(resync_requested), + m_syncing(syncing) { + } + + void send() override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * UPDATE_CLIENT_STATE + * | + * v + * GET_REMOTE_TAG_CLASS + * | + * v + * GET_REMOTE_TAGS + * | + * v + * <finish> + * + * @endverbatim + */ + typedef std::list<cls::journal::Tag> Tags; + + std::string m_local_mirror_uuid; + ProgressContext* m_progress_ctx; + StateBuilder<ImageCtxT>* m_state_builder; + bool* m_resync_requested; + bool* m_syncing; + + uint64_t m_local_tag_tid = 0; + librbd::journal::TagData m_local_tag_data; + + uint64_t m_remote_tag_class = 0; + Tags m_remote_tags; + cls::journal::Client m_client; + + void update_client_state(); + void handle_update_client_state(int r); + + void get_remote_tag_class(); + void handle_get_remote_tag_class(int r); + + void get_remote_tags(); + void handle_get_remote_tags(int r); + + void update_progress(const std::string& description); + +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::PrepareReplayRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc new file mode 100644 index 000000000..eb99d5add --- /dev/null +++ 
/**
 * Rounds a value to two decimal places and returns its magnitude.
 *
 * @param value number to round
 * @return |round(value * 100) / 100|; the result is never negative
 */
double round_to_two_places(double value) {
  // fabs() only has floating-point overloads. An unqualified abs() may
  // resolve to the C library's int abs(int) when no floating-point overload
  // is visible in the global namespace, which silently drops the fraction.
  return fabs(round(value * 100) / 100);
}
in_progress = true; + } else { + m_on_finish = on_finish; + } + } + + if (in_progress) { + dout(10) << "previous request is still in progress, ignoring" << dendl; + on_finish->complete(-EAGAIN); + return false; + } + + m_master_position = cls::journal::ObjectPosition(); + m_mirror_position = cls::journal::ObjectPosition(); + + cls::journal::Client master_client, mirror_client; + int r; + + r = m_journaler->get_cached_client(librbd::Journal<>::IMAGE_CLIENT_ID, + &master_client); + if (r < 0) { + derr << "error retrieving registered master client: " + << cpp_strerror(r) << dendl; + } else { + r = m_journaler->get_cached_client(m_mirror_uuid, &mirror_client); + if (r < 0) { + derr << "error retrieving registered mirror client: " + << cpp_strerror(r) << dendl; + } + } + + if (!master_client.commit_position.object_positions.empty()) { + m_master_position = + *(master_client.commit_position.object_positions.begin()); + } + + if (!mirror_client.commit_position.object_positions.empty()) { + m_mirror_position = + *(mirror_client.commit_position.object_positions.begin()); + } + + if (!calculate_behind_master_or_send_update()) { + dout(20) << "need to update tag cache" << dendl; + return false; + } + + format(description); + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == on_finish); + m_on_finish = nullptr; + } + + on_finish->complete(-EEXIST); + return true; +} + +template <typename I> +bool ReplayStatusFormatter<I>::calculate_behind_master_or_send_update() { + dout(20) << "m_master_position=" << m_master_position + << ", m_mirror_position=" << m_mirror_position << dendl; + + m_entries_behind_master = 0; + + if (m_master_position == cls::journal::ObjectPosition() || + m_master_position.tag_tid < m_mirror_position.tag_tid) { + return true; + } + + cls::journal::ObjectPosition master = m_master_position; + uint64_t mirror_tag_tid = m_mirror_position.tag_tid; + + while (master.tag_tid > mirror_tag_tid) { + auto tag_it = m_tag_cache.find(master.tag_tid); + if 
(tag_it == m_tag_cache.end()) { + send_update_tag_cache(master.tag_tid, mirror_tag_tid); + return false; + } + librbd::journal::TagData &tag_data = tag_it->second; + m_entries_behind_master += master.entry_tid; + master = {0, tag_data.predecessor.tag_tid, tag_data.predecessor.entry_tid}; + } + if (master.tag_tid == mirror_tag_tid && + master.entry_tid > m_mirror_position.entry_tid) { + m_entries_behind_master += master.entry_tid - m_mirror_position.entry_tid; + } + + dout(20) << "clearing tags not needed any more (below mirror position)" + << dendl; + + uint64_t tag_tid = mirror_tag_tid; + size_t old_size = m_tag_cache.size(); + while (tag_tid != 0) { + auto tag_it = m_tag_cache.find(tag_tid); + if (tag_it == m_tag_cache.end()) { + break; + } + librbd::journal::TagData &tag_data = tag_it->second; + + dout(20) << "erasing tag " << tag_data << "for tag_tid " << tag_tid + << dendl; + + tag_tid = tag_data.predecessor.tag_tid; + m_tag_cache.erase(tag_it); + } + + dout(20) << old_size - m_tag_cache.size() << " entries cleared" << dendl; + + return true; +} + +template <typename I> +void ReplayStatusFormatter<I>::send_update_tag_cache(uint64_t master_tag_tid, + uint64_t mirror_tag_tid) { + if (master_tag_tid <= mirror_tag_tid || + m_tag_cache.find(master_tag_tid) != m_tag_cache.end()) { + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + std::swap(m_on_finish, on_finish); + } + + ceph_assert(on_finish); + on_finish->complete(0); + return; + } + + dout(20) << "master_tag_tid=" << master_tag_tid << ", mirror_tag_tid=" + << mirror_tag_tid << dendl; + + auto ctx = new LambdaContext( + [this, master_tag_tid, mirror_tag_tid](int r) { + handle_update_tag_cache(master_tag_tid, mirror_tag_tid, r); + }); + m_journaler->get_tag(master_tag_tid, &m_tag, ctx); +} + +template <typename I> +void ReplayStatusFormatter<I>::handle_update_tag_cache(uint64_t master_tag_tid, + uint64_t mirror_tag_tid, + int r) { + librbd::journal::TagData tag_data; + + if (r < 0) { + derr 
<< "error retrieving tag " << master_tag_tid << ": " << cpp_strerror(r) + << dendl; + } else { + dout(20) << "retrieved tag " << master_tag_tid << ": " << m_tag << dendl; + + auto it = m_tag.data.cbegin(); + try { + decode(tag_data, it); + } catch (const buffer::error &err) { + derr << "error decoding tag " << master_tag_tid << ": " << err.what() + << dendl; + } + } + + if (tag_data.predecessor.mirror_uuid != + librbd::Journal<>::LOCAL_MIRROR_UUID && + tag_data.predecessor.mirror_uuid != + librbd::Journal<>::ORPHAN_MIRROR_UUID) { + dout(20) << "hit remote image non-primary epoch" << dendl; + tag_data.predecessor = {}; + } + + dout(20) << "decoded tag " << master_tag_tid << ": " << tag_data << dendl; + + m_tag_cache[master_tag_tid] = tag_data; + send_update_tag_cache(tag_data.predecessor.tag_tid, mirror_tag_tid); +} + +template <typename I> +void ReplayStatusFormatter<I>::format(std::string *description) { + dout(20) << "m_master_position=" << m_master_position + << ", m_mirror_position=" << m_mirror_position + << ", m_entries_behind_master=" << m_entries_behind_master << dendl; + + json_spirit::mObject root_obj; + root_obj["primary_position"] = to_json_object(m_master_position); + root_obj["non_primary_position"] = to_json_object(m_mirror_position); + root_obj["entries_behind_primary"] = ( + m_entries_behind_master > 0 ? 
m_entries_behind_master : 0); + + m_bytes_per_second(0); + root_obj["bytes_per_second"] = round_to_two_places( + m_bytes_per_second.get_average()); + + m_entries_per_second(0); + auto entries_per_second = m_entries_per_second.get_average(); + root_obj["entries_per_second"] = round_to_two_places(entries_per_second); + + if (m_entries_behind_master > 0 && entries_per_second > 0) { + std::uint64_t seconds_until_synced = round_to_two_places( + m_entries_behind_master / entries_per_second); + if (seconds_until_synced >= std::numeric_limits<uint64_t>::max()) { + seconds_until_synced = std::numeric_limits<uint64_t>::max(); + } + + root_obj["seconds_until_synced"] = seconds_until_synced; + } + + *description = json_spirit::write( + root_obj, json_spirit::remove_trailing_zeros); +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::ReplayStatusFormatter<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h new file mode 100644 index 000000000..5dbbfe10d --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h @@ -0,0 +1,70 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H +#define RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H + +#include "include/Context.h" +#include "common/ceph_mutex.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h" + +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename ImageCtxT = librbd::ImageCtx> +class 
ReplayStatusFormatter { +public: + typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler; + + static ReplayStatusFormatter* create(Journaler *journaler, + const std::string &mirror_uuid) { + return new ReplayStatusFormatter(journaler, mirror_uuid); + } + + static void destroy(ReplayStatusFormatter* formatter) { + delete formatter; + } + + ReplayStatusFormatter(Journaler *journaler, const std::string &mirror_uuid); + + void handle_entry_processed(uint32_t bytes); + + bool get_or_send_update(std::string *description, Context *on_finish); + +private: + Journaler *m_journaler; + std::string m_mirror_uuid; + ceph::mutex m_lock; + Context *m_on_finish = nullptr; + cls::journal::ObjectPosition m_master_position; + cls::journal::ObjectPosition m_mirror_position; + int64_t m_entries_behind_master = 0; + cls::journal::Tag m_tag; + std::map<uint64_t, librbd::journal::TagData> m_tag_cache; + + TimeRollingMean m_bytes_per_second; + TimeRollingMean m_entries_per_second; + + bool calculate_behind_master_or_send_update(); + void send_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid); + void handle_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid, + int r); + void format(std::string *description); +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::ReplayStatusFormatter<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc new file mode 100644 index 000000000..20560038c --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc @@ -0,0 +1,1317 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Replayer.h" +#include "common/debug.h" +#include "common/errno.h" +#include 
"common/perf_counters.h" +#include "common/perf_counters_key.h" +#include "common/Timer.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/journal/Replay.h" +#include "journal/Journaler.h" +#include "journal/JournalMetadataListener.h" +#include "journal/ReplayHandler.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h" +#include "tools/rbd_mirror/image_replayer/ReplayerListener.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h" +#include "tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "Replayer: " << this << " " << __func__ << ": " + +extern PerfCounters *g_journal_perf_counters; + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +namespace { + +uint32_t calculate_replay_delay(const utime_t &event_time, + int mirroring_replay_delay) { + if (mirroring_replay_delay <= 0) { + return 0; + } + + utime_t now = ceph_clock_now(); + if (event_time + mirroring_replay_delay <= now) { + return 0; + } + + // ensure it is rounded up when converting to integer + return (event_time + mirroring_replay_delay - now) + 1; +} + +} // anonymous namespace + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +template <typename I> +struct Replayer<I>::C_ReplayCommitted : public Context { + Replayer* replayer; + ReplayEntry replay_entry; + uint64_t replay_bytes; + utime_t replay_start_time; + + C_ReplayCommitted(Replayer* replayer, ReplayEntry &&replay_entry, + uint64_t replay_bytes, const utime_t &replay_start_time) 
+ : replayer(replayer), replay_entry(std::move(replay_entry)), + replay_bytes(replay_bytes), replay_start_time(replay_start_time) { + } + + void finish(int r) override { + replayer->handle_process_entry_safe(replay_entry, replay_bytes, + replay_start_time, r); + } +}; + +template <typename I> +struct Replayer<I>::RemoteJournalerListener + : public ::journal::JournalMetadataListener { + Replayer* replayer; + + RemoteJournalerListener(Replayer* replayer) : replayer(replayer) {} + + void handle_update(::journal::JournalMetadata*) override { + auto ctx = new C_TrackedOp( + replayer->m_in_flight_op_tracker, + new LambdaContext([this](int r) { + replayer->handle_remote_journal_metadata_updated(); + })); + replayer->m_threads->work_queue->queue(ctx, 0); + } +}; + +template <typename I> +struct Replayer<I>::RemoteReplayHandler : public ::journal::ReplayHandler { + Replayer* replayer; + + RemoteReplayHandler(Replayer* replayer) : replayer(replayer) {} + ~RemoteReplayHandler() override {}; + + void handle_entries_available() override { + replayer->handle_replay_ready(); + } + + void handle_complete(int r) override { + std::string error; + if (r == -ENOMEM) { + error = "not enough memory in autotune cache"; + } else if (r < 0) { + error = "replay completed with error: " + cpp_strerror(r); + } + replayer->handle_replay_complete(r, error); + } +}; + +template <typename I> +struct Replayer<I>::LocalJournalListener + : public librbd::journal::Listener { + Replayer* replayer; + + LocalJournalListener(Replayer* replayer) : replayer(replayer) { + } + + void handle_close() override { + replayer->handle_replay_complete(0, ""); + } + + void handle_promoted() override { + replayer->handle_replay_complete(0, "force promoted"); + } + + void handle_resync() override { + replayer->handle_resync_image(); + } +}; + +template <typename I> +Replayer<I>::Replayer( + Threads<I>* threads, + const std::string& local_mirror_uuid, + StateBuilder<I>* state_builder, + ReplayerListener* 
replayer_listener) + : m_threads(threads), + m_local_mirror_uuid(local_mirror_uuid), + m_state_builder(state_builder), + m_replayer_listener(replayer_listener), + m_lock(ceph::make_mutex(librbd::util::unique_lock_name( + "rbd::mirror::image_replayer::journal::Replayer", this))) { + dout(10) << dendl; +} + +template <typename I> +Replayer<I>::~Replayer() { + dout(10) << dendl; + + { + std::unique_lock locker{m_lock}; + unregister_perf_counters(); + } + + ceph_assert(m_remote_listener == nullptr); + ceph_assert(m_local_journal_listener == nullptr); + ceph_assert(m_local_journal_replay == nullptr); + ceph_assert(m_remote_replay_handler == nullptr); + ceph_assert(m_event_preprocessor == nullptr); + ceph_assert(m_replay_status_formatter == nullptr); + ceph_assert(m_delayed_preprocess_task == nullptr); + ceph_assert(m_flush_local_replay_task == nullptr); + ceph_assert(m_state_builder->local_image_ctx == nullptr); +} + +template <typename I> +void Replayer<I>::init(Context* on_finish) { + dout(10) << dendl; + + { + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock image_locker{local_image_ctx->image_lock}; + m_image_spec = util::compute_image_spec(local_image_ctx->md_ctx, + local_image_ctx->name); + } + + { + std::unique_lock locker{m_lock}; + register_perf_counters(); + } + + ceph_assert(m_on_init_shutdown == nullptr); + m_on_init_shutdown = on_finish; + + init_remote_journaler(); +} + +template <typename I> +void Replayer<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + ceph_assert(m_on_init_shutdown == nullptr); + m_on_init_shutdown = on_finish; + + if (m_state == STATE_INIT) { + // raced with the last piece of the init state machine + return; + } else if (m_state == STATE_REPLAYING) { + m_state = STATE_COMPLETE; + } + + // if shutting down due to an error notification, we don't + // need to propagate the same error again + m_error_code = 0; + m_error_description = ""; + + 
cancel_delayed_preprocess_task(); + cancel_flush_local_replay_task(); + wait_for_flush(); +} + +template <typename I> +void Replayer<I>::flush(Context* on_finish) { + dout(10) << dendl; + + flush_local_replay(new C_TrackedOp(m_in_flight_op_tracker, on_finish)); +} + +template <typename I> +bool Replayer<I>::get_replay_status(std::string* description, + Context* on_finish) { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + if (m_replay_status_formatter == nullptr) { + derr << "replay not running" << dendl; + locker.unlock(); + + on_finish->complete(-EAGAIN); + return false; + } + + on_finish = new C_TrackedOp(m_in_flight_op_tracker, on_finish); + return m_replay_status_formatter->get_or_send_update(description, + on_finish); +} + +template <typename I> +void Replayer<I>::init_remote_journaler() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + Replayer, &Replayer<I>::handle_init_remote_journaler>(this); + m_state_builder->remote_journaler->init(ctx); +} + +template <typename I> +void Replayer<I>::handle_init_remote_journaler(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + derr << "failed to initialize remote journal: " << cpp_strerror(r) << dendl; + handle_replay_complete(locker, r, "error initializing remote journal"); + close_local_image(); + return; + } + + // listen for metadata updates to check for disconnect events + ceph_assert(m_remote_listener == nullptr); + m_remote_listener = new RemoteJournalerListener(this); + m_state_builder->remote_journaler->add_listener(m_remote_listener); + + cls::journal::Client remote_client; + r = m_state_builder->remote_journaler->get_cached_client(m_local_mirror_uuid, + &remote_client); + if (r < 0) { + derr << "error retrieving remote journal client: " << cpp_strerror(r) + << dendl; + handle_replay_complete(locker, r, "error retrieving remote journal client"); + close_local_image(); + return; + } + + std::string error; + r = 
validate_remote_client_state(remote_client, + &m_state_builder->remote_client_meta, + &m_resync_requested, &error); + if (r < 0) { + handle_replay_complete(locker, r, error); + close_local_image(); + return; + } + + start_external_replay(locker); +} + +template <typename I> +void Replayer<I>::start_external_replay(std::unique_lock<ceph::mutex>& locker) { + dout(10) << dendl; + + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock local_image_locker{local_image_ctx->image_lock}; + + ceph_assert(m_local_journal == nullptr); + m_local_journal = local_image_ctx->journal; + if (m_local_journal == nullptr) { + local_image_locker.unlock(); + + derr << "local image journal closed" << dendl; + handle_replay_complete(locker, -EINVAL, "error accessing local journal"); + close_local_image(); + return; + } + + // safe to hold pointer to journal after external playback starts + Context *start_ctx = create_context_callback< + Replayer, &Replayer<I>::handle_start_external_replay>(this); + m_local_journal->start_external_replay(&m_local_journal_replay, start_ctx); +} + +template <typename I> +void Replayer<I>::handle_start_external_replay(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + ceph_assert(m_local_journal_replay == nullptr); + derr << "error starting external replay on local image " + << m_state_builder->local_image_ctx->id << ": " + << cpp_strerror(r) << dendl; + + handle_replay_complete(locker, r, "error starting replay on local image"); + close_local_image(); + return; + } + + if (!notify_init_complete(locker)) { + return; + } + + m_state = STATE_REPLAYING; + + // check for resync/promotion state after adding listener + if (!add_local_journal_listener(locker)) { + return; + } + + // start remote journal replay + m_event_preprocessor = EventPreprocessor<I>::create( + *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler, + m_local_mirror_uuid, &m_state_builder->remote_client_meta, + 
m_threads->work_queue); + m_replay_status_formatter = ReplayStatusFormatter<I>::create( + m_state_builder->remote_journaler, m_local_mirror_uuid); + + auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct); + double poll_seconds = cct->_conf.get_val<double>( + "rbd_mirror_journal_poll_age"); + m_remote_replay_handler = new RemoteReplayHandler(this); + m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler, + poll_seconds); + + notify_status_updated(); +} + +template <typename I> +bool Replayer<I>::add_local_journal_listener( + std::unique_lock<ceph::mutex>& locker) { + dout(10) << dendl; + + // listen for promotion and resync requests against local journal + ceph_assert(m_local_journal_listener == nullptr); + m_local_journal_listener = new LocalJournalListener(this); + m_local_journal->add_listener(m_local_journal_listener); + + // verify that the local image wasn't force-promoted and that a resync hasn't + // been requested now that we are listening for events + if (m_local_journal->is_tag_owner()) { + dout(10) << "local image force-promoted" << dendl; + handle_replay_complete(locker, 0, "force promoted"); + return false; + } + + bool resync_requested = false; + int r = m_local_journal->is_resync_requested(&resync_requested); + if (r < 0) { + dout(10) << "failed to determine resync state: " << cpp_strerror(r) + << dendl; + handle_replay_complete(locker, r, "error parsing resync state"); + return false; + } else if (resync_requested) { + dout(10) << "local image resync requested" << dendl; + handle_replay_complete(locker, 0, "resync requested"); + return false; + } + + return true; +} + +template <typename I> +bool Replayer<I>::notify_init_complete(std::unique_lock<ceph::mutex>& locker) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + ceph_assert(m_state == STATE_INIT); + + // notify that init has completed + Context *on_finish = nullptr; + std::swap(m_on_init_shutdown, on_finish); + + 
locker.unlock(); + on_finish->complete(0); + locker.lock(); + + if (m_on_init_shutdown != nullptr) { + // shut down requested after we notified init complete but before we + // grabbed the lock + close_local_image(); + return false; + } + + return true; +} + +template <typename I> +void Replayer<I>::wait_for_flush() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + // ensure that we don't have two concurrent local journal replay shut downs + dout(10) << dendl; + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_wait_for_flush>(this)); + m_flush_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Replayer<I>::handle_wait_for_flush(int r) { + dout(10) << "r=" << r << dendl; + + shut_down_local_journal_replay(); +} + +template <typename I> +void Replayer<I>::shut_down_local_journal_replay() { + std::unique_lock locker{m_lock}; + + if (m_local_journal_replay == nullptr) { + wait_for_event_replay(); + return; + } + + // It's required to stop the local journal replay state machine prior to + // waiting for the events to complete. This is to ensure that IO is properly + // flushed (it might be batched), wait for any running ops to complete, and + // to cancel any ops waiting for their associated OnFinish events. 
+ dout(10) << dendl; + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_shut_down_local_journal_replay>(this); + m_local_journal_replay->shut_down(true, ctx); +} + +template <typename I> +void Replayer<I>::handle_shut_down_local_journal_replay(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + derr << "error shutting down journal replay: " << cpp_strerror(r) << dendl; + handle_replay_error(r, "failed to shut down local journal replay"); + } + + wait_for_event_replay(); +} + +template <typename I> +void Replayer<I>::wait_for_event_replay() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(10) << dendl; + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_wait_for_event_replay>(this)); + m_event_replay_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Replayer<I>::handle_wait_for_event_replay(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + close_local_image(); +} + +template <typename I> +void Replayer<I>::close_local_image() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + if (m_state_builder->local_image_ctx == nullptr) { + stop_remote_journaler_replay(); + return; + } + + dout(10) << dendl; + if (m_local_journal_listener != nullptr) { + // blocks if listener notification is in-progress + m_local_journal->remove_listener(m_local_journal_listener); + delete m_local_journal_listener; + m_local_journal_listener = nullptr; + } + + if (m_local_journal_replay != nullptr) { + m_local_journal->stop_external_replay(); + m_local_journal_replay = nullptr; + } + + if (m_event_preprocessor != nullptr) { + image_replayer::journal::EventPreprocessor<I>::destroy( + m_event_preprocessor); + m_event_preprocessor = nullptr; + } + + m_local_journal.reset(); + + // NOTE: it's important to ensure that the local image is fully + // closed before attempting to close the remote 
journal in + // case the remote cluster is unreachable + ceph_assert(m_state_builder->local_image_ctx != nullptr); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_close_local_image>(this); + auto request = image_replayer::CloseImageRequest<I>::create( + &m_state_builder->local_image_ctx, ctx); + request->send(); +} + + +template <typename I> +void Replayer<I>::handle_close_local_image(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + derr << "error closing local iamge: " << cpp_strerror(r) << dendl; + handle_replay_error(r, "failed to close local image"); + } + + ceph_assert(m_state_builder->local_image_ctx == nullptr); + stop_remote_journaler_replay(); +} + +template <typename I> +void Replayer<I>::stop_remote_journaler_replay() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + if (m_state_builder->remote_journaler == nullptr) { + wait_for_in_flight_ops(); + return; + } else if (m_remote_replay_handler == nullptr) { + wait_for_in_flight_ops(); + return; + } + + dout(10) << dendl; + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_stop_remote_journaler_replay>(this)); + m_state_builder->remote_journaler->stop_replay(ctx); +} + +template <typename I> +void Replayer<I>::handle_stop_remote_journaler_replay(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + derr << "failed to stop remote journaler replay : " << cpp_strerror(r) + << dendl; + handle_replay_error(r, "failed to stop remote journaler replay"); + } + + delete m_remote_replay_handler; + m_remote_replay_handler = nullptr; + + wait_for_in_flight_ops(); +} + +template <typename I> +void Replayer<I>::wait_for_in_flight_ops() { + dout(10) << dendl; + if (m_remote_listener != nullptr) { + m_state_builder->remote_journaler->remove_listener(m_remote_listener); + delete m_remote_listener; + m_remote_listener = 
nullptr; + } + + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_wait_for_in_flight_ops>(this)); + m_in_flight_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Replayer<I>::handle_wait_for_in_flight_ops(int r) { + dout(10) << "r=" << r << dendl; + + ReplayStatusFormatter<I>::destroy(m_replay_status_formatter); + m_replay_status_formatter = nullptr; + + Context* on_init_shutdown = nullptr; + { + std::unique_lock locker{m_lock}; + ceph_assert(m_on_init_shutdown != nullptr); + std::swap(m_on_init_shutdown, on_init_shutdown); + m_state = STATE_COMPLETE; + } + on_init_shutdown->complete(m_error_code); +} + +template <typename I> +void Replayer<I>::handle_remote_journal_metadata_updated() { + dout(20) << dendl; + + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING) { + return; + } + + cls::journal::Client remote_client; + int r = m_state_builder->remote_journaler->get_cached_client( + m_local_mirror_uuid, &remote_client); + if (r < 0) { + derr << "failed to retrieve client: " << cpp_strerror(r) << dendl; + return; + } + + librbd::journal::MirrorPeerClientMeta remote_client_meta; + std::string error; + r = validate_remote_client_state(remote_client, &remote_client_meta, + &m_resync_requested, &error); + if (r < 0) { + dout(0) << "client flagged disconnected, stopping image replay" << dendl; + handle_replay_complete(locker, r, error); + } +} + +template <typename I> +void Replayer<I>::schedule_flush_local_replay_task() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + std::unique_lock timer_locker{m_threads->timer_lock}; + if (m_state != STATE_REPLAYING || m_flush_local_replay_task != nullptr) { + return; + } + + dout(15) << dendl; + m_flush_local_replay_task = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_flush_local_replay_task>(this)); + m_threads->timer->add_event_after(30, 
m_flush_local_replay_task); +} + +template <typename I> +void Replayer<I>::cancel_flush_local_replay_task() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + std::unique_lock timer_locker{m_threads->timer_lock}; + if (m_flush_local_replay_task != nullptr) { + dout(10) << dendl; + m_threads->timer->cancel_event(m_flush_local_replay_task); + m_flush_local_replay_task = nullptr; + } +} + +template <typename I> +void Replayer<I>::handle_flush_local_replay_task(int) { + dout(15) << dendl; + + m_in_flight_op_tracker.start_op(); + auto on_finish = new LambdaContext([this](int) { + std::unique_lock locker{m_lock}; + + { + std::unique_lock timer_locker{m_threads->timer_lock}; + m_flush_local_replay_task = nullptr; + } + + notify_status_updated(); + m_in_flight_op_tracker.finish_op(); + }); + flush_local_replay(on_finish); +} + +template <typename I> +void Replayer<I>::flush_local_replay(Context* on_flush) { + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING) { + locker.unlock(); + on_flush->complete(0); + return; + } else if (m_local_journal_replay == nullptr) { + // raced w/ a tag creation stop/start, which implies that + // the replay is flushed + locker.unlock(); + flush_commit_position(on_flush); + return; + } + + dout(15) << dendl; + auto ctx = new LambdaContext( + [this, on_flush](int r) { + handle_flush_local_replay(on_flush, r); + }); + m_local_journal_replay->flush(ctx); +} + +template <typename I> +void Replayer<I>::handle_flush_local_replay(Context* on_flush, int r) { + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "error flushing local replay: " << cpp_strerror(r) << dendl; + on_flush->complete(r); + return; + } + + flush_commit_position(on_flush); +} + +template <typename I> +void Replayer<I>::flush_commit_position(Context* on_flush) { + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING) { + locker.unlock(); + on_flush->complete(0); + return; + } + + dout(15) << dendl; + auto ctx = new LambdaContext( + [this, 
on_flush](int r) { + handle_flush_commit_position(on_flush, r); + }); + m_state_builder->remote_journaler->flush_commit_position(ctx); +} + +template <typename I> +void Replayer<I>::handle_flush_commit_position(Context* on_flush, int r) { + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "error flushing remote journal commit position: " + << cpp_strerror(r) << dendl; + } + + on_flush->complete(r); +} + +template <typename I> +void Replayer<I>::handle_replay_error(int r, const std::string &error) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + if (m_error_code == 0) { + m_error_code = r; + m_error_description = error; + } +} + +template <typename I> +bool Replayer<I>::is_replay_complete() const { + std::unique_lock locker{m_lock}; + return is_replay_complete(locker); +} + +template <typename I> +bool Replayer<I>::is_replay_complete( + const std::unique_lock<ceph::mutex>&) const { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + return (m_state == STATE_COMPLETE); +} + +template <typename I> +void Replayer<I>::handle_replay_complete(int r, const std::string &error) { + std::unique_lock locker{m_lock}; + handle_replay_complete(locker, r, error); +} + +template <typename I> +void Replayer<I>::handle_replay_complete( + const std::unique_lock<ceph::mutex>&, int r, const std::string &error) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(10) << "r=" << r << ", error=" << error << dendl; + if (r < 0) { + derr << "replay encountered an error: " << cpp_strerror(r) << dendl; + handle_replay_error(r, error); + } + + if (m_state != STATE_REPLAYING) { + return; + } + + m_state = STATE_COMPLETE; + notify_status_updated(); +} + +template <typename I> +void Replayer<I>::handle_replay_ready() { + std::unique_lock locker{m_lock}; + handle_replay_ready(locker); +} + +template <typename I> +void Replayer<I>::handle_replay_ready( + std::unique_lock<ceph::mutex>& locker) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(20) << dendl; + if 
(is_replay_complete(locker)) { + return; + } + + if (!m_state_builder->remote_journaler->try_pop_front(&m_replay_entry, + &m_replay_tag_tid)) { + dout(20) << "no entries ready for replay" << dendl; + return; + } + + // can safely drop lock once the entry is tracked + m_event_replay_tracker.start_op(); + locker.unlock(); + + // keep the two fields separated so the values do not run together in the log + dout(20) << "entry tid=" << m_replay_entry.get_commit_tid() << ", " + << "tag_tid=" << m_replay_tag_tid << dendl; + if (!m_replay_tag_valid || m_replay_tag.tid != m_replay_tag_tid) { + // must allocate a new local journal tag prior to processing + replay_flush(); + return; + } + + preprocess_entry(); +} + +template <typename I> +void Replayer<I>::replay_flush() { + dout(10) << dendl; + m_flush_tracker.start_op(); + + // shut down the replay to flush all IO and ops and create a new + // replayer to handle the new tag epoch + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_replay_flush_shut_down>(this); + ceph_assert(m_local_journal_replay != nullptr); + m_local_journal_replay->shut_down(false, ctx); +} + +template <typename I> +void Replayer<I>::handle_replay_flush_shut_down(int r) { + std::unique_lock locker{m_lock}; + dout(10) << "r=" << r << dendl; + + ceph_assert(m_local_journal != nullptr); + ceph_assert(m_local_journal_listener != nullptr); + + // blocks if listener notification is in-progress + m_local_journal->remove_listener(m_local_journal_listener); + delete m_local_journal_listener; + m_local_journal_listener = nullptr; + + m_local_journal->stop_external_replay(); + m_local_journal_replay = nullptr; + m_local_journal.reset(); + + if (r < 0) { + locker.unlock(); + + handle_replay_flush(r); + return; + } + + // journal might have been closed now that we stopped external replay + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock local_image_locker{local_image_ctx->image_lock}; + m_local_journal = local_image_ctx->journal; + if (m_local_journal == nullptr) { + local_image_locker.unlock(); + 
locker.unlock(); + + derr << "local image journal closed" << dendl; + handle_replay_flush(-EINVAL); + return; + } + + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_replay_flush>(this); + m_local_journal->start_external_replay(&m_local_journal_replay, ctx); +} + +template <typename I> +void Replayer<I>::handle_replay_flush(int r) { + std::unique_lock locker{m_lock}; + dout(10) << "r=" << r << dendl; + m_flush_tracker.finish_op(); + + if (r < 0) { + derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl; + handle_replay_complete(locker, r, "replay flush encountered an error"); + m_event_replay_tracker.finish_op(); + return; + } else if (is_replay_complete(locker)) { + m_event_replay_tracker.finish_op(); + return; + } + + // check for resync/promotion state after adding listener + if (!add_local_journal_listener(locker)) { + m_event_replay_tracker.finish_op(); + return; + } + locker.unlock(); + + get_remote_tag(); +} + +template <typename I> +void Replayer<I>::get_remote_tag() { + dout(15) << "tag_tid: " << m_replay_tag_tid << dendl; + + Context *ctx = create_context_callback< + Replayer, &Replayer<I>::handle_get_remote_tag>(this); + m_state_builder->remote_journaler->get_tag(m_replay_tag_tid, &m_replay_tag, + ctx); +} + +template <typename I> +void Replayer<I>::handle_get_remote_tag(int r) { + dout(15) << "r=" << r << dendl; + + if (r == 0) { + try { + auto it = m_replay_tag.data.cbegin(); + decode(m_replay_tag_data, it); + } catch (const buffer::error &err) { + r = -EBADMSG; + } + } + + if (r < 0) { + derr << "failed to retrieve remote tag " << m_replay_tag_tid << ": " + << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to retrieve remote tag"); + m_event_replay_tracker.finish_op(); + return; + } + + m_replay_tag_valid = true; + dout(15) << "decoded remote tag " << m_replay_tag_tid << ": " + << m_replay_tag_data << dendl; + + allocate_local_tag(); +} + +template <typename I> +void 
Replayer<I>::allocate_local_tag() { + dout(15) << dendl; + + std::string mirror_uuid = m_replay_tag_data.mirror_uuid; + if (mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { + mirror_uuid = m_state_builder->remote_mirror_uuid; + } else if (mirror_uuid == m_local_mirror_uuid) { + mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID; + } else if (mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) { + // handle possible edge condition where daemon can failover and + // the local image has already been promoted/demoted + auto local_tag_data = m_local_journal->get_tag_data(); + if (local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID && + (local_tag_data.predecessor.commit_valid && + local_tag_data.predecessor.mirror_uuid == + librbd::Journal<>::LOCAL_MIRROR_UUID)) { + dout(15) << "skipping stale demotion event" << dendl; + handle_process_entry_safe(m_replay_entry, m_replay_bytes, + m_replay_start_time, 0); + handle_replay_ready(); + return; + } else { + dout(5) << "encountered image demotion: stopping" << dendl; + handle_replay_complete(0, ""); + } + } + + librbd::journal::TagPredecessor predecessor(m_replay_tag_data.predecessor); + if (predecessor.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { + predecessor.mirror_uuid = m_state_builder->remote_mirror_uuid; + } else if (predecessor.mirror_uuid == m_local_mirror_uuid) { + predecessor.mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID; + } + + dout(15) << "mirror_uuid=" << mirror_uuid << ", " + << "predecessor=" << predecessor << ", " + << "replay_tag_tid=" << m_replay_tag_tid << dendl; + Context *ctx = create_context_callback< + Replayer, &Replayer<I>::handle_allocate_local_tag>(this); + m_local_journal->allocate_tag(mirror_uuid, predecessor, ctx); +} + +template <typename I> +void Replayer<I>::handle_allocate_local_tag(int r) { + dout(15) << "r=" << r << ", " + << "tag_tid=" << m_local_journal->get_tag_tid() << dendl; + if (r < 0) { + derr << "failed to allocate journal tag: " << 
cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to allocate journal tag"); + m_event_replay_tracker.finish_op(); + return; + } + + preprocess_entry(); +} + +template <typename I> +void Replayer<I>::preprocess_entry() { + dout(20) << "preprocessing entry tid=" << m_replay_entry.get_commit_tid() + << dendl; + + bufferlist data = m_replay_entry.get_data(); + auto it = data.cbegin(); + int r = m_local_journal_replay->decode(&it, &m_event_entry); + if (r < 0) { + derr << "failed to decode journal event" << dendl; + handle_replay_complete(r, "failed to decode journal event"); + m_event_replay_tracker.finish_op(); + return; + } + + m_replay_bytes = data.length(); + uint32_t delay = calculate_replay_delay( + m_event_entry.timestamp, + m_state_builder->local_image_ctx->mirroring_replay_delay); + if (delay == 0) { + handle_preprocess_entry_ready(0); + return; + } + + std::unique_lock locker{m_lock}; + if (is_replay_complete(locker)) { + // don't schedule a delayed replay task if a shut-down is in-progress + m_event_replay_tracker.finish_op(); + return; + } + + dout(20) << "delaying replay by " << delay << " sec" << dendl; + std::unique_lock timer_locker{m_threads->timer_lock}; + ceph_assert(m_delayed_preprocess_task == nullptr); + m_delayed_preprocess_task = create_context_callback< + Replayer<I>, &Replayer<I>::handle_delayed_preprocess_task>(this); + m_threads->timer->add_event_after(delay, m_delayed_preprocess_task); +} + +template <typename I> +void Replayer<I>::handle_delayed_preprocess_task(int r) { + dout(20) << "r=" << r << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock)); + m_delayed_preprocess_task = nullptr; + + m_threads->work_queue->queue(create_context_callback< + Replayer, &Replayer<I>::handle_preprocess_entry_ready>(this), 0); +} + +template <typename I> +void Replayer<I>::handle_preprocess_entry_ready(int r) { + dout(20) << "r=" << r << dendl; + ceph_assert(r == 0); + + m_replay_start_time = ceph_clock_now(); + if 
(!m_event_preprocessor->is_required(m_event_entry)) { + process_entry(); + return; + } + + Context *ctx = create_context_callback< + Replayer, &Replayer<I>::handle_preprocess_entry_safe>(this); + m_event_preprocessor->preprocess(&m_event_entry, ctx); +} + +template <typename I> +void Replayer<I>::handle_preprocess_entry_safe(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + if (r == -ECANCELED) { + handle_replay_complete(0, "lost exclusive lock"); + } else { + derr << "failed to preprocess journal event" << dendl; + handle_replay_complete(r, "failed to preprocess journal event"); + } + + m_event_replay_tracker.finish_op(); + return; + } + + process_entry(); +} + +template <typename I> +void Replayer<I>::process_entry() { + dout(20) << "processing entry tid=" << m_replay_entry.get_commit_tid() + << dendl; + + Context *on_ready = create_context_callback< + Replayer, &Replayer<I>::handle_process_entry_ready>(this); + Context *on_commit = new C_ReplayCommitted(this, std::move(m_replay_entry), + m_replay_bytes, + m_replay_start_time); + + m_local_journal_replay->process(m_event_entry, on_ready, on_commit); +} + +template <typename I> +void Replayer<I>::handle_process_entry_ready(int r) { + std::unique_lock locker{m_lock}; + + dout(20) << dendl; + ceph_assert(r == 0); + + bool update_status = false; + { + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock image_locker{local_image_ctx->image_lock}; + auto image_spec = util::compute_image_spec(local_image_ctx->md_ctx, + local_image_ctx->name); + if (m_image_spec != image_spec) { + m_image_spec = image_spec; + update_status = true; + } + } + + m_replay_status_formatter->handle_entry_processed(m_replay_bytes); + + if (update_status) { + unregister_perf_counters(); + register_perf_counters(); + notify_status_updated(); + } + + // attempt to process the next event + handle_replay_ready(locker); +} + +template <typename I> +void Replayer<I>::handle_process_entry_safe( + const ReplayEntry 
&replay_entry, uint64_t replay_bytes, + const utime_t &replay_start_time, int r) { + dout(20) << "commit_tid=" << replay_entry.get_commit_tid() << ", r=" << r + << dendl; + + if (r < 0) { + derr << "failed to commit journal event: " << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to commit journal event"); + } else { + ceph_assert(m_state_builder->remote_journaler != nullptr); + m_state_builder->remote_journaler->committed(replay_entry); + } + + auto latency = ceph_clock_now() - replay_start_time; + if (g_journal_perf_counters) { + g_journal_perf_counters->inc(l_rbd_mirror_journal_entries); + g_journal_perf_counters->inc(l_rbd_mirror_journal_replay_bytes, + replay_bytes); + g_journal_perf_counters->tinc(l_rbd_mirror_journal_replay_latency, + latency); + } + + auto ctx = new LambdaContext( + [this, replay_bytes, latency](int r) { + std::unique_lock locker{m_lock}; + schedule_flush_local_replay_task(); + + if (m_perf_counters) { + m_perf_counters->inc(l_rbd_mirror_journal_entries); + m_perf_counters->inc(l_rbd_mirror_journal_replay_bytes, replay_bytes); + m_perf_counters->tinc(l_rbd_mirror_journal_replay_latency, latency); + } + + m_event_replay_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Replayer<I>::handle_resync_image() { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + m_resync_requested = true; + handle_replay_complete(locker, 0, "resync requested"); +} + +template <typename I> +void Replayer<I>::notify_status_updated() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(10) << dendl; + + auto ctx = new C_TrackedOp(m_in_flight_op_tracker, new LambdaContext( + [this](int) { + m_replayer_listener->handle_notification(); + })); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Replayer<I>::cancel_delayed_preprocess_task() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + bool canceled_delayed_preprocess_task = false; + { + std::unique_lock 
timer_locker{m_threads->timer_lock}; + if (m_delayed_preprocess_task != nullptr) { + dout(10) << dendl; + canceled_delayed_preprocess_task = m_threads->timer->cancel_event( + m_delayed_preprocess_task); + ceph_assert(canceled_delayed_preprocess_task); + m_delayed_preprocess_task = nullptr; + } + } + + if (canceled_delayed_preprocess_task) { + // wake up sleeping replay + m_event_replay_tracker.finish_op(); + } +} + +template <typename I> +int Replayer<I>::validate_remote_client_state( + const cls::journal::Client& remote_client, + librbd::journal::MirrorPeerClientMeta* remote_client_meta, + bool* resync_requested, std::string* error) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + if (!util::decode_client_meta(remote_client, remote_client_meta)) { + // require operator intervention since the data is corrupt + *error = "error retrieving remote journal client"; + return -EBADMSG; + } + + auto local_image_ctx = m_state_builder->local_image_ctx; + dout(5) << "image_id=" << local_image_ctx->id << ", " + << "remote_client_meta.image_id=" + << remote_client_meta->image_id << ", " + << "remote_client.state=" << remote_client.state << dendl; + if (remote_client_meta->image_id == local_image_ctx->id && + remote_client.state != cls::journal::CLIENT_STATE_CONNECTED) { + dout(5) << "client flagged disconnected, stopping image replay" << dendl; + if (local_image_ctx->config.template get_val<bool>( + "rbd_mirroring_resync_after_disconnect")) { + dout(10) << "disconnected: automatic resync" << dendl; + *resync_requested = true; + *error = "disconnected: automatic resync"; + return -ENOTCONN; + } else { + dout(10) << "disconnected" << dendl; + *error = "disconnected"; + return -ENOTCONN; + } + } + + return 0; +} + +template <typename I> +void Replayer<I>::register_perf_counters() { + dout(5) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + ceph_assert(m_perf_counters == nullptr); + + auto cct = static_cast<CephContext 
*>(m_state_builder->local_image_ctx->cct); + auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio"); + + auto local_image_ctx = m_state_builder->local_image_ctx; + std::string labels = ceph::perf_counters::key_create( + "rbd_mirror_journal_image", + {{"pool", local_image_ctx->md_ctx.get_pool_name()}, + {"namespace", local_image_ctx->md_ctx.get_namespace()}, + {"image", local_image_ctx->name}}); + + PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_journal_first, + l_rbd_mirror_journal_last); + plb.add_u64_counter(l_rbd_mirror_journal_entries, "entries", + "Number of entries replayed", nullptr, prio); + plb.add_u64_counter(l_rbd_mirror_journal_replay_bytes, "replay_bytes", + "Total bytes replayed", nullptr, prio, + unit_t(UNIT_BYTES)); + plb.add_time_avg(l_rbd_mirror_journal_replay_latency, "replay_latency", + "Replay latency", nullptr, prio); + m_perf_counters = plb.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(m_perf_counters); +} + +template <typename I> +void Replayer<I>::unregister_perf_counters() { + dout(5) << dendl; + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + PerfCounters *perf_counters = nullptr; + std::swap(perf_counters, m_perf_counters); + + if (perf_counters != nullptr) { + g_ceph_context->get_perfcounters_collection()->remove(perf_counters); + delete perf_counters; + } +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::Replayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.h b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h new file mode 100644 index 000000000..6b1f36d9c --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h @@ -0,0 +1,323 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H +#define 
RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H + +#include "tools/rbd_mirror/image_replayer/Replayer.h" +#include "include/utime.h" +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "common/RefCountedObj.h" +#include "cls/journal/cls_journal_types.h" +#include "journal/ReplayEntry.h" +#include "librbd/ImageCtx.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <string> +#include <type_traits> + +namespace journal { class Journaler; } +namespace librbd { + +struct ImageCtx; +namespace journal { template <typename I> class Replay; } + +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_replayer { + +struct ReplayerListener; + +namespace journal { + +template <typename> class EventPreprocessor; +template <typename> class ReplayStatusFormatter; +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class Replayer : public image_replayer::Replayer { +public: + typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler; + + static Replayer* create( + Threads<ImageCtxT>* threads, + const std::string& local_mirror_uuid, + StateBuilder<ImageCtxT>* state_builder, + ReplayerListener* replayer_listener) { + return new Replayer(threads, local_mirror_uuid, state_builder, + replayer_listener); + } + + Replayer( + Threads<ImageCtxT>* threads, + const std::string& local_mirror_uuid, + StateBuilder<ImageCtxT>* state_builder, + ReplayerListener* replayer_listener); + ~Replayer(); + + void destroy() override { + delete this; + } + + void init(Context* on_finish) override; + void shut_down(Context* on_finish) override; + + void flush(Context* on_finish) override; + + bool get_replay_status(std::string* description, Context* on_finish) override; + + bool is_replaying() const override { + std::unique_lock locker{m_lock}; + return (m_state == STATE_REPLAYING); + } + + bool is_resync_requested() const override { + 
std::unique_lock locker(m_lock); + return m_resync_requested; + } + + int get_error_code() const override { + std::unique_lock locker(m_lock); + return m_error_code; + } + + std::string get_error_description() const override { + std::unique_lock locker(m_lock); + return m_error_description; + } + + std::string get_image_spec() const { + std::unique_lock locker(m_lock); + return m_image_spec; + } + +private: + /** + * @verbatim + * + * <init> + * | + * v (error) + * INIT_REMOTE_JOURNALER * * * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * START_EXTERNAL_REPLAY * * * * * * * * * * * * * * * * * * * + * | * + * | /--------------------------------------------\ * + * | | | * + * v v (asok flush) | * + * REPLAYING -------------> LOCAL_REPLAY_FLUSH | * + * | \ | | * + * | | v | * + * | | FLUSH_COMMIT_POSITION | * + * | | | | * + * | | \--------------------/| * + * | | | * + * | | (entries available) | * + * | \-----------> REPLAY_READY | * + * | | | * + * | | (skip if not | * + * | v needed) (error) * + * | REPLAY_FLUSH * * * * * * * * * * + * | | | * * + * | | (skip if not | * * + * | v needed) (error) * * + * | GET_REMOTE_TAG * * * * * * * * * + * | | | * * + * | | (skip if not | * * + * | v needed) (error) * * + * | ALLOCATE_LOCAL_TAG * * * * * * * + * | | | * * + * | v (error) * * + * | PREPROCESS_ENTRY * * * * * * * * + * | | | * * + * | v (error) * * + * | PROCESS_ENTRY * * * * * * * * * * + * | | | * * + * | \---------------------/ * * + * v (shutdown) * * + * REPLAY_COMPLETE < * * * * * * * * * * * * * * * * * * * * + * | * + * v * + * WAIT_FOR_FLUSH * + * | * + * v * + * SHUT_DOWN_LOCAL_JOURNAL_REPLAY * + * | * + * v * + * WAIT_FOR_REPLAY * + * | * + * v * + * CLOSE_LOCAL_IMAGE < * * * * * * * * * * * * * * * * * * * * + * | + * v (skip if not started) + * STOP_REMOTE_JOURNALER_REPLAY + * | + * v + * WAIT_FOR_IN_FLIGHT_OPS + * | + * v + * <shutdown> + * + * @endverbatim + */ + + typedef typename librbd::journal::TypeTraits<ImageCtxT>::ReplayEntry 
ReplayEntry; + + enum State { + STATE_INIT, + STATE_REPLAYING, + STATE_COMPLETE + }; + + struct C_ReplayCommitted; + struct RemoteJournalerListener; + struct RemoteReplayHandler; + struct LocalJournalListener; + + Threads<ImageCtxT>* m_threads; + std::string m_local_mirror_uuid; + StateBuilder<ImageCtxT>* m_state_builder; + ReplayerListener* m_replayer_listener; + + mutable ceph::mutex m_lock; + + std::string m_image_spec; + Context* m_on_init_shutdown = nullptr; + + State m_state = STATE_INIT; + int m_error_code = 0; + std::string m_error_description; + bool m_resync_requested = false; + + ceph::ref_t<typename std::remove_pointer<decltype(ImageCtxT::journal)>::type> + m_local_journal; + RemoteJournalerListener* m_remote_listener = nullptr; + + librbd::journal::Replay<ImageCtxT>* m_local_journal_replay = nullptr; + EventPreprocessor<ImageCtxT>* m_event_preprocessor = nullptr; + ReplayStatusFormatter<ImageCtxT>* m_replay_status_formatter = nullptr; + RemoteReplayHandler* m_remote_replay_handler = nullptr; + LocalJournalListener* m_local_journal_listener = nullptr; + + PerfCounters *m_perf_counters = nullptr; + + ReplayEntry m_replay_entry; + uint64_t m_replay_bytes = 0; + utime_t m_replay_start_time; + bool m_replay_tag_valid = false; + uint64_t m_replay_tag_tid = 0; + cls::journal::Tag m_replay_tag; + librbd::journal::TagData m_replay_tag_data; + librbd::journal::EventEntry m_event_entry; + + AsyncOpTracker m_flush_tracker; + + AsyncOpTracker m_event_replay_tracker; + Context *m_delayed_preprocess_task = nullptr; + + AsyncOpTracker m_in_flight_op_tracker; + Context *m_flush_local_replay_task = nullptr; + + void handle_remote_journal_metadata_updated(); + + void schedule_flush_local_replay_task(); + void cancel_flush_local_replay_task(); + void handle_flush_local_replay_task(int r); + + void flush_local_replay(Context* on_flush); + void handle_flush_local_replay(Context* on_flush, int r); + + void flush_commit_position(Context* on_flush); + void 
handle_flush_commit_position(Context* on_flush, int r); + + void init_remote_journaler(); + void handle_init_remote_journaler(int r); + + void start_external_replay(std::unique_lock<ceph::mutex>& locker); + void handle_start_external_replay(int r); + + bool add_local_journal_listener(std::unique_lock<ceph::mutex>& locker); + + bool notify_init_complete(std::unique_lock<ceph::mutex>& locker); + + void wait_for_flush(); + void handle_wait_for_flush(int r); + + void shut_down_local_journal_replay(); + void handle_shut_down_local_journal_replay(int r); + + void wait_for_event_replay(); + void handle_wait_for_event_replay(int r); + + void close_local_image(); + void handle_close_local_image(int r); + + void stop_remote_journaler_replay(); + void handle_stop_remote_journaler_replay(int r); + + void wait_for_in_flight_ops(); + void handle_wait_for_in_flight_ops(int r); + + void replay_flush(); + void handle_replay_flush_shut_down(int r); + void handle_replay_flush(int r); + + void get_remote_tag(); + void handle_get_remote_tag(int r); + + void allocate_local_tag(); + void handle_allocate_local_tag(int r); + + void handle_replay_error(int r, const std::string &error); + + bool is_replay_complete() const; + bool is_replay_complete(const std::unique_lock<ceph::mutex>& locker) const; + + void handle_replay_complete(int r, const std::string &error_desc); + void handle_replay_complete(const std::unique_lock<ceph::mutex>&, + int r, const std::string &error_desc); + void handle_replay_ready(); + void handle_replay_ready(std::unique_lock<ceph::mutex>& locker); + + void preprocess_entry(); + void handle_delayed_preprocess_task(int r); + void handle_preprocess_entry_ready(int r); + void handle_preprocess_entry_safe(int r); + + void process_entry(); + void handle_process_entry_ready(int r); + void handle_process_entry_safe(const ReplayEntry& replay_entry, + uint64_t relay_bytes, + const utime_t &replay_start_time, int r); + + void handle_resync_image(); + + void 
notify_status_updated(); + + void cancel_delayed_preprocess_task(); + + int validate_remote_client_state( + const cls::journal::Client& remote_client, + librbd::journal::MirrorPeerClientMeta* remote_client_meta, + bool* resync_requested, std::string* error); + + void register_perf_counters(); + void unregister_perf_counters(); + +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::Replayer<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc new file mode 100644 index 000000000..5f1fb0e2f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc @@ -0,0 +1,149 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "StateBuilder.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h" +#include "tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h" +#include "tools/rbd_mirror/image_replayer/journal/Replayer.h" +#include "tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "StateBuilder: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename I> +StateBuilder<I>::StateBuilder(const std::string& global_image_id) + : image_replayer::StateBuilder<I>(global_image_id) { +} + +template <typename I> 
+StateBuilder<I>::~StateBuilder() { + ceph_assert(remote_journaler == nullptr); +} + +template <typename I> +void StateBuilder<I>::close(Context* on_finish) { + dout(10) << dendl; + + // close the remote journaler after closing the local image + // in case we have lost contact w/ the remote cluster and + // will block + on_finish = new LambdaContext([this, on_finish](int) { + shut_down_remote_journaler(on_finish); + }); + on_finish = new LambdaContext([this, on_finish](int) { + this->close_local_image(on_finish); + }); + this->close_remote_image(on_finish); +} + +template <typename I> +bool StateBuilder<I>::is_disconnected() const { + return (remote_client_state == cls::journal::CLIENT_STATE_DISCONNECTED); +} + +template <typename I> +bool StateBuilder<I>::is_linked_impl() const { + ceph_assert(!this->remote_mirror_uuid.empty()); + return (local_primary_mirror_uuid == this->remote_mirror_uuid); +} + +template <typename I> +cls::rbd::MirrorImageMode StateBuilder<I>::get_mirror_image_mode() const { + return cls::rbd::MIRROR_IMAGE_MODE_JOURNAL; +} + +template <typename I> +image_sync::SyncPointHandler* StateBuilder<I>::create_sync_point_handler() { + dout(10) << dendl; + + this->m_sync_point_handler = SyncPointHandler<I>::create(this); + return this->m_sync_point_handler; +} + +template <typename I> +BaseRequest* StateBuilder<I>::create_local_image_request( + Threads<I>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) { + return CreateLocalImageRequest<I>::create( + threads, local_io_ctx, this->remote_image_ctx, this->global_image_id, + pool_meta_cache, progress_ctx, this, on_finish); +} + +template <typename I> +BaseRequest* StateBuilder<I>::create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) { + return 
PrepareReplayRequest<I>::create( + local_mirror_uuid, progress_ctx, this, resync_requested, syncing, + on_finish); +} + +template <typename I> +image_replayer::Replayer* StateBuilder<I>::create_replayer( + Threads<I>* threads, + InstanceWatcher<I>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) { + return Replayer<I>::create( + threads, local_mirror_uuid, this, replayer_listener); +} + +template <typename I> +void StateBuilder<I>::shut_down_remote_journaler(Context* on_finish) { + if (remote_journaler == nullptr) { + on_finish->complete(0); + return; + } + + dout(10) << dendl; + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_shut_down_remote_journaler(r, on_finish); + }); + remote_journaler->shut_down(ctx); +} + +template <typename I> +void StateBuilder<I>::handle_shut_down_remote_journaler(int r, + Context* on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to shut down remote journaler: " << cpp_strerror(r) + << dendl; + } + + delete remote_journaler; + remote_journaler = nullptr; + on_finish->complete(r); +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::StateBuilder<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h new file mode 100644 index 000000000..790d1390b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H + +#include "tools/rbd_mirror/image_replayer/StateBuilder.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" 
+#include "librbd/journal/TypeTraits.h" +#include <string> + +struct Context; + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename> class SyncPointHandler; + +template <typename ImageCtxT> +class StateBuilder : public image_replayer::StateBuilder<ImageCtxT> { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + + static StateBuilder* create(const std::string& global_image_id) { + return new StateBuilder(global_image_id); + } + + StateBuilder(const std::string& global_image_id); + ~StateBuilder() override; + + void close(Context* on_finish) override; + + bool is_disconnected() const override; + + cls::rbd::MirrorImageMode get_mirror_image_mode() const override; + + image_sync::SyncPointHandler* create_sync_point_handler() override; + + bool replay_requires_remote_image() const override { + return false; + } + + BaseRequest* create_local_image_request( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) override; + + BaseRequest* create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) override; + + image_replayer::Replayer* create_replayer( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) override; + + std::string local_primary_mirror_uuid; + + Journaler* remote_journaler = nullptr; + cls::journal::ClientState remote_client_state = + cls::journal::CLIENT_STATE_CONNECTED; + librbd::journal::MirrorPeerClientMeta remote_client_meta; + + SyncPointHandler<ImageCtxT>* sync_point_handler = nullptr; + +private: + bool 
is_linked_impl() const override; + + void shut_down_remote_journaler(Context* on_finish); + void handle_shut_down_remote_journaler(int r, Context* on_finish); +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::StateBuilder<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc new file mode 100644 index 000000000..66d13e555 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc @@ -0,0 +1,109 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointHandler.h" +#include "StateBuilder.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "SyncPointHandler: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename I> +SyncPointHandler<I>::SyncPointHandler(StateBuilder<I>* state_builder) + : m_state_builder(state_builder), + m_client_meta_copy(state_builder->remote_client_meta) { +} + +template <typename I> +typename SyncPointHandler<I>::SyncPoints +SyncPointHandler<I>::get_sync_points() const { + SyncPoints sync_points; + for (auto& sync_point : m_client_meta_copy.sync_points) { + sync_points.emplace_back( + sync_point.snap_namespace, + sync_point.snap_name, + sync_point.from_snap_name, + sync_point.object_number); + } + return sync_points; +} + +template <typename I> +librbd::SnapSeqs 
SyncPointHandler<I>::get_snap_seqs() const { + return m_client_meta_copy.snap_seqs; +} + +template <typename I> +void SyncPointHandler<I>::update_sync_points( + const librbd::SnapSeqs& snap_seqs, const SyncPoints& sync_points, + bool sync_complete, Context* on_finish) { + dout(10) << dendl; + + if (sync_complete && sync_points.empty()) { + m_client_meta_copy.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + } + + m_client_meta_copy.snap_seqs = snap_seqs; + m_client_meta_copy.sync_points.clear(); + for (auto& sync_point : sync_points) { + m_client_meta_copy.sync_points.emplace_back( + sync_point.snap_namespace, + sync_point.snap_name, + sync_point.from_snap_name, + sync_point.object_number); + + if (sync_point.object_number) { + m_client_meta_copy.sync_object_count = std::max( + m_client_meta_copy.sync_object_count, *sync_point.object_number + 1); + } + } + + dout(20) << "client_meta=" << m_client_meta_copy << dendl; + bufferlist client_data_bl; + librbd::journal::ClientData client_data{m_client_meta_copy}; + encode(client_data, client_data_bl); + + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_update_sync_points(r, on_finish); + }); + m_state_builder->remote_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void SyncPointHandler<I>::handle_update_sync_points(int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + if (r >= 0) { + m_state_builder->remote_client_meta.snap_seqs = + m_client_meta_copy.snap_seqs; + m_state_builder->remote_client_meta.sync_points = + m_client_meta_copy.sync_points; + } else { + derr << "failed to update remote journal client meta for image " + << m_state_builder->global_image_id << ": " << cpp_strerror(r) + << dendl; + } + + on_finish->complete(r); +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::SyncPointHandler<librbd::ImageCtx>; diff --git 
a/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h new file mode 100644 index 000000000..b4f492c19 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h @@ -0,0 +1,55 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H +#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H + +#include "tools/rbd_mirror/image_sync/Types.h" +#include "librbd/journal/Types.h" + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class SyncPointHandler : public image_sync::SyncPointHandler { +public: + using SyncPoint = image_sync::SyncPoint; + using SyncPoints = image_sync::SyncPoints; + + static SyncPointHandler* create(StateBuilder<ImageCtxT>* state_builder) { + return new SyncPointHandler(state_builder); + } + SyncPointHandler(StateBuilder<ImageCtxT>* state_builder); + + SyncPoints get_sync_points() const override; + librbd::SnapSeqs get_snap_seqs() const override; + + void update_sync_points(const librbd::SnapSeqs& snap_seqs, + const SyncPoints& sync_points, + bool sync_complete, + Context* on_finish) override; + +private: + StateBuilder<ImageCtxT>* m_state_builder; + + librbd::journal::MirrorPeerClientMeta m_client_meta_copy; + + void handle_update_sync_points(int r, Context* on_finish); + +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::SyncPointHandler<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc 
b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc new file mode 100644 index 000000000..75881307c --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc @@ -0,0 +1,658 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ApplyImageStateRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/image/GetMetadataRequest.h" +#include "tools/rbd_mirror/image_replayer/snapshot/Utils.h" +#include <boost/algorithm/string/predicate.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ + << "ApplyImageStateRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +ApplyImageStateRequest<I>::ApplyImageStateRequest( + const std::string& local_mirror_uuid, + const std::string& remote_mirror_uuid, + I* local_image_ctx, + I* remote_image_ctx, + librbd::mirror::snapshot::ImageState image_state, + Context* on_finish) + : m_local_mirror_uuid(local_mirror_uuid), + m_remote_mirror_uuid(remote_mirror_uuid), + m_local_image_ctx(local_image_ctx), + m_remote_image_ctx(remote_image_ctx), + m_image_state(image_state), + m_on_finish(on_finish) { + dout(15) << "image_state=" << m_image_state << dendl; + + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + m_features = m_local_image_ctx->features & ~RBD_FEATURES_IMPLICIT_ENABLE; + compute_local_to_remote_snap_ids(); +} + +template <typename I> +void ApplyImageStateRequest<I>::send() { + rename_image(); +} + +template <typename I> +void 
ApplyImageStateRequest<I>::rename_image() { + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + if (m_local_image_ctx->name == m_image_state.name) { + image_locker.unlock(); + owner_locker.unlock(); + + update_features(); + return; + } + image_locker.unlock(); + + dout(15) << "local_image_name=" << m_local_image_ctx->name << ", " + << "remote_image_name=" << m_image_state.name << dendl; + + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_rename_image>(this); + m_local_image_ctx->operations->execute_rename(m_image_state.name, ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_rename_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to rename image to '" << m_image_state.name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + update_features(); +} + +template <typename I> +void ApplyImageStateRequest<I>::update_features() { + uint64_t feature_updates = 0UL; + bool enabled = false; + + auto image_state_features = + m_image_state.features & ~RBD_FEATURES_IMPLICIT_ENABLE; + feature_updates = (m_features & ~image_state_features); + if (feature_updates == 0UL) { + feature_updates = (image_state_features & ~m_features); + enabled = (feature_updates != 0UL); + } + + if (feature_updates == 0UL) { + get_image_meta(); + return; + } + + dout(15) << "image_features=" << m_features << ", " + << "state_features=" << image_state_features << ", " + << "feature_updates=" << feature_updates << ", " + << "enabled=" << enabled << dendl; + + if (enabled) { + m_features |= feature_updates; + } else { + m_features &= ~feature_updates; + } + + std::shared_lock owner_lock{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_update_features>(this); + m_local_image_ctx->operations->execute_update_features( 
+ feature_updates, enabled, ctx, 0U); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_update_features(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update image features: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + update_features(); +} + +template <typename I> +void ApplyImageStateRequest<I>::get_image_meta() { + dout(15) << dendl; + + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_get_image_meta>(this); + auto req = librbd::image::GetMetadataRequest<I>::create( + m_local_image_ctx->md_ctx, m_local_image_ctx->header_oid, true, "", "", 0U, + &m_metadata, ctx); + req->send(); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_get_image_meta(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to fetch local image metadata: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + update_image_meta(); +} + +template <typename I> +void ApplyImageStateRequest<I>::update_image_meta() { + std::set<std::string> keys_to_remove; + for (const auto& [key, value] : m_metadata) { + if (m_image_state.metadata.count(key) == 0) { + dout(15) << "removing image-meta key '" << key << "'" << dendl; + keys_to_remove.insert(key); + } + } + + std::map<std::string, bufferlist> metadata_to_update; + for (const auto& [key, value] : m_image_state.metadata) { + auto it = m_metadata.find(key); + if (it == m_metadata.end() || !it->second.contents_equal(value)) { + dout(15) << "updating image-meta key '" << key << "'" << dendl; + metadata_to_update.insert({key, value}); + } + } + + if (keys_to_remove.empty() && metadata_to_update.empty()) { + unprotect_snapshot(); + return; + } + + dout(15) << dendl; + + librados::ObjectWriteOperation op; + for (const auto& key : keys_to_remove) { + librbd::cls_client::metadata_remove(&op, key); + } + if (!metadata_to_update.empty()) { + librbd::cls_client::metadata_set(&op, 
metadata_to_update); + } + + auto aio_comp = create_rados_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_update_image_meta>(this); + int r = m_local_image_ctx->md_ctx.aio_operate(m_local_image_ctx->header_oid, aio_comp, + &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_update_image_meta(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update image metadata: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_metadata.clear(); + + m_prev_snap_id = CEPH_NOSNAP; + unprotect_snapshot(); +} + +template <typename I> +void ApplyImageStateRequest<I>::unprotect_snapshot() { + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + + auto snap_it = m_local_image_ctx->snap_info.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id); + } + + for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + const auto& snap_info = snap_it->second; + + auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>( + &snap_info.snap_namespace); + if (user_ns == nullptr) { + dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl; + continue; + } + + if (snap_info.protection_status == RBD_PROTECTION_STATUS_UNPROTECTED) { + dout(20) << "snapshot " << snap_id << " is already unprotected" << dendl; + continue; + } + + auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id); + if (snap_id_map_it == m_local_to_remote_snap_ids.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image" + << dendl; + break; + } + + auto remote_snap_id = snap_id_map_it->second; + auto snap_state_it = m_image_state.snapshots.find(remote_snap_id); + if (snap_state_it == m_image_state.snapshots.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image " + << "state" << dendl; + break; + } + + 
const auto& snap_state = snap_state_it->second; + if (snap_state.protection_status == RBD_PROTECTION_STATUS_UNPROTECTED) { + dout(15) << "snapshot " << snap_id << " is unprotected in remote image" + << dendl; + break; + } + } + + if (snap_it == m_local_image_ctx->snap_info.end()) { + image_locker.unlock(); + + // no local snapshots to unprotect + m_prev_snap_id = CEPH_NOSNAP; + remove_snapshot(); + return; + } + + m_prev_snap_id = snap_it->first; + m_snap_name = snap_it->second.name; + image_locker.unlock(); + + dout(15) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_unprotect_snapshot>(this); + m_local_image_ctx->operations->execute_snap_unprotect( + cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_unprotect_snapshot(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to unprotect snapshot " << m_snap_name << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + unprotect_snapshot(); +} + +template <typename I> +void ApplyImageStateRequest<I>::remove_snapshot() { + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + + auto snap_it = m_local_image_ctx->snap_info.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id); + } + + for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + const auto& snap_info = snap_it->second; + + auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>( + &snap_info.snap_namespace); + if (user_ns == nullptr) { + dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl; + continue; + } + + auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id); + if (snap_id_map_it == 
m_local_to_remote_snap_ids.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image" + << dendl; + break; + } + + auto remote_snap_id = snap_id_map_it->second; + auto snap_state_it = m_image_state.snapshots.find(remote_snap_id); + if (snap_state_it == m_image_state.snapshots.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image " + << "state" << dendl; + break; + } + } + + if (snap_it == m_local_image_ctx->snap_info.end()) { + image_locker.unlock(); + + // no local snapshots to remove + m_prev_snap_id = CEPH_NOSNAP; + protect_snapshot(); + return; + } + + m_prev_snap_id = snap_it->first; + m_snap_name = snap_it->second.name; + image_locker.unlock(); + + dout(15) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_remove_snapshot>(this); + m_local_image_ctx->operations->execute_snap_remove( + cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_remove_snapshot(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to remove snapshot " << m_snap_name << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + remove_snapshot(); +} + +template <typename I> +void ApplyImageStateRequest<I>::protect_snapshot() { + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + + auto snap_it = m_local_image_ctx->snap_info.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id); + } + + for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + const auto& snap_info = snap_it->second; + + auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>( + &snap_info.snap_namespace); + if (user_ns == nullptr) { + 
dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl; + continue; + } + + if (snap_info.protection_status == RBD_PROTECTION_STATUS_PROTECTED) { + dout(20) << "snapshot " << snap_id << " is already protected" << dendl; + continue; + } + + auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id); + if (snap_id_map_it == m_local_to_remote_snap_ids.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image" + << dendl; + continue; + } + + auto remote_snap_id = snap_id_map_it->second; + auto snap_state_it = m_image_state.snapshots.find(remote_snap_id); + if (snap_state_it == m_image_state.snapshots.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image " + << "state" << dendl; + continue; + } + + const auto& snap_state = snap_state_it->second; + if (snap_state.protection_status == RBD_PROTECTION_STATUS_PROTECTED) { + dout(15) << "snapshot " << snap_id << " is protected in remote image" + << dendl; + break; + } + } + + if (snap_it == m_local_image_ctx->snap_info.end()) { + image_locker.unlock(); + + // no local snapshots to protect + m_prev_snap_id = CEPH_NOSNAP; + rename_snapshot(); + return; + } + + m_prev_snap_id = snap_it->first; + m_snap_name = snap_it->second.name; + image_locker.unlock(); + + dout(15) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_protect_snapshot>(this); + m_local_image_ctx->operations->execute_snap_protect( + cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_protect_snapshot(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to protect snapshot " << m_snap_name << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + protect_snapshot(); +} + +template 
<typename I> +void ApplyImageStateRequest<I>::rename_snapshot() { + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + + auto snap_it = m_local_image_ctx->snap_info.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id); + } + + for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + const auto& snap_info = snap_it->second; + + auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>( + &snap_info.snap_namespace); + if (user_ns == nullptr) { + dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl; + continue; + } + + auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id); + if (snap_id_map_it == m_local_to_remote_snap_ids.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image" + << dendl; + continue; + } + + auto remote_snap_id = snap_id_map_it->second; + auto snap_state_it = m_image_state.snapshots.find(remote_snap_id); + if (snap_state_it == m_image_state.snapshots.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image " + << "state" << dendl; + continue; + } + + const auto& snap_state = snap_state_it->second; + if (snap_info.name != snap_state.name) { + dout(15) << "snapshot " << snap_id << " has been renamed from '" + << snap_info.name << "' to '" << snap_state.name << "'" + << dendl; + m_snap_name = snap_state.name; + break; + } + } + + if (snap_it == m_local_image_ctx->snap_info.end()) { + image_locker.unlock(); + + // no local snapshots to rename + m_prev_snap_id = CEPH_NOSNAP; + set_snapshot_limit(); + return; + } + + m_prev_snap_id = snap_it->first; + image_locker.unlock(); + + dout(15) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + 
&ApplyImageStateRequest<I>::handle_rename_snapshot>(this); + m_local_image_ctx->operations->execute_snap_rename( + m_prev_snap_id, m_snap_name.c_str(), ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_rename_snapshot(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to rename snapshot " << m_snap_name << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + rename_snapshot(); +} + +template <typename I> +void ApplyImageStateRequest<I>::set_snapshot_limit() { + dout(15) << "snap_limit=" << m_image_state.snap_limit << dendl; + + // no need to even check the current limit -- just set it + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_set_snapshot_limit>(this); + m_local_image_ctx->operations->execute_snap_set_limit( + m_image_state.snap_limit, ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_set_snapshot_limit(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update snapshot limit: " << cpp_strerror(r) + << dendl; + } + + finish(r); +} + +template <typename I> +void ApplyImageStateRequest<I>::finish(int r) { + dout(15) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +template <typename I> +uint64_t ApplyImageStateRequest<I>::compute_remote_snap_id( + uint64_t local_snap_id) { + ceph_assert(ceph_mutex_is_locked(m_local_image_ctx->image_lock)); + ceph_assert(ceph_mutex_is_locked(m_remote_image_ctx->image_lock)); + + // Search our local non-primary snapshots for a mapping to the remote + // snapshot. 
The non-primary mirror snapshot with the mappings will always + // come at or after the snapshot we are searching against + auto remote_snap_id = util::compute_remote_snap_id( + m_local_image_ctx->image_lock, m_local_image_ctx->snap_info, + local_snap_id, m_remote_mirror_uuid); + if (remote_snap_id != CEPH_NOSNAP) { + return remote_snap_id; + } + + // if we failed to find a match to a remote snapshot in our local non-primary + // snapshots, check the remote image for non-primary snapshot mappings back + // to our snapshot + for (auto snap_it = m_remote_image_ctx->snap_info.begin(); + snap_it != m_remote_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + auto mirror_ns = std::get_if<cls::rbd::MirrorSnapshotNamespace>( + &snap_it->second.snap_namespace); + if (mirror_ns == nullptr || !mirror_ns->is_non_primary()) { + continue; + } + + if (mirror_ns->primary_mirror_uuid != m_local_mirror_uuid) { + dout(20) << "remote snapshot " << snap_id << " not tied to local" + << dendl; + continue; + } else if (mirror_ns->primary_snap_id == local_snap_id) { + dout(15) << "local snapshot " << local_snap_id << " maps to " + << "remote snapshot " << snap_id << dendl; + return snap_id; + } + + const auto& snap_seqs = mirror_ns->snap_seqs; + for (auto [local_snap_id_seq, remote_snap_id_seq] : snap_seqs) { + if (local_snap_id_seq == local_snap_id) { + dout(15) << "local snapshot " << local_snap_id << " maps to " + << "remote snapshot " << remote_snap_id_seq << dendl; + return remote_snap_id_seq; + } + } + } + + return CEPH_NOSNAP; +} + +template <typename I> +void ApplyImageStateRequest<I>::compute_local_to_remote_snap_ids() { + ceph_assert(ceph_mutex_is_locked(m_local_image_ctx->image_lock)); + std::shared_lock remote_image_locker{m_remote_image_ctx->image_lock}; + + for (const auto& [snap_id, snap_info] : m_local_image_ctx->snap_info) { + m_local_to_remote_snap_ids[snap_id] = compute_remote_snap_id(snap_id); + } + + dout(15) << "local_to_remote_snap_ids=" << 
m_local_to_remote_snap_ids + << dendl; +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::ApplyImageStateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h new file mode 100644 index 000000000..0e2d09ddf --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h @@ -0,0 +1,155 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H + +#include "common/ceph_mutex.h" +#include "librbd/mirror/snapshot/Types.h" +#include <map> +#include <string> + +struct Context; + +namespace librbd { + +struct ImageCtx; + +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +template <typename> class EventPreprocessor; +template <typename> class ReplayStatusFormatter; +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class ApplyImageStateRequest { +public: + static ApplyImageStateRequest* create( + const std::string& local_mirror_uuid, + const std::string& remote_mirror_uuid, + ImageCtxT* local_image_ctx, + ImageCtxT* remote_image_ctx, + librbd::mirror::snapshot::ImageState image_state, + Context* on_finish) { + return new ApplyImageStateRequest(local_mirror_uuid, remote_mirror_uuid, + local_image_ctx, remote_image_ctx, + image_state, on_finish); + } + + ApplyImageStateRequest( + const std::string& local_mirror_uuid, + const std::string& remote_mirror_uuid, + ImageCtxT* local_image_ctx, + ImageCtxT* remote_image_ctx, + librbd::mirror::snapshot::ImageState image_state, + Context* on_finish); + + void send(); + +private: + /** + * @verbatim + * 
+ * <start> + * | + * v + * RENAME_IMAGE + * | + * | /---------\ + * | | | + * v v | + * UPDATE_FEATURES -----/ + * | + * v + * GET_IMAGE_META + * | + * | /---------\ + * | | | + * v v | + * UPDATE_IMAGE_META ---/ + * | + * | /---------\ + * | | | + * v v | + * UNPROTECT_SNAPSHOT | + * | | + * v | + * REMOVE_SNAPSHOT | + * | | + * v | + * PROTECT_SNAPSHOT | + * | | + * v | + * RENAME_SNAPSHOT -----/ + * | + * v + * SET_SNAPSHOT_LIMIT + * | + * v + * <finish> + * + * @endverbatim + */ + + std::string m_local_mirror_uuid; + std::string m_remote_mirror_uuid; + ImageCtxT* m_local_image_ctx; + ImageCtxT* m_remote_image_ctx; + librbd::mirror::snapshot::ImageState m_image_state; + Context* m_on_finish; + + std::map<uint64_t, uint64_t> m_local_to_remote_snap_ids; + + uint64_t m_features = 0; + + std::map<std::string, bufferlist> m_metadata; + + uint64_t m_prev_snap_id = 0; + std::string m_snap_name; + + void rename_image(); + void handle_rename_image(int r); + + void update_features(); + void handle_update_features(int r); + + void get_image_meta(); + void handle_get_image_meta(int r); + + void update_image_meta(); + void handle_update_image_meta(int r); + + void unprotect_snapshot(); + void handle_unprotect_snapshot(int r); + + void remove_snapshot(); + void handle_remove_snapshot(int r); + + void protect_snapshot(); + void handle_protect_snapshot(int r); + + void rename_snapshot(); + void handle_rename_snapshot(int r); + + void set_snapshot_limit(); + void handle_set_snapshot_limit(int r); + + void finish(int r); + + uint64_t compute_remote_snap_id(uint64_t snap_id); + void compute_local_to_remote_snap_ids(); +}; + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::snapshot::ApplyImageStateRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H diff --git 
a/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc new file mode 100644 index 000000000..c923395c9 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc @@ -0,0 +1,204 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "CreateLocalImageRequest.h" +#include "include/rados/librados.hpp" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "tools/rbd_mirror/ProgressContext.h" +#include "tools/rbd_mirror/image_replayer/CreateImageRequest.h" +#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ + << "CreateLocalImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void CreateLocalImageRequest<I>::send() { + disable_mirror_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::disable_mirror_image() { + if (m_state_builder->local_image_id.empty()) { + add_mirror_image(); + return; + } + + dout(10) << dendl; + update_progress("DISABLE_MIRROR_IMAGE"); + + // need to send 'disabling' since the cls methods will fail if we aren't + // in that state + cls::rbd::MirrorImage mirror_image{ + cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, m_global_image_id, + cls::rbd::MIRROR_IMAGE_STATE_DISABLING}; + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_set(&op, m_state_builder->local_image_id, + 
mirror_image); + + auto aio_comp = create_rados_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_disable_mirror_image>(this); + int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_disable_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to disable mirror image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + remove_mirror_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::remove_mirror_image() { + dout(10) << dendl; + update_progress("REMOVE_MIRROR_IMAGE"); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_remove(&op, m_state_builder->local_image_id); + + auto aio_comp = create_rados_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_remove_mirror_image>(this); + int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_remove_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to remove mirror image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_state_builder->local_image_id = ""; + add_mirror_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::add_mirror_image() { + ceph_assert(m_state_builder->local_image_id.empty()); + m_state_builder->local_image_id = + librbd::util::generate_image_id<I>(m_local_io_ctx); + + dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl; + update_progress("ADD_MIRROR_IMAGE"); + + // use 'creating' to track a partially constructed image. 
it will + // be switched to 'enabled' once the image is fully created + cls::rbd::MirrorImage mirror_image{ + cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, m_global_image_id, + cls::rbd::MIRROR_IMAGE_STATE_CREATING}; + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_set(&op, m_state_builder->local_image_id, + mirror_image); + + auto aio_comp = create_rados_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_add_mirror_image>(this); + int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_add_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register mirror image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + this->finish(r); + return; + } + + create_local_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::create_local_image() { + dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl; + update_progress("CREATE_LOCAL_IMAGE"); + + m_remote_image_ctx->image_lock.lock_shared(); + std::string image_name = m_remote_image_ctx->name; + m_remote_image_ctx->image_lock.unlock_shared(); + + auto ctx = create_context_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_create_local_image>(this); + auto request = CreateImageRequest<I>::create( + m_threads, m_local_io_ctx, m_global_image_id, + m_state_builder->remote_mirror_uuid, image_name, + m_state_builder->local_image_id, m_remote_image_ctx, + m_pool_meta_cache, cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, ctx); + request->send(); +} +template <typename I> +void CreateLocalImageRequest<I>::handle_create_local_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBADF) { + dout(5) << "image id " << m_state_builder->local_image_id << " " + << "already in-use" << dendl; + disable_mirror_image(); + return; + } else if (r < 0) { + if (r == 
-ENOENT) { + dout(10) << "parent image does not exist" << dendl; + } else { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void CreateLocalImageRequest<I>::update_progress( + const std::string& description) { + dout(15) << description << dendl; + if (m_progress_ctx != nullptr) { + m_progress_ctx->update_progress(description); + } +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::CreateLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h new file mode 100644 index 000000000..3345154b4 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h @@ -0,0 +1,121 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H + +#include "include/rados/librados_fwd.hpp" +#include "tools/rbd_mirror/BaseRequest.h" +#include <string> + +struct Context; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +class PoolMetaCache; +class ProgressContext; +template <typename> struct Threads; + +namespace image_replayer { +namespace snapshot { + +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class CreateLocalImageRequest : public BaseRequest { +public: + typedef rbd::mirror::ProgressContext ProgressContext; + + static CreateLocalImageRequest* create( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + ImageCtxT* remote_image_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + 
StateBuilder<ImageCtxT>* state_builder, + Context* on_finish) { + return new CreateLocalImageRequest(threads, local_io_ctx, remote_image_ctx, + global_image_id, pool_meta_cache, + progress_ctx, state_builder, on_finish); + } + + CreateLocalImageRequest( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + ImageCtxT* remote_image_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + Context* on_finish) + : BaseRequest(on_finish), + m_threads(threads), + m_local_io_ctx(local_io_ctx), + m_remote_image_ctx(remote_image_ctx), + m_global_image_id(global_image_id), + m_pool_meta_cache(pool_meta_cache), + m_progress_ctx(progress_ctx), + m_state_builder(state_builder) { + } + + // overrides the pure virtual BaseRequest::send() + void send() override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * DISABLE_MIRROR_IMAGE < * * * * * * + * | * + * v * + * REMOVE_MIRROR_IMAGE * + * | * + * v * + * ADD_MIRROR_IMAGE * + * | * + * v (id exists) * + * CREATE_LOCAL_IMAGE * * * * * * * * + * | + * v + * <finish> + * + * @endverbatim + */ + + Threads<ImageCtxT>* m_threads; + librados::IoCtx& m_local_io_ctx; + ImageCtxT* m_remote_image_ctx; + std::string m_global_image_id; + PoolMetaCache* m_pool_meta_cache; + ProgressContext* m_progress_ctx; + StateBuilder<ImageCtxT>* m_state_builder; + + void disable_mirror_image(); + void handle_disable_mirror_image(int r); + + void remove_mirror_image(); + void handle_remove_mirror_image(int r); + + void add_mirror_image(); + void handle_add_mirror_image(int r); + + void create_local_image(); + void handle_create_local_image(int r); + + void update_progress(const std::string& description); + +}; + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::snapshot::CreateLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H diff 
--git a/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc new file mode 100644 index 000000000..575eb8534 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc @@ -0,0 +1,70 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "PrepareReplayRequest.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/mirror/snapshot/ImageMeta.h" +#include "tools/rbd_mirror/ProgressContext.h" +#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ + << "PrepareReplayRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +using librbd::util::create_context_callback; + +template <typename I> +void PrepareReplayRequest<I>::send() { + *m_resync_requested = false; + *m_syncing = false; + + load_local_image_meta(); +} + +template <typename I> +void PrepareReplayRequest<I>::load_local_image_meta() { + dout(15) << dendl; + + ceph_assert(m_state_builder->local_image_meta == nullptr); + m_state_builder->local_image_meta = + librbd::mirror::snapshot::ImageMeta<I>::create( + m_state_builder->local_image_ctx, m_local_mirror_uuid); + + auto ctx = create_context_callback< + PrepareReplayRequest<I>, + &PrepareReplayRequest<I>::handle_load_local_image_meta>(this); + m_state_builder->local_image_meta->load(ctx); +} + +template <typename I> +void PrepareReplayRequest<I>::handle_load_local_image_meta(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to load local image-meta: " << cpp_strerror(r) << 
dendl; + finish(r); + return; + } + + *m_resync_requested = m_state_builder->local_image_meta->resync_requested; + finish(0); +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::PrepareReplayRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h new file mode 100644 index 000000000..4e9246acd --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h @@ -0,0 +1,92 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_PREPARE_REPLAY_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_PREPARE_REPLAY_REQUEST_H + +#include "include/int_types.h" +#include "librbd/mirror/Types.h" +#include "tools/rbd_mirror/BaseRequest.h" +#include <list> +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +class ProgressContext; + +namespace image_replayer { +namespace snapshot { + +template <typename> class StateBuilder; + +/** + * Request that loads the local image-meta into the state builder and + * reports through *resync_requested whether a resync was requested. + * *syncing is reset to false by send(). + */ +template <typename ImageCtxT> +class PrepareReplayRequest : public BaseRequest { +public: + static PrepareReplayRequest* create( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + bool* resync_requested, + bool* syncing, + Context* on_finish) { + return new PrepareReplayRequest( + local_mirror_uuid, progress_ctx, state_builder, resync_requested, + syncing, on_finish); + } + + PrepareReplayRequest( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + bool* resync_requested, + bool* syncing, + Context* on_finish) + : BaseRequest(on_finish), + m_local_mirror_uuid(local_mirror_uuid), + m_progress_ctx(progress_ctx), + 
m_state_builder(state_builder), + m_resync_requested(resync_requested), + m_syncing(syncing) { + } + + void send() override; + +private: + // TODO + /** + * @verbatim + * + * <start> + * | + * v + * LOAD_LOCAL_IMAGE_META + * | + * v + * <finish> + * + * @endverbatim + */ + + std::string m_local_mirror_uuid; + ProgressContext* m_progress_ctx; + StateBuilder<ImageCtxT>* m_state_builder; + bool* m_resync_requested; + bool* m_syncing; + + void load_local_image_meta(); + void handle_load_local_image_meta(int r); + +}; + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::snapshot::PrepareReplayRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_PREPARE_REPLAY_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc new file mode 100644 index 000000000..67eaa9777 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc @@ -0,0 +1,1633 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Replayer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/perf_counters.h" +#include "common/perf_counters_key.h" +#include "include/stringify.h" +#include "common/Timer.h" +#include "cls/rbd/cls_rbd_client.h" +#include "json_spirit/json_spirit.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/deep_copy/Handler.h" +#include "librbd/deep_copy/ImageCopyRequest.h" +#include "librbd/deep_copy/SnapshotCopyRequest.h" +#include "librbd/mirror/ImageStateUpdateRequest.h" +#include "librbd/mirror/snapshot/CreateNonPrimaryRequest.h" +#include "librbd/mirror/snapshot/GetImageStateRequest.h" +#include "librbd/mirror/snapshot/ImageMeta.h" +#include 
"librbd/mirror/snapshot/UnlinkPeerRequest.h" +#include "tools/rbd_mirror/InstanceWatcher.h" +#include "tools/rbd_mirror/PoolMetaCache.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h" +#include "tools/rbd_mirror/image_replayer/ReplayerListener.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h" +#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h" +#include "tools/rbd_mirror/image_replayer/snapshot/Utils.h" +#include <set> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ + << "Replayer: " << this << " " << __func__ << ": " + +extern PerfCounters *g_snapshot_perf_counters; + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +namespace { + +// Rounds value to two decimal places and returns its magnitude. fabs() is +// used rather than unqualified abs() so the result can never be truncated +// by the integer abs(int) overload. +double round_to_two_places(double value) { + return fabs(round(value * 100) / 100); +} + +// Walks the image's snapshots from the highest snap id downwards and returns +// the id and info of the first complete mirror snapshot, or +// {CEPH_NOSNAP, nullptr} if there is none. +template<typename I> +std::pair<uint64_t, librbd::SnapInfo*> get_newest_mirror_snapshot( + I* image_ctx) { + for (auto snap_info_it = image_ctx->snap_info.rbegin(); + snap_info_it != image_ctx->snap_info.rend(); ++snap_info_it) { + const auto& snap_ns = snap_info_it->second.snap_namespace; + auto mirror_ns = std::get_if< + cls::rbd::MirrorSnapshotNamespace>(&snap_ns); + if (mirror_ns == nullptr || !mirror_ns->complete) { + continue; + } + + return {snap_info_it->first, &snap_info_it->second}; + } + + return {CEPH_NOSNAP, nullptr}; +} + +} // anonymous namespace + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +struct Replayer<I>::C_UpdateWatchCtx : public librbd::UpdateWatchCtx { + Replayer<I>* replayer; + + C_UpdateWatchCtx(Replayer<I>* replayer) : replayer(replayer) { + } + 
+ void handle_notify() override { + replayer->handle_image_update_notify(); + } +}; + +template <typename I> +struct Replayer<I>::DeepCopyHandler : public librbd::deep_copy::Handler { + Replayer *replayer; + + DeepCopyHandler(Replayer* replayer) : replayer(replayer) { + } + + void handle_read(uint64_t bytes_read) override { + replayer->handle_copy_image_read(bytes_read); + } + + int update_progress(uint64_t object_number, uint64_t object_count) override { + replayer->handle_copy_image_progress(object_number, object_count); + return 0; + } +}; + +template <typename I> +Replayer<I>::Replayer( + Threads<I>* threads, + InstanceWatcher<I>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + StateBuilder<I>* state_builder, + ReplayerListener* replayer_listener) + : m_threads(threads), + m_instance_watcher(instance_watcher), + m_local_mirror_uuid(local_mirror_uuid), + m_pool_meta_cache(pool_meta_cache), + m_state_builder(state_builder), + m_replayer_listener(replayer_listener), + m_lock(ceph::make_mutex(librbd::util::unique_lock_name( + "rbd::mirror::image_replayer::snapshot::Replayer", this))) { + dout(10) << dendl; +} + +template <typename I> +Replayer<I>::~Replayer() { + dout(10) << dendl; + + { + std::unique_lock locker{m_lock}; + unregister_perf_counters(); + } + + ceph_assert(m_state == STATE_COMPLETE); + ceph_assert(m_update_watch_ctx == nullptr); + ceph_assert(m_deep_copy_handler == nullptr); +} + +template <typename I> +void Replayer<I>::init(Context* on_finish) { + dout(10) << dendl; + + ceph_assert(m_state == STATE_INIT); + + RemotePoolMeta remote_pool_meta; + int r = m_pool_meta_cache->get_remote_pool_meta( + m_state_builder->remote_image_ctx->md_ctx.get_id(), &remote_pool_meta); + if (r < 0 || remote_pool_meta.mirror_peer_uuid.empty()) { + derr << "failed to retrieve mirror peer uuid from remote pool" << dendl; + m_state = STATE_COMPLETE; + m_threads->work_queue->queue(on_finish, r); + return; + } + + 
m_remote_mirror_peer_uuid = remote_pool_meta.mirror_peer_uuid; + dout(10) << "remote_mirror_peer_uuid=" << m_remote_mirror_peer_uuid << dendl; + + { + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock image_locker{local_image_ctx->image_lock}; + m_image_spec = image_replayer::util::compute_image_spec( + local_image_ctx->md_ctx, local_image_ctx->name); + } + + { + std::unique_lock locker{m_lock}; + register_perf_counters(); + } + + // on_finish is stored here rather than completed; it is presumably fired + // once the update watchers have been registered -- TODO confirm + ceph_assert(m_on_init_shutdown == nullptr); + m_on_init_shutdown = on_finish; + + register_local_update_watcher(); +} + +// Requests shut down of the replayer. Stores on_finish, clears any recorded +// error and switches the state to STATE_COMPLETE. If a replay was in +// progress, any pending sync request is canceled and the function returns +// without unregistering the watchers (per the log message, shut down is then +// pending on completion of the snapshot replay). Otherwise the remote update +// watcher is unregistered right away. +template <typename I> +void Replayer<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + ceph_assert(m_on_init_shutdown == nullptr); + m_on_init_shutdown = on_finish; + m_error_code = 0; + m_error_description = ""; + + ceph_assert(m_state != STATE_INIT); + // swap so that 'state' holds the previous state while m_state is already + // STATE_COMPLETE + auto state = STATE_COMPLETE; + std::swap(m_state, state); + + if (state == STATE_REPLAYING) { + // if a sync request was pending, request a cancelation + m_instance_watcher->cancel_sync_request( + m_state_builder->local_image_ctx->id); + + // TODO interrupt snapshot copy and image copy state machines even if remote + // cluster is unreachable + dout(10) << "shut down pending on completion of snapshot replay" << dendl; + return; + } + locker.unlock(); + + unregister_remote_update_watcher(); +} + +// Flush is not implemented yet: on_finish is simply queued on the work queue +// with a result of 0. +template <typename I> +void Replayer<I>::flush(Context* on_finish) { + dout(10) << dendl; + + // TODO + m_threads->work_queue->queue(on_finish, 0); +} + +// Builds a JSON description of the current replay status. When the replayer +// is neither replaying nor idle, on_finish is completed with -EAGAIN and +// false is returned. +template <typename I> +bool Replayer<I>::get_replay_status(std::string* description, + Context* on_finish) { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) { + locker.unlock(); + + derr << "replay not running" << dendl; + on_finish->complete(-EAGAIN); + return false; + } + + std::shared_lock local_image_locker{ + m_state_builder->local_image_ctx->image_lock}; + auto [local_snap_id, 
local_snap_info] = get_newest_mirror_snapshot( + m_state_builder->local_image_ctx); + + std::shared_lock remote_image_locker{ + m_state_builder->remote_image_ctx->image_lock}; + auto [remote_snap_id, remote_snap_info] = get_newest_mirror_snapshot( + m_state_builder->remote_image_ctx); + + if (remote_snap_info == nullptr) { + remote_image_locker.unlock(); + local_image_locker.unlock(); + locker.unlock(); + + derr << "remote image does not contain mirror snapshots" << dendl; + on_finish->complete(-EAGAIN); + return false; + } + + std::string replay_state = "idle"; + if (m_remote_snap_id_end != CEPH_NOSNAP) { + replay_state = "syncing"; + } + + json_spirit::mObject root_obj; + root_obj["replay_state"] = replay_state; + root_obj["remote_snapshot_timestamp"] = remote_snap_info->timestamp.sec(); + if (m_perf_counters) { + m_perf_counters->tset(l_rbd_mirror_snapshot_remote_timestamp, + remote_snap_info->timestamp); + } + + auto matching_remote_snap_id = util::compute_remote_snap_id( + m_state_builder->local_image_ctx->image_lock, + m_state_builder->local_image_ctx->snap_info, + local_snap_id, m_state_builder->remote_mirror_uuid); + auto matching_remote_snap_it = + m_state_builder->remote_image_ctx->snap_info.find(matching_remote_snap_id); + if (matching_remote_snap_id != CEPH_NOSNAP && + matching_remote_snap_it != + m_state_builder->remote_image_ctx->snap_info.end()) { + // use the timestamp from the matching remote image since + // the local snapshot would just be the time the snapshot was + // synced and not the consistency point in time. 
+ root_obj["local_snapshot_timestamp"] = + matching_remote_snap_it->second.timestamp.sec(); + if (m_perf_counters) { + m_perf_counters->tset(l_rbd_mirror_snapshot_local_timestamp, + matching_remote_snap_it->second.timestamp); + } + } + + matching_remote_snap_it = m_state_builder->remote_image_ctx->snap_info.find( + m_remote_snap_id_end); + if (m_remote_snap_id_end != CEPH_NOSNAP && + matching_remote_snap_it != + m_state_builder->remote_image_ctx->snap_info.end()) { + root_obj["syncing_snapshot_timestamp"] = remote_snap_info->timestamp.sec(); + + if (m_local_object_count > 0) { + root_obj["syncing_percent"] = + 100 * m_local_mirror_snap_ns.last_copied_object_number / + m_local_object_count; + } else { + // Set syncing_percent to 0 if m_local_object_count has + // not yet been set (last_copied_object_number may be > 0 + // if the sync is being resumed). + root_obj["syncing_percent"] = 0; + } + } + + m_bytes_per_second(0); + auto bytes_per_second = m_bytes_per_second.get_average(); + root_obj["bytes_per_second"] = round_to_two_places(bytes_per_second); + + auto bytes_per_snapshot = boost::accumulators::rolling_mean( + m_bytes_per_snapshot); + root_obj["bytes_per_snapshot"] = round_to_two_places(bytes_per_snapshot); + + root_obj["last_snapshot_sync_seconds"] = m_last_snapshot_sync_seconds; + root_obj["last_snapshot_bytes"] = m_last_snapshot_bytes; + + auto pending_bytes = bytes_per_snapshot * m_pending_snapshots; + if (bytes_per_second > 0 && m_pending_snapshots > 0) { + std::uint64_t seconds_until_synced = round_to_two_places( + pending_bytes / bytes_per_second); + if (seconds_until_synced >= std::numeric_limits<uint64_t>::max()) { + seconds_until_synced = std::numeric_limits<uint64_t>::max(); + } + + root_obj["seconds_until_synced"] = seconds_until_synced; + } + + *description = json_spirit::write( + root_obj, json_spirit::remove_trailing_zeros); + + local_image_locker.unlock(); + remote_image_locker.unlock(); + locker.unlock(); + on_finish->complete(-EEXIST); + 
return true; +} + +template <typename I> +void Replayer<I>::load_local_image_meta() { + dout(10) << dendl; + + { + // reset state in case new snapshot is added while we are scanning + std::unique_lock locker{m_lock}; + m_image_updated = false; + } + + bool update_status = false; + { + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock image_locker{local_image_ctx->image_lock}; + auto image_spec = image_replayer::util::compute_image_spec( + local_image_ctx->md_ctx, local_image_ctx->name); + if (m_image_spec != image_spec) { + m_image_spec = image_spec; + update_status = true; + } + } + if (update_status) { + std::unique_lock locker{m_lock}; + unregister_perf_counters(); + register_perf_counters(); + notify_status_updated(); + } + + ceph_assert(m_state_builder->local_image_meta != nullptr); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_load_local_image_meta>(this); + m_state_builder->local_image_meta->load(ctx); +} + +template <typename I> +void Replayer<I>::handle_load_local_image_meta(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to load local image-meta: " << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to load local image-meta"); + return; + } + + if (r >= 0 && m_state_builder->local_image_meta->resync_requested) { + m_resync_requested = true; + + dout(10) << "local image resync requested" << dendl; + handle_replay_complete(0, "resync requested"); + return; + } + + refresh_local_image(); +} + +template <typename I> +void Replayer<I>::refresh_local_image() { + if (!m_state_builder->local_image_ctx->state->is_refresh_required()) { + refresh_remote_image(); + return; + } + + dout(10) << dendl; + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_refresh_local_image>(this); + m_state_builder->local_image_ctx->state->refresh(ctx); +} + +template <typename I> +void Replayer<I>::handle_refresh_local_image(int r) { + dout(10) << "r=" 
<< r << dendl; + + if (r < 0) { + derr << "failed to refresh local image: " << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to refresh local image"); + return; + } + + refresh_remote_image(); +} + +template <typename I> +void Replayer<I>::refresh_remote_image() { + if (!m_state_builder->remote_image_ctx->state->is_refresh_required()) { + std::unique_lock locker{m_lock}; + scan_local_mirror_snapshots(&locker); + return; + } + + dout(10) << dendl; + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_refresh_remote_image>(this); + m_state_builder->remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void Replayer<I>::handle_refresh_remote_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to refresh remote image: " << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to refresh remote image"); + return; + } + + std::unique_lock locker{m_lock}; + scan_local_mirror_snapshots(&locker); +} + +template <typename I> +void Replayer<I>::scan_local_mirror_snapshots( + std::unique_lock<ceph::mutex>* locker) { + if (is_replay_interrupted(locker)) { + return; + } + + dout(10) << dendl; + + m_local_snap_id_start = 0; + m_local_snap_id_end = CEPH_NOSNAP; + m_local_mirror_snap_ns = {}; + m_local_object_count = 0; + + m_remote_snap_id_start = 0; + m_remote_snap_id_end = CEPH_NOSNAP; + m_remote_mirror_snap_ns = {}; + + std::set<uint64_t> prune_snap_ids; + + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock image_locker{local_image_ctx->image_lock}; + for (auto snap_info_it = local_image_ctx->snap_info.begin(); + snap_info_it != local_image_ctx->snap_info.end(); ++snap_info_it) { + const auto& snap_ns = snap_info_it->second.snap_namespace; + auto mirror_ns = std::get_if< + cls::rbd::MirrorSnapshotNamespace>(&snap_ns); + if (mirror_ns == nullptr) { + continue; + } + + dout(15) << "local mirror snapshot: id=" << snap_info_it->first << ", " + << "mirror_ns=" << 
*mirror_ns << dendl; + m_local_mirror_snap_ns = *mirror_ns; + + auto local_snap_id = snap_info_it->first; + if (mirror_ns->is_non_primary()) { + if (mirror_ns->complete) { + // if remote has new snapshots, we would sync from here + m_local_snap_id_start = local_snap_id; + ceph_assert(m_local_snap_id_end == CEPH_NOSNAP); + + if (mirror_ns->mirror_peer_uuids.empty()) { + // no other peer will attempt to sync to this snapshot so store as + // a candidate for removal + prune_snap_ids.insert(local_snap_id); + } + } else if (mirror_ns->last_copied_object_number == 0 && + m_local_snap_id_start > 0) { + // snapshot might be missing image state, object-map, etc, so just + // delete and re-create it if we haven't started copying data + // objects. Also only prune this snapshot since we will need the + // previous mirror snapshot for syncing. Special case exception for + // the first non-primary snapshot since we know its snapshot is + // well-formed because otherwise the mirror-image-state would have + // forced an image deletion. 
+ prune_snap_ids.clear(); + prune_snap_ids.insert(local_snap_id); + break; + } else { + // start snap will be last complete mirror snapshot or initial + // image revision + m_local_snap_id_end = local_snap_id; + break; + } + } else if (mirror_ns->is_primary()) { + if (mirror_ns->complete) { + m_local_snap_id_start = local_snap_id; + ceph_assert(m_local_snap_id_end == CEPH_NOSNAP); + } else { + derr << "incomplete local primary snapshot" << dendl; + handle_replay_complete(locker, -EINVAL, + "incomplete local primary snapshot"); + return; + } + } else { + derr << "unknown local mirror snapshot state" << dendl; + handle_replay_complete(locker, -EINVAL, + "invalid local mirror snapshot state"); + return; + } + } + image_locker.unlock(); + + if (m_local_snap_id_start > 0) { + // remove candidate that is required for delta snapshot sync + prune_snap_ids.erase(m_local_snap_id_start); + } + if (!prune_snap_ids.empty()) { + locker->unlock(); + + auto prune_snap_id = *prune_snap_ids.begin(); + dout(5) << "pruning unused non-primary snapshot " << prune_snap_id << dendl; + prune_non_primary_snapshot(prune_snap_id); + return; + } + + if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) { + if (m_local_mirror_snap_ns.is_non_primary() && + m_local_mirror_snap_ns.primary_mirror_uuid != + m_state_builder->remote_mirror_uuid) { + if (m_local_mirror_snap_ns.is_orphan()) { + dout(5) << "local image being force promoted" << dendl; + handle_replay_complete(locker, 0, "orphan (force promoting)"); + return; + } + // TODO support multiple peers + derr << "local image linked to unknown peer: " + << m_local_mirror_snap_ns.primary_mirror_uuid << dendl; + handle_replay_complete(locker, -EEXIST, + "local image linked to unknown peer"); + return; + } else if (m_local_mirror_snap_ns.state == + cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) { + dout(5) << "local image promoted" << dendl; + handle_replay_complete(locker, 0, "force promoted"); + return; + } + + dout(10) << "found local 
mirror snapshot: " + << "local_snap_id_start=" << m_local_snap_id_start << ", " + << "local_snap_id_end=" << m_local_snap_id_end << ", " + << "local_snap_ns=" << m_local_mirror_snap_ns << dendl; + if (!m_local_mirror_snap_ns.is_primary() && + m_local_mirror_snap_ns.complete) { + // our remote sync should start after this completed snapshot + m_remote_snap_id_start = m_local_mirror_snap_ns.primary_snap_id; + } + } + + // we don't have any mirror snapshots or only completed non-primary + // mirror snapshots + scan_remote_mirror_snapshots(locker); +} + +template <typename I> +void Replayer<I>::scan_remote_mirror_snapshots( + std::unique_lock<ceph::mutex>* locker) { + dout(10) << dendl; + + m_pending_snapshots = 0; + + std::set<uint64_t> unlink_snap_ids; + bool split_brain = false; + bool remote_demoted = false; + auto remote_image_ctx = m_state_builder->remote_image_ctx; + std::shared_lock image_locker{remote_image_ctx->image_lock}; + for (auto snap_info_it = remote_image_ctx->snap_info.begin(); + snap_info_it != remote_image_ctx->snap_info.end(); ++snap_info_it) { + const auto& snap_ns = snap_info_it->second.snap_namespace; + auto mirror_ns = std::get_if< + cls::rbd::MirrorSnapshotNamespace>(&snap_ns); + if (mirror_ns == nullptr) { + continue; + } + + dout(15) << "remote mirror snapshot: id=" << snap_info_it->first << ", " + << "mirror_ns=" << *mirror_ns << dendl; + remote_demoted = mirror_ns->is_demoted(); + if (!mirror_ns->is_primary() && !mirror_ns->is_non_primary()) { + derr << "unknown remote mirror snapshot state" << dendl; + handle_replay_complete(locker, -EINVAL, + "invalid remote mirror snapshot state"); + return; + } else if (mirror_ns->mirror_peer_uuids.count(m_remote_mirror_peer_uuid) == + 0) { + dout(15) << "skipping remote snapshot due to missing mirror peer" + << dendl; + continue; + } + + auto remote_snap_id = snap_info_it->first; + if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) { + // we have a local mirror snapshot + if 
(m_local_mirror_snap_ns.is_non_primary()) { + // previously validated that it was linked to remote + ceph_assert(m_local_mirror_snap_ns.primary_mirror_uuid == + m_state_builder->remote_mirror_uuid); + + if (m_remote_snap_id_end == CEPH_NOSNAP) { + // haven't found the end snap so treat this as a candidate for unlink + unlink_snap_ids.insert(remote_snap_id); + } + if (m_local_mirror_snap_ns.complete && + m_local_mirror_snap_ns.primary_snap_id >= remote_snap_id) { + // skip past completed remote snapshot + m_remote_snap_id_start = remote_snap_id; + m_remote_mirror_snap_ns = *mirror_ns; + dout(15) << "skipping synced remote snapshot " << remote_snap_id + << dendl; + continue; + } else if (!m_local_mirror_snap_ns.complete && + m_local_mirror_snap_ns.primary_snap_id > remote_snap_id) { + // skip until we get to the in-progress remote snapshot + dout(15) << "skipping synced remote snapshot " << remote_snap_id + << " while search for in-progress sync" << dendl; + m_remote_snap_id_start = remote_snap_id; + m_remote_mirror_snap_ns = *mirror_ns; + continue; + } + } else if (m_local_mirror_snap_ns.state == + cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) { + // find the matching demotion snapshot in remote image + ceph_assert(m_local_snap_id_start > 0); + if (mirror_ns->state == + cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED && + mirror_ns->primary_mirror_uuid == m_local_mirror_uuid && + mirror_ns->primary_snap_id == m_local_snap_id_start) { + dout(10) << "located matching demotion snapshot: " + << "remote_snap_id=" << remote_snap_id << ", " + << "local_snap_id=" << m_local_snap_id_start << dendl; + m_remote_snap_id_start = remote_snap_id; + split_brain = false; + continue; + } else if (m_remote_snap_id_start == 0) { + // still looking for our matching demotion snapshot + dout(15) << "skipping remote snapshot " << remote_snap_id << " " + << "while searching for demotion" << dendl; + split_brain = true; + continue; + } + } else { + // should not have been able to 
reach this + ceph_assert(false); + } + } else if (!mirror_ns->is_primary()) { + dout(15) << "skipping non-primary remote snapshot" << dendl; + continue; + } + + // found candidate snapshot to sync + ++m_pending_snapshots; + if (m_remote_snap_id_end != CEPH_NOSNAP) { + continue; + } + + // first primary snapshot where were are listed as a peer + m_remote_snap_id_end = remote_snap_id; + m_remote_mirror_snap_ns = *mirror_ns; + } + + if (m_remote_snap_id_start != 0 && + remote_image_ctx->snap_info.count(m_remote_snap_id_start) == 0) { + // the remote start snapshot was deleted out from under us + derr << "failed to locate remote start snapshot: " + << "snap_id=" << m_remote_snap_id_start << dendl; + split_brain = true; + } + + image_locker.unlock(); + + if (!split_brain) { + unlink_snap_ids.erase(m_remote_snap_id_start); + unlink_snap_ids.erase(m_remote_snap_id_end); + if (!unlink_snap_ids.empty()) { + locker->unlock(); + + // retry the unlinking process for a remote snapshot that we do not + // need anymore + auto remote_snap_id = *unlink_snap_ids.begin(); + dout(10) << "unlinking from remote snapshot " << remote_snap_id << dendl; + unlink_peer(remote_snap_id); + return; + } + + if (m_remote_snap_id_end != CEPH_NOSNAP) { + dout(10) << "found remote mirror snapshot: " + << "remote_snap_id_start=" << m_remote_snap_id_start << ", " + << "remote_snap_id_end=" << m_remote_snap_id_end << ", " + << "remote_snap_ns=" << m_remote_mirror_snap_ns << dendl; + if (m_remote_mirror_snap_ns.complete) { + locker->unlock(); + + if (m_local_snap_id_end != CEPH_NOSNAP && + !m_local_mirror_snap_ns.complete) { + // attempt to resume image-sync + dout(10) << "local image contains in-progress mirror snapshot" + << dendl; + get_local_image_state(); + } else { + copy_snapshots(); + } + return; + } else { + // might have raced with the creation of a remote mirror snapshot + // so we will need to refresh and rescan once it completes + dout(15) << "remote mirror snapshot not complete" << dendl; + 
} + } + } + + if (m_image_updated) { + // received update notification while scanning image, restart ... + m_image_updated = false; + locker->unlock(); + + dout(10) << "restarting snapshot scan due to remote update notification" + << dendl; + load_local_image_meta(); + return; + } + + if (is_replay_interrupted(locker)) { + return; + } else if (split_brain) { + derr << "split-brain detected: failed to find matching non-primary " + << "snapshot in remote image: " + << "local_snap_id_start=" << m_local_snap_id_start << ", " + << "local_snap_ns=" << m_local_mirror_snap_ns << dendl; + handle_replay_complete(locker, -EEXIST, "split-brain"); + return; + } else if (remote_demoted) { + dout(10) << "remote image demoted" << dendl; + handle_replay_complete(locker, -EREMOTEIO, "remote image demoted"); + return; + } + + dout(10) << "all remote snapshots synced: idling waiting for new snapshot" + << dendl; + ceph_assert(m_state == STATE_REPLAYING); + m_state = STATE_IDLE; + + notify_status_updated(); +} + +template <typename I> +void Replayer<I>::prune_non_primary_snapshot(uint64_t snap_id) { + dout(10) << "snap_id=" << snap_id << dendl; + + auto local_image_ctx = m_state_builder->local_image_ctx; + bool snap_valid = false; + cls::rbd::SnapshotNamespace snap_namespace; + std::string snap_name; + + { + std::shared_lock image_locker{local_image_ctx->image_lock}; + auto snap_info = local_image_ctx->get_snap_info(snap_id); + if (snap_info != nullptr) { + snap_valid = true; + snap_namespace = snap_info->snap_namespace; + snap_name = snap_info->name; + + ceph_assert(std::holds_alternative<cls::rbd::MirrorSnapshotNamespace>( + snap_namespace)); + } + } + + if (!snap_valid) { + load_local_image_meta(); + return; + } + + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_prune_non_primary_snapshot>(this); + local_image_ctx->operations->snap_remove(snap_namespace, snap_name, ctx); +} + +template <typename I> +void Replayer<I>::handle_prune_non_primary_snapshot(int r) 
{ + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to prune non-primary snapshot: " << cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to prune non-primary snapshot"); + return; + } + + if (is_replay_interrupted()) { + return; + } + + load_local_image_meta(); +} + +template <typename I> +void Replayer<I>::copy_snapshots() { + dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", " + << "remote_snap_id_end=" << m_remote_snap_id_end << ", " + << "local_snap_id_start=" << m_local_snap_id_start << dendl; + + ceph_assert(m_remote_snap_id_start != CEPH_NOSNAP); + ceph_assert(m_remote_snap_id_end > 0 && + m_remote_snap_id_end != CEPH_NOSNAP); + ceph_assert(m_local_snap_id_start != CEPH_NOSNAP); + + m_local_mirror_snap_ns = {}; + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_copy_snapshots>(this); + auto req = librbd::deep_copy::SnapshotCopyRequest<I>::create( + m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx, + m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start, + false, m_threads->work_queue, &m_local_mirror_snap_ns.snap_seqs, + ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_copy_snapshots(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to copy snapshots from remote to local image: " + << cpp_strerror(r) << dendl; + handle_replay_complete( + r, "failed to copy snapshots from remote to local image"); + return; + } + + dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", " + << "remote_snap_id_end=" << m_remote_snap_id_end << ", " + << "local_snap_id_start=" << m_local_snap_id_start << ", " + << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl; + get_remote_image_state(); +} + +template <typename I> +void Replayer<I>::get_remote_image_state() { + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer<I>, 
&Replayer<I>::handle_get_remote_image_state>(this); + auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create( + m_state_builder->remote_image_ctx, m_remote_snap_id_end, + &m_image_state, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_get_remote_image_state(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to retrieve remote snapshot image state: " + << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to retrieve remote snapshot image state"); + return; + } + + create_non_primary_snapshot(); +} + +template <typename I> +void Replayer<I>::get_local_image_state() { + dout(10) << dendl; + + ceph_assert(m_local_snap_id_end != CEPH_NOSNAP); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_get_local_image_state>(this); + auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create( + m_state_builder->local_image_ctx, m_local_snap_id_end, + &m_image_state, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_get_local_image_state(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to retrieve local snapshot image state: " + << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to retrieve local snapshot image state"); + return; + } + + request_sync(); +} + +template <typename I> +void Replayer<I>::create_non_primary_snapshot() { + auto local_image_ctx = m_state_builder->local_image_ctx; + + if (m_local_snap_id_start > 0) { + std::shared_lock local_image_locker{local_image_ctx->image_lock}; + + auto local_snap_info_it = local_image_ctx->snap_info.find( + m_local_snap_id_start); + if (local_snap_info_it == local_image_ctx->snap_info.end()) { + local_image_locker.unlock(); + + derr << "failed to locate local snapshot " << m_local_snap_id_start + << dendl; + handle_replay_complete(-ENOENT, "failed to locate local start snapshot"); + return; + } + + auto mirror_ns = 
std::get_if<cls::rbd::MirrorSnapshotNamespace>( + &local_snap_info_it->second.snap_namespace); + ceph_assert(mirror_ns != nullptr); + + auto remote_image_ctx = m_state_builder->remote_image_ctx; + std::shared_lock remote_image_locker{remote_image_ctx->image_lock}; + + // (re)build a full mapping from remote to local snap ids for all user + // snapshots to support applying image state in the future + for (auto& [remote_snap_id, remote_snap_info] : + remote_image_ctx->snap_info) { + if (remote_snap_id >= m_remote_snap_id_end) { + break; + } + + // we can ignore all non-user snapshots since image state only includes + // user snapshots + if (!std::holds_alternative<cls::rbd::UserSnapshotNamespace>( + remote_snap_info.snap_namespace)) { + continue; + } + + uint64_t local_snap_id = CEPH_NOSNAP; + if (mirror_ns->is_demoted() && !m_remote_mirror_snap_ns.is_demoted()) { + // if we are creating a non-primary snapshot following a demotion, + // re-build the full snapshot sequence since we don't have a valid + // snapshot mapping + auto local_snap_id_it = local_image_ctx->snap_ids.find( + {remote_snap_info.snap_namespace, remote_snap_info.name}); + if (local_snap_id_it != local_image_ctx->snap_ids.end()) { + local_snap_id = local_snap_id_it->second; + } + } else { + auto snap_seq_it = mirror_ns->snap_seqs.find(remote_snap_id); + if (snap_seq_it != mirror_ns->snap_seqs.end()) { + local_snap_id = snap_seq_it->second; + } + } + + if (m_local_mirror_snap_ns.snap_seqs.count(remote_snap_id) == 0 && + local_snap_id != CEPH_NOSNAP) { + dout(15) << "mapping remote snapshot " << remote_snap_id << " to " + << "local snapshot " << local_snap_id << dendl; + m_local_mirror_snap_ns.snap_seqs[remote_snap_id] = local_snap_id; + } + } + } + + dout(10) << "demoted=" << m_remote_mirror_snap_ns.is_demoted() << ", " + << "primary_mirror_uuid=" + << m_state_builder->remote_mirror_uuid << ", " + << "primary_snap_id=" << m_remote_snap_id_end << ", " + << "snap_seqs=" << 
m_local_mirror_snap_ns.snap_seqs << dendl; + + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_create_non_primary_snapshot>(this); + auto req = librbd::mirror::snapshot::CreateNonPrimaryRequest<I>::create( + local_image_ctx, m_remote_mirror_snap_ns.is_demoted(), + m_state_builder->remote_mirror_uuid, m_remote_snap_id_end, + m_local_mirror_snap_ns.snap_seqs, m_image_state, &m_local_snap_id_end, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_create_non_primary_snapshot(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to create local mirror snapshot: " << cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to create local mirror snapshot"); + return; + } + + dout(15) << "local_snap_id_end=" << m_local_snap_id_end << dendl; + + update_mirror_image_state(); +} + +template <typename I> +void Replayer<I>::update_mirror_image_state() { + if (m_local_snap_id_start > 0) { + request_sync(); + return; + } + + // a newly created non-primary image has a local mirror state of CREATING + // until this point so that we could avoid preserving the image until + // the first non-primary snapshot linked the two images together. 
+ dout(10) << dendl; + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_update_mirror_image_state>(this); + auto req = librbd::mirror::ImageStateUpdateRequest<I>::create( + m_state_builder->local_image_ctx->md_ctx, + m_state_builder->local_image_ctx->id, + cls::rbd::MIRROR_IMAGE_STATE_ENABLED, {}, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_update_mirror_image_state(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update local mirror image state: " << cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to update local mirror image state"); + return; + } + + request_sync(); +} + +template <typename I> +void Replayer<I>::request_sync() { + if (m_remote_mirror_snap_ns.clean_since_snap_id == m_remote_snap_id_start) { + dout(10) << "skipping unnecessary image copy: " + << "remote_snap_id_start=" << m_remote_snap_id_start << ", " + << "remote_mirror_snap_ns=" << m_remote_mirror_snap_ns << dendl; + apply_image_state(); + return; + } + + dout(10) << dendl; + std::unique_lock locker{m_lock}; + if (is_replay_interrupted(&locker)) { + return; + } + + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_request_sync>(this)); + m_instance_watcher->notify_sync_request(m_state_builder->local_image_ctx->id, + ctx); +} + +template <typename I> +void Replayer<I>::handle_request_sync(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (is_replay_interrupted(&locker)) { + return; + } else if (r == -ECANCELED) { + dout(5) << "image-sync canceled" << dendl; + handle_replay_complete(&locker, r, "image-sync canceled"); + return; + } else if (r < 0) { + derr << "failed to request image-sync: " << cpp_strerror(r) << dendl; + handle_replay_complete(&locker, r, "failed to request image-sync"); + return; + } + + m_sync_in_progress = true; + locker.unlock(); + + copy_image(); +} + +template 
<typename I> +void Replayer<I>::copy_image() { + dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", " + << "remote_snap_id_end=" << m_remote_snap_id_end << ", " + << "local_snap_id_start=" << m_local_snap_id_start << ", " + << "last_copied_object_number=" + << m_local_mirror_snap_ns.last_copied_object_number << ", " + << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl; + + m_snapshot_bytes = 0; + m_snapshot_replay_start = ceph_clock_now(); + m_deep_copy_handler = new DeepCopyHandler(this); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_copy_image>(this); + auto req = librbd::deep_copy::ImageCopyRequest<I>::create( + m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx, + m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start, false, + (m_local_mirror_snap_ns.last_copied_object_number > 0 ? + librbd::deep_copy::ObjectNumber{ + m_local_mirror_snap_ns.last_copied_object_number} : + librbd::deep_copy::ObjectNumber{}), + m_local_mirror_snap_ns.snap_seqs, m_deep_copy_handler, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_copy_image(int r) { + dout(10) << "r=" << r << dendl; + + delete m_deep_copy_handler; + m_deep_copy_handler = nullptr; + + if (r < 0) { + derr << "failed to copy remote image to local image: " << cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to copy remote image"); + return; + } + + { + std::unique_lock locker{m_lock}; + m_last_snapshot_bytes = m_snapshot_bytes; + m_bytes_per_snapshot(m_snapshot_bytes); + utime_t duration = ceph_clock_now() - m_snapshot_replay_start; + m_last_snapshot_sync_seconds = duration.sec(); + + if (g_snapshot_perf_counters) { + g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes, + m_snapshot_bytes); + g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_snapshots); + g_snapshot_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time, + duration); + } + if (m_perf_counters) { + 
m_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes, m_snapshot_bytes); + m_perf_counters->inc(l_rbd_mirror_snapshot_snapshots); + m_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time, duration); + m_perf_counters->tset(l_rbd_mirror_snapshot_last_sync_time, duration); + m_perf_counters->set(l_rbd_mirror_snapshot_last_sync_bytes, + m_snapshot_bytes); + } + } + + apply_image_state(); +} + +template <typename I> +void Replayer<I>::handle_copy_image_progress(uint64_t object_number, + uint64_t object_count) { + dout(10) << "object_number=" << object_number << ", " + << "object_count=" << object_count << dendl; + + std::unique_lock locker{m_lock}; + m_local_mirror_snap_ns.last_copied_object_number = std::min( + object_number, object_count); + m_local_object_count = object_count; + + update_non_primary_snapshot(false); +} + +template <typename I> +void Replayer<I>::handle_copy_image_read(uint64_t bytes_read) { + dout(20) << "bytes_read=" << bytes_read << dendl; + + std::unique_lock locker{m_lock}; + m_bytes_per_second(bytes_read); + m_snapshot_bytes += bytes_read; +} + +template <typename I> +void Replayer<I>::apply_image_state() { + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_apply_image_state>(this); + auto req = ApplyImageStateRequest<I>::create( + m_local_mirror_uuid, + m_state_builder->remote_mirror_uuid, + m_state_builder->local_image_ctx, + m_state_builder->remote_image_ctx, + m_image_state, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_apply_image_state(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to apply remote image state to local image: " + << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to apply remote image state"); + return; + } + + std::unique_lock locker{m_lock}; + update_non_primary_snapshot(true); +} + +template <typename I> +void Replayer<I>::update_non_primary_snapshot(bool complete) { + 
ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + if (!complete) { + // disallow two in-flight updates if this isn't the completion of the sync + if (m_updating_sync_point) { + return; + } + m_updating_sync_point = true; + } else { + m_local_mirror_snap_ns.complete = true; + } + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_snapshot_set_copy_progress( + &op, m_local_snap_id_end, m_local_mirror_snap_ns.complete, + m_local_mirror_snap_ns.last_copied_object_number); + + auto ctx = new C_TrackedOp( + m_in_flight_op_tracker, new LambdaContext([this, complete](int r) { + handle_update_non_primary_snapshot(complete, r); + })); + auto aio_comp = create_rados_callback(ctx); + int r = m_state_builder->local_image_ctx->md_ctx.aio_operate( + m_state_builder->local_image_ctx->header_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void Replayer<I>::handle_update_non_primary_snapshot(bool complete, int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update local snapshot progress: " << cpp_strerror(r) + << dendl; + if (complete) { + // only fail if this was the final update + handle_replay_complete(r, "failed to update local snapshot progress"); + return; + } + } + + if (!complete) { + // periodic sync-point update -- do not advance state machine + std::unique_lock locker{m_lock}; + + ceph_assert(m_updating_sync_point); + m_updating_sync_point = false; + return; + } + + notify_image_update(); +} + +template <typename I> +void Replayer<I>::notify_image_update() { + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_notify_image_update>(this); + m_state_builder->local_image_ctx->notify_update(ctx); +} + +template <typename I> +void Replayer<I>::handle_notify_image_update(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to notify local image update: " << cpp_strerror(r) << dendl; + } + 
+ unlink_peer(m_remote_snap_id_start); +} + +template <typename I> +void Replayer<I>::unlink_peer(uint64_t remote_snap_id) { + if (remote_snap_id == 0) { + finish_sync(); + return; + } + + // local snapshot fully synced -- we no longer depend on the sync + // start snapshot in the remote image + dout(10) << "remote_snap_id=" << remote_snap_id << dendl; + + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_unlink_peer>(this); + auto req = librbd::mirror::snapshot::UnlinkPeerRequest<I>::create( + m_state_builder->remote_image_ctx, remote_snap_id, + m_remote_mirror_peer_uuid, false, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_unlink_peer(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to unlink local peer from remote image: " << cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to unlink local peer from remote image"); + return; + } + + finish_sync(); +} + +template <typename I> +void Replayer<I>::finish_sync() { + dout(10) << dendl; + + { + std::unique_lock locker{m_lock}; + notify_status_updated(); + + if (m_sync_in_progress) { + m_sync_in_progress = false; + m_instance_watcher->notify_sync_complete( + m_state_builder->local_image_ctx->id); + } + } + + if (is_replay_interrupted()) { + return; + } + + load_local_image_meta(); +} + +template <typename I> +void Replayer<I>::register_local_update_watcher() { + dout(10) << dendl; + + m_update_watch_ctx = new C_UpdateWatchCtx(this); + + int r = m_state_builder->local_image_ctx->state->register_update_watcher( + m_update_watch_ctx, &m_local_update_watcher_handle); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_register_local_update_watcher>(this); + m_threads->work_queue->queue(ctx, r); +} + +template <typename I> +void Replayer<I>::handle_register_local_update_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register local update watcher: " << 
cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to register local image update watcher"); + m_state = STATE_COMPLETE; + + delete m_update_watch_ctx; + m_update_watch_ctx = nullptr; + + Context* on_init = nullptr; + std::swap(on_init, m_on_init_shutdown); + on_init->complete(r); + return; + } + + register_remote_update_watcher(); +} + +template <typename I> +void Replayer<I>::register_remote_update_watcher() { + dout(10) << dendl; + + int r = m_state_builder->remote_image_ctx->state->register_update_watcher( + m_update_watch_ctx, &m_remote_update_watcher_handle); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_register_remote_update_watcher>(this); + m_threads->work_queue->queue(ctx, r); +} + +template <typename I> +void Replayer<I>::handle_register_remote_update_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register remote update watcher: " << cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to register remote image update watcher"); + m_state = STATE_COMPLETE; + + unregister_local_update_watcher(); + return; + } + + m_state = STATE_REPLAYING; + + Context* on_init = nullptr; + std::swap(on_init, m_on_init_shutdown); + on_init->complete(0); + + // delay initial snapshot scan until after we have alerted + // image replayer that we have initialized in case an error + // occurs + { + std::unique_lock locker{m_lock}; + notify_status_updated(); + } + + load_local_image_meta(); +} + +template <typename I> +void Replayer<I>::unregister_remote_update_watcher() { + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer<I>, + &Replayer<I>::handle_unregister_remote_update_watcher>(this); + m_state_builder->remote_image_ctx->state->unregister_update_watcher( + m_remote_update_watcher_handle, ctx); +} + +template <typename I> +void Replayer<I>::handle_unregister_remote_update_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to 
unregister remote update watcher: " << cpp_strerror(r) + << dendl; + } + + unregister_local_update_watcher(); +} + +template <typename I> +void Replayer<I>::unregister_local_update_watcher() { + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer<I>, + &Replayer<I>::handle_unregister_local_update_watcher>(this); + m_state_builder->local_image_ctx->state->unregister_update_watcher( + m_local_update_watcher_handle, ctx); +} + +template <typename I> +void Replayer<I>::handle_unregister_local_update_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to unregister local update watcher: " << cpp_strerror(r) + << dendl; + } + + delete m_update_watch_ctx; + m_update_watch_ctx = nullptr; + + wait_for_in_flight_ops(); +} + +template <typename I> +void Replayer<I>::wait_for_in_flight_ops() { + dout(10) << dendl; + + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_wait_for_in_flight_ops>(this)); + m_in_flight_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Replayer<I>::handle_wait_for_in_flight_ops(int r) { + dout(10) << "r=" << r << dendl; + + Context* on_shutdown = nullptr; + { + std::unique_lock locker{m_lock}; + ceph_assert(m_on_init_shutdown != nullptr); + std::swap(on_shutdown, m_on_init_shutdown); + } + on_shutdown->complete(m_error_code); +} + +template <typename I> +void Replayer<I>::handle_image_update_notify() { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + if (m_state == STATE_REPLAYING) { + dout(15) << "flagging snapshot rescan required" << dendl; + m_image_updated = true; + } else if (m_state == STATE_IDLE) { + m_state = STATE_REPLAYING; + locker.unlock(); + + dout(15) << "restarting idle replayer" << dendl; + load_local_image_meta(); + } +} + +template <typename I> +void Replayer<I>::handle_replay_complete(int r, + const std::string& description) { + std::unique_lock locker{m_lock}; + 
handle_replay_complete(&locker, r, description); +} + +template <typename I> +void Replayer<I>::handle_replay_complete(std::unique_lock<ceph::mutex>* locker, + int r, + const std::string& description) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + if (m_sync_in_progress) { + m_sync_in_progress = false; + m_instance_watcher->notify_sync_complete( + m_state_builder->local_image_ctx->id); + } + + // don't set error code and description if resuming a pending + // shutdown + if (is_replay_interrupted(locker)) { + return; + } + + if (m_error_code == 0) { + m_error_code = r; + m_error_description = description; + } + + if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) { + return; + } + + m_state = STATE_COMPLETE; + notify_status_updated(); +} + +template <typename I> +void Replayer<I>::notify_status_updated() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(10) << dendl; + auto ctx = new C_TrackedOp(m_in_flight_op_tracker, new LambdaContext( + [this](int) { + m_replayer_listener->handle_notification(); + })); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +bool Replayer<I>::is_replay_interrupted() { + std::unique_lock locker{m_lock}; + return is_replay_interrupted(&locker); +} + +template <typename I> +bool Replayer<I>::is_replay_interrupted(std::unique_lock<ceph::mutex>* locker) { + if (m_state == STATE_COMPLETE) { + locker->unlock(); + + dout(10) << "resuming pending shut down" << dendl; + unregister_remote_update_watcher(); + return true; + } + return false; +} + +template <typename I> +void Replayer<I>::register_perf_counters() { + dout(5) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + ceph_assert(m_perf_counters == nullptr); + + auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct); + auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio"); + + auto local_image_ctx = m_state_builder->local_image_ctx; + std::string labels = ceph::perf_counters::key_create( + 
"rbd_mirror_snapshot_image", + {{"pool", local_image_ctx->md_ctx.get_pool_name()}, + {"namespace", local_image_ctx->md_ctx.get_namespace()}, + {"image", local_image_ctx->name}}); + + PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_snapshot_first, + l_rbd_mirror_snapshot_last); + plb.add_u64_counter(l_rbd_mirror_snapshot_snapshots, "snapshots", + "Number of snapshots synced", nullptr, prio); + plb.add_time_avg(l_rbd_mirror_snapshot_sync_time, "sync_time", + "Average sync time", nullptr, prio); + plb.add_u64_counter(l_rbd_mirror_snapshot_sync_bytes, "sync_bytes", + "Total bytes synced", nullptr, prio, unit_t(UNIT_BYTES)); + plb.add_time(l_rbd_mirror_snapshot_remote_timestamp, "remote_timestamp", + "Timestamp of the remote snapshot", nullptr, prio); + plb.add_time(l_rbd_mirror_snapshot_local_timestamp, "local_timestamp", + "Timestamp of the local snapshot", nullptr, prio); + plb.add_time(l_rbd_mirror_snapshot_last_sync_time, "last_sync_time", + "Time taken to sync the last snapshot", nullptr, prio); + plb.add_u64(l_rbd_mirror_snapshot_last_sync_bytes, "last_sync_bytes", + "Bytes synced for the last snapshot", nullptr, prio, + unit_t(UNIT_BYTES)); + + m_perf_counters = plb.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(m_perf_counters); +} + +template <typename I> +void Replayer<I>::unregister_perf_counters() { + dout(5) << dendl; + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + PerfCounters *perf_counters = nullptr; + std::swap(perf_counters, m_perf_counters); + + if (perf_counters != nullptr) { + g_ceph_context->get_perfcounters_collection()->remove(perf_counters); + delete perf_counters; + } +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::Replayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h new file mode 
100644 index 000000000..17d45f6bc --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h @@ -0,0 +1,349 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H +#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H + +#include "tools/rbd_mirror/image_replayer/Replayer.h" +#include "common/ceph_mutex.h" +#include "common/AsyncOpTracker.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/mirror/snapshot/Types.h" +#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h" +#include <boost/accumulators/accumulators.hpp> +#include <boost/accumulators/statistics/stats.hpp> +#include <boost/accumulators/statistics/rolling_mean.hpp> +#include <string> +#include <type_traits> + +namespace librbd { + +struct ImageCtx; +namespace snapshot { template <typename I> class Replay; } + +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> struct InstanceWatcher; +class PoolMetaCache; +template <typename> struct Threads; + +namespace image_replayer { + +struct ReplayerListener; + +namespace snapshot { + +template <typename> class EventPreprocessor; +template <typename> class ReplayStatusFormatter; +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class Replayer : public image_replayer::Replayer { +public: + static Replayer* create( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + StateBuilder<ImageCtxT>* state_builder, + ReplayerListener* replayer_listener) { + return new Replayer(threads, instance_watcher, local_mirror_uuid, + pool_meta_cache, state_builder, replayer_listener); + } + + Replayer( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + StateBuilder<ImageCtxT>* state_builder, + 
ReplayerListener* replayer_listener); + ~Replayer(); + + void destroy() override { + delete this; + } + + void init(Context* on_finish) override; + void shut_down(Context* on_finish) override; + + void flush(Context* on_finish) override; + + bool get_replay_status(std::string* description, Context* on_finish) override; + + bool is_replaying() const override { + std::unique_lock locker{m_lock}; + return (m_state == STATE_REPLAYING || m_state == STATE_IDLE); + } + + bool is_resync_requested() const override { + std::unique_lock locker{m_lock}; + return m_resync_requested; + } + + int get_error_code() const override { + std::unique_lock locker(m_lock); + return m_error_code; + } + + std::string get_error_description() const override { + std::unique_lock locker(m_lock); + return m_error_description; + } + + std::string get_image_spec() const { + std::unique_lock locker(m_lock); + return m_image_spec; + } + +private: + /** + * @verbatim + * + * <init> + * | + * v + * REGISTER_LOCAL_UPDATE_WATCHER + * | + * v + * REGISTER_REMOTE_UPDATE_WATCHER + * | + * v + * LOAD_LOCAL_IMAGE_META <----------------------------\ + * | | + * v (skip if not needed) | + * REFRESH_LOCAL_IMAGE | + * | | + * v (skip if not needed) | + * REFRESH_REMOTE_IMAGE | + * | | + * | (unused non-primary snapshot) | + * |\--------------> PRUNE_NON_PRIMARY_SNAPSHOT---/| + * | | + * | (interrupted sync) | + * |\--------------> GET_LOCAL_IMAGE_STATE ------\ | + * | | | + * | (new snapshot) | | + * |\--------------> COPY_SNAPSHOTS | | + * | | | | + * | v | | + * | GET_REMOTE_IMAGE_STATE | | + * | | | | + * | v | | + * | CREATE_NON_PRIMARY_SNAPSHOT | | + * | | | | + * | v (skip if not needed)| | + * | UPDATE_MIRROR_IMAGE_STATE | | + * | | | | + * | |/--------------------/ | + * | | | + * | v | + * | REQUEST_SYNC | + * | | | + * | v | + * | COPY_IMAGE | + * | | | + * | v | + * | APPLY_IMAGE_STATE | + * | | | + * | v | + * | UPDATE_NON_PRIMARY_SNAPSHOT | + * | | | + * | v | + * | NOTIFY_IMAGE_UPDATE | + * | | | + 
* | (interrupted unlink) v | + * |\--------------> UNLINK_PEER | + * | | | + * | v | + * | NOTIFY_LISTENER | + * | | | + * | \----------------------/| + * | | + * | (remote demoted) | + * \---------------> NOTIFY_LISTENER | + * | | | + * |/--------------------/ | + * | | + * | (update notification) | + * <idle> --------------------------------------------/ + * | + * v + * <shut down> + * | + * v + * UNREGISTER_REMOTE_UPDATE_WATCHER + * | + * v + * UNREGISTER_LOCAL_UPDATE_WATCHER + * | + * v + * WAIT_FOR_IN_FLIGHT_OPS + * | + * v + * <finish> + * + * @endverbatim + */ + + enum State { + STATE_INIT, + STATE_REPLAYING, + STATE_IDLE, + STATE_COMPLETE + }; + + struct C_UpdateWatchCtx; + struct DeepCopyHandler; + + Threads<ImageCtxT>* m_threads; + InstanceWatcher<ImageCtxT>* m_instance_watcher; + std::string m_local_mirror_uuid; + PoolMetaCache* m_pool_meta_cache; + StateBuilder<ImageCtxT>* m_state_builder; + ReplayerListener* m_replayer_listener; + + mutable ceph::mutex m_lock; + + State m_state = STATE_INIT; + + std::string m_image_spec; + Context* m_on_init_shutdown = nullptr; + + bool m_resync_requested = false; + int m_error_code = 0; + std::string m_error_description; + + C_UpdateWatchCtx* m_update_watch_ctx = nullptr; + uint64_t m_local_update_watcher_handle = 0; + uint64_t m_remote_update_watcher_handle = 0; + bool m_image_updated = false; + + AsyncOpTracker m_in_flight_op_tracker; + + uint64_t m_local_snap_id_start = 0; + uint64_t m_local_snap_id_end = CEPH_NOSNAP; + cls::rbd::MirrorSnapshotNamespace m_local_mirror_snap_ns; + uint64_t m_local_object_count = 0; + + std::string m_remote_mirror_peer_uuid; + uint64_t m_remote_snap_id_start = 0; + uint64_t m_remote_snap_id_end = CEPH_NOSNAP; + cls::rbd::MirrorSnapshotNamespace m_remote_mirror_snap_ns; + + librbd::mirror::snapshot::ImageState m_image_state; + DeepCopyHandler* m_deep_copy_handler = nullptr; + + TimeRollingMean m_bytes_per_second; + uint64_t m_last_snapshot_sync_seconds = 0; + + uint64_t 
m_snapshot_bytes = 0; + uint64_t m_last_snapshot_bytes = 0; + + boost::accumulators::accumulator_set< + uint64_t, boost::accumulators::stats< + boost::accumulators::tag::rolling_mean>> m_bytes_per_snapshot{ + boost::accumulators::tag::rolling_window::window_size = 2}; + utime_t m_snapshot_replay_start; + + uint32_t m_pending_snapshots = 0; + + bool m_remote_image_updated = false; + bool m_updating_sync_point = false; + bool m_sync_in_progress = false; + + PerfCounters *m_perf_counters = nullptr; + + void load_local_image_meta(); + void handle_load_local_image_meta(int r); + + void refresh_local_image(); + void handle_refresh_local_image(int r); + + void refresh_remote_image(); + void handle_refresh_remote_image(int r); + + void scan_local_mirror_snapshots(std::unique_lock<ceph::mutex>* locker); + void scan_remote_mirror_snapshots(std::unique_lock<ceph::mutex>* locker); + + void prune_non_primary_snapshot(uint64_t snap_id); + void handle_prune_non_primary_snapshot(int r); + + void copy_snapshots(); + void handle_copy_snapshots(int r); + + void get_remote_image_state(); + void handle_get_remote_image_state(int r); + + void get_local_image_state(); + void handle_get_local_image_state(int r); + + void create_non_primary_snapshot(); + void handle_create_non_primary_snapshot(int r); + + void update_mirror_image_state(); + void handle_update_mirror_image_state(int r); + + void request_sync(); + void handle_request_sync(int r); + + void copy_image(); + void handle_copy_image(int r); + void handle_copy_image_progress(uint64_t object_number, + uint64_t object_count); + void handle_copy_image_read(uint64_t bytes_read); + + void apply_image_state(); + void handle_apply_image_state(int r); + + void update_non_primary_snapshot(bool complete); + void handle_update_non_primary_snapshot(bool complete, int r); + + void notify_image_update(); + void handle_notify_image_update(int r); + + void unlink_peer(uint64_t remote_snap_id); + void handle_unlink_peer(int r); + + void 
finish_sync(); + + void register_local_update_watcher(); + void handle_register_local_update_watcher(int r); + + void register_remote_update_watcher(); + void handle_register_remote_update_watcher(int r); + + void unregister_remote_update_watcher(); + void handle_unregister_remote_update_watcher(int r); + + void unregister_local_update_watcher(); + void handle_unregister_local_update_watcher(int r); + + void wait_for_in_flight_ops(); + void handle_wait_for_in_flight_ops(int r); + + void handle_image_update_notify(); + + void handle_replay_complete(int r, const std::string& description); + void handle_replay_complete(std::unique_lock<ceph::mutex>* locker, + int r, const std::string& description); + void notify_status_updated(); + + bool is_replay_interrupted(); + bool is_replay_interrupted(std::unique_lock<ceph::mutex>* lock); + + void register_perf_counters(); + void unregister_perf_counters(); +}; + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::snapshot::Replayer<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc new file mode 100644 index 000000000..ca3e6918b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc @@ -0,0 +1,120 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "StateBuilder.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/mirror/snapshot/ImageMeta.h" +#include "tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h" +#include "tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h" +#include "tools/rbd_mirror/image_replayer/snapshot/Replayer.h" + 
+#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ + << "StateBuilder: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +template <typename I> +StateBuilder<I>::StateBuilder(const std::string& global_image_id) + : image_replayer::StateBuilder<I>(global_image_id) { +} + +template <typename I> +StateBuilder<I>::~StateBuilder() { + ceph_assert(local_image_meta == nullptr); +} + +template <typename I> +void StateBuilder<I>::close(Context* on_finish) { + dout(10) << dendl; + + delete local_image_meta; + local_image_meta = nullptr; + + // close the remote image after closing the local + // image in case the remote cluster is unreachable and + // we cannot close it. + on_finish = new LambdaContext([this, on_finish](int) { + this->close_remote_image(on_finish); + }); + this->close_local_image(on_finish); +} + +template <typename I> +bool StateBuilder<I>::is_disconnected() const { + return false; +} + +template <typename I> +bool StateBuilder<I>::is_linked_impl() const { + // the remote has to have us registered as a peer + return !remote_mirror_peer_uuid.empty(); +} + +template <typename I> +cls::rbd::MirrorImageMode StateBuilder<I>::get_mirror_image_mode() const { + return cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT; +} + +template <typename I> +image_sync::SyncPointHandler* StateBuilder<I>::create_sync_point_handler() { + dout(10) << dendl; + + // TODO + ceph_assert(false); + return nullptr; +} + +template <typename I> +BaseRequest* StateBuilder<I>::create_local_image_request( + Threads<I>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) { + return CreateLocalImageRequest<I>::create( + threads, local_io_ctx, this->remote_image_ctx, global_image_id, + 
pool_meta_cache, progress_ctx, this, on_finish); +} + +template <typename I> +BaseRequest* StateBuilder<I>::create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) { + return PrepareReplayRequest<I>::create( + local_mirror_uuid, progress_ctx, this, resync_requested, syncing, + on_finish); +} + +template <typename I> +image_replayer::Replayer* StateBuilder<I>::create_replayer( + Threads<I>* threads, + InstanceWatcher<I>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) { + return Replayer<I>::create( + threads, instance_watcher, local_mirror_uuid, pool_meta_cache, this, + replayer_listener); +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::StateBuilder<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h new file mode 100644 index 000000000..a4ab82982 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H + +#include "tools/rbd_mirror/image_replayer/StateBuilder.h" +#include <string> + +struct Context; + +namespace librbd { + +struct ImageCtx; + +namespace mirror { +namespace snapshot { + +template <typename> class ImageMeta; + +} // namespace snapshot +} // namespace mirror +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +template <typename> class SyncPointHandler; + +template <typename ImageCtxT> +class StateBuilder : public 
image_replayer::StateBuilder<ImageCtxT> { +public: + static StateBuilder* create(const std::string& global_image_id) { + return new StateBuilder(global_image_id); + } + + StateBuilder(const std::string& global_image_id); + ~StateBuilder() override; + + void close(Context* on_finish) override; + + bool is_disconnected() const override; + + cls::rbd::MirrorImageMode get_mirror_image_mode() const override; + + image_sync::SyncPointHandler* create_sync_point_handler() override; + + bool replay_requires_remote_image() const override { + return true; + } + + BaseRequest* create_local_image_request( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) override; + + BaseRequest* create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) override; + + image_replayer::Replayer* create_replayer( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) override; + + SyncPointHandler<ImageCtxT>* sync_point_handler = nullptr; + + std::string remote_mirror_peer_uuid; + + librbd::mirror::snapshot::ImageMeta<ImageCtxT>* local_image_meta = nullptr; + +private: + bool is_linked_impl() const override; +}; + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::snapshot::StateBuilder<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc new file mode 100644 index 000000000..6df95d300 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc @@ 
-0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Utils.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::util::" \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { +namespace util { + +uint64_t compute_remote_snap_id( + const ceph::shared_mutex& local_image_lock, + const std::map<librados::snap_t, librbd::SnapInfo>& local_snap_infos, + uint64_t local_snap_id, const std::string& remote_mirror_uuid) { + ceph_assert(ceph_mutex_is_locked(local_image_lock)); + + // Search our local non-primary snapshots for a mapping to the remote + // snapshot. The non-primary mirror snapshot with the mappings will always + // come at or after the snapshot we are searching against + for (auto snap_it = local_snap_infos.lower_bound(local_snap_id); + snap_it != local_snap_infos.end(); ++snap_it) { + auto mirror_ns = std::get_if<cls::rbd::MirrorSnapshotNamespace>( + &snap_it->second.snap_namespace); + if (mirror_ns == nullptr || !mirror_ns->is_non_primary()) { + continue; + } + + if (mirror_ns->primary_mirror_uuid != remote_mirror_uuid) { + dout(20) << "local snapshot " << snap_it->first << " not tied to remote" + << dendl; + continue; + } else if (local_snap_id == snap_it->first) { + dout(15) << "local snapshot " << local_snap_id << " maps to " + << "remote snapshot " << mirror_ns->primary_snap_id << dendl; + return mirror_ns->primary_snap_id; + } + + const auto& snap_seqs = mirror_ns->snap_seqs; + for (auto [remote_snap_id_seq, local_snap_id_seq] : snap_seqs) { + if (local_snap_id_seq == local_snap_id) { + dout(15) << "local snapshot " << local_snap_id << " maps to " + << "remote snapshot " << remote_snap_id_seq << dendl; + return 
remote_snap_id_seq; + } + } + } + + return CEPH_NOSNAP; +} + +} // namespace util +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h new file mode 100644 index 000000000..8efc58685 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h @@ -0,0 +1,30 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H +#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "common/ceph_mutex.h" +#include "librbd/Types.h" +#include <map> + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { +namespace util { + +uint64_t compute_remote_snap_id( + const ceph::shared_mutex& local_image_lock, + const std::map<librados::snap_t, librbd::SnapInfo>& local_snap_infos, + uint64_t local_snap_id, const std::string& remote_mirror_uuid); + +} // namespace util +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc new file mode 100644 index 000000000..1bd5d77f0 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc @@ -0,0 +1,172 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointCreateRequest.h" +#include "include/uuid.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "tools/rbd_mirror/image_sync/Types.h" +#include 
"tools/rbd_mirror/image_sync/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointCreateRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_sync { + +using librbd::util::create_context_callback; + +template <typename I> +SyncPointCreateRequest<I>::SyncPointCreateRequest( + I *remote_image_ctx, + const std::string &local_mirror_uuid, + SyncPointHandler* sync_point_handler, + Context *on_finish) + : m_remote_image_ctx(remote_image_ctx), + m_local_mirror_uuid(local_mirror_uuid), + m_sync_point_handler(sync_point_handler), + m_on_finish(on_finish) { + m_sync_points_copy = m_sync_point_handler->get_sync_points(); + ceph_assert(m_sync_points_copy.size() < 2); + + // initialize the updated client meta with the new sync point + m_sync_points_copy.emplace_back(); + if (m_sync_points_copy.size() > 1) { + m_sync_points_copy.back().from_snap_name = + m_sync_points_copy.front().snap_name; + } +} + +template <typename I> +void SyncPointCreateRequest<I>::send() { + send_update_sync_points(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_update_sync_points() { + uuid_d uuid_gen; + uuid_gen.generate_random(); + + auto& sync_point = m_sync_points_copy.back(); + sync_point.snap_name = util::get_snapshot_name_prefix(m_local_mirror_uuid) + + uuid_gen.to_string(); + + auto ctx = create_context_callback< + SyncPointCreateRequest<I>, + &SyncPointCreateRequest<I>::handle_update_sync_points>(this); + m_sync_point_handler->update_sync_points( + m_sync_point_handler->get_snap_seqs(), m_sync_points_copy, false, ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_update_sync_points(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_refresh_image(); +} + +template 
<typename I> +void SyncPointCreateRequest<I>::send_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_refresh_image>( + this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_snap(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_create_snap() { + dout(20) << dendl; + + auto& sync_point = m_sync_points_copy.back(); + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_create_snap>( + this); + m_remote_image_ctx->operations->snap_create( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name.c_str(), + librbd::SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE, m_prog_ctx, ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_create_snap(int r) { + dout(20) << ": r=" << r << dendl; + + if (r == -EEXIST) { + send_update_sync_points(); + return; + } else if (r < 0) { + derr << ": failed to create snapshot: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_final_refresh_image(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_final_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, + &SyncPointCreateRequest<I>::handle_final_refresh_image>(this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_final_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to refresh image for snapshot: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void SyncPointCreateRequest<I>::finish(int r) { + dout(20) << ": 
r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h new file mode 100644 index 000000000..9b52b8374 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H +#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H + +#include "librbd/internal.h" +#include "Types.h" +#include <string> + +class Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_sync { + +template <typename ImageCtxT = librbd::ImageCtx> +class SyncPointCreateRequest { +public: + static SyncPointCreateRequest* create( + ImageCtxT *remote_image_ctx, + const std::string &local_mirror_uuid, + SyncPointHandler* sync_point_handler, + Context *on_finish) { + return new SyncPointCreateRequest(remote_image_ctx, local_mirror_uuid, + sync_point_handler, on_finish); + } + + SyncPointCreateRequest( + ImageCtxT *remote_image_ctx, + const std::string &local_mirror_uuid, + SyncPointHandler* sync_point_handler, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * UPDATE_SYNC_POINTS < . . + * | . + * v . + * REFRESH_IMAGE . + * | . (repeat on EEXIST) + * v . + * CREATE_SNAP . . . . . . 
+ * | + * v + * REFRESH_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_remote_image_ctx; + std::string m_local_mirror_uuid; + SyncPointHandler* m_sync_point_handler; + Context *m_on_finish; + + SyncPoints m_sync_points_copy; + librbd::NoOpProgressContext m_prog_ctx; + + void send_update_sync_points(); + void handle_update_sync_points(int r); + + void send_refresh_image(); + void handle_refresh_image(int r); + + void send_create_snap(); + void handle_create_snap(int r); + + void send_final_refresh_image(); + void handle_final_refresh_image(int r); + + void finish(int r); +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc new file mode 100644 index 000000000..d1cd32b39 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc @@ -0,0 +1,213 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointPruneRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include <set> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointPruneRequest: " \ + << this << " " << __func__ +namespace rbd { +namespace mirror { +namespace image_sync { + +using librbd::util::create_context_callback; + +template <typename I> +SyncPointPruneRequest<I>::SyncPointPruneRequest( + I *remote_image_ctx, + bool sync_complete, + SyncPointHandler* sync_point_handler, + Context *on_finish) + : m_remote_image_ctx(remote_image_ctx), + 
m_sync_complete(sync_complete), + m_sync_point_handler(sync_point_handler), + m_on_finish(on_finish) { + m_sync_points_copy = m_sync_point_handler->get_sync_points(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send() { + if (m_sync_points_copy.empty()) { + send_remove_snap(); + return; + } + + if (m_sync_complete) { + // if sync is complete, we can remove the master sync point + auto it = m_sync_points_copy.begin(); + auto& sync_point = *it; + + ++it; + if (it == m_sync_points_copy.end() || + it->from_snap_name != sync_point.snap_name) { + m_snap_names.push_back(sync_point.snap_name); + } + + if (!sync_point.from_snap_name.empty()) { + m_snap_names.push_back(sync_point.from_snap_name); + } + } else { + // if we have more than one sync point or invalid sync points, + // trim them off + std::shared_lock image_locker{m_remote_image_ctx->image_lock}; + std::set<std::string> snap_names; + for (auto it = m_sync_points_copy.rbegin(); + it != m_sync_points_copy.rend(); ++it) { + auto& sync_point = *it; + if (&sync_point == &m_sync_points_copy.front()) { + if (m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name) == + CEPH_NOSNAP) { + derr << ": failed to locate sync point snapshot: " + << sync_point.snap_name << dendl; + } else if (!sync_point.from_snap_name.empty()) { + derr << ": unexpected from_snap_name in primary sync point: " + << sync_point.from_snap_name << dendl; + } else { + // first sync point is OK -- keep it + break; + } + m_invalid_master_sync_point = true; + } + + if (snap_names.count(sync_point.snap_name) == 0) { + snap_names.insert(sync_point.snap_name); + m_snap_names.push_back(sync_point.snap_name); + } + + auto& front_sync_point = m_sync_points_copy.front(); + if (!sync_point.from_snap_name.empty() && + snap_names.count(sync_point.from_snap_name) == 0 && + sync_point.from_snap_name != front_sync_point.snap_name) { + snap_names.insert(sync_point.from_snap_name); + 
m_snap_names.push_back(sync_point.from_snap_name); + } + } + } + + send_remove_snap(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_remove_snap() { + if (m_snap_names.empty()) { + send_refresh_image(); + return; + } + + const std::string &snap_name = m_snap_names.front(); + + dout(20) << ": snap_name=" << snap_name << dendl; + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_remove_snap>( + this); + m_remote_image_ctx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), + snap_name.c_str(), + ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_remove_snap(int r) { + dout(20) << ": r=" << r << dendl; + + ceph_assert(!m_snap_names.empty()); + std::string snap_name = m_snap_names.front(); + m_snap_names.pop_front(); + + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + derr << ": failed to remove snapshot '" << snap_name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_remove_snap(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_refresh_image>( + this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_update_sync_points(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_update_sync_points() { + dout(20) << dendl; + + if (m_sync_complete) { + m_sync_points_copy.pop_front(); + } else { + while (m_sync_points_copy.size() > 1) { + m_sync_points_copy.pop_back(); + } + if (m_invalid_master_sync_point) { + // all subsequent sync points would have been pruned + m_sync_points_copy.clear(); + } + } + + auto ctx = 
create_context_callback< + SyncPointPruneRequest<I>, + &SyncPointPruneRequest<I>::handle_update_sync_points>(this); + m_sync_point_handler->update_sync_points( + m_sync_point_handler->get_snap_seqs(), m_sync_points_copy, + m_sync_complete, ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_update_sync_points(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void SyncPointPruneRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h new file mode 100644 index 000000000..08bf840b1 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h @@ -0,0 +1,91 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H +#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H + +#include "tools/rbd_mirror/image_sync/Types.h" +#include <list> +#include <string> + +class Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_sync { + +template <typename ImageCtxT = librbd::ImageCtx> +class SyncPointPruneRequest { +public: + static SyncPointPruneRequest* create( + ImageCtxT *remote_image_ctx, + bool sync_complete, + SyncPointHandler* sync_point_handler, + Context *on_finish) { + return new SyncPointPruneRequest(remote_image_ctx, sync_complete, + sync_point_handler, on_finish); + } + + 
SyncPointPruneRequest( + ImageCtxT *remote_image_ctx, + bool sync_complete, + SyncPointHandler* sync_point_handler, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * | . . . . . + * | . . + * v v . (repeat if from snap + * REMOVE_SNAP . . . unused by other sync) + * | + * v + * REFRESH_IMAGE + * | + * v + * UPDATE_CLIENT + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_remote_image_ctx; + bool m_sync_complete; + SyncPointHandler* m_sync_point_handler; + Context *m_on_finish; + + SyncPoints m_sync_points_copy; + std::list<std::string> m_snap_names; + + bool m_invalid_master_sync_point = false; + + void send_remove_snap(); + void handle_remove_snap(int r); + + void send_refresh_image(); + void handle_refresh_image(int r); + + void send_update_sync_points(); + void handle_update_sync_points(int r); + + void finish(int r); +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_sync/Types.h b/src/tools/rbd_mirror/image_sync/Types.h new file mode 100644 index 000000000..d748dc93e --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/Types.h @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_TYPES_H +#define RBD_MIRROR_IMAGE_SYNC_TYPES_H + +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/Types.h" +#include <list> +#include <string> +#include <boost/optional.hpp> + +struct Context; + +namespace rbd { +namespace mirror { +namespace image_sync { + +struct SyncPoint { + typedef boost::optional<uint64_t> ObjectNumber; + + SyncPoint() { + } + SyncPoint(const cls::rbd::SnapshotNamespace& snap_namespace, + const std::string& snap_name, + const std::string& from_snap_name, + const ObjectNumber& 
object_number) + : snap_namespace(snap_namespace), snap_name(snap_name), + from_snap_name(from_snap_name), object_number(object_number) { + } + + cls::rbd::SnapshotNamespace snap_namespace = + {cls::rbd::UserSnapshotNamespace{}}; + std::string snap_name; + std::string from_snap_name; + ObjectNumber object_number = boost::none; + + bool operator==(const SyncPoint& rhs) const { + return (snap_namespace == rhs.snap_namespace && + snap_name == rhs.snap_name && + from_snap_name == rhs.from_snap_name && + object_number == rhs.object_number); + } +}; + +typedef std::list<SyncPoint> SyncPoints; + +struct SyncPointHandler { +public: + SyncPointHandler(const SyncPointHandler&) = delete; + SyncPointHandler& operator=(const SyncPointHandler&) = delete; + + virtual ~SyncPointHandler() {} + virtual void destroy() { + delete this; + } + + virtual SyncPoints get_sync_points() const = 0; + virtual librbd::SnapSeqs get_snap_seqs() const = 0; + + virtual void update_sync_points(const librbd::SnapSeqs& snap_seq, + const SyncPoints& sync_points, + bool sync_complete, + Context* on_finish) = 0; + +protected: + SyncPointHandler() {} +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_SYNC_TYPES_H diff --git a/src/tools/rbd_mirror/image_sync/Utils.cc b/src/tools/rbd_mirror/image_sync/Utils.cc new file mode 100644 index 000000000..6a3eae72d --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/Utils.cc @@ -0,0 +1,24 @@ +// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Utils.h" + +namespace rbd { +namespace mirror { +namespace image_sync { +namespace util { + +namespace { + +static const std::string SNAP_NAME_PREFIX(".rbd-mirror"); + +} // anonymous namespace + +std::string get_snapshot_name_prefix(const std::string& local_mirror_uuid) { + return SNAP_NAME_PREFIX + "." 
+ local_mirror_uuid + "."; +} + +} // namespace util +} // namespace image_sync +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_sync/Utils.h b/src/tools/rbd_mirror/image_sync/Utils.h new file mode 100644 index 000000000..139699daa --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/Utils.h @@ -0,0 +1,16 @@ +// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <string> + +namespace rbd { +namespace mirror { +namespace image_sync { +namespace util { + +std::string get_snapshot_name_prefix(const std::string& local_mirror_uuid); + +} // namespace util +} // namespace image_sync +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/instance_watcher/Types.cc b/src/tools/rbd_mirror/instance_watcher/Types.cc new file mode 100644 index 000000000..0e9922733 --- /dev/null +++ b/src/tools/rbd_mirror/instance_watcher/Types.cc @@ -0,0 +1,245 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Types.h" +#include "include/ceph_assert.h" +#include "include/stringify.h" +#include "common/Formatter.h" + +namespace rbd { +namespace mirror { +namespace instance_watcher { + +namespace { + +class EncodePayloadVisitor : public boost::static_visitor<void> { +public: + explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + using ceph::encode; + encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl); + payload.encode(m_bl); + } + +private: + bufferlist &m_bl; +}; + +class DecodePayloadVisitor : public boost::static_visitor<void> { +public: + DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) {} + + template <typename Payload> + inline void operator()(Payload &payload) const { + payload.decode(m_version, m_iter); + } + +private: + __u8 m_version; + 
bufferlist::const_iterator &m_iter; +}; + +class DumpPayloadVisitor : public boost::static_visitor<void> { +public: + explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + NotifyOp notify_op = Payload::NOTIFY_OP; + m_formatter->dump_string("notify_op", stringify(notify_op)); + payload.dump(m_formatter); + } + +private: + ceph::Formatter *m_formatter; +}; + +} // anonymous namespace + +void PayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + encode(request_id, bl); +} + +void PayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + decode(request_id, iter); +} + +void PayloadBase::dump(Formatter *f) const { + f->dump_unsigned("request_id", request_id); +} + +void ImagePayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(global_image_id, bl); +} + +void ImagePayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(global_image_id, iter); +} + +void ImagePayloadBase::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("global_image_id", global_image_id); +} + +void PeerImageRemovedPayload::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(global_image_id, bl); + encode(peer_mirror_uuid, bl); +} + +void PeerImageRemovedPayload::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(global_image_id, iter); + decode(peer_mirror_uuid, iter); +} + +void PeerImageRemovedPayload::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("global_image_id", global_image_id); + f->dump_string("peer_mirror_uuid", peer_mirror_uuid); +} + +void SyncPayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + 
encode(sync_id, bl); +} + +void SyncPayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(sync_id, iter); +} + +void SyncPayloadBase::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("sync_id", sync_id); +} + +void UnknownPayload::encode(bufferlist &bl) const { + ceph_abort(); +} + +void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void UnknownPayload::dump(Formatter *f) const { +} + +void NotifyMessage::encode(bufferlist& bl) const { + ENCODE_START(2, 2, bl); + boost::apply_visitor(EncodePayloadVisitor(bl), payload); + ENCODE_FINISH(bl); +} + +void NotifyMessage::decode(bufferlist::const_iterator& iter) { + DECODE_START(2, iter); + + uint32_t notify_op; + decode(notify_op, iter); + + // select the correct payload variant based upon the encoded op + switch (notify_op) { + case NOTIFY_OP_IMAGE_ACQUIRE: + payload = ImageAcquirePayload(); + break; + case NOTIFY_OP_IMAGE_RELEASE: + payload = ImageReleasePayload(); + break; + case NOTIFY_OP_PEER_IMAGE_REMOVED: + payload = PeerImageRemovedPayload(); + break; + case NOTIFY_OP_SYNC_REQUEST: + payload = SyncRequestPayload(); + break; + case NOTIFY_OP_SYNC_START: + payload = SyncStartPayload(); + break; + default: + payload = UnknownPayload(); + break; + } + + boost::apply_visitor(DecodePayloadVisitor(struct_v, iter), payload); + DECODE_FINISH(iter); +} + +void NotifyMessage::dump(Formatter *f) const { + boost::apply_visitor(DumpPayloadVisitor(f), payload); +} + +void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) { + o.push_back(new NotifyMessage(ImageAcquirePayload())); + o.push_back(new NotifyMessage(ImageAcquirePayload(1, "gid"))); + + o.push_back(new NotifyMessage(ImageReleasePayload())); + o.push_back(new NotifyMessage(ImageReleasePayload(1, "gid"))); + + o.push_back(new NotifyMessage(PeerImageRemovedPayload())); + o.push_back(new NotifyMessage(PeerImageRemovedPayload(1, 
"gid", "uuid"))); + + o.push_back(new NotifyMessage(SyncRequestPayload())); + o.push_back(new NotifyMessage(SyncRequestPayload(1, "sync_id"))); + + o.push_back(new NotifyMessage(SyncStartPayload())); + o.push_back(new NotifyMessage(SyncStartPayload(1, "sync_id"))); +} + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op) { + switch (op) { + case NOTIFY_OP_IMAGE_ACQUIRE: + out << "ImageAcquire"; + break; + case NOTIFY_OP_IMAGE_RELEASE: + out << "ImageRelease"; + break; + case NOTIFY_OP_PEER_IMAGE_REMOVED: + out << "PeerImageRemoved"; + break; + case NOTIFY_OP_SYNC_REQUEST: + out << "SyncRequest"; + break; + case NOTIFY_OP_SYNC_START: + out << "SyncStart"; + break; + default: + out << "Unknown (" << static_cast<uint32_t>(op) << ")"; + break; + } + return out; +} + +void NotifyAckPayload::encode(bufferlist &bl) const { + using ceph::encode; + encode(instance_id, bl); + encode(request_id, bl); + encode(ret_val, bl); +} + +void NotifyAckPayload::decode(bufferlist::const_iterator &iter) { + using ceph::decode; + decode(instance_id, iter); + decode(request_id, iter); + decode(ret_val, iter); +} + +void NotifyAckPayload::dump(Formatter *f) const { + f->dump_string("instance_id", instance_id); + f->dump_unsigned("request_id", request_id); + f->dump_int("ret_val", ret_val); // own key: "request_id" is already emitted on the line above +} + +} // namespace instance_watcher +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/instance_watcher/Types.h b/src/tools/rbd_mirror/instance_watcher/Types.h new file mode 100644 index 000000000..b0b7b7791 --- /dev/null +++ b/src/tools/rbd_mirror/instance_watcher/Types.h @@ -0,0 +1,197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_INSTANCE_WATCHER_TYPES_H +#define RBD_MIRROR_INSTANCE_WATCHER_TYPES_H + +#include <string> +#include <set> +#include <boost/variant.hpp> + +#include "include/buffer_fwd.h" +#include "include/encoding.h" +#include "include/int_types.h" + +namespace ceph { 
class Formatter; }
+
+namespace rbd {
+namespace mirror {
+namespace instance_watcher {
+
+enum NotifyOp { // op codes; NotifyMessage::decode reads one as uint32_t to pick the payload type
+  NOTIFY_OP_IMAGE_ACQUIRE = 0,
+  NOTIFY_OP_IMAGE_RELEASE = 1,
+  NOTIFY_OP_PEER_IMAGE_REMOVED = 2,
+  NOTIFY_OP_SYNC_REQUEST = 3,
+  NOTIFY_OP_SYNC_START = 4
+};
+
+struct PayloadBase { // common base of the concrete payloads: carries the request id
+  uint64_t request_id;
+
+  PayloadBase() : request_id(0) {
+  }
+
+  PayloadBase(uint64_t request_id) : request_id(request_id) { // NOTE(review): non-explicit, so a uint64_t converts implicitly
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version, bufferlist::const_iterator &iter);
+  void dump(Formatter *f) const;
+};
+
+struct ImagePayloadBase : public PayloadBase { // adds the global image id to the base payload
+  std::string global_image_id;
+
+  ImagePayloadBase() : PayloadBase() {
+  }
+
+  ImagePayloadBase(uint64_t request_id, const std::string &global_image_id)
+    : PayloadBase(request_id), global_image_id(global_image_id) {
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version, bufferlist::const_iterator &iter);
+  void dump(Formatter *f) const;
+};
+
+struct ImageAcquirePayload : public ImagePayloadBase {
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_ACQUIRE; // op code tied to this payload type
+
+  ImageAcquirePayload() {
+  }
+  ImageAcquirePayload(uint64_t request_id, const std::string &global_image_id)
+    : ImagePayloadBase(request_id, global_image_id) {
+  }
+};
+
+struct ImageReleasePayload : public ImagePayloadBase {
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_RELEASE; // op code tied to this payload type
+
+  ImageReleasePayload() {
+  }
+  ImageReleasePayload(uint64_t request_id, const std::string &global_image_id)
+    : ImagePayloadBase(request_id, global_image_id) {
+  }
+};
+
+struct PeerImageRemovedPayload : public PayloadBase { // derives from PayloadBase directly and declares its own id fields
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_PEER_IMAGE_REMOVED;
+
+  std::string global_image_id;
+  std::string peer_mirror_uuid;
+
+  PeerImageRemovedPayload() {
+  }
+  PeerImageRemovedPayload(uint64_t request_id,
+                          const std::string& global_image_id,
+                          const std::string& peer_mirror_uuid)
+    : PayloadBase(request_id),
+      global_image_id(global_image_id), peer_mirror_uuid(peer_mirror_uuid)
{
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version, bufferlist::const_iterator &iter);
+  void dump(Formatter *f) const;
+};
+
+struct SyncPayloadBase : public PayloadBase { // adds the sync id to the base payload
+  std::string sync_id;
+
+  SyncPayloadBase() : PayloadBase() {
+  }
+
+  SyncPayloadBase(uint64_t request_id, const std::string &sync_id)
+    : PayloadBase(request_id), sync_id(sync_id) {
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version, bufferlist::const_iterator &iter);
+  void dump(Formatter *f) const;
+};
+
+struct SyncRequestPayload : public SyncPayloadBase {
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_REQUEST; // op code tied to this payload type
+
+  SyncRequestPayload() : SyncPayloadBase() {
+  }
+
+  SyncRequestPayload(uint64_t request_id, const std::string &sync_id)
+    : SyncPayloadBase(request_id, sync_id) {
+  }
+};
+
+struct SyncStartPayload : public SyncPayloadBase {
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_START; // op code tied to this payload type
+
+  SyncStartPayload() : SyncPayloadBase() {
+  }
+
+  SyncStartPayload(uint64_t request_id, const std::string &sync_id)
+    : SyncPayloadBase(request_id, sync_id) {
+  }
+};
+
+struct UnknownPayload { // fallback payload type; does not derive from PayloadBase
+  static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1); // value outside the declared NotifyOp enumerators
+
+  UnknownPayload() {
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version, bufferlist::const_iterator &iter);
+  void dump(Formatter *f) const;
+};
+
+typedef boost::variant<ImageAcquirePayload,
+                       ImageReleasePayload,
+                       PeerImageRemovedPayload,
+                       SyncRequestPayload,
+                       SyncStartPayload,
+                       UnknownPayload> Payload;
+
+struct NotifyMessage { // wraps one Payload variant
+  NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) { // defaults to UnknownPayload
+  }
+
+  Payload payload;
+
+  void encode(bufferlist& bl) const;
+  void decode(bufferlist::const_iterator& it);
+  void dump(Formatter *f) const;
+
+  static void generate_test_instances(std::list<NotifyMessage *> &o); // NOTE(review): std::list is used but <list> is not among this header's direct includes
+};
+
+WRITE_CLASS_ENCODER(NotifyMessage);
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op);
+
+struct NotifyAckPayload {
+  std::string
instance_id;
+  uint64_t request_id;
+  int ret_val;
+
+  NotifyAckPayload() : request_id(0), ret_val(0) {
+  }
+
+  NotifyAckPayload(const std::string &instance_id, uint64_t request_id,
+                   int ret_val)
+    : instance_id(instance_id), request_id(request_id), ret_val(ret_val) {
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(bufferlist::const_iterator& it); // unlike the payload structs above, takes no version argument
+  void dump(Formatter *f) const;
+};
+
+WRITE_CLASS_ENCODER(NotifyAckPayload);
+
+} // namespace instance_watcher
+} // namespace mirror
+} // namespace rbd
+
+using rbd::mirror::instance_watcher::encode;
+using rbd::mirror::instance_watcher::decode;
+
+#endif // RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/instances/Types.h b/src/tools/rbd_mirror/instances/Types.h
new file mode 100644
index 000000000..8b0a68fc3
--- /dev/null
+++ b/src/tools/rbd_mirror/instances/Types.h
@@ -0,0 +1,28 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCES_TYPES_H
+#define CEPH_RBD_MIRROR_INSTANCES_TYPES_H
+
+#include <string>
+#include <vector>
+
+namespace rbd {
+namespace mirror {
+namespace instances {
+
+struct Listener { // pure-virtual callback interface
+  typedef std::vector<std::string> InstanceIds;
+
+  virtual ~Listener() {
+  }
+
+  virtual void handle_added(const InstanceIds& instance_ids) = 0;
+  virtual void handle_removed(const InstanceIds& instance_ids) = 0;
+};
+
+} // namespace instances
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCES_TYPES_H
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.cc b/src/tools/rbd_mirror/leader_watcher/Types.cc
new file mode 100644
index 000000000..d2fb7908f
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.cc
@@ -0,0 +1,161 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+
+namespace rbd
{
+namespace mirror {
+namespace leader_watcher {
+
+namespace {
+
+class EncodePayloadVisitor : public boost::static_visitor<void> { // writes the payload's op code, then the payload itself
+public:
+  explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {}
+
+  template <typename Payload>
+  inline void operator()(const Payload &payload) const {
+    using ceph::encode;
+    encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl); // op code goes first, as uint32_t
+    payload.encode(m_bl);
+  }
+
+private:
+  bufferlist &m_bl;
+};
+
+class DecodePayloadVisitor : public boost::static_visitor<void> { // forwards version and iterator to the payload's decode()
+public:
+  DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter)
+    : m_version(version), m_iter(iter) {}
+
+  template <typename Payload>
+  inline void operator()(Payload &payload) const {
+    payload.decode(m_version, m_iter);
+  }
+
+private:
+  __u8 m_version;
+  bufferlist::const_iterator &m_iter;
+};
+
+class DumpPayloadVisitor : public boost::static_visitor<void> { // dumps "notify_op", then the payload's own fields
+public:
+  explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {}
+
+  template <typename Payload>
+  inline void operator()(const Payload &payload) const {
+    NotifyOp notify_op = Payload::NOTIFY_OP;
+    m_formatter->dump_string("notify_op", stringify(notify_op)); // presumably formatted via operator<<(NotifyOp) -- confirm in include/stringify.h
+    payload.dump(m_formatter);
+  }
+
+private:
+  ceph::Formatter *m_formatter;
+};
+
+} // anonymous namespace
+
+void HeartbeatPayload::encode(bufferlist &bl) const { // empty body: writes nothing
+}
+
+void HeartbeatPayload::decode(__u8 version, bufferlist::const_iterator &iter) { // empty body: reads nothing
+}
+
+void HeartbeatPayload::dump(Formatter *f) const { // empty body: dumps nothing
+}
+
+void LockAcquiredPayload::encode(bufferlist &bl) const { // empty body: writes nothing
+}
+
+void LockAcquiredPayload::decode(__u8 version, bufferlist::const_iterator &iter) { // empty body: reads nothing
+}
+
+void LockAcquiredPayload::dump(Formatter *f) const { // empty body: dumps nothing
+}
+
+void LockReleasedPayload::encode(bufferlist &bl) const { // empty body: writes nothing
+}
+
+void LockReleasedPayload::decode(__u8 version, bufferlist::const_iterator &iter) { // empty body: reads nothing
+}
+
+void LockReleasedPayload::dump(Formatter *f) const { // empty body: dumps nothing
+}
+
+void UnknownPayload::encode(bufferlist &bl) const {
+  ceph_abort(); // encoding an UnknownPayload is treated as a fatal error
+}
+
+void
UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void UnknownPayload::dump(Formatter *f) const { // empty body: dumps nothing
+}
+
+void NotifyMessage::encode(bufferlist& bl) const {
+  ENCODE_START(1, 1, bl);
+  boost::apply_visitor(EncodePayloadVisitor(bl), payload); // visitor writes the op code, then the payload
+  ENCODE_FINISH(bl);
+}
+
+void NotifyMessage::decode(bufferlist::const_iterator& iter) {
+  DECODE_START(1, iter);
+
+  uint32_t notify_op; // op code is read first and selects the payload type
+  decode(notify_op, iter);
+
+  // select the correct payload variant based upon the encoded op
+  switch (notify_op) {
+  case NOTIFY_OP_HEARTBEAT:
+    payload = HeartbeatPayload();
+    break;
+  case NOTIFY_OP_LOCK_ACQUIRED:
+    payload = LockAcquiredPayload();
+    break;
+  case NOTIFY_OP_LOCK_RELEASED:
+    payload = LockReleasedPayload();
+    break;
+  default: // unrecognized op: fall back to UnknownPayload
+    payload = UnknownPayload();
+    break;
+  }
+
+  apply_visitor(DecodePayloadVisitor(struct_v, iter), payload); // decode into the variant chosen above
+  DECODE_FINISH(iter);
+}
+
+void NotifyMessage::dump(Formatter *f) const {
+  apply_visitor(DumpPayloadVisitor(f), payload);
+}
+
+void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) { // appends one heap-allocated sample per concrete payload type
+  o.push_back(new NotifyMessage(HeartbeatPayload()));
+  o.push_back(new NotifyMessage(LockAcquiredPayload()));
+  o.push_back(new NotifyMessage(LockReleasedPayload()));
+}
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op) { // streams a readable name for the op
+  switch (op) {
+  case NOTIFY_OP_HEARTBEAT:
+    out << "Heartbeat";
+    break;
+  case NOTIFY_OP_LOCK_ACQUIRED:
+    out << "LockAcquired";
+    break;
+  case NOTIFY_OP_LOCK_RELEASED:
+    out << "LockReleased";
+    break;
+  default: // any other value is printed with its numeric code
+    out << "Unknown (" << static_cast<uint32_t>(op) << ")";
+    break;
+  }
+  return out;
+}
+
+} // namespace leader_watcher
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.h b/src/tools/rbd_mirror/leader_watcher/Types.h
new file mode 100644
index 000000000..1278e54b7
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.h
@@ -0,0 +1,117 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2;
indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_LEADER_WATCHER_TYPES_H
+#define RBD_MIRROR_LEADER_WATCHER_TYPES_H
+
+#include "include/int_types.h"
+#include "include/buffer_fwd.h"
+#include "include/encoding.h"
+#include <string>
+#include <vector>
+#include <boost/variant.hpp>
+
+struct Context;
+
+namespace ceph { class Formatter; }
+
+namespace rbd {
+namespace mirror {
+namespace leader_watcher {
+
+struct Listener { // pure-virtual callback interface
+  typedef std::vector<std::string> InstanceIds;
+
+  virtual ~Listener() {
+  }
+
+  virtual void post_acquire_handler(Context *on_finish) = 0;
+  virtual void pre_release_handler(Context *on_finish) = 0;
+
+  virtual void update_leader_handler(
+    const std::string &leader_instance_id) = 0;
+
+  virtual void handle_instances_added(const InstanceIds& instance_ids) = 0;
+  virtual void handle_instances_removed(const InstanceIds& instance_ids) = 0;
+};
+
+enum NotifyOp { // op codes; encoded and decoded as uint32_t in Types.cc
+  NOTIFY_OP_HEARTBEAT = 0,
+  NOTIFY_OP_LOCK_ACQUIRED = 1,
+  NOTIFY_OP_LOCK_RELEASED = 2,
+};
+
+struct HeartbeatPayload { // declares no data members
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_HEARTBEAT;
+
+  HeartbeatPayload() {
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version, bufferlist::const_iterator &iter);
+  void dump(Formatter *f) const;
+};
+
+struct LockAcquiredPayload { // declares no data members
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_ACQUIRED;
+
+  LockAcquiredPayload() {
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version, bufferlist::const_iterator &iter);
+  void dump(Formatter *f) const;
+};
+
+struct LockReleasedPayload { // declares no data members
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_RELEASED;
+
+  LockReleasedPayload() {
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version, bufferlist::const_iterator &iter);
+  void dump(Formatter *f) const;
+};
+
+struct UnknownPayload { // fallback payload type
+  static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1); // value outside the declared NotifyOp enumerators
+
+  UnknownPayload() {
+  }
+
+  void encode(bufferlist &bl) const;
+  void decode(__u8 version,
bufferlist::const_iterator &iter);
+  void dump(Formatter *f) const;
+};
+
+typedef boost::variant<HeartbeatPayload,
+                       LockAcquiredPayload,
+                       LockReleasedPayload,
+                       UnknownPayload> Payload;
+
+struct NotifyMessage { // wraps one Payload variant
+  NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) { // defaults to UnknownPayload
+  }
+
+  Payload payload;
+
+  void encode(bufferlist& bl) const;
+  void decode(bufferlist::const_iterator& it);
+  void dump(Formatter *f) const;
+
+  static void generate_test_instances(std::list<NotifyMessage *> &o); // NOTE(review): std::list is used but <list> is not among this header's direct includes
+};
+
+WRITE_CLASS_ENCODER(NotifyMessage);
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op);
+
+} // namespace leader_watcher
+} // namespace mirror
+} // namespace rbd
+
+using rbd::mirror::leader_watcher::encode;
+using rbd::mirror::leader_watcher::decode;
+
+#endif // RBD_MIRROR_LEADER_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/main.cc b/src/tools/rbd_mirror/main.cc
new file mode 100644
index 000000000..85e95e6b6
--- /dev/null
+++ b/src/tools/rbd_mirror/main.cc
@@ -0,0 +1,124 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/perf_counters.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+#include "Mirror.h"
+#include "Types.h"
+
+#include <vector>
+
+rbd::mirror::Mirror *mirror = nullptr; // created in main(); read by handle_signal()
+PerfCounters *g_journal_perf_counters = nullptr; // created, registered and deleted in main()
+PerfCounters *g_snapshot_perf_counters = nullptr; // created, registered and deleted in main()
+
+void usage() { // prints the option summary to stdout, then the generic server options
+  std::cout << "usage: rbd-mirror [options...]" << std::endl;
+  std::cout << "options:\n";
+  std::cout << " -m monaddress[:port] connect to specified monitor\n";
+  std::cout << " --keyring=<path> path to keyring for local cluster\n";
+  std::cout << " --log-file=<logfile> file to log debug output\n";
+  std::cout << " --debug-rbd-mirror=<log-level>/<memory-level> set rbd-mirror debug level\n";
+  generic_server_usage();
+}
+
+static void handle_signal(int signum) // forwards the signal to the Mirror instance, if one exists yet
+{
+  if (mirror)
+    mirror->handle_signal(signum);
+}
+
+int main(int argc, const char **argv)
+{
+  auto args = argv_to_vec(argc, argv);
+  if (args.empty()) { // no arguments: point at the help flags and exit with status 1
+    std::cerr << argv[0] << ": -h or --help for usage" << std::endl;
+    exit(1);
+  }
+  if (ceph_argparse_need_usage(args)) {
+    usage();
+    exit(0);
+  }
+
+  auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_DAEMON,
+                         CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+
+  if (g_conf()->daemonize) {
+    global_init_daemonize(g_ceph_context);
+  }
+
+  common_init_finish(g_ceph_context);
+
+  init_async_signal_handler();
+  register_async_signal_handler(SIGHUP, handle_signal);
+  register_async_signal_handler_oneshot(SIGINT, handle_signal);
+  register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+  auto cmd_args = argv_to_vec(argc, argv); // second vector built from argv; handed to the Mirror constructor
+
+  // disable unnecessary librbd cache
+  g_ceph_context->_conf.set_val_or_die("rbd_cache", "false");
+
+  auto prio =
+    g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio"); // priority applied to every counter below
+  {
+    PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_journal",
+                            rbd::mirror::l_rbd_mirror_journal_first,
+                            rbd::mirror::l_rbd_mirror_journal_last);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_entries, "entries",
+                        "Number of entries replayed", nullptr, prio);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_replay_bytes,
+                        "replay_bytes", "Total bytes replayed", nullptr, prio,
+                        unit_t(UNIT_BYTES));
+    plb.add_time_avg(rbd::mirror::l_rbd_mirror_journal_replay_latency,
+                     "replay_latency", "Replay latency", nullptr, prio);
+    g_journal_perf_counters = plb.create_perf_counters();
+  }
+  {
+    PerfCountersBuilder plb(
+      g_ceph_context, "rbd_mirror_snapshot",
+      rbd::mirror::l_rbd_mirror_snapshot_first,
+      rbd::mirror::l_rbd_mirror_snapshot_remote_timestamp); // NOTE(review): upper bound is not a *_last value as in the journal block -- presumably intentional; confirm against the enum
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_snapshots,
+                        "snapshots", "Number of snapshots synced", nullptr,
+                        prio);
+
plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_sync_time, "sync_time",
+                     "Average sync time", nullptr, prio);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_sync_bytes,
+                        "sync_bytes", "Total bytes synced", nullptr, prio,
+                        unit_t(UNIT_BYTES));
+    g_snapshot_perf_counters = plb.create_perf_counters();
+  }
+  g_ceph_context->get_perfcounters_collection()->add(g_journal_perf_counters);
+  g_ceph_context->get_perfcounters_collection()->add(g_snapshot_perf_counters);
+
+  mirror = new rbd::mirror::Mirror(g_ceph_context, cmd_args); // from here on handle_signal() forwards to this instance
+  int r = mirror->init();
+  if (r < 0) {
+    std::cerr << "failed to initialize: " << cpp_strerror(r) << std::endl;
+    goto cleanup; // skip run() but still perform the teardown below
+  }
+
+  mirror->run();
+
+ cleanup:
+  unregister_async_signal_handler(SIGHUP, handle_signal);
+  unregister_async_signal_handler(SIGINT, handle_signal);
+  unregister_async_signal_handler(SIGTERM, handle_signal);
+  shutdown_async_signal_handler();
+
+  g_ceph_context->get_perfcounters_collection()->remove(g_journal_perf_counters); // unregister before the counters are deleted below
+  g_ceph_context->get_perfcounters_collection()->remove(g_snapshot_perf_counters);
+
+  delete mirror;
+  delete g_journal_perf_counters;
+  delete g_snapshot_perf_counters;
+
+  return r < 0 ?
EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc
new file mode 100644
index 000000000..a1d9c1b54
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+#include <map>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::pool_watcher::RefreshImagesRequest " \
+                           << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+static const uint32_t MAX_RETURN = 1024; // page size requested per listing call; also used to detect a full page
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void RefreshImagesRequest<I>::send() { // entry point: reset the output container, then start listing
+  m_image_ids->clear();
+  mirror_image_list();
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::mirror_image_list() { // issues one async listing call resuming after m_start_after
+  dout(10) << dendl;
+
+  librados::ObjectReadOperation op;
+  librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN);
+
+  m_out_bl.clear(); // drop the previous page before reusing the buffer
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    RefreshImagesRequest<I>,
+    &RefreshImagesRequest<I>::handle_mirror_image_list>(this);
+  int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+  ceph_assert(r == 0); // failure to submit the operation is not handled, only asserted
+  aio_comp->release();
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::handle_mirror_image_list(int r) { // completion handler for one listing page
+  dout(10) << "r=" << r << dendl;
+
+  std::map<std::string, std::string> ids;
+  if (r == 0) {
+    auto it = m_out_bl.cbegin();
+    r = librbd::cls_client::mirror_image_list_finish(&it, &ids);
+  }
+
+  if (r < 0 && r != -ENOENT) { // -ENOENT is not an error here: ids stays empty and the request finishes with 0
+    derr << "failed to list mirrored images: " << cpp_strerror(r) <<
dendl;
+    finish(r);
+    return;
+  }
+
+  // store as global -> local image ids
+  for (auto &id : ids) {
+    m_image_ids->emplace(id.second, id.first); // the listing's value and key are swapped on insert
+  }
+
+  if (ids.size() == MAX_RETURN) { // a full page may be followed by more entries
+    m_start_after = ids.rbegin()->first; // resume after the greatest key of this page
+    mirror_image_list();
+    return;
+  }
+
+  finish(0);
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::finish(int r) { // completes the caller's context, then destroys this request
+  dout(10) << "r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this; // no member may be touched after this line
+}
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h
new file mode 100644
index 000000000..8bfeabe29
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+struct Context;
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class RefreshImagesRequest {
+public:
+  static RefreshImagesRequest *create(librados::IoCtx &remote_io_ctx,
+                                      ImageIds *image_ids, Context *on_finish) {
+    return new RefreshImagesRequest(remote_io_ctx, image_ids, on_finish); // heap-allocated; finish() in the .cc deletes it
+  }
+
+  RefreshImagesRequest(librados::IoCtx &remote_io_ctx, ImageIds *image_ids,
+                       Context *on_finish)
+    : m_remote_io_ctx(remote_io_ctx), m_image_ids(image_ids),
+      m_on_finish(on_finish) {
+  }
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    |   /-------------\
+   *    |   |             |
+   *    v   v             | (more images)
+   * MIRROR_IMAGE_LIST ---/
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  librados::IoCtx &m_remote_io_ctx; // held by reference
+  ImageIds *m_image_ids; // output container: cleared in send(), filled by the list handler
+  Context *m_on_finish; // completed exactly once, in finish()
+
+  bufferlist m_out_bl; // receives the raw result of each listing call
+  std::string m_start_after; // pagination cursor; empty for the first page
+
+  void mirror_image_list();
+  void handle_mirror_image_list(int r);
+
+  void finish(int r);
+
+};
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
diff --git a/src/tools/rbd_mirror/pool_watcher/Types.h b/src/tools/rbd_mirror/pool_watcher/Types.h
new file mode 100644
index 000000000..52dfc342d
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/Types.h
@@ -0,0 +1,27 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
+
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+struct Listener { // pure-virtual callback interface
+  virtual ~Listener() {
+  }
+
+  virtual void handle_update(const std::string &mirror_uuid,
+                             ImageIds &&added_image_ids,
+                             ImageIds &&removed_image_ids) = 0; // both id sets are taken by rvalue reference
+};
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/service_daemon/Types.cc b/src/tools/rbd_mirror/service_daemon/Types.cc
new file mode 100644
index 000000000..7dc6537c5
--- /dev/null
+++ b/src/tools/rbd_mirror/service_daemon/Types.cc
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <iostream>
+
+namespace rbd {
+namespace mirror {
+namespace service_daemon {
+
+std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level) { // streams the lowercase level name
+  switch (callout_level) {
+  case CALLOUT_LEVEL_INFO:
+    os << "info";
+    break;
+  case
CALLOUT_LEVEL_WARNING:
+    os << "warning";
+    break;
+  case CALLOUT_LEVEL_ERROR:
+    os << "error";
+    break;
+  } // no default case: a value outside the three enumerators writes nothing
+  return os;
+}
+
+} // namespace service_daemon
+} // namespace mirror
+} // namespace rbd
+
diff --git a/src/tools/rbd_mirror/service_daemon/Types.h b/src/tools/rbd_mirror/service_daemon/Types.h
new file mode 100644
index 000000000..3aab72016
--- /dev/null
+++ b/src/tools/rbd_mirror/service_daemon/Types.h
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
+#define CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
+
+#include "include/int_types.h"
+#include <iosfwd>
+#include <string>
+#include <boost/variant.hpp>
+
+namespace rbd {
+namespace mirror {
+namespace service_daemon {
+
+typedef uint64_t CalloutId;
+const uint64_t CALLOUT_ID_NONE {0}; // NOTE(review): declared as uint64_t rather than CalloutId
+
+enum CalloutLevel {
+  CALLOUT_LEVEL_INFO,
+  CALLOUT_LEVEL_WARNING,
+  CALLOUT_LEVEL_ERROR
+};
+
+std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level);
+
+typedef boost::variant<bool, uint64_t, std::string> AttributeValue; // an attribute value holds a bool, a uint64_t or a string
+
+} // namespace service_daemon
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
|