diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/tools/rbd_mirror | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/tools/rbd_mirror')
98 files changed, 21727 insertions, 0 deletions
diff --git a/src/tools/rbd_mirror/BaseRequest.h b/src/tools/rbd_mirror/BaseRequest.h new file mode 100644 index 00000000..5053eb83 --- /dev/null +++ b/src/tools/rbd_mirror/BaseRequest.h @@ -0,0 +1,43 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_BASE_REQUEST_H +#define CEPH_RBD_MIRROR_BASE_REQUEST_H + +#include "common/RefCountedObj.h" +#include "include/Context.h" + +namespace rbd { +namespace mirror { + +class BaseRequest : public RefCountedObject { +public: + BaseRequest(const std::string& name, CephContext *cct, Context *on_finish) + : RefCountedObject(cct, 1), m_name(name), m_cct(cct), + m_on_finish(on_finish) { + } + + virtual void send() = 0; + virtual void cancel() {} + +protected: + virtual void finish(int r) { + if (m_cct) { + lsubdout(m_cct, rbd_mirror, 20) << m_name << "::finish: r=" << r << dendl; + } + if (m_on_finish) { + m_on_finish->complete(r); + } + put(); + } + +private: + const std::string m_name; + CephContext *m_cct; + Context *m_on_finish; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_BASE_REQUEST_H diff --git a/src/tools/rbd_mirror/CMakeLists.txt b/src/tools/rbd_mirror/CMakeLists.txt new file mode 100644 index 00000000..30106a86 --- /dev/null +++ b/src/tools/rbd_mirror/CMakeLists.txt @@ -0,0 +1,69 @@ +add_library(rbd_mirror_types STATIC + image_map/Types.cc + instance_watcher/Types.cc + leader_watcher/Types.cc) + +set(rbd_mirror_internal + ClusterWatcher.cc + ImageDeleter.cc + ImageMap.cc + ImageReplayer.cc + ImageSync.cc + ImageSyncThrottler.cc + InstanceReplayer.cc + InstanceWatcher.cc + Instances.cc + LeaderWatcher.cc + Mirror.cc + MirrorStatusWatcher.cc + PoolReplayer.cc + PoolWatcher.cc + ServiceDaemon.cc + Threads.cc + Types.cc + image_deleter/SnapshotPurgeRequest.cc + image_deleter/TrashMoveRequest.cc + image_deleter/TrashRemoveRequest.cc + image_deleter/TrashWatcher.cc + image_map/LoadRequest.cc + image_map/Policy.cc + image_map/SimplePolicy.cc + image_map/StateTransition.cc + image_map/UpdateRequest.cc + image_replayer/BootstrapRequest.cc + image_replayer/CloseImageRequest.cc + image_replayer/CreateImageRequest.cc + image_replayer/EventPreprocessor.cc + image_replayer/GetMirrorImageIdRequest.cc + image_replayer/IsPrimaryRequest.cc + image_replayer/OpenImageRequest.cc + image_replayer/OpenLocalImageRequest.cc + image_replayer/PrepareLocalImageRequest.cc + image_replayer/PrepareRemoteImageRequest.cc + image_replayer/ReplayStatusFormatter.cc + image_replayer/Utils.cc + image_sync/SyncPointCreateRequest.cc + image_sync/SyncPointPruneRequest.cc + pool_watcher/RefreshImagesRequest.cc + service_daemon/Types.cc) + +add_library(rbd_mirror_internal STATIC + ${rbd_mirror_internal}) + +add_executable(rbd-mirror + main.cc) +target_link_libraries(rbd-mirror + rbd_mirror_internal + rbd_mirror_types + rbd_api + rbd_internal + rbd_types + journal + librados + osdc + cls_rbd_client + cls_lock_client + cls_journal_client + global + ${ALLOC_LIBS}) +install(TARGETS rbd-mirror DESTINATION bin) diff --git a/src/tools/rbd_mirror/ClusterWatcher.cc b/src/tools/rbd_mirror/ClusterWatcher.cc new file mode 100644 index 00000000..54329de6 --- /dev/null +++ b/src/tools/rbd_mirror/ClusterWatcher.cc @@ -0,0 +1,223 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ClusterWatcher.h" +#include "include/stringify.h" +#include "common/ceph_json.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/internal.h" +#include "librbd/api/Mirror.h" +#include "tools/rbd_mirror/ServiceDaemon.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ClusterWatcher:" << this << " " \ + << __func__ << ": " + +using std::list; +using std::map; +using std::set; +using std::string; +using std::unique_ptr; +using std::vector; + +using librados::Rados; +using librados::IoCtx; + +namespace rbd { +namespace mirror { + +ClusterWatcher::ClusterWatcher(RadosRef cluster, Mutex &lock, + ServiceDaemon<librbd::ImageCtx>* service_daemon) + : m_cluster(cluster), m_lock(lock), m_service_daemon(service_daemon) +{ +} + +const ClusterWatcher::PoolPeers& ClusterWatcher::get_pool_peers() const +{ + ceph_assert(m_lock.is_locked()); + return m_pool_peers; +} + +void ClusterWatcher::refresh_pools() +{ + dout(20) << "enter" << dendl; + + PoolPeers pool_peers; + read_pool_peers(&pool_peers); + + Mutex::Locker l(m_lock); + m_pool_peers = pool_peers; + // TODO: perhaps use a workqueue instead, once we get notifications + // about config changes for existing pools +} + +void ClusterWatcher::read_pool_peers(PoolPeers *pool_peers) +{ + int r = m_cluster->wait_for_latest_osdmap(); + if (r < 0) { + derr << "error waiting for OSD map: " << cpp_strerror(r) << dendl; + return; + } + + list<pair<int64_t, string> > pools; + r = m_cluster->pool_list2(pools); + if (r < 0) { + derr << "error listing pools: " << cpp_strerror(r) << dendl; + return; + } + + std::set<int64_t> service_pool_ids; + for (auto& kv : pools) { + int64_t pool_id = kv.first; + auto& pool_name = kv.second; + int64_t base_tier; + r = m_cluster->pool_get_base_tier(pool_id, &base_tier); + if (r == -ENOENT) { + dout(10) << "pool " << pool_name << " no longer exists" << dendl; + continue; + } else if (r < 0) { + derr << "Error retrieving base tier for pool " << pool_name << dendl; + continue; + } + if (pool_id != base_tier) { + // pool is a cache; skip it + continue; + } + + IoCtx ioctx; + r = m_cluster->ioctx_create2(pool_id, ioctx); + if (r == -ENOENT) { + dout(10) << "pool " << pool_id << " no longer exists" << dendl; + continue; + } else if (r < 0) { + derr << "Error accessing pool " << pool_name << cpp_strerror(r) << dendl; + continue; + } + + cls::rbd::MirrorMode mirror_mode_internal; + r = librbd::cls_client::mirror_mode_get(&ioctx, &mirror_mode_internal); + if (r == 0 && mirror_mode_internal == cls::rbd::MIRROR_MODE_DISABLED) { + dout(10) << "mirroring is disabled for pool " << pool_name << dendl; + continue; + } + + service_pool_ids.insert(pool_id); + if (m_service_pools.find(pool_id) == m_service_pools.end()) { + m_service_pools[pool_id] = {}; + m_service_daemon->add_pool(pool_id, pool_name); + } + + if (r == -EPERM) { + dout(10) << "access denied querying pool " << pool_name << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, "access denied"); + continue; + } else if (r < 0) { + derr << "could not tell whether mirroring was enabled for " << pool_name + << " : " << cpp_strerror(r) << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, "mirroring mode query failed"); + continue; + } + + vector<librbd::mirror_peer_t> configs; + r = librbd::api::Mirror<>::peer_list(ioctx, &configs); + if (r < 0) { + derr << "error reading mirroring config for pool " << pool_name + << cpp_strerror(r) << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_ERROR, "mirroring peer list failed"); + continue; + } + + std::vector<PeerSpec> peers{configs.begin(), configs.end()}; + for (auto& peer : peers) { + r = resolve_peer_config_keys(pool_id, pool_name, &peer); + if (r < 0) { + break; + } + } + + if (m_service_pools[pool_id] != service_daemon::CALLOUT_ID_NONE) { + m_service_daemon->remove_callout(pool_id, m_service_pools[pool_id]); + m_service_pools[pool_id] = service_daemon::CALLOUT_ID_NONE; + } + + pool_peers->emplace(pool_id, Peers{peers.begin(), peers.end()}); + } + + for (auto it = m_service_pools.begin(); it != m_service_pools.end(); ) { + auto current_it(it++); + if (service_pool_ids.find(current_it->first) == service_pool_ids.end()) { + m_service_daemon->remove_pool(current_it->first); + m_service_pools.erase(current_it->first); + } + } +} + +int ClusterWatcher::resolve_peer_config_keys(int64_t pool_id, + const std::string& pool_name, + PeerSpec* peer) { + dout(10) << "retrieving config-key: pool_id=" << pool_id << ", " + << "pool_name=" << pool_name << ", " + << "peer_uuid=" << peer->uuid << dendl; + + std::string cmd = + "{" + "\"prefix\": \"config-key get\", " + "\"key\": \"" RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) + + "/" + peer->uuid + "\"" + "}"; + + bufferlist in_bl; + bufferlist out_bl; + int r = m_cluster->mon_command(cmd, in_bl, &out_bl, nullptr); + if (r == -ENOENT || out_bl.length() == 0) { + return 0; + } else if (r < 0) { + derr << "error reading mirroring peer config for pool " << pool_name << ": " + << cpp_strerror(r) << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, + "mirroring peer config-key query failed"); + return r; + } + + bool json_valid = false; + json_spirit::mValue json_root; + if(json_spirit::read(out_bl.to_str(), json_root)) { + try { + auto& json_obj = json_root.get_obj(); + if (json_obj.count("mon_host")) { + peer->mon_host = json_obj["mon_host"].get_str(); + } + if (json_obj.count("key")) { + peer->key = json_obj["key"].get_str(); + } + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + derr << "error parsing mirroring peer config for pool " << pool_name << ", " + << "peer " << peer->uuid << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, + "mirroring peer config-key decode failed"); + } + + return 0; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/ClusterWatcher.h b/src/tools/rbd_mirror/ClusterWatcher.h new file mode 100644 index 00000000..e8430b47 --- /dev/null +++ b/src/tools/rbd_mirror/ClusterWatcher.h @@ -0,0 +1,69 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_CLUSTER_WATCHER_H +#define CEPH_RBD_MIRROR_CLUSTER_WATCHER_H + +#include <map> +#include <memory> +#include <set> + +#include "common/ceph_context.h" +#include "common/Mutex.h" +#include "common/Timer.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <unordered_map> + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ServiceDaemon; + +/** + * Tracks mirroring configuration for pools in a single + * cluster. + */ +class ClusterWatcher { +public: + struct PeerSpecCompare { + bool operator()(const PeerSpec& lhs, const PeerSpec& rhs) const { + return (lhs.uuid < rhs.uuid); + } + }; + typedef std::set<PeerSpec, PeerSpecCompare> Peers; + typedef std::map<int64_t, Peers> PoolPeers; + + ClusterWatcher(RadosRef cluster, Mutex &lock, + ServiceDaemon<librbd::ImageCtx>* service_daemon); + ~ClusterWatcher() = default; + ClusterWatcher(const ClusterWatcher&) = delete; + ClusterWatcher& operator=(const ClusterWatcher&) = delete; + + // Caller controls frequency of calls + void refresh_pools(); + const PoolPeers& get_pool_peers() const; + +private: + typedef std::unordered_map<int64_t, service_daemon::CalloutId> ServicePools; + + RadosRef m_cluster; + Mutex &m_lock; + ServiceDaemon<librbd::ImageCtx>* m_service_daemon; + + ServicePools m_service_pools; + PoolPeers m_pool_peers; + + void read_pool_peers(PoolPeers *pool_peers); + + int resolve_peer_config_keys(int64_t pool_id, const std::string& pool_name, + PeerSpec* peer); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_CLUSTER_WATCHER_H diff --git a/src/tools/rbd_mirror/ImageDeleter.cc b/src/tools/rbd_mirror/ImageDeleter.cc new file mode 100644 index 00000000..f4d928ca --- /dev/null +++ b/src/tools/rbd_mirror/ImageDeleter.cc @@ -0,0 +1,549 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "include/rados/librados.hpp" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" +#include "global/global_context.h" +#include "librbd/internal.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Operations.h" +#include "cls/rbd/cls_rbd_client.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/Utils.h" +#include "ImageDeleter.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h" +#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h" +#include "tools/rbd_mirror/image_deleter/TrashWatcher.h" +#include <map> +#include <sstream> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +using std::string; +using std::stringstream; +using std::vector; +using std::pair; +using std::make_pair; + +using librados::IoCtx; +using namespace librbd; + +namespace rbd { +namespace mirror { + +namespace { + +class ImageDeleterAdminSocketCommand { +public: + virtual ~ImageDeleterAdminSocketCommand() {} + virtual bool call(Formatter *f, stringstream *ss) = 0; +}; + +template <typename I> +class StatusCommand : public ImageDeleterAdminSocketCommand { +public: + explicit StatusCommand(ImageDeleter<I> *image_del) : image_del(image_del) {} + + bool call(Formatter *f, stringstream *ss) override { + image_del->print_status(f, ss); + return true; + } + +private: + ImageDeleter<I> *image_del; +}; + +} // anonymous namespace + +template <typename I> +class ImageDeleterAdminSocketHook : public AdminSocketHook { +public: + ImageDeleterAdminSocketHook(CephContext *cct, const std::string& pool_name, + ImageDeleter<I> *image_del) : + admin_socket(cct->get_admin_socket()) { + + std::string command; + int r; + + command = "rbd mirror deletion status " + pool_name; + r = admin_socket->register_command(command, command, this, + "get status for image deleter"); + if (r == 0) { + commands[command] = new StatusCommand<I>(image_del); + } + + } + + ~ImageDeleterAdminSocketHook() override { + for (Commands::const_iterator i = commands.begin(); i != commands.end(); + ++i) { + (void)admin_socket->unregister_command(i->first); + delete i->second; + } + } + + bool call(std::string_view command, const cmdmap_t& cmdmap, + std::string_view format, bufferlist& out) override { + Commands::const_iterator i = commands.find(command); + ceph_assert(i != commands.end()); + Formatter *f = Formatter::create(format); + stringstream ss; + bool r = i->second->call(f, &ss); + delete f; + out.append(ss); + return r; + } + +private: + typedef std::map<std::string, ImageDeleterAdminSocketCommand*, + std::less<>> Commands; + AdminSocket *admin_socket; + Commands commands; +}; + +template <typename I> +ImageDeleter<I>::ImageDeleter(librados::IoCtx& local_io_ctx, + Threads<librbd::ImageCtx>* threads, + ServiceDaemon<librbd::ImageCtx>* service_daemon) + : m_local_io_ctx(local_io_ctx), m_threads(threads), + m_service_daemon(service_daemon), m_trash_listener(this), + m_lock(librbd::util::unique_lock_name("rbd::mirror::ImageDeleter::m_lock", + this)) { +} + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << " " \ + << __func__ << ": " + +template <typename I> +void ImageDeleter<I>::trash_move(librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + bool resync, + ContextWQ* work_queue, Context* on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "resync=" << resync << dendl; + + auto req = rbd::mirror::image_deleter::TrashMoveRequest<>::create( + local_io_ctx, global_image_id, resync, work_queue, on_finish); + req->send(); +} + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << this << " " \ + << __func__ << ": " + +template <typename I> +void ImageDeleter<I>::init(Context* on_finish) { + dout(10) << dendl; + + m_asok_hook = new ImageDeleterAdminSocketHook<I>( + g_ceph_context, m_local_io_ctx.get_pool_name(), this); + + m_trash_watcher = image_deleter::TrashWatcher<I>::create(m_local_io_ctx, + m_threads, + m_trash_listener); + m_trash_watcher->init(on_finish); +} + +template <typename I> +void ImageDeleter<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + delete m_asok_hook; + m_asok_hook = nullptr; + + shut_down_trash_watcher(on_finish); +} + +template <typename I> +void ImageDeleter<I>::shut_down_trash_watcher(Context* on_finish) { + dout(10) << dendl; + ceph_assert(m_trash_watcher); + auto ctx = new FunctionContext([this, on_finish](int r) { + delete m_trash_watcher; + m_trash_watcher = nullptr; + + wait_for_ops(on_finish); + }); + m_trash_watcher->shut_down(ctx); +} + +template <typename I> +void ImageDeleter<I>::wait_for_ops(Context* on_finish) { + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + m_running = false; + cancel_retry_timer(); + } + + auto ctx = new FunctionContext([this, on_finish](int) { + cancel_all_deletions(on_finish); + }); + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void ImageDeleter<I>::cancel_all_deletions(Context* on_finish) { + { + Mutex::Locker locker(m_lock); + // wake up any external state machines waiting on deletions + ceph_assert(m_in_flight_delete_queue.empty()); + for (auto& queue : {&m_delete_queue, &m_retry_delete_queue}) { + for (auto& info : *queue) { + notify_on_delete(info->image_id, -ECANCELED); + } + queue->clear(); + } + } + on_finish->complete(0); +} + +template <typename I> +void ImageDeleter<I>::wait_for_deletion(const std::string& image_id, + bool scheduled_only, + Context* on_finish) { + dout(5) << "image_id=" << image_id << dendl; + + on_finish = new FunctionContext([this, on_finish](int r) { + m_threads->work_queue->queue(on_finish, r); + }); + + Mutex::Locker locker(m_lock); + auto del_info = find_delete_info(image_id); + if (!del_info && scheduled_only) { + // image not scheduled for deletion + on_finish->complete(0); + return; + } + + notify_on_delete(image_id, -ESTALE); + m_on_delete_contexts[image_id] = on_finish; +} + +template <typename I> +void ImageDeleter<I>::complete_active_delete(DeleteInfoRef* delete_info, + int r) { + dout(20) << "info=" << *delete_info << ", r=" << r << dendl; + Mutex::Locker locker(m_lock); + notify_on_delete((*delete_info)->image_id, r); + delete_info->reset(); +} + +template <typename I> +void ImageDeleter<I>::enqueue_failed_delete(DeleteInfoRef* delete_info, + int error_code, + double retry_delay) { + dout(20) << "info=" << *delete_info << ", r=" << error_code << dendl; + if (error_code == -EBLACKLISTED) { + Mutex::Locker locker(m_lock); + derr << "blacklisted while deleting local image" << dendl; + complete_active_delete(delete_info, error_code); + return; + } + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + auto& delete_info_ref = *delete_info; + notify_on_delete(delete_info_ref->image_id, error_code); + delete_info_ref->error_code = error_code; + ++delete_info_ref->retries; + delete_info_ref->retry_time = ceph_clock_now(); + delete_info_ref->retry_time += retry_delay; + m_retry_delete_queue.push_back(delete_info_ref); + + schedule_retry_timer(); +} + +template <typename I> +typename ImageDeleter<I>::DeleteInfoRef +ImageDeleter<I>::find_delete_info(const std::string &image_id) { + ceph_assert(m_lock.is_locked()); + DeleteQueue delete_queues[] = {m_in_flight_delete_queue, + m_retry_delete_queue, + m_delete_queue}; + + DeleteInfo delete_info{image_id}; + for (auto& queue : delete_queues) { + auto it = std::find_if(queue.begin(), queue.end(), + [&delete_info](const DeleteInfoRef& ref) { + return delete_info == *ref; + }); + if (it != queue.end()) { + return *it; + } + } + return {}; +} + +template <typename I> +void ImageDeleter<I>::print_status(Formatter *f, stringstream *ss) { + dout(20) << dendl; + + if (f) { + f->open_object_section("image_deleter_status"); + f->open_array_section("delete_images_queue"); + } + + Mutex::Locker l(m_lock); + for (const auto& image : m_delete_queue) { + image->print_status(f, ss); + } + + if (f) { + f->close_section(); + f->open_array_section("failed_deletes_queue"); + } + + for (const auto& image : m_retry_delete_queue) { + image->print_status(f, ss, true); + } + + if (f) { + f->close_section(); + f->close_section(); + f->flush(*ss); + } +} + +template <typename I> +vector<string> ImageDeleter<I>::get_delete_queue_items() { + vector<string> items; + + Mutex::Locker l(m_lock); + for (const auto& del_info : m_delete_queue) { + items.push_back(del_info->image_id); + } + + return items; +} + +template <typename I> +vector<pair<string, int> > ImageDeleter<I>::get_failed_queue_items() { + vector<pair<string, int> > items; + + Mutex::Locker l(m_lock); + for (const auto& del_info : m_retry_delete_queue) { + items.push_back(make_pair(del_info->image_id, + del_info->error_code)); + } + + return items; +} + +template <typename I> +void ImageDeleter<I>::remove_images() { + dout(10) << dendl; + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + uint64_t max_concurrent_deletions = cct->_conf.get_val<uint64_t>( + "rbd_mirror_concurrent_image_deletions"); + + Mutex::Locker locker(m_lock); + while (true) { + if (!m_running || m_delete_queue.empty() || + m_in_flight_delete_queue.size() >= max_concurrent_deletions) { + return; + } + + DeleteInfoRef delete_info = m_delete_queue.front(); + m_delete_queue.pop_front(); + + ceph_assert(delete_info); + remove_image(delete_info); + } +} + +template <typename I> +void ImageDeleter<I>::remove_image(DeleteInfoRef delete_info) { + dout(10) << "info=" << *delete_info << dendl; + ceph_assert(m_lock.is_locked()); + + m_in_flight_delete_queue.push_back(delete_info); + m_async_op_tracker.start_op(); + + auto ctx = new FunctionContext([this, delete_info](int r) { + handle_remove_image(delete_info, r); + m_async_op_tracker.finish_op(); + }); + + auto req = image_deleter::TrashRemoveRequest<I>::create( + m_local_io_ctx, delete_info->image_id, &delete_info->error_result, + m_threads->work_queue, ctx); + req->send(); +} + +template <typename I> +void ImageDeleter<I>::handle_remove_image(DeleteInfoRef delete_info, + int r) { + dout(10) << "info=" << *delete_info << ", r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_lock.is_locked()); + auto it = std::find(m_in_flight_delete_queue.begin(), + m_in_flight_delete_queue.end(), delete_info); + ceph_assert(it != m_in_flight_delete_queue.end()); + m_in_flight_delete_queue.erase(it); + } + + if (r < 0) { + if (delete_info->error_result == image_deleter::ERROR_RESULT_COMPLETE) { + complete_active_delete(&delete_info, r); + } else if (delete_info->error_result == + image_deleter::ERROR_RESULT_RETRY_IMMEDIATELY) { + enqueue_failed_delete(&delete_info, r, m_busy_interval); + } else { + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + double failed_interval = cct->_conf.get_val<double>( + "rbd_mirror_delete_retry_interval"); + enqueue_failed_delete(&delete_info, r, failed_interval); + } + } else { + complete_active_delete(&delete_info, 0); + } + + // process the next queued image to delete + remove_images(); +} + +template <typename I> +void ImageDeleter<I>::schedule_retry_timer() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + if (!m_running || m_timer_ctx != nullptr || m_retry_delete_queue.empty()) { + return; + } + + dout(10) << dendl; + auto &delete_info = m_retry_delete_queue.front(); + m_timer_ctx = new FunctionContext([this](int r) { + handle_retry_timer(); + }); + m_threads->timer->add_event_at(delete_info->retry_time, m_timer_ctx); +} + +template <typename I> +void ImageDeleter<I>::cancel_retry_timer() { + dout(10) << dendl; + ceph_assert(m_threads->timer_lock.is_locked()); + if (m_timer_ctx != nullptr) { + bool canceled = m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + ceph_assert(canceled); + } +} + +template <typename I> +void ImageDeleter<I>::handle_retry_timer() { + dout(10) << dendl; + ceph_assert(m_threads->timer_lock.is_locked()); + Mutex::Locker locker(m_lock); + + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + ceph_assert(m_running); + ceph_assert(!m_retry_delete_queue.empty()); + + // move all ready-to-ready items back to main queue + utime_t now = ceph_clock_now(); + while (!m_retry_delete_queue.empty()) { + auto &delete_info = m_retry_delete_queue.front(); + if (delete_info->retry_time > now) { + break; + } + + m_delete_queue.push_back(delete_info); + m_retry_delete_queue.pop_front(); + } + + // schedule wake up for any future retries + schedule_retry_timer(); + + // start (concurrent) removal of images + m_async_op_tracker.start_op(); + auto ctx = new FunctionContext([this](int r) { + remove_images(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageDeleter<I>::handle_trash_image(const std::string& image_id, + const utime_t& deferment_end_time) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + auto del_info = find_delete_info(image_id); + if (del_info != nullptr) { + dout(20) << "image " << image_id << " " + << "was already scheduled for deletion" << dendl; + return; + } + + dout(10) << "image_id=" << image_id << ", " + << "deferment_end_time=" << deferment_end_time << dendl; + + del_info.reset(new DeleteInfo(image_id)); + del_info->retry_time = deferment_end_time; + m_retry_delete_queue.push_back(del_info); + + schedule_retry_timer(); +} + +template <typename I> +void ImageDeleter<I>::notify_on_delete(const std::string& image_id, + int r) { + dout(10) << "image_id=" << image_id << ", r=" << r << dendl; + auto it = m_on_delete_contexts.find(image_id); + if (it == m_on_delete_contexts.end()) { + return; + } + + it->second->complete(r); + m_on_delete_contexts.erase(it); +} + +template <typename I> +void ImageDeleter<I>::DeleteInfo::print_status(Formatter *f, stringstream *ss, + bool print_failure_info) { + if (f) { + f->open_object_section("delete_info"); + f->dump_string("image_id", image_id); + if (print_failure_info) { + f->dump_string("error_code", cpp_strerror(error_code)); + f->dump_int("retries", retries); + } + f->close_section(); + f->flush(*ss); + } else { + *ss << *this; + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageDeleter<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageDeleter.h b/src/tools/rbd_mirror/ImageDeleter.h new file mode 100644 index 00000000..8a17eb38 --- /dev/null +++ b/src/tools/rbd_mirror/ImageDeleter.h @@ -0,0 +1,180 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_H + +#include "include/utime.h" +#include "common/AsyncOpTracker.h" +#include "common/Mutex.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_deleter/Types.h" +#include <atomic> +#include <deque> +#include <iosfwd> +#include <map> +#include <memory> +#include <vector> + +class AdminSocketHook; +class Context; +class ContextWQ; +class SafeTimer; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ServiceDaemon; +template <typename> class Threads; + +namespace image_deleter { template <typename> struct TrashWatcher; } + +/** + * Manage deletion of non-primary images. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class ImageDeleter { +public: + static ImageDeleter* create(librados::IoCtx& local_io_ctx, + Threads<librbd::ImageCtx>* threads, + ServiceDaemon<librbd::ImageCtx>* service_daemon) { + return new ImageDeleter(local_io_ctx, threads, service_daemon); + } + + ImageDeleter(librados::IoCtx& local_io_ctx, + Threads<librbd::ImageCtx>* threads, + ServiceDaemon<librbd::ImageCtx>* service_daemon); + + ImageDeleter(const ImageDeleter&) = delete; + ImageDeleter& operator=(const ImageDeleter&) = delete; + + static void trash_move(librados::IoCtx& local_io_ctx, + const std::string& global_image_id, bool resync, + ContextWQ* work_queue, Context* on_finish); + + void init(Context* on_finish); + void shut_down(Context* on_finish); + + void print_status(Formatter *f, std::stringstream *ss); + + // for testing purposes + void wait_for_deletion(const std::string &image_id, + bool scheduled_only, Context* on_finish); + + std::vector<std::string> get_delete_queue_items(); + std::vector<std::pair<std::string, int> > get_failed_queue_items(); + + inline void set_busy_timer_interval(double interval) { + m_busy_interval = interval; + } + +private: + struct TrashListener : public image_deleter::TrashListener { + ImageDeleter *image_deleter; + + TrashListener(ImageDeleter *image_deleter) : image_deleter(image_deleter) { + } + + void handle_trash_image(const std::string& image_id, + const utime_t& deferment_end_time) override { + image_deleter->handle_trash_image(image_id, deferment_end_time); + } + }; + + struct DeleteInfo { + std::string image_id; + + image_deleter::ErrorResult error_result = {}; + int error_code = 0; + utime_t retry_time = {}; + int retries = 0; + + DeleteInfo(const std::string& image_id) + : image_id(image_id) { + } + + inline bool operator==(const DeleteInfo& delete_info) const { + return (image_id == delete_info.image_id); + } + + friend std::ostream& operator<<(std::ostream& os, DeleteInfo& delete_info) { + os << "[image_id=" << delete_info.image_id << "]"; + return os; + } + + void print_status(Formatter *f, std::stringstream *ss, + bool print_failure_info=false); + }; + typedef std::shared_ptr<DeleteInfo> DeleteInfoRef; + typedef std::deque<DeleteInfoRef> DeleteQueue; + typedef std::map<std::string, Context*> OnDeleteContexts; + + librados::IoCtx& m_local_io_ctx; + Threads<librbd::ImageCtx>* m_threads; + ServiceDaemon<librbd::ImageCtx>* m_service_daemon; + + image_deleter::TrashWatcher<ImageCtxT>* m_trash_watcher = nullptr; + TrashListener m_trash_listener; + + std::atomic<unsigned> m_running { 1 }; + + double m_busy_interval = 1; + + AsyncOpTracker m_async_op_tracker; + + Mutex m_lock; + DeleteQueue m_delete_queue; + DeleteQueue m_retry_delete_queue; + DeleteQueue m_in_flight_delete_queue; + + OnDeleteContexts m_on_delete_contexts; + + AdminSocketHook *m_asok_hook = nullptr; + + Context *m_timer_ctx = nullptr; + + bool process_image_delete(); + + void complete_active_delete(DeleteInfoRef* delete_info, int r); + void enqueue_failed_delete(DeleteInfoRef* delete_info, int error_code, + double retry_delay); + + DeleteInfoRef find_delete_info(const std::string &image_id); + + void remove_images(); + void remove_image(DeleteInfoRef delete_info); + void handle_remove_image(DeleteInfoRef delete_info, int r); + + void schedule_retry_timer(); + void cancel_retry_timer(); + void handle_retry_timer(); + + void handle_trash_image(const std::string& image_id, + const utime_t& deferment_end_time); + + void shut_down_trash_watcher(Context* on_finish); + void wait_for_ops(Context* on_finish); + void cancel_all_deletions(Context* on_finish); + + void notify_on_delete(const std::string& image_id, int r); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageDeleter<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_H diff --git a/src/tools/rbd_mirror/ImageMap.cc b/src/tools/rbd_mirror/ImageMap.cc new file mode 100644 index 00000000..58fa5e03 --- /dev/null +++ b/src/tools/rbd_mirror/ImageMap.cc @@ -0,0 +1,601 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" + +#include "librbd/Utils.h" +#include "tools/rbd_mirror/Threads.h" + +#include "ImageMap.h" +#include "image_map/LoadRequest.h" +#include "image_map/SimplePolicy.h" +#include "image_map/UpdateRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageMap: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +using ::operator<<; +using image_map::Policy; + +using librbd::util::unique_lock_name; +using librbd::util::create_async_context_callback; + +template <typename I> +struct ImageMap<I>::C_NotifyInstance : public Context { + ImageMap* image_map; + std::string global_image_id; + bool acquire_release; + + C_NotifyInstance(ImageMap* image_map, const std::string& global_image_id, + bool acquire_release) + : image_map(image_map), global_image_id(global_image_id), + acquire_release(acquire_release) { + image_map->start_async_op(); + } + + void finish(int r) override { + if (acquire_release) { + image_map->handle_peer_ack(global_image_id, r); + } else { + image_map->handle_peer_ack_remove(global_image_id, r); + } + image_map->finish_async_op(); + } +}; + +template <typename I> +ImageMap<I>::ImageMap(librados::IoCtx &ioctx, Threads<I> *threads, + const std::string& instance_id, + image_map::Listener &listener) + : m_ioctx(ioctx), m_threads(threads), m_instance_id(instance_id), + m_listener(listener), + m_lock(unique_lock_name("rbd::mirror::ImageMap::m_lock", this)) { +} + +template <typename I> +ImageMap<I>::~ImageMap() { + ceph_assert(m_async_op_tracker.empty()); + ceph_assert(m_timer_task == nullptr); + ceph_assert(m_rebalance_task == nullptr); +} + +template <typename I> +void ImageMap<I>::continue_action(const std::set<std::string> &global_image_ids, + int r) { + dout(20) << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_shutting_down) { + return; + } + + for (auto const &global_image_id : global_image_ids) { + bool schedule = m_policy->finish_action(global_image_id, r); + if (schedule) { + schedule_action(global_image_id); + } + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_update_request( + const Updates &updates, + const std::set<std::string> &remove_global_image_ids, int r) { + dout(20) << "r=" << r << dendl; + + std::set<std::string> global_image_ids; + + global_image_ids.insert(remove_global_image_ids.begin(), + remove_global_image_ids.end()); + for (auto const &update : updates) { + global_image_ids.insert(update.global_image_id); + } + + continue_action(global_image_ids, r); +} + +template <typename I> +void ImageMap<I>::update_image_mapping(Updates&& map_updates, + std::set<std::string>&& map_removals) { + if (map_updates.empty() && map_removals.empty()) { + return; + } + + dout(5) << "updates=[" << map_updates << "], " + << "removes=[" << map_removals << "]" << dendl; + + Context *on_finish = new FunctionContext( + [this, map_updates, map_removals](int r) { + handle_update_request(map_updates, map_removals, r); + finish_async_op(); + }); + on_finish = create_async_context_callback(m_threads->work_queue, on_finish); + + // empty meta policy for now.. + image_map::PolicyMetaNone policy_meta; + + bufferlist bl; + encode(image_map::PolicyData(policy_meta), bl); + + // prepare update map + std::map<std::string, cls::rbd::MirrorImageMap> update_mapping; + for (auto const &update : map_updates) { + update_mapping.emplace( + update.global_image_id, cls::rbd::MirrorImageMap(update.instance_id, + update.mapped_time, bl)); + } + + start_async_op(); + image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create( + m_ioctx, std::move(update_mapping), std::move(map_removals), on_finish); + req->send(); +} + +template <typename I> +void ImageMap<I>::process_updates() { + dout(20) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_timer_task == nullptr); + + Updates map_updates; + std::set<std::string> map_removals; + Updates acquire_updates; + Updates release_updates; + + // gather updates by advancing the state machine + m_lock.Lock(); + for (auto const &global_image_id : m_global_image_ids) { + image_map::ActionType action_type = + m_policy->start_action(global_image_id); + image_map::LookupInfo info = m_policy->lookup(global_image_id); + + dout(15) << "global_image_id=" << global_image_id << ", " + << "action=" << action_type << ", " + << "instance=" << info.instance_id << dendl; + switch (action_type) { + case image_map::ACTION_TYPE_NONE: + continue; + case image_map::ACTION_TYPE_MAP_UPDATE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + map_updates.emplace_back(global_image_id, info.instance_id, + info.mapped_time); + break; + case image_map::ACTION_TYPE_MAP_REMOVE: + map_removals.emplace(global_image_id); + break; + case image_map::ACTION_TYPE_ACQUIRE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + acquire_updates.emplace_back(global_image_id, info.instance_id); + break; + case image_map::ACTION_TYPE_RELEASE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + release_updates.emplace_back(global_image_id, info.instance_id); + break; + } + } + m_global_image_ids.clear(); + m_lock.Unlock(); + + // notify listener (acquire, release) and update on-disk map. note + // that its safe to process this outside m_lock as we still hold + // timer lock. + notify_listener_acquire_release_images(acquire_updates, release_updates); + update_image_mapping(std::move(map_updates), std::move(map_removals)); +} + +template <typename I> +void ImageMap<I>::schedule_update_task() { + Mutex::Locker timer_lock(m_threads->timer_lock); + schedule_update_task(m_threads->timer_lock); +} + +template <typename I> +void ImageMap<I>::schedule_update_task(const Mutex &timer_lock) { + ceph_assert(m_threads->timer_lock.is_locked()); + + schedule_rebalance_task(); + + if (m_timer_task != nullptr) { + return; + } + + { + Mutex::Locker locker(m_lock); + if (m_global_image_ids.empty()) { + return; + } + } + + m_timer_task = new FunctionContext([this](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_timer_task = nullptr; + + process_updates(); + }); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + double after = cct->_conf.get_val<double>("rbd_mirror_image_policy_update_throttle_interval"); + + dout(20) << "scheduling image check update (" << m_timer_task << ")" + << " after " << after << " second(s)" << dendl; + m_threads->timer->add_event_after(after, m_timer_task); +} + +template <typename I> +void ImageMap<I>::rebalance() { + ceph_assert(m_rebalance_task == nullptr); + + { + Mutex::Locker locker(m_lock); + if (m_async_op_tracker.empty() && m_global_image_ids.empty()){ + dout(20) << "starting rebalance" << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->add_instances({}, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + } + + schedule_update_task(m_threads->timer_lock); +} + +template <typename I> +void ImageMap<I>::schedule_rebalance_task() { + ceph_assert(m_threads->timer_lock.is_locked()); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + + // fetch the updated value of idle timeout for (re)scheduling + double resched_after = cct->_conf.get_val<double>( + "rbd_mirror_image_policy_rebalance_timeout"); + if (!resched_after) { + return; + } + + // cancel existing rebalance task if any before scheduling + if (m_rebalance_task != nullptr) { + m_threads->timer->cancel_event(m_rebalance_task); + } + + m_rebalance_task = new FunctionContext([this](int _) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_rebalance_task = nullptr; + + rebalance(); + }); + + dout(20) << "scheduling rebalance (" << m_rebalance_task << ")" + << " after " << resched_after << " second(s)" << dendl; + m_threads->timer->add_event_after(resched_after, m_rebalance_task); +} + +template <typename I> +void ImageMap<I>::schedule_action(const std::string &global_image_id) { + dout(20) << "global_image_id=" << global_image_id << dendl; + ceph_assert(m_lock.is_locked()); + + m_global_image_ids.emplace(global_image_id); +} + +template <typename I> +void ImageMap<I>::notify_listener_acquire_release_images( + const Updates &acquire, const Updates &release) { + if (acquire.empty() && release.empty()) { + return; + } + + dout(5) << "acquire=[" << acquire << "], " + << "release=[" << release << "]" << dendl; + + for (auto const &update : acquire) { + m_listener.acquire_image( + update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, true))); + } + + for (auto const &update : release) { + m_listener.release_image( + update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, true))); + } +} + +template <typename I> +void ImageMap<I>::notify_listener_remove_images(const std::string &peer_uuid, + const Updates &remove) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "remove=[" << remove << "]" << dendl; + + for (auto const &update : remove) { + m_listener.remove_image( + peer_uuid, update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, false))); + } +} + +template <typename I> +void ImageMap<I>::handle_load(const std::map<std::string, + cls::rbd::MirrorImageMap> &image_mapping) { + dout(20) << dendl; + + { + Mutex::Locker locker(m_lock); + m_policy->init(image_mapping); + + for (auto& pair : image_mapping) { + schedule_action(pair.first); + } + } + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_peer_ack_remove(const std::string &global_image_id, + int r) { + Mutex::Locker locker(m_lock); + dout(5) << "global_image_id=" << global_image_id << dendl; + + if (r < 0) { + derr << "failed to remove global_image_id=" << global_image_id << dendl; + } + + auto peer_it = m_peer_map.find(global_image_id); + if (peer_it == m_peer_map.end()) { + return; + } + + m_peer_map.erase(peer_it); +} + +template <typename I> +void ImageMap<I>::update_images_added( + const std::string &peer_uuid, + const std::set<std::string> &global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "global_image_ids=[" << global_image_ids << "]" << dendl; + ceph_assert(m_lock.is_locked()); + + for (auto const &global_image_id : global_image_ids) { + auto result = m_peer_map[global_image_id].insert(peer_uuid); + if (result.second && m_peer_map[global_image_id].size() == 1) { + if (m_policy->add_image(global_image_id)) { + schedule_action(global_image_id); + } + } + } +} + +template <typename I> +void ImageMap<I>::update_images_removed( + const std::string &peer_uuid, + const std::set<std::string> &global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "global_image_ids=[" << global_image_ids << "]" << dendl; + ceph_assert(m_lock.is_locked()); + + Updates to_remove; + for (auto const &global_image_id : global_image_ids) { + image_map::LookupInfo info = m_policy->lookup(global_image_id); + bool image_mapped = (info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + + bool image_removed = image_mapped; + bool peer_removed = false; + auto peer_it = m_peer_map.find(global_image_id); + if (peer_it != m_peer_map.end()) { + auto& peer_set = peer_it->second; + peer_removed = peer_set.erase(peer_uuid); + image_removed = peer_removed && peer_set.empty(); + } + + if (image_mapped && peer_removed && !peer_uuid.empty()) { + // peer image has been deleted + to_remove.emplace_back(global_image_id, info.instance_id); + } + + if (image_mapped && image_removed) { + // local and peer images have been deleted + if (m_policy->remove_image(global_image_id)) { + schedule_action(global_image_id); + } + } + } + + if (!to_remove.empty()) { + // removal notification will be notified instantly. this is safe + // even after scheduling action for images as we still hold m_lock + notify_listener_remove_images(peer_uuid, to_remove); + } +} + +template <typename I> +void ImageMap<I>::update_instances_added( + const std::vector<std::string> &instance_ids) { + { + Mutex::Locker locker(m_lock); + if (m_shutting_down) { + return; + } + + std::vector<std::string> filtered_instance_ids; + filter_instance_ids(instance_ids, &filtered_instance_ids, false); + if (filtered_instance_ids.empty()) { + return; + } + + dout(20) << "instance_ids=" << filtered_instance_ids << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->add_instances(filtered_instance_ids, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::update_instances_removed( + const std::vector<std::string> &instance_ids) { + { + Mutex::Locker locker(m_lock); + if (m_shutting_down) { + return; + } + + std::vector<std::string> filtered_instance_ids; + filter_instance_ids(instance_ids, &filtered_instance_ids, true); + if (filtered_instance_ids.empty()) { + return; + } + + dout(20) << "instance_ids=" << filtered_instance_ids << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->remove_instances(filtered_instance_ids, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::update_images(const std::string &peer_uuid, + std::set<std::string> &&added_global_image_ids, + std::set<std::string> &&removed_global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " << "added_count=" + << added_global_image_ids.size() << ", " << "removed_count=" + << removed_global_image_ids.size() << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_shutting_down) { + return; + } + + if (!removed_global_image_ids.empty()) { + update_images_removed(peer_uuid, removed_global_image_ids); + } + if (!added_global_image_ids.empty()) { + update_images_added(peer_uuid, added_global_image_ids); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_peer_ack(const std::string &global_image_id, int r) { + dout (20) << "global_image_id=" << global_image_id << ", r=" << r + << dendl; + + continue_action({global_image_id}, r); +} + +template <typename I> +void ImageMap<I>::init(Context *on_finish) { + dout(20) << dendl; + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type"); + + if (policy_type == "none" || policy_type == "simple") { + m_policy.reset(image_map::SimplePolicy::create(m_ioctx)); + } else { + ceph_abort(); // not really needed as such, but catch it. + } + + dout(20) << "mapping policy=" << policy_type << dendl; + + start_async_op(); + C_LoadMap *ctx = new C_LoadMap(this, on_finish); + image_map::LoadRequest<I> *req = image_map::LoadRequest<I>::create( + m_ioctx, &ctx->image_mapping, ctx); + req->send(); +} + +template <typename I> +void ImageMap<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + { + Mutex::Locker timer_lock(m_threads->timer_lock); + + { + Mutex::Locker locker(m_lock); + ceph_assert(!m_shutting_down); + + m_shutting_down = true; + m_policy.reset(); + } + + if (m_timer_task != nullptr) { + m_threads->timer->cancel_event(m_timer_task); + m_timer_task = nullptr; + } + if (m_rebalance_task != nullptr) { + m_threads->timer->cancel_event(m_rebalance_task); + m_rebalance_task = nullptr; + } + } + + wait_for_async_ops(on_finish); +} + +template <typename I> +void ImageMap<I>::filter_instance_ids( + const std::vector<std::string> &instance_ids, + std::vector<std::string> *filtered_instance_ids, bool removal) const { + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type"); + + if (policy_type != "none") { + *filtered_instance_ids = instance_ids; + return; + } + + if (removal) { + // propagate removals for external instances + for (auto& instance_id : instance_ids) { + if (instance_id != m_instance_id) { + filtered_instance_ids->push_back(instance_id); + } + } + } else if (std::find(instance_ids.begin(), instance_ids.end(), + m_instance_id) != instance_ids.end()) { + // propagate addition only for local instance + filtered_instance_ids->push_back(m_instance_id); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageMap<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageMap.h b/src/tools/rbd_mirror/ImageMap.h new file mode 100644 index 00000000..283f55db --- /dev/null +++ b/src/tools/rbd_mirror/ImageMap.h @@ -0,0 +1,175 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_H + +#include <vector> + +#include "common/Mutex.h" +#include "include/Context.h" +#include "common/AsyncOpTracker.h" +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +#include "image_map/Policy.h" +#include "image_map/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class ImageMap { +public: + static ImageMap *create(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads, + const std::string& instance_id, + image_map::Listener &listener) { + return new ImageMap(ioctx, threads, instance_id, listener); + } + + ~ImageMap(); + + // init (load) the instance map from disk + void init(Context *on_finish); + + // shut down map operations + void shut_down(Context *on_finish); + + // update (add/remove) images + void update_images(const std::string &peer_uuid, + std::set<std::string> &&added_global_image_ids, + std::set<std::string> &&removed_global_image_ids); + + // add/remove instances + void update_instances_added(const std::vector<std::string> &instances); + void update_instances_removed(const std::vector<std::string> &instances); + +private: + struct C_NotifyInstance; + + ImageMap(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads, + const std::string& instance_id, image_map::Listener &listener); + + struct Update { + std::string global_image_id; + std::string instance_id; + utime_t mapped_time; + + Update(const std::string &global_image_id, const std::string &instance_id, + utime_t mapped_time) + : global_image_id(global_image_id), + instance_id(instance_id), + mapped_time(mapped_time) { + } + Update(const std::string &global_image_id, const std::string &instance_id) + : Update(global_image_id, instance_id, ceph_clock_now()) { + } + + friend std::ostream& operator<<(std::ostream& os, + const Update& update) { + os << "{global_image_id=" << update.global_image_id << ", " + << "instance_id=" << update.instance_id << "}"; + return os; + } + + }; + typedef std::list<Update> Updates; + + // Lock ordering: m_threads->timer_lock, m_lock + + librados::IoCtx &m_ioctx; + Threads<ImageCtxT> *m_threads; + std::string m_instance_id; + image_map::Listener &m_listener; + + std::unique_ptr<image_map::Policy> m_policy; // our mapping policy + + Context *m_timer_task = nullptr; + Mutex m_lock; + bool m_shutting_down = false; + AsyncOpTracker m_async_op_tracker; + + // global_image_id -> registered peers ("" == local, remote otherwise) + std::map<std::string, std::set<std::string> > m_peer_map; + + std::set<std::string> m_global_image_ids; + + Context *m_rebalance_task = nullptr; + + struct C_LoadMap : Context { + ImageMap *image_map; + Context *on_finish; + + std::map<std::string, cls::rbd::MirrorImageMap> image_mapping; + + C_LoadMap(ImageMap *image_map, Context *on_finish) + : image_map(image_map), + on_finish(on_finish) { + } + + void finish(int r) override { + if (r == 0) { + image_map->handle_load(image_mapping); + } + + image_map->finish_async_op(); + on_finish->complete(r); + } + }; + + // async op-tracker helper routines + void start_async_op() { + m_async_op_tracker.start_op(); + } + void finish_async_op() { + m_async_op_tracker.finish_op(); + } + void wait_for_async_ops(Context *on_finish) { + m_async_op_tracker.wait_for_ops(on_finish); + } + + void handle_peer_ack(const std::string &global_image_id, int r); + void handle_peer_ack_remove(const std::string &global_image_id, int r); + + void handle_load(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping); + void handle_update_request(const Updates &updates, + const std::set<std::string> &remove_global_image_ids, int r); + + // continue (retry or resume depending on state machine) processing + // current action. + void continue_action(const std::set<std::string> &global_image_ids, int r); + + // schedule an image for update + void schedule_action(const std::string &global_image_id); + + void schedule_update_task(); + void schedule_update_task(const Mutex &timer_lock); + void process_updates(); + void update_image_mapping(Updates&& map_updates, + std::set<std::string>&& map_removals); + + void rebalance(); + void schedule_rebalance_task(); + + void notify_listener_acquire_release_images(const Updates &acquire, const Updates &release); + void notify_listener_remove_images(const std::string &peer_uuid, const Updates &remove); + + void update_images_added(const std::string &peer_uuid, + const std::set<std::string> &global_image_ids); + void update_images_removed(const std::string &peer_uuid, + const std::set<std::string> &global_image_ids); + + void filter_instance_ids(const std::vector<std::string> &instance_ids, + std::vector<std::string> *filtered_instance_ids, + bool removal) const; + +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_H diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc new file mode 100644 index 00000000..6c6ee2d5 --- /dev/null +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -0,0 +1,1896 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" +#include "global/global_context.h" +#include "journal/Journaler.h" +#include "journal/ReplayHandler.h" +#include "journal/Settings.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/journal/Replay.h" +#include "ImageDeleter.h" +#include "ImageReplayer.h" +#include "Threads.h" +#include "tools/rbd_mirror/image_replayer/BootstrapRequest.h" +#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h" +#include "tools/rbd_mirror/image_replayer/EventPreprocessor.h" +#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h" +#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h" +#include "tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::" << *this << " " \ + << __func__ << ": " + +using std::map; +using std::string; +using std::unique_ptr; +using std::shared_ptr; +using std::vector; + +extern PerfCounters *g_perf_counters; + +namespace rbd { +namespace mirror { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; +using namespace rbd::mirror::image_replayer; + +template <typename I> +std::ostream &operator<<(std::ostream &os, + const typename ImageReplayer<I>::State &state); + +namespace { + +template <typename I> +struct ReplayHandler : public ::journal::ReplayHandler { + ImageReplayer<I> *replayer; + ReplayHandler(ImageReplayer<I> *replayer) : replayer(replayer) {} + void get() override {} + void put() override {} + + void handle_entries_available() override { + replayer->handle_replay_ready(); + } + void handle_complete(int r) override { + std::stringstream ss; + if (r < 0) { + ss << "replay completed with error: " << cpp_strerror(r); + } + replayer->handle_replay_complete(r, ss.str()); + } +}; + +template <typename I> +class ImageReplayerAdminSocketCommand { +public: + ImageReplayerAdminSocketCommand(const std::string &desc, + ImageReplayer<I> *replayer) + : desc(desc), replayer(replayer) { + } + virtual ~ImageReplayerAdminSocketCommand() {} + virtual bool call(Formatter *f, stringstream *ss) = 0; + + std::string desc; + ImageReplayer<I> *replayer; + bool registered = false; +}; + +template <typename I> +class StatusCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StatusCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->print_status(f, ss); + return true; + } +}; + +template <typename I> +class StartCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StartCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->start(nullptr, true); + return true; + } +}; + +template <typename I> +class StopCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StopCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->stop(nullptr, true); + return true; + } +}; + +template <typename I> +class RestartCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit RestartCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->restart(); + return true; + } +}; + +template <typename I> +class FlushCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit FlushCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->flush(); + return true; + } +}; + +template <typename I> +class ImageReplayerAdminSocketHook : public AdminSocketHook { +public: + ImageReplayerAdminSocketHook(CephContext *cct, const std::string &name, + ImageReplayer<I> *replayer) + : admin_socket(cct->get_admin_socket()), + commands{{"rbd mirror flush " + name, + new FlushCommand<I>("flush rbd mirror " + name, replayer)}, + {"rbd mirror restart " + name, + new RestartCommand<I>("restart rbd mirror " + name, replayer)}, + {"rbd mirror start " + name, + new StartCommand<I>("start rbd mirror " + name, replayer)}, + {"rbd mirror status " + name, + new StatusCommand<I>("get status for rbd mirror " + name, replayer)}, + {"rbd mirror stop " + name, + new StopCommand<I>("stop rbd mirror " + name, replayer)}} { + } + + int register_commands() { + for (auto &it : commands) { + int r = admin_socket->register_command(it.first, it.first, this, + it.second->desc); + if (r < 0) { + return r; + } + it.second->registered = true; + } + return 0; + } + + ~ImageReplayerAdminSocketHook() override { + for (auto &it : commands) { + if (it.second->registered) { + admin_socket->unregister_command(it.first); + } + delete it.second; + } + commands.clear(); + } + + bool call(std::string_view command, const cmdmap_t& cmdmap, + std::string_view format, bufferlist& out) override { + auto i = commands.find(command); + ceph_assert(i != commands.end()); + Formatter *f = Formatter::create(format); + stringstream ss; + bool r = i->second->call(f, &ss); + delete f; + out.append(ss); + return r; + } + +private: + typedef std::map<std::string, ImageReplayerAdminSocketCommand<I>*, + std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +uint32_t calculate_replay_delay(const utime_t &event_time, + int mirroring_replay_delay) { + if (mirroring_replay_delay <= 0) { + return 0; + } + + utime_t now = ceph_clock_now(); + if (event_time + mirroring_replay_delay <= now) { + return 0; + } + + // ensure it is rounded up when converting to integer + return (event_time + mirroring_replay_delay - now) + 1; +} + +} // anonymous namespace + +template <typename I> +void ImageReplayer<I>::BootstrapProgressContext::update_progress( + const std::string &description, bool flush) +{ + const std::string desc = "bootstrapping, " + description; + replayer->set_state_description(0, desc); + if (flush) { + replayer->update_mirror_image_status(false, boost::none); + } +} + +template <typename I> +void ImageReplayer<I>::RemoteJournalerListener::handle_update( + ::journal::JournalMetadata *) { + FunctionContext *ctx = new FunctionContext([this](int r) { + replayer->handle_remote_journal_metadata_updated(); + }); + replayer->m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +ImageReplayer<I>::ImageReplayer(Threads<I> *threads, + InstanceWatcher<I> *instance_watcher, + RadosRef local, + const std::string &local_mirror_uuid, + int64_t local_pool_id, + const std::string &global_image_id) : + m_threads(threads), + m_instance_watcher(instance_watcher), + m_local(local), + m_local_mirror_uuid(local_mirror_uuid), + m_local_pool_id(local_pool_id), + m_global_image_id(global_image_id), m_local_image_name(global_image_id), + m_lock("rbd::mirror::ImageReplayer " + stringify(local_pool_id) + " " + + global_image_id), + m_progress_cxt(this), + m_journal_listener(new JournalListener(this)), + m_remote_listener(this) +{ + // Register asok commands using a temporary "remote_pool_name/global_image_id" + // name. When the image name becomes known on start the asok commands will be + // re-registered using "remote_pool_name/remote_image_name" name. + + std::string pool_name; + int r = m_local->pool_reverse_lookup(m_local_pool_id, &pool_name); + if (r < 0) { + derr << "error resolving local pool " << m_local_pool_id + << ": " << cpp_strerror(r) << dendl; + pool_name = stringify(m_local_pool_id); + } + + m_name = pool_name + "/" + m_global_image_id; + register_admin_socket_hook(); +} + +template <typename I> +ImageReplayer<I>::~ImageReplayer() +{ + unregister_admin_socket_hook(); + ceph_assert(m_event_preprocessor == nullptr); + ceph_assert(m_replay_status_formatter == nullptr); + ceph_assert(m_local_image_ctx == nullptr); + ceph_assert(m_local_replay == nullptr); + ceph_assert(m_remote_journaler == nullptr); + ceph_assert(m_replay_handler == nullptr); + ceph_assert(m_on_start_finish == nullptr); + ceph_assert(m_on_stop_finish == nullptr); + ceph_assert(m_bootstrap_request == nullptr); + ceph_assert(m_in_flight_status_updates == 0); + + delete m_journal_listener; +} + +template <typename I> +image_replayer::HealthState ImageReplayer<I>::get_health_state() const { + Mutex::Locker locker(m_lock); + + if (!m_mirror_image_status_state) { + return image_replayer::HEALTH_STATE_OK; + } else if (*m_mirror_image_status_state == + cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING || + *m_mirror_image_status_state == + cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN) { + return image_replayer::HEALTH_STATE_WARNING; + } + return image_replayer::HEALTH_STATE_ERROR; +} + +template <typename I> +void ImageReplayer<I>::add_peer(const std::string &peer_uuid, + librados::IoCtx &io_ctx) { + Mutex::Locker locker(m_lock); + auto it = m_peers.find({peer_uuid}); + if (it == m_peers.end()) { + m_peers.insert({peer_uuid, io_ctx}); + } +} + +template <typename I> +void ImageReplayer<I>::set_state_description(int r, const std::string &desc) { + dout(10) << r << " " << desc << dendl; + + Mutex::Locker l(m_lock); + m_last_r = r; + m_state_desc = desc; +} + +template <typename I> +void ImageReplayer<I>::start(Context *on_finish, bool manual) +{ + dout(10) << "on_finish=" << on_finish << dendl; + + int r = 0; + { + Mutex::Locker locker(m_lock); + if (!is_stopped_()) { + derr << "already running" << dendl; + r = -EINVAL; + } else if (m_manual_stop && !manual) { + dout(5) << "stopped manually, ignoring start without manual flag" + << dendl; + r = -EPERM; + } else { + m_state = STATE_STARTING; + m_last_r = 0; + m_state_desc.clear(); + m_manual_stop = false; + m_delete_requested = false; + + if (on_finish != nullptr) { + ceph_assert(m_on_start_finish == nullptr); + m_on_start_finish = on_finish; + } + ceph_assert(m_on_stop_finish == nullptr); + } + } + + if (r < 0) { + if (on_finish) { + on_finish->complete(r); + } + return; + } + + m_local_ioctx.reset(new librados::IoCtx{}); + r = m_local->ioctx_create2(m_local_pool_id, *m_local_ioctx); + if (r < 0) { + m_local_ioctx.reset(); + + derr << "error opening ioctx for local pool " << m_local_pool_id + << ": " << cpp_strerror(r) << dendl; + on_start_fail(r, "error opening local pool"); + return; + } + + prepare_local_image(); +} + +template <typename I> +void ImageReplayer<I>::prepare_local_image() { + dout(10) << dendl; + + m_local_image_id = ""; + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_prepare_local_image>(this); + auto req = PrepareLocalImageRequest<I>::create( + *m_local_ioctx, m_global_image_id, &m_local_image_id, &m_local_image_name, + &m_local_image_tag_owner, m_threads->work_queue, ctx); + req->send(); +} + +template <typename I> +void ImageReplayer<I>::handle_prepare_local_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "local image does not exist" << dendl; + } else if (r < 0) { + on_start_fail(r, "error preparing local image for replay"); + return; + } else { + reregister_admin_socket_hook(); + } + + // local image doesn't exist or is non-primary + prepare_remote_image(); +} + +template <typename I> +void ImageReplayer<I>::prepare_remote_image() { + dout(10) << dendl; + if (m_peers.empty()) { + // technically nothing to bootstrap, but it handles the status update + bootstrap(); + return; + } + + // TODO need to support multiple remote images + ceph_assert(!m_peers.empty()); + m_remote_image = {*m_peers.begin()}; + + auto cct = static_cast<CephContext *>(m_local->cct()); + journal::Settings journal_settings; + journal_settings.commit_interval = cct->_conf.get_val<double>( + "rbd_mirror_journal_commit_age"); + journal_settings.max_fetch_bytes = cct->_conf.get_val<Option::size_t>( + "rbd_mirror_journal_max_fetch_bytes"); + + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_prepare_remote_image>(this); + auto req = PrepareRemoteImageRequest<I>::create( + m_threads, m_remote_image.io_ctx, m_global_image_id, m_local_mirror_uuid, + m_local_image_id, journal_settings, &m_remote_image.mirror_uuid, + &m_remote_image.image_id, &m_remote_journaler, &m_client_state, + &m_client_meta, ctx); + req->send(); +} + +template <typename I> +void ImageReplayer<I>::handle_prepare_remote_image(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r < 0 ? m_remote_journaler == nullptr : m_remote_journaler != nullptr); + if (r < 0 && !m_local_image_id.empty() && + m_local_image_tag_owner == librbd::Journal<>::LOCAL_MIRROR_UUID) { + // local image is primary -- fall-through + } else if (r == -ENOENT) { + dout(10) << "remote image does not exist" << dendl; + + // TODO need to support multiple remote images + if (m_remote_image.image_id.empty() && !m_local_image_id.empty() && + m_local_image_tag_owner == m_remote_image.mirror_uuid) { + // local image exists and is non-primary and linked to the missing + // remote image + + m_delete_requested = true; + on_start_fail(0, "remote image no longer exists"); + } else { + on_start_fail(-ENOENT, "remote image does not exist"); + } + return; + } else if (r < 0) { + on_start_fail(r, "error retrieving remote image id"); + return; + } + + bootstrap(); +} + +template <typename I> +void ImageReplayer<I>::bootstrap() { + dout(10) << dendl; + + if (!m_local_image_id.empty() && + m_local_image_tag_owner == librbd::Journal<>::LOCAL_MIRROR_UUID) { + dout(5) << "local image is primary" << dendl; + on_start_fail(0, "local image is primary"); + return; + } else if (m_peers.empty()) { + dout(5) << "no peer clusters" << dendl; + on_start_fail(-ENOENT, "no peer clusters"); + return; + } + + BootstrapRequest<I> *request = nullptr; + { + Mutex::Locker locker(m_lock); + if (on_start_interrupted(m_lock)) { + return; + } + + auto ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_bootstrap>(this); + request = BootstrapRequest<I>::create( + m_threads, *m_local_ioctx, m_remote_image.io_ctx, m_instance_watcher, + &m_local_image_ctx, m_local_image_id, m_remote_image.image_id, + m_global_image_id, m_local_mirror_uuid, m_remote_image.mirror_uuid, + m_remote_journaler, &m_client_state, &m_client_meta, ctx, + &m_resync_requested, &m_progress_cxt); + request->get(); + m_bootstrap_request = request; + } + + update_mirror_image_status(false, boost::none); + reschedule_update_status_task(10); + + request->send(); +} + +template <typename I> +void ImageReplayer<I>::handle_bootstrap(int r) { + dout(10) << "r=" << r << dendl; + { + Mutex::Locker locker(m_lock); + m_bootstrap_request->put(); + m_bootstrap_request = nullptr; + if (m_local_image_ctx) { + m_local_image_id = m_local_image_ctx->id; + } + } + + if (on_start_interrupted()) { + return; + } else if (r == -EREMOTEIO) { + m_local_image_tag_owner = ""; + dout(5) << "remote image is non-primary" << dendl; + on_start_fail(-EREMOTEIO, "remote image is non-primary"); + return; + } else if (r == -EEXIST) { + m_local_image_tag_owner = ""; + on_start_fail(r, "split-brain detected"); + return; + } else if (r < 0) { + on_start_fail(r, "error bootstrapping replay"); + return; + } else if (m_resync_requested) { + on_start_fail(0, "resync requested"); + return; + } + + ceph_assert(m_local_journal == nullptr); + { + RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock); + if (m_local_image_ctx->journal != nullptr) { + m_local_journal = m_local_image_ctx->journal; + m_local_journal->add_listener(m_journal_listener); + } + } + + if (m_local_journal == nullptr) { + on_start_fail(-EINVAL, "error accessing local journal"); + return; + } + + update_mirror_image_status(false, boost::none); + init_remote_journaler(); +} + +template <typename I> +void ImageReplayer<I>::init_remote_journaler() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_init_remote_journaler>(this); + m_remote_journaler->init(ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_init_remote_journaler(int r) { + dout(10) << "r=" << r << dendl; + + if (on_start_interrupted()) { + return; + } else if (r < 0) { + derr << "failed to initialize remote journal: " << cpp_strerror(r) << dendl; + on_start_fail(r, "error initializing remote journal"); + return; + } + + m_remote_journaler->add_listener(&m_remote_listener); + + cls::journal::Client client; + r = m_remote_journaler->get_cached_client(m_local_mirror_uuid, &client); + if (r < 0) { + derr << "error retrieving remote journal client: " << cpp_strerror(r) + << dendl; + on_start_fail(r, "error retrieving remote journal client"); + return; + } + + dout(5) << "image_id=" << m_local_image_id << ", " + << "client_meta.image_id=" << m_client_meta.image_id << ", " + << "client.state=" << client.state << dendl; + if (m_client_meta.image_id == m_local_image_id && + client.state != cls::journal::CLIENT_STATE_CONNECTED) { + dout(5) << "client flagged disconnected, stopping image replay" << dendl; + if (m_local_image_ctx->config.template get_val<bool>("rbd_mirroring_resync_after_disconnect")) { + m_resync_requested = true; + on_start_fail(-ENOTCONN, "disconnected: automatic resync"); + } else { + on_start_fail(-ENOTCONN, "disconnected"); + } + return; + } + + start_replay(); +} + +template <typename I> +void ImageReplayer<I>::start_replay() { + dout(10) << dendl; + + Context *start_ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_start_replay>(this); + m_local_journal->start_external_replay(&m_local_replay, start_ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_start_replay(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + ceph_assert(m_local_replay == nullptr); + derr << "error starting external replay on local image " + << m_local_image_id << ": " << cpp_strerror(r) << dendl; + on_start_fail(r, "error starting replay on local image"); + return; + } + + m_replay_status_formatter = + ReplayStatusFormatter<I>::create(m_remote_journaler, m_local_mirror_uuid); + + Context *on_finish(nullptr); + { + Mutex::Locker locker(m_lock); + ceph_assert(m_state == STATE_STARTING); + m_state = STATE_REPLAYING; + std::swap(m_on_start_finish, on_finish); + } + + m_event_preprocessor = EventPreprocessor<I>::create( + *m_local_image_ctx, *m_remote_journaler, m_local_mirror_uuid, + &m_client_meta, m_threads->work_queue); + + update_mirror_image_status(true, boost::none); + reschedule_update_status_task(30); + + if (on_replay_interrupted()) { + return; + } + + { + CephContext *cct = static_cast<CephContext *>(m_local->cct()); + double poll_seconds = cct->_conf.get_val<double>( + "rbd_mirror_journal_poll_age"); + + Mutex::Locker locker(m_lock); + m_replay_handler = new ReplayHandler<I>(this); + m_remote_journaler->start_live_replay(m_replay_handler, poll_seconds); + + dout(10) << "m_remote_journaler=" << *m_remote_journaler << dendl; + } + + dout(10) << "start succeeded" << dendl; + if (on_finish != nullptr) { + dout(10) << "on finish complete, r=" << r << dendl; + on_finish->complete(r); + } +} + +template <typename I> +void ImageReplayer<I>::on_start_fail(int r, const std::string &desc) +{ + dout(10) << "r=" << r << dendl; + Context *ctx = new FunctionContext([this, r, desc](int _r) { + { + Mutex::Locker locker(m_lock); + ceph_assert(m_state == STATE_STARTING); + m_state = STATE_STOPPING; + if (r < 0 && r != -ECANCELED && r != -EREMOTEIO && r != -ENOENT) { + derr << "start failed: " << cpp_strerror(r) << dendl; + } else { + dout(10) << "start canceled" << dendl; + } + } + + set_state_description(r, desc); + if (m_local_ioctx) { + update_mirror_image_status(false, boost::none); + } + reschedule_update_status_task(-1); + shut_down(r); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +bool ImageReplayer<I>::on_start_interrupted() { + Mutex::Locker locker(m_lock); + return on_start_interrupted(m_lock); +} + +template <typename I> +bool ImageReplayer<I>::on_start_interrupted(Mutex& lock) { + ceph_assert(m_lock.is_locked()); + ceph_assert(m_state == STATE_STARTING); + if (!m_stop_requested) { + return false; + } + + on_start_fail(-ECANCELED, ""); + return true; +} + +template <typename I> +void ImageReplayer<I>::stop(Context *on_finish, bool manual, int r, + const std::string& desc) +{ + dout(10) << "on_finish=" << on_finish << ", manual=" << manual + << ", desc=" << desc << dendl; + + image_replayer::BootstrapRequest<I> *bootstrap_request = nullptr; + bool shut_down_replay = false; + bool running = true; + { + Mutex::Locker locker(m_lock); + + if (!is_running_()) { + running = false; + } else { + if (!is_stopped_()) { + if (m_state == STATE_STARTING) { + dout(10) << "canceling start" << dendl; + if (m_bootstrap_request != nullptr) { + bootstrap_request = m_bootstrap_request; + bootstrap_request->get(); + } + } else { + dout(10) << "interrupting replay" << dendl; + shut_down_replay = true; + } + + ceph_assert(m_on_stop_finish == nullptr); + std::swap(m_on_stop_finish, on_finish); + m_stop_requested = true; + m_manual_stop = manual; + } + } + } + + // avoid holding lock since bootstrap request will update status + if (bootstrap_request != nullptr) { + dout(10) << "canceling bootstrap" << dendl; + bootstrap_request->cancel(); + bootstrap_request->put(); + } + + if (!running) { + dout(20) << "not running" << dendl; + if (on_finish) { + on_finish->complete(-EINVAL); + } + return; + } + + if (shut_down_replay) { + on_stop_journal_replay(r, desc); + } else if (on_finish != nullptr) { + on_finish->complete(0); + } +} + +template <typename I> +void ImageReplayer<I>::on_stop_journal_replay(int r, const std::string &desc) +{ + dout(10) << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_state != STATE_REPLAYING) { + // might be invoked multiple times while stopping + return; + } + m_stop_requested = true; + m_state = STATE_STOPPING; + } + + set_state_description(r, desc); + update_mirror_image_status(true, boost::none); + reschedule_update_status_task(-1); + shut_down(0); +} + +template <typename I> +void ImageReplayer<I>::handle_replay_ready() +{ + dout(20) << dendl; + if (on_replay_interrupted()) { + return; + } + + if (!m_remote_journaler->try_pop_front(&m_replay_entry, &m_replay_tag_tid)) { + return; + } + + m_event_replay_tracker.start_op(); + + m_lock.Lock(); + bool stopping = (m_state == STATE_STOPPING); + m_lock.Unlock(); + + if (stopping) { + dout(10) << "stopping event replay" << dendl; + m_event_replay_tracker.finish_op(); + return; + } + + if (m_replay_tag_valid && m_replay_tag.tid == m_replay_tag_tid) { + preprocess_entry(); + return; + } + + replay_flush(); +} + +template <typename I> +void ImageReplayer<I>::restart(Context *on_finish) +{ + FunctionContext *ctx = new FunctionContext( + [this, on_finish](int r) { + if (r < 0) { + // Try start anyway. + } + start(on_finish, true); + }); + stop(ctx); +} + +template <typename I> +void ImageReplayer<I>::flush() +{ + dout(10) << dendl; + C_SaferCond ctx; + flush_local_replay(&ctx); + ctx.wait(); + + update_mirror_image_status(false, boost::none); +} + +template <typename I> +void ImageReplayer<I>::flush_local_replay(Context* on_flush) +{ + m_lock.Lock(); + if (m_state != STATE_REPLAYING) { + m_lock.Unlock(); + on_flush->complete(0); + return; + } + + dout(15) << dendl; + auto ctx = new FunctionContext( + [this, on_flush](int r) { + handle_flush_local_replay(on_flush, r); + }); + m_local_replay->flush(ctx); + m_lock.Unlock(); +} + +template <typename I> +void ImageReplayer<I>::handle_flush_local_replay(Context* on_flush, int r) +{ + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "error flushing local replay: " << cpp_strerror(r) << dendl; + on_flush->complete(r); + return; + } + + flush_commit_position(on_flush); +} + +template <typename I> +void ImageReplayer<I>::flush_commit_position(Context* on_flush) +{ + m_lock.Lock(); + if (m_state != STATE_REPLAYING) { + m_lock.Unlock(); + on_flush->complete(0); + return; + } + + dout(15) << dendl; + auto ctx = new FunctionContext( + [this, on_flush](int r) { + handle_flush_commit_position(on_flush, r); + }); + m_remote_journaler->flush_commit_position(ctx); + m_lock.Unlock(); +} + +template <typename I> +void ImageReplayer<I>::handle_flush_commit_position(Context* on_flush, int r) +{ + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "error flushing remote journal commit position: " + << cpp_strerror(r) << dendl; + } + + on_flush->complete(r); +} + +template <typename I> +bool ImageReplayer<I>::on_replay_interrupted() +{ + bool shut_down; + { + Mutex::Locker locker(m_lock); + shut_down = m_stop_requested; + } + + if (shut_down) { + on_stop_journal_replay(); + } + return shut_down; +} + +template <typename I> +void ImageReplayer<I>::print_status(Formatter *f, stringstream *ss) +{ + dout(10) << dendl; + + Mutex::Locker l(m_lock); + + if (f) { + f->open_object_section("image_replayer"); + f->dump_string("name", m_name); + f->dump_string("state", to_string(m_state)); + f->close_section(); + f->flush(*ss); + } else { + *ss << m_name << ": state: " << to_string(m_state); + } +} + +template <typename I> +void ImageReplayer<I>::handle_replay_complete(int r, const std::string &error_desc) +{ + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "replay encountered an error: " << cpp_strerror(r) << dendl; + } + + { + Mutex::Locker locker(m_lock); + m_stop_requested = true; + } + on_stop_journal_replay(r, error_desc); +} + +template <typename I> +void ImageReplayer<I>::replay_flush() { + dout(10) << dendl; + + bool interrupted = false; + { + Mutex::Locker locker(m_lock); + if (m_state != STATE_REPLAYING) { + dout(10) << "replay interrupted" << dendl; + interrupted = true; + } else { + m_state = STATE_REPLAY_FLUSHING; + } + } + + if (interrupted) { + m_event_replay_tracker.finish_op(); + return; + } + + // shut down the replay to flush all IO and ops and create a new + // replayer to handle the new tag epoch + Context *ctx = create_context_callback< + ImageReplayer<I>, &ImageReplayer<I>::handle_replay_flush>(this); + ctx = new FunctionContext([this, ctx](int r) { + m_local_image_ctx->journal->stop_external_replay(); + m_local_replay = nullptr; + + if (r < 0) { + ctx->complete(r); + return; + } + + m_local_journal->start_external_replay(&m_local_replay, ctx); + }); + m_local_replay->shut_down(false, ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_replay_flush(int r) { + dout(10) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_state == STATE_REPLAY_FLUSHING); + m_state = STATE_REPLAYING; + } + + if (r < 0) { + derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl; + m_event_replay_tracker.finish_op(); + handle_replay_complete(r, "replay flush encountered an error"); + return; + } else if (on_replay_interrupted()) { + m_event_replay_tracker.finish_op(); + return; + } + + get_remote_tag(); +} + +template <typename I> +void ImageReplayer<I>::get_remote_tag() { + dout(15) << "tag_tid: " << m_replay_tag_tid << dendl; + + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_get_remote_tag>(this); + m_remote_journaler->get_tag(m_replay_tag_tid, &m_replay_tag, ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_get_remote_tag(int r) { + dout(15) << "r=" << r << dendl; + + if (r == 0) { + try { + auto it = m_replay_tag.data.cbegin(); + decode(m_replay_tag_data, it); + } catch (const buffer::error &err) { + r = -EBADMSG; + } + } + + if (r < 0) { + derr << "failed to retrieve remote tag " << m_replay_tag_tid << ": " + << cpp_strerror(r) << dendl; + m_event_replay_tracker.finish_op(); + handle_replay_complete(r, "failed to retrieve remote tag"); + return; + } + + m_replay_tag_valid = true; + dout(15) << "decoded remote tag " << m_replay_tag_tid << ": " + << m_replay_tag_data << dendl; + + allocate_local_tag(); +} + +template <typename I> +void ImageReplayer<I>::allocate_local_tag() { + dout(15) << dendl; + + std::string mirror_uuid = m_replay_tag_data.mirror_uuid; + if (mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { + mirror_uuid = m_remote_image.mirror_uuid; + } else if (mirror_uuid == m_local_mirror_uuid) { + mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID; + } else if (mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) { + // handle possible edge condition where daemon can failover and + // the local image has already been promoted/demoted + auto local_tag_data = m_local_journal->get_tag_data(); + if (local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID && + (local_tag_data.predecessor.commit_valid && + local_tag_data.predecessor.mirror_uuid == + librbd::Journal<>::LOCAL_MIRROR_UUID)) { + dout(15) << "skipping stale demotion event" << dendl; + handle_process_entry_safe(m_replay_entry, m_replay_start_time, 0); + handle_replay_ready(); + return; + } else { + dout(5) << "encountered image demotion: stopping" << dendl; + Mutex::Locker locker(m_lock); + m_stop_requested = true; + } + } + + librbd::journal::TagPredecessor predecessor(m_replay_tag_data.predecessor); + if (predecessor.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { + predecessor.mirror_uuid = m_remote_image.mirror_uuid; + } else if (predecessor.mirror_uuid == m_local_mirror_uuid) { + predecessor.mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID; + } + + dout(15) << "mirror_uuid=" << mirror_uuid << ", " + << "predecessor=" << predecessor << ", " + << "replay_tag_tid=" << m_replay_tag_tid << dendl; + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_allocate_local_tag>(this); + m_local_journal->allocate_tag(mirror_uuid, predecessor, ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_allocate_local_tag(int r) { + dout(15) << "r=" << r << ", " + << "tag_tid=" << m_local_journal->get_tag_tid() << dendl; + + if (r < 0) { + derr << "failed to allocate journal tag: " << cpp_strerror(r) << dendl; + m_event_replay_tracker.finish_op(); + handle_replay_complete(r, "failed to allocate journal tag"); + return; + } + + preprocess_entry(); +} + +template <typename I> +void ImageReplayer<I>::preprocess_entry() { + dout(20) << "preprocessing entry tid=" << m_replay_entry.get_commit_tid() + << dendl; + + bufferlist data = m_replay_entry.get_data(); + auto it = data.cbegin(); + int r = m_local_replay->decode(&it, &m_event_entry); + if (r < 0) { + derr << "failed to decode journal event" << dendl; + m_event_replay_tracker.finish_op(); + handle_replay_complete(r, "failed to decode journal event"); + return; + } + + uint32_t delay = calculate_replay_delay( + m_event_entry.timestamp, m_local_image_ctx->mirroring_replay_delay); + if (delay == 0) { + handle_preprocess_entry_ready(0); + return; + } + + dout(20) << "delaying replay by " << delay << " sec" << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + ceph_assert(m_delayed_preprocess_task == nullptr); + m_delayed_preprocess_task = new FunctionContext( + [this](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_delayed_preprocess_task = nullptr; + m_threads->work_queue->queue( + create_context_callback<ImageReplayer, + &ImageReplayer<I>::handle_preprocess_entry_ready>(this), 0); + }); + m_threads->timer->add_event_after(delay, m_delayed_preprocess_task); +} + +template <typename I> +void ImageReplayer<I>::handle_preprocess_entry_ready(int r) { + dout(20) << "r=" << r << dendl; + ceph_assert(r == 0); + + m_replay_start_time = ceph_clock_now(); + if (!m_event_preprocessor->is_required(m_event_entry)) { + process_entry(); + return; + } + + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_preprocess_entry_safe>(this); + m_event_preprocessor->preprocess(&m_event_entry, ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_preprocess_entry_safe(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + m_event_replay_tracker.finish_op(); + + if (r == -ECANCELED) { + handle_replay_complete(0, "lost exclusive lock"); + } else { + derr << "failed to preprocess journal event" << dendl; + handle_replay_complete(r, "failed to preprocess journal event"); + } + return; + } + + process_entry(); +} + +template <typename I> +void ImageReplayer<I>::process_entry() { + dout(20) << "processing entry tid=" << m_replay_entry.get_commit_tid() + << dendl; + + // stop replaying events if stop has been requested + if (on_replay_interrupted()) { + m_event_replay_tracker.finish_op(); + return; + } + + Context *on_ready = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_process_entry_ready>(this); + Context *on_commit = new C_ReplayCommitted(this, std::move(m_replay_entry), + m_replay_start_time); + + m_local_replay->process(m_event_entry, on_ready, on_commit); +} + +template <typename I> +void ImageReplayer<I>::handle_process_entry_ready(int r) { + dout(20) << dendl; + ceph_assert(r == 0); + + bool update_status = false; + { + RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock); + if (m_local_image_name != m_local_image_ctx->name) { + m_local_image_name = m_local_image_ctx->name; + update_status = true; + } + } + + if (update_status) { + reschedule_update_status_task(0); + } + + // attempt to process the next event + handle_replay_ready(); +} + +template <typename I> +void ImageReplayer<I>::handle_process_entry_safe(const ReplayEntry &replay_entry, + const utime_t &replay_start_time, + int r) { + dout(20) << "commit_tid=" << replay_entry.get_commit_tid() << ", r=" << r + << dendl; + + if (r < 0) { + derr << "failed to commit journal event: " << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to commit journal event"); + } else { + ceph_assert(m_remote_journaler != nullptr); + m_remote_journaler->committed(replay_entry); + } + + auto bytes = replay_entry.get_data().length(); + auto latency = ceph_clock_now() - replay_start_time; + + if (g_perf_counters) { + g_perf_counters->inc(l_rbd_mirror_replay); + g_perf_counters->inc(l_rbd_mirror_replay_bytes, bytes); + g_perf_counters->tinc(l_rbd_mirror_replay_latency, latency); + } + + auto ctx = new FunctionContext( + [this, bytes, latency](int r) { + Mutex::Locker locker(m_lock); + if (m_perf_counters) { + m_perf_counters->inc(l_rbd_mirror_replay); + m_perf_counters->inc(l_rbd_mirror_replay_bytes, bytes); + m_perf_counters->tinc(l_rbd_mirror_replay_latency, latency); + } + m_event_replay_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +bool ImageReplayer<I>::update_mirror_image_status(bool force, + const OptionalState &state) { + dout(15) << dendl; + { + Mutex::Locker locker(m_lock); + if (!start_mirror_image_status_update(force, false)) { + return false; + } + } + + queue_mirror_image_status_update(state); + return true; +} + +template <typename I> +bool ImageReplayer<I>::start_mirror_image_status_update(bool force, + bool restarting) { + ceph_assert(m_lock.is_locked()); + + if (!force && !is_stopped_()) { + if (!is_running_()) { + dout(15) << "shut down in-progress: ignoring update" << dendl; + return false; + } else if (m_in_flight_status_updates > (restarting ? 1 : 0)) { + dout(15) << "already sending update" << dendl; + m_update_status_requested = true; + return false; + } + } + + ++m_in_flight_status_updates; + dout(15) << "in-flight updates=" << m_in_flight_status_updates << dendl; + return true; +} + +template <typename I> +void ImageReplayer<I>::finish_mirror_image_status_update() { + reregister_admin_socket_hook(); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_in_flight_status_updates > 0); + if (--m_in_flight_status_updates > 0) { + dout(15) << "waiting on " << m_in_flight_status_updates << " in-flight " + << "updates" << dendl; + return; + } + + std::swap(on_finish, m_on_update_status_finish); + } + + dout(15) << dendl; + if (on_finish != nullptr) { + on_finish->complete(0); + } +} + +template <typename I> +void ImageReplayer<I>::queue_mirror_image_status_update(const OptionalState &state) { + dout(15) << dendl; + + auto ctx = new FunctionContext( + [this, state](int r) { + send_mirror_status_update(state); + }); + + // ensure pending IO is flushed and the commit position is updated + // prior to updating the mirror status + ctx = new FunctionContext( + [this, ctx](int r) { + flush_local_replay(ctx); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageReplayer<I>::send_mirror_status_update(const OptionalState &opt_state) { + State state; + std::string state_desc; + int last_r; + bool stopping_replay; + + OptionalMirrorImageStatusState mirror_image_status_state = + boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN); + image_replayer::BootstrapRequest<I>* bootstrap_request = nullptr; + { + Mutex::Locker locker(m_lock); + state = m_state; + state_desc = m_state_desc; + mirror_image_status_state = m_mirror_image_status_state; + last_r = m_last_r; + stopping_replay = (m_local_image_ctx != nullptr); + + if (m_bootstrap_request != nullptr) { + bootstrap_request = m_bootstrap_request; + bootstrap_request->get(); + } + } + + bool syncing = false; + if (bootstrap_request != nullptr) { + syncing = bootstrap_request->is_syncing(); + bootstrap_request->put(); + bootstrap_request = nullptr; + } + + if (opt_state) { + state = *opt_state; + } + + cls::rbd::MirrorImageStatus status; + status.up = true; + switch (state) { + case STATE_STARTING: + if (syncing) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING; + status.description = state_desc.empty() ? "syncing" : state_desc; + mirror_image_status_state = status.state; + } else { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY; + status.description = "starting replay"; + } + break; + case STATE_REPLAYING: + case STATE_REPLAY_FLUSHING: + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING; + { + Context *on_req_finish = new FunctionContext( + [this](int r) { + dout(15) << "replay status ready: r=" << r << dendl; + if (r >= 0) { + send_mirror_status_update(boost::none); + } else if (r == -EAGAIN) { + // decrement in-flight status update counter + handle_mirror_status_update(r); + } + }); + + std::string desc; + ceph_assert(m_replay_status_formatter != nullptr); + if (!m_replay_status_formatter->get_or_send_update(&desc, + on_req_finish)) { + dout(15) << "waiting for replay status" << dendl; + return; + } + status.description = "replaying, " + desc; + mirror_image_status_state = boost::make_optional( + false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN); + } + break; + case STATE_STOPPING: + if (stopping_replay) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY; + status.description = state_desc.empty() ? "stopping replay" : state_desc; + break; + } + // FALLTHROUGH + case STATE_STOPPED: + if (last_r == -EREMOTEIO) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN; + status.description = state_desc; + mirror_image_status_state = status.state; + } else if (last_r < 0) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR; + status.description = state_desc; + mirror_image_status_state = status.state; + } else { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPED; + status.description = state_desc.empty() ? "stopped" : state_desc; + mirror_image_status_state = boost::none; + } + break; + default: + ceph_assert(!"invalid state"); + } + + { + Mutex::Locker locker(m_lock); + m_mirror_image_status_state = mirror_image_status_state; + } + + // prevent the status from ping-ponging when failed replays are restarted + if (mirror_image_status_state && + *mirror_image_status_state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR) { + status.state = *mirror_image_status_state; + } + + dout(15) << "status=" << status << dendl; + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_status_set(&op, m_global_image_id, status); + + ceph_assert(m_local_ioctx); + librados::AioCompletion *aio_comp = create_rados_callback< + ImageReplayer<I>, &ImageReplayer<I>::handle_mirror_status_update>(this); + int r = m_local_ioctx->aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void ImageReplayer<I>::handle_mirror_status_update(int r) { + dout(15) << "r=" << r << dendl; + + bool running = false; + bool started = false; + { + Mutex::Locker locker(m_lock); + bool update_status_requested = false; + std::swap(update_status_requested, m_update_status_requested); + + running = is_running_(); + if (running && update_status_requested) { + started = start_mirror_image_status_update(false, true); + } + } + + // if a deferred update is available, send it -- otherwise reschedule + // the timer task + if (started) { + queue_mirror_image_status_update(boost::none); + } else if (running) { + reschedule_update_status_task(0); + } + + // mark committed status update as no longer in-flight + finish_mirror_image_status_update(); +} + +template <typename I> +void ImageReplayer<I>::reschedule_update_status_task(int new_interval) { + bool canceled_task = false; + { + Mutex::Locker locker(m_lock); + Mutex::Locker timer_locker(m_threads->timer_lock); + + if (m_update_status_task) { + dout(15) << "canceling existing status update task" << dendl; + + canceled_task = m_threads->timer->cancel_event(m_update_status_task); + m_update_status_task = nullptr; + } + + if (new_interval > 0) { + m_update_status_interval = new_interval; + } + + if (new_interval >= 0 && is_running_() && + start_mirror_image_status_update(true, false)) { + m_update_status_task = new FunctionContext( + [this](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_update_status_task = nullptr; + + queue_mirror_image_status_update(boost::none); + }); + dout(15) << "scheduling status update task after " + << m_update_status_interval << " seconds" << dendl; + m_threads->timer->add_event_after(m_update_status_interval, + m_update_status_task); + } + } + + if (canceled_task) { + // decrement in-flight status update counter for canceled task + finish_mirror_image_status_update(); + } +} + +template <typename I> +void ImageReplayer<I>::shut_down(int r) { + dout(10) << "r=" << r << dendl; + + bool canceled_delayed_preprocess_task = false; + { + Mutex::Locker timer_locker(m_threads->timer_lock); + if (m_delayed_preprocess_task != nullptr) { + canceled_delayed_preprocess_task = m_threads->timer->cancel_event( + m_delayed_preprocess_task); + ceph_assert(canceled_delayed_preprocess_task); + m_delayed_preprocess_task = nullptr; + } + } + if (canceled_delayed_preprocess_task) { + // wake up sleeping replay + m_event_replay_tracker.finish_op(); + } + + reschedule_update_status_task(-1); + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_state == STATE_STOPPING); + + // if status updates are in-flight, wait for them to complete + // before proceeding + if (m_in_flight_status_updates > 0) { + if (m_on_update_status_finish == nullptr) { + dout(15) << "waiting for in-flight status update" << dendl; + m_on_update_status_finish = new FunctionContext( + [this, r](int _r) { + shut_down(r); + }); + } + return; + } + } + + // NOTE: it's important to ensure that the local image is fully + // closed before attempting to close the remote journal in + // case the remote cluster is unreachable + + // chain the shut down sequence (reverse order) + Context *ctx = new FunctionContext( + [this, r](int _r) { + if (m_local_ioctx) { + update_mirror_image_status(true, STATE_STOPPED); + } + handle_shut_down(r); + }); + + // close the remote journal + if (m_remote_journaler != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + delete m_remote_journaler; + m_remote_journaler = nullptr; + ctx->complete(0); + }); + ctx = new FunctionContext([this, ctx](int r) { + m_remote_journaler->remove_listener(&m_remote_listener); + m_remote_journaler->shut_down(ctx); + }); + } + + // stop the replay of remote journal events + if (m_replay_handler != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + delete m_replay_handler; + m_replay_handler = nullptr; + + m_event_replay_tracker.wait_for_ops(ctx); + }); + ctx = new FunctionContext([this, ctx](int r) { + m_remote_journaler->stop_replay(ctx); + }); + } + + // close the local image (release exclusive lock) + if (m_local_image_ctx) { + ctx = new FunctionContext([this, ctx](int r) { + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + &m_local_image_ctx, ctx); + request->send(); + }); + } + + // shut down event replay into the local image + if (m_local_journal != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + m_local_journal = nullptr; + ctx->complete(0); + }); + if (m_local_replay != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + m_local_journal->stop_external_replay(); + m_local_replay = nullptr; + + EventPreprocessor<I>::destroy(m_event_preprocessor); + m_event_preprocessor = nullptr; + ctx->complete(0); + }); + } + ctx = new FunctionContext([this, ctx](int r) { + // blocks if listener notification is in-progress + m_local_journal->remove_listener(m_journal_listener); + ctx->complete(0); + }); + } + + // wait for all local in-flight replay events to complete + ctx = new FunctionContext([this, ctx](int r) { + if (r < 0) { + derr << "error shutting down journal replay: " << cpp_strerror(r) + << dendl; + } + + m_event_replay_tracker.wait_for_ops(ctx); + }); + + // flush any local in-flight replay events + if (m_local_replay != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + m_local_replay->shut_down(true, ctx); + }); + } + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageReplayer<I>::handle_shut_down(int r) { + reschedule_update_status_task(-1); + + bool resync_requested = false; + bool delete_requested = false; + bool unregister_asok_hook = false; + { + Mutex::Locker locker(m_lock); + + // if status updates are in-flight, wait for them to complete + // before proceeding + if (m_in_flight_status_updates > 0) { + if (m_on_update_status_finish == nullptr) { + dout(15) << "waiting for in-flight status update" << dendl; + m_on_update_status_finish = new FunctionContext( + [this, r](int _r) { + handle_shut_down(r); + }); + } + return; + } + + if (m_delete_requested && !m_local_image_id.empty()) { + ceph_assert(m_remote_image.image_id.empty()); + dout(0) << "remote image no longer exists: scheduling deletion" << dendl; + unregister_asok_hook = true; + std::swap(delete_requested, m_delete_requested); + } + + std::swap(resync_requested, m_resync_requested); + if (delete_requested || resync_requested) { + m_local_image_id = ""; + } else if (m_last_r == -ENOENT && + m_local_image_id.empty() && m_remote_image.image_id.empty()) { + dout(0) << "mirror image no longer exists" << dendl; + unregister_asok_hook = true; + m_finished = true; + } + } + + if (unregister_asok_hook) { + unregister_admin_socket_hook(); + } + + if (delete_requested || resync_requested) { + dout(5) << "moving image to trash" << dendl; + auto ctx = new FunctionContext([this, r](int) { + handle_shut_down(r); + }); + ImageDeleter<I>::trash_move(*m_local_ioctx, m_global_image_id, + resync_requested, m_threads->work_queue, ctx); + return; + } + + dout(10) << "stop complete" << dendl; + ReplayStatusFormatter<I>::destroy(m_replay_status_formatter); + m_replay_status_formatter = nullptr; + + Context *on_start = nullptr; + Context *on_stop = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(on_start, m_on_start_finish); + std::swap(on_stop, m_on_stop_finish); + m_stop_requested = false; + ceph_assert(m_delayed_preprocess_task == nullptr); + ceph_assert(m_state == STATE_STOPPING); + m_state = STATE_STOPPED; + } + + if (on_start != nullptr) { + dout(10) << "on start finish complete, r=" << r << dendl; + on_start->complete(r); + r = 0; + } + if (on_stop != nullptr) { + dout(10) << "on stop finish complete, r=" << r << dendl; + on_stop->complete(r); + } +} + +template <typename I> +void ImageReplayer<I>::handle_remote_journal_metadata_updated() { + dout(20) << dendl; + + cls::journal::Client client; + { + Mutex::Locker locker(m_lock); + if (!is_running_()) { + return; + } + + int r = m_remote_journaler->get_cached_client(m_local_mirror_uuid, &client); + if (r < 0) { + derr << "failed to retrieve client: " << cpp_strerror(r) << dendl; + return; + } + } + + if (client.state != cls::journal::CLIENT_STATE_CONNECTED) { + dout(0) << "client flagged disconnected, stopping image replay" << dendl; + stop(nullptr, false, -ENOTCONN, "disconnected"); + } +} + +template <typename I> +std::string ImageReplayer<I>::to_string(const State state) { + switch (state) { + case ImageReplayer<I>::STATE_STARTING: + return "Starting"; + case ImageReplayer<I>::STATE_REPLAYING: + return "Replaying"; + case ImageReplayer<I>::STATE_REPLAY_FLUSHING: + return "ReplayFlushing"; + case ImageReplayer<I>::STATE_STOPPING: + return "Stopping"; + case ImageReplayer<I>::STATE_STOPPED: + return "Stopped"; + default: + break; + } + return "Unknown(" + stringify(state) + ")"; +} + +template <typename I> +void ImageReplayer<I>::resync_image(Context *on_finish) { + dout(10) << dendl; + + m_resync_requested = true; + stop(on_finish); +} + +template <typename I> +void ImageReplayer<I>::register_admin_socket_hook() { + ImageReplayerAdminSocketHook<I> *asok_hook; + { + Mutex::Locker locker(m_lock); + if (m_asok_hook != nullptr) { + return; + } + + ceph_assert(m_perf_counters == nullptr); + + dout(15) << "registered asok hook: " << m_name << dendl; + asok_hook = new ImageReplayerAdminSocketHook<I>(g_ceph_context, m_name, + this); + int r = asok_hook->register_commands(); + if (r == 0) { + m_asok_hook = asok_hook; + + CephContext *cct = static_cast<CephContext *>(m_local->cct()); + auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio"); + PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_" + m_name, + l_rbd_mirror_first, l_rbd_mirror_last); + plb.add_u64_counter(l_rbd_mirror_replay, "replay", "Replays", "r", prio); + plb.add_u64_counter(l_rbd_mirror_replay_bytes, "replay_bytes", + "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); + plb.add_time_avg(l_rbd_mirror_replay_latency, "replay_latency", + "Replay latency", "rl", prio); + m_perf_counters = plb.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(m_perf_counters); + + return; + } + derr << "error registering admin socket commands" << dendl; + } + delete asok_hook; +} + +template <typename I> +void ImageReplayer<I>::unregister_admin_socket_hook() { + dout(15) << dendl; + + AdminSocketHook *asok_hook = nullptr; + PerfCounters *perf_counters = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(asok_hook, m_asok_hook); + std::swap(perf_counters, m_perf_counters); + } + delete asok_hook; + if (perf_counters != nullptr) { + g_ceph_context->get_perfcounters_collection()->remove(perf_counters); + delete perf_counters; + } +} + +template <typename I> +void ImageReplayer<I>::reregister_admin_socket_hook() { + { + Mutex::Locker locker(m_lock); + auto name = m_local_ioctx->get_pool_name() + "/" + m_local_image_name; + if (m_asok_hook != nullptr && m_name == name) { + return; + } + m_name = name; + } + unregister_admin_socket_hook(); + register_admin_socket_hook(); +} + +template <typename I> +std::ostream &operator<<(std::ostream &os, const ImageReplayer<I> &replayer) +{ + os << "ImageReplayer: " << &replayer << " [" << replayer.get_local_pool_id() + << "/" << replayer.get_global_image_id() << "]"; + return os; +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h new file mode 100644 index 00000000..9af3e961 --- /dev/null +++ b/src/tools/rbd_mirror/ImageReplayer.h @@ -0,0 +1,438 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_H + +#include "common/AsyncOpTracker.h" +#include "common/Mutex.h" +#include "common/WorkQueue.h" +#include "include/rados/librados.hpp" +#include "cls/journal/cls_journal_types.h" +#include "cls/rbd/cls_rbd_types.h" +#include "journal/JournalMetadataListener.h" +#include "journal/ReplayEntry.h" +#include "librbd/ImageCtx.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include "ProgressContext.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_replayer/Types.h" + +#include <boost/noncopyable.hpp> +#include <boost/optional.hpp> + +#include <set> +#include <map> +#include <atomic> +#include <string> +#include <vector> + +class AdminSocketHook; +class PerfCounters; + +namespace journal { + +class Journaler; +class ReplayHandler; + +} + +namespace librbd { + +class ImageCtx; +namespace journal { template <typename> class Replay; } + +} + +namespace rbd { +namespace mirror { + +template <typename> struct InstanceWatcher; +template <typename> struct Threads; + +namespace image_replayer { template <typename> class BootstrapRequest; } +namespace image_replayer { template <typename> class EventPreprocessor; } +namespace image_replayer { template <typename> class ReplayStatusFormatter; } + +/** + * Replays changes from a remote cluster for a single image. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class ImageReplayer { +public: + static ImageReplayer *create( + Threads<ImageCtxT> *threads, InstanceWatcher<ImageCtxT> *instance_watcher, + RadosRef local, const std::string &local_mirror_uuid, int64_t local_pool_id, + const std::string &global_image_id) { + return new ImageReplayer(threads, instance_watcher, local, + local_mirror_uuid, local_pool_id, global_image_id); + } + void destroy() { + delete this; + } + + ImageReplayer(Threads<ImageCtxT> *threads, + InstanceWatcher<ImageCtxT> *instance_watcher, + RadosRef local, const std::string &local_mirror_uuid, + int64_t local_pool_id, const std::string &global_image_id); + virtual ~ImageReplayer(); + ImageReplayer(const ImageReplayer&) = delete; + ImageReplayer& operator=(const ImageReplayer&) = delete; + + bool is_stopped() { Mutex::Locker l(m_lock); return is_stopped_(); } + bool is_running() { Mutex::Locker l(m_lock); return is_running_(); } + bool is_replaying() { Mutex::Locker l(m_lock); return is_replaying_(); } + + std::string get_name() { Mutex::Locker l(m_lock); return m_name; }; + void set_state_description(int r, const std::string &desc); + + // TODO temporary until policy handles release of image replayers + inline bool is_finished() const { + Mutex::Locker locker(m_lock); + return m_finished; + } + inline void set_finished(bool finished) { + Mutex::Locker locker(m_lock); + m_finished = finished; + } + + inline bool is_blacklisted() const { + Mutex::Locker locker(m_lock); + return (m_last_r == -EBLACKLISTED); + } + + image_replayer::HealthState get_health_state() const; + + void add_peer(const std::string &peer_uuid, librados::IoCtx &remote_io_ctx); + + inline int64_t get_local_pool_id() const { + return m_local_pool_id; + } + inline const std::string& get_global_image_id() const { + return m_global_image_id; + } + + void start(Context *on_finish = nullptr, bool manual = false); + void stop(Context *on_finish = nullptr, bool manual = false, + int r = 0, const std::string& desc = ""); + void restart(Context *on_finish = nullptr); + void flush(); + + void resync_image(Context *on_finish=nullptr); + + void print_status(Formatter *f, stringstream *ss); + + virtual void handle_replay_ready(); + virtual void handle_replay_complete(int r, const std::string &error_desc); + +protected: + /** + * @verbatim + * (error) + * <uninitialized> <------------------------------------ FAIL + * | ^ + * v * + * <starting> * + * | * + * v (error) * + * PREPARE_LOCAL_IMAGE * * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * PREPARE_REMOTE_IMAGE * * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * BOOTSTRAP_IMAGE * * * * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * INIT_REMOTE_JOURNALER * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * START_REPLAY * * * * * * * * * * * * * * * * * * * * * * + * | + * | /--------------------------------------------\ + * | | | + * v v (asok flush) | + * REPLAYING -------------> LOCAL_REPLAY_FLUSH | + * | \ | | + * | | v | + * | | FLUSH_COMMIT_POSITION | + * | | | | + * | | \--------------------/| + * | | | + * | | (entries available) | + * | \-----------> REPLAY_READY | + * | | | + * | | (skip if not | + * | v needed) (error) + * | REPLAY_FLUSH * * * * * * * * * + * | | | * + * | | (skip if not | * + * | v needed) (error) * + * | GET_REMOTE_TAG * * * * * * * * + * | | | * + * | | (skip if not | * + * | v needed) (error) * + * | ALLOCATE_LOCAL_TAG * * * * * * + * | | | * + * | v (error) * + * | PREPROCESS_ENTRY * * * * * * * + * | | | * + * | v (error) * + * | PROCESS_ENTRY * * * * * * * * * + * | | | * + * | \---------------------/ * + * v * + * REPLAY_COMPLETE < * * * * * * * * * * * * * * * * * * * + * | + * v + * JOURNAL_REPLAY_SHUT_DOWN + * | + * v + * LOCAL_IMAGE_CLOSE + * | + * v + * <stopped> + * + * @endverbatim + */ + + virtual void on_start_fail(int r, const std::string &desc); + virtual bool on_start_interrupted(); + virtual bool on_start_interrupted(Mutex& lock); + + virtual void on_stop_journal_replay(int r = 0, const std::string &desc = ""); + + bool on_replay_interrupted(); + +private: + typedef typename librbd::journal::TypeTraits<ImageCtxT>::ReplayEntry ReplayEntry; + + enum State { + STATE_UNKNOWN, + STATE_STARTING, + STATE_REPLAYING, + STATE_REPLAY_FLUSHING, + STATE_STOPPING, + STATE_STOPPED, + }; + + struct RemoteImage { + std::string mirror_uuid; + std::string image_id; + librados::IoCtx io_ctx; + + RemoteImage() { + } + RemoteImage(const Peer& peer) : io_ctx(peer.io_ctx) { + } + }; + + typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler; + typedef boost::optional<State> OptionalState; + typedef boost::optional<cls::rbd::MirrorImageStatusState> + OptionalMirrorImageStatusState; + + struct JournalListener : public librbd::journal::Listener { + ImageReplayer *img_replayer; + + JournalListener(ImageReplayer *img_replayer) + : img_replayer(img_replayer) { + } + + void handle_close() override { + img_replayer->on_stop_journal_replay(); + } + + void handle_promoted() override { + img_replayer->on_stop_journal_replay(0, "force promoted"); + } + + void handle_resync() override { + img_replayer->resync_image(); + } + }; + + class BootstrapProgressContext : public ProgressContext { + public: + BootstrapProgressContext(ImageReplayer<ImageCtxT> *replayer) : + replayer(replayer) { + } + + void update_progress(const std::string &description, + bool flush = true) override; + private: + ImageReplayer<ImageCtxT> *replayer; + }; + + Threads<ImageCtxT> *m_threads; + InstanceWatcher<ImageCtxT> *m_instance_watcher; + + Peers m_peers; + RemoteImage m_remote_image; + + RadosRef m_local; + std::string m_local_mirror_uuid; + int64_t m_local_pool_id; + std::string m_local_image_id; + std::string m_global_image_id; + std::string m_local_image_name; + std::string m_name; + + mutable Mutex m_lock; + State m_state = STATE_STOPPED; + std::string m_state_desc; + + OptionalMirrorImageStatusState m_mirror_image_status_state = + boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN); + int m_last_r = 0; + + BootstrapProgressContext m_progress_cxt; + + bool m_finished = false; + bool m_delete_requested = false; + bool m_resync_requested = false; + + image_replayer::EventPreprocessor<ImageCtxT> *m_event_preprocessor = nullptr; + image_replayer::ReplayStatusFormatter<ImageCtxT> *m_replay_status_formatter = + nullptr; + IoCtxRef m_local_ioctx; + ImageCtxT *m_local_image_ctx = nullptr; + std::string m_local_image_tag_owner; + + decltype(ImageCtxT::journal) m_local_journal = nullptr; + librbd::journal::Replay<ImageCtxT> *m_local_replay = nullptr; + Journaler* m_remote_journaler = nullptr; + ::journal::ReplayHandler *m_replay_handler = nullptr; + librbd::journal::Listener *m_journal_listener; + + Context *m_on_start_finish = nullptr; + Context *m_on_stop_finish = nullptr; + Context *m_update_status_task = nullptr; + int m_update_status_interval = 0; + librados::AioCompletion *m_update_status_comp = nullptr; + bool m_stop_requested = false; + bool m_manual_stop = false; + + AdminSocketHook *m_asok_hook = nullptr; + PerfCounters *m_perf_counters = nullptr; + + image_replayer::BootstrapRequest<ImageCtxT> *m_bootstrap_request = nullptr; + + uint32_t m_in_flight_status_updates = 0; + bool m_update_status_requested = false; + Context *m_on_update_status_finish = nullptr; + + cls::journal::ClientState m_client_state = + cls::journal::CLIENT_STATE_DISCONNECTED; + librbd::journal::MirrorPeerClientMeta m_client_meta; + + ReplayEntry m_replay_entry; + utime_t m_replay_start_time; + bool m_replay_tag_valid = false; + uint64_t m_replay_tag_tid = 0; + cls::journal::Tag m_replay_tag; + librbd::journal::TagData m_replay_tag_data; + librbd::journal::EventEntry m_event_entry; + AsyncOpTracker m_event_replay_tracker; + Context *m_delayed_preprocess_task = nullptr; + + struct RemoteJournalerListener : public ::journal::JournalMetadataListener { + ImageReplayer *replayer; + + RemoteJournalerListener(ImageReplayer *replayer) : replayer(replayer) { } + + void handle_update(::journal::JournalMetadata *) override; + } m_remote_listener; + + struct C_ReplayCommitted : public Context { + ImageReplayer *replayer; + ReplayEntry replay_entry; + utime_t replay_start_time; + + C_ReplayCommitted(ImageReplayer *replayer, + ReplayEntry &&replay_entry, + const utime_t &replay_start_time) + : replayer(replayer), replay_entry(std::move(replay_entry)), + replay_start_time(replay_start_time) { + } + void finish(int r) override { + replayer->handle_process_entry_safe(replay_entry, replay_start_time, r); + } + }; + + static std::string to_string(const State state); + + bool is_stopped_() const { + return m_state == STATE_STOPPED; + } + bool is_running_() const { + return !is_stopped_() && m_state != STATE_STOPPING && !m_stop_requested; + } + bool is_replaying_() const { + return (m_state == STATE_REPLAYING || + m_state == STATE_REPLAY_FLUSHING); + } + + void flush_local_replay(Context* on_flush); + void handle_flush_local_replay(Context* on_flush, int r); + + void flush_commit_position(Context* on_flush); + void handle_flush_commit_position(Context* on_flush, int r); + + bool update_mirror_image_status(bool force, const OptionalState &state); + bool start_mirror_image_status_update(bool force, bool restarting); + void finish_mirror_image_status_update(); + void queue_mirror_image_status_update(const OptionalState &state); + void send_mirror_status_update(const OptionalState &state); + void handle_mirror_status_update(int r); + void reschedule_update_status_task(int new_interval); + + void shut_down(int r); + void handle_shut_down(int r); + void handle_remote_journal_metadata_updated(); + + void prepare_local_image(); + void handle_prepare_local_image(int r); + + void prepare_remote_image(); + void handle_prepare_remote_image(int r); + + void bootstrap(); + void handle_bootstrap(int r); + + void init_remote_journaler(); + void handle_init_remote_journaler(int r); + + void start_replay(); + void handle_start_replay(int r); + + void replay_flush(); + void handle_replay_flush(int r); + + void get_remote_tag(); + void handle_get_remote_tag(int r); + + void allocate_local_tag(); + void handle_allocate_local_tag(int r); + + void preprocess_entry(); + void handle_preprocess_entry_ready(int r); + void handle_preprocess_entry_safe(int r); + + void process_entry(); + void handle_process_entry_ready(int r); + void handle_process_entry_safe(const ReplayEntry& replay_entry, + const utime_t &m_replay_start_time, int r); + + void register_admin_socket_hook(); + void unregister_admin_socket_hook(); + void reregister_admin_socket_hook(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageReplayer<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_H diff --git a/src/tools/rbd_mirror/ImageSync.cc b/src/tools/rbd_mirror/ImageSync.cc new file mode 100644 index 00000000..929d75c2 --- /dev/null +++ b/src/tools/rbd_mirror/ImageSync.cc @@ -0,0 +1,481 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ImageSync.h" +#include "InstanceWatcher.h" +#include "ProgressContext.h" +#include "common/debug.h" +#include "common/Timer.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/DeepCopyRequest.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include "librbd/internal.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/image_sync/SyncPointCreateRequest.h" +#include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageSync: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { + +using namespace image_sync; +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::unique_lock_name; + +template <typename I> +class ImageSync<I>::ImageCopyProgressContext : public librbd::ProgressContext { +public: + ImageCopyProgressContext(ImageSync *image_sync) : image_sync(image_sync) { + } + + int update_progress(uint64_t object_no, uint64_t object_count) override { + image_sync->handle_copy_image_update_progress(object_no, object_count); + return 0; + } + + ImageSync *image_sync; +}; + +template <typename I> +ImageSync<I>::ImageSync(I *local_image_ctx, I *remote_image_ctx, + SafeTimer *timer, Mutex *timer_lock, + const std::string &mirror_uuid, Journaler *journaler, + MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue, + InstanceWatcher<I> *instance_watcher, + Context *on_finish, ProgressContext *progress_ctx) + : BaseRequest("rbd::mirror::ImageSync", local_image_ctx->cct, on_finish), + m_local_image_ctx(local_image_ctx), m_remote_image_ctx(remote_image_ctx), + m_timer(timer), m_timer_lock(timer_lock), m_mirror_uuid(mirror_uuid), + m_journaler(journaler), m_client_meta(client_meta), + m_work_queue(work_queue), m_instance_watcher(instance_watcher), + m_progress_ctx(progress_ctx), + m_lock(unique_lock_name("ImageSync::m_lock", this)), + m_update_sync_point_interval(m_local_image_ctx->cct->_conf.template get_val<double>( + "rbd_mirror_sync_point_update_age")), m_client_meta_copy(*client_meta) { +} + +template <typename I> +ImageSync<I>::~ImageSync() { + ceph_assert(m_image_copy_request == nullptr); + ceph_assert(m_image_copy_prog_ctx == nullptr); + ceph_assert(m_update_sync_ctx == nullptr); +} + +template <typename I> +void ImageSync<I>::send() { + send_notify_sync_request(); +} + +template <typename I> +void ImageSync<I>::cancel() { + Mutex::Locker locker(m_lock); + + dout(10) << dendl; + + m_canceled = true; + + if (m_instance_watcher->cancel_sync_request(m_local_image_ctx->id)) { + return; + } + + if (m_image_copy_request != nullptr) { + m_image_copy_request->cancel(); + } +} + +template <typename I> +void ImageSync<I>::send_notify_sync_request() { + update_progress("NOTIFY_SYNC_REQUEST"); + + dout(10) << dendl; + + m_lock.Lock(); + if (m_canceled) { + m_lock.Unlock(); + BaseRequest::finish(-ECANCELED); + return; + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_notify_sync_request>(this)); + m_instance_watcher->notify_sync_request(m_local_image_ctx->id, ctx); + m_lock.Unlock(); +} + +template <typename I> +void ImageSync<I>::handle_notify_sync_request(int r) { + dout(10) << ": r=" << r << dendl; + + m_lock.Lock(); + if (r == 0 && m_canceled) { + r = -ECANCELED; + } + m_lock.Unlock(); + + if (r < 0) { + BaseRequest::finish(r); + return; + } + + send_prune_catch_up_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_prune_catch_up_sync_point() { + update_progress("PRUNE_CATCH_UP_SYNC_POINT"); + + if (m_client_meta->sync_points.empty()) { + send_create_sync_point(); + return; + } + + dout(10) << dendl; + + // prune will remove sync points with missing snapshots and + // ensure we have a maximum of one sync point (in case we + // restarted) + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_prune_catch_up_sync_point>(this); + SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create( + m_remote_image_ctx, false, m_journaler, m_client_meta, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_prune_catch_up_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to prune catch-up sync point: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_create_sync_point() { + update_progress("CREATE_SYNC_POINT"); + + // TODO: when support for disconnecting laggy clients is added, + // re-connect and create catch-up sync point + if (m_client_meta->sync_points.size() > 0) { + send_copy_image(); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_create_sync_point>(this); + SyncPointCreateRequest<I> *request = SyncPointCreateRequest<I>::create( + m_remote_image_ctx, m_mirror_uuid, m_journaler, m_client_meta, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_create_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to create sync point: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_copy_image(); +} + +template <typename I> +void ImageSync<I>::send_copy_image() { + librados::snap_t snap_id_start = 0; + librados::snap_t snap_id_end; + librbd::deep_copy::ObjectNumber object_number; + int r = 0; + { + RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock); + ceph_assert(!m_client_meta->sync_points.empty()); + auto &sync_point = m_client_meta->sync_points.front(); + snap_id_end = m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name); + if (snap_id_end == CEPH_NOSNAP) { + derr << ": failed to locate snapshot: " << sync_point.snap_name << dendl; + r = -ENOENT; + } else if (!sync_point.from_snap_name.empty()) { + snap_id_start = m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.from_snap_name); + if (snap_id_start == CEPH_NOSNAP) { + derr << ": failed to locate from snapshot: " + << sync_point.from_snap_name << dendl; + r = -ENOENT; + } + } + object_number = sync_point.object_number; + } + if (r < 0) { + finish(r); + return; + } + + m_lock.Lock(); + if (m_canceled) { + m_lock.Unlock(); + finish(-ECANCELED); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_copy_image>(this); + m_image_copy_prog_ctx = new ImageCopyProgressContext(this); + m_image_copy_request = librbd::DeepCopyRequest<I>::create( + m_remote_image_ctx, m_local_image_ctx, snap_id_start, snap_id_end, + 0, false, object_number, m_work_queue, &m_client_meta->snap_seqs, + m_image_copy_prog_ctx, ctx); + m_image_copy_request->get(); + m_lock.Unlock(); + + update_progress("COPY_IMAGE"); + + m_image_copy_request->send(); +} + +template <typename I> +void ImageSync<I>::handle_copy_image(int r) { + dout(10) << ": r=" << r << dendl; + + { + Mutex::Locker timer_locker(*m_timer_lock); + Mutex::Locker locker(m_lock); + m_image_copy_request->put(); + m_image_copy_request = nullptr; + delete m_image_copy_prog_ctx; + m_image_copy_prog_ctx = nullptr; + if (r == 0 && m_canceled) { + r = -ECANCELED; + } + + if (m_update_sync_ctx != nullptr) { + m_timer->cancel_event(m_update_sync_ctx); + m_update_sync_ctx = nullptr; + } + + if (m_updating_sync_point) { + m_ret_val = r; + return; + } + } + + if (r == -ECANCELED) { + dout(10) << ": image copy canceled" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << ": failed to copy image: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_flush_sync_point(); +} + +template <typename I> +void ImageSync<I>::handle_copy_image_update_progress(uint64_t object_no, + uint64_t object_count) { + int percent = 100 * object_no / object_count; + update_progress("COPY_IMAGE " + stringify(percent) + "%"); + + Mutex::Locker locker(m_lock); + m_image_copy_object_no = object_no; + m_image_copy_object_count = object_count; + + if (m_update_sync_ctx == nullptr && !m_updating_sync_point) { + send_update_sync_point(); + } +} + +template <typename I> +void ImageSync<I>::send_update_sync_point() { + ceph_assert(m_lock.is_locked()); + + m_update_sync_ctx = nullptr; + + if (m_canceled) { + return; + } + + auto sync_point = &m_client_meta->sync_points.front(); + + if (m_client_meta->sync_object_count == m_image_copy_object_count && + sync_point->object_number && + (m_image_copy_object_no - 1) == sync_point->object_number.get()) { + // update sync point did not progress since last sync + return; + } + + m_updating_sync_point = true; + + m_client_meta_copy = *m_client_meta; + m_client_meta->sync_object_count = m_image_copy_object_count; + if (m_image_copy_object_no > 0) { + sync_point->object_number = m_image_copy_object_no - 1; + } + + CephContext *cct = m_local_image_ctx->cct; + ldout(cct, 20) << ": sync_point=" << *sync_point << dendl; + + bufferlist client_data_bl; + librbd::journal::ClientData client_data(*m_client_meta); + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_update_sync_point>( + this); + m_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void ImageSync<I>::handle_update_sync_point(int r) { + CephContext *cct = m_local_image_ctx->cct; + ldout(cct, 20) << ": r=" << r << dendl; + + if (r < 0) { + *m_client_meta = m_client_meta_copy; + lderr(cct) << ": failed to update client data: " << cpp_strerror(r) + << dendl; + } + + { + Mutex::Locker timer_locker(*m_timer_lock); + Mutex::Locker locker(m_lock); + m_updating_sync_point = false; + + if (m_image_copy_request != nullptr) { + m_update_sync_ctx = new FunctionContext( + [this](int r) { + Mutex::Locker locker(m_lock); + this->send_update_sync_point(); + }); + m_timer->add_event_after(m_update_sync_point_interval, + m_update_sync_ctx); + return; + } + } + + send_flush_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_flush_sync_point() { + if (m_ret_val < 0) { + finish(m_ret_val); + return; + } + + update_progress("FLUSH_SYNC_POINT"); + + m_client_meta_copy = *m_client_meta; + m_client_meta->sync_object_count = m_image_copy_object_count; + auto sync_point = &m_client_meta->sync_points.front(); + if (m_image_copy_object_no > 0) { + sync_point->object_number = m_image_copy_object_no - 1; + } else { + sync_point->object_number = boost::none; + } + + dout(10) << ": sync_point=" << *sync_point << dendl; + + bufferlist client_data_bl; + librbd::journal::ClientData client_data(*m_client_meta); + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_flush_sync_point>( + this); + m_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void ImageSync<I>::handle_flush_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + *m_client_meta = m_client_meta_copy; + + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_prune_sync_points(); +} + +template <typename I> +void ImageSync<I>::send_prune_sync_points() { + dout(10) << dendl; + + update_progress("PRUNE_SYNC_POINTS"); + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_prune_sync_points>(this); + SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create( + m_remote_image_ctx, true, m_journaler, m_client_meta, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_prune_sync_points(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to prune sync point: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (!m_client_meta->sync_points.empty()) { + send_copy_image(); + return; + } + + finish(0); +} + +template <typename I> +void ImageSync<I>::update_progress(const std::string &description) { + dout(20) << ": " << description << dendl; + + if (m_progress_ctx) { + m_progress_ctx->update_progress("IMAGE_SYNC/" + description); + } +} + +template <typename I> +void ImageSync<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_instance_watcher->notify_sync_complete(m_local_image_ctx->id); + BaseRequest::finish(r); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageSync<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageSync.h b/src/tools/rbd_mirror/ImageSync.h new file mode 100644 index 00000000..9e00c129 --- /dev/null +++ b/src/tools/rbd_mirror/ImageSync.h @@ -0,0 +1,160 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_H +#define RBD_MIRROR_IMAGE_SYNC_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/journal/TypeTraits.h" +#include "librbd/journal/Types.h" +#include "common/Mutex.h" +#include "tools/rbd_mirror/BaseRequest.h" +#include <map> +#include <vector> + +class Context; +class ContextWQ; +namespace journal { class Journaler; } +namespace librbd { class ProgressContext; } +namespace librbd { template <typename> class DeepCopyRequest; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { + +class ProgressContext; + +template <typename> class InstanceWatcher; + +template <typename ImageCtxT = librbd::ImageCtx> +class ImageSync : public BaseRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + + static ImageSync* create(ImageCtxT *local_image_ctx, + ImageCtxT *remote_image_ctx, + SafeTimer *timer, Mutex *timer_lock, + const std::string &mirror_uuid, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue, + InstanceWatcher<ImageCtxT> *instance_watcher, + Context *on_finish, + ProgressContext *progress_ctx = nullptr) { + return new ImageSync(local_image_ctx, remote_image_ctx, timer, timer_lock, + mirror_uuid, journaler, client_meta, work_queue, + instance_watcher, on_finish, progress_ctx); + } + + ImageSync(ImageCtxT *local_image_ctx, ImageCtxT *remote_image_ctx, + SafeTimer *timer, Mutex *timer_lock, const std::string &mirror_uuid, + Journaler *journaler, MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue, InstanceWatcher<ImageCtxT> *instance_watcher, + Context *on_finish, ProgressContext *progress_ctx = nullptr); + ~ImageSync() override; + + void send() override; + void cancel() override; + +protected: + void finish(int r) override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * NOTIFY_SYNC_REQUEST + * | + * v + * PRUNE_CATCH_UP_SYNC_POINT + * | + * v + * CREATE_SYNC_POINT (skip if already exists and + * | not disconnected) + * v + * COPY_IMAGE . . . . . . . . . . . . . . + * | . + * v . + * FLUSH_SYNC_POINT . + * | . (image sync canceled) + * v . + * PRUNE_SYNC_POINTS . + * | . + * v . + * <finish> < . . . . . . . . . . . . . . + * + * @endverbatim + */ + + typedef std::vector<librados::snap_t> SnapIds; + typedef std::map<librados::snap_t, SnapIds> SnapMap; + class ImageCopyProgressContext; + + ImageCtxT *m_local_image_ctx; + ImageCtxT *m_remote_image_ctx; + SafeTimer *m_timer; + Mutex *m_timer_lock; + std::string m_mirror_uuid; + Journaler *m_journaler; + MirrorPeerClientMeta *m_client_meta; + ContextWQ *m_work_queue; + InstanceWatcher<ImageCtxT> *m_instance_watcher; + ProgressContext *m_progress_ctx; + + SnapMap m_snap_map; + + Mutex m_lock; + bool m_canceled = false; + + librbd::DeepCopyRequest<ImageCtxT> *m_image_copy_request = nullptr; + librbd::ProgressContext *m_image_copy_prog_ctx = nullptr; + + bool m_updating_sync_point = false; + Context *m_update_sync_ctx = nullptr; + double m_update_sync_point_interval; + uint64_t m_image_copy_object_no = 0; + uint64_t m_image_copy_object_count = 0; + MirrorPeerClientMeta m_client_meta_copy; + + int m_ret_val = 0; + + void send_notify_sync_request(); + void handle_notify_sync_request(int r); + + void send_prune_catch_up_sync_point(); + void handle_prune_catch_up_sync_point(int r); + + void send_create_sync_point(); + void handle_create_sync_point(int r); + + void send_update_max_object_count(); + void handle_update_max_object_count(int r); + + void send_copy_image(); + void handle_copy_image(int r); + void handle_copy_image_update_progress(uint64_t object_no, + uint64_t object_count); + void send_update_sync_point(); + void handle_update_sync_point(int r); + + void send_flush_sync_point(); + void handle_flush_sync_point(int r); + + void send_prune_sync_points(); + void handle_prune_sync_points(int r); + + void update_progress(const std::string &description); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageSync<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_H diff --git a/src/tools/rbd_mirror/ImageSyncThrottler.cc b/src/tools/rbd_mirror/ImageSyncThrottler.cc new file mode 100644 index 00000000..b395a012 --- /dev/null +++ b/src/tools/rbd_mirror/ImageSyncThrottler.cc @@ -0,0 +1,227 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "ImageSyncThrottler.h" +#include "common/Formatter.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageSyncThrottler:: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +template <typename I> +ImageSyncThrottler<I>::ImageSyncThrottler(CephContext *cct) + : m_cct(cct), + m_lock(librbd::util::unique_lock_name("rbd::mirror::ImageSyncThrottler", + this)), + m_max_concurrent_syncs(cct->_conf.get_val<uint64_t>( + "rbd_mirror_concurrent_image_syncs")) { + dout(20) << "max_concurrent_syncs=" << m_max_concurrent_syncs << dendl; + m_cct->_conf.add_observer(this); +} + +template <typename I> +ImageSyncThrottler<I>::~ImageSyncThrottler() { + m_cct->_conf.remove_observer(this); + + Mutex::Locker locker(m_lock); + ceph_assert(m_inflight_ops.empty()); + ceph_assert(m_queue.empty()); +} + +template <typename I> +void ImageSyncThrottler<I>::start_op(const std::string &id, Context *on_start) { + dout(20) << "id=" << id << dendl; + + int r = 0; + { + Mutex::Locker locker(m_lock); + + if (m_inflight_ops.count(id) > 0) { + dout(20) << "duplicate for already started op " << id << dendl; + } else if (m_queued_ops.count(id) > 0) { + dout(20) << "duplicate for already queued op " << id << dendl; + std::swap(m_queued_ops[id], on_start); + r = -ENOENT; + } else if (m_max_concurrent_syncs == 0 || + m_inflight_ops.size() < m_max_concurrent_syncs) { + ceph_assert(m_queue.empty()); + m_inflight_ops.insert(id); + dout(20) << "ready to start sync for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_syncs << "]" + << dendl; + } else { + m_queue.push_back(id); + std::swap(m_queued_ops[id], on_start); + dout(20) << "image sync for " << id << " has been queued" << dendl; + } + } + + if (on_start != nullptr) { + on_start->complete(r); + } +} + +template <typename I> +bool ImageSyncThrottler<I>::cancel_op(const std::string &id) { + dout(20) << "id=" << id << dendl; + + Context *on_start = nullptr; + { + Mutex::Locker locker(m_lock); + auto it = m_queued_ops.find(id); + if (it != m_queued_ops.end()) { + dout(20) << "canceled queued sync for " << id << dendl; + m_queue.remove(id); + on_start = it->second; + m_queued_ops.erase(it); + } + } + + if (on_start == nullptr) { + return false; + } + + on_start->complete(-ECANCELED); + return true; +} + +template <typename I> +void ImageSyncThrottler<I>::finish_op(const std::string &id) { + dout(20) << "id=" << id << dendl; + + if (cancel_op(id)) { + return; + } + + Context *on_start = nullptr; + { + Mutex::Locker locker(m_lock); + + m_inflight_ops.erase(id); + + if (m_inflight_ops.size() < m_max_concurrent_syncs && !m_queue.empty()) { + auto id = m_queue.front(); + auto it = m_queued_ops.find(id); + ceph_assert(it != m_queued_ops.end()); + m_inflight_ops.insert(id); + dout(20) << "ready to start sync for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_syncs << "]" + << dendl; + on_start = it->second; + m_queued_ops.erase(it); + m_queue.pop_front(); + } + } + + if (on_start != nullptr) { + on_start->complete(0); + } +} + +template <typename I> +void ImageSyncThrottler<I>::drain(int r) { + dout(20) << dendl; + + std::map<std::string, Context *> queued_ops; + { + Mutex::Locker locker(m_lock); + std::swap(m_queued_ops, queued_ops); + m_queue.clear(); + m_inflight_ops.clear(); + } + + for (auto &it : queued_ops) { + it.second->complete(r); + } +} + +template <typename I> +void ImageSyncThrottler<I>::set_max_concurrent_syncs(uint32_t max) { + dout(20) << "max=" << max << dendl; + + std::list<Context *> ops; + { + Mutex::Locker locker(m_lock); + m_max_concurrent_syncs = max; + + // Start waiting ops in the case of available free slots + while ((m_max_concurrent_syncs == 0 || + m_inflight_ops.size() < m_max_concurrent_syncs) && + !m_queue.empty()) { + auto id = m_queue.front(); + m_inflight_ops.insert(id); + dout(20) << "ready to start sync for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_syncs << "]" + << dendl; + auto it = m_queued_ops.find(id); + ceph_assert(it != m_queued_ops.end()); + ops.push_back(it->second); + m_queued_ops.erase(it); + m_queue.pop_front(); + } + } + + for (const auto& ctx : ops) { + ctx->complete(0); + } +} + +template <typename I> +void ImageSyncThrottler<I>::print_status(Formatter *f, std::stringstream *ss) { + dout(20) << dendl; + + Mutex::Locker locker(m_lock); + + if (f) { + f->dump_int("max_parallel_syncs", m_max_concurrent_syncs); + f->dump_int("running_syncs", m_inflight_ops.size()); + f->dump_int("waiting_syncs", m_queue.size()); + f->flush(*ss); + } else { + *ss << "[ "; + *ss << "max_parallel_syncs=" << m_max_concurrent_syncs << ", "; + *ss << "running_syncs=" << m_inflight_ops.size() << ", "; + *ss << "waiting_syncs=" << m_queue.size() << " ]"; + } +} + +template <typename I> +const char** ImageSyncThrottler<I>::get_tracked_conf_keys() const { + static const char* KEYS[] = { + "rbd_mirror_concurrent_image_syncs", + NULL + }; + return KEYS; +} + +template <typename I> +void ImageSyncThrottler<I>::handle_conf_change(const ConfigProxy& conf, + const set<string> &changed) { + if (changed.count("rbd_mirror_concurrent_image_syncs")) { + set_max_concurrent_syncs(conf.get_val<uint64_t>("rbd_mirror_concurrent_image_syncs")); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageSyncThrottler<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageSyncThrottler.h b/src/tools/rbd_mirror/ImageSyncThrottler.h new file mode 100644 index 00000000..c0cda61e --- /dev/null +++ b/src/tools/rbd_mirror/ImageSyncThrottler.h @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_THROTTLER_H +#define RBD_MIRROR_IMAGE_SYNC_THROTTLER_H + +#include <list> +#include <map> +#include <set> +#include <sstream> +#include <string> +#include <utility> + +#include "common/Mutex.h" +#include "common/config_obs.h" + +class CephContext; +class Context; + +namespace ceph { class Formatter; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class ImageSyncThrottler : public md_config_obs_t { +public: + static ImageSyncThrottler *create(CephContext *cct) { + return new ImageSyncThrottler(cct); + } + void destroy() { + delete this; + } + + ImageSyncThrottler(CephContext *cct); + ~ImageSyncThrottler() override; + + void set_max_concurrent_syncs(uint32_t max); + void start_op(const std::string &id, Context *on_start); + bool cancel_op(const std::string &id); + void finish_op(const std::string &id); + void drain(int r); + + void print_status(Formatter *f, std::stringstream *ss); + +private: + CephContext *m_cct; + Mutex m_lock; + uint32_t m_max_concurrent_syncs; + std::list<std::string> m_queue; + std::map<std::string, Context *> m_queued_ops; + std::set<std::string> m_inflight_ops; + + const char **get_tracked_conf_keys() const override; + void handle_conf_change(const ConfigProxy& conf, + const std::set<std::string> &changed) override; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageSyncThrottler<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_THROTTLER_H diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc new file mode 100644 index 00000000..c0086a48 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceReplayer.cc @@ -0,0 +1,510 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/stringify.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "ImageReplayer.h" +#include "InstanceReplayer.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceReplayer: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +namespace { + +const std::string SERVICE_DAEMON_ASSIGNED_COUNT_KEY("image_assigned_count"); +const std::string SERVICE_DAEMON_WARNING_COUNT_KEY("image_warning_count"); +const std::string SERVICE_DAEMON_ERROR_COUNT_KEY("image_error_count"); + +} // anonymous namespace + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +template <typename I> +InstanceReplayer<I>::InstanceReplayer( + Threads<I> *threads, ServiceDaemon<I>* service_daemon, + RadosRef local_rados, const std::string &local_mirror_uuid, + int64_t local_pool_id) + : m_threads(threads), m_service_daemon(service_daemon), + m_local_rados(local_rados), m_local_mirror_uuid(local_mirror_uuid), + m_local_pool_id(local_pool_id), + m_lock("rbd::mirror::InstanceReplayer " + stringify(local_pool_id)) { +} + +template <typename I> +InstanceReplayer<I>::~InstanceReplayer() { + ceph_assert(m_image_state_check_task == nullptr); + ceph_assert(m_async_op_tracker.empty()); + ceph_assert(m_image_replayers.empty()); +} + +template <typename I> +bool InstanceReplayer<I>::is_blacklisted() const { + std::lock_guard locker{m_lock}; + return m_blacklisted; +} + +template <typename I> +int InstanceReplayer<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void InstanceReplayer<I>::init(Context *on_finish) { + dout(10) << dendl; + + Context *ctx = new FunctionContext( + [this, on_finish] (int r) { + { + Mutex::Locker timer_locker(m_threads->timer_lock); + schedule_image_state_check_task(); + } + on_finish->complete(0); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void InstanceReplayer<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_shut_down == nullptr); + m_on_shut_down = on_finish; + + Context *ctx = new FunctionContext( + [this] (int r) { + cancel_image_state_check_task(); + wait_for_ops(); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::add_peer(std::string peer_uuid, + librados::IoCtx io_ctx) { + dout(10) << peer_uuid << dendl; + + Mutex::Locker locker(m_lock); + auto result = m_peers.insert(Peer(peer_uuid, io_ctx)).second; + ceph_assert(result); +} + +template <typename I> +void InstanceReplayer<I>::release_all(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + C_Gather *gather_ctx = new C_Gather(g_ceph_context, on_finish); + for (auto it = m_image_replayers.begin(); it != m_image_replayers.end(); + it = m_image_replayers.erase(it)) { + auto image_replayer = it->second; + auto ctx = gather_ctx->new_sub(); + ctx = new FunctionContext( + [image_replayer, ctx] (int r) { + image_replayer->destroy(); + ctx->complete(0); + }); + stop_image_replayer(image_replayer, ctx); + } + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> *instance_watcher, + const std::string &global_image_id, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it == m_image_replayers.end()) { + auto image_replayer = ImageReplayer<I>::create( + m_threads, instance_watcher, m_local_rados, + m_local_mirror_uuid, m_local_pool_id, global_image_id); + + dout(10) << global_image_id << ": creating replayer " << image_replayer + << dendl; + + it = m_image_replayers.insert(std::make_pair(global_image_id, + image_replayer)).first; + + // TODO only a single peer is currently supported + ceph_assert(m_peers.size() == 1); + auto peer = *m_peers.begin(); + image_replayer->add_peer(peer.peer_uuid, peer.io_ctx); + start_image_replayer(image_replayer); + } else { + // A duplicate acquire notification implies (1) connection hiccup or + // (2) new leader election. For the second case, restart the replayer to + // detect if the image has been deleted while the leader was offline + auto& image_replayer = it->second; + image_replayer->set_finished(false); + image_replayer->restart(); + } + + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void InstanceReplayer<I>::release_image(const std::string &global_image_id, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it == m_image_replayers.end()) { + dout(5) << global_image_id << ": not found" << dendl; + m_threads->work_queue->queue(on_finish, 0); + return; + } + + auto image_replayer = it->second; + m_image_replayers.erase(it); + + on_finish = new FunctionContext( + [image_replayer, on_finish] (int r) { + image_replayer->destroy(); + on_finish->complete(0); + }); + stop_image_replayer(image_replayer, on_finish); +} + +template <typename I> +void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it != m_image_replayers.end()) { + // TODO only a single peer is currently supported, therefore + // we can just interrupt the current image replayer and + // it will eventually detect that the peer image is missing and + // determine if a delete propagation is required. + auto image_replayer = it->second; + image_replayer->restart(); + } + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void InstanceReplayer<I>::print_status(Formatter *f, stringstream *ss) { + dout(10) << dendl; + + if (!f) { + return; + } + + Mutex::Locker locker(m_lock); + + f->open_array_section("image_replayers"); + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->print_status(f, ss); + } + f->close_section(); +} + +template <typename I> +void InstanceReplayer<I>::start() +{ + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + m_manual_stop = false; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->start(nullptr, true); + } +} + +template <typename I> +void InstanceReplayer<I>::stop() +{ + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + m_manual_stop = true; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->stop(nullptr, true); + } +} + +template <typename I> +void InstanceReplayer<I>::restart() +{ + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + m_manual_stop = false; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->restart(); + } +} + +template <typename I> +void InstanceReplayer<I>::flush() +{ + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->flush(); + } +} + +template <typename I> +void InstanceReplayer<I>::start_image_replayer( + ImageReplayer<I> *image_replayer) { + ceph_assert(m_lock.is_locked()); + + std::string global_image_id = image_replayer->get_global_image_id(); + if (!image_replayer->is_stopped()) { + return; + } else if (image_replayer->is_blacklisted()) { + derr << "global_image_id=" << global_image_id << ": blacklisted detected " + << "during image replay" << dendl; + m_blacklisted = true; + return; + } else if (image_replayer->is_finished()) { + // TODO temporary until policy integrated + dout(5) << "removing image replayer for global_image_id=" + << global_image_id << dendl; + m_image_replayers.erase(image_replayer->get_global_image_id()); + image_replayer->destroy(); + return; + } else if (m_manual_stop) { + return; + } + + dout(10) << "global_image_id=" << global_image_id << dendl; + image_replayer->start(nullptr, false); +} + +template <typename I> +void InstanceReplayer<I>::queue_start_image_replayers() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + InstanceReplayer, &InstanceReplayer<I>::start_image_replayers>(this); + m_async_op_tracker.start_op(); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::start_image_replayers(int r) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + if (m_on_shut_down != nullptr) { + return; + } + + uint64_t image_count = 0; + uint64_t warning_count = 0; + uint64_t error_count = 0; + for (auto it = m_image_replayers.begin(); + it != m_image_replayers.end();) { + auto current_it(it); + ++it; + + ++image_count; + auto health_state = current_it->second->get_health_state(); + if (health_state == image_replayer::HEALTH_STATE_WARNING) { + ++warning_count; + } else if (health_state == image_replayer::HEALTH_STATE_ERROR) { + ++error_count; + } + + start_image_replayer(current_it->second); + } + + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_ASSIGNED_COUNT_KEY, image_count); + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_WARNING_COUNT_KEY, warning_count); + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_ERROR_COUNT_KEY, error_count); + + m_async_op_tracker.finish_op(); +} + +template <typename I> +void InstanceReplayer<I>::stop_image_replayer(ImageReplayer<I> *image_replayer, + Context *on_finish) { + dout(10) << image_replayer << " global_image_id=" + << image_replayer->get_global_image_id() << ", on_finish=" + << on_finish << dendl; + + if (image_replayer->is_stopped()) { + m_threads->work_queue->queue(on_finish, 0); + return; + } + + m_async_op_tracker.start_op(); + Context *ctx = create_async_context_callback( + m_threads->work_queue, new FunctionContext( + [this, image_replayer, on_finish] (int r) { + stop_image_replayer(image_replayer, on_finish); + m_async_op_tracker.finish_op(); + })); + + if (image_replayer->is_running()) { + image_replayer->stop(ctx, false); + } else { + int after = 1; + dout(10) << "scheduling image replayer " << image_replayer << " stop after " + << after << " sec (task " << ctx << ")" << dendl; + ctx = new FunctionContext( + [this, after, ctx] (int r) { + Mutex::Locker timer_locker(m_threads->timer_lock); + m_threads->timer->add_event_after(after, ctx); + }); + m_threads->work_queue->queue(ctx, 0); + } +} + +template <typename I> +void InstanceReplayer<I>::wait_for_ops() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + InstanceReplayer, &InstanceReplayer<I>::handle_wait_for_ops>(this); + + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void InstanceReplayer<I>::handle_wait_for_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Mutex::Locker locker(m_lock); + stop_image_replayers(); +} + +template <typename I> +void InstanceReplayer<I>::stop_image_replayers() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<InstanceReplayer<I>, + &InstanceReplayer<I>::handle_stop_image_replayers>(this)); + + C_Gather *gather_ctx = new C_Gather(g_ceph_context, ctx); + for (auto &it : m_image_replayers) { + stop_image_replayer(it.second, gather_ctx->new_sub()); + } + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::handle_stop_image_replayers(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + for (auto &it : m_image_replayers) { + ceph_assert(it.second->is_stopped()); + it.second->destroy(); + } + m_image_replayers.clear(); + + ceph_assert(m_on_shut_down != nullptr); + std::swap(on_finish, m_on_shut_down); + } + on_finish->complete(r); +} + +template <typename I> +void InstanceReplayer<I>::cancel_image_state_check_task() { + Mutex::Locker timer_locker(m_threads->timer_lock); + + if (m_image_state_check_task == nullptr) { + return; + } + + dout(10) << m_image_state_check_task << dendl; + bool canceled = m_threads->timer->cancel_event(m_image_state_check_task); + ceph_assert(canceled); + m_image_state_check_task = nullptr; +} + +template <typename I> +void InstanceReplayer<I>::schedule_image_state_check_task() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_image_state_check_task == nullptr); + + m_image_state_check_task = new FunctionContext( + [this](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_image_state_check_task = nullptr; + schedule_image_state_check_task(); + queue_start_image_replayers(); + }); + + auto cct = static_cast<CephContext *>(m_local_rados->cct()); + int after = cct->_conf.get_val<uint64_t>( + "rbd_mirror_image_state_check_interval"); + + dout(10) << "scheduling image state check after " << after << " sec (task " + << m_image_state_check_task << ")" << dendl; + m_threads->timer->add_event_after(after, m_image_state_check_task); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/InstanceReplayer.h b/src/tools/rbd_mirror/InstanceReplayer.h new file mode 100644 index 00000000..efbdde02 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceReplayer.h @@ -0,0 +1,123 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_INSTANCE_REPLAYER_H +#define RBD_MIRROR_INSTANCE_REPLAYER_H + +#include <map> +#include <sstream> + +#include "common/AsyncOpTracker.h" +#include "common/Formatter.h" +#include "common/Mutex.h" +#include "tools/rbd_mirror/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ImageReplayer; +template <typename> class InstanceWatcher; +template <typename> class ServiceDaemon; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class InstanceReplayer { +public: + static InstanceReplayer* create( + Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT>* service_daemon, + RadosRef local_rados, const std::string &local_mirror_uuid, + int64_t local_pool_id) { + return new InstanceReplayer(threads, service_daemon, local_rados, + local_mirror_uuid, local_pool_id); + } + void destroy() { + delete this; + } + + InstanceReplayer(Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT>* service_daemon, + RadosRef local_rados, const std::string &local_mirror_uuid, + int64_t local_pool_id); + ~InstanceReplayer(); + + bool is_blacklisted() const; + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void add_peer(std::string peer_uuid, librados::IoCtx io_ctx); + + void acquire_image(InstanceWatcher<ImageCtxT> *instance_watcher, + const std::string &global_image_id, Context *on_finish); + void release_image(const std::string &global_image_id, Context *on_finish); + void remove_peer_image(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish); + + void release_all(Context *on_finish); + + void print_status(Formatter *f, stringstream *ss); + void start(); + void stop(); + void restart(); + void flush(); + +private: + /** + * @verbatim + * + * <uninitialized> <-------------------\ + * | (init) | (repeat for each + * v STOP_IMAGE_REPLAYER ---\ image replayer) + * SCHEDULE_IMAGE_STATE_CHECK_TASK ^ ^ | + * | | | | + * v (shut_down) | \---------/ + * <initialized> -----------------> WAIT_FOR_OPS + * + * @endverbatim + */ + + Threads<ImageCtxT> *m_threads; + ServiceDaemon<ImageCtxT>* m_service_daemon; + RadosRef m_local_rados; + std::string m_local_mirror_uuid; + int64_t m_local_pool_id; + + mutable Mutex m_lock; + AsyncOpTracker m_async_op_tracker; + std::map<std::string, ImageReplayer<ImageCtxT> *> m_image_replayers; + Peers m_peers; + Context *m_image_state_check_task = nullptr; + Context *m_on_shut_down = nullptr; + bool m_manual_stop = false; + bool m_blacklisted = false; + + void wait_for_ops(); + void handle_wait_for_ops(int r); + + void start_image_replayer(ImageReplayer<ImageCtxT> *image_replayer); + void queue_start_image_replayers(); + void start_image_replayers(int r); + + void stop_image_replayer(ImageReplayer<ImageCtxT> *image_replayer, + Context *on_finish); + + void stop_image_replayers(); + void handle_stop_image_replayers(int r); + + void schedule_image_state_check_task(); + void cancel_image_state_check_task(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>; + +#endif // RBD_MIRROR_INSTANCE_REPLAYER_H diff --git a/src/tools/rbd_mirror/InstanceWatcher.cc b/src/tools/rbd_mirror/InstanceWatcher.cc new file mode 100644 index 00000000..d9e1ba23 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceWatcher.cc @@ -0,0 +1,1299 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "InstanceWatcher.h" +#include "include/stringify.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ManagedLock.h" +#include "librbd/Utils.h" +#include "InstanceReplayer.h" +#include "ImageSyncThrottler.h" +#include "common/Cond.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " + +namespace rbd { +namespace mirror { + +using namespace instance_watcher; + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; +using librbd::util::unique_lock_name; + +namespace { + +struct C_GetInstances : public Context { + std::vector<std::string> *instance_ids; + Context *on_finish; + bufferlist out_bl; + + C_GetInstances(std::vector<std::string> *instance_ids, Context *on_finish) + : instance_ids(instance_ids), on_finish(on_finish) { + } + + void finish(int r) override { + dout(10) << "C_GetInstances: " << this << " " << __func__ << ": r=" << r + << dendl; + + if (r == 0) { + auto it = out_bl.cbegin(); + r = librbd::cls_client::mirror_instances_list_finish(&it, instance_ids); + } else if (r == -ENOENT) { + r = 0; + } + on_finish->complete(r); + } +}; + +template <typename I> +struct C_RemoveInstanceRequest : public Context { + InstanceWatcher<I> instance_watcher; + Context *on_finish; + + C_RemoveInstanceRequest(librados::IoCtx &io_ctx, ContextWQ *work_queue, + const std::string &instance_id, Context *on_finish) + : instance_watcher(io_ctx, work_queue, nullptr, instance_id), + on_finish(on_finish) { + } + + void send() { + dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << dendl; + + instance_watcher.remove(this); + } + + void finish(int r) override { + dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + ceph_assert(r == 0); + + on_finish->complete(r); + } +}; + +} // anonymous namespace + +template <typename I> +struct InstanceWatcher<I>::C_NotifyInstanceRequest : public Context { + InstanceWatcher<I> *instance_watcher; + std::string instance_id; + uint64_t request_id; + bufferlist bl; + Context *on_finish; + bool send_to_leader; + std::unique_ptr<librbd::watcher::Notifier> notifier; + librbd::watcher::NotifyResponse response; + bool canceling = false; + + C_NotifyInstanceRequest(InstanceWatcher<I> *instance_watcher, + const std::string &instance_id, uint64_t request_id, + bufferlist &&bl, Context *on_finish) + : instance_watcher(instance_watcher), instance_id(instance_id), + request_id(request_id), bl(bl), on_finish(on_finish), + send_to_leader(instance_id.empty()) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": instance_watcher=" << instance_watcher << ", instance_id=" + << instance_id << ", request_id=" << request_id << dendl; + + ceph_assert(instance_watcher->m_lock.is_locked()); + + if (!send_to_leader) { + ceph_assert((!instance_id.empty())); + notifier.reset(new librbd::watcher::Notifier( + instance_watcher->m_work_queue, + instance_watcher->m_ioctx, + RBD_MIRROR_INSTANCE_PREFIX + instance_id)); + } + + instance_watcher->m_notify_op_tracker.start_op(); + auto result = instance_watcher->m_notify_ops.insert( + std::make_pair(instance_id, this)).second; + ceph_assert(result); + } + + void send() { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl; + + ceph_assert(instance_watcher->m_lock.is_locked()); + + if (canceling) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": canceling" << dendl; + instance_watcher->m_work_queue->queue(this, -ECANCELED); + return; + } + + if (send_to_leader) { + if (instance_watcher->m_leader_instance_id.empty()) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": suspending" << dendl; + instance_watcher->suspend_notify_request(this); + return; + } + + if (instance_watcher->m_leader_instance_id != instance_id) { + auto count = instance_watcher->m_notify_ops.erase( + std::make_pair(instance_id, this)); + ceph_assert(count > 0); + + instance_id = instance_watcher->m_leader_instance_id; + + auto result = instance_watcher->m_notify_ops.insert( + std::make_pair(instance_id, this)).second; + ceph_assert(result); + + notifier.reset(new librbd::watcher::Notifier( + instance_watcher->m_work_queue, + instance_watcher->m_ioctx, + RBD_MIRROR_INSTANCE_PREFIX + instance_id)); + } + } + + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": sending to " << instance_id << dendl; + notifier->notify(bl, &response, this); + } + + void cancel() { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl; + + ceph_assert(instance_watcher->m_lock.is_locked()); + + canceling = true; + instance_watcher->unsuspend_notify_request(this); + } + + void finish(int r) override { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + + if (r == 0 || r == -ETIMEDOUT) { + bool found = false; + for (auto &it : response.acks) { + auto &bl = it.second; + if (it.second.length() == 0) { + dout(5) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": no payload in ack, ignoring" << dendl; + continue; + } + try { + auto iter = bl.cbegin(); + NotifyAckPayload ack; + decode(ack, iter); + if (ack.instance_id != instance_watcher->get_instance_id()) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": ack instance_id (" << ack.instance_id << ") " + << "does not match, ignoring" << dendl; + continue; + } + if (ack.request_id != request_id) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": ack request_id (" << ack.request_id << ") " + << "does not match, ignoring" << dendl; + continue; + } + r = ack.ret_val; + found = true; + break; + } catch (const buffer::error &err) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": failed to decode ack: " << err.what() << dendl; + continue; + } + } + + if (!found) { + if (r == -ETIMEDOUT) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": resending after timeout" << dendl; + Mutex::Locker locker(instance_watcher->m_lock); + send(); + return; + } else { + r = -EINVAL; + } + } else { + if (r == -ESTALE && send_to_leader) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": resending due to leader change" << dendl; + Mutex::Locker locker(instance_watcher->m_lock); + send(); + return; + } + } + } + + on_finish->complete(r); + + { + Mutex::Locker locker(instance_watcher->m_lock); + auto result = instance_watcher->m_notify_ops.erase( + std::make_pair(instance_id, this)); + ceph_assert(result > 0); + instance_watcher->m_notify_op_tracker.finish_op(); + } + + delete this; + } + + void complete(int r) override { + finish(r); + } +}; + +template <typename I> +struct InstanceWatcher<I>::C_SyncRequest : public Context { + InstanceWatcher<I> *instance_watcher; + std::string sync_id; + Context *on_start; + Context *on_complete = nullptr; + C_NotifyInstanceRequest *req = nullptr; + + C_SyncRequest(InstanceWatcher<I> *instance_watcher, + const std::string &sync_id, Context *on_start) + : instance_watcher(instance_watcher), sync_id(sync_id), + on_start(on_start) { + dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": sync_id=" + << sync_id << dendl; + } + + void finish(int r) override { + dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + + if (on_start != nullptr) { + instance_watcher->handle_notify_sync_request(this, r); + } else { + instance_watcher->handle_notify_sync_complete(this, r); + delete this; + } + } + + // called twice + void complete(int r) override { + finish(r); + } +}; + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " \ + << this << " " << __func__ << ": " +template <typename I> +void InstanceWatcher<I>::get_instances(librados::IoCtx &io_ctx, + std::vector<std::string> *instance_ids, + Context *on_finish) { + librados::ObjectReadOperation op; + librbd::cls_client::mirror_instances_list_start(&op); + C_GetInstances *ctx = new C_GetInstances(instance_ids, on_finish); + librados::AioCompletion *aio_comp = create_rados_callback(ctx); + + int r = io_ctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &ctx->out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::remove_instance(librados::IoCtx &io_ctx, + ContextWQ *work_queue, + const std::string &instance_id, + Context *on_finish) { + auto req = new C_RemoveInstanceRequest<I>(io_ctx, work_queue, instance_id, + on_finish); + req->send(); +} + +template <typename I> +InstanceWatcher<I> *InstanceWatcher<I>::create( + librados::IoCtx &io_ctx, ContextWQ *work_queue, + InstanceReplayer<I> *instance_replayer) { + return new InstanceWatcher<I>(io_ctx, work_queue, instance_replayer, + stringify(io_ctx.get_instance_id())); +} + +template <typename I> +InstanceWatcher<I>::InstanceWatcher(librados::IoCtx &io_ctx, + ContextWQ *work_queue, + InstanceReplayer<I> *instance_replayer, + const std::string &instance_id) + : Watcher(io_ctx, work_queue, RBD_MIRROR_INSTANCE_PREFIX + instance_id), + m_instance_replayer(instance_replayer), m_instance_id(instance_id), + m_lock(unique_lock_name("rbd::mirror::InstanceWatcher::m_lock", this)), + m_instance_lock(librbd::ManagedLock<I>::create( + m_ioctx, m_work_queue, m_oid, this, librbd::managed_lock::EXCLUSIVE, true, + m_cct->_conf.get_val<uint64_t>("rbd_blacklist_expire_seconds"))) { +} + +template <typename I> +InstanceWatcher<I>::~InstanceWatcher() { + ceph_assert(m_requests.empty()); + ceph_assert(m_notify_ops.empty()); + ceph_assert(m_notify_op_tracker.empty()); + ceph_assert(m_suspended_ops.empty()); + ceph_assert(m_inflight_sync_reqs.empty()); + ceph_assert(m_image_sync_throttler == nullptr); + m_instance_lock->destroy(); +} + +template <typename I> +int InstanceWatcher<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void InstanceWatcher<I>::init(Context *on_finish) { + dout(10) << "instance_id=" << m_instance_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + register_instance(); +} + +template <typename I> +void InstanceWatcher<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void InstanceWatcher<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + release_lock(); +} + +template <typename I> +void InstanceWatcher<I>::remove(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + get_instance_locker(); +} + +template <typename I> +void InstanceWatcher<I>::notify_image_acquire( + const std::string &instance_id, const std::string &global_image_id, + Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", global_image_id=" + << global_image_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{ImageAcquirePayload{request_id, global_image_id}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_image_release( + const std::string &instance_id, const std::string &global_image_id, + Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", global_image_id=" + << global_image_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{ImageReleasePayload{request_id, global_image_id}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_peer_image_removed( + const std::string &instance_id, const std::string &global_image_id, + const std::string &peer_mirror_uuid, Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", " + << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{PeerImageRemovedPayload{request_id, global_image_id, + peer_mirror_uuid}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_request(const std::string &sync_id, + Context *on_sync_start) { + dout(10) << "sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_inflight_sync_reqs.count(sync_id) == 0); + + uint64_t request_id = ++m_request_seq; + + bufferlist bl; + encode(NotifyMessage{SyncRequestPayload{request_id, sync_id}}, bl); + + auto sync_ctx = new C_SyncRequest(this, sync_id, on_sync_start); + sync_ctx->req = new C_NotifyInstanceRequest(this, "", request_id, + std::move(bl), sync_ctx); + + m_inflight_sync_reqs[sync_id] = sync_ctx; + sync_ctx->req->send(); +} + +template <typename I> +bool InstanceWatcher<I>::cancel_sync_request(const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + auto it = m_inflight_sync_reqs.find(sync_id); + if (it == m_inflight_sync_reqs.end()) { + return false; + } + + auto sync_ctx = it->second; + + if (sync_ctx->on_start == nullptr) { + return false; + } + + ceph_assert(sync_ctx->req != nullptr); + sync_ctx->req->cancel(); + return true; +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_start(const std::string &instance_id, + const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + uint64_t request_id = ++m_request_seq; + + bufferlist bl; + encode(NotifyMessage{SyncStartPayload{request_id, sync_id}}, bl); + + auto ctx = new FunctionContext( + [this, sync_id] (int r) { + dout(10) << "finish: sync_id=" << sync_id << ", r=" << r << dendl; + Mutex::Locker locker(m_lock); + if (r != -ESTALE && m_image_sync_throttler != nullptr) { + m_image_sync_throttler->finish_op(sync_id); + } + }); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), ctx); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_complete(const std::string &sync_id) { + Mutex::Locker locker(m_lock); + notify_sync_complete(m_lock, sync_id); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_complete(const Mutex&, + const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + ceph_assert(m_lock.is_locked()); + + auto it = m_inflight_sync_reqs.find(sync_id); + ceph_assert(it != m_inflight_sync_reqs.end()); + + auto sync_ctx = it->second; + ceph_assert(sync_ctx->req == nullptr); + + m_inflight_sync_reqs.erase(it); + m_work_queue->queue(sync_ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify_sync_request(C_SyncRequest *sync_ctx, + int r) { + dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl; + + Context *on_start = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(sync_ctx->req != nullptr); + ceph_assert(sync_ctx->on_start != nullptr); + + if (sync_ctx->req->canceling) { + r = -ECANCELED; + } + + std::swap(sync_ctx->on_start, on_start); + sync_ctx->req = nullptr; + + if (r == -ECANCELED) { + notify_sync_complete(m_lock, sync_ctx->sync_id); + } + } + + on_start->complete(r == -ECANCELED ? r : 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify_sync_complete(C_SyncRequest *sync_ctx, + int r) { + dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl; + + if (sync_ctx->on_complete != nullptr) { + sync_ctx->on_complete->complete(r); + } +} + +template <typename I> +void InstanceWatcher<I>::print_sync_status(Formatter *f, stringstream *ss) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + if (m_image_sync_throttler != nullptr) { + m_image_sync_throttler->print_status(f, ss); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_acquire_leader() { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_image_sync_throttler == nullptr); + m_image_sync_throttler = ImageSyncThrottler<I>::create(m_cct); + + m_leader_instance_id = m_instance_id; + unsuspend_notify_requests(); +} + +template <typename I> +void InstanceWatcher<I>::handle_release_leader() { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_image_sync_throttler != nullptr); + + m_leader_instance_id.clear(); + + m_image_sync_throttler->drain(-ESTALE); + m_image_sync_throttler->destroy(); + m_image_sync_throttler = nullptr; +} + +template <typename I> +void InstanceWatcher<I>::handle_update_leader( + const std::string &leader_instance_id) { + dout(10) << "leader_instance_id=" << leader_instance_id << dendl; + + Mutex::Locker locker(m_lock); + + m_leader_instance_id = leader_instance_id; + + if (!m_leader_instance_id.empty()) { + unsuspend_notify_requests(); + } +} + +template <typename I> +void InstanceWatcher<I>::cancel_notify_requests( + const std::string &instance_id) { + dout(10) << "instance_id=" << instance_id << dendl; + + Mutex::Locker locker(m_lock); + + for (auto op : m_notify_ops) { + if (op.first == instance_id && !op.second->send_to_leader) { + op.second->cancel(); + } + } +} + +template <typename I> +void InstanceWatcher<I>::register_instance() { + ceph_assert(m_lock.is_locked()); + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_instances_add(&op, m_instance_id); + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_instance>(this); + + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_register_instance(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + if (r == 0) { + create_instance_object(); + return; + } + + derr << "error registering instance: " << cpp_strerror(r) << dendl; + + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + + +template <typename I> +void InstanceWatcher<I>::create_instance_object() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + librados::ObjectWriteOperation op; + op.create(true); + + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, + &InstanceWatcher<I>::handle_create_instance_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_create_instance_object(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error creating " << m_oid << " object: " << cpp_strerror(r) + << dendl; + + m_ret_val = r; + unregister_instance(); + return; + } + + register_watch(); +} + +template <typename I> +void InstanceWatcher<I>::register_watch() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_watch>(this)); + + librbd::Watcher::register_watch(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_register_watch(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error registering instance watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + + m_ret_val = r; + remove_instance_object(); + return; + } + + acquire_lock(); +} + +template <typename I> +void InstanceWatcher<I>::acquire_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_acquire_lock>(this)); + + m_instance_lock->acquire_lock(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + if (r < 0) { + + derr << "error acquiring instance lock: " << cpp_strerror(r) << dendl; + + m_ret_val = r; + unregister_watch(); + return; + } + + std::swap(on_finish, m_on_finish); + } + + on_finish->complete(r); +} + +template <typename I> +void InstanceWatcher<I>::release_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_release_lock>(this)); + + m_instance_lock->shut_down(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_release_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error releasing instance lock: " << cpp_strerror(r) << dendl; + } + + unregister_watch(); +} + +template <typename I> +void InstanceWatcher<I>::unregister_watch() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_watch>(this)); + + librbd::Watcher::unregister_watch(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_unregister_watch(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering instance watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + } + + Mutex::Locker locker(m_lock); + remove_instance_object(); +} + +template <typename I> +void InstanceWatcher<I>::remove_instance_object() { + ceph_assert(m_lock.is_locked()); + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + op.remove(); + + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, + &InstanceWatcher<I>::handle_remove_instance_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_remove_instance_object(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + r = 0; + } + + if (r < 0) { + derr << "error removing " << m_oid << " object: " << cpp_strerror(r) + << dendl; + } + + Mutex::Locker locker(m_lock); + unregister_instance(); +} + +template <typename I> +void InstanceWatcher<I>::unregister_instance() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_instances_remove(&op, m_instance_id); + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_instance>(this); + + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_unregister_instance(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering instance: " << cpp_strerror(r) << dendl; + } + + Mutex::Locker locker(m_lock); + wait_for_notify_ops(); +} + +template <typename I> +void InstanceWatcher<I>::wait_for_notify_ops() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + for (auto op : m_notify_ops) { + op.second->cancel(); + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_wait_for_notify_ops>(this)); + + m_notify_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_wait_for_notify_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + ceph_assert(m_notify_ops.empty()); + + std::swap(on_finish, m_on_finish); + r = m_ret_val; + } + on_finish->complete(r); +} + +template <typename I> +void InstanceWatcher<I>::get_instance_locker() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_get_instance_locker>(this)); + + m_instance_lock->get_locker(&m_instance_locker, ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_get_instance_locker(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + if (r != -ENOENT) { + derr << "error retrieving instance locker: " << cpp_strerror(r) << dendl; + } + remove_instance_object(); + return; + } + + break_instance_lock(); +} + +template <typename I> +void InstanceWatcher<I>::break_instance_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_break_instance_lock>(this)); + + m_instance_lock->break_lock(m_instance_locker, true, ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_break_instance_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + if (r != -ENOENT) { + derr << "error breaking instance lock: " << cpp_strerror(r) << dendl; + } + remove_instance_object(); + return; + } + + remove_instance_object(); +} + +template <typename I> +void InstanceWatcher<I>::suspend_notify_request(C_NotifyInstanceRequest *req) { + dout(10) << req << dendl; + + ceph_assert(m_lock.is_locked()); + + auto result = m_suspended_ops.insert(req).second; + ceph_assert(result); +} + +template <typename I> +bool InstanceWatcher<I>::unsuspend_notify_request( + C_NotifyInstanceRequest *req) { + dout(10) << req << dendl; + + ceph_assert(m_lock.is_locked()); + + auto result = m_suspended_ops.erase(req); + if (result == 0) { + return false; + } + + req->send(); + return true; +} + +template <typename I> +void InstanceWatcher<I>::unsuspend_notify_requests() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + std::set<C_NotifyInstanceRequest *> suspended_ops; + std::swap(m_suspended_ops, suspended_ops); + + for (auto op : suspended_ops) { + op->send(); + } +} + +template <typename I> +Context *InstanceWatcher<I>::prepare_request(const std::string &instance_id, + uint64_t request_id, + C_NotifyAck *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id + << dendl; + + Mutex::Locker locker(m_lock); + + Context *ctx = nullptr; + Request request(instance_id, request_id); + auto it = m_requests.find(request); + + if (it != m_requests.end()) { + dout(10) << "duplicate for in-progress request" << dendl; + delete it->on_notify_ack; + m_requests.erase(it); + } else { + ctx = create_async_context_callback( + m_work_queue, new FunctionContext( + [this, instance_id, request_id] (int r) { + complete_request(instance_id, request_id, r); + })); + } + + request.on_notify_ack = on_notify_ack; + m_requests.insert(request); + return ctx; +} + +template <typename I> +void InstanceWatcher<I>::complete_request(const std::string &instance_id, + uint64_t request_id, int r) { + dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id + << dendl; + + C_NotifyAck *on_notify_ack; + { + Mutex::Locker locker(m_lock); + Request request(instance_id, request_id); + auto it = m_requests.find(request); + ceph_assert(it != m_requests.end()); + on_notify_ack = it->on_notify_ack; + m_requests.erase(it); + } + + encode(NotifyAckPayload(instance_id, request_id, r), on_notify_ack->out); + on_notify_ack->complete(0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) { + dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", " + << "notifier_id=" << notifier_id << dendl; + + auto ctx = new C_NotifyAck(this, notify_id, handle); + + NotifyMessage notify_message; + try { + auto iter = bl.cbegin(); + decode(notify_message, iter); + } catch (const buffer::error &err) { + derr << "error decoding image notification: " << err.what() << dendl; + ctx->complete(0); + return; + } + + apply_visitor(HandlePayloadVisitor(this, stringify(notifier_id), ctx), + notify_message.payload); +} + +template <typename I> +void InstanceWatcher<I>::handle_image_acquire( + const std::string &global_image_id, Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + auto ctx = new FunctionContext( + [this, global_image_id, on_finish] (int r) { + m_instance_replayer->acquire_image(this, global_image_id, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_image_release( + const std::string &global_image_id, Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + auto ctx = new FunctionContext( + [this, global_image_id, on_finish] (int r) { + m_instance_replayer->release_image(global_image_id, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_peer_image_removed( + const std::string &global_image_id, const std::string &peer_mirror_uuid, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + auto ctx = new FunctionContext( + [this, peer_mirror_uuid, global_image_id, on_finish] (int r) { + m_instance_replayer->remove_peer_image(global_image_id, + peer_mirror_uuid, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_sync_request(const std::string &instance_id, + const std::string &sync_id, + Context *on_finish) { + dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + if (m_image_sync_throttler == nullptr) { + dout(10) << "sync request for non-leader" << dendl; + m_work_queue->queue(on_finish, -ESTALE); + return; + } + + Context *on_start = create_async_context_callback( + m_work_queue, new FunctionContext( + [this, instance_id, sync_id, on_finish] (int r) { + dout(10) << "handle_sync_request: finish: instance_id=" << instance_id + << ", sync_id=" << sync_id << ", r=" << r << dendl; + if (r == 0) { + notify_sync_start(instance_id, sync_id); + } + if (r == -ENOENT) { + r = 0; + } + on_finish->complete(r); + })); + m_image_sync_throttler->start_op(sync_id, on_start); +} + +template <typename I> +void InstanceWatcher<I>::handle_sync_start(const std::string &instance_id, + const std::string &sync_id, + Context *on_finish) { + dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + auto it = m_inflight_sync_reqs.find(sync_id); + if (it == m_inflight_sync_reqs.end()) { + dout(5) << "not found" << dendl; + m_work_queue->queue(on_finish, 0); + return; + } + + auto sync_ctx = it->second; + + if (sync_ctx->on_complete != nullptr) { + dout(5) << "duplicate request" << dendl; + m_work_queue->queue(sync_ctx->on_complete, -ESTALE); + } + + sync_ctx->on_complete = on_finish; +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const ImageAcquirePayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "image_acquire: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_image_acquire(payload.global_image_id, on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const ImageReleasePayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "image_release: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_image_release(payload.global_image_id, on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const PeerImageRemovedPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "remove_peer_image: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_peer_image_removed(payload.global_image_id, payload.peer_mirror_uuid, + on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const SyncRequestPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "sync_request: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish == nullptr) { + return; + } + + handle_sync_request(instance_id, payload.sync_id, on_finish); +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const SyncStartPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "sync_start: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish == nullptr) { + return; + } + + handle_sync_start(instance_id, payload.sync_id, on_finish); +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const UnknownPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(5) << "unknown: instance_id=" << instance_id << dendl; + + on_notify_ack->complete(0); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::InstanceWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/InstanceWatcher.h b/src/tools/rbd_mirror/InstanceWatcher.h new file mode 100644 index 00000000..5ec1aef0 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceWatcher.h @@ -0,0 +1,264 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCE_WATCHER_H +#define CEPH_RBD_MIRROR_INSTANCE_WATCHER_H + +#include <map> +#include <memory> +#include <set> +#include <string> +#include <vector> + +#include "common/AsyncOpTracker.h" +#include "librbd/Watcher.h" +#include "librbd/managed_lock/Types.h" +#include "tools/rbd_mirror/instance_watcher/Types.h" + +namespace librbd { + +class ImageCtx; +template <typename> class ManagedLock; + +} + +namespace rbd { +namespace mirror { + +template <typename> class ImageSyncThrottler; +template <typename> class InstanceReplayer; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class InstanceWatcher : protected librbd::Watcher { + using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning +public: + static void get_instances(librados::IoCtx &io_ctx, + std::vector<std::string> *instance_ids, + Context *on_finish); + static void remove_instance(librados::IoCtx &io_ctx, + ContextWQ *work_queue, + const std::string &instance_id, + Context *on_finish); + + static InstanceWatcher *create( + librados::IoCtx &io_ctx, ContextWQ *work_queue, + InstanceReplayer<ImageCtxT> *instance_replayer); + void destroy() { + delete this; + } + + InstanceWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue, + InstanceReplayer<ImageCtxT> *instance_replayer, + const std::string &instance_id); + ~InstanceWatcher() override; + + inline std::string &get_instance_id() { + return m_instance_id; + } + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + void remove(Context *on_finish); + + void notify_image_acquire(const std::string &instance_id, + const std::string &global_image_id, + Context *on_notify_ack); + void notify_image_release(const std::string &instance_id, + const std::string &global_image_id, + Context *on_notify_ack); + void notify_peer_image_removed(const std::string &instance_id, + const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_notify_ack); + + void notify_sync_request(const std::string &sync_id, Context *on_sync_start); + bool cancel_sync_request(const std::string &sync_id); + void notify_sync_complete(const std::string &sync_id); + + void print_sync_status(Formatter *f, stringstream *ss); + + void cancel_notify_requests(const std::string &instance_id); + + void handle_acquire_leader(); + void handle_release_leader(); + void handle_update_leader(const std::string &leader_instance_id); + +private: + /** + * @verbatim + * + * BREAK_INSTANCE_LOCK -------\ + * ^ | + * | (error) | + * GET_INSTANCE_LOCKER * * *>| + * ^ (remove) | + * | | + * <uninitialized> <----------------+---- WAIT_FOR_NOTIFY_OPS + * | (init) ^ | ^ + * v (error) * | | + * REGISTER_INSTANCE * * * * * *|* *> UNREGISTER_INSTANCE + * | * | ^ + * v (error) * v | + * CREATE_INSTANCE_OBJECT * * * * * *> REMOVE_INSTANCE_OBJECT + * | * ^ + * v (error) * | + * REGISTER_WATCH * * * * * * * * * *> UNREGISTER_WATCH + * | * ^ + * v (error) * | + * ACQUIRE_LOCK * * * * * * * * * * * RELEASE_LOCK + * | ^ + * v (shut_down) | + * <watching> -------------------------------/ + * + * @endverbatim + */ + + struct C_NotifyInstanceRequest; + struct C_SyncRequest; + + typedef std::pair<std::string, std::string> Id; + + struct HandlePayloadVisitor : public boost::static_visitor<void> { + InstanceWatcher *instance_watcher; + std::string instance_id; + C_NotifyAck *on_notify_ack; + + HandlePayloadVisitor(InstanceWatcher *instance_watcher, + const std::string &instance_id, + C_NotifyAck *on_notify_ack) + : instance_watcher(instance_watcher), instance_id(instance_id), + on_notify_ack(on_notify_ack) { + } + + template <typename Payload> + inline void operator()(const Payload &payload) const { + instance_watcher->handle_payload(instance_id, payload, on_notify_ack); + } + }; + + struct Request { + std::string instance_id; + uint64_t request_id; + C_NotifyAck *on_notify_ack = nullptr; + + Request(const std::string &instance_id, uint64_t request_id) + : instance_id(instance_id), request_id(request_id) { + } + + inline bool operator<(const Request &rhs) const { + return instance_id < rhs.instance_id || + (instance_id == rhs.instance_id && request_id < rhs.request_id); + } + }; + + Threads<ImageCtxT> *m_threads; + InstanceReplayer<ImageCtxT> *m_instance_replayer; + std::string m_instance_id; + + mutable Mutex m_lock; + librbd::ManagedLock<ImageCtxT> *m_instance_lock; + Context *m_on_finish = nullptr; + int m_ret_val = 0; + std::string m_leader_instance_id; + librbd::managed_lock::Locker m_instance_locker; + std::set<std::pair<std::string, C_NotifyInstanceRequest *>> m_notify_ops; + AsyncOpTracker m_notify_op_tracker; + uint64_t m_request_seq = 0; + std::set<Request> m_requests; + std::set<C_NotifyInstanceRequest *> m_suspended_ops; + std::map<std::string, C_SyncRequest *> m_inflight_sync_reqs; + ImageSyncThrottler<ImageCtxT> *m_image_sync_throttler = nullptr; + + void register_instance(); + void handle_register_instance(int r); + + void create_instance_object(); + void handle_create_instance_object(int r); + + void register_watch(); + void handle_register_watch(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void release_lock(); + void handle_release_lock(int r); + + void unregister_watch(); + void handle_unregister_watch(int r); + + void remove_instance_object(); + void handle_remove_instance_object(int r); + + void unregister_instance(); + void handle_unregister_instance(int r); + + void wait_for_notify_ops(); + void handle_wait_for_notify_ops(int r); + + void get_instance_locker(); + void handle_get_instance_locker(int r); + + void break_instance_lock(); + void handle_break_instance_lock(int r); + + void suspend_notify_request(C_NotifyInstanceRequest *req); + bool unsuspend_notify_request(C_NotifyInstanceRequest *req); + void unsuspend_notify_requests(); + + void notify_sync_complete(const Mutex& lock, const std::string &sync_id); + void handle_notify_sync_request(C_SyncRequest *sync_ctx, int r); + void handle_notify_sync_complete(C_SyncRequest *sync_ctx, int r); + + void notify_sync_start(const std::string &instance_id, + const std::string &sync_id); + + Context *prepare_request(const std::string &instance_id, uint64_t request_id, + C_NotifyAck *on_notify_ack); + void complete_request(const std::string &instance_id, uint64_t request_id, + int r); + + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; + + void handle_image_acquire(const std::string &global_image_id, + Context *on_finish); + void handle_image_release(const std::string &global_image_id, + Context *on_finish); + void handle_peer_image_removed(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish); + + void handle_sync_request(const std::string &instance_id, + const std::string &sync_id, Context *on_finish); + void handle_sync_start(const std::string &instance_id, + const std::string &sync_id, Context *on_finish); + + void handle_payload(const std::string &instance_id, + const instance_watcher::ImageAcquirePayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::ImageReleasePayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::PeerImageRemovedPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::SyncRequestPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::SyncStartPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::UnknownPayload &payload, + C_NotifyAck *on_notify_ack); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCE_WATCHER_H diff --git a/src/tools/rbd_mirror/Instances.cc b/src/tools/rbd_mirror/Instances.cc new file mode 100644 index 00000000..b7a6cf11 --- /dev/null +++ b/src/tools/rbd_mirror/Instances.cc @@ -0,0 +1,359 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/stringify.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "InstanceWatcher.h" +#include "Instances.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Instances: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +Instances<I>::Instances(Threads<I> *threads, librados::IoCtx &ioctx, + const std::string& instance_id, + instances::Listener& listener) : + m_threads(threads), m_ioctx(ioctx), m_instance_id(instance_id), + m_listener(listener), m_cct(reinterpret_cast<CephContext *>(ioctx.cct())), + m_lock("rbd::mirror::Instances " + ioctx.get_pool_name()) { +} + +template <typename I> +Instances<I>::~Instances() { +} + +template <typename I> +void Instances<I>::init(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + get_instances(); +} + +template <typename I> +void Instances<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + Context *ctx = new FunctionContext( + [this](int r) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + cancel_remove_task(); + wait_for_ops(); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Instances<I>::unblock_listener() { + dout(5) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_listener_blocked); + m_listener_blocked = false; + + InstanceIds added_instance_ids; + for (auto& pair : m_instances) { + if (pair.second.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(pair.first); + } + } + + if (!added_instance_ids.empty()) { + m_threads->work_queue->queue( + new C_NotifyInstancesAdded(this, added_instance_ids), 0); + } +} + +template <typename I> +void Instances<I>::acked(const InstanceIds& instance_ids) { + dout(10) << "instance_ids=" << instance_ids << dendl; + + Mutex::Locker locker(m_lock); + if (m_on_finish != nullptr) { + dout(5) << "received on shut down, ignoring" << dendl; + return; + } + + Context *ctx = new C_HandleAcked(this, instance_ids); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Instances<I>::handle_acked(const InstanceIds& instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (m_on_finish != nullptr) { + dout(5) << "handled on shut down, ignoring" << dendl; + return; + } + + InstanceIds added_instance_ids; + auto time = ceph_clock_now(); + for (auto& instance_id : instance_ids) { + auto &instance = m_instances.insert( + std::make_pair(instance_id, Instance{})).first->second; + instance.acked_time = time; + if (instance.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(instance_id); + } + } + + schedule_remove_task(time); + if (!m_listener_blocked && !added_instance_ids.empty()) { + m_threads->work_queue->queue( + new C_NotifyInstancesAdded(this, added_instance_ids), 0); + } +} + +template <typename I> +void Instances<I>::notify_instances_added(const InstanceIds& instance_ids) { + Mutex::Locker locker(m_lock); + InstanceIds added_instance_ids; + for (auto& instance_id : instance_ids) { + auto it = m_instances.find(instance_id); + if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(instance_id); + } + } + + if (added_instance_ids.empty()) { + return; + } + + dout(5) << "instance_ids=" << added_instance_ids << dendl; + m_lock.Unlock(); + m_listener.handle_added(added_instance_ids); + m_lock.Lock(); + + for (auto& instance_id : added_instance_ids) { + auto it = m_instances.find(instance_id); + if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) { + it->second.state = INSTANCE_STATE_IDLE; + } + } +} + +template <typename I> +void Instances<I>::notify_instances_removed(const InstanceIds& instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + m_listener.handle_removed(instance_ids); + + Mutex::Locker locker(m_lock); + for (auto& instance_id : instance_ids) { + m_instances.erase(instance_id); + } +} + +template <typename I> +void Instances<I>::list(std::vector<std::string> *instance_ids) { + dout(20) << dendl; + + Mutex::Locker locker(m_lock); + + for (auto it : m_instances) { + instance_ids->push_back(it.first); + } +} + + +template <typename I> +void Instances<I>::get_instances() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_context_callback< + Instances, &Instances<I>::handle_get_instances>(this); + + InstanceWatcher<I>::get_instances(m_ioctx, &m_instance_ids, ctx); +} + +template <typename I> +void Instances<I>::handle_get_instances(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(on_finish, m_on_finish); + } + + if (r < 0) { + derr << "error retrieving instances: " << cpp_strerror(r) << dendl; + } else { + handle_acked(m_instance_ids); + } + on_finish->complete(r); +} + +template <typename I> +void Instances<I>::wait_for_ops() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Instances, &Instances<I>::handle_wait_for_ops>(this)); + + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Instances<I>::handle_wait_for_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void Instances<I>::remove_instances(const utime_t& time) { + ceph_assert(m_lock.is_locked()); + + InstanceIds instance_ids; + for (auto& instance_pair : m_instances) { + if (instance_pair.first == m_instance_id) { + continue; + } + auto& instance = instance_pair.second; + if (instance.state != INSTANCE_STATE_REMOVING && + instance.acked_time <= time) { + instance.state = INSTANCE_STATE_REMOVING; + instance_ids.push_back(instance_pair.first); + } + } + ceph_assert(!instance_ids.empty()); + + dout(10) << "instance_ids=" << instance_ids << dendl; + Context* ctx = new FunctionContext([this, instance_ids](int r) { + handle_remove_instances(r, instance_ids); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + auto gather_ctx = new C_Gather(m_cct, ctx); + for (auto& instance_id : instance_ids) { + InstanceWatcher<I>::remove_instance(m_ioctx, m_threads->work_queue, + instance_id, gather_ctx->new_sub()); + } + + m_async_op_tracker.start_op(); + gather_ctx->activate(); +} + +template <typename I> +void Instances<I>::handle_remove_instances( + int r, const InstanceIds& instance_ids) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + dout(10) << "r=" << r << ", instance_ids=" << instance_ids << dendl; + ceph_assert(r == 0); + + // fire removed notification now that instances have been blacklisted + m_threads->work_queue->queue( + new C_NotifyInstancesRemoved(this, instance_ids), 0); + + // reschedule the timer for the next batch + schedule_remove_task(ceph_clock_now()); + m_async_op_tracker.finish_op(); +} + +template <typename I> +void Instances<I>::cancel_remove_task() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + if (m_timer_task == nullptr) { + return; + } + + dout(10) << dendl; + + bool canceled = m_threads->timer->cancel_event(m_timer_task); + ceph_assert(canceled); + m_timer_task = nullptr; +} + +template <typename I> +void Instances<I>::schedule_remove_task(const utime_t& time) { + cancel_remove_task(); + if (m_on_finish != nullptr) { + dout(10) << "received on shut down, ignoring" << dendl; + return; + } + + int after = m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_heartbeat_interval") * + (1 + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats") + + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_acquire_attempts_before_break")); + + bool schedule = false; + utime_t oldest_time = time; + for (auto& instance : m_instances) { + if (instance.first == m_instance_id) { + continue; + } + if (instance.second.state == INSTANCE_STATE_REMOVING) { + // removal is already in-flight + continue; + } + + oldest_time = std::min(oldest_time, instance.second.acked_time); + schedule = true; + } + + if (!schedule) { + return; + } + + dout(10) << dendl; + + // schedule a time to fire when the oldest instance should be removed + m_timer_task = new FunctionContext( + [this, oldest_time](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + Mutex::Locker locker(m_lock); + m_timer_task = nullptr; + + remove_instances(oldest_time); + }); + + oldest_time += after; + m_threads->timer->add_event_at(oldest_time, m_timer_task); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Instances<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Instances.h b/src/tools/rbd_mirror/Instances.h new file mode 100644 index 00000000..dbfb16df --- /dev/null +++ b/src/tools/rbd_mirror/Instances.h @@ -0,0 +1,167 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCES_H +#define CEPH_RBD_MIRROR_INSTANCES_H + +#include <map> +#include <vector> + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "common/AsyncOpTracker.h" +#include "common/Mutex.h" +#include "librbd/Watcher.h" +#include "tools/rbd_mirror/instances/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class Instances { +public: + typedef std::vector<std::string> InstanceIds; + + static Instances *create(Threads<ImageCtxT> *threads, + librados::IoCtx &ioctx, + const std::string& instance_id, + instances::Listener& listener) { + return new Instances(threads, ioctx, instance_id, listener); + } + void destroy() { + delete this; + } + + Instances(Threads<ImageCtxT> *threads, librados::IoCtx &ioctx, + const std::string& instance_id, instances::Listener& listener); + virtual ~Instances(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void unblock_listener(); + + void acked(const InstanceIds& instance_ids); + + void list(std::vector<std::string> *instance_ids); + +private: + /** + * @verbatim + * + * <uninitialized> <---------------------\ + * | (init) ^ | + * v (error) * | + * GET_INSTANCES * * * * * WAIT_FOR_OPS + * | ^ + * v (shut_down) | + * <initialized> ------------------------/ + * . + * . (remove_instance) + * v + * REMOVE_INSTANCE + * + * @endverbatim + */ + + enum InstanceState { + INSTANCE_STATE_ADDING, + INSTANCE_STATE_IDLE, + INSTANCE_STATE_REMOVING + }; + + struct Instance { + utime_t acked_time{}; + InstanceState state = INSTANCE_STATE_ADDING; + }; + + struct C_NotifyBase : public Context { + Instances *instances; + InstanceIds instance_ids; + + C_NotifyBase(Instances *instances, const InstanceIds& instance_ids) + : instances(instances), instance_ids(instance_ids) { + instances->m_async_op_tracker.start_op(); + } + + void finish(int r) override { + execute(); + instances->m_async_op_tracker.finish_op(); + } + + virtual void execute() = 0; + }; + + struct C_HandleAcked : public C_NotifyBase { + C_HandleAcked(Instances *instances, const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->handle_acked(this->instance_ids); + } + }; + + struct C_NotifyInstancesAdded : public C_NotifyBase { + C_NotifyInstancesAdded(Instances *instances, + const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->notify_instances_added(this->instance_ids); + } + }; + + struct C_NotifyInstancesRemoved : public C_NotifyBase { + C_NotifyInstancesRemoved(Instances *instances, + const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->notify_instances_removed(this->instance_ids); + } + }; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_ioctx; + std::string m_instance_id; + instances::Listener& m_listener; + CephContext *m_cct; + + Mutex m_lock; + InstanceIds m_instance_ids; + std::map<std::string, Instance> m_instances; + Context *m_on_finish = nullptr; + AsyncOpTracker m_async_op_tracker; + + Context *m_timer_task = nullptr; + + bool m_listener_blocked = true; + + void handle_acked(const InstanceIds& instance_ids); + void notify_instances_added(const InstanceIds& instance_ids); + void notify_instances_removed(const InstanceIds& instance_ids); + + void get_instances(); + void handle_get_instances(int r); + + void wait_for_ops(); + void handle_wait_for_ops(int r); + + void remove_instances(const utime_t& time); + void handle_remove_instances(int r, const InstanceIds& instance_ids); + + void cancel_remove_task(); + void schedule_remove_task(const utime_t& time); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCES_H diff --git a/src/tools/rbd_mirror/LeaderWatcher.cc b/src/tools/rbd_mirror/LeaderWatcher.cc new file mode 100644 index 00000000..0d4bde6f --- /dev/null +++ b/src/tools/rbd_mirror/LeaderWatcher.cc @@ -0,0 +1,1145 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "LeaderWatcher.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "include/stringify.h" +#include "librbd/Utils.h" +#include "librbd/watcher/Types.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::LeaderWatcher: " \ + << this << " " << __func__ << ": " +namespace rbd { +namespace mirror { + +using namespace leader_watcher; + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +LeaderWatcher<I>::LeaderWatcher(Threads<I> *threads, librados::IoCtx &io_ctx, + leader_watcher::Listener *listener) + : Watcher(io_ctx, threads->work_queue, RBD_MIRROR_LEADER), + m_threads(threads), m_listener(listener), m_instances_listener(this), + m_lock("rbd::mirror::LeaderWatcher " + io_ctx.get_pool_name()), + m_notifier_id(librados::Rados(io_ctx).get_instance_id()), + m_instance_id(stringify(m_notifier_id)), + m_leader_lock(new LeaderLock(m_ioctx, m_work_queue, m_oid, this, true, + m_cct->_conf.get_val<uint64_t>( + "rbd_blacklist_expire_seconds"))) { +} + +template <typename I> +LeaderWatcher<I>::~LeaderWatcher() { + ceph_assert(m_status_watcher == nullptr); + ceph_assert(m_instances == nullptr); + ceph_assert(m_timer_task == nullptr); + + delete m_leader_lock; +} + +template <typename I> +std::string LeaderWatcher<I>::get_instance_id() { + return m_instance_id; +} + +template <typename I> +int LeaderWatcher<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void LeaderWatcher<I>::init(Context *on_finish) { + dout(10) << "notifier_id=" << m_notifier_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + create_leader_object(); +} + +template <typename I> +void LeaderWatcher<I>::create_leader_object() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + librados::ObjectWriteOperation op; + op.create(false); + + librados::AioCompletion *aio_comp = create_rados_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_create_leader_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void LeaderWatcher<I>::handle_create_leader_object(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + if (r == 0) { + register_watch(); + return; + } + + derr << "error creating " << m_oid << " object: " << cpp_strerror(r) + << dendl; + + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::register_watch() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_register_watch>(this)); + + librbd::Watcher::register_watch(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_register_watch(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + if (r < 0) { + Mutex::Locker locker(m_lock); + derr << "error registering leader watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + ceph_assert(m_on_finish != nullptr); + std::swap(on_finish, m_on_finish); + } else { + Mutex::Locker locker(m_lock); + init_status_watcher(); + return; + } + + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void LeaderWatcher<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_shut_down_finish == nullptr); + m_on_shut_down_finish = on_finish; + cancel_timer_task(); + shut_down_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::shut_down_leader_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_shut_down_leader_lock>(this)); + + m_leader_lock->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error shutting down leader lock: " << cpp_strerror(r) << dendl; + } + + shut_down_status_watcher(); +} + +template <typename I> +void LeaderWatcher<I>::unregister_watch() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_unregister_watch>(this)); + + librbd::Watcher::unregister_watch(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_unregister_watch(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering leader watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + } + wait_for_tasks(); +} + +template <typename I> +void LeaderWatcher<I>::wait_for_tasks() { + dout(10) << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + schedule_timer_task("wait for tasks", 0, false, + &LeaderWatcher<I>::handle_wait_for_tasks, true); +} + +template <typename I> +void LeaderWatcher<I>::handle_wait_for_tasks() { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(m_on_shut_down_finish != nullptr); + + ceph_assert(!m_timer_op_tracker.empty()); + m_timer_op_tracker.finish_op(); + + auto ctx = new FunctionContext([this](int r) { + Context *on_finish; + { + // ensure lock isn't held when completing shut down + Mutex::Locker locker(m_lock); + ceph_assert(m_on_shut_down_finish != nullptr); + on_finish = m_on_shut_down_finish; + } + on_finish->complete(0); + }); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +bool LeaderWatcher<I>::is_blacklisted() const { + std::lock_guard locker{m_lock}; + return m_blacklisted; +} + +template <typename I> +bool LeaderWatcher<I>::is_leader() const { + Mutex::Locker locker(m_lock); + + return is_leader(m_lock); +} + +template <typename I> +bool LeaderWatcher<I>::is_leader(Mutex &lock) const { + ceph_assert(m_lock.is_locked()); + + bool leader = m_leader_lock->is_leader(); + dout(10) << leader << dendl; + return leader; +} + +template <typename I> +bool LeaderWatcher<I>::is_releasing_leader() const { + Mutex::Locker locker(m_lock); + + return is_releasing_leader(m_lock); +} + +template <typename I> +bool LeaderWatcher<I>::is_releasing_leader(Mutex &lock) const { + ceph_assert(m_lock.is_locked()); + + bool releasing = m_leader_lock->is_releasing_leader(); + dout(10) << releasing << dendl; + return releasing; +} + +template <typename I> +bool LeaderWatcher<I>::get_leader_instance_id(std::string *instance_id) const { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + if (is_leader(m_lock) || is_releasing_leader(m_lock)) { + *instance_id = m_instance_id; + return true; + } + + if (!m_locker.cookie.empty()) { + *instance_id = stringify(m_locker.entity.num()); + return true; + } + + return false; +} + +template <typename I> +void LeaderWatcher<I>::release_leader() { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + if (!is_leader(m_lock)) { + return; + } + + release_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::list_instances(std::vector<std::string> *instance_ids) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + instance_ids->clear(); + if (m_instances != nullptr) { + m_instances->list(instance_ids); + } +} + +template <typename I> +void LeaderWatcher<I>::cancel_timer_task() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + if (m_timer_task == nullptr) { + return; + } + + dout(10) << m_timer_task << dendl; + bool canceled = m_threads->timer->cancel_event(m_timer_task); + ceph_assert(canceled); + m_timer_task = nullptr; +} + +template <typename I> +void LeaderWatcher<I>::schedule_timer_task(const std::string &name, + int delay_factor, bool leader, + TimerCallback timer_callback, + bool shutting_down) { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + if (!shutting_down && m_on_shut_down_finish != nullptr) { + return; + } + + cancel_timer_task(); + + m_timer_task = new FunctionContext( + [this, leader, timer_callback](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_timer_task = nullptr; + + if (m_timer_op_tracker.empty()) { + Mutex::Locker locker(m_lock); + execute_timer_task(leader, timer_callback); + return; + } + + // old timer task is still running -- do not start next + // task until the previous task completes + if (m_timer_gate == nullptr) { + m_timer_gate = new C_TimerGate(this); + m_timer_op_tracker.wait_for_ops(m_timer_gate); + } + m_timer_gate->leader = leader; + m_timer_gate->timer_callback = timer_callback; + }); + + int after = delay_factor * m_cct->_conf.get_val<uint64_t>( + "rbd_mirror_leader_heartbeat_interval"); + + dout(10) << "scheduling " << name << " after " << after << " sec (task " + << m_timer_task << ")" << dendl; + m_threads->timer->add_event_after(after, m_timer_task); +} + +template <typename I> +void LeaderWatcher<I>::execute_timer_task(bool leader, + TimerCallback timer_callback) { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(m_timer_op_tracker.empty()); + + if (is_leader(m_lock) != leader) { + return; + } + + m_timer_op_tracker.start_op(); + (this->*timer_callback)(); +} + +template <typename I> +void LeaderWatcher<I>::handle_post_acquire_leader_lock(int r, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + if (r == -EAGAIN) { + dout(10) << "already locked" << dendl; + } else { + derr << "error acquiring leader lock: " << cpp_strerror(r) << dendl; + } + on_finish->complete(r); + return; + } + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + init_instances(); +} + +template <typename I> +void LeaderWatcher<I>::handle_pre_release_leader_lock(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + notify_listener(); +} + +template <typename I> +void LeaderWatcher<I>::handle_post_release_leader_lock(int r, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + on_finish->complete(r); + return; + } + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + notify_lock_released(); +} + +template <typename I> +void LeaderWatcher<I>::break_leader_lock() { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_locker.cookie.empty()) { + get_locker(); + return; + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_break_leader_lock>(this)); + + m_leader_lock->break_lock(m_locker, true, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_break_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (r < 0 && r != -ENOENT) { + derr << "error breaking leader lock: " << cpp_strerror(r) << dendl; + schedule_acquire_leader_lock(1); + m_timer_op_tracker.finish_op(); + return; + } + + m_locker = {}; + m_acquire_attempts = 0; + acquire_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::schedule_get_locker(bool reset_leader, + uint32_t delay_factor) { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + if (reset_leader) { + m_locker = {}; + m_acquire_attempts = 0; + } + + schedule_timer_task("get locker", delay_factor, false, + &LeaderWatcher<I>::get_locker, false); +} + +template <typename I> +void LeaderWatcher<I>::get_locker() { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(!m_timer_op_tracker.empty()); + + C_GetLocker *get_locker_ctx = new C_GetLocker(this); + Context *ctx = create_async_context_callback(m_work_queue, get_locker_ctx); + + m_leader_lock->get_locker(&get_locker_ctx->locker, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_get_locker(int r, + librbd::managed_lock::Locker& locker) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker mutex_locker(m_lock); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (is_leader(m_lock)) { + m_locker = {}; + m_timer_op_tracker.finish_op(); + return; + } + + if (r == -ENOENT) { + m_locker = {}; + m_acquire_attempts = 0; + acquire_leader_lock(); + return; + } else if (r < 0) { + derr << "error retrieving leader locker: " << cpp_strerror(r) << dendl; + schedule_get_locker(true, 1); + m_timer_op_tracker.finish_op(); + return; + } + + bool notify_listener = false; + if (m_locker != locker) { + m_locker = locker; + notify_listener = true; + if (m_acquire_attempts > 1) { + dout(10) << "new lock owner detected -- resetting heartbeat counter" + << dendl; + m_acquire_attempts = 0; + } + } + + if (m_acquire_attempts >= m_cct->_conf.get_val<uint64_t>( + "rbd_mirror_leader_max_acquire_attempts_before_break")) { + dout(0) << "breaking leader lock after " << m_acquire_attempts << " " + << "failed attempts to acquire" << dendl; + break_leader_lock(); + return; + } + + schedule_acquire_leader_lock(1); + + if (!notify_listener) { + m_timer_op_tracker.finish_op(); + return; + } + + auto ctx = new FunctionContext( + [this](int r) { + std::string instance_id; + if (get_leader_instance_id(&instance_id)) { + m_listener->update_leader_handler(instance_id); + } + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + m_timer_op_tracker.finish_op(); + }); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void LeaderWatcher<I>::schedule_acquire_leader_lock(uint32_t delay_factor) { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + schedule_timer_task("acquire leader lock", + delay_factor * + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats"), + false, &LeaderWatcher<I>::acquire_leader_lock, false); +} + +template <typename I> +void LeaderWatcher<I>::acquire_leader_lock() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(!m_timer_op_tracker.empty()); + + ++m_acquire_attempts; + dout(10) << "acquire_attempts=" << m_acquire_attempts << dendl; + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_acquire_leader_lock>(this)); + m_leader_lock->try_acquire_lock(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_acquire_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (r < 0) { + if (r == -EAGAIN) { + dout(10) << "already locked" << dendl; + } else { + derr << "error acquiring lock: " << cpp_strerror(r) << dendl; + } + + get_locker(); + return; + } + + m_locker = {}; + m_acquire_attempts = 0; + + if (m_ret_val) { + dout(5) << "releasing due to error on notify" << dendl; + release_leader_lock(); + m_timer_op_tracker.finish_op(); + return; + } + + notify_heartbeat(); +} + +template <typename I> +void LeaderWatcher<I>::release_leader_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_release_leader_lock>(this)); + + m_leader_lock->release_lock(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_release_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error releasing lock: " << cpp_strerror(r) << dendl; + return; + } + + schedule_acquire_leader_lock(1); +} + +template <typename I> +void LeaderWatcher<I>::init_status_watcher() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + ceph_assert(m_status_watcher == nullptr); + + m_status_watcher = MirrorStatusWatcher<I>::create(m_ioctx, m_work_queue); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_status_watcher>(this); + + m_status_watcher->init(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_init_status_watcher(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error initializing mirror status watcher: " << cpp_strerror(r) + << cpp_strerror(r) << dendl; + } else { + schedule_acquire_leader_lock(0); + } + + ceph_assert(m_on_finish != nullptr); + std::swap(on_finish, m_on_finish); + } + + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::shut_down_status_watcher() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + ceph_assert(m_status_watcher != nullptr); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback<LeaderWatcher<I>, + &LeaderWatcher<I>::handle_shut_down_status_watcher>(this)); + + m_status_watcher->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_status_watcher(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + m_status_watcher->destroy(); + m_status_watcher = nullptr; + + if (r < 0) { + derr << "error shutting mirror status watcher down: " << cpp_strerror(r) + << dendl; + } + + unregister_watch(); +} + +template <typename I> +void LeaderWatcher<I>::init_instances() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + ceph_assert(m_instances == nullptr); + + m_instances = Instances<I>::create(m_threads, m_ioctx, m_instance_id, + m_instances_listener); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_instances>(this); + + m_instances->init(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_init_instances(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + if (r < 0) { + Mutex::Locker locker(m_lock); + derr << "error initializing instances: " << cpp_strerror(r) << dendl; + m_instances->destroy(); + m_instances = nullptr; + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } else { + Mutex::Locker locker(m_lock); + notify_listener(); + return; + } + + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::shut_down_instances() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + ceph_assert(m_instances != nullptr); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback<LeaderWatcher<I>, + &LeaderWatcher<I>::handle_shut_down_instances>(this)); + + m_instances->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_instances(int r) { + dout(10) << "r=" << r << dendl; + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + m_instances->destroy(); + m_instances = nullptr; + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::notify_listener() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_listener>(this)); + + if (is_leader(m_lock)) { + ctx = new FunctionContext( + [this, ctx](int r) { + m_listener->post_acquire_handler(ctx); + }); + } else { + ctx = new FunctionContext( + [this, ctx](int r) { + m_listener->pre_release_handler(ctx); + }); + } + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_listener(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error notifying listener: " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + + if (is_leader(m_lock)) { + notify_lock_acquired(); + } else { + shut_down_instances(); + } +} + +template <typename I> +void LeaderWatcher<I>::notify_lock_acquired() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_acquired>(this); + + bufferlist bl; + encode(NotifyMessage{LockAcquiredPayload{}}, bl); + + send_notify(bl, nullptr, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_lock_acquired(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying leader lock acquired: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + } + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + + if (m_ret_val == 0) { + // listener should be ready for instance add/remove events now + m_instances->unblock_listener(); + } + } + on_finish->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::notify_lock_released() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_released>(this); + + bufferlist bl; + encode(NotifyMessage{LockReleasedPayload{}}, bl); + + send_notify(bl, nullptr, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_lock_released(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying leader lock released: " << cpp_strerror(r) + << dendl; + } + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::notify_heartbeat() { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(!m_timer_op_tracker.empty()); + + if (!is_leader(m_lock)) { + dout(5) << "not leader, canceling" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_heartbeat>(this); + + bufferlist bl; + encode(NotifyMessage{HeartbeatPayload{}}, bl); + + m_heartbeat_response.acks.clear(); + send_notify(bl, &m_heartbeat_response, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_heartbeat(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + ceph_assert(!m_timer_op_tracker.empty()); + + m_timer_op_tracker.finish_op(); + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + return; + } else if (!is_leader(m_lock)) { + return; + } + + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying heartbeat: " << cpp_strerror(r) + << ", releasing leader" << dendl; + release_leader_lock(); + return; + } + + dout(10) << m_heartbeat_response.acks.size() << " acks received, " + << m_heartbeat_response.timeouts.size() << " timed out" << dendl; + + std::vector<std::string> instance_ids; + for (auto &it: m_heartbeat_response.acks) { + uint64_t notifier_id = it.first.gid; + instance_ids.push_back(stringify(notifier_id)); + } + if (!instance_ids.empty()) { + m_instances->acked(instance_ids); + } + + schedule_timer_task("heartbeat", 1, true, + &LeaderWatcher<I>::notify_heartbeat, false); +} + +template <typename I> +void LeaderWatcher<I>::handle_heartbeat(Context *on_notify_ack) { + dout(10) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (is_leader(m_lock)) { + dout(5) << "got another leader heartbeat, ignoring" << dendl; + } else { + cancel_timer_task(); + m_acquire_attempts = 0; + schedule_acquire_leader_lock(1); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_lock_acquired(Context *on_notify_ack) { + dout(10) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (is_leader(m_lock)) { + dout(5) << "got another leader lock_acquired, ignoring" << dendl; + } else { + cancel_timer_task(); + schedule_get_locker(true, 0); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_lock_released(Context *on_notify_ack) { + dout(10) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (is_leader(m_lock)) { + dout(5) << "got another leader lock_released, ignoring" << dendl; + } else { + cancel_timer_task(); + schedule_get_locker(true, 0); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) { + dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", " + << "notifier_id=" << notifier_id << dendl; + + Context *ctx = new C_NotifyAck(this, notify_id, handle); + + if (notifier_id == m_notifier_id) { + dout(10) << "our own notification, ignoring" << dendl; + ctx->complete(0); + return; + } + + NotifyMessage notify_message; + try { + auto iter = bl.cbegin(); + decode(notify_message, iter); + } catch (const buffer::error &err) { + derr << "error decoding image notification: " << err.what() << dendl; + ctx->complete(0); + return; + } + + apply_visitor(HandlePayloadVisitor(this, ctx), notify_message.payload); +} + +template <typename I> +void LeaderWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLACKLISTED) { + dout(1) << "blacklisted detected" << dendl; + m_blacklisted = true; + return; + } + + m_leader_lock->reacquire_lock(nullptr); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const HeartbeatPayload &payload, + Context *on_notify_ack) { + dout(10) << "heartbeat" << dendl; + + handle_heartbeat(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const LockAcquiredPayload &payload, + Context *on_notify_ack) { + dout(10) << "lock_acquired" << dendl; + + handle_lock_acquired(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const LockReleasedPayload &payload, + Context *on_notify_ack) { + dout(10) << "lock_released" << dendl; + + handle_lock_released(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const UnknownPayload &payload, + Context *on_notify_ack) { + dout(10) << "unknown" << dendl; + + on_notify_ack->complete(0); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::LeaderWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/LeaderWatcher.h b/src/tools/rbd_mirror/LeaderWatcher.h new file mode 100644 index 00000000..01ee0565 --- /dev/null +++ b/src/tools/rbd_mirror/LeaderWatcher.h @@ -0,0 +1,320 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_LEADER_WATCHER_H +#define CEPH_RBD_MIRROR_LEADER_WATCHER_H + +#include <list> +#include <memory> +#include <string> + +#include "common/AsyncOpTracker.h" +#include "librbd/ManagedLock.h" +#include "librbd/Watcher.h" +#include "librbd/managed_lock/Types.h" +#include "librbd/watcher/Types.h" +#include "Instances.h" +#include "MirrorStatusWatcher.h" +#include "tools/rbd_mirror/instances/Types.h" +#include "tools/rbd_mirror/leader_watcher/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class LeaderWatcher : protected librbd::Watcher { + using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning +public: + static LeaderWatcher* create(Threads<ImageCtxT> *threads, + librados::IoCtx &io_ctx, + leader_watcher::Listener *listener) { + return new LeaderWatcher(threads, io_ctx, listener); + } + + LeaderWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &io_ctx, + leader_watcher::Listener *listener); + ~LeaderWatcher() override; + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + bool is_blacklisted() const; + bool is_leader() const; + bool is_releasing_leader() const; + bool get_leader_instance_id(std::string *instance_id) const; + void release_leader(); + void list_instances(std::vector<std::string> *instance_ids); + + std::string get_instance_id(); + +private: + /** + * @verbatim + * + * <uninitialized> <------------------------------ WAIT_FOR_TASKS + * | (init) ^ ^ + * v * | + * CREATE_OBJECT * * * * * (error) UNREGISTER_WATCH + * | * ^ + * v * | + * REGISTER_WATCH * * * * * SHUT_DOWN_STATUS_WATCHER + * | * ^ + * v * | + * INIT_STATUS_WATCHER * * SHUT_DOWN_LEADER_LOCK + * | | + * | (no leader heartbeat and acquire failed) | + * | BREAK_LOCK <-------------------------------------\ | + * | | (no leader heartbeat) | | (shut down) + * | | /----------------------------------------\ | | + * | | | (lock_released received) | | + * | | | /-------------------------------------\ | | + * | | | | (lock_acquired or | | | + * | | | | heartbeat received) | | | + * | | | | (ENOENT) /-----------\ | | | + * | | | | * * * * * * * * * * | | | | | + * v v v v v (error) * v | | | | + * ACQUIRE_LEADER_LOCK * * * * *> GET_LOCKER ---> <secondary> + * | * ^ + * ....|...................*.................... .....|..................... + * . v * . . | post_release . + * .INIT_INSTANCES * * * * * . .NOTIFY_LOCK_RELEASED . + * . | . .....^..................... + * . v . | + * .NOTIFY_LISTENER . RELEASE_LEADER_LOCK + * . | . ^ + * . v . .....|..................... + * .NOTIFY_LOCK_ACQUIRED . . | . + * . | post_acquire . .SHUT_DOWN_INSTANCES . + * ....|........................................ . ^ . + * v . | . + * <leader> -----------------------------------> .NOTIFY_LISTENER . + * (shut_down, release_leader, . pre_release . + * notify error) ........................... + * @endverbatim + */ + + struct InstancesListener : public instances::Listener { + LeaderWatcher* leader_watcher; + + InstancesListener(LeaderWatcher* leader_watcher) + : leader_watcher(leader_watcher) { + } + + void handle_added(const InstanceIds& instance_ids) override { + leader_watcher->m_listener->handle_instances_added(instance_ids); + } + + void handle_removed(const InstanceIds& instance_ids) override { + leader_watcher->m_listener->handle_instances_removed(instance_ids); + } + }; + + class LeaderLock : public librbd::ManagedLock<ImageCtxT> { + public: + typedef librbd::ManagedLock<ImageCtxT> Parent; + + LeaderLock(librados::IoCtx& ioctx, ContextWQ *work_queue, + const std::string& oid, LeaderWatcher *watcher, + bool blacklist_on_break_lock, + uint32_t blacklist_expire_seconds) + : Parent(ioctx, work_queue, oid, watcher, librbd::managed_lock::EXCLUSIVE, + blacklist_on_break_lock, blacklist_expire_seconds), + watcher(watcher) { + } + + bool is_leader() const { + Mutex::Locker locker(Parent::m_lock); + return Parent::is_state_post_acquiring() || Parent::is_state_locked(); + } + + bool is_releasing_leader() const { + Mutex::Locker locker(Parent::m_lock); + return Parent::is_state_pre_releasing(); + } + + protected: + void post_acquire_lock_handler(int r, Context *on_finish) { + if (r == 0) { + // lock is owned at this point + Mutex::Locker locker(Parent::m_lock); + Parent::set_state_post_acquiring(); + } + watcher->handle_post_acquire_leader_lock(r, on_finish); + } + void pre_release_lock_handler(bool shutting_down, + Context *on_finish) { + watcher->handle_pre_release_leader_lock(on_finish); + } + void post_release_lock_handler(bool shutting_down, int r, + Context *on_finish) { + watcher->handle_post_release_leader_lock(r, on_finish); + } + private: + LeaderWatcher *watcher; + }; + + struct HandlePayloadVisitor : public boost::static_visitor<void> { + LeaderWatcher *leader_watcher; + Context *on_notify_ack; + + HandlePayloadVisitor(LeaderWatcher *leader_watcher, Context *on_notify_ack) + : leader_watcher(leader_watcher), on_notify_ack(on_notify_ack) { + } + + template <typename Payload> + inline void operator()(const Payload &payload) const { + leader_watcher->handle_payload(payload, on_notify_ack); + } + }; + + struct C_GetLocker : public Context { + LeaderWatcher *leader_watcher; + librbd::managed_lock::Locker locker; + + C_GetLocker(LeaderWatcher *leader_watcher) + : leader_watcher(leader_watcher) { + } + + void finish(int r) override { + leader_watcher->handle_get_locker(r, locker); + } + }; + + typedef void (LeaderWatcher<ImageCtxT>::*TimerCallback)(); + + struct C_TimerGate : public Context { + LeaderWatcher *leader_watcher; + + bool leader = false; + TimerCallback timer_callback = nullptr; + + C_TimerGate(LeaderWatcher *leader_watcher) + : leader_watcher(leader_watcher) { + } + + void finish(int r) override { + leader_watcher->m_timer_gate = nullptr; + leader_watcher->execute_timer_task(leader, timer_callback); + } + }; + + Threads<ImageCtxT> *m_threads; + leader_watcher::Listener *m_listener; + + InstancesListener m_instances_listener; + mutable Mutex m_lock; + uint64_t m_notifier_id; + std::string m_instance_id; + LeaderLock *m_leader_lock; + Context *m_on_finish = nullptr; + Context *m_on_shut_down_finish = nullptr; + uint64_t m_acquire_attempts = 0; + int m_ret_val = 0; + MirrorStatusWatcher<ImageCtxT> *m_status_watcher = nullptr; + Instances<ImageCtxT> *m_instances = nullptr; + librbd::managed_lock::Locker m_locker; + + bool m_blacklisted = false; + + AsyncOpTracker m_timer_op_tracker; + Context *m_timer_task = nullptr; + C_TimerGate *m_timer_gate = nullptr; + + librbd::watcher::NotifyResponse m_heartbeat_response; + + bool is_leader(Mutex &m_lock) const; + bool is_releasing_leader(Mutex &m_lock) const; + + void cancel_timer_task(); + void schedule_timer_task(const std::string &name, + int delay_factor, bool leader, + TimerCallback callback, bool shutting_down); + void execute_timer_task(bool leader, TimerCallback timer_callback); + + void create_leader_object(); + void handle_create_leader_object(int r); + + void register_watch(); + void handle_register_watch(int r); + + void shut_down_leader_lock(); + void handle_shut_down_leader_lock(int r); + + void unregister_watch(); + void handle_unregister_watch(int r); + + void wait_for_tasks(); + void handle_wait_for_tasks(); + + void break_leader_lock(); + void handle_break_leader_lock(int r); + + void schedule_get_locker(bool reset_leader, uint32_t delay_factor); + void get_locker(); + void handle_get_locker(int r, librbd::managed_lock::Locker& locker); + + void schedule_acquire_leader_lock(uint32_t delay_factor); + void acquire_leader_lock(); + void handle_acquire_leader_lock(int r); + + void release_leader_lock(); + void handle_release_leader_lock(int r); + + void init_status_watcher(); + void handle_init_status_watcher(int r); + + void shut_down_status_watcher(); + void handle_shut_down_status_watcher(int r); + + void init_instances(); + void handle_init_instances(int r); + + void shut_down_instances(); + void handle_shut_down_instances(int r); + + void notify_listener(); + void handle_notify_listener(int r); + + void notify_lock_acquired(); + void handle_notify_lock_acquired(int r); + + void notify_lock_released(); + void handle_notify_lock_released(int r); + + void notify_heartbeat(); + void handle_notify_heartbeat(int r); + + void handle_post_acquire_leader_lock(int r, Context *on_finish); + void handle_pre_release_leader_lock(Context *on_finish); + void handle_post_release_leader_lock(int r, Context *on_finish); + + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; + + void handle_rewatch_complete(int r) override; + + void handle_heartbeat(Context *on_ack); + void handle_lock_acquired(Context *on_ack); + void handle_lock_released(Context *on_ack); + + void handle_payload(const leader_watcher::HeartbeatPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::LockAcquiredPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::LockReleasedPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::UnknownPayload &payload, + Context *on_notify_ack); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_LEADER_WATCHER_H diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc new file mode 100644 index 00000000..ef18a0b6 --- /dev/null +++ b/src/tools/rbd_mirror/Mirror.cc @@ -0,0 +1,448 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <signal.h> + +#include <boost/range/adaptor/map.hpp> + +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "Mirror.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Mirror: " << this << " " \ + << __func__ << ": " + +using std::list; +using std::map; +using std::set; +using std::string; +using std::unique_ptr; +using std::vector; + +using librados::Rados; +using librados::IoCtx; +using librbd::mirror_peer_t; + +namespace rbd { +namespace mirror { + +namespace { + +class MirrorAdminSocketCommand { +public: + virtual ~MirrorAdminSocketCommand() {} + virtual bool call(Formatter *f, stringstream *ss) = 0; +}; + +class StatusCommand : public MirrorAdminSocketCommand { +public: + explicit StatusCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->print_status(f, ss); + return true; + } + +private: + Mirror *mirror; +}; + +class StartCommand : public MirrorAdminSocketCommand { +public: + explicit StartCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->start(); + return true; + } + +private: + Mirror *mirror; +}; + +class StopCommand : public MirrorAdminSocketCommand { +public: + explicit StopCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->stop(); + return true; + } + +private: + Mirror *mirror; +}; + +class RestartCommand : public MirrorAdminSocketCommand { +public: + explicit RestartCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->restart(); + return true; + } + +private: + Mirror *mirror; +}; + +class FlushCommand : public MirrorAdminSocketCommand { +public: + explicit FlushCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->flush(); + return true; + } + +private: + Mirror *mirror; +}; + +class LeaderReleaseCommand : public MirrorAdminSocketCommand { +public: + explicit LeaderReleaseCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->release_leader(); + return true; + } + +private: + Mirror *mirror; +}; + +} // anonymous namespace + +class MirrorAdminSocketHook : public AdminSocketHook { +public: + MirrorAdminSocketHook(CephContext *cct, Mirror *mirror) : + admin_socket(cct->get_admin_socket()) { + std::string command; + int r; + + command = "rbd mirror status"; + r = admin_socket->register_command(command, command, this, + "get status for rbd mirror"); + if (r == 0) { + commands[command] = new StatusCommand(mirror); + } + + command = "rbd mirror start"; + r = admin_socket->register_command(command, command, this, + "start rbd mirror"); + if (r == 0) { + commands[command] = new StartCommand(mirror); + } + + command = "rbd mirror stop"; + r = admin_socket->register_command(command, command, this, + "stop rbd mirror"); + if (r == 0) { + commands[command] = new StopCommand(mirror); + } + + command = "rbd mirror restart"; + r = admin_socket->register_command(command, command, this, + "restart rbd mirror"); + if (r == 0) { + commands[command] = new RestartCommand(mirror); + } + + command = "rbd mirror flush"; + r = admin_socket->register_command(command, command, this, + "flush rbd mirror"); + if (r == 0) { + commands[command] = new FlushCommand(mirror); + } + + command = "rbd mirror leader release"; + r = admin_socket->register_command(command, command, this, + "release rbd mirror leader"); + if (r == 0) { + commands[command] = new LeaderReleaseCommand(mirror); + } + } + + ~MirrorAdminSocketHook() override { + for (Commands::const_iterator i = commands.begin(); i != commands.end(); + ++i) { + (void)admin_socket->unregister_command(i->first); + delete i->second; + } + } + + bool call(std::string_view command, const cmdmap_t& cmdmap, + std::string_view format, bufferlist& out) override { + Commands::const_iterator i = commands.find(command); + ceph_assert(i != commands.end()); + Formatter *f = Formatter::create(format); + stringstream ss; + bool r = i->second->call(f, &ss); + delete f; + out.append(ss); + return r; + } + +private: + typedef std::map<std::string, MirrorAdminSocketCommand*, std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +Mirror::Mirror(CephContext *cct, const std::vector<const char*> &args) : + m_cct(cct), + m_args(args), + m_lock("rbd::mirror::Mirror"), + m_local(new librados::Rados()), + m_asok_hook(new MirrorAdminSocketHook(cct, this)) +{ + m_threads = + &(cct->lookup_or_create_singleton_object<Threads<librbd::ImageCtx>>( + "rbd_mirror::threads", false, cct)); + m_service_daemon.reset(new ServiceDaemon<>(m_cct, m_local, m_threads)); +} + +Mirror::~Mirror() +{ + delete m_asok_hook; +} + +void Mirror::handle_signal(int signum) +{ + dout(20) << signum << dendl; + + Mutex::Locker l(m_lock); + + switch (signum) { + case SIGHUP: + for (auto &it : m_pool_replayers) { + it.second->reopen_logs(); + } + g_ceph_context->reopen_logs(); + break; + + case SIGINT: + case SIGTERM: + m_stopping = true; + m_cond.Signal(); + break; + + default: + ceph_abort_msgf("unexpected signal %d", signum); + } +} + +int Mirror::init() +{ + int r = m_local->init_with_context(m_cct); + if (r < 0) { + derr << "could not initialize rados handle" << dendl; + return r; + } + + r = m_local->connect(); + if (r < 0) { + derr << "error connecting to local cluster" << dendl; + return r; + } + + r = m_service_daemon->init(); + if (r < 0) { + derr << "error registering service daemon: " << cpp_strerror(r) << dendl; + return r; + } + + m_local_cluster_watcher.reset(new ClusterWatcher(m_local, m_lock, + m_service_daemon.get())); + return r; +} + +void Mirror::run() +{ + dout(20) << "enter" << dendl; + while (!m_stopping) { + m_local_cluster_watcher->refresh_pools(); + Mutex::Locker l(m_lock); + if (!m_manual_stop) { + update_pool_replayers(m_local_cluster_watcher->get_pool_peers()); + } + m_cond.WaitInterval( + m_lock, + utime_t(m_cct->_conf.get_val<uint64_t>("rbd_mirror_pool_replayers_refresh_interval"), 0)); + } + + // stop all pool replayers in parallel + Mutex::Locker locker(m_lock); + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->stop(false); + } + dout(20) << "return" << dendl; +} + +void Mirror::print_status(Formatter *f, stringstream *ss) +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + if (f) { + f->open_object_section("mirror_status"); + f->open_array_section("pool_replayers"); + }; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->print_status(f, ss); + } + + if (f) { + f->close_section(); + f->close_section(); + f->flush(*ss); + } +} + +void Mirror::start() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->start(); + } +} + +void Mirror::stop() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + m_manual_stop = true; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->stop(true); + } +} + +void Mirror::restart() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->restart(); + } +} + +void Mirror::flush() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping || m_manual_stop) { + return; + } + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->flush(); + } +} + +void Mirror::release_leader() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->release_leader(); + } +} + +void Mirror::update_pool_replayers(const PoolPeers &pool_peers) +{ + dout(20) << "enter" << dendl; + ceph_assert(m_lock.is_locked()); + + // remove stale pool replayers before creating new pool replayers + for (auto it = m_pool_replayers.begin(); it != m_pool_replayers.end();) { + auto &peer = it->first.second; + auto pool_peer_it = pool_peers.find(it->first.first); + if (pool_peer_it == pool_peers.end() || + pool_peer_it->second.find(peer) == pool_peer_it->second.end()) { + dout(20) << "removing pool replayer for " << peer << dendl; + // TODO: make async + it->second->shut_down(); + it = m_pool_replayers.erase(it); + } else { + ++it; + } + } + + for (auto &kv : pool_peers) { + for (auto &peer : kv.second) { + PoolPeer pool_peer(kv.first, peer); + + auto pool_replayers_it = m_pool_replayers.find(pool_peer); + if (pool_replayers_it != m_pool_replayers.end()) { + auto& pool_replayer = pool_replayers_it->second; + if (pool_replayer->is_blacklisted()) { + derr << "restarting blacklisted pool replayer for " << peer << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(); + } else if (!pool_replayer->is_running()) { + derr << "restarting failed pool replayer for " << peer << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(); + } + } else { + dout(20) << "starting pool replayer for " << peer << dendl; + unique_ptr<PoolReplayer<>> pool_replayer(new PoolReplayer<>( + m_threads, m_service_daemon.get(), kv.first, peer, m_args)); + + // TODO: make async + pool_replayer->init(); + m_pool_replayers.emplace(pool_peer, std::move(pool_replayer)); + } + } + + // TODO currently only support a single peer + } +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/Mirror.h b/src/tools/rbd_mirror/Mirror.h new file mode 100644 index 00000000..153c0bc5 --- /dev/null +++ b/src/tools/rbd_mirror/Mirror.h @@ -0,0 +1,77 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_H +#define CEPH_RBD_MIRROR_H + +#include "common/ceph_context.h" +#include "common/Mutex.h" +#include "include/rados/librados.hpp" +#include "ClusterWatcher.h" +#include "PoolReplayer.h" +#include "tools/rbd_mirror/Types.h" + +#include <set> +#include <map> +#include <memory> +#include <atomic> + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct ServiceDaemon; +template <typename> struct Threads; +class MirrorAdminSocketHook; + +/** + * Contains the main loop and overall state for rbd-mirror. + * + * Sets up mirroring, and coordinates between noticing config + * changes and applying them. + */ +class Mirror { +public: + Mirror(CephContext *cct, const std::vector<const char*> &args); + Mirror(const Mirror&) = delete; + Mirror& operator=(const Mirror&) = delete; + ~Mirror(); + + int init(); + void run(); + void handle_signal(int signum); + + void print_status(Formatter *f, stringstream *ss); + void start(); + void stop(); + void restart(); + void flush(); + void release_leader(); + +private: + typedef ClusterWatcher::PoolPeers PoolPeers; + typedef std::pair<int64_t, PeerSpec> PoolPeer; + + void update_pool_replayers(const PoolPeers &pool_peers); + + CephContext *m_cct; + std::vector<const char*> m_args; + Threads<librbd::ImageCtx> *m_threads = nullptr; + Mutex m_lock; + Cond m_cond; + RadosRef m_local; + std::unique_ptr<ServiceDaemon<librbd::ImageCtx>> m_service_daemon; + + // monitor local cluster for config changes in peers + std::unique_ptr<ClusterWatcher> m_local_cluster_watcher; + std::map<PoolPeer, std::unique_ptr<PoolReplayer<>>> m_pool_replayers; + std::atomic<bool> m_stopping = { false }; + bool m_manual_stop = false; + MirrorAdminSocketHook *m_asok_hook; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_H diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.cc b/src/tools/rbd_mirror/MirrorStatusWatcher.cc new file mode 100644 index 00000000..b935bc5c --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusWatcher.cc @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "MirrorStatusWatcher.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::MirrorStatusWatcher: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_rados_callback; + +template <typename I> +MirrorStatusWatcher<I>::MirrorStatusWatcher(librados::IoCtx &io_ctx, + ContextWQ *work_queue) + : Watcher(io_ctx, work_queue, RBD_MIRRORING) { +} + +template <typename I> +MirrorStatusWatcher<I>::~MirrorStatusWatcher() { +} + +template <typename I> +void MirrorStatusWatcher<I>::init(Context *on_finish) { + dout(20) << dendl; + + on_finish = new FunctionContext( + [this, on_finish] (int r) { + if (r < 0) { + derr << "error removing down statuses: " << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + register_watch(on_finish); + }); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_status_remove_down(&op); + librados::AioCompletion *aio_comp = create_rados_callback(on_finish); + + int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void MirrorStatusWatcher<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + unregister_watch(on_finish); +} + +template <typename I> +void MirrorStatusWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, + bufferlist &bl) { + dout(20) << dendl; + + bufferlist out; + acknowledge_notify(notify_id, handle, out); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::MirrorStatusWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.h b/src/tools/rbd_mirror/MirrorStatusWatcher.h new file mode 100644 index 00000000..155f8cc8 --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusWatcher.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H +#define CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H + +#include "librbd/Watcher.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class MirrorStatusWatcher : protected librbd::Watcher { +public: + static MirrorStatusWatcher *create(librados::IoCtx &io_ctx, + ContextWQ *work_queue) { + return new MirrorStatusWatcher(io_ctx, work_queue); + } + void destroy() { + delete this; + } + + MirrorStatusWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue); + ~MirrorStatusWatcher() override; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + +protected: + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H diff --git a/src/tools/rbd_mirror/PoolReplayer.cc b/src/tools/rbd_mirror/PoolReplayer.cc new file mode 100644 index 00000000..35d32eb5 --- /dev/null +++ b/src/tools/rbd_mirror/PoolReplayer.cc @@ -0,0 +1,1133 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "PoolReplayer.h" +#include <boost/bind.hpp> +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/ceph_argparse.h" +#include "common/code_environment.h" +#include "common/common_init.h" +#include "common/debug.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "cls/rbd/cls_rbd_client.h" +#include "global/global_context.h" +#include "librbd/internal.h" +#include "librbd/Utils.h" +#include "librbd/Watcher.h" +#include "librbd/api/Config.h" +#include "librbd/api/Mirror.h" +#include "ImageMap.h" +#include "InstanceReplayer.h" +#include "InstanceWatcher.h" +#include "LeaderWatcher.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolReplayer: " \ + << this << " " << __func__ << ": " + +using std::chrono::seconds; +using std::map; +using std::string; +using std::unique_ptr; +using std::vector; + +using librbd::cls_client::dir_get_name; +using librbd::util::create_async_context_callback; + +namespace rbd { +namespace mirror { + +using ::operator<<; + +namespace { + +const std::string SERVICE_DAEMON_INSTANCE_ID_KEY("instance_id"); +const std::string SERVICE_DAEMON_LEADER_KEY("leader"); +const std::string SERVICE_DAEMON_LOCAL_COUNT_KEY("image_local_count"); +const std::string SERVICE_DAEMON_REMOTE_COUNT_KEY("image_remote_count"); + +const std::vector<std::string> UNIQUE_PEER_CONFIG_KEYS { + {"monmap", "mon_host", "mon_dns_srv_name", "key", "keyfile", "keyring"}}; + +template <typename I> +class PoolReplayerAdminSocketCommand { +public: + PoolReplayerAdminSocketCommand(PoolReplayer<I> *pool_replayer) + : pool_replayer(pool_replayer) { + } + virtual ~PoolReplayerAdminSocketCommand() {} + virtual bool call(Formatter *f, stringstream *ss) = 0; +protected: + PoolReplayer<I> *pool_replayer; +}; + +template <typename I> +class StatusCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StatusCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->print_status(f, ss); + return true; + } +}; + +template <typename I> +class StartCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StartCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->start(); + return true; + } +}; + +template <typename I> +class StopCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StopCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->stop(true); + return true; + } +}; + +template <typename I> +class RestartCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit RestartCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->restart(); + return true; + } +}; + +template <typename I> +class FlushCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit FlushCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->flush(); + return true; + } +}; + +template <typename I> +class LeaderReleaseCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit LeaderReleaseCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->release_leader(); + return true; + } +}; + +template <typename I> +class PoolReplayerAdminSocketHook : public AdminSocketHook { +public: + PoolReplayerAdminSocketHook(CephContext *cct, const std::string &name, + PoolReplayer<I> *pool_replayer) + : admin_socket(cct->get_admin_socket()) { + std::string command; + int r; + + command = "rbd mirror status " + name; + r = admin_socket->register_command(command, command, this, + "get status for rbd mirror " + name); + if (r == 0) { + commands[command] = new StatusCommand<I>(pool_replayer); + } + + command = "rbd mirror start " + name; + r = admin_socket->register_command(command, command, this, + "start rbd mirror " + name); + if (r == 0) { + commands[command] = new StartCommand<I>(pool_replayer); + } + + command = "rbd mirror stop " + name; + r = admin_socket->register_command(command, command, this, + "stop rbd mirror " + name); + if (r == 0) { + commands[command] = new StopCommand<I>(pool_replayer); + } + + command = "rbd mirror restart " + name; + r = admin_socket->register_command(command, command, this, + "restart rbd mirror " + name); + if (r == 0) { + commands[command] = new RestartCommand<I>(pool_replayer); + } + + command = "rbd mirror flush " + name; + r = admin_socket->register_command(command, command, this, + "flush rbd mirror " + name); + if (r == 0) { + commands[command] = new FlushCommand<I>(pool_replayer); + } + + command = "rbd mirror leader release " + name; + r = admin_socket->register_command(command, command, this, + "release rbd mirror leader " + name); + if (r == 0) { + commands[command] = new LeaderReleaseCommand<I>(pool_replayer); + } + } + + ~PoolReplayerAdminSocketHook() override { + for (auto i = commands.begin(); i != commands.end(); ++i) { + (void)admin_socket->unregister_command(i->first); + delete i->second; + } + } + + bool call(std::string_view command, const cmdmap_t& cmdmap, + std::string_view format, bufferlist& out) override { + auto i = commands.find(command); + ceph_assert(i != commands.end()); + Formatter *f = Formatter::create(format); + stringstream ss; + bool r = i->second->call(f, &ss); + delete f; + out.append(ss); + return r; + } + +private: + typedef std::map<std::string, PoolReplayerAdminSocketCommand<I>*, + std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +} // anonymous namespace + +template <typename I> +PoolReplayer<I>::PoolReplayer(Threads<I> *threads, + ServiceDaemon<I>* service_daemon, + int64_t local_pool_id, const PeerSpec &peer, + const std::vector<const char*> &args) : + m_threads(threads), + m_service_daemon(service_daemon), + m_local_pool_id(local_pool_id), + m_peer(peer), + m_args(args), + m_lock(stringify("rbd::mirror::PoolReplayer ") + stringify(peer)), + m_local_pool_watcher_listener(this, true), + m_remote_pool_watcher_listener(this, false), + m_image_map_listener(this), + m_pool_replayer_thread(this), + m_leader_listener(this) +{ +} + +template <typename I> +PoolReplayer<I>::~PoolReplayer() +{ + delete m_asok_hook; + shut_down(); +} + +template <typename I> +bool PoolReplayer<I>::is_blacklisted() const { + Mutex::Locker locker(m_lock); + return m_blacklisted; +} + +template <typename I> +bool PoolReplayer<I>::is_leader() const { + Mutex::Locker locker(m_lock); + return m_leader_watcher && m_leader_watcher->is_leader(); +} + +template <typename I> +bool PoolReplayer<I>::is_running() const { + return m_pool_replayer_thread.is_started(); +} + +template <typename I> +void PoolReplayer<I>::init() +{ + Mutex::Locker l(m_lock); + + ceph_assert(!m_pool_replayer_thread.is_started()); + + // reset state + m_stopping = false; + m_blacklisted = false; + + dout(10) << "replaying for " << m_peer << dendl; + int r = init_rados(g_ceph_context->_conf->cluster, + g_ceph_context->_conf->name.to_str(), + "", "", "local cluster", &m_local_rados, false); + if (r < 0) { + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to connect to local cluster"); + return; + } + + r = init_rados(m_peer.cluster_name, m_peer.client_name, + m_peer.mon_host, m_peer.key, + std::string("remote peer ") + stringify(m_peer), + &m_remote_rados, true); + if (r < 0) { + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to connect to remote cluster"); + return; + } + + r = m_local_rados->ioctx_create2(m_local_pool_id, m_local_io_ctx); + if (r < 0) { + derr << "error accessing local pool " << m_local_pool_id << ": " + << cpp_strerror(r) << dendl; + return; + } + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + librbd::api::Config<I>::apply_pool_overrides(m_local_io_ctx, &cct->_conf); + + std::string local_mirror_uuid; + r = librbd::cls_client::mirror_uuid_get(&m_local_io_ctx, + &local_mirror_uuid); + if (r < 0) { + derr << "failed to retrieve local mirror uuid from pool " + << m_local_io_ctx.get_pool_name() << ": " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to query local mirror uuid"); + return; + } + + r = m_remote_rados->ioctx_create(m_local_io_ctx.get_pool_name().c_str(), + m_remote_io_ctx); + if (r < 0) { + derr << "error accessing remote pool " << m_local_io_ctx.get_pool_name() + << ": " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_WARNING, + "unable to access remote pool"); + return; + } + + dout(10) << "connected to " << m_peer << dendl; + + m_instance_replayer.reset(InstanceReplayer<I>::create( + m_threads, m_service_daemon, m_local_rados, local_mirror_uuid, + m_local_pool_id)); + m_instance_replayer->init(); + m_instance_replayer->add_peer(m_peer.uuid, m_remote_io_ctx); + + m_instance_watcher.reset(InstanceWatcher<I>::create( + m_local_io_ctx, m_threads->work_queue, m_instance_replayer.get())); + r = m_instance_watcher->init(); + if (r < 0) { + derr << "error initializing instance watcher: " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to initialize instance messenger object"); + return; + } + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_INSTANCE_ID_KEY, + m_instance_watcher->get_instance_id()); + + m_leader_watcher.reset(LeaderWatcher<I>::create(m_threads, m_local_io_ctx, + &m_leader_listener)); + r = m_leader_watcher->init(); + if (r < 0) { + derr << "error initializing leader watcher: " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to initialize leader messenger object"); + return; + } + + if (m_callout_id != service_daemon::CALLOUT_ID_NONE) { + m_service_daemon->remove_callout(m_local_pool_id, m_callout_id); + m_callout_id = service_daemon::CALLOUT_ID_NONE; + } + + m_pool_replayer_thread.create("pool replayer"); +} + +template <typename I> +void PoolReplayer<I>::shut_down() { + m_stopping = true; + { + Mutex::Locker l(m_lock); + m_cond.Signal(); + } + if (m_pool_replayer_thread.is_started()) { + m_pool_replayer_thread.join(); + } + if (m_leader_watcher) { + m_leader_watcher->shut_down(); + } + if (m_instance_watcher) { + m_instance_watcher->shut_down(); + } + if (m_instance_replayer) { + m_instance_replayer->shut_down(); + } + + m_leader_watcher.reset(); + m_instance_watcher.reset(); + m_instance_replayer.reset(); + + ceph_assert(!m_image_map); + ceph_assert(!m_image_deleter); + ceph_assert(!m_local_pool_watcher); + ceph_assert(!m_remote_pool_watcher); + m_local_rados.reset(); + m_remote_rados.reset(); +} + +template <typename I> +int PoolReplayer<I>::init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, + RadosRef *rados_ref, + bool strip_cluster_overrides) { + // NOTE: manually bootstrap a CephContext here instead of via + // the librados API to avoid mixing global singletons between + // the librados shared library and the daemon + // TODO: eliminate intermingling of global singletons within Ceph APIs + CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT); + if (client_name.empty() || !iparams.name.from_str(client_name)) { + derr << "error initializing cluster handle for " << description << dendl; + return -EINVAL; + } + + CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + cct->_conf->cluster = cluster_name; + + // librados::Rados::conf_read_file + int r = cct->_conf.parse_config_files(nullptr, nullptr, 0); + if (r < 0 && r != -ENOENT) { + // do not treat this as fatal, it might still be able to connect + derr << "could not read ceph conf for " << description << ": " + << cpp_strerror(r) << dendl; + } + + // preserve cluster-specific config settings before applying environment/cli + // overrides + std::map<std::string, std::string> config_values; + if (strip_cluster_overrides) { + // remote peer connections shouldn't apply cluster-specific + // configuration settings + for (auto& key : UNIQUE_PEER_CONFIG_KEYS) { + config_values[key] = cct->_conf.get_val<std::string>(key); + } + } + + cct->_conf.parse_env(cct->get_module_type()); + + // librados::Rados::conf_parse_env + std::vector<const char*> args; + r = cct->_conf.parse_argv(args); + if (r < 0) { + derr << "could not parse environment for " << description << ":" + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + cct->_conf.parse_env(cct->get_module_type()); + + if (!m_args.empty()) { + // librados::Rados::conf_parse_argv + args = m_args; + r = cct->_conf.parse_argv(args); + if (r < 0) { + derr << "could not parse command line args for " << description << ": " + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + } + + if (strip_cluster_overrides) { + // remote peer connections shouldn't apply cluster-specific + // configuration settings + for (auto& pair : config_values) { + auto value = cct->_conf.get_val<std::string>(pair.first); + if (pair.second != value) { + dout(0) << "reverting global config option override: " + << pair.first << ": " << value << " -> " << pair.second + << dendl; + cct->_conf.set_val_or_die(pair.first, pair.second); + } + } + } + + if (!g_ceph_context->_conf->admin_socket.empty()) { + cct->_conf.set_val_or_die("admin_socket", + "$run_dir/$name.$pid.$cluster.$cctid.asok"); + } + + if (!mon_host.empty()) { + r = cct->_conf.set_val("mon_host", mon_host); + if (r < 0) { + derr << "failed to set mon_host config for " << description << ": " + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + } + + if (!key.empty()) { + r = cct->_conf.set_val("key", key); + if (r < 0) { + derr << "failed to set key config for " << description << ": " + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + } + + // disable unnecessary librbd cache + cct->_conf.set_val_or_die("rbd_cache", "false"); + cct->_conf.apply_changes(nullptr); + cct->_conf.complain_about_parse_errors(cct); + + rados_ref->reset(new librados::Rados()); + + r = (*rados_ref)->init_with_context(cct); + ceph_assert(r == 0); + cct->put(); + + r = (*rados_ref)->connect(); + if (r < 0) { + derr << "error connecting to " << description << ": " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +void PoolReplayer<I>::run() +{ + dout(20) << "enter" << dendl; + + while (!m_stopping) { + std::string asok_hook_name = m_local_io_ctx.get_pool_name() + " " + + m_peer.cluster_name; + if (m_asok_hook_name != asok_hook_name || m_asok_hook == nullptr) { + m_asok_hook_name = asok_hook_name; + delete m_asok_hook; + + m_asok_hook = new PoolReplayerAdminSocketHook<I>(g_ceph_context, + m_asok_hook_name, this); + } + + Mutex::Locker locker(m_lock); + if (m_leader_watcher->is_blacklisted() || + m_instance_replayer->is_blacklisted() || + (m_local_pool_watcher && m_local_pool_watcher->is_blacklisted()) || + (m_remote_pool_watcher && m_remote_pool_watcher->is_blacklisted())) { + m_blacklisted = true; + m_stopping = true; + break; + } + + if (!m_stopping) { + m_cond.WaitInterval(m_lock, utime_t(1, 0)); + } + } + + m_instance_replayer->stop(); +} + +template <typename I> +void PoolReplayer<I>::reopen_logs() +{ + Mutex::Locker l(m_lock); + + if (m_local_rados) { + reinterpret_cast<CephContext *>(m_local_rados->cct())->reopen_logs(); + } + if (m_remote_rados) { + reinterpret_cast<CephContext *>(m_remote_rados->cct())->reopen_logs(); + } +} + +template <typename I> +void PoolReplayer<I>::print_status(Formatter *f, stringstream *ss) +{ + dout(20) << "enter" << dendl; + + if (!f) { + return; + } + + Mutex::Locker l(m_lock); + + f->open_object_section("pool_replayer_status"); + f->dump_stream("peer") << m_peer; + if (m_local_io_ctx.is_valid()) { + f->dump_string("pool", m_local_io_ctx.get_pool_name()); + f->dump_stream("instance_id") << m_instance_watcher->get_instance_id(); + } + + std::string state("running"); + if (m_manual_stop) { + state = "stopped (manual)"; + } else if (m_stopping) { + state = "stopped"; + } + f->dump_string("state", state); + + std::string leader_instance_id; + m_leader_watcher->get_leader_instance_id(&leader_instance_id); + f->dump_string("leader_instance_id", leader_instance_id); + + bool leader = m_leader_watcher->is_leader(); + f->dump_bool("leader", leader); + if (leader) { + std::vector<std::string> instance_ids; + m_leader_watcher->list_instances(&instance_ids); + f->open_array_section("instances"); + for (auto instance_id : instance_ids) { + f->dump_string("instance_id", instance_id); + } + f->close_section(); + } + + f->dump_string("local_cluster_admin_socket", + reinterpret_cast<CephContext *>(m_local_io_ctx.cct())->_conf. + get_val<std::string>("admin_socket")); + f->dump_string("remote_cluster_admin_socket", + reinterpret_cast<CephContext *>(m_remote_io_ctx.cct())->_conf. + get_val<std::string>("admin_socket")); + + f->open_object_section("sync_throttler"); + m_instance_watcher->print_sync_status(f, ss); + f->close_section(); + + m_instance_replayer->print_status(f, ss); + + if (m_image_deleter) { + f->open_object_section("image_deleter"); + m_image_deleter->print_status(f, ss); + f->close_section(); + } + + f->close_section(); + f->flush(*ss); +} + +template <typename I> +void PoolReplayer<I>::start() +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + if (m_instance_replayer) { + m_instance_replayer->start(); + } +} + +template <typename I> +void PoolReplayer<I>::stop(bool manual) +{ + dout(20) << "enter: manual=" << manual << dendl; + + Mutex::Locker l(m_lock); + if (!manual) { + m_stopping = true; + m_cond.Signal(); + return; + } else if (m_stopping) { + return; + } + + m_manual_stop = true; + + if (m_instance_replayer) { + m_instance_replayer->stop(); + } +} + +template <typename I> +void PoolReplayer<I>::restart() +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + if (m_instance_replayer) { + m_instance_replayer->restart(); + } +} + +template <typename I> +void PoolReplayer<I>::flush() +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping || m_manual_stop) { + return; + } + + if (m_instance_replayer) { + m_instance_replayer->flush(); + } +} + +template <typename I> +void PoolReplayer<I>::release_leader() +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping || !m_leader_watcher) { + return; + } + + m_leader_watcher->release_leader(); +} + +template <typename I> +void PoolReplayer<I>::handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) { + if (m_stopping) { + return; + } + + dout(10) << "mirror_uuid=" << mirror_uuid << ", " + << "added_count=" << added_image_ids.size() << ", " + << "removed_count=" << removed_image_ids.size() << dendl; + Mutex::Locker locker(m_lock); + if (!m_leader_watcher->is_leader()) { + return; + } + + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_LOCAL_COUNT_KEY, + m_local_pool_watcher->get_image_count()); + if (m_remote_pool_watcher) { + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_REMOTE_COUNT_KEY, + m_remote_pool_watcher->get_image_count()); + } + + std::set<std::string> added_global_image_ids; + for (auto& image_id : added_image_ids) { + added_global_image_ids.insert(image_id.global_id); + } + + std::set<std::string> removed_global_image_ids; + for (auto& image_id : removed_image_ids) { + removed_global_image_ids.insert(image_id.global_id); + } + + m_image_map->update_images(mirror_uuid, + std::move(added_global_image_ids), + std::move(removed_global_image_ids)); +} + +template <typename I> +void PoolReplayer<I>::handle_post_acquire_leader(Context *on_finish) { + dout(10) << dendl; + + m_service_daemon->add_or_update_attribute(m_local_pool_id, + SERVICE_DAEMON_LEADER_KEY, true); + m_instance_watcher->handle_acquire_leader(); + init_image_map(on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_pre_release_leader(Context *on_finish) { + dout(10) << dendl; + + m_service_daemon->remove_attribute(m_local_pool_id, + SERVICE_DAEMON_LEADER_KEY); + m_instance_watcher->handle_release_leader(); + shut_down_image_deleter(on_finish); +} + +template <typename I> +void PoolReplayer<I>::init_image_map(Context *on_finish) { + dout(5) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(!m_image_map); + m_image_map.reset(ImageMap<I>::create(m_local_io_ctx, m_threads, + m_instance_watcher->get_instance_id(), + m_image_map_listener)); + + auto ctx = new FunctionContext([this, on_finish](int r) { + handle_init_image_map(r, on_finish); + }); + m_image_map->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void PoolReplayer<I>::handle_init_image_map(int r, Context *on_finish) { + dout(5) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to init image map: " << cpp_strerror(r) << dendl; + on_finish = new FunctionContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_image_map(on_finish); + return; + } + + init_local_pool_watcher(on_finish); +} + +template <typename I> +void PoolReplayer<I>::init_local_pool_watcher(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(!m_local_pool_watcher); + m_local_pool_watcher.reset(PoolWatcher<I>::create( + m_threads, m_local_io_ctx, m_local_pool_watcher_listener)); + + // ensure the initial set of local images is up-to-date + // after acquiring the leader role + auto ctx = new FunctionContext([this, on_finish](int r) { + handle_init_local_pool_watcher(r, on_finish); + }); + m_local_pool_watcher->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void PoolReplayer<I>::handle_init_local_pool_watcher( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to retrieve local images: " << cpp_strerror(r) << dendl; + on_finish = new FunctionContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_pool_watchers(on_finish); + return; + } + + init_remote_pool_watcher(on_finish); +} + +template <typename I> +void PoolReplayer<I>::init_remote_pool_watcher(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(!m_remote_pool_watcher); + m_remote_pool_watcher.reset(PoolWatcher<I>::create( + m_threads, m_remote_io_ctx, m_remote_pool_watcher_listener)); + + auto ctx = new FunctionContext([this, on_finish](int r) { + handle_init_remote_pool_watcher(r, on_finish); + }); + m_remote_pool_watcher->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void PoolReplayer<I>::handle_init_remote_pool_watcher( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r == -ENOENT) { + // Technically nothing to do since the other side doesn't + // have mirroring enabled. Eventually the remote pool watcher will + // detect images (if mirroring is enabled), so no point propagating + // an error which would just busy-spin the state machines. + dout(0) << "remote peer does not have mirroring configured" << dendl; + } else if (r < 0) { + derr << "failed to retrieve remote images: " << cpp_strerror(r) << dendl; + on_finish = new FunctionContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_pool_watchers(on_finish); + return; + } + + init_image_deleter(on_finish); +} + +template <typename I> +void PoolReplayer<I>::init_image_deleter(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(!m_image_deleter); + + on_finish = new FunctionContext([this, on_finish](int r) { + handle_init_image_deleter(r, on_finish); + }); + m_image_deleter.reset(ImageDeleter<I>::create(m_local_io_ctx, m_threads, + m_service_daemon)); + m_image_deleter->init(create_async_context_callback( + m_threads->work_queue, on_finish)); +} + +template <typename I> +void PoolReplayer<I>::handle_init_image_deleter(int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to init image deleter: " << cpp_strerror(r) << dendl; + on_finish = new FunctionContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_image_deleter(on_finish); + return; + } + + on_finish->complete(0); + + Mutex::Locker locker(m_lock); + m_cond.Signal(); +} + +template <typename I> +void PoolReplayer<I>::shut_down_image_deleter(Context* on_finish) { + dout(10) << dendl; + { + Mutex::Locker locker(m_lock); + if (m_image_deleter) { + Context *ctx = new FunctionContext([this, on_finish](int r) { + handle_shut_down_image_deleter(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + m_image_deleter->shut_down(ctx); + return; + } + } + shut_down_pool_watchers(on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_shut_down_image_deleter( + int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_deleter); + m_image_deleter.reset(); + } + + shut_down_pool_watchers(on_finish); +} + +template <typename I> +void PoolReplayer<I>::shut_down_pool_watchers(Context *on_finish) { + dout(10) << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_local_pool_watcher) { + Context *ctx = new FunctionContext([this, on_finish](int r) { + handle_shut_down_pool_watchers(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + auto gather_ctx = new C_Gather(g_ceph_context, ctx); + m_local_pool_watcher->shut_down(gather_ctx->new_sub()); + if (m_remote_pool_watcher) { + m_remote_pool_watcher->shut_down(gather_ctx->new_sub()); + } + gather_ctx->activate(); + return; + } + } + + on_finish->complete(0); +} + +template <typename I> +void PoolReplayer<I>::handle_shut_down_pool_watchers( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_local_pool_watcher); + m_local_pool_watcher.reset(); + + if (m_remote_pool_watcher) { + m_remote_pool_watcher.reset(); + } + } + wait_for_update_ops(on_finish); +} + +template <typename I> +void PoolReplayer<I>::wait_for_update_ops(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + Context *ctx = new FunctionContext([this, on_finish](int r) { + handle_wait_for_update_ops(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + m_update_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void PoolReplayer<I>::handle_wait_for_update_ops(int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + ceph_assert(r == 0); + + shut_down_image_map(on_finish); +} + +template <typename I> +void PoolReplayer<I>::shut_down_image_map(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_image_map) { + on_finish = new FunctionContext([this, on_finish](int r) { + handle_shut_down_image_map(r, on_finish); + }); + m_image_map->shut_down(create_async_context_callback( + m_threads->work_queue, on_finish)); + return; + } + } + + on_finish->complete(0); +} + +template <typename I> +void PoolReplayer<I>::handle_shut_down_image_map(int r, Context *on_finish) { + dout(5) << "r=" << r << dendl; + if (r < 0 && r != -EBLACKLISTED) { + derr << "failed to shut down image map: " << cpp_strerror(r) << dendl; + } + + Mutex::Locker locker(m_lock); + ceph_assert(m_image_map); + m_image_map.reset(); + + m_instance_replayer->release_all(on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_update_leader( + const std::string &leader_instance_id) { + dout(10) << "leader_instance_id=" << leader_instance_id << dendl; + + m_instance_watcher->handle_update_leader(leader_instance_id); +} + +template <typename I> +void PoolReplayer<I>::handle_acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_image_acquire(instance_id, global_image_id, + on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_image_release(instance_id, global_image_id, + on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + ceph_assert(!mirror_uuid.empty()); + dout(5) << "mirror_uuid=" << mirror_uuid << ", " + << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_peer_image_removed(instance_id, global_image_id, + mirror_uuid, on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_instances_added(const InstanceIds &instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + Mutex::Locker locker(m_lock); + if (!m_leader_watcher->is_leader()) { + return; + } + + ceph_assert(m_image_map); + m_image_map->update_instances_added(instance_ids); +} + +template <typename I> +void PoolReplayer<I>::handle_instances_removed( + const InstanceIds &instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + Mutex::Locker locker(m_lock); + if (!m_leader_watcher->is_leader()) { + return; + } + + ceph_assert(m_image_map); + m_image_map->update_instances_removed(instance_ids); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::PoolReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/PoolReplayer.h b/src/tools/rbd_mirror/PoolReplayer.h new file mode 100644 index 00000000..43a4a0fc --- /dev/null +++ b/src/tools/rbd_mirror/PoolReplayer.h @@ -0,0 +1,303 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_REPLAYER_H +#define CEPH_RBD_MIRROR_POOL_REPLAYER_H + +#include "common/AsyncOpTracker.h" +#include "common/Cond.h" +#include "common/Mutex.h" +#include "common/WorkQueue.h" +#include "include/rados/librados.hpp" + +#include "ClusterWatcher.h" +#include "LeaderWatcher.h" +#include "PoolWatcher.h" +#include "ImageDeleter.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_map/Types.h" +#include "tools/rbd_mirror/leader_watcher/Types.h" +#include "tools/rbd_mirror/pool_watcher/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" + +#include <set> +#include <map> +#include <memory> +#include <atomic> +#include <string> +#include <vector> + +class AdminSocketHook; + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ImageMap; +template <typename> class InstanceReplayer; +template <typename> class InstanceWatcher; +template <typename> class ServiceDaemon; +template <typename> struct Threads; + +/** + * Controls mirroring for a single remote cluster. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class PoolReplayer { +public: + PoolReplayer(Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT>* service_daemon, + int64_t local_pool_id, const PeerSpec &peer, + const std::vector<const char*> &args); + ~PoolReplayer(); + PoolReplayer(const PoolReplayer&) = delete; + PoolReplayer& operator=(const PoolReplayer&) = delete; + + bool is_blacklisted() const; + bool is_leader() const; + bool is_running() const; + + void init(); + void shut_down(); + + void run(); + + void print_status(Formatter *f, stringstream *ss); + void start(); + void stop(bool manual); + void restart(); + void flush(); + void release_leader(); + void reopen_logs(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * <follower> <-------------------------\ + * . | + * . | + * v (leader acquired) | + * INIT_IMAGE_MAP SHUT_DOWN_IMAGE_MAP + * | ^ + * v | + * INIT_LOCAL_POOL_WATCHER WAIT_FOR_NOTIFICATIONS + * | ^ + * v | + * INIT_REMOTE_POOL_WATCHER SHUT_DOWN_POOL_WATCHERS + * | ^ + * v | + * INIT_IMAGE_DELETER SHUT_DOWN_IMAGE_DELETER + * | ^ + * v . + * <leader> <-----------\ . + * . | . + * . (image update) | . + * . . > NOTIFY_INSTANCE_WATCHER . + * . . + * . (leader lost / shut down) . + * . . . . . . . . . . . . . . . . . . + * + * @endverbatim + */ + + typedef std::vector<std::string> InstanceIds; + + struct PoolWatcherListener : public pool_watcher::Listener { + PoolReplayer *pool_replayer; + bool local; + + PoolWatcherListener(PoolReplayer *pool_replayer, bool local) + : pool_replayer(pool_replayer), local(local) { + } + + void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) override { + pool_replayer->handle_update((local ? "" : mirror_uuid), + std::move(added_image_ids), + std::move(removed_image_ids)); + } + }; + + struct ImageMapListener : public image_map::Listener { + PoolReplayer *pool_replayer; + + ImageMapListener(PoolReplayer *pool_replayer) + : pool_replayer(pool_replayer) { + } + + void acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + pool_replayer->handle_acquire_image(global_image_id, instance_id, + on_finish); + } + + void release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + pool_replayer->handle_release_image(global_image_id, instance_id, + on_finish); + } + + void remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + pool_replayer->handle_remove_image(mirror_uuid, global_image_id, + instance_id, on_finish); + } + }; + + void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids); + + int init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, RadosRef *rados_ref, + bool strip_cluster_overrides); + + void handle_post_acquire_leader(Context *on_finish); + void handle_pre_release_leader(Context *on_finish); + + void init_image_map(Context *on_finish); + void handle_init_image_map(int r, Context *on_finish); + + void init_local_pool_watcher(Context *on_finish); + void handle_init_local_pool_watcher(int r, Context *on_finish); + + void init_remote_pool_watcher(Context *on_finish); + void handle_init_remote_pool_watcher(int r, Context *on_finish); + + void init_image_deleter(Context* on_finish); + void handle_init_image_deleter(int r, Context* on_finish); + + void shut_down_image_deleter(Context* on_finish); + void handle_shut_down_image_deleter(int r, Context* on_finish); + + void shut_down_pool_watchers(Context *on_finish); + void handle_shut_down_pool_watchers(int r, Context *on_finish); + + void wait_for_update_ops(Context *on_finish); + void handle_wait_for_update_ops(int r, Context *on_finish); + + void shut_down_image_map(Context *on_finish); + void handle_shut_down_image_map(int r, Context *on_finish); + + void handle_update_leader(const std::string &leader_instance_id); + + void handle_acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + void handle_release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + void handle_remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + + void handle_instances_added(const InstanceIds &instance_ids); + void handle_instances_removed(const InstanceIds &instance_ids); + + Threads<ImageCtxT> *m_threads; + ServiceDaemon<ImageCtxT>* m_service_daemon; + int64_t m_local_pool_id = -1; + PeerSpec m_peer; + std::vector<const char*> m_args; + + mutable Mutex m_lock; + Cond m_cond; + std::atomic<bool> m_stopping = { false }; + bool m_manual_stop = false; + bool m_blacklisted = false; + + RadosRef m_local_rados; + RadosRef m_remote_rados; + + librados::IoCtx m_local_io_ctx; + librados::IoCtx m_remote_io_ctx; + + PoolWatcherListener m_local_pool_watcher_listener; + std::unique_ptr<PoolWatcher<ImageCtxT>> m_local_pool_watcher; + + PoolWatcherListener m_remote_pool_watcher_listener; + std::unique_ptr<PoolWatcher<ImageCtxT>> m_remote_pool_watcher; + + std::unique_ptr<InstanceReplayer<ImageCtxT>> m_instance_replayer; + std::unique_ptr<ImageDeleter<ImageCtxT>> m_image_deleter; + + ImageMapListener m_image_map_listener; + std::unique_ptr<ImageMap<ImageCtxT>> m_image_map; + + std::string m_asok_hook_name; + AdminSocketHook *m_asok_hook = nullptr; + + service_daemon::CalloutId m_callout_id = service_daemon::CALLOUT_ID_NONE; + + class PoolReplayerThread : public Thread { + PoolReplayer *m_pool_replayer; + public: + PoolReplayerThread(PoolReplayer *pool_replayer) + : m_pool_replayer(pool_replayer) { + } + void *entry() override { + m_pool_replayer->run(); + return 0; + } + } m_pool_replayer_thread; + + class LeaderListener : public leader_watcher::Listener { + public: + LeaderListener(PoolReplayer *pool_replayer) + : m_pool_replayer(pool_replayer) { + } + + protected: + void post_acquire_handler(Context *on_finish) override { + m_pool_replayer->handle_post_acquire_leader(on_finish); + } + + void pre_release_handler(Context *on_finish) override { + m_pool_replayer->handle_pre_release_leader(on_finish); + } + + void update_leader_handler( + const std::string &leader_instance_id) override { + m_pool_replayer->handle_update_leader(leader_instance_id); + } + + void handle_instances_added(const InstanceIds& instance_ids) override { + m_pool_replayer->handle_instances_added(instance_ids); + } + + void handle_instances_removed(const InstanceIds& instance_ids) override { + m_pool_replayer->handle_instances_removed(instance_ids); + } + + private: + PoolReplayer *m_pool_replayer; + } m_leader_listener; + + std::unique_ptr<LeaderWatcher<ImageCtxT>> m_leader_watcher; + std::unique_ptr<InstanceWatcher<ImageCtxT>> m_instance_watcher; + AsyncOpTracker m_update_op_tracker; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::PoolReplayer<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_REPLAYER_H diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc new file mode 100644 index 00000000..81810ea1 --- /dev/null +++ b/src/tools/rbd_mirror/PoolWatcher.cc @@ -0,0 +1,553 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/PoolWatcher.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/MirroringWatcher.h" +#include "librbd/Utils.h" +#include "librbd/api/Image.h" +#include "librbd/api/Mirror.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h" +#include <boost/bind.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolWatcher: " << this << " " \ + << __func__ << ": " + +using std::list; +using std::string; +using std::unique_ptr; +using std::vector; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { + +template <typename I> +class PoolWatcher<I>::MirroringWatcher : public librbd::MirroringWatcher<I> { +public: + using ContextWQ = typename std::decay< + typename std::remove_pointer< + decltype(Threads<I>::work_queue)>::type>::type; + + MirroringWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue, + PoolWatcher *pool_watcher) + : librbd::MirroringWatcher<I>(io_ctx, work_queue), + m_pool_watcher(pool_watcher) { + } + + void handle_rewatch_complete(int r) override { + m_pool_watcher->handle_rewatch_complete(r); + } + + void handle_mode_updated(cls::rbd::MirrorMode mirror_mode) override { + // invalidate all image state and refresh the pool contents + m_pool_watcher->schedule_refresh_images(5); + } + + void handle_image_updated(cls::rbd::MirrorImageState state, + const std::string &remote_image_id, + const std::string &global_image_id) override { + bool enabled = (state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED); + m_pool_watcher->handle_image_updated(remote_image_id, global_image_id, + enabled); + } + +private: + PoolWatcher *m_pool_watcher; +}; + +template <typename I> +PoolWatcher<I>::PoolWatcher(Threads<I> *threads, librados::IoCtx &remote_io_ctx, + pool_watcher::Listener &listener) + : m_threads(threads), m_remote_io_ctx(remote_io_ctx), m_listener(listener), + m_lock(librbd::util::unique_lock_name("rbd::mirror::PoolWatcher", this)) { + m_mirroring_watcher = new MirroringWatcher(m_remote_io_ctx, + m_threads->work_queue, this); +} + +template <typename I> +PoolWatcher<I>::~PoolWatcher() { + delete m_mirroring_watcher; +} + +template <typename I> +bool PoolWatcher<I>::is_blacklisted() const { + Mutex::Locker locker(m_lock); + return m_blacklisted; +} + +template <typename I> +void PoolWatcher<I>::init(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker locker(m_lock); + m_on_init_finish = on_finish; + + ceph_assert(!m_refresh_in_progress); + m_refresh_in_progress = true; + } + + // start async updates for mirror image directory + register_watcher(); +} + +template <typename I> +void PoolWatcher<I>::shut_down(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + ceph_assert(!m_shutting_down); + m_shutting_down = true; + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + } + } + + // in-progress unregister tracked as async op + unregister_watcher(); + + m_async_op_tracker.wait_for_ops(on_finish); +} + +template <typename I> +void PoolWatcher<I>::register_watcher() { + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + } + + // if the watch registration is in-flight, let the watcher + // handle the transition -- only (re-)register if it's not registered + if (!m_mirroring_watcher->is_unregistered()) { + refresh_images(); + return; + } + + // first time registering or the watch failed + dout(5) << dendl; + m_async_op_tracker.start_op(); + + Context *ctx = create_context_callback< + PoolWatcher, &PoolWatcher<I>::handle_register_watcher>(this); + m_mirroring_watcher->register_watch(ctx); +} + +template <typename I> +void PoolWatcher<I>::handle_register_watcher(int r) { + dout(5) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + if (r < 0) { + m_refresh_in_progress = false; + } + } + + Context *on_init_finish = nullptr; + if (r >= 0) { + refresh_images(); + } else if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + + Mutex::Locker locker(m_lock); + m_blacklisted = true; + std::swap(on_init_finish, m_on_init_finish); + } else if (r == -ENOENT) { + dout(5) << "mirroring directory does not exist" << dendl; + { + Mutex::Locker locker(m_lock); + std::swap(on_init_finish, m_on_init_finish); + } + + schedule_refresh_images(30); + } else { + derr << "unexpected error registering mirroring directory watch: " + << cpp_strerror(r) << dendl; + schedule_refresh_images(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::unregister_watcher() { + dout(5) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this](int r) { + dout(5) << "unregister_watcher: r=" << r << dendl; + if (r < 0) { + derr << "error unregistering watcher for " + << m_mirroring_watcher->get_oid() << " object: " << cpp_strerror(r) + << dendl; + } + m_async_op_tracker.finish_op(); + }); + + m_mirroring_watcher->unregister_watch(ctx); +} + +template <typename I> +void PoolWatcher<I>::refresh_images() { + dout(5) << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + + // clear all pending notification events since we need to perform + // a full image list refresh + m_pending_added_image_ids.clear(); + m_pending_removed_image_ids.clear(); + } + + m_async_op_tracker.start_op(); + m_refresh_image_ids.clear(); + Context *ctx = create_context_callback< + PoolWatcher, &PoolWatcher<I>::handle_refresh_images>(this); + auto req = pool_watcher::RefreshImagesRequest<I>::create(m_remote_io_ctx, + &m_refresh_image_ids, + ctx); + req->send(); +} + +template <typename I> +void PoolWatcher<I>::handle_refresh_images(int r) { + dout(5) << "r=" << r << dendl; + + bool retry_refresh = false; + Context *on_init_finish = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + + if (r >= 0) { + m_pending_image_ids = std::move(m_refresh_image_ids); + } else if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted during image refresh" << dendl; + + m_blacklisted = true; + m_refresh_in_progress = false; + std::swap(on_init_finish, m_on_init_finish); + } else if (r == -ENOENT) { + dout(5) << "mirroring directory not found" << dendl; + m_pending_image_ids.clear(); + r = 0; + } else { + m_refresh_in_progress = false; + retry_refresh = true; + } + } + + if (retry_refresh) { + derr << "failed to retrieve mirroring directory: " << cpp_strerror(r) + << dendl; + schedule_refresh_images(10); + } else if (r >= 0) { + get_mirror_uuid(); + return; + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + ceph_assert(r == -EBLACKLISTED); + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::get_mirror_uuid() { + dout(5) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_uuid_get_start(&op); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + PoolWatcher, &PoolWatcher<I>::handle_get_mirror_uuid>(this); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PoolWatcher<I>::handle_get_mirror_uuid(int r) { + dout(5) << "r=" << r << dendl; + + bool deferred_refresh = false; + bool retry_refresh = false; + Context *on_init_finish = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + m_refresh_in_progress = false; + + m_pending_mirror_uuid = ""; + if (r >= 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_uuid_get_finish( + &it, &m_pending_mirror_uuid); + } + if (r >= 0 && m_pending_mirror_uuid.empty()) { + r = -ENOENT; + } + + if (m_deferred_refresh) { + // need to refresh -- skip the notification + deferred_refresh = true; + } else if (r >= 0) { + dout(10) << "mirror_uuid=" << m_pending_mirror_uuid << dendl; + m_image_ids_invalid = false; + std::swap(on_init_finish, m_on_init_finish); + schedule_listener(); + } else if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted during image refresh" << dendl; + + m_blacklisted = true; + std::swap(on_init_finish, m_on_init_finish); + } else if (r == -ENOENT) { + dout(5) << "mirroring uuid not found" << dendl; + std::swap(on_init_finish, m_on_init_finish); + retry_refresh = true; + } else { + retry_refresh = true; + } + } + + if (deferred_refresh) { + dout(5) << "scheduling deferred refresh" << dendl; + schedule_refresh_images(0); + } else if (retry_refresh) { + derr << "failed to retrieve mirror uuid: " << cpp_strerror(r) + << dendl; + schedule_refresh_images(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::schedule_refresh_images(double interval) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (m_shutting_down || m_refresh_in_progress || m_timer_ctx != nullptr) { + if (m_refresh_in_progress && !m_deferred_refresh) { + dout(5) << "deferring refresh until in-flight refresh completes" << dendl; + m_deferred_refresh = true; + } + return; + } + + m_image_ids_invalid = true; + m_timer_ctx = m_threads->timer->add_event_after( + interval, + new FunctionContext([this](int r) { + process_refresh_images(); + })); +} + +template <typename I> +void PoolWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + + Mutex::Locker locker(m_lock); + m_blacklisted = true; + return; + } else if (r == -ENOENT) { + dout(5) << "mirroring directory deleted" << dendl; + } else if (r < 0) { + derr << "unexpected error re-registering mirroring directory watch: " + << cpp_strerror(r) << dendl; + } + + schedule_refresh_images(5); +} + +template <typename I> +void PoolWatcher<I>::handle_image_updated(const std::string &remote_image_id, + const std::string &global_image_id, + bool enabled) { + dout(10) << "remote_image_id=" << remote_image_id << ", " + << "global_image_id=" << global_image_id << ", " + << "enabled=" << enabled << dendl; + + Mutex::Locker locker(m_lock); + ImageId image_id(global_image_id, remote_image_id); + m_pending_added_image_ids.erase(image_id); + m_pending_removed_image_ids.erase(image_id); + + if (enabled) { + m_pending_added_image_ids.insert(image_id); + schedule_listener(); + } else { + m_pending_removed_image_ids.insert(image_id); + schedule_listener(); + } +} + +template <typename I> +void PoolWatcher<I>::process_refresh_images() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + { + Mutex::Locker locker(m_lock); + ceph_assert(!m_refresh_in_progress); + m_refresh_in_progress = true; + m_deferred_refresh = false; + } + + // execute outside of the timer's lock + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this](int r) { + register_watcher(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void PoolWatcher<I>::schedule_listener() { + ceph_assert(m_lock.is_locked()); + m_pending_updates = true; + if (m_shutting_down || m_image_ids_invalid || m_notify_listener_in_progress) { + return; + } + + dout(20) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this](int r) { + notify_listener(); + m_async_op_tracker.finish_op(); + }); + + m_notify_listener_in_progress = true; + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void PoolWatcher<I>::notify_listener() { + dout(10) << dendl; + + std::string mirror_uuid; + ImageIds added_image_ids; + ImageIds removed_image_ids; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_notify_listener_in_progress); + + // if the mirror uuid is updated, treat it as the removal of all + // images in the pool + if (m_mirror_uuid != m_pending_mirror_uuid) { + if (!m_mirror_uuid.empty()) { + dout(0) << "mirror uuid updated:" + << "old=" << m_mirror_uuid << ", " + << "new=" << m_pending_mirror_uuid << dendl; + } + + mirror_uuid = m_mirror_uuid; + removed_image_ids = std::move(m_image_ids); + m_image_ids.clear(); + } + } + + if (!removed_image_ids.empty()) { + m_listener.handle_update(mirror_uuid, {}, std::move(removed_image_ids)); + removed_image_ids.clear(); + } + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_notify_listener_in_progress); + + // if the watch failed while we didn't own the lock, we are going + // to need to perform a full refresh + if (m_image_ids_invalid) { + m_notify_listener_in_progress = false; + return; + } + + // merge add/remove notifications into pending set (a given image + // can only be in one set or another) + for (auto &image_id : m_pending_removed_image_ids) { + dout(20) << "image_id=" << image_id << dendl; + m_pending_image_ids.erase(image_id); + } + + for (auto &image_id : m_pending_added_image_ids) { + dout(20) << "image_id=" << image_id << dendl; + m_pending_image_ids.erase(image_id); + m_pending_image_ids.insert(image_id); + } + m_pending_added_image_ids.clear(); + + // compute added/removed images + for (auto &image_id : m_image_ids) { + auto it = m_pending_image_ids.find(image_id); + if (it == m_pending_image_ids.end() || it->id != image_id.id) { + removed_image_ids.insert(image_id); + } + } + for (auto &image_id : m_pending_image_ids) { + auto it = m_image_ids.find(image_id); + if (it == m_image_ids.end() || it->id != image_id.id) { + added_image_ids.insert(image_id); + } + } + + m_pending_updates = false; + m_image_ids = m_pending_image_ids; + + m_mirror_uuid = m_pending_mirror_uuid; + mirror_uuid = m_mirror_uuid; + } + + m_listener.handle_update(mirror_uuid, std::move(added_image_ids), + std::move(removed_image_ids)); + + { + Mutex::Locker locker(m_lock); + m_notify_listener_in_progress = false; + if (m_pending_updates) { + schedule_listener(); + } + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::PoolWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h new file mode 100644 index 00000000..1136a319 --- /dev/null +++ b/src/tools/rbd_mirror/PoolWatcher.h @@ -0,0 +1,166 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_H + +#include <map> +#include <memory> +#include <set> +#include <string> + +#include "common/AsyncOpTracker.h" +#include "common/ceph_context.h" +#include "common/Mutex.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include <boost/functional/hash.hpp> +#include <boost/optional.hpp> +#include "include/ceph_assert.h" +#include "tools/rbd_mirror/pool_watcher/Types.h" + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +/** + * Keeps track of images that have mirroring enabled within all + * pools. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class PoolWatcher { +public: + static PoolWatcher* create(Threads<ImageCtxT> *threads, + librados::IoCtx &remote_io_ctx, + pool_watcher::Listener &listener) { + return new PoolWatcher(threads, remote_io_ctx, listener); + } + + PoolWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &remote_io_ctx, + pool_watcher::Listener &listener); + ~PoolWatcher(); + PoolWatcher(const PoolWatcher&) = delete; + PoolWatcher& operator=(const PoolWatcher&) = delete; + + bool is_blacklisted() const; + + void init(Context *on_finish = nullptr); + void shut_down(Context *on_finish); + + inline uint64_t get_image_count() const { + Mutex::Locker locker(m_lock); + return m_image_ids.size(); + } + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * REGISTER_WATCHER + * | + * |/--------------------------------\ + * | | + * v | + * REFRESH_IMAGES | + * | | + * |/----------------------------\ | + * | | | + * v | | + * GET_MIRROR_UUID | | + * | | | + * v | | + * NOTIFY_LISTENER | | + * | | | + * v | | + * IDLE ---\ | | + * | | | | + * | |\---> IMAGE_UPDATED | | + * | | | | | + * | | v | | + * | | GET_IMAGE_NAME --/ | + * | | | + * | \----> WATCH_ERROR ---------/ + * v + * SHUT_DOWN + * | + * v + * UNREGISTER_WATCHER + * | + * v + * <finish> + * + * @endverbatim + */ + class MirroringWatcher; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx m_remote_io_ctx; + pool_watcher::Listener &m_listener; + + ImageIds m_refresh_image_ids; + bufferlist m_out_bl; + + mutable Mutex m_lock; + + Context *m_on_init_finish = nullptr; + + ImageIds m_image_ids; + std::string m_mirror_uuid; + + bool m_pending_updates = false; + bool m_notify_listener_in_progress = false; + ImageIds m_pending_image_ids; + ImageIds m_pending_added_image_ids; + ImageIds m_pending_removed_image_ids; + + std::string m_pending_mirror_uuid; + + MirroringWatcher *m_mirroring_watcher; + + Context *m_timer_ctx = nullptr; + + AsyncOpTracker m_async_op_tracker; + bool m_blacklisted = false; + bool m_shutting_down = false; + bool m_image_ids_invalid = true; + bool m_refresh_in_progress = false; + bool m_deferred_refresh = false; + + void register_watcher(); + void handle_register_watcher(int r); + void unregister_watcher(); + + void refresh_images(); + void handle_refresh_images(int r); + + void schedule_refresh_images(double interval); + void process_refresh_images(); + + void get_mirror_uuid(); + void handle_get_mirror_uuid(int r); + + void handle_rewatch_complete(int r); + void handle_image_updated(const std::string &remote_image_id, + const std::string &global_image_id, + bool enabled); + + void schedule_listener(); + void notify_listener(); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::PoolWatcher<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_H diff --git a/src/tools/rbd_mirror/ProgressContext.h b/src/tools/rbd_mirror/ProgressContext.h new file mode 100644 index 00000000..e4430ee6 --- /dev/null +++ b/src/tools/rbd_mirror/ProgressContext.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_PROGRESS_CONTEXT_H +#define RBD_MIRROR_PROGRESS_CONTEXT_H + +namespace rbd { +namespace mirror { + +class ProgressContext +{ +public: + virtual ~ProgressContext() {} + virtual void update_progress(const std::string &description, + bool flush = true) = 0; +}; + +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_PROGRESS_CONTEXT_H diff --git a/src/tools/rbd_mirror/ServiceDaemon.cc b/src/tools/rbd_mirror/ServiceDaemon.cc new file mode 100644 index 00000000..f3b549b8 --- /dev/null +++ b/src/tools/rbd_mirror/ServiceDaemon.cc @@ -0,0 +1,251 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/ServiceDaemon.h" +#include "include/Context.h" +#include "include/stringify.h" +#include "common/ceph_context.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/Timer.h" +#include "tools/rbd_mirror/Threads.h" +#include <sstream> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ServiceDaemon: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +namespace { + +const std::string RBD_MIRROR_AUTH_ID_PREFIX("rbd-mirror."); + +struct AttributeDumpVisitor : public boost::static_visitor<void> { + ceph::Formatter *f; + const std::string& name; + + AttributeDumpVisitor(ceph::Formatter *f, const std::string& name) + : f(f), name(name) { + } + + void operator()(bool val) const { + f->dump_bool(name.c_str(), val); + } + void operator()(uint64_t val) const { + f->dump_unsigned(name.c_str(), val); + } + void operator()(const std::string& val) const { + f->dump_string(name.c_str(), val); + } +}; + +} // anonymous namespace + +using namespace service_daemon; + +template <typename I> +ServiceDaemon<I>::ServiceDaemon(CephContext *cct, RadosRef rados, + Threads<I>* threads) + : m_cct(cct), m_rados(rados), m_threads(threads), + m_lock("rbd::mirror::ServiceDaemon") { + dout(20) << dendl; +} + +template <typename I> +ServiceDaemon<I>::~ServiceDaemon() { + dout(20) << dendl; + Mutex::Locker timer_locker(m_threads->timer_lock); + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + update_status(); + } +} + +template <typename I> +int ServiceDaemon<I>::init() { + dout(20) << dendl; + + std::string id = m_cct->_conf->name.get_id(); + if (id.find(RBD_MIRROR_AUTH_ID_PREFIX) == 0) { + id = id.substr(RBD_MIRROR_AUTH_ID_PREFIX.size()); + } + + std::string instance_id = stringify(m_rados->get_instance_id()); + std::map<std::string, std::string> service_metadata = { + {"id", id}, {"instance_id", instance_id}}; + int r = m_rados->service_daemon_register("rbd-mirror", instance_id, + service_metadata); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +void ServiceDaemon<I>::add_pool(int64_t pool_id, const std::string& pool_name) { + dout(20) << "pool_id=" << pool_id << ", pool_name=" << pool_name << dendl; + + { + Mutex::Locker locker(m_lock); + m_pools.insert({pool_id, {pool_name}}); + } + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::remove_pool(int64_t pool_id) { + dout(20) << "pool_id=" << pool_id << dendl; + { + Mutex::Locker locker(m_lock); + m_pools.erase(pool_id); + } + schedule_update_status(); +} + +template <typename I> +uint64_t ServiceDaemon<I>::add_or_update_callout(int64_t pool_id, + uint64_t callout_id, + CalloutLevel callout_level, + const std::string& text) { + dout(20) << "pool_id=" << pool_id << ", " + << "callout_id=" << callout_id << ", " + << "callout_level=" << callout_level << ", " + << "text=" << text << dendl; + + { + Mutex::Locker locker(m_lock); + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return CALLOUT_ID_NONE; + } + + if (callout_id == CALLOUT_ID_NONE) { + callout_id = ++m_callout_id; + } + pool_it->second.callouts[callout_id] = {callout_level, text}; + } + + schedule_update_status(); + return callout_id; +} + +template <typename I> +void ServiceDaemon<I>::remove_callout(int64_t pool_id, uint64_t callout_id) { + dout(20) << "pool_id=" << pool_id << ", " + << "callout_id=" << callout_id << dendl; + + { + Mutex::Locker locker(m_lock); + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.callouts.erase(callout_id); + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::add_or_update_attribute(int64_t pool_id, + const std::string& key, + const AttributeValue& value) { + dout(20) << "pool_id=" << pool_id << ", " + << "key=" << key << ", " + << "value=" << value << dendl; + + { + Mutex::Locker locker(m_lock); + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.attributes[key] = value; + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::remove_attribute(int64_t pool_id, + const std::string& key) { + dout(20) << "pool_id=" << pool_id << ", " + << "key=" << key << dendl; + + { + Mutex::Locker locker(m_lock); + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.attributes.erase(key); + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::schedule_update_status() { + Mutex::Locker timer_locker(m_threads->timer_lock); + if (m_timer_ctx != nullptr) { + return; + } + + m_timer_ctx = new FunctionContext([this](int) { + m_timer_ctx = nullptr; + update_status(); + }); + m_threads->timer->add_event_after(1, m_timer_ctx); +} + +template <typename I> +void ServiceDaemon<I>::update_status() { + dout(20) << dendl; + ceph_assert(m_threads->timer_lock.is_locked()); + + ceph::JSONFormatter f; + { + Mutex::Locker locker(m_lock); + f.open_object_section("pools"); + for (auto& pool_pair : m_pools) { + f.open_object_section(stringify(pool_pair.first).c_str()); + f.dump_string("name", pool_pair.second.name); + f.open_object_section("callouts"); + for (auto& callout : pool_pair.second.callouts) { + f.open_object_section(stringify(callout.first).c_str()); + f.dump_string("level", stringify(callout.second.level).c_str()); + f.dump_string("text", callout.second.text.c_str()); + f.close_section(); + } + f.close_section(); // callouts + + for (auto& attribute : pool_pair.second.attributes) { + AttributeDumpVisitor attribute_dump_visitor(&f, attribute.first); + boost::apply_visitor(attribute_dump_visitor, attribute.second); + } + f.close_section(); // pool + } + f.close_section(); // pools + } + + std::stringstream ss; + f.flush(ss); + + int r = m_rados->service_daemon_update_status({{"json", ss.str()}}); + if (r < 0) { + derr << "failed to update service daemon status: " << cpp_strerror(r) + << dendl; + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ServiceDaemon.h b/src/tools/rbd_mirror/ServiceDaemon.h new file mode 100644 index 00000000..1de7e20b --- /dev/null +++ b/src/tools/rbd_mirror/ServiceDaemon.h @@ -0,0 +1,86 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_H +#define CEPH_RBD_MIRROR_SERVICE_DAEMON_H + +#include "common/Mutex.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <map> +#include <string> + +struct CephContext; +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class ServiceDaemon { +public: + ServiceDaemon(CephContext *cct, RadosRef rados, Threads<ImageCtxT>* threads); + ~ServiceDaemon(); + + int init(); + + void add_pool(int64_t pool_id, const std::string& pool_name); + void remove_pool(int64_t pool_id); + + uint64_t add_or_update_callout(int64_t pool_id, uint64_t callout_id, + service_daemon::CalloutLevel callout_level, + const std::string& text); + void remove_callout(int64_t pool_id, uint64_t callout_id); + + void add_or_update_attribute(int64_t pool_id, const std::string& key, + const service_daemon::AttributeValue& value); + void remove_attribute(int64_t pool_id, const std::string& key); + +private: + struct Callout { + service_daemon::CalloutLevel level; + std::string text; + + Callout() : level(service_daemon::CALLOUT_LEVEL_INFO) { + } + Callout(service_daemon::CalloutLevel level, const std::string& text) + : level(level), text(text) { + } + }; + typedef std::map<uint64_t, Callout> Callouts; + typedef std::map<std::string, service_daemon::AttributeValue> Attributes; + + struct Pool { + std::string name; + Callouts callouts; + Attributes attributes; + + Pool(const std::string& name) : name(name) { + } + }; + + typedef std::map<int64_t, Pool> Pools; + + CephContext *m_cct; + RadosRef m_rados; + Threads<ImageCtxT>* m_threads; + + Mutex m_lock; + Pools m_pools; + uint64_t m_callout_id = service_daemon::CALLOUT_ID_NONE; + + Context* m_timer_ctx = nullptr; + + void schedule_update_status(); + void update_status(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_H diff --git a/src/tools/rbd_mirror/Threads.cc b/src/tools/rbd_mirror/Threads.cc new file mode 100644 index 00000000..ca0a8b0f --- /dev/null +++ b/src/tools/rbd_mirror/Threads.cc @@ -0,0 +1,45 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/Threads.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" +#include "librbd/ImageCtx.h" + +namespace rbd { +namespace mirror { + +template <typename I> +Threads<I>::Threads(CephContext *cct) : timer_lock("Threads::timer_lock") { + thread_pool = new ThreadPool(cct, "Journaler::thread_pool", "tp_journal", + cct->_conf.get_val<uint64_t>("rbd_op_threads"), + "rbd_op_threads"); + thread_pool->start(); + + work_queue = new ContextWQ("Journaler::work_queue", + cct->_conf.get_val<uint64_t>("rbd_op_thread_timeout"), + thread_pool); + + timer = new SafeTimer(cct, timer_lock, true); + timer->init(); +} + +template <typename I> +Threads<I>::~Threads() { + { + Mutex::Locker timer_locker(timer_lock); + timer->shutdown(); + } + delete timer; + + work_queue->drain(); + delete work_queue; + + thread_pool->stop(); + delete thread_pool; +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Threads<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Threads.h b/src/tools/rbd_mirror/Threads.h new file mode 100644 index 00000000..f52e8837 --- /dev/null +++ b/src/tools/rbd_mirror/Threads.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_THREADS_H +#define CEPH_RBD_MIRROR_THREADS_H + +#include "common/Mutex.h" + +class CephContext; +class ContextWQ; +class SafeTimer; +class ThreadPool; + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +struct Threads { + ThreadPool *thread_pool = nullptr; + ContextWQ *work_queue = nullptr; + + SafeTimer *timer = nullptr; + Mutex timer_lock; + + explicit Threads(CephContext *cct); + Threads(const Threads&) = delete; + Threads& operator=(const Threads&) = delete; + + ~Threads(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::Threads<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_THREADS_H diff --git a/src/tools/rbd_mirror/Types.cc b/src/tools/rbd_mirror/Types.cc new file mode 100644 index 00000000..74fe318e --- /dev/null +++ b/src/tools/rbd_mirror/Types.cc @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/Types.h" + +namespace rbd { +namespace mirror { + +std::ostream &operator<<(std::ostream &os, const ImageId &image_id) { + return os << "global id=" << image_id.global_id << ", " + << "id=" << image_id.id; +} + +std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer) { + return lhs << "uuid: " << peer.uuid + << " cluster: " << peer.cluster_name + << " client: " << peer.client_name; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h new file mode 100644 index 00000000..ed3b9d8a --- /dev/null +++ b/src/tools/rbd_mirror/Types.h @@ -0,0 +1,123 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_TYPES_H +#define CEPH_RBD_MIRROR_TYPES_H + +#include <iostream> +#include <memory> +#include <set> +#include <string> +#include <vector> + +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" + +namespace rbd { +namespace mirror { + +// Performance counters +enum { + l_rbd_mirror_first = 27000, + l_rbd_mirror_replay, + l_rbd_mirror_replay_bytes, + l_rbd_mirror_replay_latency, + l_rbd_mirror_last, +}; + +typedef std::shared_ptr<librados::Rados> RadosRef; +typedef std::shared_ptr<librados::IoCtx> IoCtxRef; +typedef std::shared_ptr<librbd::Image> ImageRef; + +struct ImageId { + std::string global_id; + std::string id; + + explicit ImageId(const std::string &global_id) : global_id(global_id) { + } + ImageId(const std::string &global_id, const std::string &id) + : global_id(global_id), id(id) { + } + + inline bool operator==(const ImageId &rhs) const { + return (global_id == rhs.global_id && id == rhs.id); + } + inline bool operator<(const ImageId &rhs) const { + return global_id < rhs.global_id; + } +}; + +std::ostream &operator<<(std::ostream &, const ImageId &image_id); + +typedef std::set<ImageId> ImageIds; + +struct Peer { + std::string peer_uuid; + librados::IoCtx io_ctx; + + Peer() { + } + Peer(const std::string &peer_uuid) : peer_uuid(peer_uuid) { + } + Peer(const std::string &peer_uuid, librados::IoCtx& io_ctx) + : peer_uuid(peer_uuid), io_ctx(io_ctx) { + } + + inline bool operator<(const Peer &rhs) const { + return peer_uuid < rhs.peer_uuid; + } +}; + +typedef std::set<Peer> Peers; + +struct PeerSpec { + PeerSpec() = default; + PeerSpec(const std::string &uuid, const std::string &cluster_name, + const std::string &client_name) + : uuid(uuid), cluster_name(cluster_name), client_name(client_name) + { + } + PeerSpec(const librbd::mirror_peer_t &peer) : + uuid(peer.uuid), + cluster_name(peer.cluster_name), + client_name(peer.client_name) + { + } + + std::string uuid; + std::string cluster_name; + std::string client_name; + + /// optional config properties + std::string mon_host; + std::string key; + + bool operator==(const PeerSpec& rhs) const { + return (uuid == rhs.uuid && + cluster_name == rhs.cluster_name && + client_name == rhs.client_name && + mon_host == rhs.mon_host && + key == rhs.key); + } + bool operator<(const PeerSpec& rhs) const { + if (uuid != rhs.uuid) { + return uuid < rhs.uuid; + } else if (cluster_name != rhs.cluster_name) { + return cluster_name < rhs.cluster_name; + } else if (client_name != rhs.client_name) { + return client_name < rhs.client_name; + } else if (mon_host < rhs.mon_host) { + return mon_host < rhs.mon_host; + } else { + return key < rhs.key; + } + } +}; + +std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer); + +} // namespace mirror +} // namespace rbd + + +#endif // CEPH_RBD_MIRROR_TYPES_H diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc new file mode 100644 index 00000000..a0e9fd90 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc @@ -0,0 +1,290 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/journal/Policy.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::SnapshotPurgeRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; + +template <typename I> +void SnapshotPurgeRequest<I>::send() { + open_image(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::open_image() { + dout(10) << dendl; + m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false); + + { + RWLock::WLocker snap_locker(m_image_ctx->snap_lock); + m_image_ctx->set_journal_policy(new JournalPolicy()); + } + + Context *ctx = create_context_callback< + SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_open_image>( + this); + m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_open_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to open image '" << m_image_id << "': " << cpp_strerror(r) + << dendl; + m_image_ctx->destroy(); + m_image_ctx = nullptr; + + finish(r); + return; + } + + acquire_lock(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::acquire_lock() { + dout(10) << dendl; + + m_image_ctx->owner_lock.get_read(); + if (m_image_ctx->exclusive_lock == nullptr) { + m_image_ctx->owner_lock.put_read(); + + derr << "exclusive lock not enabled" << dendl; + m_ret_val = -EINVAL; + close_image(); + return; + } + + m_image_ctx->exclusive_lock->acquire_lock(create_context_callback< + SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_acquire_lock>( + this)); + m_image_ctx->owner_lock.put_read(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + { + RWLock::RLocker snap_locker(m_image_ctx->snap_lock); + m_snaps = m_image_ctx->snaps; + } + snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::snap_unprotect() { + if (m_snaps.empty()) { + close_image(); + return; + } + + librados::snap_t snap_id = m_snaps.back(); + m_image_ctx->snap_lock.get_read(); + int r = m_image_ctx->get_snap_namespace(snap_id, &m_snap_namespace); + if (r < 0) { + m_image_ctx->snap_lock.put_read(); + + derr << "failed to get snap namespace: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + r = m_image_ctx->get_snap_name(snap_id, &m_snap_name); + if (r < 0) { + m_image_ctx->snap_lock.put_read(); + + derr << "failed to get snap name: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + bool is_protected; + r = m_image_ctx->is_snap_protected(snap_id, &is_protected); + if (r < 0) { + m_image_ctx->snap_lock.put_read(); + + derr << "failed to get snap protection status: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + close_image(); + return; + } + m_image_ctx->snap_lock.put_read(); + + if (!is_protected) { + snap_remove(); + return; + } + + dout(10) << "snap_id=" << snap_id << ", " + << "snap_namespace=" << m_snap_namespace << ", " + << "snap_name=" << m_snap_name << dendl; + + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + derr << "lost exclusive lock" << dendl; + m_ret_val = r; + close_image(); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_snap_unprotect(r); + finish_op_ctx->complete(0); + }); + RWLock::RLocker owner_locker(m_image_ctx->owner_lock); + m_image_ctx->operations->execute_snap_unprotect( + m_snap_namespace, m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_snap_unprotect(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshot in-use" << dendl; + m_ret_val = r; + close_image(); + return; + } else if (r < 0) { + derr << "failed to unprotect snapshot: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + { + // avoid the need to refresh to delete the newly unprotected snapshot + RWLock::RLocker snap_locker(m_image_ctx->snap_lock); + librados::snap_t snap_id = m_snaps.back(); + auto snap_info_it = m_image_ctx->snap_info.find(snap_id); + if (snap_info_it != m_image_ctx->snap_info.end()) { + snap_info_it->second.protection_status = + RBD_PROTECTION_STATUS_UNPROTECTED; + } + } + + snap_remove(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::snap_remove() { + librados::snap_t snap_id = m_snaps.back(); + dout(10) << "snap_id=" << snap_id << ", " + << "snap_namespace=" << m_snap_namespace << ", " + << "snap_name=" << m_snap_name << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + derr << "lost exclusive lock" << dendl; + m_ret_val = r; + close_image(); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_snap_remove(r); + finish_op_ctx->complete(0); + }); + RWLock::RLocker owner_locker(m_image_ctx->owner_lock); + m_image_ctx->operations->execute_snap_remove( + m_snap_namespace, m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_snap_remove(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshot in-use" << dendl; + m_ret_val = r; + close_image(); + return; + } else if (r < 0) { + derr << "failed to remove snapshot: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + m_snaps.pop_back(); + snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::close_image() { + dout(10) << dendl; + + m_image_ctx->state->close(create_context_callback< + SnapshotPurgeRequest<I>, + &SnapshotPurgeRequest<I>::handle_close_image>(this)); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_close_image(int r) { + dout(10) << "r=" << r << dendl; + + m_image_ctx->destroy(); + m_image_ctx = nullptr; + + if (r < 0) { + derr << "failed to close: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + finish(0); +} + +template <typename I> +void SnapshotPurgeRequest<I>::finish(int r) { + if (m_ret_val < 0) { + r = m_ret_val; + } + + m_on_finish->complete(r); + delete this; +} + +template <typename I> +Context *SnapshotPurgeRequest<I>::start_lock_op(int* r) { + RWLock::RLocker owner_locker(m_image_ctx->owner_lock); + return m_image_ctx->exclusive_lock->start_op(r); +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h new file mode 100644 index 00000000..b8b635fe --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h @@ -0,0 +1,104 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H + +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include <string> +#include <vector> + +class Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class SnapshotPurgeRequest { +public: + static SnapshotPurgeRequest* create(librados::IoCtx &io_ctx, + const std::string &image_id, + Context *on_finish) { + return new SnapshotPurgeRequest(io_ctx, image_id, on_finish); + } + + SnapshotPurgeRequest(librados::IoCtx &io_ctx, const std::string &image_id, + Context *on_finish) + : m_io_ctx(io_ctx), m_image_id(image_id), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE + * | + * v + * ACQUIRE_LOCK + * | + * | (repeat for each snapshot) + * |/------------------------\ + * | | + * v (skip if not needed) | + * SNAP_UNPROTECT | + * | | + * v (skip if not needed) | + * SNAP_REMOVE -----------------/ + * | + * v + * CLOSE_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_image_id; + Context *m_on_finish; + + ImageCtxT *m_image_ctx = nullptr; + int m_ret_val = 0; + + std::vector<librados::snap_t> m_snaps; + cls::rbd::SnapshotNamespace m_snap_namespace; + std::string m_snap_name; + + void open_image(); + void handle_open_image(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void snap_unprotect(); + void handle_snap_unprotect(int r); + + void snap_remove(); + void handle_snap_remove(int r); + + void close_image(); + void handle_close_image(int r); + + void finish(int r); + + Context *start_lock_op(int* r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H + diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc new file mode 100644 index 00000000..92db22ca --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc @@ -0,0 +1,384 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/journal/ResetRequest.h" +#include "librbd/trash/MoveRequest.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashMoveRequest: " \ + << this << " " << __func__ << ": " +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void TrashMoveRequest<I>::send() { + get_mirror_image_id(); +} + +template <typename I> +void TrashMoveRequest<I>::get_mirror_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_get_mirror_image_id>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_get_mirror_image_id(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish(&bl_it, + &m_image_id); + } + if (r == -ENOENT) { + dout(10) << "image " << m_global_image_id << " is not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "error retrieving local id for image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_tag_owner(); +} + +template <typename I> +void TrashMoveRequest<I>::get_tag_owner() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_get_tag_owner>(this); + librbd::Journal<I>::get_tag_owner(m_io_ctx, m_image_id, &m_mirror_uuid, + m_op_work_queue, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_get_tag_owner(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "error retrieving image primary info for image " + << m_global_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } else if (r != -ENOENT) { + if (m_mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { + dout(10) << "image " << m_global_image_id << " is local primary" << dendl; + finish(-EPERM); + return; + } else if (m_mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID && + !m_resync) { + dout(10) << "image " << m_global_image_id << " is orphaned" << dendl; + finish(-EPERM); + return; + } + } + + disable_mirror_image(); +} + +template <typename I> +void TrashMoveRequest<I>::disable_mirror_image() { + dout(10) << dendl; + + cls::rbd::MirrorImage mirror_image; + mirror_image.global_image_id = m_global_image_id; + mirror_image.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_set(&op, m_image_id, mirror_image); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_disable_mirror_image>(this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_disable_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "local image is not mirrored, aborting deletion." << dendl; + finish(r); + return; + } else if (r == -EEXIST || r == -EINVAL) { + derr << "cannot disable mirroring for image " << m_global_image_id + << ": global_image_id has changed/reused: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "cannot disable mirroring for image " << m_global_image_id + << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + reset_journal(); +} + +template <typename I> +void TrashMoveRequest<I>::reset_journal() { + dout(10) << dendl; + + // ensure that if the image is recovered any peers will split-brain + auto ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_reset_journal>(this); + auto req = librbd::journal::ResetRequest<I>::create( + m_io_ctx, m_image_id, librbd::Journal<>::IMAGE_CLIENT_ID, + librbd::Journal<>::LOCAL_MIRROR_UUID, m_op_work_queue, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_reset_journal(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to reset journal: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + open_image(); +} + +template <typename I> +void TrashMoveRequest<I>::open_image() { + dout(10) << dendl; + + m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false); + + { + // don't attempt to open the journal + RWLock::WLocker snap_locker(m_image_ctx->snap_lock); + m_image_ctx->set_journal_policy(new JournalPolicy()); + } + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_open_image>(this); + m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_open_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to open image: " << cpp_strerror(r) << dendl; + m_image_ctx->destroy(); + m_image_ctx = nullptr; + finish(r); + return; + } + + if (m_image_ctx->old_format) { + derr << "cannot move v1 image to trash" << dendl; + m_ret_val = -EINVAL; + close_image(); + return; + } + + acquire_lock(); +} + +template <typename I> +void TrashMoveRequest<I>::acquire_lock() { + m_image_ctx->owner_lock.get_read(); + if (m_image_ctx->exclusive_lock == nullptr) { + derr << "exclusive lock feature not enabled" << dendl; + m_image_ctx->owner_lock.put_read(); + m_ret_val = -EINVAL; + close_image(); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_acquire_lock>(this); + m_image_ctx->exclusive_lock->block_requests(0); + m_image_ctx->exclusive_lock->acquire_lock(ctx); + m_image_ctx->owner_lock.put_read(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + trash_move(); +} + +template <typename I> +void TrashMoveRequest<I>::trash_move() { + dout(10) << dendl; + + utime_t delete_time{ceph_clock_now()}; + utime_t deferment_end_time{delete_time}; + deferment_end_time += + m_image_ctx->config.template get_val<uint64_t>("rbd_mirroring_delete_delay"); + + m_trash_image_spec = { + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING, m_image_ctx->name, delete_time, + deferment_end_time}; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_trash_move>(this); + auto req = librbd::trash::MoveRequest<I>::create( + m_io_ctx, m_image_id, m_trash_image_spec, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_trash_move(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to move image to trash: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + m_moved_to_trash = true; + remove_mirror_image(); +} + +template <typename I> +void TrashMoveRequest<I>::remove_mirror_image() { + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_remove(&op, m_image_id); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_remove_mirror_image>(this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_remove_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "local image is not mirrored" << dendl; + } else if (r < 0) { + derr << "failed to remove mirror image state for " << m_global_image_id + << ": " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + + close_image(); +} + +template <typename I> +void TrashMoveRequest<I>::close_image() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_close_image>(this); + m_image_ctx->state->close(ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_close_image(int r) { + dout(10) << "r=" << r << dendl; + + m_image_ctx->destroy(); + m_image_ctx = nullptr; + + if (r < 0) { + derr << "failed to close image: " << cpp_strerror(r) << dendl; + } + + // don't send notification if we failed + if (!m_moved_to_trash) { + finish(0); + return; + } + + notify_trash_add(); +} + +template <typename I> +void TrashMoveRequest<I>::notify_trash_add() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_notify_trash_add>(this); + librbd::TrashWatcher<I>::notify_image_added(m_io_ctx, m_image_id, + m_trash_image_spec, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_notify_trash_add(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl; + } + + finish(0); +} + +template <typename I> +void TrashMoveRequest<I>::finish(int r) { + if (m_ret_val < 0) { + r = m_ret_val; + } + + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>; + diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h new file mode 100644 index 00000000..07b7432e --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h @@ -0,0 +1,136 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include <boost/optional.hpp> +#include <string> + +struct Context; +class ContextWQ; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashMoveRequest { +public: + static TrashMoveRequest* create(librados::IoCtx& io_ctx, + const std::string& global_image_id, + bool resync, ContextWQ* op_work_queue, + Context* on_finish) { + return new TrashMoveRequest(io_ctx, global_image_id, resync, op_work_queue, + on_finish); + } + + TrashMoveRequest(librados::IoCtx& io_ctx, const std::string& global_image_id, + bool resync, ContextWQ* op_work_queue, Context* on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), m_resync(resync), + m_op_work_queue(op_work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * GET_MIRROR_IMAGE_ID + * | + * v + * GET_TAG_OWNER + * | + * v + * DISABLE_MIRROR_IMAGE + * | + * v + * RESET_JOURNAL + * | + * v + * OPEN_IMAGE + * | + * v + * ACQUIRE_LOCK + * | + * v + * TRASH_MOVE + * | + * v + * REMOVE_MIRROR_IMAGE + * | + * v + * CLOSE_IMAGE + * | + * v + * NOTIFY_TRASH_ADD + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + bool m_resync; + ContextWQ *m_op_work_queue; + Context *m_on_finish; + + ceph::bufferlist m_out_bl; + std::string m_image_id; + std::string m_mirror_uuid; + cls::rbd::TrashImageSpec m_trash_image_spec; + ImageCtxT *m_image_ctx = nullptr;; + int m_ret_val = 0; + bool m_moved_to_trash = false; + + void get_mirror_image_id(); + void handle_get_mirror_image_id(int r); + + void get_tag_owner(); + void handle_get_tag_owner(int r); + + void disable_mirror_image(); + void handle_disable_mirror_image(int r); + + void reset_journal(); + void handle_reset_journal(int r); + + void open_image(); + void handle_open_image(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void trash_move(); + void handle_trash_move(int r); + + void remove_mirror_image(); + void handle_remove_mirror_image(int r); + + void close_image(); + void handle_close_image(int r); + + void notify_trash_add(); + void handle_notify_trash_add(int r); + + void finish(int r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc new file mode 100644 index 00000000..e7c725dc --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc @@ -0,0 +1,265 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h" +#include "include/ceph_assert.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/trash/RemoveRequest.h" +#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashRemoveRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void TrashRemoveRequest<I>::send() { + *m_error_result = ERROR_RESULT_RETRY; + + get_trash_image_spec(); +} + +template <typename I> +void TrashRemoveRequest<I>::get_trash_image_spec() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::trash_get_start(&op, m_image_id); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_get_trash_image_spec>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_get_trash_image_spec(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::trash_get_finish(&bl_it, &m_trash_image_spec); + } + + if (r == -ENOENT || (r >= 0 && m_trash_image_spec.source != + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING)) { + dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl; + finish(0); + return; + } else if (r < 0) { + derr << "error getting image id " << m_image_id << " info from trash: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL && + m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + dout(10) << "image " << m_image_id << " is not in an expected trash state: " + << m_trash_image_spec.state << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(-EBUSY); + return; + } + + set_trash_state(); +} + +template <typename I> +void TrashRemoveRequest<I>::set_trash_state() { + if (m_trash_image_spec.state == cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + get_snap_context(); + return; + } + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::trash_state_set(&op, m_image_id, + cls::rbd::TRASH_IMAGE_STATE_REMOVING, + cls::rbd::TRASH_IMAGE_STATE_NORMAL); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_set_trash_state>(this); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_set_trash_state(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl; + finish(0); + return; + } else if (r < 0 && r != -EOPNOTSUPP) { + derr << "error setting trash image state for image id " << m_image_id + << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_snap_context(); +} + +template <typename I> +void TrashRemoveRequest<I>::get_snap_context() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::get_snapcontext_start(&op); + + std::string header_oid = librbd::util::header_name(m_image_id); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_get_snap_context>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(header_oid, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_get_snap_context(int r) { + dout(10) << "r=" << r << dendl; + + ::SnapContext snapc; + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::get_snapcontext_finish(&bl_it, &snapc); + } + if (r < 0 && r != -ENOENT) { + derr << "error retrieving snapshot context for image " + << m_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_has_snapshots = (!snapc.empty()); + purge_snapshots(); +} + +template <typename I> +void TrashRemoveRequest<I>::purge_snapshots() { + if (!m_has_snapshots) { + remove_image(); + return; + } + + dout(10) << dendl; + auto ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_purge_snapshots>(this); + auto req = SnapshotPurgeRequest<I>::create(m_io_ctx, m_image_id, ctx); + req->send(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_purge_snapshots(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshots still in-use" << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(r); + return; + } else if (r < 0) { + derr << "failed to purge image snapshots: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + remove_image(); +} + +template <typename I> +void TrashRemoveRequest<I>::remove_image() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_remove_image>(this); + auto req = librbd::trash::RemoveRequest<I>::create( + m_io_ctx, m_image_id, m_op_work_queue, true, m_progress_ctx, + ctx); + req->send(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_remove_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -ENOTEMPTY) { + // image must have clone v2 snapshot still associated to child + dout(10) << "snapshots still in-use" << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(-EBUSY); + return; + } + + if (r < 0 && r != -ENOENT) { + derr << "error removing image " << m_image_id << " " + << "(" << m_image_id << ") from local pool: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + notify_trash_removed(); +} + +template <typename I> +void TrashRemoveRequest<I>::notify_trash_removed() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_notify_trash_removed>(this); + librbd::TrashWatcher<I>::notify_image_removed(m_io_ctx, m_image_id, ctx); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_notify_trash_removed(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl; + } + + finish(0); +} + +template <typename I> +void TrashRemoveRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h new file mode 100644 index 00000000..d2295e8e --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h @@ -0,0 +1,113 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H + +#include "include/rados/librados.hpp" +#include "include/buffer.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/internal.h" +#include "tools/rbd_mirror/image_deleter/Types.h" +#include <string> +#include <vector> + +class Context; +class ContextWQ; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashRemoveRequest { +public: + static TrashRemoveRequest* create(librados::IoCtx &io_ctx, + const std::string &image_id, + ErrorResult *error_result, + ContextWQ *op_work_queue, + Context *on_finish) { + return new TrashRemoveRequest(io_ctx, image_id, error_result, op_work_queue, + on_finish); + } + + TrashRemoveRequest(librados::IoCtx &io_ctx, const std::string &image_id, + ErrorResult *error_result, ContextWQ *op_work_queue, + Context *on_finish) + : m_io_ctx(io_ctx), m_image_id(image_id), m_error_result(error_result), + m_op_work_queue(op_work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * GET_TRASH_IMAGE_SPEC + * | + * v + * SET_TRASH_STATE + * | + * v + * GET_SNAP_CONTEXT + * | + * v + * PURGE_SNAPSHOTS + * | + * v + * TRASH_REMOVE + * | + * v + * NOTIFY_TRASH_REMOVE + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_image_id; + ErrorResult *m_error_result; + ContextWQ *m_op_work_queue; + Context *m_on_finish; + + ceph::bufferlist m_out_bl; + cls::rbd::TrashImageSpec m_trash_image_spec; + bool m_has_snapshots = false; + librbd::NoOpProgressContext m_progress_ctx; + + void get_trash_image_spec(); + void handle_get_trash_image_spec(int r); + + void set_trash_state(); + void handle_set_trash_state(int r); + + void get_snap_context(); + void handle_get_snap_context(int r); + + void purge_snapshots(); + void handle_purge_snapshots(int r); + + void remove_image(); + void handle_remove_image(int r); + + void notify_trash_removed(); + void handle_notify_trash_removed(int r); + + void finish(int r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc new file mode 100644 index 00000000..8735dfb7 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc @@ -0,0 +1,384 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashWatcher.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashWatcher: " \ + << this << " " << __func__ << ": " + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { +namespace image_deleter { + +namespace { + +const size_t MAX_RETURN = 1024; + +} // anonymous namespace + +template <typename I> +TrashWatcher<I>::TrashWatcher(librados::IoCtx &io_ctx, Threads<I> *threads, + TrashListener& trash_listener) + : librbd::TrashWatcher<I>(io_ctx, threads->work_queue), + m_io_ctx(io_ctx), m_threads(threads), m_trash_listener(trash_listener), + m_lock(librbd::util::unique_lock_name( + "rbd::mirror::image_deleter::TrashWatcher", this)) { +} + +template <typename I> +void TrashWatcher<I>::init(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker locker(m_lock); + m_on_init_finish = on_finish; + + ceph_assert(!m_trash_list_in_progress); + m_trash_list_in_progress = true; + } + + create_trash(); +} + +template <typename I> +void TrashWatcher<I>::shut_down(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + ceph_assert(!m_shutting_down); + m_shutting_down = true; + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + } + } + + auto ctx = new FunctionContext([this, on_finish](int r) { + unregister_watcher(on_finish); + }); + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void TrashWatcher<I>::handle_image_added(const std::string &image_id, + const cls::rbd::TrashImageSpec& spec) { + dout(10) << "image_id=" << image_id << dendl; + + Mutex::Locker locker(m_lock); + add_image(image_id, spec); +} + +template <typename I> +void TrashWatcher<I>::handle_image_removed(const std::string &image_id) { + // ignore removals -- the image deleter will ignore -ENOENTs +} + +template <typename I> +void TrashWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + return; + } else if (r == -ENOENT) { + dout(5) << "trash directory deleted" << dendl; + } else if (r < 0) { + derr << "unexpected error re-registering trash directory watch: " + << cpp_strerror(r) << dendl; + } + schedule_trash_list(30); +} + +template <typename I> +void TrashWatcher<I>::create_trash() { + dout(20) << dendl; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + } + + librados::ObjectWriteOperation op; + op.create(false); + + m_async_op_tracker.start_op(); + auto aio_comp = create_rados_callback< + TrashWatcher<I>, &TrashWatcher<I>::handle_create_trash>(this); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashWatcher<I>::handle_create_trash(int r) { + dout(20) << "r=" << r << dendl; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + } + + Context* on_init_finish = nullptr; + if (r == -EBLACKLISTED || r == -ENOENT) { + if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + } else { + dout(0) << "detected pool no longer exists" << dendl; + } + + Mutex::Locker locker(m_lock); + std::swap(on_init_finish, m_on_init_finish); + m_trash_list_in_progress = false; + } else if (r < 0 && r != -EEXIST) { + derr << "failed to create trash object: " << cpp_strerror(r) << dendl; + { + Mutex::Locker locker(m_lock); + m_trash_list_in_progress = false; + } + + schedule_trash_list(30); + } else { + register_watcher(); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void TrashWatcher<I>::register_watcher() { + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + } + + // if the watch registration is in-flight, let the watcher + // handle the transition -- only (re-)register if it's not registered + if (!this->is_unregistered()) { + trash_list(true); + return; + } + + // first time registering or the watch failed + dout(5) << dendl; + m_async_op_tracker.start_op(); + + Context *ctx = create_context_callback< + TrashWatcher, &TrashWatcher<I>::handle_register_watcher>(this); + this->register_watch(ctx); +} + +template <typename I> +void TrashWatcher<I>::handle_register_watcher(int r) { + dout(5) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + if (r < 0) { + m_trash_list_in_progress = false; + } + } + + Context *on_init_finish = nullptr; + if (r >= 0) { + trash_list(true); + } else if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + + Mutex::Locker locker(m_lock); + std::swap(on_init_finish, m_on_init_finish); + } else { + derr << "unexpected error registering trash directory watch: " + << cpp_strerror(r) << dendl; + schedule_trash_list(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void TrashWatcher<I>::unregister_watcher(Context* on_finish) { + dout(5) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this, on_finish](int r) { + handle_unregister_watcher(r, on_finish); + }); + this->unregister_watch(ctx); +} + +template <typename I> +void TrashWatcher<I>::handle_unregister_watcher(int r, Context* on_finish) { + dout(5) << "unregister_watcher: r=" << r << dendl; + if (r < 0) { + derr << "error unregistering watcher for trash directory: " + << cpp_strerror(r) << dendl; + } + m_async_op_tracker.finish_op(); + on_finish->complete(0); +} + +template <typename I> +void TrashWatcher<I>::trash_list(bool initial_request) { + if (initial_request) { + m_async_op_tracker.start_op(); + m_last_image_id = ""; + } + + dout(5) << "last_image_id=" << m_last_image_id << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + } + + librados::ObjectReadOperation op; + librbd::cls_client::trash_list_start(&op, m_last_image_id, MAX_RETURN); + + librados::AioCompletion *aio_comp = create_rados_callback< + TrashWatcher<I>, &TrashWatcher<I>::handle_trash_list>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashWatcher<I>::handle_trash_list(int r) { + dout(5) << "r=" << r << dendl; + + std::map<std::string, cls::rbd::TrashImageSpec> images; + if (r >= 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::trash_list_finish(&bl_it, &images); + } + + Context *on_init_finish = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + if (r >= 0) { + for (auto& image : images) { + add_image(image.first, image.second); + } + } else if (r == -ENOENT) { + r = 0; + } + + if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted during trash refresh" << dendl; + m_trash_list_in_progress = false; + std::swap(on_init_finish, m_on_init_finish); + } else if (r >= 0 && images.size() < MAX_RETURN) { + m_trash_list_in_progress = false; + std::swap(on_init_finish, m_on_init_finish); + } else if (r < 0) { + m_trash_list_in_progress = false; + } + } + + if (r >= 0 && images.size() == MAX_RETURN) { + m_last_image_id = images.rbegin()->first; + trash_list(false); + return; + } else if (r < 0 && r != -EBLACKLISTED) { + derr << "failed to retrieve trash directory: " << cpp_strerror(r) << dendl; + schedule_trash_list(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void TrashWatcher<I>::schedule_trash_list(double interval) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (m_shutting_down || m_trash_list_in_progress || m_timer_ctx != nullptr) { + if (m_trash_list_in_progress && !m_deferred_trash_list) { + dout(5) << "deferring refresh until in-flight refresh completes" << dendl; + m_deferred_trash_list = true; + } + return; + } + + dout(5) << dendl; + m_timer_ctx = m_threads->timer->add_event_after( + interval, + new FunctionContext([this](int r) { + process_trash_list(); + })); +} + +template <typename I> +void TrashWatcher<I>::process_trash_list() { + dout(5) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + { + Mutex::Locker locker(m_lock); + ceph_assert(!m_trash_list_in_progress); + m_trash_list_in_progress = true; + } + + // execute outside of the timer's lock + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this](int r) { + create_trash(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void TrashWatcher<I>::add_image(const std::string& image_id, + const cls::rbd::TrashImageSpec& spec) { + if (spec.source != cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING) { + return; + } + + ceph_assert(m_lock.is_locked()); + auto& deferment_end_time = spec.deferment_end_time; + dout(10) << "image_id=" << image_id << ", " + << "deferment_end_time=" << deferment_end_time << dendl; + + m_async_op_tracker.start_op(); + auto ctx = new FunctionContext([this, image_id, deferment_end_time](int r) { + m_trash_listener.handle_trash_image(image_id, deferment_end_time); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +} // namespace image_deleter; +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.h b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h new file mode 100644 index 00000000..b6f69833 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h @@ -0,0 +1,139 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H +#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H + +#include "include/rados/librados.hpp" +#include "common/AsyncOpTracker.h" +#include "common/Mutex.h" +#include "librbd/TrashWatcher.h" +#include <set> +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_deleter { + +struct TrashListener; + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashWatcher : public librbd::TrashWatcher<ImageCtxT> { +public: + static TrashWatcher* create(librados::IoCtx &io_ctx, + Threads<ImageCtxT> *threads, + TrashListener& trash_listener) { + return new TrashWatcher(io_ctx, threads, trash_listener); + } + + TrashWatcher(librados::IoCtx &io_ctx, Threads<ImageCtxT> *threads, + TrashListener& trash_listener); + TrashWatcher(const TrashWatcher&) = delete; + TrashWatcher& operator=(const TrashWatcher&) = delete; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + +protected: + void handle_image_added(const std::string &image_id, + const cls::rbd::TrashImageSpec& spec) override; + + void handle_image_removed(const std::string &image_id) override; + + void handle_rewatch_complete(int r) override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * CREATE_TRASH + * | + * v + * REGISTER_WATCHER + * | + * |/--------------------------------\ + * | | + * |/---------\ | + * | | | + * v | (more images) | + * TRASH_LIST ---/ | + * | | + * |/----------------------------\ | + * | | | + * v | | + * <idle> --\ | | + * | | | | + * | |\---> IMAGE_ADDED -----/ | + * | | | + * | \----> WATCH_ERROR ---------/ + * v + * SHUT_DOWN + * | + * v + * UNREGISTER_WATCHER + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx m_io_ctx; + Threads<ImageCtxT> *m_threads; + TrashListener& m_trash_listener; + + std::string m_last_image_id; + bufferlist m_out_bl; + + mutable Mutex m_lock; + + Context *m_on_init_finish = nullptr; + Context *m_timer_ctx = nullptr; + + AsyncOpTracker m_async_op_tracker; + bool m_trash_list_in_progress = false; + bool m_deferred_trash_list = false; + bool m_shutting_down = false; + + void register_watcher(); + void handle_register_watcher(int r); + + void create_trash(); + void handle_create_trash(int r); + + void unregister_watcher(Context* on_finish); + void handle_unregister_watcher(int r, Context* on_finish); + + void trash_list(bool initial_request); + void handle_trash_list(int r); + + void schedule_trash_list(double interval); + void process_trash_list(); + + void get_mirror_uuid(); + void handle_get_mirror_uuid(int r); + + void add_image(const std::string& image_id, + const cls::rbd::TrashImageSpec& spec); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H diff --git a/src/tools/rbd_mirror/image_deleter/Types.h b/src/tools/rbd_mirror/image_deleter/Types.h new file mode 100644 index 00000000..ac3bc64a --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/Types.h @@ -0,0 +1,54 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H + +#include "include/Context.h" +#include "librbd/journal/Policy.h" +#include <string> + +struct utime_t; + +namespace rbd { +namespace mirror { +namespace image_deleter { + +enum ErrorResult { + ERROR_RESULT_COMPLETE, + ERROR_RESULT_RETRY, + ERROR_RESULT_RETRY_IMMEDIATELY +}; + +struct TrashListener { + TrashListener() { + } + TrashListener(const TrashListener&) = delete; + TrashListener& operator=(const TrashListener&) = delete; + + virtual ~TrashListener() { + } + + virtual void handle_trash_image(const std::string& image_id, + const utime_t& deferment_end_time) = 0; + +}; + +struct JournalPolicy : public librbd::journal::Policy { + bool append_disabled() const override { + return true; + } + bool journal_disabled() const override { + return true; + } + + void allocate_tag_on_lock(Context *on_finish) override { + on_finish->complete(0); + } +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.cc b/src/tools/rbd_mirror/image_map/LoadRequest.cc new file mode 100644 index 00000000..7387b476 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/LoadRequest.cc @@ -0,0 +1,98 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" + +#include "LoadRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::LoadRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_map { + +static const uint32_t MAX_RETURN = 1024; + +using librbd::util::create_rados_callback; + +template<typename I> +LoadRequest<I>::LoadRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping, + Context *on_finish) + : m_ioctx(ioctx), + m_image_mapping(image_mapping), + m_on_finish(on_finish) { +} + +template<typename I> +void LoadRequest<I>::send() { + dout(20) << dendl; + + image_map_list(); +} + +template<typename I> +void LoadRequest<I>::image_map_list() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_map_list_start(&op, m_start_after, MAX_RETURN); + + librados::AioCompletion *aio_comp = create_rados_callback< + LoadRequest, &LoadRequest::handle_image_map_list>(this); + + m_out_bl.clear(); + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template<typename I> +void LoadRequest<I>::handle_image_map_list(int r) { + dout(20) << ": r=" << r << dendl; + + std::map<std::string, cls::rbd::MirrorImageMap> image_mapping; + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_map_list_finish(&it, &image_mapping); + } + + if (r < 0) { + derr << ": failed to get image map: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_image_mapping->insert(image_mapping.begin(), image_mapping.end()); + + if (image_mapping.size() == MAX_RETURN) { + m_start_after = image_mapping.rbegin()->first; + image_map_list(); + return; + } + + finish(0); +} + +template<typename I> +void LoadRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_map::LoadRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.h b/src/tools/rbd_mirror/image_map/LoadRequest.h new file mode 100644 index 00000000..7657e110 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/LoadRequest.h @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H + +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +class Context; + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_map { + +template<typename ImageCtxT = librbd::ImageCtx> +class LoadRequest { +public: + static LoadRequest *create(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping, + Context *on_finish) { + return new LoadRequest(ioctx, image_mapping, on_finish); + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | . . . . . . . . + * v v . MAX_RETURN + * IMAGE_MAP_LIST. . . . . . . + * | + * v + * <finish> + * + * @endverbatim + */ + LoadRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping, + Context *on_finish); + + librados::IoCtx &m_ioctx; + std::map<std::string, cls::rbd::MirrorImageMap> *m_image_mapping; + Context *m_on_finish; + + bufferlist m_out_bl; + std::string m_start_after; + + void image_map_list(); + void handle_image_map_list(int r); + + void finish(int r); +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc new file mode 100644 index 00000000..6fababdd --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Policy.cc @@ -0,0 +1,406 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "Policy.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_map { + +namespace { + +bool is_instance_action(ActionType action_type) { + switch (action_type) { + case ACTION_TYPE_ACQUIRE: + case ACTION_TYPE_RELEASE: + return true; + case ACTION_TYPE_NONE: + case ACTION_TYPE_MAP_UPDATE: + case ACTION_TYPE_MAP_REMOVE: + break; + } + return false; +} + +} // anonymous namespace + +using ::operator<<; +using librbd::util::unique_lock_name; + +Policy::Policy(librados::IoCtx &ioctx) + : m_ioctx(ioctx), + m_map_lock(unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", + this)) { + + // map should at least have once instance + std::string instance_id = stringify(ioctx.get_instance_id()); + m_map.emplace(instance_id, std::set<std::string>{}); +} + +void Policy::init( + const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) { + dout(20) << dendl; + + RWLock::WLocker map_lock(m_map_lock); + for (auto& it : image_mapping) { + ceph_assert(!it.second.instance_id.empty()); + auto map_result = m_map[it.second.instance_id].emplace(it.first); + ceph_assert(map_result.second); + + auto image_state_result = m_image_states.emplace( + it.first, ImageState{it.second.instance_id, it.second.mapped_time}); + ceph_assert(image_state_result.second); + + // ensure we (re)send image acquire actions to the instance + auto& image_state = image_state_result.first->second; + auto start_action = set_state(&image_state, + StateTransition::STATE_INITIALIZING, false); + ceph_assert(start_action); + } +} + +LookupInfo Policy::lookup(const std::string &global_image_id) { + dout(20) << "global_image_id=" << global_image_id << dendl; + + RWLock::RLocker map_lock(m_map_lock); + LookupInfo info; + + auto it = m_image_states.find(global_image_id); + if (it != m_image_states.end()) { + info.instance_id = it->second.instance_id; + info.mapped_time = it->second.mapped_time; + } + return info; +} + +bool Policy::add_image(const std::string &global_image_id) { + dout(5) << "global_image_id=" << global_image_id << dendl; + + RWLock::WLocker map_lock(m_map_lock); + auto image_state_result = m_image_states.emplace(global_image_id, + ImageState{}); + auto& image_state = image_state_result.first->second; + if (image_state.state == StateTransition::STATE_INITIALIZING) { + // avoid duplicate acquire notifications upon leader startup + return false; + } + + return set_state(&image_state, StateTransition::STATE_ASSOCIATING, false); +} + +bool Policy::remove_image(const std::string &global_image_id) { + dout(5) << "global_image_id=" << global_image_id << dendl; + + RWLock::WLocker map_lock(m_map_lock); + auto it = m_image_states.find(global_image_id); + if (it == m_image_states.end()) { + return false; + } + + auto& image_state = it->second; + return set_state(&image_state, StateTransition::STATE_DISSOCIATING, false); +} + +void Policy::add_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + + RWLock::WLocker map_lock(m_map_lock); + for (auto& instance : instance_ids) { + ceph_assert(!instance.empty()); + m_map.emplace(instance, std::set<std::string>{}); + } + + // post-failover, remove any dead instances and re-shuffle their images + if (m_initial_update) { + dout(5) << "initial instance update" << dendl; + m_initial_update = false; + + std::set<std::string> alive_instances(instance_ids.begin(), + instance_ids.end()); + InstanceIds dead_instances; + for (auto& map_pair : m_map) { + if (alive_instances.find(map_pair.first) == alive_instances.end()) { + dead_instances.push_back(map_pair.first); + } + } + + if (!dead_instances.empty()) { + remove_instances(m_map_lock, dead_instances, global_image_ids); + } + } + + GlobalImageIds shuffle_global_image_ids; + do_shuffle_add_instances(m_map, m_image_states.size(), &shuffle_global_image_ids); + dout(5) << "shuffling global_image_ids=[" << shuffle_global_image_ids + << "]" << dendl; + for (auto& global_image_id : shuffle_global_image_ids) { + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + if (set_state(&image_state, StateTransition::STATE_SHUFFLING, false)) { + global_image_ids->emplace(global_image_id); + } + } +} + +void Policy::remove_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + RWLock::WLocker map_lock(m_map_lock); + remove_instances(m_map_lock, instance_ids, global_image_ids); +} + +void Policy::remove_instances(const RWLock& lock, + const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + ceph_assert(m_map_lock.is_wlocked()); + dout(5) << "instance_ids=" << instance_ids << dendl; + + for (auto& instance_id : instance_ids) { + auto map_it = m_map.find(instance_id); + if (map_it == m_map.end()) { + continue; + } + + auto& instance_global_image_ids = map_it->second; + if (instance_global_image_ids.empty()) { + m_map.erase(map_it); + continue; + } + + m_dead_instances.insert(instance_id); + dout(5) << "force shuffling: instance_id=" << instance_id << ", " + << "global_image_ids=[" << instance_global_image_ids << "]"<< dendl; + for (auto& global_image_id : instance_global_image_ids) { + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + if (is_state_scheduled(image_state, + StateTransition::STATE_DISSOCIATING)) { + // don't shuffle images that no longer exist + continue; + } + + if (set_state(&image_state, StateTransition::STATE_SHUFFLING, true)) { + global_image_ids->emplace(global_image_id); + } + } + } +} + +ActionType Policy::start_action(const std::string &global_image_id) { + RWLock::WLocker map_lock(m_map_lock); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + auto& transition = image_state.transition; + ceph_assert(transition.action_type != ACTION_TYPE_NONE); + + dout(5) << "global_image_id=" << global_image_id << ", " + << "state=" << image_state.state << ", " + << "action_type=" << transition.action_type << dendl; + if (transition.start_policy_action) { + execute_policy_action(global_image_id, &image_state, + *transition.start_policy_action); + transition.start_policy_action = boost::none; + } + return transition.action_type; +} + +bool Policy::finish_action(const std::string &global_image_id, int r) { + RWLock::WLocker map_lock(m_map_lock); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + auto& transition = image_state.transition; + dout(5) << "global_image_id=" << global_image_id << ", " + << "state=" << image_state.state << ", " + << "action_type=" << transition.action_type << ", " + << "r=" << r << dendl; + + // retry on failure unless it's an RPC message to an instance that is dead + if (r < 0 && + (!is_instance_action(image_state.transition.action_type) || + image_state.instance_id == UNMAPPED_INSTANCE_ID || + m_dead_instances.find(image_state.instance_id) == + m_dead_instances.end())) { + return true; + } + + auto finish_policy_action = transition.finish_policy_action; + StateTransition::transit(image_state.state, &image_state.transition); + if (transition.finish_state) { + // in-progress state machine complete + ceph_assert(StateTransition::is_idle(*transition.finish_state)); + image_state.state = *transition.finish_state; + image_state.transition = {}; + } + + if (StateTransition::is_idle(image_state.state) && image_state.next_state) { + // advance to pending state machine + bool start_action = set_state(&image_state, *image_state.next_state, false); + ceph_assert(start_action); + } + + // image state may get purged in execute_policy_action() + bool pending_action = image_state.transition.action_type != ACTION_TYPE_NONE; + if (finish_policy_action) { + execute_policy_action(global_image_id, &image_state, *finish_policy_action); + } + + return pending_action; +} + +void Policy::execute_policy_action( + const std::string& global_image_id, ImageState* image_state, + StateTransition::PolicyAction policy_action) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "policy_action=" << policy_action << dendl; + + switch (policy_action) { + case StateTransition::POLICY_ACTION_MAP: + map(global_image_id, image_state); + break; + case StateTransition::POLICY_ACTION_UNMAP: + unmap(global_image_id, image_state); + break; + case StateTransition::POLICY_ACTION_REMOVE: + if (image_state->state == StateTransition::STATE_UNASSOCIATED) { + ceph_assert(image_state->instance_id == UNMAPPED_INSTANCE_ID); + ceph_assert(!image_state->next_state); + m_image_states.erase(global_image_id); + } + break; + } +} + +void Policy::map(const std::string& global_image_id, ImageState* image_state) { + ceph_assert(m_map_lock.is_wlocked()); + + std::string instance_id = image_state->instance_id; + if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) { + return; + } + if (is_dead_instance(instance_id)) { + unmap(global_image_id, image_state); + } + + instance_id = do_map(m_map, global_image_id); + ceph_assert(!instance_id.empty()); + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + image_state->instance_id = instance_id; + image_state->mapped_time = ceph_clock_now(); + + auto ins = m_map[instance_id].emplace(global_image_id); + ceph_assert(ins.second); +} + +void Policy::unmap(const std::string &global_image_id, + ImageState* image_state) { + ceph_assert(m_map_lock.is_wlocked()); + + std::string instance_id = image_state->instance_id; + if (instance_id == UNMAPPED_INSTANCE_ID) { + return; + } + + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + ceph_assert(!instance_id.empty()); + m_map[instance_id].erase(global_image_id); + image_state->instance_id = UNMAPPED_INSTANCE_ID; + image_state->mapped_time = {}; + + if (is_dead_instance(instance_id) && m_map[instance_id].empty()) { + dout(5) << "removing dead instance_id=" << instance_id << dendl; + m_map.erase(instance_id); + m_dead_instances.erase(instance_id); + } +} + +bool Policy::is_image_shuffling(const std::string &global_image_id) { + ceph_assert(m_map_lock.is_locked()); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + auto& image_state = it->second; + + // avoid attempting to re-shuffle a pending shuffle + auto result = is_state_scheduled(image_state, + StateTransition::STATE_SHUFFLING); + dout(20) << "global_image_id=" << global_image_id << ", " + << "result=" << result << dendl; + return result; +} + +bool Policy::can_shuffle_image(const std::string &global_image_id) { + ceph_assert(m_map_lock.is_locked()); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + int migration_throttle = cct->_conf.get_val<uint64_t>( + "rbd_mirror_image_policy_migration_throttle"); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + auto& image_state = it->second; + + utime_t last_shuffled_time = image_state.mapped_time; + + // idle images that haven't been recently remapped can shuffle + utime_t now = ceph_clock_now(); + auto result = (StateTransition::is_idle(image_state.state) && + ((migration_throttle <= 0) || + (now - last_shuffled_time >= migration_throttle))); + dout(10) << "global_image_id=" << global_image_id << ", " + << "migration_throttle=" << migration_throttle << ", " + << "last_shuffled_time=" << last_shuffled_time << ", " + << "result=" << result << dendl; + return result; +} + +bool Policy::set_state(ImageState* image_state, StateTransition::State state, + bool ignore_current_state) { + if (!ignore_current_state && image_state->state == state) { + return false; + } else if (StateTransition::is_idle(image_state->state)) { + image_state->state = state; + image_state->next_state = boost::none; + + StateTransition::transit(image_state->state, &image_state->transition); + ceph_assert(image_state->transition.action_type != ACTION_TYPE_NONE); + ceph_assert(!image_state->transition.finish_state); + return true; + } + + image_state->next_state = state; + return false; +} + +bool Policy::is_state_scheduled(const ImageState& image_state, + StateTransition::State state) const { + return (image_state.state == state || + (image_state.next_state && *image_state.next_state == state)); +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/Policy.h b/src/tools/rbd_mirror/image_map/Policy.h new file mode 100644 index 00000000..590fdbfe --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Policy.h @@ -0,0 +1,122 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H + +#include <map> +#include <tuple> +#include <boost/optional.hpp> + +#include "common/RWLock.h" +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/image_map/StateTransition.h" +#include "tools/rbd_mirror/image_map/Types.h" + +class Context; + +namespace rbd { +namespace mirror { +namespace image_map { + +class Policy { +public: + Policy(librados::IoCtx &ioctx); + + virtual ~Policy() { + } + + // init -- called during initialization + void init( + const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping); + + // lookup an image from the map + LookupInfo lookup(const std::string &global_image_id); + + // add, remove + bool add_image(const std::string &global_image_id); + bool remove_image(const std::string &global_image_id); + + // shuffle images when instances are added/removed + void add_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + void remove_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + + ActionType start_action(const std::string &global_image_id); + bool finish_action(const std::string &global_image_id, int r); + +protected: + typedef std::map<std::string, std::set<std::string> > InstanceToImageMap; + + bool is_dead_instance(const std::string instance_id) { + ceph_assert(m_map_lock.is_locked()); + return m_dead_instances.find(instance_id) != m_dead_instances.end(); + } + + bool is_image_shuffling(const std::string &global_image_id); + bool can_shuffle_image(const std::string &global_image_id); + + // map an image (global image id) to an instance + virtual std::string do_map(const InstanceToImageMap& map, + const std::string &global_image_id) = 0; + + // shuffle images when instances are added/removed + virtual void do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) = 0; + +private: + struct ImageState { + std::string instance_id = UNMAPPED_INSTANCE_ID; + utime_t mapped_time; + + ImageState() {} + ImageState(const std::string& instance_id, const utime_t& mapped_time) + : instance_id(instance_id), mapped_time(mapped_time) { + } + + // active state and action + StateTransition::State state = StateTransition::STATE_UNASSOCIATED; + StateTransition::Transition transition; + + // next scheduled state + boost::optional<StateTransition::State> next_state = boost::none; + }; + + typedef std::map<std::string, ImageState> ImageStates; + + librados::IoCtx &m_ioctx; + + RWLock m_map_lock; // protects m_map + InstanceToImageMap m_map; // instance_id -> global_id map + + ImageStates m_image_states; + std::set<std::string> m_dead_instances; + + bool m_initial_update = true; + + void remove_instances(const RWLock& lock, const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + + bool set_state(ImageState* image_state, StateTransition::State state, + bool ignore_current_state); + + void execute_policy_action(const std::string& global_image_id, + ImageState* image_state, + StateTransition::PolicyAction policy_action); + + void map(const std::string& global_image_id, ImageState* image_state); + void unmap(const std::string &global_image_id, ImageState* image_state); + + bool is_state_scheduled(const ImageState& image_state, + StateTransition::State state) const; + +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.cc b/src/tools/rbd_mirror/image_map/SimplePolicy.cc new file mode 100644 index 00000000..f2680581 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/SimplePolicy.cc @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "SimplePolicy.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::SimplePolicy: " << this \ + << " " << __func__ << ": " +namespace rbd { +namespace mirror { +namespace image_map { + +SimplePolicy::SimplePolicy(librados::IoCtx &ioctx) + : Policy(ioctx) { +} + +size_t SimplePolicy::calc_images_per_instance(const InstanceToImageMap& map, + size_t image_count) { + size_t nr_instances = 0; + for (auto const &it : map) { + if (!Policy::is_dead_instance(it.first)) { + ++nr_instances; + } + } + ceph_assert(nr_instances > 0); + + size_t images_per_instance = image_count / nr_instances; + if (images_per_instance == 0) { + ++images_per_instance; + } + + return images_per_instance; +} + +void SimplePolicy::do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) { + uint64_t images_per_instance = calc_images_per_instance(map, image_count); + dout(5) << "images per instance=" << images_per_instance << dendl; + + for (auto const &instance : map) { + if (instance.second.size() <= images_per_instance) { + continue; + } + + auto it = instance.second.begin(); + uint64_t cut_off = instance.second.size() - images_per_instance; + + while (it != instance.second.end() && cut_off > 0) { + if (Policy::is_image_shuffling(*it)) { + --cut_off; + } else if (Policy::can_shuffle_image(*it)) { + --cut_off; + remap_global_image_ids->emplace(*it); + } + + ++it; + } + } +} + +std::string SimplePolicy::do_map(const InstanceToImageMap& map, + const std::string &global_image_id) { + auto min_it = map.end(); + for (auto it = map.begin(); it != map.end(); ++it) { + ceph_assert(it->second.find(global_image_id) == it->second.end()); + if (Policy::is_dead_instance(it->first)) { + continue; + } else if (min_it == map.end()) { + min_it = it; + } else if (it->second.size() < min_it->second.size()) { + min_it = it; + } + } + + ceph_assert(min_it != map.end()); + dout(20) << "global_image_id=" << global_image_id << " maps to instance_id=" + << min_it->first << dendl; + return min_it->first; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.h b/src/tools/rbd_mirror/image_map/SimplePolicy.h new file mode 100644 index 00000000..ad2071b2 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/SimplePolicy.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H + +#include "Policy.h" + +namespace rbd { +namespace mirror { +namespace image_map { + +class SimplePolicy : public Policy { +public: + static SimplePolicy *create(librados::IoCtx &ioctx) { + return new SimplePolicy(ioctx); + } + +protected: + SimplePolicy(librados::IoCtx &ioctx); + + std::string do_map(const InstanceToImageMap& map, + const std::string &global_image_id) override; + + void do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) override; + +private: + size_t calc_images_per_instance(const InstanceToImageMap& map, + size_t image_count); + +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H diff --git a/src/tools/rbd_mirror/image_map/StateTransition.cc b/src/tools/rbd_mirror/image_map/StateTransition.cc new file mode 100644 index 00000000..ec5f07ff --- /dev/null +++ b/src/tools/rbd_mirror/image_map/StateTransition.cc @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <ostream> +#include "include/ceph_assert.h" +#include "StateTransition.h" + +namespace rbd { +namespace mirror { +namespace image_map { + +std::ostream &operator<<(std::ostream &os, + const StateTransition::State &state) { + switch(state) { + case StateTransition::STATE_INITIALIZING: + os << "INITIALIZING"; + break; + case StateTransition::STATE_ASSOCIATING: + os << "ASSOCIATING"; + break; + case StateTransition::STATE_ASSOCIATED: + os << "ASSOCIATED"; + break; + case StateTransition::STATE_SHUFFLING: + os << "SHUFFLING"; + break; + case StateTransition::STATE_DISSOCIATING: + os << "DISSOCIATING"; + break; + case StateTransition::STATE_UNASSOCIATED: + os << "UNASSOCIATED"; + break; + } + return os; +} + +std::ostream &operator<<(std::ostream &os, + const StateTransition::PolicyAction &policy_action) { + switch(policy_action) { + case StateTransition::POLICY_ACTION_MAP: + os << "MAP"; + break; + case StateTransition::POLICY_ACTION_UNMAP: + os << "UNMAP"; + break; + case StateTransition::POLICY_ACTION_REMOVE: + os << "REMOVE"; + break; + } + return os; +} + +const StateTransition::TransitionTable StateTransition::s_transition_table { + // state current_action Transition + // --------------------------------------------------------------------------- + {{STATE_INITIALIZING, ACTION_TYPE_NONE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_INITIALIZING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}}, + + {{STATE_ASSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_MAP_UPDATE, + {POLICY_ACTION_MAP}, {}, {}}}, + {{STATE_ASSOCIATING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_ASSOCIATING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}}, + + {{STATE_DISSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {}, + {POLICY_ACTION_UNMAP}, {}}}, + {{STATE_DISSOCIATING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_REMOVE, {}, + {POLICY_ACTION_REMOVE}, {}}}, + {{STATE_DISSOCIATING, ACTION_TYPE_MAP_REMOVE}, {ACTION_TYPE_NONE, {}, + {}, {STATE_UNASSOCIATED}}}, + + {{STATE_SHUFFLING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {}, + {POLICY_ACTION_UNMAP}, {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_UPDATE, + {POLICY_ACTION_MAP}, {}, {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}} +}; + +void StateTransition::transit(State state, Transition* transition) { + auto it = s_transition_table.find({state, transition->action_type}); + ceph_assert(it != s_transition_table.end()); + + *transition = it->second; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/StateTransition.h b/src/tools/rbd_mirror/image_map/StateTransition.h new file mode 100644 index 00000000..02a5ce4e --- /dev/null +++ b/src/tools/rbd_mirror/image_map/StateTransition.h @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H + +#include "tools/rbd_mirror/image_map/Types.h" +#include <boost/optional.hpp> +#include <map> + +namespace rbd { +namespace mirror { +namespace image_map { + +class StateTransition { +public: + enum State { + STATE_UNASSOCIATED, + STATE_INITIALIZING, + STATE_ASSOCIATING, + STATE_ASSOCIATED, + STATE_SHUFFLING, + STATE_DISSOCIATING + }; + + enum PolicyAction { + POLICY_ACTION_MAP, + POLICY_ACTION_UNMAP, + POLICY_ACTION_REMOVE + }; + + struct Transition { + // image map action + ActionType action_type = ACTION_TYPE_NONE; + + // policy internal action + boost::optional<PolicyAction> start_policy_action; + boost::optional<PolicyAction> finish_policy_action; + + // state machine complete + boost::optional<State> finish_state; + + Transition() { + } + Transition(ActionType action_type, + const boost::optional<PolicyAction>& start_policy_action, + const boost::optional<PolicyAction>& finish_policy_action, + const boost::optional<State>& finish_state) + : action_type(action_type), start_policy_action(start_policy_action), + finish_policy_action(finish_policy_action), finish_state(finish_state) { + } + }; + + static bool is_idle(State state) { + return (state == STATE_UNASSOCIATED || state == STATE_ASSOCIATED); + } + + static void transit(State state, Transition* transition); + +private: + typedef std::pair<State, ActionType> TransitionKey; + typedef std::map<TransitionKey, Transition> TransitionTable; + + // image transition table + static const TransitionTable s_transition_table; +}; + +std::ostream &operator<<(std::ostream &os, const StateTransition::State &state); +std::ostream &operator<<(std::ostream &os, + const StateTransition::PolicyAction &policy_action); + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H diff --git a/src/tools/rbd_mirror/image_map/Types.cc b/src/tools/rbd_mirror/image_map/Types.cc new file mode 100644 index 00000000..47de9c3c --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Types.cc @@ -0,0 +1,138 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Types.h" +#include "include/ceph_assert.h" +#include "include/stringify.h" +#include "common/Formatter.h" +#include <iostream> + +namespace rbd { +namespace mirror { +namespace image_map { + +const std::string UNMAPPED_INSTANCE_ID(""); + +namespace { + +template <typename E> +class GetTypeVisitor : public boost::static_visitor<E> { +public: + template <typename T> + inline E operator()(const T&) const { + return T::TYPE; + } +}; + +class EncodeVisitor : public boost::static_visitor<void> { +public: + explicit EncodeVisitor(bufferlist &bl) : m_bl(bl) { + } + + template <typename T> + inline void operator()(const T& t) const { + using ceph::encode; + encode(static_cast<uint32_t>(T::TYPE), m_bl); + t.encode(m_bl); + } +private: + bufferlist &m_bl; +}; + +class DecodeVisitor : public boost::static_visitor<void> { +public: + DecodeVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) { + } + + template <typename T> + inline void operator()(T& t) const { + t.decode(m_version, m_iter); + } +private: + __u8 m_version; + bufferlist::const_iterator &m_iter; +}; + +class DumpVisitor : public boost::static_visitor<void> { +public: + explicit DumpVisitor(Formatter *formatter, const std::string &key) + : m_formatter(formatter), m_key(key) {} + + template <typename T> + inline void operator()(const T& t) const { + auto type = T::TYPE; + m_formatter->dump_string(m_key.c_str(), stringify(type)); + t.dump(m_formatter); + } +private: + ceph::Formatter *m_formatter; + std::string m_key; +}; + +} // anonymous namespace + +PolicyMetaType PolicyData::get_policy_meta_type() const { + return boost::apply_visitor(GetTypeVisitor<PolicyMetaType>(), policy_meta); +} + +void PolicyData::encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + boost::apply_visitor(EncodeVisitor(bl), policy_meta); + ENCODE_FINISH(bl); +} + +void PolicyData::decode(bufferlist::const_iterator& it) { + DECODE_START(1, it); + + uint32_t policy_meta_type; + decode(policy_meta_type, it); + + switch (policy_meta_type) { + case POLICY_META_TYPE_NONE: + policy_meta = PolicyMetaNone(); + break; + default: + policy_meta = PolicyMetaUnknown(); + break; + } + + boost::apply_visitor(DecodeVisitor(struct_v, it), policy_meta); + DECODE_FINISH(it); +} + +void PolicyData::dump(Formatter *f) const { + boost::apply_visitor(DumpVisitor(f, "policy_meta_type"), policy_meta); +} + +void PolicyData::generate_test_instances(std::list<PolicyData *> &o) { + o.push_back(new PolicyData(PolicyMetaNone())); +} + +std::ostream &operator<<(std::ostream &os, const ActionType& action_type) { + switch (action_type) { + case ACTION_TYPE_NONE: + os << "NONE"; + break; + case ACTION_TYPE_MAP_UPDATE: + os << "MAP_UPDATE"; + break; + case ACTION_TYPE_MAP_REMOVE: + os << "MAP_REMOVE"; + break; + case ACTION_TYPE_ACQUIRE: + os << "ACQUIRE"; + break; + case ACTION_TYPE_RELEASE: + os << "RELEASE"; + break; + default: + os << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")"; + break; + } + return os; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/Types.h b/src/tools/rbd_mirror/image_map/Types.h new file mode 100644 index 00000000..5a97430f --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Types.h @@ -0,0 +1,130 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H + +#include <iosfwd> +#include <map> +#include <set> +#include <string> +#include <boost/variant.hpp> + +#include "include/buffer.h" +#include "include/encoding.h" +#include "include/utime.h" +#include "tools/rbd_mirror/Types.h" + +struct Context; + +namespace ceph { +class Formatter; +} + +namespace rbd { +namespace mirror { +namespace image_map { + +extern const std::string UNMAPPED_INSTANCE_ID; + +struct Listener { + virtual ~Listener() { + } + + virtual void acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) = 0; + virtual void release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) = 0; + virtual void remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) = 0; +}; + +struct LookupInfo { + std::string instance_id = UNMAPPED_INSTANCE_ID; + utime_t mapped_time; +}; + +enum ActionType { + ACTION_TYPE_NONE, + ACTION_TYPE_MAP_UPDATE, + ACTION_TYPE_MAP_REMOVE, + ACTION_TYPE_ACQUIRE, + ACTION_TYPE_RELEASE +}; + +typedef std::vector<std::string> InstanceIds; +typedef std::set<std::string> GlobalImageIds; +typedef std::map<std::string, ActionType> ImageActionTypes; + +enum PolicyMetaType { + POLICY_META_TYPE_NONE = 0, +}; + +struct PolicyMetaNone { + static const PolicyMetaType TYPE = POLICY_META_TYPE_NONE; + + PolicyMetaNone() { + } + + void encode(bufferlist& bl) const { + } + + void decode(__u8 version, bufferlist::const_iterator& it) { + } + + void dump(Formatter *f) const { + } +}; + +struct PolicyMetaUnknown { + static const PolicyMetaType TYPE = static_cast<PolicyMetaType>(-1); + + PolicyMetaUnknown() { + } + + void encode(bufferlist& bl) const { + ceph_abort(); + } + + void decode(__u8 version, bufferlist::const_iterator& it) { + } + + void dump(Formatter *f) const { + } +}; + +typedef boost::variant<PolicyMetaNone, + PolicyMetaUnknown> PolicyMeta; + +struct PolicyData { + PolicyData() + : policy_meta(PolicyMetaUnknown()) { + } + PolicyData(const PolicyMeta &policy_meta) + : policy_meta(policy_meta) { + } + + PolicyMeta policy_meta; + + PolicyMetaType get_policy_meta_type() const; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; + + static void generate_test_instances(std::list<PolicyData *> &o); +}; + +WRITE_CLASS_ENCODER(PolicyData); + +std::ostream &operator<<(std::ostream &os, const ActionType &action_type); + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.cc b/src/tools/rbd_mirror/image_map/UpdateRequest.cc new file mode 100644 index 00000000..799c5670 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/UpdateRequest.cc @@ -0,0 +1,100 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" + +#include "UpdateRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::UpdateRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_map { + +using librbd::util::create_rados_callback; + +static const uint32_t MAX_UPDATE = 256; + +template <typename I> +UpdateRequest<I>::UpdateRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish) + : m_ioctx(ioctx), + m_update_mapping(update_mapping), + m_remove_global_image_ids(remove_global_image_ids), + m_on_finish(on_finish) { +} + +template <typename I> +void UpdateRequest<I>::send() { + dout(20) << dendl; + + update_image_map(); +} + +template <typename I> +void UpdateRequest<I>::update_image_map() { + dout(20) << dendl; + + if (m_update_mapping.empty() && m_remove_global_image_ids.empty()) { + finish(0); + return; + } + + uint32_t nr_updates = 0; + librados::ObjectWriteOperation op; + + auto it1 = m_update_mapping.begin(); + while (it1 != m_update_mapping.end() && nr_updates++ < MAX_UPDATE) { + librbd::cls_client::mirror_image_map_update(&op, it1->first, it1->second); + it1 = m_update_mapping.erase(it1); + } + + auto it2 = m_remove_global_image_ids.begin(); + while (it2 != m_remove_global_image_ids.end() && nr_updates++ < MAX_UPDATE) { + librbd::cls_client::mirror_image_map_remove(&op, *it2); + it2 = m_remove_global_image_ids.erase(it2); + } + + librados::AioCompletion *aio_comp = create_rados_callback< + UpdateRequest, &UpdateRequest::handle_update_image_map>(this); + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void UpdateRequest<I>::handle_update_image_map(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update image map: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + update_image_map(); +} + +template <typename I> +void UpdateRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_map::UpdateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.h b/src/tools/rbd_mirror/image_map/UpdateRequest.h new file mode 100644 index 00000000..841cc6f9 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/UpdateRequest.h @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H + +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +class Context; + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_map { + +template<typename ImageCtxT = librbd::ImageCtx> +class UpdateRequest { +public: + // accepts an image map for updation and a collection of + // global image ids to purge. + static UpdateRequest *create(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish) { + return new UpdateRequest(ioctx, std::move(update_mapping), std::move(remove_global_image_ids), + on_finish); + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | . . . . . . . . + * v v . MAX_UPDATE + * UPDATE_IMAGE_MAP. . . . . . . + * | + * v + * <finish> + * + * @endverbatim + */ + UpdateRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish); + + librados::IoCtx &m_ioctx; + std::map<std::string, cls::rbd::MirrorImageMap> m_update_mapping; + std::set<std::string> m_remove_global_image_ids; + Context *m_on_finish; + + void update_image_map(); + void handle_update_image_map(int r); + + void finish(int r); +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc new file mode 100644 index 00000000..7ce21b4b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc @@ -0,0 +1,785 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "BootstrapRequest.h" +#include "CloseImageRequest.h" +#include "CreateImageRequest.h" +#include "IsPrimaryRequest.h" +#include "OpenImageRequest.h" +#include "OpenLocalImageRequest.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/internal.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/ProgressContext.h" +#include "tools/rbd_mirror/ImageSync.h" +#include "tools/rbd_mirror/Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::BootstrapRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; +using librbd::util::unique_lock_name; + +template <typename I> +BootstrapRequest<I>::BootstrapRequest( + Threads<I>* threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + InstanceWatcher<I> *instance_watcher, + I **local_image_ctx, + const std::string &local_image_id, + const std::string &remote_image_id, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, + Journaler *journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, + Context *on_finish, + bool *do_resync, + rbd::mirror::ProgressContext *progress_ctx) + : BaseRequest("rbd::mirror::image_replayer::BootstrapRequest", + reinterpret_cast<CephContext*>(local_io_ctx.cct()), on_finish), + m_threads(threads), m_local_io_ctx(local_io_ctx), + m_remote_io_ctx(remote_io_ctx), m_instance_watcher(instance_watcher), + m_local_image_ctx(local_image_ctx), m_local_image_id(local_image_id), + m_remote_image_id(remote_image_id), m_global_image_id(global_image_id), + m_local_mirror_uuid(local_mirror_uuid), + m_remote_mirror_uuid(remote_mirror_uuid), m_journaler(journaler), + m_client_state(client_state), m_client_meta(client_meta), + m_progress_ctx(progress_ctx), m_do_resync(do_resync), + m_lock(unique_lock_name("BootstrapRequest::m_lock", this)) { + dout(10) << dendl; +} + +template <typename I> +BootstrapRequest<I>::~BootstrapRequest() { + ceph_assert(m_remote_image_ctx == nullptr); +} + +template <typename I> +bool BootstrapRequest<I>::is_syncing() const { + Mutex::Locker locker(m_lock); + return (m_image_sync != nullptr); +} + +template <typename I> +void BootstrapRequest<I>::send() { + *m_do_resync = false; + + get_remote_tag_class(); +} + +template <typename I> +void BootstrapRequest<I>::cancel() { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + m_canceled = true; + + if (m_image_sync != nullptr) { + m_image_sync->cancel(); + } +} + +template <typename I> +void BootstrapRequest<I>::get_remote_tag_class() { + dout(15) << dendl; + + update_progress("GET_REMOTE_TAG_CLASS"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_get_remote_tag_class>( + this); + m_journaler->get_client(librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_get_remote_tag_class(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + librbd::journal::ClientData client_data; + auto it = m_client.data.cbegin(); + try { + decode(client_data, it); + } catch (const buffer::error &err) { + derr << "failed to decode remote client meta data: " << err.what() + << dendl; + finish(-EBADMSG); + return; + } + + librbd::journal::ImageClientMeta *client_meta = + boost::get<librbd::journal::ImageClientMeta>(&client_data.client_meta); + if (client_meta == nullptr) { + derr << "unknown remote client registration" << dendl; + finish(-EINVAL); + return; + } + + m_remote_tag_class = client_meta->tag_class; + dout(10) << "remote tag class=" << m_remote_tag_class << dendl; + + open_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::open_remote_image() { + dout(15) << "remote_image_id=" << m_remote_image_id << dendl; + + update_progress("OPEN_REMOTE_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_remote_image>( + this); + OpenImageRequest<I> *request = OpenImageRequest<I>::create( + m_remote_io_ctx, &m_remote_image_ctx, m_remote_image_id, false, + ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_open_remote_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to open remote image: " << cpp_strerror(r) << dendl; + ceph_assert(m_remote_image_ctx == nullptr); + finish(r); + return; + } + + is_primary(); +} + +template <typename I> +void BootstrapRequest<I>::is_primary() { + dout(15) << dendl; + + update_progress("OPEN_REMOTE_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_is_primary>( + this); + IsPrimaryRequest<I> *request = IsPrimaryRequest<I>::create(m_remote_image_ctx, + &m_primary, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_is_primary(int r) { + dout(15) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(5) << "remote image is not mirrored" << dendl; + m_ret_val = -EREMOTEIO; + close_remote_image(); + return; + } else if (r < 0) { + derr << "error querying remote image primary status: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + if (!m_primary) { + if (m_local_image_id.empty()) { + // no local image and remote isn't primary -- don't sync it + dout(5) << "remote image is not primary -- not syncing" + << dendl; + m_ret_val = -EREMOTEIO; + close_remote_image(); + return; + } else if (m_client_meta->state != + librbd::journal::MIRROR_PEER_STATE_REPLAYING) { + // ensure we attempt to re-sync to remote if it's re-promoted + dout(5) << "remote image is not primary -- sync interrupted" + << dendl; + m_ret_val = -EREMOTEIO; + update_client_state(); + return; + } + } + + if (!m_client_meta->image_id.empty()) { + // have an image id -- use that to open the image since a deletion (resync) + // will leave the old image id registered in the peer + m_local_image_id = m_client_meta->image_id; + } + + if (m_local_image_id.empty()) { + // prepare to create local image + update_client_image(); + return; + } + + open_local_image(); +} + +template <typename I> +void BootstrapRequest<I>::update_client_state() { + dout(15) << dendl; + update_progress("UPDATE_CLIENT_STATE"); + + librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta); + client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + librbd::journal::ClientData client_data(client_meta); + bufferlist data_bl; + encode(client_data, data_bl); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_update_client_state>( + this); + m_journaler->update_client(data_bl, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_update_client_state(int r) { + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to update client: " << cpp_strerror(r) << dendl; + } else { + m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + } + + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::open_local_image() { + dout(15) << "local_image_id=" << m_local_image_id << dendl; + + update_progress("OPEN_LOCAL_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_local_image>( + this); + OpenLocalImageRequest<I> *request = OpenLocalImageRequest<I>::create( + m_local_io_ctx, m_local_image_ctx, m_local_image_id, m_threads->work_queue, + ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_open_local_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r == -ENOENT) { + ceph_assert(*m_local_image_ctx == nullptr); + dout(10) << "local image missing" << dendl; + unregister_client(); + return; + } else if (r == -EREMOTEIO) { + ceph_assert(*m_local_image_ctx == nullptr); + dout(10) << "local image is primary -- skipping image replay" << dendl; + m_ret_val = r; + close_remote_image(); + return; + } else if (r < 0) { + ceph_assert(*m_local_image_ctx == nullptr); + derr << "failed to open local image: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + I *local_image_ctx = (*m_local_image_ctx); + { + local_image_ctx->snap_lock.get_read(); + if (local_image_ctx->journal == nullptr) { + local_image_ctx->snap_lock.put_read(); + + derr << "local image does not support journaling" << dendl; + m_ret_val = -EINVAL; + close_local_image(); + return; + } + + r = (*m_local_image_ctx)->journal->is_resync_requested(m_do_resync); + if (r < 0) { + local_image_ctx->snap_lock.put_read(); + + derr << "failed to check if a resync was requested" << dendl; + m_ret_val = r; + close_local_image(); + return; + } + + m_local_tag_tid = local_image_ctx->journal->get_tag_tid(); + m_local_tag_data = local_image_ctx->journal->get_tag_data(); + dout(10) << "local tag=" << m_local_tag_tid << ", " + << "local tag data=" << m_local_tag_data << dendl; + local_image_ctx->snap_lock.put_read(); + } + + if (m_local_tag_data.mirror_uuid != m_remote_mirror_uuid && !m_primary) { + // if the local mirror is not linked to the (now) non-primary image, + // stop the replay. Otherwise, we ignore that the remote is non-primary + // so that we can replay the demotion + dout(5) << "remote image is not primary -- skipping image replay" + << dendl; + m_ret_val = -EREMOTEIO; + close_local_image(); + return; + } + + if (*m_do_resync) { + close_remote_image(); + return; + } + + if (*m_client_state == cls::journal::CLIENT_STATE_DISCONNECTED) { + dout(10) << "client flagged disconnected -- skipping bootstrap" << dendl; + // The caller is expected to detect disconnect initializing remote journal. + m_ret_val = 0; + close_remote_image(); + return; + } + + get_remote_tags(); +} + +template <typename I> +void BootstrapRequest<I>::unregister_client() { + dout(15) << dendl; + update_progress("UNREGISTER_CLIENT"); + + m_local_image_id = ""; + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_unregister_client>( + this); + m_journaler->unregister_client(ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_unregister_client(int r) { + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to unregister with remote journal: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + *m_client_meta = librbd::journal::MirrorPeerClientMeta(""); + register_client(); +} + +template <typename I> +void BootstrapRequest<I>::register_client() { + dout(15) << dendl; + + update_progress("REGISTER_CLIENT"); + + ceph_assert(m_local_image_id.empty()); + librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta; + mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + librbd::journal::ClientData client_data{mirror_peer_client_meta}; + bufferlist client_data_bl; + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_register_client>( + this); + m_journaler->register_client(client_data_bl, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_register_client(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register with remote journal: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + *m_client_state = cls::journal::CLIENT_STATE_CONNECTED; + *m_client_meta = librbd::journal::MirrorPeerClientMeta(); + m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + is_primary(); +} + +template <typename I> +void BootstrapRequest<I>::update_client_image() { + ceph_assert(m_local_image_id.empty()); + assert(m_local_image_id.empty()); + m_local_image_id = librbd::util::generate_image_id<I>(m_local_io_ctx); + + dout(15) << "local_image_id=" << m_local_image_id << dendl; + update_progress("UPDATE_CLIENT_IMAGE"); + + librbd::journal::MirrorPeerClientMeta client_meta{m_local_image_id}; + client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING; + + librbd::journal::ClientData client_data(client_meta); + bufferlist data_bl; + encode(client_data, data_bl); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_update_client_image>( + this); + m_journaler->update_client(data_bl, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_update_client_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update client: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + if (m_canceled) { + dout(10) << "request canceled" << dendl; + m_ret_val = -ECANCELED; + close_remote_image(); + return; + } + + *m_client_meta = {m_local_image_id}; + m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_SYNCING; + create_local_image(); +} + +template <typename I> +void BootstrapRequest<I>::create_local_image() { + dout(15) << "local_image_id=" << m_local_image_id << dendl; + update_progress("CREATE_LOCAL_IMAGE"); + + m_remote_image_ctx->snap_lock.get_read(); + std::string image_name = m_remote_image_ctx->name; + m_remote_image_ctx->snap_lock.put_read(); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_create_local_image>( + this); + CreateImageRequest<I> *request = CreateImageRequest<I>::create( + m_threads, m_local_io_ctx, m_global_image_id, m_remote_mirror_uuid, + image_name, m_local_image_id, m_remote_image_ctx, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_create_local_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r == -EBADF) { + dout(5) << "image id " << m_local_image_id << " already in-use" << dendl; + m_local_image_id = ""; + update_client_image(); + return; + } else if (r < 0) { + if (r == -ENOENT) { + dout(10) << "parent image does not exist" << dendl; + } else { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + } + m_ret_val = r; + close_remote_image(); + return; + } + + open_local_image(); +} + +template <typename I> +void BootstrapRequest<I>::get_remote_tags() { + if (m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_SYNCING) { + // optimization -- no need to compare remote tags if we just created + // the image locally or sync was interrupted + image_sync(); + return; + } + + dout(15) << dendl; + update_progress("GET_REMOTE_TAGS"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_get_remote_tags>(this); + m_journaler->get_tags(m_remote_tag_class, &m_remote_tags, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_get_remote_tags(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_local_image(); + return; + } + + if (m_canceled) { + dout(10) << "request canceled" << dendl; + m_ret_val = -ECANCELED; + close_local_image(); + return; + } + + // At this point, the local image was existing, non-primary, and replaying; + // and the remote image is primary. Attempt to link the local image's most + // recent tag to the remote image's tag chain. + bool remote_tag_data_valid = false; + librbd::journal::TagData remote_tag_data; + boost::optional<uint64_t> remote_orphan_tag_tid = + boost::make_optional<uint64_t>(false, 0U); + bool reconnect_orphan = false; + + // decode the remote tags + for (auto &remote_tag : m_remote_tags) { + if (m_local_tag_data.predecessor.commit_valid && + m_local_tag_data.predecessor.mirror_uuid == m_remote_mirror_uuid && + m_local_tag_data.predecessor.tag_tid > remote_tag.tid) { + dout(15) << "skipping processed predecessor remote tag " + << remote_tag.tid << dendl; + continue; + } + + try { + auto it = remote_tag.data.cbegin(); + decode(remote_tag_data, it); + remote_tag_data_valid = true; + } catch (const buffer::error &err) { + derr << "failed to decode remote tag " << remote_tag.tid << ": " + << err.what() << dendl; + m_ret_val = -EBADMSG; + close_local_image(); + return; + } + + dout(10) << "decoded remote tag " << remote_tag.tid << ": " + << remote_tag_data << dendl; + + if (!m_local_tag_data.predecessor.commit_valid) { + // newly synced local image (no predecessor) replays from the first tag + if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) { + dout(15) << "skipping non-primary remote tag" << dendl; + continue; + } + + dout(10) << "using initial primary remote tag" << dendl; + break; + } + + if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) { + // demotion last available local epoch + + if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid && + remote_tag_data.predecessor.commit_valid && + remote_tag_data.predecessor.tag_tid == + m_local_tag_data.predecessor.tag_tid) { + // demotion matches remote epoch + + if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid && + m_local_tag_data.predecessor.mirror_uuid == + librbd::Journal<>::LOCAL_MIRROR_UUID) { + // local demoted and remote has matching event + dout(15) << "found matching local demotion tag" << dendl; + remote_orphan_tag_tid = remote_tag.tid; + continue; + } + + if (m_local_tag_data.predecessor.mirror_uuid == m_remote_mirror_uuid && + remote_tag_data.predecessor.mirror_uuid == + librbd::Journal<>::LOCAL_MIRROR_UUID) { + // remote demoted and local has matching event + dout(15) << "found matching remote demotion tag" << dendl; + remote_orphan_tag_tid = remote_tag.tid; + continue; + } + } + + if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID && + remote_tag_data.predecessor.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID && + remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid && + remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) { + // remote promotion tag chained to remote/local demotion tag + dout(15) << "found chained remote promotion tag" << dendl; + reconnect_orphan = true; + break; + } + + // promotion must follow demotion + remote_orphan_tag_tid = boost::none; + } + } + + if (remote_tag_data_valid && + m_local_tag_data.mirror_uuid == m_remote_mirror_uuid) { + dout(10) << "local image is in clean replay state" << dendl; + } else if (reconnect_orphan) { + dout(10) << "remote image was demoted/promoted" << dendl; + } else { + derr << "split-brain detected -- skipping image replay" << dendl; + m_ret_val = -EEXIST; + close_local_image(); + return; + } + + image_sync(); +} + +template <typename I> +void BootstrapRequest<I>::image_sync() { + if (m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_REPLAYING) { + // clean replay state -- no image sync required + close_remote_image(); + return; + } + + { + Mutex::Locker locker(m_lock); + if (m_canceled) { + m_ret_val = -ECANCELED; + } else { + dout(15) << dendl; + ceph_assert(m_image_sync == nullptr); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_image_sync>(this); + m_image_sync = ImageSync<I>::create( + *m_local_image_ctx, m_remote_image_ctx, m_threads->timer, + &m_threads->timer_lock, m_local_mirror_uuid, m_journaler, + m_client_meta, m_threads->work_queue, m_instance_watcher, ctx, + m_progress_ctx); + + m_image_sync->get(); + + m_lock.Unlock(); + update_progress("IMAGE_SYNC"); + m_lock.Lock(); + + m_image_sync->send(); + return; + } + } + + dout(10) << "request canceled" << dendl; + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::handle_image_sync(int r) { + dout(15) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + m_image_sync->put(); + m_image_sync = nullptr; + + if (m_canceled) { + dout(10) << "request canceled" << dendl; + m_ret_val = -ECANCELED; + } + + if (r < 0) { + derr << "failed to sync remote image: " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + } + + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::close_local_image() { + dout(15) << dendl; + + update_progress("CLOSE_LOCAL_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_close_local_image>( + this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + m_local_image_ctx, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_close_local_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "error encountered closing local image: " << cpp_strerror(r) + << dendl; + } + + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::close_remote_image() { + dout(15) << dendl; + + update_progress("CLOSE_REMOTE_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_close_remote_image>( + this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + &m_remote_image_ctx, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_close_remote_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "error encountered closing remote image: " << cpp_strerror(r) + << dendl; + } + + finish(m_ret_val); +} + +template <typename I> +void BootstrapRequest<I>::update_progress(const std::string &description) { + dout(15) << description << dendl; + + if (m_progress_ctx) { + m_progress_ctx->update_progress(description); + } +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h new file mode 100644 index 00000000..ea9f8565 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h @@ -0,0 +1,230 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "common/Mutex.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include "tools/rbd_mirror/BaseRequest.h" +#include "tools/rbd_mirror/Types.h" +#include <list> +#include <string> + +class Context; +class ContextWQ; +class Mutex; +class SafeTimer; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { + +class ProgressContext; + +template <typename> class ImageSync; +template <typename> class InstanceWatcher; +template <typename> struct Threads; + +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class BootstrapRequest : public BaseRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + typedef rbd::mirror::ProgressContext ProgressContext; + + static BootstrapRequest* create( + Threads<ImageCtxT>* threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + InstanceWatcher<ImageCtxT> *instance_watcher, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + const std::string &remote_image_id, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, + Journaler *journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, + Context *on_finish, + bool *do_resync, + ProgressContext *progress_ctx = nullptr) { + return new BootstrapRequest(threads, local_io_ctx, remote_io_ctx, + instance_watcher, local_image_ctx, + local_image_id, remote_image_id, + global_image_id, local_mirror_uuid, + remote_mirror_uuid, journaler, client_state, + client_meta, on_finish, do_resync, + progress_ctx); + } + + BootstrapRequest(Threads<ImageCtxT>* threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + InstanceWatcher<ImageCtxT> *instance_watcher, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + const std::string &remote_image_id, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, Journaler *journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, Context *on_finish, + bool *do_resync, ProgressContext *progress_ctx = nullptr); + ~BootstrapRequest() override; + + bool is_syncing() const; + + void send() override; + void cancel() override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_REMOTE_TAG_CLASS * * * * * * * * * * * * * * * * * * + * | * (error) + * v * + * OPEN_REMOTE_IMAGE * * * * * * * * * * * * * * * * * * * + * | * + * |/--------------------------------------------------*---\ + * v * | + * IS_PRIMARY * * * * * * * * * * * * * * * * * * * * * * | + * | * * | + * | (remote image primary, no local image id) * * | + * \----> UPDATE_CLIENT_IMAGE * * * * * * * * * * * * | + * | | ^ * * | + * | | * (duplicate image id) * * | + * | v * * * | + * \----> CREATE_LOCAL_IMAGE * * * * * * * * * * * * * | + * | | * * | + * | v * * | + * | (remote image primary) * * | + * \----> OPEN_LOCAL_IMAGE * * * * * * * * * * * * * * | + * | | . * * | + * | | . (image doesn't exist) * * | + * | | . . > UNREGISTER_CLIENT * * * * * * * | + * | | | * * | + * | | v * * | + * | | REGISTER_CLIENT * * * * * * * * | + * | | | * * | + * | | \-----------------------*---*---/ + * | | * * + * | v (skip if not needed) * * + * | GET_REMOTE_TAGS * * * * * * * * * + * | | * * * + * | v (skip if not needed) v * * + * | IMAGE_SYNC * * * > CLOSE_LOCAL_IMAGE * * + * | | | * * + * | \-----------------\ /-----/ * * + * | | * * + * | | * * + * | (skip if not needed) | * * + * \----> UPDATE_CLIENT_STATE *|* * * * * * * * * * * + * | | * * + * /-----------/----------------/ * * + * | * * + * v * * + * CLOSE_REMOTE_IMAGE < * * * * * * * * * * * * * * * * * + * | * + * v * + * <finish> < * * * * * * * * * * * * * * * * * * * * * * * + * + * @endverbatim + */ + typedef std::list<cls::journal::Tag> Tags; + + Threads<ImageCtxT>* m_threads; + librados::IoCtx &m_local_io_ctx; + librados::IoCtx &m_remote_io_ctx; + InstanceWatcher<ImageCtxT> *m_instance_watcher; + ImageCtxT **m_local_image_ctx; + std::string m_local_image_id; + std::string m_remote_image_id; + std::string m_global_image_id; + std::string m_local_mirror_uuid; + std::string m_remote_mirror_uuid; + Journaler *m_journaler; + cls::journal::ClientState *m_client_state; + MirrorPeerClientMeta *m_client_meta; + ProgressContext *m_progress_ctx; + bool *m_do_resync; + + mutable Mutex m_lock; + bool m_canceled = false; + + Tags m_remote_tags; + cls::journal::Client m_client; + uint64_t m_remote_tag_class = 0; + ImageCtxT *m_remote_image_ctx = nullptr; + bool m_primary = false; + int m_ret_val = 0; + ImageSync<ImageCtxT> *m_image_sync = nullptr; + + uint64_t m_local_tag_tid = 0; + librbd::journal::TagData m_local_tag_data; + + bufferlist m_out_bl; + + void get_remote_tag_class(); + void handle_get_remote_tag_class(int r); + + void open_remote_image(); + void handle_open_remote_image(int r); + + void is_primary(); + void handle_is_primary(int r); + + void update_client_state(); + void handle_update_client_state(int r); + + void open_local_image(); + void handle_open_local_image(int r); + + void unregister_client(); + void handle_unregister_client(int r); + + void register_client(); + void handle_register_client(int r); + + void create_local_image(); + void handle_create_local_image(int r); + + void update_client_image(); + void handle_update_client_image(int r); + + void get_remote_tags(); + void handle_get_remote_tags(int r); + + void image_sync(); + void handle_image_sync(int r); + + void close_local_image(); + void handle_close_local_image(int r); + + void close_remote_image(); + void handle_close_remote_image(int r); + + void update_progress(const std::string &description); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc new file mode 100644 index 00000000..5b754823 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "CloseImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::CloseImageRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +template <typename I> +CloseImageRequest<I>::CloseImageRequest(I **image_ctx, Context *on_finish) + : m_image_ctx(image_ctx), m_on_finish(on_finish) { +} + +template <typename I> +void CloseImageRequest<I>::send() { + close_image(); +} + +template <typename I> +void CloseImageRequest<I>::close_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + CloseImageRequest<I>, &CloseImageRequest<I>::handle_close_image>(this); + (*m_image_ctx)->state->close(ctx); +} + +template <typename I> +void CloseImageRequest<I>::handle_close_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": error encountered while closing image: " << cpp_strerror(r) + << dendl; + } + + delete *m_image_ctx; + *m_image_ctx = nullptr; + + m_on_finish->complete(0); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>; + diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h new file mode 100644 index 00000000..02481369 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h @@ -0,0 +1,56 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include <string> + +class Context; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class CloseImageRequest { +public: + static CloseImageRequest* create(ImageCtxT **image_ctx, Context *on_finish) { + return new CloseImageRequest(image_ctx, on_finish); + } + + CloseImageRequest(ImageCtxT **image_ctx, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * CLOSE_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + ImageCtxT **m_image_ctx; + Context *m_on_finish; + + void close_image(); + void handle_close_image(int r); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc new file mode 100644 index 00000000..8d8236b2 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc @@ -0,0 +1,506 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "CreateImageRequest.h" +#include "CloseImageRequest.h" +#include "OpenImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "journal/Journaler.h" +#include "journal/Settings.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/internal.h" +#include "librbd/Utils.h" +#include "librbd/image/CreateRequest.h" +#include "librbd/image/CloneRequest.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::CreateImageRequest: " \ + << this << " " << __func__ << ": " + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename I> +CreateImageRequest<I>::CreateImageRequest(Threads<I>* threads, + librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + I *remote_image_ctx, + Context *on_finish) + : m_threads(threads), m_local_io_ctx(local_io_ctx), + m_global_image_id(global_image_id), + m_remote_mirror_uuid(remote_mirror_uuid), + m_local_image_name(local_image_name), m_local_image_id(local_image_id), + m_remote_image_ctx(remote_image_ctx), m_on_finish(on_finish) { +} + +template <typename I> +void CreateImageRequest<I>::send() { + int r = validate_parent(); + if (r < 0) { + error(r); + return; + } + + if (m_remote_parent_spec.pool_id == -1) { + create_image(); + } else { + get_local_parent_mirror_uuid(); + } +} + +template <typename I> +void CreateImageRequest<I>::create_image() { + dout(10) << dendl; + + using klass = CreateImageRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_create_image>(this); + + RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock); + + auto& config{ + reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf}; + + librbd::ImageOptions image_options; + populate_image_options(&image_options); + + auto req = librbd::image::CreateRequest<I>::create( + config, m_local_io_ctx, m_local_image_name, m_local_image_id, + m_remote_image_ctx->size, image_options, m_global_image_id, + m_remote_mirror_uuid, false, m_remote_image_ctx->op_work_queue, ctx); + req->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_create_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -EBADF) { + dout(5) << "image id " << m_local_image_id << " already in-use" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void CreateImageRequest<I>::get_local_parent_mirror_uuid() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_uuid_get_start(&op); + + librados::AioCompletion *aio_comp = create_rados_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_get_local_parent_mirror_uuid>(this); + m_out_bl.clear(); + int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_local_parent_mirror_uuid(int r) { + if (r >= 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_uuid_get_finish( + &it, &m_local_parent_mirror_uuid); + if (r >= 0 && m_local_parent_mirror_uuid.empty()) { + r = -ENOENT; + } + } + + dout(10) << "r=" << r << dendl; + if (r < 0) { + if (r == -ENOENT) { + dout(5) << "local parent mirror uuid missing" << dendl; + } else { + derr << "failed to retrieve local parent mirror uuid: " << cpp_strerror(r) + << dendl; + } + finish(r); + return; + } + + dout(15) << "local_parent_mirror_uuid=" << m_local_parent_mirror_uuid + << dendl; + get_remote_parent_client_state(); +} + +template <typename I> +void CreateImageRequest<I>::get_remote_parent_client_state() { + dout(10) << dendl; + + m_remote_journaler = new Journaler(m_threads->work_queue, m_threads->timer, + &m_threads->timer_lock, + m_remote_parent_io_ctx, + m_remote_parent_spec.image_id, + m_local_parent_mirror_uuid, {}); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_get_remote_parent_client_state>(this)); + m_remote_journaler->get_client(m_local_parent_mirror_uuid, &m_client, ctx); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_remote_parent_client_state(int r) { + dout(10) << "r=" << r << dendl; + + delete m_remote_journaler; + m_remote_journaler = nullptr; + + librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta; + if (r == -ENOENT) { + dout(15) << "client not registered to parent image" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve parent client: " << cpp_strerror(r) << dendl; + finish(r); + return; + } else if (!util::decode_client_meta(m_client, &mirror_peer_client_meta)) { + // require operator intervention since the data is corrupt + derr << "failed to decode parent client: " << cpp_strerror(r) << dendl; + finish(-EBADMSG); + return; + } else if (mirror_peer_client_meta.state != + librbd::journal::MIRROR_PEER_STATE_REPLAYING) { + // avoid possible race w/ incomplete parent image since the parent snapshot + // might be deleted if the sync restarts + dout(15) << "parent image still syncing" << dendl; + finish(-ENOENT); + return; + } + + get_parent_global_image_id(); +} + + +template <typename I> +void CreateImageRequest<I>::get_parent_global_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_start(&op, + m_remote_parent_spec.image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_get_parent_global_image_id>(this); + m_out_bl.clear(); + int r = m_remote_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_parent_global_image_id(int r) { + dout(10) << "r=" << r << dendl; + if (r == 0) { + cls::rbd::MirrorImage mirror_image; + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image); + if (r == 0) { + m_parent_global_image_id = mirror_image.global_image_id; + dout(15) << "parent_global_image_id=" << m_parent_global_image_id + << dendl; + } + } + + if (r == -ENOENT) { + dout(10) << "parent image " << m_remote_parent_spec.image_id + << " not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve global image id for parent image " + << m_remote_parent_spec.image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_local_parent_image_id(); +} + +template <typename I> +void CreateImageRequest<I>::get_local_parent_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start( + &op, m_parent_global_image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_get_local_parent_image_id>(this); + m_out_bl.clear(); + int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_local_parent_image_id(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish( + &iter, &m_local_parent_spec.image_id); + } + + if (r == -ENOENT) { + dout(10) << "parent image " << m_parent_global_image_id << " not " + << "registered locally" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve local image id for parent image " + << m_parent_global_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + open_remote_parent_image(); +} + +template <typename I> +void CreateImageRequest<I>::open_remote_parent_image() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_open_remote_parent_image>(this); + OpenImageRequest<I> *request = OpenImageRequest<I>::create( + m_remote_parent_io_ctx, &m_remote_parent_image_ctx, + m_remote_parent_spec.image_id, true, ctx); + request->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_open_remote_parent_image(int r) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to open remote parent image " << m_parent_pool_name << "/" + << m_remote_parent_spec.image_id << dendl; + finish(r); + return; + } + + clone_image(); +} + +template <typename I> +void CreateImageRequest<I>::clone_image() { + dout(10) << dendl; + + std::string snap_name; + cls::rbd::SnapshotNamespace snap_namespace; + { + RWLock::RLocker remote_snap_locker(m_remote_parent_image_ctx->snap_lock); + auto it = m_remote_parent_image_ctx->snap_info.find( + m_remote_parent_spec.snap_id); + if (it != m_remote_parent_image_ctx->snap_info.end()) { + snap_name = it->second.name; + snap_namespace = it->second.snap_namespace; + } + } + + librbd::ImageOptions opts; + populate_image_options(&opts); + + auto& config{ + reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf}; + + using klass = CreateImageRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_clone_image>(this); + + librbd::image::CloneRequest<I> *req = librbd::image::CloneRequest<I>::create( + config, m_local_parent_io_ctx, m_local_parent_spec.image_id, snap_name, + CEPH_NOSNAP, m_local_io_ctx, m_local_image_name, m_local_image_id, opts, + m_global_image_id, m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue, + ctx); + req->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_clone_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -EBADF) { + dout(5) << "image id " << m_local_image_id << " already in-use" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to clone image " << m_parent_pool_name << "/" + << m_remote_parent_spec.image_id << " to " + << m_local_image_name << dendl; + m_ret_val = r; + } + + close_remote_parent_image(); +} + +template <typename I> +void CreateImageRequest<I>::close_remote_parent_image() { + dout(10) << dendl; + Context *ctx = create_context_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_close_remote_parent_image>(this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + &m_remote_parent_image_ctx, ctx); + request->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_close_remote_parent_image(int r) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "error encountered closing remote parent image: " + << cpp_strerror(r) << dendl; + } + + finish(m_ret_val); +} + +template <typename I> +void CreateImageRequest<I>::error(int r) { + dout(10) << "r=" << r << dendl; + + m_threads->work_queue->queue(create_context_callback< + CreateImageRequest<I>, &CreateImageRequest<I>::finish>(this), r); +} + +template <typename I> +void CreateImageRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + m_on_finish->complete(r); + delete this; +} + +template <typename I> +int CreateImageRequest<I>::validate_parent() { + RWLock::RLocker owner_locker(m_remote_image_ctx->owner_lock); + RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock); + + m_remote_parent_spec = m_remote_image_ctx->parent_md.spec; + + // scan all remote snapshots for a linked parent + for (auto &snap_info_pair : m_remote_image_ctx->snap_info) { + auto &parent_spec = snap_info_pair.second.parent.spec; + if (parent_spec.pool_id == -1) { + continue; + } else if (m_remote_parent_spec.pool_id == -1) { + m_remote_parent_spec = parent_spec; + continue; + } + + if (m_remote_parent_spec != parent_spec) { + derr << "remote image parent spec mismatch" << dendl; + return -EINVAL; + } + } + + if (m_remote_parent_spec.pool_id == -1) { + return 0; + } + + // map remote parent pool to local parent pool + librados::Rados remote_rados(m_remote_image_ctx->md_ctx); + int r = remote_rados.ioctx_create2(m_remote_parent_spec.pool_id, + m_remote_parent_io_ctx); + if (r < 0) { + derr << "failed to open remote parent pool " << m_remote_parent_spec.pool_id + << ": " << cpp_strerror(r) << dendl; + return r; + } + + m_parent_pool_name = m_remote_parent_io_ctx.get_pool_name(); + + librados::Rados local_rados(m_local_io_ctx); + r = local_rados.ioctx_create(m_parent_pool_name.c_str(), + m_local_parent_io_ctx); + if (r < 0) { + derr << "failed to open local parent pool " << m_parent_pool_name << ": " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +void CreateImageRequest<I>::populate_image_options( + librbd::ImageOptions* image_options) { + image_options->set(RBD_IMAGE_OPTION_FEATURES, + m_remote_image_ctx->features); + image_options->set(RBD_IMAGE_OPTION_ORDER, m_remote_image_ctx->order); + image_options->set(RBD_IMAGE_OPTION_STRIPE_UNIT, + m_remote_image_ctx->stripe_unit); + image_options->set(RBD_IMAGE_OPTION_STRIPE_COUNT, + m_remote_image_ctx->stripe_count); + + // Determine the data pool for the local image as follows: + // 1. If the local pool has a default data pool, use it. + // 2. If the remote image has a data pool different from its metadata pool and + // a pool with the same name exists locally, use it. + // 3. Don't set the data pool explicitly. + std::string data_pool; + librados::Rados local_rados(m_local_io_ctx); + auto default_data_pool = g_ceph_context->_conf.get_val<std::string>("rbd_default_data_pool"); + auto remote_md_pool = m_remote_image_ctx->md_ctx.get_pool_name(); + auto remote_data_pool = m_remote_image_ctx->data_ctx.get_pool_name(); + + if (default_data_pool != "") { + data_pool = default_data_pool; + } else if (remote_data_pool != remote_md_pool) { + if (local_rados.pool_lookup(remote_data_pool.c_str()) >= 0) { + data_pool = remote_data_pool; + } + } + + if (data_pool != "") { + image_options->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool); + } + + if (m_remote_parent_spec.pool_id != -1) { + uint64_t clone_format = 1; + if (m_remote_image_ctx->test_op_features( + RBD_OPERATION_FEATURE_CLONE_CHILD)) { + clone_format = 2; + } + image_options->set(RBD_IMAGE_OPTION_CLONE_FORMAT, clone_format); + } +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h new file mode 100644 index 00000000..0b20da52 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h @@ -0,0 +1,154 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "include/types.h" +#include "include/rados/librados.hpp" +#include "cls/journal/cls_journal_types.h" +#include "librbd/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <string> + +class Context; +class ContextWQ; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { class ImageOptions; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class CreateImageRequest { +public: + static CreateImageRequest *create(Threads<ImageCtxT> *threads, + librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + ImageCtxT *remote_image_ctx, + Context *on_finish) { + return new CreateImageRequest(threads, local_io_ctx, global_image_id, + remote_mirror_uuid, local_image_name, + local_image_id, remote_image_ctx, on_finish); + } + + CreateImageRequest(Threads<ImageCtxT> *threads, librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + ImageCtxT *remote_image_ctx, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * | * + * | (non-clone) * + * |\------------> CREATE_IMAGE ---------------------\ * (error) + * | | * + * | (clone) | * + * \-------------> GET_LOCAL_PARENT_MIRROR_UUID * * | * * * * + * | | * * + * v | * + * GET_REMOTE_PARENT_CLIENT_STATE * | * * * * + * | | * * + * v | * + * GET_PARENT_GLOBAL_IMAGE_ID * * * | * * * * + * | | * * + * v | * + * GET_LOCAL_PARENT_IMAGE_ID * * * * | * * * * + * | | * * + * v | * + * OPEN_REMOTE_PARENT * * * * * * * | * * * * + * | | * * + * v | * + * CLONE_IMAGE | * + * | | * + * v | * + * CLOSE_REMOTE_PARENT | * + * | v * + * \------------------------> <finish> < * * + * @endverbatim + */ + + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_local_io_ctx; + std::string m_global_image_id; + std::string m_remote_mirror_uuid; + std::string m_local_image_name; + std::string m_local_image_id; + ImageCtxT *m_remote_image_ctx; + Context *m_on_finish; + + librados::IoCtx m_remote_parent_io_ctx; + std::string m_local_parent_mirror_uuid; + Journaler *m_remote_journaler = nullptr; + ImageCtxT *m_remote_parent_image_ctx = nullptr; + cls::rbd::ParentImageSpec m_remote_parent_spec; + + librados::IoCtx m_local_parent_io_ctx; + cls::rbd::ParentImageSpec m_local_parent_spec; + + bufferlist m_out_bl; + std::string m_parent_global_image_id; + std::string m_parent_pool_name; + cls::journal::Client m_client; + int m_ret_val = 0; + + void create_image(); + void handle_create_image(int r); + + void get_local_parent_mirror_uuid(); + void handle_get_local_parent_mirror_uuid(int r); + + void get_remote_parent_client_state(); + void handle_get_remote_parent_client_state(int r); + + void get_parent_global_image_id(); + void handle_get_parent_global_image_id(int r); + + void get_local_parent_image_id(); + void handle_get_local_parent_image_id(int r); + + void open_remote_parent_image(); + void handle_open_remote_parent_image(int r); + + void clone_image(); + void handle_clone_image(int r); + + void close_remote_parent_image(); + void handle_close_remote_parent_image(int r); + + void error(int r); + void finish(int r); + + int validate_parent(); + + void populate_image_options(librbd::ImageOptions* image_options); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc new file mode 100644 index 00000000..6314eb7d --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc @@ -0,0 +1,204 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "EventPreprocessor.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include "librbd/journal/Types.h" +#include <boost/variant.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::EventPreprocessor: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +template <typename I> +EventPreprocessor<I>::EventPreprocessor(I &local_image_ctx, + Journaler &remote_journaler, + const std::string &local_mirror_uuid, + MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue) + : m_local_image_ctx(local_image_ctx), m_remote_journaler(remote_journaler), + m_local_mirror_uuid(local_mirror_uuid), m_client_meta(client_meta), + m_work_queue(work_queue) { +} + +template <typename I> +EventPreprocessor<I>::~EventPreprocessor() { + ceph_assert(!m_in_progress); +} + +template <typename I> +bool EventPreprocessor<I>::is_required(const EventEntry &event_entry) { + SnapSeqs snap_seqs(m_client_meta->snap_seqs); + return (prune_snap_map(&snap_seqs) || + event_entry.get_event_type() == + librbd::journal::EVENT_TYPE_SNAP_RENAME); +} + +template <typename I> +void EventPreprocessor<I>::preprocess(EventEntry *event_entry, + Context *on_finish) { + ceph_assert(!m_in_progress); + m_in_progress = true; + m_event_entry = event_entry; + m_on_finish = on_finish; + + refresh_image(); +} + +template <typename I> +void EventPreprocessor<I>::refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + EventPreprocessor<I>, &EventPreprocessor<I>::handle_refresh_image>(this); + m_local_image_ctx.state->refresh(ctx); +} + +template <typename I> +void EventPreprocessor<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << "error encountered during image refresh: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + preprocess_event(); +} + +template <typename I> +void EventPreprocessor<I>::preprocess_event() { + dout(20) << dendl; + + m_snap_seqs = m_client_meta->snap_seqs; + m_snap_seqs_updated = prune_snap_map(&m_snap_seqs); + + int r = boost::apply_visitor(PreprocessEventVisitor(this), + m_event_entry->event); + if (r < 0) { + finish(r); + return; + } + + update_client(); +} + +template <typename I> +int EventPreprocessor<I>::preprocess_snap_rename( + librbd::journal::SnapRenameEvent &event) { + dout(20) << ": " + << "remote_snap_id=" << event.snap_id << ", " + << "src_snap_name=" << event.src_snap_name << ", " + << "dest_snap_name=" << event.dst_snap_name << dendl; + + auto snap_seq_it = m_snap_seqs.find(event.snap_id); + if (snap_seq_it != m_snap_seqs.end()) { + dout(20) << ": remapping remote snap id " << snap_seq_it->first << " " + << "to local snap id " << snap_seq_it->second << dendl; + event.snap_id = snap_seq_it->second; + return 0; + } + + auto snap_id_it = m_local_image_ctx.snap_ids.find({cls::rbd::UserSnapshotNamespace(), + event.src_snap_name}); + if (snap_id_it == m_local_image_ctx.snap_ids.end()) { + dout(20) << ": cannot map remote snapshot '" << event.src_snap_name << "' " + << "to local snapshot" << dendl; + event.snap_id = CEPH_NOSNAP; + return -ENOENT; + } + + dout(20) << ": mapping remote snap id " << event.snap_id << " " + << "to local snap id " << snap_id_it->second << dendl; + m_snap_seqs_updated = true; + m_snap_seqs[event.snap_id] = snap_id_it->second; + event.snap_id = snap_id_it->second; + return 0; +} + +template <typename I> +void EventPreprocessor<I>::update_client() { + if (!m_snap_seqs_updated) { + finish(0); + return; + } + + dout(20) << dendl; + librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta); + client_meta.snap_seqs = m_snap_seqs; + + librbd::journal::ClientData client_data(client_meta); + bufferlist data_bl; + encode(client_data, data_bl); + + Context *ctx = create_context_callback< + EventPreprocessor<I>, &EventPreprocessor<I>::handle_update_client>( + this); + m_remote_journaler.update_client(data_bl, ctx); +} + +template <typename I> +void EventPreprocessor<I>::handle_update_client(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << "failed to update mirror peer journal client: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_client_meta->snap_seqs = m_snap_seqs; + finish(0); +} + +template <typename I> +bool EventPreprocessor<I>::prune_snap_map(SnapSeqs *snap_seqs) { + bool pruned = false; + + RWLock::RLocker snap_locker(m_local_image_ctx.snap_lock); + for (auto it = snap_seqs->begin(); it != snap_seqs->end(); ) { + auto current_it(it++); + if (m_local_image_ctx.snap_info.count(current_it->second) == 0) { + snap_seqs->erase(current_it); + pruned = true; + } + } + return pruned; +} + +template <typename I> +void EventPreprocessor<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + Context *on_finish = m_on_finish; + m_on_finish = nullptr; + m_event_entry = nullptr; + m_in_progress = false; + m_snap_seqs_updated = false; + m_work_queue->queue(on_finish, r); +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::EventPreprocessor<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h new file mode 100644 index 00000000..67aeea0b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h @@ -0,0 +1,122 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H +#define RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H + +#include "include/int_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <map> +#include <string> +#include <boost/variant/static_visitor.hpp> + +struct Context; +struct ContextWQ; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class EventPreprocessor { +public: + using Journaler = typename librbd::journal::TypeTraits<ImageCtxT>::Journaler; + using EventEntry = librbd::journal::EventEntry; + using MirrorPeerClientMeta = librbd::journal::MirrorPeerClientMeta; + + static EventPreprocessor *create(ImageCtxT &local_image_ctx, + Journaler &remote_journaler, + const std::string &local_mirror_uuid, + MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue) { + return new EventPreprocessor(local_image_ctx, remote_journaler, + local_mirror_uuid, client_meta, work_queue); + } + + static void destroy(EventPreprocessor* processor) { + delete processor; + } + + EventPreprocessor(ImageCtxT &local_image_ctx, Journaler &remote_journaler, + const std::string &local_mirror_uuid, + MirrorPeerClientMeta *client_meta, ContextWQ *work_queue); + ~EventPreprocessor(); + + bool is_required(const EventEntry &event_entry); + void preprocess(EventEntry *event_entry, Context *on_finish); + +private: + /** + * @verbatim + * + * <start> + * | + * v (skip if not required) + * REFRESH_IMAGE + * | + * v (skip if not required) + * PREPROCESS_EVENT + * | + * v (skip if not required) + * UPDATE_CLIENT + * + * @endverbatim + */ + + typedef std::map<uint64_t, uint64_t> SnapSeqs; + + class PreprocessEventVisitor : public boost::static_visitor<int> { + public: + EventPreprocessor *event_preprocessor; + + PreprocessEventVisitor(EventPreprocessor *event_preprocessor) + : event_preprocessor(event_preprocessor) { + } + + template <typename T> + inline int operator()(T&) const { + return 0; + } + inline int operator()(librbd::journal::SnapRenameEvent &event) const { + return event_preprocessor->preprocess_snap_rename(event); + } + }; + + ImageCtxT &m_local_image_ctx; + Journaler &m_remote_journaler; + std::string m_local_mirror_uuid; + MirrorPeerClientMeta *m_client_meta; + ContextWQ *m_work_queue; + + bool m_in_progress = false; + EventEntry *m_event_entry = nullptr; + Context *m_on_finish = nullptr; + + SnapSeqs m_snap_seqs; + bool m_snap_seqs_updated = false; + + bool prune_snap_map(SnapSeqs *snap_seqs); + + void refresh_image(); + void handle_refresh_image(int r); + + void preprocess_event(); + int preprocess_snap_rename(librbd::journal::SnapRenameEvent &event); + + void update_client(); + void handle_update_client(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::EventPreprocessor<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc new file mode 100644 index 00000000..74e97537 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc @@ -0,0 +1,85 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "GetMirrorImageIdRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_rados_callback; + +template <typename I> +void GetMirrorImageIdRequest<I>::send() { + dout(20) << dendl; + get_image_id(); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::get_image_id() { + dout(20) << dendl; + + // attempt to cross-reference a image id by the global image id + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + GetMirrorImageIdRequest<I>, + &GetMirrorImageIdRequest<I>::handle_get_image_id>( + this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::handle_get_image_id(int r) { + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish( + &iter, m_image_id); + } + + dout(20) << "r=" << r << ", " + << "image_id=" << *m_image_id << dendl; + + if (r < 0) { + if (r == -ENOENT) { + dout(10) << "global image " << m_global_image_id << " not registered" + << dendl; + } else { + derr << "failed to retrieve image id: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::finish(int r) { + dout(20) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h new file mode 100644 index 00000000..b2664513 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h @@ -0,0 +1,75 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados_fwd.hpp" +#include <string> + +namespace librbd { struct ImageCtx; } + +struct Context; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class GetMirrorImageIdRequest { +public: + static GetMirrorImageIdRequest *create(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *image_id, + Context *on_finish) { + return new GetMirrorImageIdRequest(io_ctx, global_image_id, image_id, + on_finish); + } + + GetMirrorImageIdRequest(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *image_id, + Context *on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), + m_image_id(image_id), m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_IMAGE_ID + * | + * v + * <finish> + + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + std::string *m_image_id; + Context *m_on_finish; + + bufferlist m_out_bl; + + void get_image_id(); + void handle_get_image_id(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc new file mode 100644 index 00000000..54636fdb --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc @@ -0,0 +1,125 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "IsPrimaryRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::IsPrimaryRequest: " \ + << this << " " << __func__ << " " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +IsPrimaryRequest<I>::IsPrimaryRequest(I *image_ctx, bool *primary, + Context *on_finish) + : m_image_ctx(image_ctx), m_primary(primary), m_on_finish(on_finish) { +} + +template <typename I> +void IsPrimaryRequest<I>::send() { + send_get_mirror_state(); +} + +template <typename I> +void IsPrimaryRequest<I>::send_get_mirror_state() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_start(&op, m_image_ctx->id); + + librados::AioCompletion *aio_comp = create_rados_callback< + IsPrimaryRequest<I>, &IsPrimaryRequest<I>::handle_get_mirror_state>(this); + int r = m_image_ctx->md_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void IsPrimaryRequest<I>::handle_get_mirror_state(int r) { + dout(20) << ": r=" << r << dendl; + + cls::rbd::MirrorImage mirror_image; + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image); + if (r == 0) { + if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + send_is_tag_owner(); + return; + } else if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) { + dout(5) << ": image mirroring is being disabled" << dendl; + r = -ENOENT; + } else { + derr << ": image mirroring is disabled" << dendl; + r = -EINVAL; + } + } else { + derr << ": failed to decode image mirror state: " << cpp_strerror(r) + << dendl; + } + } else if (r == -ENOENT) { + dout(5) << ": image is not mirrored" << dendl; + } else { + derr << ": failed to retrieve image mirror state: " << cpp_strerror(r) + << dendl; + } + + finish(r); +} + +template <typename I> +void IsPrimaryRequest<I>::send_is_tag_owner() { + // deduce the class type for the journal to support unit tests + using Journal = typename std::decay< + typename std::remove_pointer<decltype(std::declval<I>().journal)> + ::type>::type; + + dout(20) << dendl; + + Context *ctx = create_context_callback< + IsPrimaryRequest<I>, &IsPrimaryRequest<I>::handle_is_tag_owner>(this); + + Journal::is_tag_owner(m_image_ctx, m_primary, ctx); +} + +template <typename I> +void IsPrimaryRequest<I>::handle_is_tag_owner(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to query remote image tag owner: " << cpp_strerror(r) + << dendl; + } + + finish(r); +} + +template <typename I> +void IsPrimaryRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::IsPrimaryRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h new file mode 100644 index 00000000..ddb332cb --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h @@ -0,0 +1,67 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_IS_PRIMARY_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_IS_PRIMARY_REQUEST_H + +#include "include/buffer.h" + +class Context; +class ContextWQ; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class IsPrimaryRequest { +public: + static IsPrimaryRequest* create(ImageCtxT *image_ctx, bool *primary, + Context *on_finish) { + return new IsPrimaryRequest(image_ctx, primary, on_finish); + } + + IsPrimaryRequest(ImageCtxT *image_ctx, bool *primary, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_MIRROR_STATE * * * * * + * | * + * v * + * IS_TAG_OWNER * * * * * * * (error) + * | * + * v * + * <finish> < * * * * * * * * + * + * @endverbatim + */ + ImageCtxT *m_image_ctx; + bool *m_primary; + Context *m_on_finish; + + bufferlist m_out_bl; + + void send_get_mirror_state(); + void handle_get_mirror_state(int r); + + void send_is_tag_owner(); + void handle_is_tag_owner(int r); + + void finish(int r); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::IsPrimaryRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_IS_PRIMARY_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc new file mode 100644 index 00000000..7f55745e --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc @@ -0,0 +1,75 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "OpenImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenImageRequest: " \ + << this << " " << __func__ << " " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +template <typename I> +OpenImageRequest<I>::OpenImageRequest(librados::IoCtx &io_ctx, I **image_ctx, + const std::string &image_id, + bool read_only, Context *on_finish) + : m_io_ctx(io_ctx), m_image_ctx(image_ctx), m_image_id(image_id), + m_read_only(read_only), m_on_finish(on_finish) { +} + +template <typename I> +void OpenImageRequest<I>::send() { + send_open_image(); +} + +template <typename I> +void OpenImageRequest<I>::send_open_image() { + dout(20) << dendl; + + *m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, m_read_only); + + Context *ctx = create_context_callback< + OpenImageRequest<I>, &OpenImageRequest<I>::handle_open_image>( + this); + (*m_image_ctx)->state->open(0, ctx); +} + +template <typename I> +void OpenImageRequest<I>::handle_open_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to open image '" << m_image_id << "': " + << cpp_strerror(r) << dendl; + (*m_image_ctx)->destroy(); + *m_image_ctx = nullptr; + } + + finish(r); +} + +template <typename I> +void OpenImageRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h new file mode 100644 index 00000000..01ab3117 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h @@ -0,0 +1,71 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include <string> + +class Context; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class OpenImageRequest { +public: + static OpenImageRequest* create(librados::IoCtx &io_ctx, + ImageCtxT **image_ctx, + const std::string &image_id, + bool read_only, Context *on_finish) { + return new OpenImageRequest(io_ctx, image_ctx, image_id, read_only, + on_finish); + } + + OpenImageRequest(librados::IoCtx &io_ctx, ImageCtxT **image_ctx, + const std::string &image_id, bool read_only, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + librados::IoCtx &m_io_ctx; + ImageCtxT **m_image_ctx; + std::string m_image_id; + bool m_read_only; + Context *m_on_finish; + + void send_open_image(); + void handle_open_image(int r); + + void send_close_image(int r); + void handle_close_image(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc new file mode 100644 index 00000000..87b141ca --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc @@ -0,0 +1,271 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "CloseImageRequest.h" +#include "IsPrimaryRequest.h" +#include "OpenLocalImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/exclusive_lock/Policy.h" +#include "librbd/journal/Policy.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenLocalImageRequest: " \ + << this << " " << __func__ << " " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +namespace { + +template <typename I> +struct MirrorExclusiveLockPolicy : public librbd::exclusive_lock::Policy { + I *image_ctx; + + MirrorExclusiveLockPolicy(I *image_ctx) : image_ctx(image_ctx) { + } + + bool may_auto_request_lock() override { + return false; + } + + int lock_requested(bool force) override { + int r = -EROFS; + { + RWLock::RLocker owner_locker(image_ctx->owner_lock); + RWLock::RLocker snap_locker(image_ctx->snap_lock); + if (image_ctx->journal == nullptr || image_ctx->journal->is_tag_owner()) { + r = 0; + } + } + + if (r == 0) { + // if the local image journal has been closed or if it was (force) + // promoted allow the lock to be released to another client + image_ctx->exclusive_lock->release_lock(nullptr); + } + return r; + } + + bool accept_blocked_request( + librbd::exclusive_lock::OperationRequestType request_type) override { + if (request_type == + librbd::exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE) { + return true; + } + return false; + } +}; + +struct MirrorJournalPolicy : public librbd::journal::Policy { + ContextWQ *work_queue; + + MirrorJournalPolicy(ContextWQ *work_queue) : work_queue(work_queue) { + } + + bool append_disabled() const override { + // avoid recording any events to the local journal + return true; + } + bool journal_disabled() const override { + return false; + } + + void allocate_tag_on_lock(Context *on_finish) override { + // rbd-mirror will manually create tags by copying them from the peer + work_queue->queue(on_finish, 0); + } +}; + +} // anonymous namespace + +template <typename I> +OpenLocalImageRequest<I>::OpenLocalImageRequest(librados::IoCtx &local_io_ctx, + I **local_image_ctx, + const std::string &local_image_id, + ContextWQ *work_queue, + Context *on_finish) + : m_local_io_ctx(local_io_ctx), m_local_image_ctx(local_image_ctx), + m_local_image_id(local_image_id), m_work_queue(work_queue), + m_on_finish(on_finish) { +} + +template <typename I> +void OpenLocalImageRequest<I>::send() { + send_open_image(); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_open_image() { + dout(20) << dendl; + + *m_local_image_ctx = I::create("", m_local_image_id, nullptr, + m_local_io_ctx, false); + { + RWLock::WLocker owner_locker((*m_local_image_ctx)->owner_lock); + RWLock::WLocker snap_locker((*m_local_image_ctx)->snap_lock); + (*m_local_image_ctx)->set_exclusive_lock_policy( + new MirrorExclusiveLockPolicy<I>(*m_local_image_ctx)); + (*m_local_image_ctx)->set_journal_policy( + new MirrorJournalPolicy(m_work_queue)); + } + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_open_image>( + this); + (*m_local_image_ctx)->state->open(0, ctx); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_open_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + if (r == -ENOENT) { + dout(10) << ": local image does not exist" << dendl; + } else { + derr << ": failed to open image '" << m_local_image_id << "': " + << cpp_strerror(r) << dendl; + } + (*m_local_image_ctx)->destroy(); + *m_local_image_ctx = nullptr; + finish(r); + return; + } + + send_is_primary(); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_is_primary() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_is_primary>( + this); + IsPrimaryRequest<I> *request = IsPrimaryRequest<I>::create(*m_local_image_ctx, + &m_primary, ctx); + request->send(); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_is_primary(int r) { + dout(20) << ": r=" << r << dendl; + + if (r == -ENOENT) { + dout(5) << ": local image is not mirrored" << dendl; + send_close_image(r); + return; + } else if (r < 0) { + derr << ": error querying local image primary status: " << cpp_strerror(r) + << dendl; + send_close_image(r); + return; + } + + // if the local image owns the tag -- don't steal the lock since + // we aren't going to mirror peer data into this image anyway + if (m_primary) { + dout(10) << ": local image is primary -- skipping image replay" << dendl; + send_close_image(-EREMOTEIO); + return; + } + + send_lock_image(); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_lock_image() { + dout(20) << dendl; + + RWLock::RLocker owner_locker((*m_local_image_ctx)->owner_lock); + if ((*m_local_image_ctx)->exclusive_lock == nullptr) { + derr << ": image does not support exclusive lock" << dendl; + send_close_image(-EINVAL); + return; + } + + // disallow any proxied maintenance operations before grabbing lock + (*m_local_image_ctx)->exclusive_lock->block_requests(-EROFS); + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_lock_image>( + this); + + (*m_local_image_ctx)->exclusive_lock->acquire_lock(ctx); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_lock_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to lock image '" << m_local_image_id << "': " + << cpp_strerror(r) << dendl; + send_close_image(r); + return; + } + + { + RWLock::RLocker owner_locker((*m_local_image_ctx)->owner_lock); + if ((*m_local_image_ctx)->exclusive_lock == nullptr || + !(*m_local_image_ctx)->exclusive_lock->is_lock_owner()) { + derr << ": image is not locked" << dendl; + send_close_image(-EBUSY); + return; + } + } + + finish(0); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_close_image(int r) { + dout(20) << dendl; + + if (m_ret_val == 0 && r < 0) { + m_ret_val = r; + } + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_close_image>( + this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + m_local_image_ctx, ctx); + request->send(); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_close_image(int r) { + dout(20) << dendl; + + ceph_assert(r == 0); + finish(m_ret_val); +} + +template <typename I> +void OpenLocalImageRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h new file mode 100644 index 00000000..58de545f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h @@ -0,0 +1,90 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include <string> + +class Context; +class ContextWQ; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class OpenLocalImageRequest { +public: + static OpenLocalImageRequest* create(librados::IoCtx &local_io_ctx, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + ContextWQ *work_queue, + Context *on_finish) { + return new OpenLocalImageRequest(local_io_ctx, local_image_ctx, + local_image_id, work_queue, on_finish); + } + + OpenLocalImageRequest(librados::IoCtx &local_io_ctx, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + ContextWQ *m_work_queue, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE * * * * * * * * + * | * + * v * + * IS_PRIMARY * * * * * * * * + * | * + * v (skip if primary) v + * LOCK_IMAGE * * * > CLOSE_IMAGE + * | | + * v | + * <finish> <---------------/ + * + * @endverbatim + */ + librados::IoCtx &m_local_io_ctx; + ImageCtxT **m_local_image_ctx; + std::string m_local_image_id; + ContextWQ *m_work_queue; + Context *m_on_finish; + + bool m_primary = false; + int m_ret_val = 0; + + void send_open_image(); + void handle_open_image(int r); + + void send_is_primary(); + void handle_is_primary(int r); + + void send_lock_image(); + void handle_lock_image(int r); + + void send_close_image(int r); + void handle_close_image(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc new file mode 100644 index 00000000..8e0ea837 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc @@ -0,0 +1,180 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "PrepareLocalImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void PrepareLocalImageRequest<I>::send() { + dout(20) << dendl; + get_local_image_id(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_local_image_id() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_local_image_id>(this); + auto req = GetMirrorImageIdRequest<I>::create(m_io_ctx, m_global_image_id, + m_local_image_id, ctx); + req->send(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_local_image_id(int r) { + dout(20) << "r=" << r << ", " + << "local_image_id=" << *m_local_image_id << dendl; + + if (r < 0) { + finish(r); + return; + } + + get_local_image_name(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_local_image_name() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::dir_get_name_start(&op, *m_local_image_id); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_local_image_name>(this); + int r = m_io_ctx.aio_operate(RBD_DIRECTORY, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_local_image_name(int r) { + dout(20) << "r=" << r << dendl; + + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::dir_get_name_finish(&it, m_local_image_name); + } + + if (r < 0) { + if (r != -ENOENT) { + derr << "failed to retrieve image name: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + get_mirror_state(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_mirror_state() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_start(&op, *m_local_image_id); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_mirror_state>(this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_mirror_state(int r) { + dout(20) << ": r=" << r << dendl; + + cls::rbd::MirrorImage mirror_image; + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image); + } + + if (r < 0) { + derr << "failed to retrieve image mirror state: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + // TODO save current mirror state to determine if we should + // delete a partially formed image + // (e.g. MIRROR_IMAGE_STATE_CREATING/DELETING) + + get_tag_owner(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_tag_owner() { + // deduce the class type for the journal to support unit tests + using Journal = typename std::decay< + typename std::remove_pointer<decltype(std::declval<I>().journal)> + ::type>::type; + + dout(20) << dendl; + + Context *ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_tag_owner>(this); + Journal::get_tag_owner(m_io_ctx, *m_local_image_id, m_tag_owner, + m_work_queue, ctx); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_tag_owner(int r) { + dout(20) << "r=" << r << ", " + << "tag_owner=" << *m_tag_owner << dendl; + + if (r < 0) { + derr << "failed to retrieve journal tag owner: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void PrepareLocalImageRequest<I>::finish(int r) { + dout(20) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h new file mode 100644 index 00000000..3417dd96 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h @@ -0,0 +1,102 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados_fwd.hpp" +#include <string> + +namespace librbd { struct ImageCtx; } + +struct Context; +struct ContextWQ; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class PrepareLocalImageRequest { +public: + static PrepareLocalImageRequest *create(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *local_image_id, + std::string *local_image_name, + std::string *tag_owner, + ContextWQ *work_queue, + Context *on_finish) { + return new PrepareLocalImageRequest(io_ctx, global_image_id, local_image_id, + local_image_name, tag_owner, work_queue, + on_finish); + } + + PrepareLocalImageRequest(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *local_image_id, + std::string *local_image_name, + std::string *tag_owner, + ContextWQ *work_queue, + Context *on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), + m_local_image_id(local_image_id), m_local_image_name(local_image_name), + m_tag_owner(tag_owner), m_work_queue(work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_LOCAL_IMAGE_ID + * | + * v + * GET_LOCAL_IMAGE_NAME + * | + * v + * GET_MIRROR_STATE + * | + * v + * <finish> + + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + std::string *m_local_image_id; + std::string *m_local_image_name; + std::string *m_tag_owner; + ContextWQ *m_work_queue; + Context *m_on_finish; + + bufferlist m_out_bl; + + void get_local_image_id(); + void handle_get_local_image_id(int r); + + void get_local_image_name(); + void handle_get_local_image_name(int r); + + void get_mirror_state(); + void handle_get_mirror_state(int r); + + void get_tag_owner(); + void handle_get_tag_owner(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc new file mode 100644 index 00000000..00c141e0 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc @@ -0,0 +1,195 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "PrepareRemoteImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void PrepareRemoteImageRequest<I>::send() { + get_remote_mirror_uuid(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_remote_mirror_uuid() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_uuid_get_start(&op); + + librados::AioCompletion *aio_comp = create_rados_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_remote_mirror_uuid>(this); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_remote_mirror_uuid(int r) { + if (r >= 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_uuid_get_finish(&it, m_remote_mirror_uuid); + if (r >= 0 && m_remote_mirror_uuid->empty()) { + r = -ENOENT; + } + } + + dout(20) << "r=" << r << dendl; + if (r < 0) { + if (r == -ENOENT) { + dout(5) << "remote mirror uuid missing" << dendl; + } else { + derr << "failed to retrieve remote mirror uuid: " << cpp_strerror(r) + << dendl; + } + finish(r); + return; + } + + get_remote_image_id(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_remote_image_id() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_remote_image_id>(this); + auto req = GetMirrorImageIdRequest<I>::create(m_remote_io_ctx, + m_global_image_id, + m_remote_image_id, ctx); + req->send(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_remote_image_id(int r) { + dout(20) << "r=" << r << ", " + << "remote_image_id=" << *m_remote_image_id << dendl; + + if (r < 0) { + finish(r); + return; + } + + get_client(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_client() { + dout(20) << dendl; + + ceph_assert(*m_remote_journaler == nullptr); + *m_remote_journaler = new Journaler(m_threads->work_queue, m_threads->timer, + &m_threads->timer_lock, m_remote_io_ctx, + *m_remote_image_id, m_local_mirror_uuid, + m_journal_settings); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_client>(this)); + (*m_remote_journaler)->get_client(m_local_mirror_uuid, &m_client, ctx); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_client(int r) { + dout(20) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "client not registered" << dendl; + register_client(); + } else if (r < 0) { + derr << "failed to retrieve client: " << cpp_strerror(r) << dendl; + finish(r); + } else if (!util::decode_client_meta(m_client, m_client_meta)) { + // require operator intervention since the data is corrupt + finish(-EBADMSG); + } else { + // skip registration if it already exists + *m_client_state = m_client.state; + finish(0); + } +} + +template <typename I> +void PrepareRemoteImageRequest<I>::register_client() { + dout(20) << dendl; + + librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{ + m_local_image_id}; + mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + librbd::journal::ClientData client_data{mirror_peer_client_meta}; + bufferlist client_data_bl; + encode(client_data, client_data_bl); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_register_client>(this)); + (*m_remote_journaler)->register_client(client_data_bl, ctx); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_register_client(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register with remote journal: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + *m_client_state = cls::journal::CLIENT_STATE_CONNECTED; + *m_client_meta = librbd::journal::MirrorPeerClientMeta(m_local_image_id); + m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + finish(0); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::finish(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + delete *m_remote_journaler; + *m_remote_journaler = nullptr; + } + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h new file mode 100644 index 00000000..100a066b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h @@ -0,0 +1,141 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "cls/journal/cls_journal_types.h" +#include "journal/Settings.h" +#include "librbd/journal/TypeTraits.h" +#include <string> + +namespace journal { class Journaler; } +namespace journal { class Settings; } +namespace librbd { struct ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +struct Context; +struct ContextWQ; + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class PrepareRemoteImageRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + + static PrepareRemoteImageRequest *create(Threads<ImageCtxT> *threads, + librados::IoCtx &remote_io_ctx, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &local_image_id, + const journal::Settings &settings, + std::string *remote_mirror_uuid, + std::string *remote_image_id, + Journaler **remote_journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, + Context *on_finish) { + return new PrepareRemoteImageRequest(threads, remote_io_ctx, + global_image_id, local_mirror_uuid, + local_image_id, settings, + remote_mirror_uuid, remote_image_id, + remote_journaler, client_state, + client_meta, on_finish); + } + + PrepareRemoteImageRequest(Threads<ImageCtxT> *threads, + librados::IoCtx &remote_io_ctx, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &local_image_id, + const journal::Settings &journal_settings, + std::string *remote_mirror_uuid, + std::string *remote_image_id, + Journaler **remote_journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, + Context *on_finish) + : m_threads(threads), m_remote_io_ctx(remote_io_ctx), + m_global_image_id(global_image_id), + m_local_mirror_uuid(local_mirror_uuid), m_local_image_id(local_image_id), + m_journal_settings(journal_settings), + m_remote_mirror_uuid(remote_mirror_uuid), + m_remote_image_id(remote_image_id), + m_remote_journaler(remote_journaler), m_client_state(client_state), + m_client_meta(client_meta), m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_REMOTE_MIRROR_UUID + * | + * v + * GET_REMOTE_IMAGE_ID + * | + * v + * GET_CLIENT + * | + * v (skip if not needed) + * REGISTER_CLIENT + * | + * v + * <finish> + + * @endverbatim + */ + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_remote_io_ctx; + std::string m_global_image_id; + std::string m_local_mirror_uuid; + std::string m_local_image_id; + journal::Settings m_journal_settings; + std::string *m_remote_mirror_uuid; + std::string *m_remote_image_id; + Journaler **m_remote_journaler; + cls::journal::ClientState *m_client_state; + MirrorPeerClientMeta *m_client_meta; + Context *m_on_finish; + + bufferlist m_out_bl; + cls::journal::Client m_client; + + void get_remote_mirror_uuid(); + void handle_get_remote_mirror_uuid(int r); + + void get_remote_image_id(); + void handle_get_remote_image_id(int r); + + void get_client(); + void handle_get_client(int r); + + void register_client(); + void handle_register_client(int r); + + void finish(int r); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc new file mode 100644 index 00000000..f514d749 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc @@ -0,0 +1,246 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ReplayStatusFormatter.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::ReplayStatusFormatter: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::unique_lock_name; + +template <typename I> +ReplayStatusFormatter<I>::ReplayStatusFormatter(Journaler *journaler, + const std::string &mirror_uuid) + : m_journaler(journaler), + m_mirror_uuid(mirror_uuid), + m_lock(unique_lock_name("ReplayStatusFormatter::m_lock", this)) { +} + +template <typename I> +bool ReplayStatusFormatter<I>::get_or_send_update(std::string *description, + Context *on_finish) { + dout(20) << dendl; + + bool in_progress = false; + { + Mutex::Locker locker(m_lock); + if (m_on_finish) { + in_progress = true; + } else { + m_on_finish = on_finish; + } + } + + if (in_progress) { + dout(10) << "previous request is still in progress, ignoring" << dendl; + on_finish->complete(-EAGAIN); + return false; + } + + m_master_position = cls::journal::ObjectPosition(); + m_mirror_position = cls::journal::ObjectPosition(); + + cls::journal::Client master_client, mirror_client; + int r; + + r = m_journaler->get_cached_client(librbd::Journal<>::IMAGE_CLIENT_ID, + &master_client); + if (r < 0) { + derr << "error retrieving registered master client: " + << cpp_strerror(r) << dendl; + } else { + r = m_journaler->get_cached_client(m_mirror_uuid, &mirror_client); + if (r < 0) { + derr << "error retrieving registered mirror client: " + << cpp_strerror(r) << dendl; + } + } + + if (!master_client.commit_position.object_positions.empty()) { + m_master_position = + *(master_client.commit_position.object_positions.begin()); + } + + if (!mirror_client.commit_position.object_positions.empty()) { + m_mirror_position = + *(mirror_client.commit_position.object_positions.begin()); + } + + if (!calculate_behind_master_or_send_update()) { + dout(20) << "need to update tag cache" << dendl; + return false; + } + + format(description); + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == on_finish); + m_on_finish = nullptr; + } + + on_finish->complete(-EEXIST); + return true; +} + +template <typename I> +bool ReplayStatusFormatter<I>::calculate_behind_master_or_send_update() { + dout(20) << "m_master_position=" << m_master_position + << ", m_mirror_position=" << m_mirror_position << dendl; + + m_entries_behind_master = 0; + + if (m_master_position == cls::journal::ObjectPosition() || + m_master_position.tag_tid < m_mirror_position.tag_tid) { + return true; + } + + cls::journal::ObjectPosition master = m_master_position; + uint64_t mirror_tag_tid = m_mirror_position.tag_tid; + + while (master.tag_tid > mirror_tag_tid) { + auto tag_it = m_tag_cache.find(master.tag_tid); + if (tag_it == m_tag_cache.end()) { + send_update_tag_cache(master.tag_tid, mirror_tag_tid); + return false; + } + librbd::journal::TagData &tag_data = tag_it->second; + m_entries_behind_master += master.entry_tid; + master = {0, tag_data.predecessor.tag_tid, tag_data.predecessor.entry_tid}; + } + if (master.tag_tid == mirror_tag_tid && + master.entry_tid > m_mirror_position.entry_tid) { + m_entries_behind_master += master.entry_tid - m_mirror_position.entry_tid; + } + + dout(20) << "clearing tags not needed any more (below mirror position)" + << dendl; + + uint64_t tag_tid = mirror_tag_tid; + size_t old_size = m_tag_cache.size(); + while (tag_tid != 0) { + auto tag_it = m_tag_cache.find(tag_tid); + if (tag_it == m_tag_cache.end()) { + break; + } + librbd::journal::TagData &tag_data = tag_it->second; + + dout(20) << "erasing tag " << tag_data << "for tag_tid " << tag_tid + << dendl; + + tag_tid = tag_data.predecessor.tag_tid; + m_tag_cache.erase(tag_it); + } + + dout(20) << old_size - m_tag_cache.size() << " entries cleared" << dendl; + + return true; +} + +template <typename I> +void ReplayStatusFormatter<I>::send_update_tag_cache(uint64_t master_tag_tid, + uint64_t mirror_tag_tid) { + if (master_tag_tid <= mirror_tag_tid || + m_tag_cache.find(master_tag_tid) != m_tag_cache.end()) { + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(m_on_finish, on_finish); + } + + ceph_assert(on_finish); + on_finish->complete(0); + return; + } + + dout(20) << "master_tag_tid=" << master_tag_tid << ", mirror_tag_tid=" + << mirror_tag_tid << dendl; + + FunctionContext *ctx = new FunctionContext( + [this, master_tag_tid, mirror_tag_tid](int r) { + handle_update_tag_cache(master_tag_tid, mirror_tag_tid, r); + }); + m_journaler->get_tag(master_tag_tid, &m_tag, ctx); +} + +template <typename I> +void ReplayStatusFormatter<I>::handle_update_tag_cache(uint64_t master_tag_tid, + uint64_t mirror_tag_tid, + int r) { + librbd::journal::TagData tag_data; + + if (r < 0) { + derr << "error retrieving tag " << master_tag_tid << ": " << cpp_strerror(r) + << dendl; + } else { + dout(20) << "retrieved tag " << master_tag_tid << ": " << m_tag << dendl; + + auto it = m_tag.data.cbegin(); + try { + decode(tag_data, it); + } catch (const buffer::error &err) { + derr << "error decoding tag " << master_tag_tid << ": " << err.what() + << dendl; + } + } + + if (tag_data.predecessor.mirror_uuid != + librbd::Journal<>::LOCAL_MIRROR_UUID && + tag_data.predecessor.mirror_uuid != + librbd::Journal<>::ORPHAN_MIRROR_UUID) { + dout(20) << "hit remote image non-primary epoch" << dendl; + tag_data.predecessor = {}; + } + + dout(20) << "decoded tag " << master_tag_tid << ": " << tag_data << dendl; + + m_tag_cache[master_tag_tid] = tag_data; + send_update_tag_cache(tag_data.predecessor.tag_tid, mirror_tag_tid); +} + +template <typename I> +void ReplayStatusFormatter<I>::format(std::string *description) { + + dout(20) << "m_master_position=" << m_master_position + << ", m_mirror_position=" << m_mirror_position + << ", m_entries_behind_master=" << m_entries_behind_master << dendl; + + std::stringstream ss; + ss << "master_position="; + if (m_master_position == cls::journal::ObjectPosition()) { + ss << "[]"; + } else { + ss << m_master_position; + } + ss << ", mirror_position="; + if (m_mirror_position == cls::journal::ObjectPosition()) { + ss << "[]"; + } else { + ss << m_mirror_position; + } + ss << ", entries_behind_master=" + << (m_entries_behind_master > 0 ? m_entries_behind_master : 0); + + *description = ss.str(); +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class +rbd::mirror::image_replayer::ReplayStatusFormatter<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h new file mode 100644 index 00000000..59940a65 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H +#define RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H + +#include "include/Context.h" +#include "common/Mutex.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" + +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class ReplayStatusFormatter { +public: + typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler; + + static ReplayStatusFormatter* create(Journaler *journaler, + const std::string &mirror_uuid) { + return new ReplayStatusFormatter(journaler, mirror_uuid); + } + + static void destroy(ReplayStatusFormatter* formatter) { + delete formatter; + } + + ReplayStatusFormatter(Journaler *journaler, const std::string &mirror_uuid); + + bool get_or_send_update(std::string *description, Context *on_finish); + +private: + Journaler *m_journaler; + std::string m_mirror_uuid; + Mutex m_lock; + Context *m_on_finish = nullptr; + cls::journal::ObjectPosition m_master_position; + cls::journal::ObjectPosition m_mirror_position; + int m_entries_behind_master = 0; + cls::journal::Tag m_tag; + std::map<uint64_t, librbd::journal::TagData> m_tag_cache; + + bool calculate_behind_master_or_send_update(); + void send_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid); + void handle_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid, + int r); + void format(std::string *description); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H diff --git a/src/tools/rbd_mirror/image_replayer/Types.h b/src/tools/rbd_mirror/image_replayer/Types.h new file mode 100644 index 00000000..6ab988a7 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Types.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H + +namespace rbd { +namespace mirror { +namespace image_replayer { + +enum HealthState { + HEALTH_STATE_OK, + HEALTH_STATE_WARNING, + HEALTH_STATE_ERROR +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H diff --git a/src/tools/rbd_mirror/image_replayer/Utils.cc b/src/tools/rbd_mirror/image_replayer/Utils.cc new file mode 100644 index 00000000..eda0179f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Utils.cc @@ -0,0 +1,50 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::util::" \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace util { + +bool decode_client_meta(const cls::journal::Client& client, + librbd::journal::MirrorPeerClientMeta* client_meta) { + dout(15) << dendl; + + librbd::journal::ClientData client_data; + auto it = client.data.cbegin(); + try { + decode(client_data, it); + } catch (const buffer::error &err) { + derr << "failed to decode client meta data: " << err.what() << dendl; + return false; + } + + auto local_client_meta = boost::get<librbd::journal::MirrorPeerClientMeta>( + &client_data.client_meta); + if (local_client_meta == nullptr) { + derr << "unknown peer registration" << dendl; + return false; + } + + *client_meta = *local_client_meta; + dout(15) << "client found: client_meta=" << *client_meta << dendl; + return true; +} + +} // namespace util +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + diff --git a/src/tools/rbd_mirror/image_replayer/Utils.h b/src/tools/rbd_mirror/image_replayer/Utils.h new file mode 100644 index 00000000..d42146d1 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Utils.h @@ -0,0 +1,23 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_UTILS_H +#define RBD_MIRROR_IMAGE_REPLAYER_UTILS_H + +namespace cls { namespace journal { struct Client; } } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace util { + +bool decode_client_meta(const cls::journal::Client& client, + librbd::journal::MirrorPeerClientMeta* client_meta); + +} // namespace util +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_UTILS_H diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc new file mode 100644 index 00000000..ffe2eca9 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc @@ -0,0 +1,182 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointCreateRequest.h" +#include "include/uuid.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointCreateRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_sync { + +namespace { + +static const std::string SNAP_NAME_PREFIX(".rbd-mirror"); + +} // anonymous namespace + +using librbd::util::create_context_callback; + +template <typename I> +SyncPointCreateRequest<I>::SyncPointCreateRequest(I *remote_image_ctx, + const std::string &mirror_uuid, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + Context *on_finish) + : m_remote_image_ctx(remote_image_ctx), m_mirror_uuid(mirror_uuid), + m_journaler(journaler), m_client_meta(client_meta), m_on_finish(on_finish), + m_client_meta_copy(*client_meta) { + ceph_assert(m_client_meta->sync_points.size() < 2); + + // initialize the updated client meta with the new sync point + m_client_meta_copy.sync_points.emplace_back(); + if (m_client_meta_copy.sync_points.size() > 1) { + m_client_meta_copy.sync_points.back().from_snap_name = + m_client_meta_copy.sync_points.front().snap_name; + } +} + +template <typename I> +void SyncPointCreateRequest<I>::send() { + send_update_client(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_update_client() { + uuid_d uuid_gen; + uuid_gen.generate_random(); + + MirrorPeerSyncPoint &sync_point = m_client_meta_copy.sync_points.back(); + sync_point.snap_name = SNAP_NAME_PREFIX + "." + m_mirror_uuid + "." + + uuid_gen.to_string(); + + dout(20) << ": sync_point=" << sync_point << dendl; + + bufferlist client_data_bl; + librbd::journal::ClientData client_data(m_client_meta_copy); + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_update_client>( + this); + m_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_update_client(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + // update provided meta structure to reflect reality + *m_client_meta = m_client_meta_copy; + + send_refresh_image(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_refresh_image>( + this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_snap(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_create_snap() { + dout(20) << dendl; + + MirrorPeerSyncPoint &sync_point = m_client_meta_copy.sync_points.back(); + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_create_snap>( + this); + m_remote_image_ctx->operations->snap_create( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name.c_str(), ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_create_snap(int r) { + dout(20) << ": r=" << r << dendl; + + if (r == -EEXIST) { + send_update_client(); + return; + } else if (r < 0) { + derr << ": failed to create snapshot: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_final_refresh_image(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_final_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, + &SyncPointCreateRequest<I>::handle_final_refresh_image>(this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_final_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to refresh image for snapshot: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void SyncPointCreateRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h new file mode 100644 index 00000000..45275ec4 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h @@ -0,0 +1,96 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H +#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H + +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <string> + +class Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_sync { + +template <typename ImageCtxT = librbd::ImageCtx> +class SyncPointCreateRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + typedef librbd::journal::MirrorPeerSyncPoint MirrorPeerSyncPoint; + + static SyncPointCreateRequest* create(ImageCtxT *remote_image_ctx, + const std::string &mirror_uuid, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + Context *on_finish) { + return new SyncPointCreateRequest(remote_image_ctx, mirror_uuid, journaler, + client_meta, on_finish); + } + + SyncPointCreateRequest(ImageCtxT *remote_image_ctx, + const std::string &mirror_uuid, Journaler *journaler, + MirrorPeerClientMeta *client_meta, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * UPDATE_CLIENT < . . + * | . + * v . + * REFRESH_IMAGE . + * | . (repeat on EEXIST) + * v . + * CREATE_SNAP . . . . + * | + * v + * REFRESH_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_remote_image_ctx; + std::string m_mirror_uuid; + Journaler *m_journaler; + MirrorPeerClientMeta *m_client_meta; + Context *m_on_finish; + + MirrorPeerClientMeta m_client_meta_copy; + + void send_update_client(); + void handle_update_client(int r); + + void send_refresh_image(); + void handle_refresh_image(int r); + + void send_create_snap(); + void handle_create_snap(int r); + + void send_final_refresh_image(); + void handle_final_refresh_image(int r); + + void finish(int r); +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc new file mode 100644 index 00000000..2cfed5e6 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc @@ -0,0 +1,220 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointPruneRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include <set> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointPruneRequest: " \ + << this << " " << __func__ +namespace rbd { +namespace mirror { +namespace image_sync { + +using librbd::util::create_context_callback; + +template <typename I> +SyncPointPruneRequest<I>::SyncPointPruneRequest(I *remote_image_ctx, + bool sync_complete, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + Context *on_finish) + : m_remote_image_ctx(remote_image_ctx), m_sync_complete(sync_complete), + m_journaler(journaler), m_client_meta(client_meta), m_on_finish(on_finish), + m_client_meta_copy(*client_meta) { +} + +template <typename I> +void SyncPointPruneRequest<I>::send() { + if (m_client_meta->sync_points.empty()) { + send_remove_snap(); + return; + } + + if (m_sync_complete) { + // if sync is complete, we can remove the master sync point + auto it = m_client_meta_copy.sync_points.begin(); + MirrorPeerSyncPoint &sync_point = *it; + + ++it; + if (it == m_client_meta_copy.sync_points.end() || + it->from_snap_name != sync_point.snap_name) { + m_snap_names.push_back(sync_point.snap_name); + } + + if (!sync_point.from_snap_name.empty()) { + m_snap_names.push_back(sync_point.from_snap_name); + } + } else { + // if we have more than one sync point or invalid sync points, + // trim them off + RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock); + std::set<std::string> snap_names; + for (auto it = m_client_meta_copy.sync_points.rbegin(); + it != m_client_meta_copy.sync_points.rend(); ++it) { + MirrorPeerSyncPoint &sync_point = *it; + if (&sync_point == &m_client_meta_copy.sync_points.front()) { + if (m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name) == + CEPH_NOSNAP) { + derr << ": failed to locate sync point snapshot: " + << sync_point.snap_name << dendl; + } else if (!sync_point.from_snap_name.empty()) { + derr << ": unexpected from_snap_name in primary sync point: " + << sync_point.from_snap_name << dendl; + } else { + // first sync point is OK -- keep it + break; + } + m_invalid_master_sync_point = true; + } + + if (snap_names.count(sync_point.snap_name) == 0) { + snap_names.insert(sync_point.snap_name); + m_snap_names.push_back(sync_point.snap_name); + } + + MirrorPeerSyncPoint &front_sync_point = + m_client_meta_copy.sync_points.front(); + if (!sync_point.from_snap_name.empty() && + snap_names.count(sync_point.from_snap_name) == 0 && + sync_point.from_snap_name != front_sync_point.snap_name) { + snap_names.insert(sync_point.from_snap_name); + m_snap_names.push_back(sync_point.from_snap_name); + } + } + } + + send_remove_snap(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_remove_snap() { + if (m_snap_names.empty()) { + send_refresh_image(); + return; + } + + const std::string &snap_name = m_snap_names.front(); + + dout(20) << ": snap_name=" << snap_name << dendl; + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_remove_snap>( + this); + m_remote_image_ctx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), + snap_name.c_str(), + ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_remove_snap(int r) { + dout(20) << ": r=" << r << dendl; + + ceph_assert(!m_snap_names.empty()); + std::string snap_name = m_snap_names.front(); + m_snap_names.pop_front(); + + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + derr << ": failed to remove snapshot '" << snap_name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_remove_snap(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_refresh_image>( + this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_update_client(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_update_client() { + dout(20) << dendl; + + if (m_sync_complete) { + m_client_meta_copy.sync_points.pop_front(); + if (m_client_meta_copy.sync_points.empty()) { + m_client_meta_copy.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + } + } else { + while (m_client_meta_copy.sync_points.size() > 1) { + m_client_meta_copy.sync_points.pop_back(); + } + if (m_invalid_master_sync_point) { + // all subsequent sync points would have been pruned + m_client_meta_copy.sync_points.clear(); + } + } + + bufferlist client_data_bl; + librbd::journal::ClientData client_data(m_client_meta_copy); + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_update_client>( + this); + m_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_update_client(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + // update provided meta structure to reflect reality + *m_client_meta = m_client_meta_copy; + finish(0); +} + +template <typename I> +void SyncPointPruneRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h new file mode 100644 index 00000000..65e13ef5 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h @@ -0,0 +1,96 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H +#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H + +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <list> +#include <string> + +class Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_sync { + +template <typename ImageCtxT = librbd::ImageCtx> +class SyncPointPruneRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + typedef librbd::journal::MirrorPeerSyncPoint MirrorPeerSyncPoint; + + static SyncPointPruneRequest* create(ImageCtxT *remote_image_ctx, + bool sync_complete, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + Context *on_finish) { + return new SyncPointPruneRequest(remote_image_ctx, sync_complete, journaler, + client_meta, on_finish); + } + + SyncPointPruneRequest(ImageCtxT *remote_image_ctx, bool sync_complete, + Journaler *journaler, MirrorPeerClientMeta *client_meta, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * | . . . . . + * | . . + * v v . (repeat if from snap + * REMOVE_SNAP . . . unused by other sync) + * | + * v + * REFRESH_IMAGE + * | + * v + * UPDATE_CLIENT + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_remote_image_ctx; + bool m_sync_complete; + Journaler *m_journaler; + MirrorPeerClientMeta *m_client_meta; + Context *m_on_finish; + + MirrorPeerClientMeta m_client_meta_copy; + std::list<std::string> m_snap_names; + + bool m_invalid_master_sync_point = false; + + void send_remove_snap(); + void handle_remove_snap(int r); + + void send_refresh_image(); + void handle_refresh_image(int r); + + void send_update_client(); + void handle_update_client(int r); + + void finish(int r); +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H diff --git a/src/tools/rbd_mirror/instance_watcher/Types.cc b/src/tools/rbd_mirror/instance_watcher/Types.cc new file mode 100644 index 00000000..0e992273 --- /dev/null +++ b/src/tools/rbd_mirror/instance_watcher/Types.cc @@ -0,0 +1,245 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Types.h" +#include "include/ceph_assert.h" +#include "include/stringify.h" +#include "common/Formatter.h" + +namespace rbd { +namespace mirror { +namespace instance_watcher { + +namespace { + +class EncodePayloadVisitor : public boost::static_visitor<void> { +public: + explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + using ceph::encode; + encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl); + payload.encode(m_bl); + } + +private: + bufferlist &m_bl; +}; + +class DecodePayloadVisitor : public boost::static_visitor<void> { +public: + DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) {} + + template <typename Payload> + inline void operator()(Payload &payload) const { + payload.decode(m_version, m_iter); + } + +private: + __u8 m_version; + bufferlist::const_iterator &m_iter; +}; + +class DumpPayloadVisitor : public boost::static_visitor<void> { +public: + explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + NotifyOp notify_op = Payload::NOTIFY_OP; + m_formatter->dump_string("notify_op", stringify(notify_op)); + payload.dump(m_formatter); + } + +private: + ceph::Formatter *m_formatter; +}; + +} // anonymous namespace + +void PayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + encode(request_id, bl); +} + +void PayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + decode(request_id, iter); +} + +void PayloadBase::dump(Formatter *f) const { + f->dump_unsigned("request_id", request_id); +} + +void ImagePayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(global_image_id, bl); +} + +void ImagePayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(global_image_id, iter); +} + +void ImagePayloadBase::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("global_image_id", global_image_id); +} + +void PeerImageRemovedPayload::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(global_image_id, bl); + encode(peer_mirror_uuid, bl); +} + +void PeerImageRemovedPayload::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(global_image_id, iter); + decode(peer_mirror_uuid, iter); +} + +void PeerImageRemovedPayload::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("global_image_id", global_image_id); + f->dump_string("peer_mirror_uuid", peer_mirror_uuid); +} + +void SyncPayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(sync_id, bl); +} + +void SyncPayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(sync_id, iter); +} + +void SyncPayloadBase::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("sync_id", sync_id); +} + +void UnknownPayload::encode(bufferlist &bl) const { + ceph_abort(); +} + +void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void UnknownPayload::dump(Formatter *f) const { +} + +void NotifyMessage::encode(bufferlist& bl) const { + ENCODE_START(2, 2, bl); + boost::apply_visitor(EncodePayloadVisitor(bl), payload); + ENCODE_FINISH(bl); +} + +void NotifyMessage::decode(bufferlist::const_iterator& iter) { + DECODE_START(2, iter); + + uint32_t notify_op; + decode(notify_op, iter); + + // select the correct payload variant based upon the encoded op + switch (notify_op) { + case NOTIFY_OP_IMAGE_ACQUIRE: + payload = ImageAcquirePayload(); + break; + case NOTIFY_OP_IMAGE_RELEASE: + payload = ImageReleasePayload(); + break; + case NOTIFY_OP_PEER_IMAGE_REMOVED: + payload = PeerImageRemovedPayload(); + break; + case NOTIFY_OP_SYNC_REQUEST: + payload = SyncRequestPayload(); + break; + case NOTIFY_OP_SYNC_START: + payload = SyncStartPayload(); + break; + default: + payload = UnknownPayload(); + break; + } + + apply_visitor(DecodePayloadVisitor(struct_v, iter), payload); + DECODE_FINISH(iter); +} + +void NotifyMessage::dump(Formatter *f) const { + apply_visitor(DumpPayloadVisitor(f), payload); +} + +void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) { + o.push_back(new NotifyMessage(ImageAcquirePayload())); + o.push_back(new NotifyMessage(ImageAcquirePayload(1, "gid"))); + + o.push_back(new NotifyMessage(ImageReleasePayload())); + o.push_back(new NotifyMessage(ImageReleasePayload(1, "gid"))); + + o.push_back(new NotifyMessage(PeerImageRemovedPayload())); + o.push_back(new NotifyMessage(PeerImageRemovedPayload(1, "gid", "uuid"))); + + o.push_back(new NotifyMessage(SyncRequestPayload())); + o.push_back(new NotifyMessage(SyncRequestPayload(1, "sync_id"))); + + o.push_back(new NotifyMessage(SyncStartPayload())); + o.push_back(new NotifyMessage(SyncStartPayload(1, "sync_id"))); +} + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op) { + switch (op) { + case NOTIFY_OP_IMAGE_ACQUIRE: + out << "ImageAcquire"; + break; + case NOTIFY_OP_IMAGE_RELEASE: + out << "ImageRelease"; + break; + case NOTIFY_OP_PEER_IMAGE_REMOVED: + out << "PeerImageRemoved"; + break; + case NOTIFY_OP_SYNC_REQUEST: + out << "SyncRequest"; + break; + case NOTIFY_OP_SYNC_START: + out << "SyncStart"; + break; + default: + out << "Unknown (" << static_cast<uint32_t>(op) << ")"; + break; + } + return out; +} + +void NotifyAckPayload::encode(bufferlist &bl) const { + using ceph::encode; + encode(instance_id, bl); + encode(request_id, bl); + encode(ret_val, bl); +} + +void NotifyAckPayload::decode(bufferlist::const_iterator &iter) { + using ceph::decode; + decode(instance_id, iter); + decode(request_id, iter); + decode(ret_val, iter); +} + +void NotifyAckPayload::dump(Formatter *f) const { + f->dump_string("instance_id", instance_id); + f->dump_unsigned("request_id", request_id); + f->dump_int("request_id", ret_val); +} + +} // namespace instance_watcher +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/instance_watcher/Types.h b/src/tools/rbd_mirror/instance_watcher/Types.h new file mode 100644 index 00000000..b0b7b779 --- /dev/null +++ b/src/tools/rbd_mirror/instance_watcher/Types.h @@ -0,0 +1,197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_INSTANCE_WATCHER_TYPES_H +#define RBD_MIRROR_INSTANCE_WATCHER_TYPES_H + +#include <string> +#include <set> +#include <boost/variant.hpp> + +#include "include/buffer_fwd.h" +#include "include/encoding.h" +#include "include/int_types.h" + +namespace ceph { class Formatter; } + +namespace rbd { +namespace mirror { +namespace instance_watcher { + +enum NotifyOp { + NOTIFY_OP_IMAGE_ACQUIRE = 0, + NOTIFY_OP_IMAGE_RELEASE = 1, + NOTIFY_OP_PEER_IMAGE_REMOVED = 2, + NOTIFY_OP_SYNC_REQUEST = 3, + NOTIFY_OP_SYNC_START = 4 +}; + +struct PayloadBase { + uint64_t request_id; + + PayloadBase() : request_id(0) { + } + + PayloadBase(uint64_t request_id) : request_id(request_id) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct ImagePayloadBase : public PayloadBase { + std::string global_image_id; + + ImagePayloadBase() : PayloadBase() { + } + + ImagePayloadBase(uint64_t request_id, const std::string &global_image_id) + : PayloadBase(request_id), global_image_id(global_image_id) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct ImageAcquirePayload : public ImagePayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_ACQUIRE; + + ImageAcquirePayload() { + } + ImageAcquirePayload(uint64_t request_id, const std::string &global_image_id) + : ImagePayloadBase(request_id, global_image_id) { + } +}; + +struct ImageReleasePayload : public ImagePayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_RELEASE; + + ImageReleasePayload() { + } + ImageReleasePayload(uint64_t request_id, const std::string &global_image_id) + : ImagePayloadBase(request_id, global_image_id) { + } +}; + +struct PeerImageRemovedPayload : public PayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_PEER_IMAGE_REMOVED; + + std::string global_image_id; + std::string peer_mirror_uuid; + + PeerImageRemovedPayload() { + } + PeerImageRemovedPayload(uint64_t request_id, + const std::string& global_image_id, + const std::string& peer_mirror_uuid) + : PayloadBase(request_id), + global_image_id(global_image_id), peer_mirror_uuid(peer_mirror_uuid) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct SyncPayloadBase : public PayloadBase { + std::string sync_id; + + SyncPayloadBase() : PayloadBase() { + } + + SyncPayloadBase(uint64_t request_id, const std::string &sync_id) + : PayloadBase(request_id), sync_id(sync_id) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct SyncRequestPayload : public SyncPayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_REQUEST; + + SyncRequestPayload() : SyncPayloadBase() { + } + + SyncRequestPayload(uint64_t request_id, const std::string &sync_id) + : SyncPayloadBase(request_id, sync_id) { + } +}; + +struct SyncStartPayload : public SyncPayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_START; + + SyncStartPayload() : SyncPayloadBase() { + } + + SyncStartPayload(uint64_t request_id, const std::string &sync_id) + : SyncPayloadBase(request_id, sync_id) { + } +}; + +struct UnknownPayload { + static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1); + + UnknownPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +typedef boost::variant<ImageAcquirePayload, + ImageReleasePayload, + PeerImageRemovedPayload, + SyncRequestPayload, + SyncStartPayload, + UnknownPayload> Payload; + +struct NotifyMessage { + NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) { + } + + Payload payload; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; + + static void generate_test_instances(std::list<NotifyMessage *> &o); +}; + +WRITE_CLASS_ENCODER(NotifyMessage); + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op); + +struct NotifyAckPayload { + std::string instance_id; + uint64_t request_id; + int ret_val; + + NotifyAckPayload() : request_id(0), ret_val(0) { + } + + NotifyAckPayload(const std::string &instance_id, uint64_t request_id, + int ret_val) + : instance_id(instance_id), request_id(request_id), ret_val(ret_val) { + } + + void encode(bufferlist &bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; +}; + +WRITE_CLASS_ENCODER(NotifyAckPayload); + +} // namespace instance_watcher +} // namespace mirror +} // namespace librbd + +using rbd::mirror::instance_watcher::encode; +using rbd::mirror::instance_watcher::decode; + +#endif // RBD_MIRROR_INSTANCE_WATCHER_TYPES_H diff --git a/src/tools/rbd_mirror/instances/Types.h b/src/tools/rbd_mirror/instances/Types.h new file mode 100644 index 00000000..8b0a68fc --- /dev/null +++ b/src/tools/rbd_mirror/instances/Types.h @@ -0,0 +1,28 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCES_TYPES_H +#define CEPH_RBD_MIRROR_INSTANCES_TYPES_H + +#include <string> +#include <vector> + +namespace rbd { +namespace mirror { +namespace instances { + +struct Listener { + typedef std::vector<std::string> InstanceIds; + + virtual ~Listener() { + } + + virtual void handle_added(const InstanceIds& instance_ids) = 0; + virtual void handle_removed(const InstanceIds& instance_ids) = 0; +}; + +} // namespace instances +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCES_TYPES_H diff --git a/src/tools/rbd_mirror/leader_watcher/Types.cc b/src/tools/rbd_mirror/leader_watcher/Types.cc new file mode 100644 index 00000000..d2fb7908 --- /dev/null +++ b/src/tools/rbd_mirror/leader_watcher/Types.cc @@ -0,0 +1,161 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Types.h" +#include "include/ceph_assert.h" +#include "include/stringify.h" +#include "common/Formatter.h" + +namespace rbd { +namespace mirror { +namespace leader_watcher { + +namespace { + +class EncodePayloadVisitor : public boost::static_visitor<void> { +public: + explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + using ceph::encode; + encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl); + payload.encode(m_bl); + } + +private: + bufferlist &m_bl; +}; + +class DecodePayloadVisitor : public boost::static_visitor<void> { +public: + DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) {} + + template <typename Payload> + inline void operator()(Payload &payload) const { + payload.decode(m_version, m_iter); + } + +private: + __u8 m_version; + bufferlist::const_iterator &m_iter; +}; + +class DumpPayloadVisitor : public boost::static_visitor<void> { +public: + explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + NotifyOp notify_op = Payload::NOTIFY_OP; + m_formatter->dump_string("notify_op", stringify(notify_op)); + payload.dump(m_formatter); + } + +private: + ceph::Formatter *m_formatter; +}; + +} // anonymous namespace + +void HeartbeatPayload::encode(bufferlist &bl) const { +} + +void HeartbeatPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void HeartbeatPayload::dump(Formatter *f) const { +} + +void LockAcquiredPayload::encode(bufferlist &bl) const { +} + +void LockAcquiredPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void LockAcquiredPayload::dump(Formatter *f) const { +} + +void LockReleasedPayload::encode(bufferlist &bl) const { +} + +void LockReleasedPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void LockReleasedPayload::dump(Formatter *f) const { +} + +void UnknownPayload::encode(bufferlist &bl) const { + ceph_abort(); +} + +void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void UnknownPayload::dump(Formatter *f) const { +} + +void NotifyMessage::encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + boost::apply_visitor(EncodePayloadVisitor(bl), payload); + ENCODE_FINISH(bl); +} + +void NotifyMessage::decode(bufferlist::const_iterator& iter) { + DECODE_START(1, iter); + + uint32_t notify_op; + decode(notify_op, iter); + + // select the correct payload variant based upon the encoded op + switch (notify_op) { + case NOTIFY_OP_HEARTBEAT: + payload = HeartbeatPayload(); + break; + case NOTIFY_OP_LOCK_ACQUIRED: + payload = LockAcquiredPayload(); + break; + case NOTIFY_OP_LOCK_RELEASED: + payload = LockReleasedPayload(); + break; + default: + payload = UnknownPayload(); + break; + } + + apply_visitor(DecodePayloadVisitor(struct_v, iter), payload); + DECODE_FINISH(iter); +} + +void NotifyMessage::dump(Formatter *f) const { + apply_visitor(DumpPayloadVisitor(f), payload); +} + +void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) { + o.push_back(new NotifyMessage(HeartbeatPayload())); + o.push_back(new NotifyMessage(LockAcquiredPayload())); + o.push_back(new NotifyMessage(LockReleasedPayload())); +} + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op) { + switch (op) { + case NOTIFY_OP_HEARTBEAT: + out << "Heartbeat"; + break; + case NOTIFY_OP_LOCK_ACQUIRED: + out << "LockAcquired"; + break; + case NOTIFY_OP_LOCK_RELEASED: + out << "LockReleased"; + break; + default: + out << "Unknown (" << static_cast<uint32_t>(op) << ")"; + break; + } + return out; +} + +} // namespace leader_watcher +} // namespace mirror +} // namespace librbd diff --git a/src/tools/rbd_mirror/leader_watcher/Types.h b/src/tools/rbd_mirror/leader_watcher/Types.h new file mode 100644 index 00000000..1278e54b --- /dev/null +++ b/src/tools/rbd_mirror/leader_watcher/Types.h @@ -0,0 +1,117 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_LEADER_WATCHER_TYPES_H +#define RBD_MIRROR_LEADER_WATCHER_TYPES_H + +#include "include/int_types.h" +#include "include/buffer_fwd.h" +#include "include/encoding.h" +#include <string> +#include <vector> +#include <boost/variant.hpp> + +struct Context; + +namespace ceph { class Formatter; } + +namespace rbd { +namespace mirror { +namespace leader_watcher { + +struct Listener { + typedef std::vector<std::string> InstanceIds; + + virtual ~Listener() { + } + + virtual void post_acquire_handler(Context *on_finish) = 0; + virtual void pre_release_handler(Context *on_finish) = 0; + + virtual void update_leader_handler( + const std::string &leader_instance_id) = 0; + + virtual void handle_instances_added(const InstanceIds& instance_ids) = 0; + virtual void handle_instances_removed(const InstanceIds& instance_ids) = 0; +}; + +enum NotifyOp { + NOTIFY_OP_HEARTBEAT = 0, + NOTIFY_OP_LOCK_ACQUIRED = 1, + NOTIFY_OP_LOCK_RELEASED = 2, +}; + +struct HeartbeatPayload { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_HEARTBEAT; + + HeartbeatPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct LockAcquiredPayload { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_ACQUIRED; + + LockAcquiredPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct LockReleasedPayload { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_RELEASED; + + LockReleasedPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct UnknownPayload { + static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1); + + UnknownPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +typedef boost::variant<HeartbeatPayload, + LockAcquiredPayload, + LockReleasedPayload, + UnknownPayload> Payload; + +struct NotifyMessage { + NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) { + } + + Payload payload; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; + + static void generate_test_instances(std::list<NotifyMessage *> &o); +}; + +WRITE_CLASS_ENCODER(NotifyMessage); + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op); + +} // namespace leader_watcher +} // namespace mirror +} // namespace librbd + +using rbd::mirror::leader_watcher::encode; +using rbd::mirror::leader_watcher::decode; + +#endif // RBD_MIRROR_LEADER_WATCHER_TYPES_H diff --git a/src/tools/rbd_mirror/main.cc b/src/tools/rbd_mirror/main.cc new file mode 100644 index 00000000..ab350a01 --- /dev/null +++ b/src/tools/rbd_mirror/main.cc @@ -0,0 +1,104 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/ceph_argparse.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/perf_counters.h" +#include "global/global_init.h" +#include "global/signal_handler.h" +#include "Mirror.h" +#include "Types.h" + +#include <vector> + +rbd::mirror::Mirror *mirror = nullptr; +PerfCounters *g_perf_counters = nullptr; + +void usage() { + std::cout << "usage: rbd-mirror [options...]" << std::endl; + std::cout << "options:\n"; + std::cout << " -m monaddress[:port] connect to specified monitor\n"; + std::cout << " --keyring=<path> path to keyring for local cluster\n"; + std::cout << " --log-file=<logfile> file to log debug output\n"; + std::cout << " --debug-rbd-mirror=<log-level>/<memory-level> set rbd-mirror debug level\n"; + generic_server_usage(); +} + +static void handle_signal(int signum) +{ + if (mirror) + mirror->handle_signal(signum); +} + +int main(int argc, const char **argv) +{ + std::vector<const char*> args; + argv_to_vec(argc, argv, args); + if (args.empty()) { + cerr << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + if (ceph_argparse_need_usage(args)) { + usage(); + exit(0); + } + + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + + if (g_conf()->daemonize) { + global_init_daemonize(g_ceph_context); + } + + common_init_finish(g_ceph_context); + + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, handle_signal); + register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + std::vector<const char*> cmd_args; + argv_to_vec(argc, argv, cmd_args); + + // disable unnecessary librbd cache + g_ceph_context->_conf.set_val_or_die("rbd_cache", "false"); + + auto prio = + g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio"); + PerfCountersBuilder plb(g_ceph_context, "rbd_mirror", + rbd::mirror::l_rbd_mirror_first, + rbd::mirror::l_rbd_mirror_last); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays", + "r", prio); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes", + "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); + plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency", + "Replay latency", "rl", prio); + g_perf_counters = plb.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(g_perf_counters); + + mirror = new rbd::mirror::Mirror(g_ceph_context, cmd_args); + int r = mirror->init(); + if (r < 0) { + std::cerr << "failed to initialize: " << cpp_strerror(r) << std::endl; + goto cleanup; + } + + mirror->run(); + + cleanup: + unregister_async_signal_handler(SIGHUP, handle_signal); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); + + g_ceph_context->get_perfcounters_collection()->remove(g_perf_counters); + + delete mirror; + delete g_perf_counters; + + return r < 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc new file mode 100644 index 00000000..a1d9c1b5 --- /dev/null +++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" +#include <map> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::pool_watcher::RefreshImagesRequest " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace pool_watcher { + +static const uint32_t MAX_RETURN = 1024; + +using librbd::util::create_rados_callback; + +template <typename I> +void RefreshImagesRequest<I>::send() { + m_image_ids->clear(); + mirror_image_list(); +} + +template <typename I> +void RefreshImagesRequest<I>::mirror_image_list() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + RefreshImagesRequest<I>, + &RefreshImagesRequest<I>::handle_mirror_image_list>(this); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void RefreshImagesRequest<I>::handle_mirror_image_list(int r) { + dout(10) << "r=" << r << dendl; + + std::map<std::string, std::string> ids; + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_list_finish(&it, &ids); + } + + if (r < 0 && r != -ENOENT) { + derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + // store as global -> local image ids + for (auto &id : ids) { + m_image_ids->emplace(id.second, id.first); + } + + if (ids.size() == MAX_RETURN) { + m_start_after = ids.rbegin()->first; + mirror_image_list(); + return; + } + + finish(0); +} + +template <typename I> +void RefreshImagesRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace pool_watcher +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h new file mode 100644 index 00000000..8bfeabe2 --- /dev/null +++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h @@ -0,0 +1,73 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include <string> + +struct Context; + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace pool_watcher { + +template <typename ImageCtxT = librbd::ImageCtx> +class RefreshImagesRequest { +public: + static RefreshImagesRequest *create(librados::IoCtx &remote_io_ctx, + ImageIds *image_ids, Context *on_finish) { + return new RefreshImagesRequest(remote_io_ctx, image_ids, on_finish); + } + + RefreshImagesRequest(librados::IoCtx &remote_io_ctx, ImageIds *image_ids, + Context *on_finish) + : m_remote_io_ctx(remote_io_ctx), m_image_ids(image_ids), + m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * | /-------------\ + * | | | + * v v | (more images) + * MIRROR_IMAGE_LIST ---/ + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_remote_io_ctx; + ImageIds *m_image_ids; + Context *m_on_finish; + + bufferlist m_out_bl; + std::string m_start_after; + + void mirror_image_list(); + void handle_mirror_image_list(int r); + + void finish(int r); + +}; + +} // namespace pool_watcher +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H diff --git a/src/tools/rbd_mirror/pool_watcher/Types.h b/src/tools/rbd_mirror/pool_watcher/Types.h new file mode 100644 index 00000000..52dfc342 --- /dev/null +++ b/src/tools/rbd_mirror/pool_watcher/Types.h @@ -0,0 +1,27 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H + +#include "tools/rbd_mirror/Types.h" +#include <string> + +namespace rbd { +namespace mirror { +namespace pool_watcher { + +struct Listener { + virtual ~Listener() { + } + + virtual void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) = 0; +}; + +} // namespace pool_watcher +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H diff --git a/src/tools/rbd_mirror/service_daemon/Types.cc b/src/tools/rbd_mirror/service_daemon/Types.cc new file mode 100644 index 00000000..7dc6537c --- /dev/null +++ b/src/tools/rbd_mirror/service_daemon/Types.cc @@ -0,0 +1,29 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <iostream> + +namespace rbd { +namespace mirror { +namespace service_daemon { + +std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level) { + switch (callout_level) { + case CALLOUT_LEVEL_INFO: + os << "info"; + break; + case CALLOUT_LEVEL_WARNING: + os << "warning"; + break; + case CALLOUT_LEVEL_ERROR: + os << "error"; + break; + } + return os; +} + +} // namespace service_daemon +} // namespace mirror +} // namespace rbd + diff --git a/src/tools/rbd_mirror/service_daemon/Types.h b/src/tools/rbd_mirror/service_daemon/Types.h new file mode 100644 index 00000000..3aab7201 --- /dev/null +++ b/src/tools/rbd_mirror/service_daemon/Types.h @@ -0,0 +1,33 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H +#define CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H + +#include "include/int_types.h" +#include <iosfwd> +#include <string> +#include <boost/variant.hpp> + +namespace rbd { +namespace mirror { +namespace service_daemon { + +typedef uint64_t CalloutId; +const uint64_t CALLOUT_ID_NONE {0}; + +enum CalloutLevel { + CALLOUT_LEVEL_INFO, + CALLOUT_LEVEL_WARNING, + CALLOUT_LEVEL_ERROR +}; + +std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level); + +typedef boost::variant<bool, uint64_t, std::string> AttributeValue; + +} // namespace service_daemon +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H |