author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-27 18:24:20 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-27 18:24:20 +0000
commit    483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
tree      e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/tools/rbd_mirror
parent    Initial commit. (diff)
Adding upstream version 14.2.21.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
-rw-r--r--  src/tools/rbd_mirror/BaseRequest.h | 43
-rw-r--r--  src/tools/rbd_mirror/CMakeLists.txt | 69
-rw-r--r--  src/tools/rbd_mirror/ClusterWatcher.cc | 223
-rw-r--r--  src/tools/rbd_mirror/ClusterWatcher.h | 69
-rw-r--r--  src/tools/rbd_mirror/ImageDeleter.cc | 549
-rw-r--r--  src/tools/rbd_mirror/ImageDeleter.h | 180
-rw-r--r--  src/tools/rbd_mirror/ImageMap.cc | 601
-rw-r--r--  src/tools/rbd_mirror/ImageMap.h | 175
-rw-r--r--  src/tools/rbd_mirror/ImageReplayer.cc | 1896
-rw-r--r--  src/tools/rbd_mirror/ImageReplayer.h | 438
-rw-r--r--  src/tools/rbd_mirror/ImageSync.cc | 481
-rw-r--r--  src/tools/rbd_mirror/ImageSync.h | 160
-rw-r--r--  src/tools/rbd_mirror/ImageSyncThrottler.cc | 227
-rw-r--r--  src/tools/rbd_mirror/ImageSyncThrottler.h | 65
-rw-r--r--  src/tools/rbd_mirror/InstanceReplayer.cc | 510
-rw-r--r--  src/tools/rbd_mirror/InstanceReplayer.h | 123
-rw-r--r--  src/tools/rbd_mirror/InstanceWatcher.cc | 1299
-rw-r--r--  src/tools/rbd_mirror/InstanceWatcher.h | 264
-rw-r--r--  src/tools/rbd_mirror/Instances.cc | 359
-rw-r--r--  src/tools/rbd_mirror/Instances.h | 167
-rw-r--r--  src/tools/rbd_mirror/LeaderWatcher.cc | 1145
-rw-r--r--  src/tools/rbd_mirror/LeaderWatcher.h | 320
-rw-r--r--  src/tools/rbd_mirror/Mirror.cc | 448
-rw-r--r--  src/tools/rbd_mirror/Mirror.h | 77
-rw-r--r--  src/tools/rbd_mirror/MirrorStatusWatcher.cc | 74
-rw-r--r--  src/tools/rbd_mirror/MirrorStatusWatcher.h | 39
-rw-r--r--  src/tools/rbd_mirror/PoolReplayer.cc | 1133
-rw-r--r--  src/tools/rbd_mirror/PoolReplayer.h | 303
-rw-r--r--  src/tools/rbd_mirror/PoolWatcher.cc | 553
-rw-r--r--  src/tools/rbd_mirror/PoolWatcher.h | 166
-rw-r--r--  src/tools/rbd_mirror/ProgressContext.h | 21
-rw-r--r--  src/tools/rbd_mirror/ServiceDaemon.cc | 251
-rw-r--r--  src/tools/rbd_mirror/ServiceDaemon.h | 86
-rw-r--r--  src/tools/rbd_mirror/Threads.cc | 45
-rw-r--r--  src/tools/rbd_mirror/Threads.h | 39
-rw-r--r--  src/tools/rbd_mirror/Types.cc | 21
-rw-r--r--  src/tools/rbd_mirror/Types.h | 123
-rw-r--r--  src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc | 290
-rw-r--r--  src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h | 104
-rw-r--r--  src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc | 384
-rw-r--r--  src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h | 136
-rw-r--r--  src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc | 265
-rw-r--r--  src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h | 113
-rw-r--r--  src/tools/rbd_mirror/image_deleter/TrashWatcher.cc | 384
-rw-r--r--  src/tools/rbd_mirror/image_deleter/TrashWatcher.h | 139
-rw-r--r--  src/tools/rbd_mirror/image_deleter/Types.h | 54
-rw-r--r--  src/tools/rbd_mirror/image_map/LoadRequest.cc | 98
-rw-r--r--  src/tools/rbd_mirror/image_map/LoadRequest.h | 64
-rw-r--r--  src/tools/rbd_mirror/image_map/Policy.cc | 406
-rw-r--r--  src/tools/rbd_mirror/image_map/Policy.h | 122
-rw-r--r--  src/tools/rbd_mirror/image_map/SimplePolicy.cc | 89
-rw-r--r--  src/tools/rbd_mirror/image_map/SimplePolicy.h | 39
-rw-r--r--  src/tools/rbd_mirror/image_map/StateTransition.cc | 94
-rw-r--r--  src/tools/rbd_mirror/image_map/StateTransition.h | 76
-rw-r--r--  src/tools/rbd_mirror/image_map/Types.cc | 138
-rw-r--r--  src/tools/rbd_mirror/image_map/Types.h | 130
-rw-r--r--  src/tools/rbd_mirror/image_map/UpdateRequest.cc | 100
-rw-r--r--  src/tools/rbd_mirror/image_map/UpdateRequest.h | 65
-rw-r--r--  src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc | 785
-rw-r--r--  src/tools/rbd_mirror/image_replayer/BootstrapRequest.h | 230
-rw-r--r--  src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc | 64
-rw-r--r--  src/tools/rbd_mirror/image_replayer/CloseImageRequest.h | 56
-rw-r--r--  src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc | 506
-rw-r--r--  src/tools/rbd_mirror/image_replayer/CreateImageRequest.h | 154
-rw-r--r--  src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc | 204
-rw-r--r--  src/tools/rbd_mirror/image_replayer/EventPreprocessor.h | 122
-rw-r--r--  src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc | 85
-rw-r--r--  src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h | 75
-rw-r--r--  src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc | 125
-rw-r--r--  src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h | 67
-rw-r--r--  src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc | 75
-rw-r--r--  src/tools/rbd_mirror/image_replayer/OpenImageRequest.h | 71
-rw-r--r--  src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc | 271
-rw-r--r--  src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h | 90
-rw-r--r--  src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc | 180
-rw-r--r--  src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h | 102
-rw-r--r--  src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc | 195
-rw-r--r--  src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h | 141
-rw-r--r--  src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc | 246
-rw-r--r--  src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h | 60
-rw-r--r--  src/tools/rbd_mirror/image_replayer/Types.h | 21
-rw-r--r--  src/tools/rbd_mirror/image_replayer/Utils.cc | 50
-rw-r--r--  src/tools/rbd_mirror/image_replayer/Utils.h | 23
-rw-r--r--  src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc | 182
-rw-r--r--  src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h | 96
-rw-r--r--  src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc | 220
-rw-r--r--  src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h | 96
-rw-r--r--  src/tools/rbd_mirror/instance_watcher/Types.cc | 245
-rw-r--r--  src/tools/rbd_mirror/instance_watcher/Types.h | 197
-rw-r--r--  src/tools/rbd_mirror/instances/Types.h | 28
-rw-r--r--  src/tools/rbd_mirror/leader_watcher/Types.cc | 161
-rw-r--r--  src/tools/rbd_mirror/leader_watcher/Types.h | 117
-rw-r--r--  src/tools/rbd_mirror/main.cc | 104
-rw-r--r--  src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc | 89
-rw-r--r--  src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h | 73
-rw-r--r--  src/tools/rbd_mirror/pool_watcher/Types.h | 27
-rw-r--r--  src/tools/rbd_mirror/service_daemon/Types.cc | 29
-rw-r--r--  src/tools/rbd_mirror/service_daemon/Types.h | 33

98 files changed, 21727 insertions(+), 0 deletions(-)
diff --git a/src/tools/rbd_mirror/BaseRequest.h b/src/tools/rbd_mirror/BaseRequest.h
new file mode 100644
index 00000000..5053eb83
--- /dev/null
+++ b/src/tools/rbd_mirror/BaseRequest.h
@@ -0,0 +1,43 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_BASE_REQUEST_H
+#define CEPH_RBD_MIRROR_BASE_REQUEST_H
+
+#include "common/RefCountedObj.h"
+#include "include/Context.h"
+
+namespace rbd {
+namespace mirror {
+
+class BaseRequest : public RefCountedObject {
+public:
+ BaseRequest(const std::string& name, CephContext *cct, Context *on_finish)
+ : RefCountedObject(cct, 1), m_name(name), m_cct(cct),
+ m_on_finish(on_finish) {
+ }
+
+ virtual void send() = 0;
+ virtual void cancel() {}
+
+protected:
+ virtual void finish(int r) {
+ if (m_cct) {
+ lsubdout(m_cct, rbd_mirror, 20) << m_name << "::finish: r=" << r << dendl;
+ }
+ if (m_on_finish) {
+ m_on_finish->complete(r);
+ }
+ put();
+ }
+
+private:
+ const std::string m_name;
+ CephContext *m_cct;
+ Context *m_on_finish;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_BASE_REQUEST_H
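BaseRequest is the template for the asynchronous request state machines added throughout this patch: a subclass implements send(), performs its async steps, and ends by calling finish(), which logs, completes m_on_finish, and drops the creation reference via put(). A minimal sketch of a subclass (illustration only, not part of the patch; NoopRequest is a hypothetical name):

    // Illustration only: a trivial BaseRequest subclass showing the
    // send()/finish() contract. NoopRequest is a hypothetical name.
    #include "tools/rbd_mirror/BaseRequest.h"

    namespace rbd {
    namespace mirror {

    class NoopRequest : public BaseRequest {
    public:
      NoopRequest(CephContext *cct, Context *on_finish)
        : BaseRequest("rbd::mirror::NoopRequest", cct, on_finish) {
      }

      void send() override {
        // a real request would issue async I/O here and call finish()
        // from its completion callback; finish() completes m_on_finish
        // and releases the creation reference
        finish(0);
      }
    };

    } // namespace mirror
    } // namespace rbd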
diff --git a/src/tools/rbd_mirror/CMakeLists.txt b/src/tools/rbd_mirror/CMakeLists.txt
new file mode 100644
index 00000000..30106a86
--- /dev/null
+++ b/src/tools/rbd_mirror/CMakeLists.txt
@@ -0,0 +1,69 @@
+add_library(rbd_mirror_types STATIC
+ image_map/Types.cc
+ instance_watcher/Types.cc
+ leader_watcher/Types.cc)
+
+set(rbd_mirror_internal
+ ClusterWatcher.cc
+ ImageDeleter.cc
+ ImageMap.cc
+ ImageReplayer.cc
+ ImageSync.cc
+ ImageSyncThrottler.cc
+ InstanceReplayer.cc
+ InstanceWatcher.cc
+ Instances.cc
+ LeaderWatcher.cc
+ Mirror.cc
+ MirrorStatusWatcher.cc
+ PoolReplayer.cc
+ PoolWatcher.cc
+ ServiceDaemon.cc
+ Threads.cc
+ Types.cc
+ image_deleter/SnapshotPurgeRequest.cc
+ image_deleter/TrashMoveRequest.cc
+ image_deleter/TrashRemoveRequest.cc
+ image_deleter/TrashWatcher.cc
+ image_map/LoadRequest.cc
+ image_map/Policy.cc
+ image_map/SimplePolicy.cc
+ image_map/StateTransition.cc
+ image_map/UpdateRequest.cc
+ image_replayer/BootstrapRequest.cc
+ image_replayer/CloseImageRequest.cc
+ image_replayer/CreateImageRequest.cc
+ image_replayer/EventPreprocessor.cc
+ image_replayer/GetMirrorImageIdRequest.cc
+ image_replayer/IsPrimaryRequest.cc
+ image_replayer/OpenImageRequest.cc
+ image_replayer/OpenLocalImageRequest.cc
+ image_replayer/PrepareLocalImageRequest.cc
+ image_replayer/PrepareRemoteImageRequest.cc
+ image_replayer/ReplayStatusFormatter.cc
+ image_replayer/Utils.cc
+ image_sync/SyncPointCreateRequest.cc
+ image_sync/SyncPointPruneRequest.cc
+ pool_watcher/RefreshImagesRequest.cc
+ service_daemon/Types.cc)
+
+add_library(rbd_mirror_internal STATIC
+ ${rbd_mirror_internal})
+
+add_executable(rbd-mirror
+ main.cc)
+target_link_libraries(rbd-mirror
+ rbd_mirror_internal
+ rbd_mirror_types
+ rbd_api
+ rbd_internal
+ rbd_types
+ journal
+ librados
+ osdc
+ cls_rbd_client
+ cls_lock_client
+ cls_journal_client
+ global
+ ${ALLOC_LIBS})
+install(TARGETS rbd-mirror DESTINATION bin)
diff --git a/src/tools/rbd_mirror/ClusterWatcher.cc b/src/tools/rbd_mirror/ClusterWatcher.cc
new file mode 100644
index 00000000..54329de6
--- /dev/null
+++ b/src/tools/rbd_mirror/ClusterWatcher.cc
@@ -0,0 +1,223 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ClusterWatcher.h"
+#include "include/stringify.h"
+#include "common/ceph_json.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/internal.h"
+#include "librbd/api/Mirror.h"
+#include "tools/rbd_mirror/ServiceDaemon.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ClusterWatcher:" << this << " " \
+ << __func__ << ": "
+
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librados::Rados;
+using librados::IoCtx;
+
+namespace rbd {
+namespace mirror {
+
+ClusterWatcher::ClusterWatcher(RadosRef cluster, Mutex &lock,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon)
+ : m_cluster(cluster), m_lock(lock), m_service_daemon(service_daemon)
+{
+}
+
+const ClusterWatcher::PoolPeers& ClusterWatcher::get_pool_peers() const
+{
+ ceph_assert(m_lock.is_locked());
+ return m_pool_peers;
+}
+
+void ClusterWatcher::refresh_pools()
+{
+ dout(20) << "enter" << dendl;
+
+ PoolPeers pool_peers;
+ read_pool_peers(&pool_peers);
+
+ Mutex::Locker l(m_lock);
+ m_pool_peers = pool_peers;
+ // TODO: perhaps use a workqueue instead, once we get notifications
+ // about config changes for existing pools
+}
+
+void ClusterWatcher::read_pool_peers(PoolPeers *pool_peers)
+{
+ int r = m_cluster->wait_for_latest_osdmap();
+ if (r < 0) {
+ derr << "error waiting for OSD map: " << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ list<pair<int64_t, string> > pools;
+ r = m_cluster->pool_list2(pools);
+ if (r < 0) {
+ derr << "error listing pools: " << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ std::set<int64_t> service_pool_ids;
+ for (auto& kv : pools) {
+ int64_t pool_id = kv.first;
+ auto& pool_name = kv.second;
+ int64_t base_tier;
+ r = m_cluster->pool_get_base_tier(pool_id, &base_tier);
+ if (r == -ENOENT) {
+ dout(10) << "pool " << pool_name << " no longer exists" << dendl;
+ continue;
+ } else if (r < 0) {
+ derr << "Error retrieving base tier for pool " << pool_name << dendl;
+ continue;
+ }
+ if (pool_id != base_tier) {
+ // pool is a cache; skip it
+ continue;
+ }
+
+ IoCtx ioctx;
+ r = m_cluster->ioctx_create2(pool_id, ioctx);
+ if (r == -ENOENT) {
+ dout(10) << "pool " << pool_id << " no longer exists" << dendl;
+ continue;
+ } else if (r < 0) {
+ derr << "Error accessing pool " << pool_name << cpp_strerror(r) << dendl;
+ continue;
+ }
+
+ cls::rbd::MirrorMode mirror_mode_internal;
+ r = librbd::cls_client::mirror_mode_get(&ioctx, &mirror_mode_internal);
+ if (r == 0 && mirror_mode_internal == cls::rbd::MIRROR_MODE_DISABLED) {
+ dout(10) << "mirroring is disabled for pool " << pool_name << dendl;
+ continue;
+ }
+
+ service_pool_ids.insert(pool_id);
+ if (m_service_pools.find(pool_id) == m_service_pools.end()) {
+ m_service_pools[pool_id] = {};
+ m_service_daemon->add_pool(pool_id, pool_name);
+ }
+
+ if (r == -EPERM) {
+ dout(10) << "access denied querying pool " << pool_name << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_WARNING, "access denied");
+ continue;
+ } else if (r < 0) {
+ derr << "could not tell whether mirroring was enabled for " << pool_name
+ << " : " << cpp_strerror(r) << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_WARNING, "mirroring mode query failed");
+ continue;
+ }
+
+ vector<librbd::mirror_peer_t> configs;
+ r = librbd::api::Mirror<>::peer_list(ioctx, &configs);
+ if (r < 0) {
+ derr << "error reading mirroring config for pool " << pool_name
+ << cpp_strerror(r) << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_ERROR, "mirroring peer list failed");
+ continue;
+ }
+
+ std::vector<PeerSpec> peers{configs.begin(), configs.end()};
+ for (auto& peer : peers) {
+ r = resolve_peer_config_keys(pool_id, pool_name, &peer);
+ if (r < 0) {
+ break;
+ }
+ }
+
+ if (m_service_pools[pool_id] != service_daemon::CALLOUT_ID_NONE) {
+ m_service_daemon->remove_callout(pool_id, m_service_pools[pool_id]);
+ m_service_pools[pool_id] = service_daemon::CALLOUT_ID_NONE;
+ }
+
+ pool_peers->emplace(pool_id, Peers{peers.begin(), peers.end()});
+ }
+
+ for (auto it = m_service_pools.begin(); it != m_service_pools.end(); ) {
+ auto current_it(it++);
+ if (service_pool_ids.find(current_it->first) == service_pool_ids.end()) {
+ m_service_daemon->remove_pool(current_it->first);
+ m_service_pools.erase(current_it->first);
+ }
+ }
+}
+
+int ClusterWatcher::resolve_peer_config_keys(int64_t pool_id,
+ const std::string& pool_name,
+ PeerSpec* peer) {
+ dout(10) << "retrieving config-key: pool_id=" << pool_id << ", "
+ << "pool_name=" << pool_name << ", "
+ << "peer_uuid=" << peer->uuid << dendl;
+
+ std::string cmd =
+ "{"
+ "\"prefix\": \"config-key get\", "
+ "\"key\": \"" RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) +
+ "/" + peer->uuid + "\""
+ "}";
+
+ bufferlist in_bl;
+ bufferlist out_bl;
+ int r = m_cluster->mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r == -ENOENT || out_bl.length() == 0) {
+ return 0;
+ } else if (r < 0) {
+ derr << "error reading mirroring peer config for pool " << pool_name << ": "
+ << cpp_strerror(r) << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_WARNING,
+ "mirroring peer config-key query failed");
+ return r;
+ }
+
+ bool json_valid = false;
+ json_spirit::mValue json_root;
+ if (json_spirit::read(out_bl.to_str(), json_root)) {
+ try {
+ auto& json_obj = json_root.get_obj();
+ if (json_obj.count("mon_host")) {
+ peer->mon_host = json_obj["mon_host"].get_str();
+ }
+ if (json_obj.count("key")) {
+ peer->key = json_obj["key"].get_str();
+ }
+ json_valid = true;
+ } catch (std::runtime_error&) {
+ }
+ }
+
+ if (!json_valid) {
+ derr << "error parsing mirroring peer config for pool " << pool_name << ", "
+ << "peer " << peer->uuid << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_WARNING,
+ "mirroring peer config-key decode failed");
+ }
+
+ return 0;
+}
+
+} // namespace mirror
+} // namespace rbd
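ClusterWatcher is poll-driven: the caller invokes refresh_pools() on its own schedule, and get_pool_peers() asserts that the shared lock is held. A hypothetical usage sketch (not part of the patch; watcher, lock, and the replayer wiring are assumptions):

    // Illustration only: periodic polling, as a caller such as the Mirror
    // daemon might do. refresh_pools() takes the lock internally;
    // get_pool_peers() must be called with the lock held.
    watcher.refresh_pools();
    {
      Mutex::Locker locker(lock);
      for (const auto& [pool_id, peers] : watcher.get_pool_peers()) {
        // spawn or update one replayer per mirror-enabled pool/peer set
      }
    }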
diff --git a/src/tools/rbd_mirror/ClusterWatcher.h b/src/tools/rbd_mirror/ClusterWatcher.h
new file mode 100644
index 00000000..e8430b47
--- /dev/null
+++ b/src/tools/rbd_mirror/ClusterWatcher.h
@@ -0,0 +1,69 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
+#define CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+
+#include "common/ceph_context.h"
+#include "common/Mutex.h"
+#include "common/Timer.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <unordered_map>
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ServiceDaemon;
+
+/**
+ * Tracks mirroring configuration for pools in a single
+ * cluster.
+ */
+class ClusterWatcher {
+public:
+ struct PeerSpecCompare {
+ bool operator()(const PeerSpec& lhs, const PeerSpec& rhs) const {
+ return (lhs.uuid < rhs.uuid);
+ }
+ };
+ typedef std::set<PeerSpec, PeerSpecCompare> Peers;
+ typedef std::map<int64_t, Peers> PoolPeers;
+
+ ClusterWatcher(RadosRef cluster, Mutex &lock,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon);
+ ~ClusterWatcher() = default;
+ ClusterWatcher(const ClusterWatcher&) = delete;
+ ClusterWatcher& operator=(const ClusterWatcher&) = delete;
+
+ // Caller controls frequency of calls
+ void refresh_pools();
+ const PoolPeers& get_pool_peers() const;
+
+private:
+ typedef std::unordered_map<int64_t, service_daemon::CalloutId> ServicePools;
+
+ RadosRef m_cluster;
+ Mutex &m_lock;
+ ServiceDaemon<librbd::ImageCtx>* m_service_daemon;
+
+ ServicePools m_service_pools;
+ PoolPeers m_pool_peers;
+
+ void read_pool_peers(PoolPeers *pool_peers);
+
+ int resolve_peer_config_keys(int64_t pool_id, const std::string& pool_name,
+ PeerSpec* peer);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
diff --git a/src/tools/rbd_mirror/ImageDeleter.cc b/src/tools/rbd_mirror/ImageDeleter.cc
new file mode 100644
index 00000000..f4d928ca
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageDeleter.cc
@@ -0,0 +1,549 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "include/rados/librados.hpp"
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "common/WorkQueue.h"
+#include "global/global_context.h"
+#include "librbd/internal.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Operations.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/Utils.h"
+#include "ImageDeleter.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/TrashWatcher.h"
+#include <map>
+#include <sstream>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+
+using std::string;
+using std::stringstream;
+using std::vector;
+using std::pair;
+using std::make_pair;
+
+using librados::IoCtx;
+using namespace librbd;
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+class ImageDeleterAdminSocketCommand {
+public:
+ virtual ~ImageDeleterAdminSocketCommand() {}
+ virtual bool call(Formatter *f, stringstream *ss) = 0;
+};
+
+template <typename I>
+class StatusCommand : public ImageDeleterAdminSocketCommand {
+public:
+ explicit StatusCommand(ImageDeleter<I> *image_del) : image_del(image_del) {}
+
+ bool call(Formatter *f, stringstream *ss) override {
+ image_del->print_status(f, ss);
+ return true;
+ }
+
+private:
+ ImageDeleter<I> *image_del;
+};
+
+} // anonymous namespace
+
+template <typename I>
+class ImageDeleterAdminSocketHook : public AdminSocketHook {
+public:
+ ImageDeleterAdminSocketHook(CephContext *cct, const std::string& pool_name,
+ ImageDeleter<I> *image_del) :
+ admin_socket(cct->get_admin_socket()) {
+
+ std::string command;
+ int r;
+
+ command = "rbd mirror deletion status " + pool_name;
+ r = admin_socket->register_command(command, command, this,
+ "get status for image deleter");
+ if (r == 0) {
+ commands[command] = new StatusCommand<I>(image_del);
+ }
+
+ }
+
+ ~ImageDeleterAdminSocketHook() override {
+ for (Commands::const_iterator i = commands.begin(); i != commands.end();
+ ++i) {
+ (void)admin_socket->unregister_command(i->first);
+ delete i->second;
+ }
+ }
+
+ bool call(std::string_view command, const cmdmap_t& cmdmap,
+ std::string_view format, bufferlist& out) override {
+ Commands::const_iterator i = commands.find(command);
+ ceph_assert(i != commands.end());
+ Formatter *f = Formatter::create(format);
+ stringstream ss;
+ bool r = i->second->call(f, &ss);
+ delete f;
+ out.append(ss);
+ return r;
+ }
+
+private:
+ typedef std::map<std::string, ImageDeleterAdminSocketCommand*,
+ std::less<>> Commands;
+ AdminSocket *admin_socket;
+ Commands commands;
+};
+
+template <typename I>
+ImageDeleter<I>::ImageDeleter(librados::IoCtx& local_io_ctx,
+ Threads<librbd::ImageCtx>* threads,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon)
+ : m_local_io_ctx(local_io_ctx), m_threads(threads),
+ m_service_daemon(service_daemon), m_trash_listener(this),
+ m_lock(librbd::util::unique_lock_name("rbd::mirror::ImageDeleter::m_lock",
+ this)) {
+}
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << " " \
+ << __func__ << ": "
+
+template <typename I>
+void ImageDeleter<I>::trash_move(librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id,
+ bool resync,
+ ContextWQ* work_queue, Context* on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << ", "
+ << "resync=" << resync << dendl;
+
+ auto req = rbd::mirror::image_deleter::TrashMoveRequest<>::create(
+ local_io_ctx, global_image_id, resync, work_queue, on_finish);
+ req->send();
+}
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << this << " " \
+ << __func__ << ": "
+
+template <typename I>
+void ImageDeleter<I>::init(Context* on_finish) {
+ dout(10) << dendl;
+
+ m_asok_hook = new ImageDeleterAdminSocketHook<I>(
+ g_ceph_context, m_local_io_ctx.get_pool_name(), this);
+
+ m_trash_watcher = image_deleter::TrashWatcher<I>::create(m_local_io_ctx,
+ m_threads,
+ m_trash_listener);
+ m_trash_watcher->init(on_finish);
+}
+
+template <typename I>
+void ImageDeleter<I>::shut_down(Context* on_finish) {
+ dout(10) << dendl;
+
+ delete m_asok_hook;
+ m_asok_hook = nullptr;
+
+ shut_down_trash_watcher(on_finish);
+}
+
+template <typename I>
+void ImageDeleter<I>::shut_down_trash_watcher(Context* on_finish) {
+ dout(10) << dendl;
+ ceph_assert(m_trash_watcher);
+ auto ctx = new FunctionContext([this, on_finish](int r) {
+ delete m_trash_watcher;
+ m_trash_watcher = nullptr;
+
+ wait_for_ops(on_finish);
+ });
+ m_trash_watcher->shut_down(ctx);
+}
+
+template <typename I>
+void ImageDeleter<I>::wait_for_ops(Context* on_finish) {
+ {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ m_running = false;
+ cancel_retry_timer();
+ }
+
+ auto ctx = new FunctionContext([this, on_finish](int) {
+ cancel_all_deletions(on_finish);
+ });
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void ImageDeleter<I>::cancel_all_deletions(Context* on_finish) {
+ {
+ Mutex::Locker locker(m_lock);
+ // wake up any external state machines waiting on deletions
+ ceph_assert(m_in_flight_delete_queue.empty());
+ for (auto& queue : {&m_delete_queue, &m_retry_delete_queue}) {
+ for (auto& info : *queue) {
+ notify_on_delete(info->image_id, -ECANCELED);
+ }
+ queue->clear();
+ }
+ }
+ on_finish->complete(0);
+}
+
+template <typename I>
+void ImageDeleter<I>::wait_for_deletion(const std::string& image_id,
+ bool scheduled_only,
+ Context* on_finish) {
+ dout(5) << "image_id=" << image_id << dendl;
+
+ on_finish = new FunctionContext([this, on_finish](int r) {
+ m_threads->work_queue->queue(on_finish, r);
+ });
+
+ Mutex::Locker locker(m_lock);
+ auto del_info = find_delete_info(image_id);
+ if (!del_info && scheduled_only) {
+ // image not scheduled for deletion
+ on_finish->complete(0);
+ return;
+ }
+
+ notify_on_delete(image_id, -ESTALE);
+ m_on_delete_contexts[image_id] = on_finish;
+}
+
+template <typename I>
+void ImageDeleter<I>::complete_active_delete(DeleteInfoRef* delete_info,
+ int r) {
+ dout(20) << "info=" << *delete_info << ", r=" << r << dendl;
+ Mutex::Locker locker(m_lock);
+ notify_on_delete((*delete_info)->image_id, r);
+ delete_info->reset();
+}
+
+template <typename I>
+void ImageDeleter<I>::enqueue_failed_delete(DeleteInfoRef* delete_info,
+ int error_code,
+ double retry_delay) {
+ dout(20) << "info=" << *delete_info << ", r=" << error_code << dendl;
+ if (error_code == -EBLACKLISTED) {
+ Mutex::Locker locker(m_lock);
+ derr << "blacklisted while deleting local image" << dendl;
+ complete_active_delete(delete_info, error_code);
+ return;
+ }
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ auto& delete_info_ref = *delete_info;
+ notify_on_delete(delete_info_ref->image_id, error_code);
+ delete_info_ref->error_code = error_code;
+ ++delete_info_ref->retries;
+ delete_info_ref->retry_time = ceph_clock_now();
+ delete_info_ref->retry_time += retry_delay;
+ m_retry_delete_queue.push_back(delete_info_ref);
+
+ schedule_retry_timer();
+}
+
+template <typename I>
+typename ImageDeleter<I>::DeleteInfoRef
+ImageDeleter<I>::find_delete_info(const std::string &image_id) {
+ ceph_assert(m_lock.is_locked());
+ DeleteQueue delete_queues[] = {m_in_flight_delete_queue,
+ m_retry_delete_queue,
+ m_delete_queue};
+
+ DeleteInfo delete_info{image_id};
+ for (auto& queue : delete_queues) {
+ auto it = std::find_if(queue.begin(), queue.end(),
+ [&delete_info](const DeleteInfoRef& ref) {
+ return delete_info == *ref;
+ });
+ if (it != queue.end()) {
+ return *it;
+ }
+ }
+ return {};
+}
+
+template <typename I>
+void ImageDeleter<I>::print_status(Formatter *f, stringstream *ss) {
+ dout(20) << dendl;
+
+ if (f) {
+ f->open_object_section("image_deleter_status");
+ f->open_array_section("delete_images_queue");
+ }
+
+ Mutex::Locker l(m_lock);
+ for (const auto& image : m_delete_queue) {
+ image->print_status(f, ss);
+ }
+
+ if (f) {
+ f->close_section();
+ f->open_array_section("failed_deletes_queue");
+ }
+
+ for (const auto& image : m_retry_delete_queue) {
+ image->print_status(f, ss, true);
+ }
+
+ if (f) {
+ f->close_section();
+ f->close_section();
+ f->flush(*ss);
+ }
+}
+
+template <typename I>
+vector<string> ImageDeleter<I>::get_delete_queue_items() {
+ vector<string> items;
+
+ Mutex::Locker l(m_lock);
+ for (const auto& del_info : m_delete_queue) {
+ items.push_back(del_info->image_id);
+ }
+
+ return items;
+}
+
+template <typename I>
+vector<pair<string, int> > ImageDeleter<I>::get_failed_queue_items() {
+ vector<pair<string, int> > items;
+
+ Mutex::Locker l(m_lock);
+ for (const auto& del_info : m_retry_delete_queue) {
+ items.push_back(make_pair(del_info->image_id,
+ del_info->error_code));
+ }
+
+ return items;
+}
+
+template <typename I>
+void ImageDeleter<I>::remove_images() {
+ dout(10) << dendl;
+
+ auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+ uint64_t max_concurrent_deletions = cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_concurrent_image_deletions");
+
+ Mutex::Locker locker(m_lock);
+ while (true) {
+ if (!m_running || m_delete_queue.empty() ||
+ m_in_flight_delete_queue.size() >= max_concurrent_deletions) {
+ return;
+ }
+
+ DeleteInfoRef delete_info = m_delete_queue.front();
+ m_delete_queue.pop_front();
+
+ ceph_assert(delete_info);
+ remove_image(delete_info);
+ }
+}
+
+template <typename I>
+void ImageDeleter<I>::remove_image(DeleteInfoRef delete_info) {
+ dout(10) << "info=" << *delete_info << dendl;
+ ceph_assert(m_lock.is_locked());
+
+ m_in_flight_delete_queue.push_back(delete_info);
+ m_async_op_tracker.start_op();
+
+ auto ctx = new FunctionContext([this, delete_info](int r) {
+ handle_remove_image(delete_info, r);
+ m_async_op_tracker.finish_op();
+ });
+
+ auto req = image_deleter::TrashRemoveRequest<I>::create(
+ m_local_io_ctx, delete_info->image_id, &delete_info->error_result,
+ m_threads->work_queue, ctx);
+ req->send();
+}
+
+template <typename I>
+void ImageDeleter<I>::handle_remove_image(DeleteInfoRef delete_info,
+ int r) {
+ dout(10) << "info=" << *delete_info << ", r=" << r << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_lock.is_locked());
+ auto it = std::find(m_in_flight_delete_queue.begin(),
+ m_in_flight_delete_queue.end(), delete_info);
+ ceph_assert(it != m_in_flight_delete_queue.end());
+ m_in_flight_delete_queue.erase(it);
+ }
+
+ if (r < 0) {
+ if (delete_info->error_result == image_deleter::ERROR_RESULT_COMPLETE) {
+ complete_active_delete(&delete_info, r);
+ } else if (delete_info->error_result ==
+ image_deleter::ERROR_RESULT_RETRY_IMMEDIATELY) {
+ enqueue_failed_delete(&delete_info, r, m_busy_interval);
+ } else {
+ auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+ double failed_interval = cct->_conf.get_val<double>(
+ "rbd_mirror_delete_retry_interval");
+ enqueue_failed_delete(&delete_info, r, failed_interval);
+ }
+ } else {
+ complete_active_delete(&delete_info, 0);
+ }
+
+ // process the next queued image to delete
+ remove_images();
+}
+
+template <typename I>
+void ImageDeleter<I>::schedule_retry_timer() {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+ if (!m_running || m_timer_ctx != nullptr || m_retry_delete_queue.empty()) {
+ return;
+ }
+
+ dout(10) << dendl;
+ auto &delete_info = m_retry_delete_queue.front();
+ m_timer_ctx = new FunctionContext([this](int r) {
+ handle_retry_timer();
+ });
+ m_threads->timer->add_event_at(delete_info->retry_time, m_timer_ctx);
+}
+
+template <typename I>
+void ImageDeleter<I>::cancel_retry_timer() {
+ dout(10) << dendl;
+ ceph_assert(m_threads->timer_lock.is_locked());
+ if (m_timer_ctx != nullptr) {
+ bool canceled = m_threads->timer->cancel_event(m_timer_ctx);
+ m_timer_ctx = nullptr;
+ ceph_assert(canceled);
+ }
+}
+
+template <typename I>
+void ImageDeleter<I>::handle_retry_timer() {
+ dout(10) << dendl;
+ ceph_assert(m_threads->timer_lock.is_locked());
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_timer_ctx != nullptr);
+ m_timer_ctx = nullptr;
+
+ ceph_assert(m_running);
+ ceph_assert(!m_retry_delete_queue.empty());
+
+ // move all retry-ready items back to the main queue
+ utime_t now = ceph_clock_now();
+ while (!m_retry_delete_queue.empty()) {
+ auto &delete_info = m_retry_delete_queue.front();
+ if (delete_info->retry_time > now) {
+ break;
+ }
+
+ m_delete_queue.push_back(delete_info);
+ m_retry_delete_queue.pop_front();
+ }
+
+ // schedule wake up for any future retries
+ schedule_retry_timer();
+
+ // start (concurrent) removal of images
+ m_async_op_tracker.start_op();
+ auto ctx = new FunctionContext([this](int r) {
+ remove_images();
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void ImageDeleter<I>::handle_trash_image(const std::string& image_id,
+ const utime_t& deferment_end_time) {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+
+ auto del_info = find_delete_info(image_id);
+ if (del_info != nullptr) {
+ dout(20) << "image " << image_id << " "
+ << "was already scheduled for deletion" << dendl;
+ return;
+ }
+
+ dout(10) << "image_id=" << image_id << ", "
+ << "deferment_end_time=" << deferment_end_time << dendl;
+
+ del_info.reset(new DeleteInfo(image_id));
+ del_info->retry_time = deferment_end_time;
+ m_retry_delete_queue.push_back(del_info);
+
+ schedule_retry_timer();
+}
+
+template <typename I>
+void ImageDeleter<I>::notify_on_delete(const std::string& image_id,
+ int r) {
+ dout(10) << "image_id=" << image_id << ", r=" << r << dendl;
+ auto it = m_on_delete_contexts.find(image_id);
+ if (it == m_on_delete_contexts.end()) {
+ return;
+ }
+
+ it->second->complete(r);
+ m_on_delete_contexts.erase(it);
+}
+
+template <typename I>
+void ImageDeleter<I>::DeleteInfo::print_status(Formatter *f, stringstream *ss,
+ bool print_failure_info) {
+ if (f) {
+ f->open_object_section("delete_info");
+ f->dump_string("image_id", image_id);
+ if (print_failure_info) {
+ f->dump_string("error_code", cpp_strerror(error_code));
+ f->dump_int("retries", retries);
+ }
+ f->close_section();
+ f->flush(*ss);
+ } else {
+ *ss << *this;
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageDeleter<librbd::ImageCtx>;
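wait_for_deletion() above queues its callback on the work queue and replaces any prior waiter for the same image with -ESTALE. A hypothetical test-style sketch (not part of the patch), using Ceph's C_SaferCond to block on the callback:

    // Illustration only: wait until a scheduled deletion for "image_id"
    // completes; r is -ECANCELED if deletions were cancelled at shutdown.
    C_SaferCond ctx;
    image_deleter->wait_for_deletion("image_id", true /* scheduled_only */,
                                     &ctx);
    int r = ctx.wait();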
diff --git a/src/tools/rbd_mirror/ImageDeleter.h b/src/tools/rbd_mirror/ImageDeleter.h
new file mode 100644
index 00000000..8a17eb38
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageDeleter.h
@@ -0,0 +1,180 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_H
+
+#include "include/utime.h"
+#include "common/AsyncOpTracker.h"
+#include "common/Mutex.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+#include <atomic>
+#include <deque>
+#include <iosfwd>
+#include <map>
+#include <memory>
+#include <vector>
+
+class AdminSocketHook;
+class Context;
+class ContextWQ;
+class SafeTimer;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ServiceDaemon;
+template <typename> class Threads;
+
+namespace image_deleter { template <typename> struct TrashWatcher; }
+
+/**
+ * Manage deletion of non-primary images.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageDeleter {
+public:
+ static ImageDeleter* create(librados::IoCtx& local_io_ctx,
+ Threads<librbd::ImageCtx>* threads,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon) {
+ return new ImageDeleter(local_io_ctx, threads, service_daemon);
+ }
+
+ ImageDeleter(librados::IoCtx& local_io_ctx,
+ Threads<librbd::ImageCtx>* threads,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon);
+
+ ImageDeleter(const ImageDeleter&) = delete;
+ ImageDeleter& operator=(const ImageDeleter&) = delete;
+
+ static void trash_move(librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id, bool resync,
+ ContextWQ* work_queue, Context* on_finish);
+
+ void init(Context* on_finish);
+ void shut_down(Context* on_finish);
+
+ void print_status(Formatter *f, std::stringstream *ss);
+
+ // for testing purposes
+ void wait_for_deletion(const std::string &image_id,
+ bool scheduled_only, Context* on_finish);
+
+ std::vector<std::string> get_delete_queue_items();
+ std::vector<std::pair<std::string, int> > get_failed_queue_items();
+
+ inline void set_busy_timer_interval(double interval) {
+ m_busy_interval = interval;
+ }
+
+private:
+ struct TrashListener : public image_deleter::TrashListener {
+ ImageDeleter *image_deleter;
+
+ TrashListener(ImageDeleter *image_deleter) : image_deleter(image_deleter) {
+ }
+
+ void handle_trash_image(const std::string& image_id,
+ const utime_t& deferment_end_time) override {
+ image_deleter->handle_trash_image(image_id, deferment_end_time);
+ }
+ };
+
+ struct DeleteInfo {
+ std::string image_id;
+
+ image_deleter::ErrorResult error_result = {};
+ int error_code = 0;
+ utime_t retry_time = {};
+ int retries = 0;
+
+ DeleteInfo(const std::string& image_id)
+ : image_id(image_id) {
+ }
+
+ inline bool operator==(const DeleteInfo& delete_info) const {
+ return (image_id == delete_info.image_id);
+ }
+
+ friend std::ostream& operator<<(std::ostream& os, DeleteInfo& delete_info) {
+ os << "[image_id=" << delete_info.image_id << "]";
+ return os;
+ }
+
+ void print_status(Formatter *f, std::stringstream *ss,
+ bool print_failure_info=false);
+ };
+ typedef std::shared_ptr<DeleteInfo> DeleteInfoRef;
+ typedef std::deque<DeleteInfoRef> DeleteQueue;
+ typedef std::map<std::string, Context*> OnDeleteContexts;
+
+ librados::IoCtx& m_local_io_ctx;
+ Threads<librbd::ImageCtx>* m_threads;
+ ServiceDaemon<librbd::ImageCtx>* m_service_daemon;
+
+ image_deleter::TrashWatcher<ImageCtxT>* m_trash_watcher = nullptr;
+ TrashListener m_trash_listener;
+
+ std::atomic<unsigned> m_running { 1 };
+
+ double m_busy_interval = 1;
+
+ AsyncOpTracker m_async_op_tracker;
+
+ Mutex m_lock;
+ DeleteQueue m_delete_queue;
+ DeleteQueue m_retry_delete_queue;
+ DeleteQueue m_in_flight_delete_queue;
+
+ OnDeleteContexts m_on_delete_contexts;
+
+ AdminSocketHook *m_asok_hook = nullptr;
+
+ Context *m_timer_ctx = nullptr;
+
+ bool process_image_delete();
+
+ void complete_active_delete(DeleteInfoRef* delete_info, int r);
+ void enqueue_failed_delete(DeleteInfoRef* delete_info, int error_code,
+ double retry_delay);
+
+ DeleteInfoRef find_delete_info(const std::string &image_id);
+
+ void remove_images();
+ void remove_image(DeleteInfoRef delete_info);
+ void handle_remove_image(DeleteInfoRef delete_info, int r);
+
+ void schedule_retry_timer();
+ void cancel_retry_timer();
+ void handle_retry_timer();
+
+ void handle_trash_image(const std::string& image_id,
+ const utime_t& deferment_end_time);
+
+ void shut_down_trash_watcher(Context* on_finish);
+ void wait_for_ops(Context* on_finish);
+ void cancel_all_deletions(Context* on_finish);
+
+ void notify_on_delete(const std::string& image_id, int r);
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageDeleter<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_H
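A hypothetical wiring sketch (not part of the patch) for how a caller such as PoolReplayer might construct and start the deleter declared above; local_io_ctx, threads, and service_daemon are assumed in scope:

    // Illustration only: create() + init() start the trash watcher and
    // register the admin socket hook; shut_down() reverses both.
    auto image_deleter = rbd::mirror::ImageDeleter<librbd::ImageCtx>::create(
      local_io_ctx, threads, service_daemon);

    C_SaferCond init_ctx;
    image_deleter->init(&init_ctx);
    int r = init_ctx.wait();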
diff --git a/src/tools/rbd_mirror/ImageMap.cc b/src/tools/rbd_mirror/ImageMap.cc
new file mode 100644
index 00000000..58fa5e03
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageMap.cc
@@ -0,0 +1,601 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "common/WorkQueue.h"
+
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/Threads.h"
+
+#include "ImageMap.h"
+#include "image_map/LoadRequest.h"
+#include "image_map/SimplePolicy.h"
+#include "image_map/UpdateRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageMap: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using ::operator<<;
+using image_map::Policy;
+
+using librbd::util::unique_lock_name;
+using librbd::util::create_async_context_callback;
+
+template <typename I>
+struct ImageMap<I>::C_NotifyInstance : public Context {
+ ImageMap* image_map;
+ std::string global_image_id;
+ bool acquire_release;
+
+ C_NotifyInstance(ImageMap* image_map, const std::string& global_image_id,
+ bool acquire_release)
+ : image_map(image_map), global_image_id(global_image_id),
+ acquire_release(acquire_release) {
+ image_map->start_async_op();
+ }
+
+ void finish(int r) override {
+ if (acquire_release) {
+ image_map->handle_peer_ack(global_image_id, r);
+ } else {
+ image_map->handle_peer_ack_remove(global_image_id, r);
+ }
+ image_map->finish_async_op();
+ }
+};
+
+template <typename I>
+ImageMap<I>::ImageMap(librados::IoCtx &ioctx, Threads<I> *threads,
+ const std::string& instance_id,
+ image_map::Listener &listener)
+ : m_ioctx(ioctx), m_threads(threads), m_instance_id(instance_id),
+ m_listener(listener),
+ m_lock(unique_lock_name("rbd::mirror::ImageMap::m_lock", this)) {
+}
+
+template <typename I>
+ImageMap<I>::~ImageMap() {
+ ceph_assert(m_async_op_tracker.empty());
+ ceph_assert(m_timer_task == nullptr);
+ ceph_assert(m_rebalance_task == nullptr);
+}
+
+template <typename I>
+void ImageMap<I>::continue_action(const std::set<std::string> &global_image_ids,
+ int r) {
+ dout(20) << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_shutting_down) {
+ return;
+ }
+
+ for (auto const &global_image_id : global_image_ids) {
+ bool schedule = m_policy->finish_action(global_image_id, r);
+ if (schedule) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::handle_update_request(
+ const Updates &updates,
+ const std::set<std::string> &remove_global_image_ids, int r) {
+ dout(20) << "r=" << r << dendl;
+
+ std::set<std::string> global_image_ids;
+
+ global_image_ids.insert(remove_global_image_ids.begin(),
+ remove_global_image_ids.end());
+ for (auto const &update : updates) {
+ global_image_ids.insert(update.global_image_id);
+ }
+
+ continue_action(global_image_ids, r);
+}
+
+template <typename I>
+void ImageMap<I>::update_image_mapping(Updates&& map_updates,
+ std::set<std::string>&& map_removals) {
+ if (map_updates.empty() && map_removals.empty()) {
+ return;
+ }
+
+ dout(5) << "updates=[" << map_updates << "], "
+ << "removes=[" << map_removals << "]" << dendl;
+
+ Context *on_finish = new FunctionContext(
+ [this, map_updates, map_removals](int r) {
+ handle_update_request(map_updates, map_removals, r);
+ finish_async_op();
+ });
+ on_finish = create_async_context_callback(m_threads->work_queue, on_finish);
+
+ // empty meta policy for now..
+ image_map::PolicyMetaNone policy_meta;
+
+ bufferlist bl;
+ encode(image_map::PolicyData(policy_meta), bl);
+
+ // prepare update map
+ std::map<std::string, cls::rbd::MirrorImageMap> update_mapping;
+ for (auto const &update : map_updates) {
+ update_mapping.emplace(
+ update.global_image_id, cls::rbd::MirrorImageMap(update.instance_id,
+ update.mapped_time, bl));
+ }
+
+ start_async_op();
+ image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create(
+ m_ioctx, std::move(update_mapping), std::move(map_removals), on_finish);
+ req->send();
+}
+
+template <typename I>
+void ImageMap<I>::process_updates() {
+ dout(20) << dendl;
+
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_timer_task == nullptr);
+
+ Updates map_updates;
+ std::set<std::string> map_removals;
+ Updates acquire_updates;
+ Updates release_updates;
+
+ // gather updates by advancing the state machine
+ m_lock.Lock();
+ for (auto const &global_image_id : m_global_image_ids) {
+ image_map::ActionType action_type =
+ m_policy->start_action(global_image_id);
+ image_map::LookupInfo info = m_policy->lookup(global_image_id);
+
+ dout(15) << "global_image_id=" << global_image_id << ", "
+ << "action=" << action_type << ", "
+ << "instance=" << info.instance_id << dendl;
+ switch (action_type) {
+ case image_map::ACTION_TYPE_NONE:
+ continue;
+ case image_map::ACTION_TYPE_MAP_UPDATE:
+ ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+ map_updates.emplace_back(global_image_id, info.instance_id,
+ info.mapped_time);
+ break;
+ case image_map::ACTION_TYPE_MAP_REMOVE:
+ map_removals.emplace(global_image_id);
+ break;
+ case image_map::ACTION_TYPE_ACQUIRE:
+ ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+ acquire_updates.emplace_back(global_image_id, info.instance_id);
+ break;
+ case image_map::ACTION_TYPE_RELEASE:
+ ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+ release_updates.emplace_back(global_image_id, info.instance_id);
+ break;
+ }
+ }
+ m_global_image_ids.clear();
+ m_lock.Unlock();
+
+ // notify listener (acquire, release) and update on-disk map. note
+ // that it's safe to process this outside m_lock as we still hold the
+ // timer lock.
+ notify_listener_acquire_release_images(acquire_updates, release_updates);
+ update_image_mapping(std::move(map_updates), std::move(map_removals));
+}
+
+template <typename I>
+void ImageMap<I>::schedule_update_task() {
+ Mutex::Locker timer_lock(m_threads->timer_lock);
+ schedule_update_task(m_threads->timer_lock);
+}
+
+template <typename I>
+void ImageMap<I>::schedule_update_task(const Mutex &timer_lock) {
+ ceph_assert(m_threads->timer_lock.is_locked());
+
+ schedule_rebalance_task();
+
+ if (m_timer_task != nullptr) {
+ return;
+ }
+
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_global_image_ids.empty()) {
+ return;
+ }
+ }
+
+ m_timer_task = new FunctionContext([this](int r) {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ m_timer_task = nullptr;
+
+ process_updates();
+ });
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ double after = cct->_conf.get_val<double>("rbd_mirror_image_policy_update_throttle_interval");
+
+ dout(20) << "scheduling image check update (" << m_timer_task << ")"
+ << " after " << after << " second(s)" << dendl;
+ m_threads->timer->add_event_after(after, m_timer_task);
+}
+
+template <typename I>
+void ImageMap<I>::rebalance() {
+ ceph_assert(m_rebalance_task == nullptr);
+
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_async_op_tracker.empty() && m_global_image_ids.empty()) {
+ dout(20) << "starting rebalance" << dendl;
+
+ std::set<std::string> remap_global_image_ids;
+ m_policy->add_instances({}, &remap_global_image_ids);
+
+ for (auto const &global_image_id : remap_global_image_ids) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+
+ schedule_update_task(m_threads->timer_lock);
+}
+
+template <typename I>
+void ImageMap<I>::schedule_rebalance_task() {
+ ceph_assert(m_threads->timer_lock.is_locked());
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+
+ // fetch the updated value of idle timeout for (re)scheduling
+ double resched_after = cct->_conf.get_val<double>(
+ "rbd_mirror_image_policy_rebalance_timeout");
+ if (!resched_after) {
+ return;
+ }
+
+ // cancel existing rebalance task if any before scheduling
+ if (m_rebalance_task != nullptr) {
+ m_threads->timer->cancel_event(m_rebalance_task);
+ }
+
+ m_rebalance_task = new FunctionContext([this](int _) {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ m_rebalance_task = nullptr;
+
+ rebalance();
+ });
+
+ dout(20) << "scheduling rebalance (" << m_rebalance_task << ")"
+ << " after " << resched_after << " second(s)" << dendl;
+ m_threads->timer->add_event_after(resched_after, m_rebalance_task);
+}
+
+template <typename I>
+void ImageMap<I>::schedule_action(const std::string &global_image_id) {
+ dout(20) << "global_image_id=" << global_image_id << dendl;
+ ceph_assert(m_lock.is_locked());
+
+ m_global_image_ids.emplace(global_image_id);
+}
+
+template <typename I>
+void ImageMap<I>::notify_listener_acquire_release_images(
+ const Updates &acquire, const Updates &release) {
+ if (acquire.empty() && release.empty()) {
+ return;
+ }
+
+ dout(5) << "acquire=[" << acquire << "], "
+ << "release=[" << release << "]" << dendl;
+
+ for (auto const &update : acquire) {
+ m_listener.acquire_image(
+ update.global_image_id, update.instance_id,
+ create_async_context_callback(
+ m_threads->work_queue,
+ new C_NotifyInstance(this, update.global_image_id, true)));
+ }
+
+ for (auto const &update : release) {
+ m_listener.release_image(
+ update.global_image_id, update.instance_id,
+ create_async_context_callback(
+ m_threads->work_queue,
+ new C_NotifyInstance(this, update.global_image_id, true)));
+ }
+}
+
+template <typename I>
+void ImageMap<I>::notify_listener_remove_images(const std::string &peer_uuid,
+ const Updates &remove) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", "
+ << "remove=[" << remove << "]" << dendl;
+
+ for (auto const &update : remove) {
+ m_listener.remove_image(
+ peer_uuid, update.global_image_id, update.instance_id,
+ create_async_context_callback(
+ m_threads->work_queue,
+ new C_NotifyInstance(this, update.global_image_id, false)));
+ }
+}
+
+template <typename I>
+void ImageMap<I>::handle_load(const std::map<std::string,
+ cls::rbd::MirrorImageMap> &image_mapping) {
+ dout(20) << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ m_policy->init(image_mapping);
+
+ for (auto& pair : image_mapping) {
+ schedule_action(pair.first);
+ }
+ }
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::handle_peer_ack_remove(const std::string &global_image_id,
+ int r) {
+ Mutex::Locker locker(m_lock);
+ dout(5) << "global_image_id=" << global_image_id << dendl;
+
+ if (r < 0) {
+ derr << "failed to remove global_image_id=" << global_image_id << dendl;
+ }
+
+ auto peer_it = m_peer_map.find(global_image_id);
+ if (peer_it == m_peer_map.end()) {
+ return;
+ }
+
+ m_peer_map.erase(peer_it);
+}
+
+template <typename I>
+void ImageMap<I>::update_images_added(
+ const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", "
+ << "global_image_ids=[" << global_image_ids << "]" << dendl;
+ ceph_assert(m_lock.is_locked());
+
+ for (auto const &global_image_id : global_image_ids) {
+ auto result = m_peer_map[global_image_id].insert(peer_uuid);
+ if (result.second && m_peer_map[global_image_id].size() == 1) {
+ if (m_policy->add_image(global_image_id)) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+}
+
+template <typename I>
+void ImageMap<I>::update_images_removed(
+ const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", "
+ << "global_image_ids=[" << global_image_ids << "]" << dendl;
+ ceph_assert(m_lock.is_locked());
+
+ Updates to_remove;
+ for (auto const &global_image_id : global_image_ids) {
+ image_map::LookupInfo info = m_policy->lookup(global_image_id);
+ bool image_mapped = (info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+
+ bool image_removed = image_mapped;
+ bool peer_removed = false;
+ auto peer_it = m_peer_map.find(global_image_id);
+ if (peer_it != m_peer_map.end()) {
+ auto& peer_set = peer_it->second;
+ peer_removed = peer_set.erase(peer_uuid);
+ image_removed = peer_removed && peer_set.empty();
+ }
+
+ if (image_mapped && peer_removed && !peer_uuid.empty()) {
+ // peer image has been deleted
+ to_remove.emplace_back(global_image_id, info.instance_id);
+ }
+
+ if (image_mapped && image_removed) {
+ // local and peer images have been deleted
+ if (m_policy->remove_image(global_image_id)) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+
+ if (!to_remove.empty()) {
+ // removal notifications are delivered immediately. this is safe
+ // even after scheduling actions for images as we still hold m_lock
+ notify_listener_remove_images(peer_uuid, to_remove);
+ }
+}
+
+template <typename I>
+void ImageMap<I>::update_instances_added(
+ const std::vector<std::string> &instance_ids) {
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_shutting_down) {
+ return;
+ }
+
+ std::vector<std::string> filtered_instance_ids;
+ filter_instance_ids(instance_ids, &filtered_instance_ids, false);
+ if (filtered_instance_ids.empty()) {
+ return;
+ }
+
+ dout(20) << "instance_ids=" << filtered_instance_ids << dendl;
+
+ std::set<std::string> remap_global_image_ids;
+ m_policy->add_instances(filtered_instance_ids, &remap_global_image_ids);
+
+ for (auto const &global_image_id : remap_global_image_ids) {
+ schedule_action(global_image_id);
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::update_instances_removed(
+ const std::vector<std::string> &instance_ids) {
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_shutting_down) {
+ return;
+ }
+
+ std::vector<std::string> filtered_instance_ids;
+ filter_instance_ids(instance_ids, &filtered_instance_ids, true);
+ if (filtered_instance_ids.empty()) {
+ return;
+ }
+
+ dout(20) << "instance_ids=" << filtered_instance_ids << dendl;
+
+ std::set<std::string> remap_global_image_ids;
+ m_policy->remove_instances(filtered_instance_ids, &remap_global_image_ids);
+
+ for (auto const &global_image_id : remap_global_image_ids) {
+ schedule_action(global_image_id);
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::update_images(const std::string &peer_uuid,
+ std::set<std::string> &&added_global_image_ids,
+ std::set<std::string> &&removed_global_image_ids) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", " << "added_count="
+ << added_global_image_ids.size() << ", " << "removed_count="
+ << removed_global_image_ids.size() << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_shutting_down) {
+ return;
+ }
+
+ if (!removed_global_image_ids.empty()) {
+ update_images_removed(peer_uuid, removed_global_image_ids);
+ }
+ if (!added_global_image_ids.empty()) {
+ update_images_added(peer_uuid, added_global_image_ids);
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::handle_peer_ack(const std::string &global_image_id, int r) {
+ dout(20) << "global_image_id=" << global_image_id << ", r=" << r << dendl;
+
+ continue_action({global_image_id}, r);
+}
+
+template <typename I>
+void ImageMap<I>::init(Context *on_finish) {
+ dout(20) << dendl;
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ std::string policy_type = cct->_conf.get_val<std::string>("rbd_mirror_image_policy_type");
+
+ if (policy_type == "none" || policy_type == "simple") {
+ m_policy.reset(image_map::SimplePolicy::create(m_ioctx));
+ } else {
+ ceph_abort(); // not really needed as such, but catch it.
+ }
+
+ dout(20) << "mapping policy=" << policy_type << dendl;
+
+ start_async_op();
+ C_LoadMap *ctx = new C_LoadMap(this, on_finish);
+ image_map::LoadRequest<I> *req = image_map::LoadRequest<I>::create(
+ m_ioctx, &ctx->image_mapping, ctx);
+ req->send();
+}
+
+template <typename I>
+void ImageMap<I>::shut_down(Context *on_finish) {
+ dout(20) << dendl;
+
+ {
+ Mutex::Locker timer_lock(m_threads->timer_lock);
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_shutting_down);
+
+ m_shutting_down = true;
+ m_policy.reset();
+ }
+
+ if (m_timer_task != nullptr) {
+ m_threads->timer->cancel_event(m_timer_task);
+ m_timer_task = nullptr;
+ }
+ if (m_rebalance_task != nullptr) {
+ m_threads->timer->cancel_event(m_rebalance_task);
+ m_rebalance_task = nullptr;
+ }
+ }
+
+ wait_for_async_ops(on_finish);
+}
+
+template <typename I>
+void ImageMap<I>::filter_instance_ids(
+ const std::vector<std::string> &instance_ids,
+ std::vector<std::string> *filtered_instance_ids, bool removal) const {
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ std::string policy_type = cct->_conf.get_val<std::string>("rbd_mirror_image_policy_type");
+
+ if (policy_type != "none") {
+ *filtered_instance_ids = instance_ids;
+ return;
+ }
+
+ if (removal) {
+ // propagate removals for external instances
+ for (auto& instance_id : instance_ids) {
+ if (instance_id != m_instance_id) {
+ filtered_instance_ids->push_back(instance_id);
+ }
+ }
+ } else if (std::find(instance_ids.begin(), instance_ids.end(),
+ m_instance_id) != instance_ids.end()) {
+ // propagate addition only for local instance
+ filtered_instance_ids->push_back(m_instance_id);
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageMap<librbd::ImageCtx>;
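ImageMap drives an image_map::Listener (defined in image_map/Types.h, which is not shown in this hunk). A hypothetical listener sketch (not part of the patch; signatures inferred from the call sites in notify_listener_acquire_release_images() and notify_listener_remove_images() above):

    // Illustration only: each callback must eventually complete on_finish;
    // the completion re-enters ImageMap via C_NotifyInstance to ack the
    // action and unblock the policy state machine.
    struct ExampleListener : public rbd::mirror::image_map::Listener {
      void acquire_image(const std::string& global_image_id,
                         const std::string& instance_id,
                         Context* on_finish) override {
        on_finish->complete(0);  // start replay, then ack
      }
      void release_image(const std::string& global_image_id,
                         const std::string& instance_id,
                         Context* on_finish) override {
        on_finish->complete(0);  // stop replay, then ack
      }
      void remove_image(const std::string& peer_uuid,
                        const std::string& global_image_id,
                        const std::string& instance_id,
                        Context* on_finish) override {
        on_finish->complete(0);  // schedule local deletion, then ack
      }
    };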
diff --git a/src/tools/rbd_mirror/ImageMap.h b/src/tools/rbd_mirror/ImageMap.h
new file mode 100644
index 00000000..283f55db
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageMap.h
@@ -0,0 +1,175 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_H
+
+#include <vector>
+
+#include "common/Mutex.h"
+#include "include/Context.h"
+#include "common/AsyncOpTracker.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+
+#include "image_map/Policy.h"
+#include "image_map/Types.h"
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageMap {
+public:
+ static ImageMap *create(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads,
+ const std::string& instance_id,
+ image_map::Listener &listener) {
+ return new ImageMap(ioctx, threads, instance_id, listener);
+ }
+
+ ~ImageMap();
+
+  // init (load) the image map from the pool
+ void init(Context *on_finish);
+
+ // shut down map operations
+ void shut_down(Context *on_finish);
+
+ // update (add/remove) images
+ void update_images(const std::string &peer_uuid,
+ std::set<std::string> &&added_global_image_ids,
+ std::set<std::string> &&removed_global_image_ids);
+
+ // add/remove instances
+ void update_instances_added(const std::vector<std::string> &instances);
+ void update_instances_removed(const std::vector<std::string> &instances);
+
+private:
+ struct C_NotifyInstance;
+
+ ImageMap(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads,
+ const std::string& instance_id, image_map::Listener &listener);
+
+ struct Update {
+ std::string global_image_id;
+ std::string instance_id;
+ utime_t mapped_time;
+
+ Update(const std::string &global_image_id, const std::string &instance_id,
+ utime_t mapped_time)
+ : global_image_id(global_image_id),
+ instance_id(instance_id),
+ mapped_time(mapped_time) {
+ }
+ Update(const std::string &global_image_id, const std::string &instance_id)
+ : Update(global_image_id, instance_id, ceph_clock_now()) {
+ }
+
+ friend std::ostream& operator<<(std::ostream& os,
+ const Update& update) {
+ os << "{global_image_id=" << update.global_image_id << ", "
+ << "instance_id=" << update.instance_id << "}";
+ return os;
+ }
+  };
+ typedef std::list<Update> Updates;
+
+ // Lock ordering: m_threads->timer_lock, m_lock
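+  // (when both are held the timer lock must be acquired first -- see
+  // shut_down())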
+
+ librados::IoCtx &m_ioctx;
+ Threads<ImageCtxT> *m_threads;
+ std::string m_instance_id;
+ image_map::Listener &m_listener;
+
+ std::unique_ptr<image_map::Policy> m_policy; // our mapping policy
+
+ Context *m_timer_task = nullptr;
+ Mutex m_lock;
+ bool m_shutting_down = false;
+ AsyncOpTracker m_async_op_tracker;
+
+ // global_image_id -> registered peers ("" == local, remote otherwise)
+ std::map<std::string, std::set<std::string> > m_peer_map;
+
+ std::set<std::string> m_global_image_ids;
+
+ Context *m_rebalance_task = nullptr;
+
+ struct C_LoadMap : Context {
+ ImageMap *image_map;
+ Context *on_finish;
+
+ std::map<std::string, cls::rbd::MirrorImageMap> image_mapping;
+
+ C_LoadMap(ImageMap *image_map, Context *on_finish)
+ : image_map(image_map),
+ on_finish(on_finish) {
+ }
+
+ void finish(int r) override {
+ if (r == 0) {
+ image_map->handle_load(image_mapping);
+ }
+
+ image_map->finish_async_op();
+ on_finish->complete(r);
+ }
+ };
+
+ // async op-tracker helper routines
+ void start_async_op() {
+ m_async_op_tracker.start_op();
+ }
+ void finish_async_op() {
+ m_async_op_tracker.finish_op();
+ }
+ void wait_for_async_ops(Context *on_finish) {
+ m_async_op_tracker.wait_for_ops(on_finish);
+ }
+
+ void handle_peer_ack(const std::string &global_image_id, int r);
+ void handle_peer_ack_remove(const std::string &global_image_id, int r);
+
+ void handle_load(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping);
+ void handle_update_request(const Updates &updates,
+ const std::set<std::string> &remove_global_image_ids, int r);
+
+ // continue (retry or resume depending on state machine) processing
+ // current action.
+ void continue_action(const std::set<std::string> &global_image_ids, int r);
+
+ // schedule an image for update
+ void schedule_action(const std::string &global_image_id);
+
+ void schedule_update_task();
+ void schedule_update_task(const Mutex &timer_lock);
+ void process_updates();
+ void update_image_mapping(Updates&& map_updates,
+ std::set<std::string>&& map_removals);
+
+ void rebalance();
+ void schedule_rebalance_task();
+
+ void notify_listener_acquire_release_images(const Updates &acquire, const Updates &release);
+ void notify_listener_remove_images(const std::string &peer_uuid, const Updates &remove);
+
+ void update_images_added(const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids);
+ void update_images_removed(const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids);
+
+ void filter_instance_ids(const std::vector<std::string> &instance_ids,
+ std::vector<std::string> *filtered_instance_ids,
+ bool removal) const;
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_H
diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc
new file mode 100644
index 00000000..6c6ee2d5
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageReplayer.cc
@@ -0,0 +1,1896 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/Timer.h"
+#include "common/WorkQueue.h"
+#include "global/global_context.h"
+#include "journal/Journaler.h"
+#include "journal/ReplayHandler.h"
+#include "journal/Settings.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Replay.h"
+#include "ImageDeleter.h"
+#include "ImageReplayer.h"
+#include "Threads.h"
+#include "tools/rbd_mirror/image_replayer/BootstrapRequest.h"
+#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/EventPreprocessor.h"
+#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::" << *this << " " \
+ << __func__ << ": "
+
+using std::map;
+using std::string;
+using std::unique_ptr;
+using std::shared_ptr;
+using std::vector;
+
+extern PerfCounters *g_perf_counters;
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+using namespace rbd::mirror::image_replayer;
+
+template <typename I>
+std::ostream &operator<<(std::ostream &os,
+ const typename ImageReplayer<I>::State &state);
+
+namespace {
+
+template <typename I>
+struct ReplayHandler : public ::journal::ReplayHandler {
+ ImageReplayer<I> *replayer;
+ ReplayHandler(ImageReplayer<I> *replayer) : replayer(replayer) {}
+ void get() override {}
+ void put() override {}
+
+ void handle_entries_available() override {
+ replayer->handle_replay_ready();
+ }
+ void handle_complete(int r) override {
+ std::stringstream ss;
+ if (r < 0) {
+ ss << "replay completed with error: " << cpp_strerror(r);
+ }
+ replayer->handle_replay_complete(r, ss.str());
+ }
+};
+
+template <typename I>
+class ImageReplayerAdminSocketCommand {
+public:
+ ImageReplayerAdminSocketCommand(const std::string &desc,
+ ImageReplayer<I> *replayer)
+ : desc(desc), replayer(replayer) {
+ }
+ virtual ~ImageReplayerAdminSocketCommand() {}
+ virtual bool call(Formatter *f, stringstream *ss) = 0;
+
+ std::string desc;
+ ImageReplayer<I> *replayer;
+ bool registered = false;
+};
+
+template <typename I>
+class StatusCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit StatusCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->replayer->print_status(f, ss);
+ return true;
+ }
+};
+
+template <typename I>
+class StartCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit StartCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->replayer->start(nullptr, true);
+ return true;
+ }
+};
+
+template <typename I>
+class StopCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit StopCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->replayer->stop(nullptr, true);
+ return true;
+ }
+};
+
+template <typename I>
+class RestartCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit RestartCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->replayer->restart();
+ return true;
+ }
+};
+
+template <typename I>
+class FlushCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit FlushCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->replayer->flush();
+ return true;
+ }
+};
+
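+// The hook below wires the per-image commands into the daemon's admin
+// socket. As an illustration only (the socket path is deployment
+// specific), a registered command could be invoked via:
+//   ceph daemon /var/run/ceph/ceph-client.rbd-mirror.<id>.asok \
+//       rbd mirror status <pool>/<image>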
+template <typename I>
+class ImageReplayerAdminSocketHook : public AdminSocketHook {
+public:
+ ImageReplayerAdminSocketHook(CephContext *cct, const std::string &name,
+ ImageReplayer<I> *replayer)
+ : admin_socket(cct->get_admin_socket()),
+ commands{{"rbd mirror flush " + name,
+ new FlushCommand<I>("flush rbd mirror " + name, replayer)},
+ {"rbd mirror restart " + name,
+ new RestartCommand<I>("restart rbd mirror " + name, replayer)},
+ {"rbd mirror start " + name,
+ new StartCommand<I>("start rbd mirror " + name, replayer)},
+ {"rbd mirror status " + name,
+ new StatusCommand<I>("get status for rbd mirror " + name, replayer)},
+ {"rbd mirror stop " + name,
+ new StopCommand<I>("stop rbd mirror " + name, replayer)}} {
+ }
+
+ int register_commands() {
+ for (auto &it : commands) {
+ int r = admin_socket->register_command(it.first, it.first, this,
+ it.second->desc);
+ if (r < 0) {
+ return r;
+ }
+ it.second->registered = true;
+ }
+ return 0;
+ }
+
+ ~ImageReplayerAdminSocketHook() override {
+ for (auto &it : commands) {
+ if (it.second->registered) {
+ admin_socket->unregister_command(it.first);
+ }
+ delete it.second;
+ }
+ commands.clear();
+ }
+
+ bool call(std::string_view command, const cmdmap_t& cmdmap,
+ std::string_view format, bufferlist& out) override {
+ auto i = commands.find(command);
+ ceph_assert(i != commands.end());
+ Formatter *f = Formatter::create(format);
+ stringstream ss;
+ bool r = i->second->call(f, &ss);
+ delete f;
+ out.append(ss);
+ return r;
+ }
+
+private:
+ typedef std::map<std::string, ImageReplayerAdminSocketCommand<I>*,
+ std::less<>> Commands;
+
+ AdminSocket *admin_socket;
+ Commands commands;
+};
+
+uint32_t calculate_replay_delay(const utime_t &event_time,
+ int mirroring_replay_delay) {
+ if (mirroring_replay_delay <= 0) {
+ return 0;
+ }
+
+ utime_t now = ceph_clock_now();
+ if (event_time + mirroring_replay_delay <= now) {
+ return 0;
+ }
+
+ // ensure it is rounded up when converting to integer
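+  // (e.g. an event 9.4 seconds old with a 10 second delay leaves 0.6,
+  // which truncates to 0; adding 1 yields a 1 second delay)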
+ return (event_time + mirroring_replay_delay - now) + 1;
+}
+
+} // anonymous namespace
+
+template <typename I>
+void ImageReplayer<I>::BootstrapProgressContext::update_progress(
+ const std::string &description, bool flush)
+{
+ const std::string desc = "bootstrapping, " + description;
+ replayer->set_state_description(0, desc);
+ if (flush) {
+ replayer->update_mirror_image_status(false, boost::none);
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::RemoteJournalerListener::handle_update(
+ ::journal::JournalMetadata *) {
+ FunctionContext *ctx = new FunctionContext([this](int r) {
+ replayer->handle_remote_journal_metadata_updated();
+ });
+ replayer->m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+ImageReplayer<I>::ImageReplayer(Threads<I> *threads,
+ InstanceWatcher<I> *instance_watcher,
+ RadosRef local,
+ const std::string &local_mirror_uuid,
+ int64_t local_pool_id,
+ const std::string &global_image_id) :
+ m_threads(threads),
+ m_instance_watcher(instance_watcher),
+ m_local(local),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_local_pool_id(local_pool_id),
+ m_global_image_id(global_image_id), m_local_image_name(global_image_id),
+ m_lock("rbd::mirror::ImageReplayer " + stringify(local_pool_id) + " " +
+ global_image_id),
+ m_progress_cxt(this),
+ m_journal_listener(new JournalListener(this)),
+ m_remote_listener(this)
+{
+  // Register asok commands using a temporary "local_pool_name/global_image_id"
+  // name. When the local image name becomes known on start, the asok commands
+  // are re-registered using the "local_pool_name/local_image_name" name.
+
+ std::string pool_name;
+ int r = m_local->pool_reverse_lookup(m_local_pool_id, &pool_name);
+ if (r < 0) {
+ derr << "error resolving local pool " << m_local_pool_id
+ << ": " << cpp_strerror(r) << dendl;
+ pool_name = stringify(m_local_pool_id);
+ }
+
+ m_name = pool_name + "/" + m_global_image_id;
+ register_admin_socket_hook();
+}
+
+template <typename I>
+ImageReplayer<I>::~ImageReplayer()
+{
+ unregister_admin_socket_hook();
+ ceph_assert(m_event_preprocessor == nullptr);
+ ceph_assert(m_replay_status_formatter == nullptr);
+ ceph_assert(m_local_image_ctx == nullptr);
+ ceph_assert(m_local_replay == nullptr);
+ ceph_assert(m_remote_journaler == nullptr);
+ ceph_assert(m_replay_handler == nullptr);
+ ceph_assert(m_on_start_finish == nullptr);
+ ceph_assert(m_on_stop_finish == nullptr);
+ ceph_assert(m_bootstrap_request == nullptr);
+ ceph_assert(m_in_flight_status_updates == 0);
+
+ delete m_journal_listener;
+}
+
+template <typename I>
+image_replayer::HealthState ImageReplayer<I>::get_health_state() const {
+ Mutex::Locker locker(m_lock);
+
+ if (!m_mirror_image_status_state) {
+ return image_replayer::HEALTH_STATE_OK;
+ } else if (*m_mirror_image_status_state ==
+ cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING ||
+ *m_mirror_image_status_state ==
+ cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN) {
+ return image_replayer::HEALTH_STATE_WARNING;
+ }
+ return image_replayer::HEALTH_STATE_ERROR;
+}
+
+template <typename I>
+void ImageReplayer<I>::add_peer(const std::string &peer_uuid,
+ librados::IoCtx &io_ctx) {
+ Mutex::Locker locker(m_lock);
+ auto it = m_peers.find({peer_uuid});
+ if (it == m_peers.end()) {
+ m_peers.insert({peer_uuid, io_ctx});
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::set_state_description(int r, const std::string &desc) {
+ dout(10) << r << " " << desc << dendl;
+
+ Mutex::Locker l(m_lock);
+ m_last_r = r;
+ m_state_desc = desc;
+}
+
+template <typename I>
+void ImageReplayer<I>::start(Context *on_finish, bool manual)
+{
+ dout(10) << "on_finish=" << on_finish << dendl;
+
+ int r = 0;
+ {
+ Mutex::Locker locker(m_lock);
+ if (!is_stopped_()) {
+ derr << "already running" << dendl;
+ r = -EINVAL;
+ } else if (m_manual_stop && !manual) {
+ dout(5) << "stopped manually, ignoring start without manual flag"
+ << dendl;
+ r = -EPERM;
+ } else {
+ m_state = STATE_STARTING;
+ m_last_r = 0;
+ m_state_desc.clear();
+ m_manual_stop = false;
+ m_delete_requested = false;
+
+ if (on_finish != nullptr) {
+ ceph_assert(m_on_start_finish == nullptr);
+ m_on_start_finish = on_finish;
+ }
+ ceph_assert(m_on_stop_finish == nullptr);
+ }
+ }
+
+ if (r < 0) {
+ if (on_finish) {
+ on_finish->complete(r);
+ }
+ return;
+ }
+
+ m_local_ioctx.reset(new librados::IoCtx{});
+ r = m_local->ioctx_create2(m_local_pool_id, *m_local_ioctx);
+ if (r < 0) {
+ m_local_ioctx.reset();
+
+ derr << "error opening ioctx for local pool " << m_local_pool_id
+ << ": " << cpp_strerror(r) << dendl;
+ on_start_fail(r, "error opening local pool");
+ return;
+ }
+
+ prepare_local_image();
+}
+
+template <typename I>
+void ImageReplayer<I>::prepare_local_image() {
+ dout(10) << dendl;
+
+ m_local_image_id = "";
+ Context *ctx = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_prepare_local_image>(this);
+ auto req = PrepareLocalImageRequest<I>::create(
+ *m_local_ioctx, m_global_image_id, &m_local_image_id, &m_local_image_name,
+ &m_local_image_tag_owner, m_threads->work_queue, ctx);
+ req->send();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_prepare_local_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "local image does not exist" << dendl;
+ } else if (r < 0) {
+ on_start_fail(r, "error preparing local image for replay");
+ return;
+ } else {
+ reregister_admin_socket_hook();
+ }
+
+ // local image doesn't exist or is non-primary
+ prepare_remote_image();
+}
+
+template <typename I>
+void ImageReplayer<I>::prepare_remote_image() {
+ dout(10) << dendl;
+ if (m_peers.empty()) {
+ // technically nothing to bootstrap, but it handles the status update
+ bootstrap();
+ return;
+ }
+
+ // TODO need to support multiple remote images
+ ceph_assert(!m_peers.empty());
+ m_remote_image = {*m_peers.begin()};
+
+ auto cct = static_cast<CephContext *>(m_local->cct());
+ journal::Settings journal_settings;
+ journal_settings.commit_interval = cct->_conf.get_val<double>(
+ "rbd_mirror_journal_commit_age");
+ journal_settings.max_fetch_bytes = cct->_conf.get_val<Option::size_t>(
+ "rbd_mirror_journal_max_fetch_bytes");
+
+ Context *ctx = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_prepare_remote_image>(this);
+ auto req = PrepareRemoteImageRequest<I>::create(
+ m_threads, m_remote_image.io_ctx, m_global_image_id, m_local_mirror_uuid,
+ m_local_image_id, journal_settings, &m_remote_image.mirror_uuid,
+ &m_remote_image.image_id, &m_remote_journaler, &m_client_state,
+ &m_client_meta, ctx);
+ req->send();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_prepare_remote_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r < 0 ? m_remote_journaler == nullptr : m_remote_journaler != nullptr);
+ if (r < 0 && !m_local_image_id.empty() &&
+ m_local_image_tag_owner == librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ // local image is primary -- fall-through
+ } else if (r == -ENOENT) {
+ dout(10) << "remote image does not exist" << dendl;
+
+ // TODO need to support multiple remote images
+ if (m_remote_image.image_id.empty() && !m_local_image_id.empty() &&
+ m_local_image_tag_owner == m_remote_image.mirror_uuid) {
+ // local image exists and is non-primary and linked to the missing
+ // remote image
+
+ m_delete_requested = true;
+ on_start_fail(0, "remote image no longer exists");
+ } else {
+ on_start_fail(-ENOENT, "remote image does not exist");
+ }
+ return;
+ } else if (r < 0) {
+ on_start_fail(r, "error retrieving remote image id");
+ return;
+ }
+
+ bootstrap();
+}
+
+template <typename I>
+void ImageReplayer<I>::bootstrap() {
+ dout(10) << dendl;
+
+ if (!m_local_image_id.empty() &&
+ m_local_image_tag_owner == librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ dout(5) << "local image is primary" << dendl;
+ on_start_fail(0, "local image is primary");
+ return;
+ } else if (m_peers.empty()) {
+ dout(5) << "no peer clusters" << dendl;
+ on_start_fail(-ENOENT, "no peer clusters");
+ return;
+ }
+
+ BootstrapRequest<I> *request = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ if (on_start_interrupted(m_lock)) {
+ return;
+ }
+
+ auto ctx = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_bootstrap>(this);
+ request = BootstrapRequest<I>::create(
+ m_threads, *m_local_ioctx, m_remote_image.io_ctx, m_instance_watcher,
+ &m_local_image_ctx, m_local_image_id, m_remote_image.image_id,
+ m_global_image_id, m_local_mirror_uuid, m_remote_image.mirror_uuid,
+ m_remote_journaler, &m_client_state, &m_client_meta, ctx,
+ &m_resync_requested, &m_progress_cxt);
+ request->get();
+ m_bootstrap_request = request;
+ }
+
+ update_mirror_image_status(false, boost::none);
+ reschedule_update_status_task(10);
+
+ request->send();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_bootstrap(int r) {
+ dout(10) << "r=" << r << dendl;
+ {
+ Mutex::Locker locker(m_lock);
+ m_bootstrap_request->put();
+ m_bootstrap_request = nullptr;
+ if (m_local_image_ctx) {
+ m_local_image_id = m_local_image_ctx->id;
+ }
+ }
+
+ if (on_start_interrupted()) {
+ return;
+ } else if (r == -EREMOTEIO) {
+ m_local_image_tag_owner = "";
+ dout(5) << "remote image is non-primary" << dendl;
+ on_start_fail(-EREMOTEIO, "remote image is non-primary");
+ return;
+ } else if (r == -EEXIST) {
+ m_local_image_tag_owner = "";
+ on_start_fail(r, "split-brain detected");
+ return;
+ } else if (r < 0) {
+ on_start_fail(r, "error bootstrapping replay");
+ return;
+ } else if (m_resync_requested) {
+ on_start_fail(0, "resync requested");
+ return;
+ }
+
+ ceph_assert(m_local_journal == nullptr);
+ {
+ RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock);
+ if (m_local_image_ctx->journal != nullptr) {
+ m_local_journal = m_local_image_ctx->journal;
+ m_local_journal->add_listener(m_journal_listener);
+ }
+ }
+
+ if (m_local_journal == nullptr) {
+ on_start_fail(-EINVAL, "error accessing local journal");
+ return;
+ }
+
+ update_mirror_image_status(false, boost::none);
+ init_remote_journaler();
+}
+
+template <typename I>
+void ImageReplayer<I>::init_remote_journaler() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_init_remote_journaler>(this);
+ m_remote_journaler->init(ctx);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_init_remote_journaler(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (on_start_interrupted()) {
+ return;
+ } else if (r < 0) {
+ derr << "failed to initialize remote journal: " << cpp_strerror(r) << dendl;
+ on_start_fail(r, "error initializing remote journal");
+ return;
+ }
+
+ m_remote_journaler->add_listener(&m_remote_listener);
+
+ cls::journal::Client client;
+ r = m_remote_journaler->get_cached_client(m_local_mirror_uuid, &client);
+ if (r < 0) {
+ derr << "error retrieving remote journal client: " << cpp_strerror(r)
+ << dendl;
+ on_start_fail(r, "error retrieving remote journal client");
+ return;
+ }
+
+ dout(5) << "image_id=" << m_local_image_id << ", "
+ << "client_meta.image_id=" << m_client_meta.image_id << ", "
+ << "client.state=" << client.state << dendl;
+ if (m_client_meta.image_id == m_local_image_id &&
+ client.state != cls::journal::CLIENT_STATE_CONNECTED) {
+ dout(5) << "client flagged disconnected, stopping image replay" << dendl;
+ if (m_local_image_ctx->config.template get_val<bool>("rbd_mirroring_resync_after_disconnect")) {
+ m_resync_requested = true;
+ on_start_fail(-ENOTCONN, "disconnected: automatic resync");
+ } else {
+ on_start_fail(-ENOTCONN, "disconnected");
+ }
+ return;
+ }
+
+ start_replay();
+}
+
+template <typename I>
+void ImageReplayer<I>::start_replay() {
+ dout(10) << dendl;
+
+ Context *start_ctx = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_start_replay>(this);
+ m_local_journal->start_external_replay(&m_local_replay, start_ctx);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_start_replay(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ ceph_assert(m_local_replay == nullptr);
+ derr << "error starting external replay on local image "
+ << m_local_image_id << ": " << cpp_strerror(r) << dendl;
+ on_start_fail(r, "error starting replay on local image");
+ return;
+ }
+
+ m_replay_status_formatter =
+ ReplayStatusFormatter<I>::create(m_remote_journaler, m_local_mirror_uuid);
+
+ Context *on_finish(nullptr);
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_state == STATE_STARTING);
+ m_state = STATE_REPLAYING;
+ std::swap(m_on_start_finish, on_finish);
+ }
+
+ m_event_preprocessor = EventPreprocessor<I>::create(
+ *m_local_image_ctx, *m_remote_journaler, m_local_mirror_uuid,
+ &m_client_meta, m_threads->work_queue);
+
+ update_mirror_image_status(true, boost::none);
+ reschedule_update_status_task(30);
+
+ if (on_replay_interrupted()) {
+ return;
+ }
+
+ {
+ CephContext *cct = static_cast<CephContext *>(m_local->cct());
+ double poll_seconds = cct->_conf.get_val<double>(
+ "rbd_mirror_journal_poll_age");
+
+ Mutex::Locker locker(m_lock);
+ m_replay_handler = new ReplayHandler<I>(this);
+ m_remote_journaler->start_live_replay(m_replay_handler, poll_seconds);
+
+ dout(10) << "m_remote_journaler=" << *m_remote_journaler << dendl;
+ }
+
+ dout(10) << "start succeeded" << dendl;
+ if (on_finish != nullptr) {
+ dout(10) << "on finish complete, r=" << r << dendl;
+ on_finish->complete(r);
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::on_start_fail(int r, const std::string &desc)
+{
+ dout(10) << "r=" << r << dendl;
+ Context *ctx = new FunctionContext([this, r, desc](int _r) {
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_state == STATE_STARTING);
+ m_state = STATE_STOPPING;
+ if (r < 0 && r != -ECANCELED && r != -EREMOTEIO && r != -ENOENT) {
+ derr << "start failed: " << cpp_strerror(r) << dendl;
+ } else {
+ dout(10) << "start canceled" << dendl;
+ }
+ }
+
+ set_state_description(r, desc);
+ if (m_local_ioctx) {
+ update_mirror_image_status(false, boost::none);
+ }
+ reschedule_update_status_task(-1);
+ shut_down(r);
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+bool ImageReplayer<I>::on_start_interrupted() {
+ Mutex::Locker locker(m_lock);
+ return on_start_interrupted(m_lock);
+}
+
+template <typename I>
+bool ImageReplayer<I>::on_start_interrupted(Mutex& lock) {
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(m_state == STATE_STARTING);
+ if (!m_stop_requested) {
+ return false;
+ }
+
+ on_start_fail(-ECANCELED, "");
+ return true;
+}
+
+template <typename I>
+void ImageReplayer<I>::stop(Context *on_finish, bool manual, int r,
+ const std::string& desc)
+{
+ dout(10) << "on_finish=" << on_finish << ", manual=" << manual
+ << ", desc=" << desc << dendl;
+
+ image_replayer::BootstrapRequest<I> *bootstrap_request = nullptr;
+ bool shut_down_replay = false;
+ bool running = true;
+ {
+ Mutex::Locker locker(m_lock);
+
+ if (!is_running_()) {
+ running = false;
+ } else {
+ if (!is_stopped_()) {
+ if (m_state == STATE_STARTING) {
+ dout(10) << "canceling start" << dendl;
+ if (m_bootstrap_request != nullptr) {
+ bootstrap_request = m_bootstrap_request;
+ bootstrap_request->get();
+ }
+ } else {
+ dout(10) << "interrupting replay" << dendl;
+ shut_down_replay = true;
+ }
+
+ ceph_assert(m_on_stop_finish == nullptr);
+ std::swap(m_on_stop_finish, on_finish);
+ m_stop_requested = true;
+ m_manual_stop = manual;
+ }
+ }
+ }
+
+ // avoid holding lock since bootstrap request will update status
+ if (bootstrap_request != nullptr) {
+ dout(10) << "canceling bootstrap" << dendl;
+ bootstrap_request->cancel();
+ bootstrap_request->put();
+ }
+
+ if (!running) {
+ dout(20) << "not running" << dendl;
+ if (on_finish) {
+ on_finish->complete(-EINVAL);
+ }
+ return;
+ }
+
+ if (shut_down_replay) {
+ on_stop_journal_replay(r, desc);
+ } else if (on_finish != nullptr) {
+ on_finish->complete(0);
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::on_stop_journal_replay(int r, const std::string &desc)
+{
+ dout(10) << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_state != STATE_REPLAYING) {
+ // might be invoked multiple times while stopping
+ return;
+ }
+ m_stop_requested = true;
+ m_state = STATE_STOPPING;
+ }
+
+ set_state_description(r, desc);
+ update_mirror_image_status(true, boost::none);
+ reschedule_update_status_task(-1);
+ shut_down(0);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_replay_ready()
+{
+ dout(20) << dendl;
+ if (on_replay_interrupted()) {
+ return;
+ }
+
+ if (!m_remote_journaler->try_pop_front(&m_replay_entry, &m_replay_tag_tid)) {
+ return;
+ }
+
+ m_event_replay_tracker.start_op();
+
+ m_lock.Lock();
+ bool stopping = (m_state == STATE_STOPPING);
+ m_lock.Unlock();
+
+ if (stopping) {
+ dout(10) << "stopping event replay" << dendl;
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
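+  // a changed tag tid marks a new tag epoch: flush the current replay
+  // state and fetch the new remote tag before processing the entry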
+ if (m_replay_tag_valid && m_replay_tag.tid == m_replay_tag_tid) {
+ preprocess_entry();
+ return;
+ }
+
+ replay_flush();
+}
+
+template <typename I>
+void ImageReplayer<I>::restart(Context *on_finish)
+{
+ FunctionContext *ctx = new FunctionContext(
+ [this, on_finish](int r) {
+      // ignore any error from stop() and attempt the start regardless
+      start(on_finish, true);
+ });
+ stop(ctx);
+}
+
+template <typename I>
+void ImageReplayer<I>::flush()
+{
+ dout(10) << dendl;
+ C_SaferCond ctx;
+ flush_local_replay(&ctx);
+ ctx.wait();
+
+ update_mirror_image_status(false, boost::none);
+}
+
+template <typename I>
+void ImageReplayer<I>::flush_local_replay(Context* on_flush)
+{
+ m_lock.Lock();
+ if (m_state != STATE_REPLAYING) {
+ m_lock.Unlock();
+ on_flush->complete(0);
+ return;
+ }
+
+ dout(15) << dendl;
+ auto ctx = new FunctionContext(
+ [this, on_flush](int r) {
+ handle_flush_local_replay(on_flush, r);
+ });
+ m_local_replay->flush(ctx);
+ m_lock.Unlock();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_flush_local_replay(Context* on_flush, int r)
+{
+ dout(15) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "error flushing local replay: " << cpp_strerror(r) << dendl;
+ on_flush->complete(r);
+ return;
+ }
+
+ flush_commit_position(on_flush);
+}
+
+template <typename I>
+void ImageReplayer<I>::flush_commit_position(Context* on_flush)
+{
+ m_lock.Lock();
+ if (m_state != STATE_REPLAYING) {
+ m_lock.Unlock();
+ on_flush->complete(0);
+ return;
+ }
+
+ dout(15) << dendl;
+ auto ctx = new FunctionContext(
+ [this, on_flush](int r) {
+ handle_flush_commit_position(on_flush, r);
+ });
+ m_remote_journaler->flush_commit_position(ctx);
+ m_lock.Unlock();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_flush_commit_position(Context* on_flush, int r)
+{
+ dout(15) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "error flushing remote journal commit position: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ on_flush->complete(r);
+}
+
+template <typename I>
+bool ImageReplayer<I>::on_replay_interrupted()
+{
+ bool shut_down;
+ {
+ Mutex::Locker locker(m_lock);
+ shut_down = m_stop_requested;
+ }
+
+ if (shut_down) {
+ on_stop_journal_replay();
+ }
+ return shut_down;
+}
+
+template <typename I>
+void ImageReplayer<I>::print_status(Formatter *f, stringstream *ss)
+{
+ dout(10) << dendl;
+
+ Mutex::Locker l(m_lock);
+
+ if (f) {
+ f->open_object_section("image_replayer");
+ f->dump_string("name", m_name);
+ f->dump_string("state", to_string(m_state));
+ f->close_section();
+ f->flush(*ss);
+ } else {
+ *ss << m_name << ": state: " << to_string(m_state);
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_replay_complete(int r, const std::string &error_desc)
+{
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "replay encountered an error: " << cpp_strerror(r) << dendl;
+ }
+
+ {
+ Mutex::Locker locker(m_lock);
+ m_stop_requested = true;
+ }
+ on_stop_journal_replay(r, error_desc);
+}
+
+template <typename I>
+void ImageReplayer<I>::replay_flush() {
+ dout(10) << dendl;
+
+ bool interrupted = false;
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_state != STATE_REPLAYING) {
+ dout(10) << "replay interrupted" << dendl;
+ interrupted = true;
+ } else {
+ m_state = STATE_REPLAY_FLUSHING;
+ }
+ }
+
+ if (interrupted) {
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
+ // shut down the replay to flush all IO and ops and create a new
+ // replayer to handle the new tag epoch
+ Context *ctx = create_context_callback<
+ ImageReplayer<I>, &ImageReplayer<I>::handle_replay_flush>(this);
+ ctx = new FunctionContext([this, ctx](int r) {
+ m_local_image_ctx->journal->stop_external_replay();
+ m_local_replay = nullptr;
+
+ if (r < 0) {
+ ctx->complete(r);
+ return;
+ }
+
+ m_local_journal->start_external_replay(&m_local_replay, ctx);
+ });
+ m_local_replay->shut_down(false, ctx);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_replay_flush(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_state == STATE_REPLAY_FLUSHING);
+ m_state = STATE_REPLAYING;
+ }
+
+ if (r < 0) {
+ derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl;
+ m_event_replay_tracker.finish_op();
+ handle_replay_complete(r, "replay flush encountered an error");
+ return;
+ } else if (on_replay_interrupted()) {
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
+ get_remote_tag();
+}
+
+template <typename I>
+void ImageReplayer<I>::get_remote_tag() {
+ dout(15) << "tag_tid: " << m_replay_tag_tid << dendl;
+
+ Context *ctx = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_get_remote_tag>(this);
+ m_remote_journaler->get_tag(m_replay_tag_tid, &m_replay_tag, ctx);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_get_remote_tag(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r == 0) {
+ try {
+ auto it = m_replay_tag.data.cbegin();
+ decode(m_replay_tag_data, it);
+ } catch (const buffer::error &err) {
+ r = -EBADMSG;
+ }
+ }
+
+ if (r < 0) {
+ derr << "failed to retrieve remote tag " << m_replay_tag_tid << ": "
+ << cpp_strerror(r) << dendl;
+ m_event_replay_tracker.finish_op();
+ handle_replay_complete(r, "failed to retrieve remote tag");
+ return;
+ }
+
+ m_replay_tag_valid = true;
+ dout(15) << "decoded remote tag " << m_replay_tag_tid << ": "
+ << m_replay_tag_data << dendl;
+
+ allocate_local_tag();
+}
+
+template <typename I>
+void ImageReplayer<I>::allocate_local_tag() {
+ dout(15) << dendl;
+
+ std::string mirror_uuid = m_replay_tag_data.mirror_uuid;
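+  // translate the tag owner into the local cluster's terms: the remote's
+  // notion of LOCAL becomes our remote mirror uuid, and a tag we owned on
+  // the remote maps back to LOCAL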
+ if (mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ mirror_uuid = m_remote_image.mirror_uuid;
+ } else if (mirror_uuid == m_local_mirror_uuid) {
+ mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID;
+ } else if (mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
+    // handle a possible edge condition where the daemon has failed over
+    // and the local image has already been promoted/demoted
+ auto local_tag_data = m_local_journal->get_tag_data();
+ if (local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID &&
+ (local_tag_data.predecessor.commit_valid &&
+ local_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::LOCAL_MIRROR_UUID)) {
+ dout(15) << "skipping stale demotion event" << dendl;
+ handle_process_entry_safe(m_replay_entry, m_replay_start_time, 0);
+ handle_replay_ready();
+ return;
+ } else {
+ dout(5) << "encountered image demotion: stopping" << dendl;
+ Mutex::Locker locker(m_lock);
+ m_stop_requested = true;
+ }
+ }
+
+ librbd::journal::TagPredecessor predecessor(m_replay_tag_data.predecessor);
+ if (predecessor.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ predecessor.mirror_uuid = m_remote_image.mirror_uuid;
+ } else if (predecessor.mirror_uuid == m_local_mirror_uuid) {
+ predecessor.mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID;
+ }
+
+ dout(15) << "mirror_uuid=" << mirror_uuid << ", "
+ << "predecessor=" << predecessor << ", "
+ << "replay_tag_tid=" << m_replay_tag_tid << dendl;
+ Context *ctx = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_allocate_local_tag>(this);
+ m_local_journal->allocate_tag(mirror_uuid, predecessor, ctx);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_allocate_local_tag(int r) {
+ dout(15) << "r=" << r << ", "
+ << "tag_tid=" << m_local_journal->get_tag_tid() << dendl;
+
+ if (r < 0) {
+ derr << "failed to allocate journal tag: " << cpp_strerror(r) << dendl;
+ m_event_replay_tracker.finish_op();
+ handle_replay_complete(r, "failed to allocate journal tag");
+ return;
+ }
+
+ preprocess_entry();
+}
+
+template <typename I>
+void ImageReplayer<I>::preprocess_entry() {
+ dout(20) << "preprocessing entry tid=" << m_replay_entry.get_commit_tid()
+ << dendl;
+
+ bufferlist data = m_replay_entry.get_data();
+ auto it = data.cbegin();
+ int r = m_local_replay->decode(&it, &m_event_entry);
+ if (r < 0) {
+ derr << "failed to decode journal event" << dendl;
+ m_event_replay_tracker.finish_op();
+ handle_replay_complete(r, "failed to decode journal event");
+ return;
+ }
+
+ uint32_t delay = calculate_replay_delay(
+ m_event_entry.timestamp, m_local_image_ctx->mirroring_replay_delay);
+ if (delay == 0) {
+ handle_preprocess_entry_ready(0);
+ return;
+ }
+
+ dout(20) << "delaying replay by " << delay << " sec" << dendl;
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ ceph_assert(m_delayed_preprocess_task == nullptr);
+ m_delayed_preprocess_task = new FunctionContext(
+ [this](int r) {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ m_delayed_preprocess_task = nullptr;
+ m_threads->work_queue->queue(
+ create_context_callback<ImageReplayer,
+ &ImageReplayer<I>::handle_preprocess_entry_ready>(this), 0);
+ });
+ m_threads->timer->add_event_after(delay, m_delayed_preprocess_task);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_preprocess_entry_ready(int r) {
+ dout(20) << "r=" << r << dendl;
+ ceph_assert(r == 0);
+
+ m_replay_start_time = ceph_clock_now();
+ if (!m_event_preprocessor->is_required(m_event_entry)) {
+ process_entry();
+ return;
+ }
+
+ Context *ctx = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_preprocess_entry_safe>(this);
+ m_event_preprocessor->preprocess(&m_event_entry, ctx);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_preprocess_entry_safe(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ m_event_replay_tracker.finish_op();
+
+ if (r == -ECANCELED) {
+ handle_replay_complete(0, "lost exclusive lock");
+ } else {
+ derr << "failed to preprocess journal event" << dendl;
+ handle_replay_complete(r, "failed to preprocess journal event");
+ }
+ return;
+ }
+
+ process_entry();
+}
+
+template <typename I>
+void ImageReplayer<I>::process_entry() {
+ dout(20) << "processing entry tid=" << m_replay_entry.get_commit_tid()
+ << dendl;
+
+ // stop replaying events if stop has been requested
+ if (on_replay_interrupted()) {
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
+ Context *on_ready = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_process_entry_ready>(this);
+ Context *on_commit = new C_ReplayCommitted(this, std::move(m_replay_entry),
+ m_replay_start_time);
+
+ m_local_replay->process(m_event_entry, on_ready, on_commit);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_process_entry_ready(int r) {
+ dout(20) << dendl;
+ ceph_assert(r == 0);
+
+ bool update_status = false;
+ {
+ RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock);
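+    // detect a local image rename so the mirror status (and with it the
+    // asok hook) is refreshed under the new name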
+ if (m_local_image_name != m_local_image_ctx->name) {
+ m_local_image_name = m_local_image_ctx->name;
+ update_status = true;
+ }
+ }
+
+ if (update_status) {
+ reschedule_update_status_task(0);
+ }
+
+ // attempt to process the next event
+ handle_replay_ready();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_process_entry_safe(const ReplayEntry &replay_entry,
+ const utime_t &replay_start_time,
+ int r) {
+ dout(20) << "commit_tid=" << replay_entry.get_commit_tid() << ", r=" << r
+ << dendl;
+
+ if (r < 0) {
+ derr << "failed to commit journal event: " << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to commit journal event");
+ } else {
+ ceph_assert(m_remote_journaler != nullptr);
+ m_remote_journaler->committed(replay_entry);
+ }
+
+ auto bytes = replay_entry.get_data().length();
+ auto latency = ceph_clock_now() - replay_start_time;
+
+ if (g_perf_counters) {
+ g_perf_counters->inc(l_rbd_mirror_replay);
+ g_perf_counters->inc(l_rbd_mirror_replay_bytes, bytes);
+ g_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
+ }
+
+ auto ctx = new FunctionContext(
+ [this, bytes, latency](int r) {
+ Mutex::Locker locker(m_lock);
+ if (m_perf_counters) {
+ m_perf_counters->inc(l_rbd_mirror_replay);
+ m_perf_counters->inc(l_rbd_mirror_replay_bytes, bytes);
+ m_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
+ }
+ m_event_replay_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+bool ImageReplayer<I>::update_mirror_image_status(bool force,
+ const OptionalState &state) {
+ dout(15) << dendl;
+ {
+ Mutex::Locker locker(m_lock);
+ if (!start_mirror_image_status_update(force, false)) {
+ return false;
+ }
+ }
+
+ queue_mirror_image_status_update(state);
+ return true;
+}
+
+template <typename I>
+bool ImageReplayer<I>::start_mirror_image_status_update(bool force,
+ bool restarting) {
+ ceph_assert(m_lock.is_locked());
+
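+  // a restarted deferred update still holds its in-flight slot, so
+  // tolerate one outstanding update when restarting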
+ if (!force && !is_stopped_()) {
+ if (!is_running_()) {
+ dout(15) << "shut down in-progress: ignoring update" << dendl;
+ return false;
+ } else if (m_in_flight_status_updates > (restarting ? 1 : 0)) {
+ dout(15) << "already sending update" << dendl;
+ m_update_status_requested = true;
+ return false;
+ }
+ }
+
+ ++m_in_flight_status_updates;
+ dout(15) << "in-flight updates=" << m_in_flight_status_updates << dendl;
+ return true;
+}
+
+template <typename I>
+void ImageReplayer<I>::finish_mirror_image_status_update() {
+ reregister_admin_socket_hook();
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_in_flight_status_updates > 0);
+ if (--m_in_flight_status_updates > 0) {
+ dout(15) << "waiting on " << m_in_flight_status_updates << " in-flight "
+ << "updates" << dendl;
+ return;
+ }
+
+ std::swap(on_finish, m_on_update_status_finish);
+ }
+
+ dout(15) << dendl;
+ if (on_finish != nullptr) {
+ on_finish->complete(0);
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::queue_mirror_image_status_update(const OptionalState &state) {
+ dout(15) << dendl;
+
+ auto ctx = new FunctionContext(
+ [this, state](int r) {
+ send_mirror_status_update(state);
+ });
+
+ // ensure pending IO is flushed and the commit position is updated
+ // prior to updating the mirror status
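+  // (flush_local_replay() chains into flush_commit_position() before the
+  // status update is sent)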
+ ctx = new FunctionContext(
+ [this, ctx](int r) {
+ flush_local_replay(ctx);
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void ImageReplayer<I>::send_mirror_status_update(const OptionalState &opt_state) {
+ State state;
+ std::string state_desc;
+ int last_r;
+ bool stopping_replay;
+
+ OptionalMirrorImageStatusState mirror_image_status_state =
+ boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
+ image_replayer::BootstrapRequest<I>* bootstrap_request = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ state = m_state;
+ state_desc = m_state_desc;
+ mirror_image_status_state = m_mirror_image_status_state;
+ last_r = m_last_r;
+ stopping_replay = (m_local_image_ctx != nullptr);
+
+ if (m_bootstrap_request != nullptr) {
+ bootstrap_request = m_bootstrap_request;
+ bootstrap_request->get();
+ }
+ }
+
+ bool syncing = false;
+ if (bootstrap_request != nullptr) {
+ syncing = bootstrap_request->is_syncing();
+ bootstrap_request->put();
+ bootstrap_request = nullptr;
+ }
+
+ if (opt_state) {
+ state = *opt_state;
+ }
+
+ cls::rbd::MirrorImageStatus status;
+ status.up = true;
+ switch (state) {
+ case STATE_STARTING:
+ if (syncing) {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING;
+ status.description = state_desc.empty() ? "syncing" : state_desc;
+ mirror_image_status_state = status.state;
+ } else {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY;
+ status.description = "starting replay";
+ }
+ break;
+ case STATE_REPLAYING:
+ case STATE_REPLAY_FLUSHING:
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING;
+ {
+ Context *on_req_finish = new FunctionContext(
+ [this](int r) {
+ dout(15) << "replay status ready: r=" << r << dendl;
+ if (r >= 0) {
+ send_mirror_status_update(boost::none);
+ } else if (r == -EAGAIN) {
+ // decrement in-flight status update counter
+ handle_mirror_status_update(r);
+ }
+ });
+
+ std::string desc;
+ ceph_assert(m_replay_status_formatter != nullptr);
+ if (!m_replay_status_formatter->get_or_send_update(&desc,
+ on_req_finish)) {
+ dout(15) << "waiting for replay status" << dendl;
+ return;
+ }
+ status.description = "replaying, " + desc;
+ mirror_image_status_state = boost::make_optional(
+ false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
+ }
+ break;
+ case STATE_STOPPING:
+ if (stopping_replay) {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY;
+ status.description = state_desc.empty() ? "stopping replay" : state_desc;
+ break;
+ }
+ // FALLTHROUGH
+ case STATE_STOPPED:
+ if (last_r == -EREMOTEIO) {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN;
+ status.description = state_desc;
+ mirror_image_status_state = status.state;
+ } else if (last_r < 0) {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR;
+ status.description = state_desc;
+ mirror_image_status_state = status.state;
+ } else {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPED;
+ status.description = state_desc.empty() ? "stopped" : state_desc;
+ mirror_image_status_state = boost::none;
+ }
+ break;
+ default:
+ ceph_assert(!"invalid state");
+ }
+
+ {
+ Mutex::Locker locker(m_lock);
+ m_mirror_image_status_state = mirror_image_status_state;
+ }
+
+ // prevent the status from ping-ponging when failed replays are restarted
+ if (mirror_image_status_state &&
+ *mirror_image_status_state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR) {
+ status.state = *mirror_image_status_state;
+ }
+
+ dout(15) << "status=" << status << dendl;
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_status_set(&op, m_global_image_id, status);
+
+ ceph_assert(m_local_ioctx);
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ ImageReplayer<I>, &ImageReplayer<I>::handle_mirror_status_update>(this);
+ int r = m_local_ioctx->aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_mirror_status_update(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ bool running = false;
+ bool started = false;
+ {
+ Mutex::Locker locker(m_lock);
+ bool update_status_requested = false;
+ std::swap(update_status_requested, m_update_status_requested);
+
+ running = is_running_();
+ if (running && update_status_requested) {
+ started = start_mirror_image_status_update(false, true);
+ }
+ }
+
+ // if a deferred update is available, send it -- otherwise reschedule
+ // the timer task
+ if (started) {
+ queue_mirror_image_status_update(boost::none);
+ } else if (running) {
+ reschedule_update_status_task(0);
+ }
+
+ // mark committed status update as no longer in-flight
+ finish_mirror_image_status_update();
+}
+
+template <typename I>
+void ImageReplayer<I>::reschedule_update_status_task(int new_interval) {
+ bool canceled_task = false;
+ {
+ Mutex::Locker locker(m_lock);
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+
+ if (m_update_status_task) {
+ dout(15) << "canceling existing status update task" << dendl;
+
+ canceled_task = m_threads->timer->cancel_event(m_update_status_task);
+ m_update_status_task = nullptr;
+ }
+
+ if (new_interval > 0) {
+ m_update_status_interval = new_interval;
+ }
+
+ if (new_interval >= 0 && is_running_() &&
+ start_mirror_image_status_update(true, false)) {
+ m_update_status_task = new FunctionContext(
+ [this](int r) {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ m_update_status_task = nullptr;
+
+ queue_mirror_image_status_update(boost::none);
+ });
+ dout(15) << "scheduling status update task after "
+ << m_update_status_interval << " seconds" << dendl;
+ m_threads->timer->add_event_after(m_update_status_interval,
+ m_update_status_task);
+ }
+ }
+
+ if (canceled_task) {
+ // decrement in-flight status update counter for canceled task
+ finish_mirror_image_status_update();
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::shut_down(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ bool canceled_delayed_preprocess_task = false;
+ {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ if (m_delayed_preprocess_task != nullptr) {
+ canceled_delayed_preprocess_task = m_threads->timer->cancel_event(
+ m_delayed_preprocess_task);
+ ceph_assert(canceled_delayed_preprocess_task);
+ m_delayed_preprocess_task = nullptr;
+ }
+ }
+ if (canceled_delayed_preprocess_task) {
+ // wake up sleeping replay
+ m_event_replay_tracker.finish_op();
+ }
+
+ reschedule_update_status_task(-1);
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_state == STATE_STOPPING);
+
+ // if status updates are in-flight, wait for them to complete
+ // before proceeding
+ if (m_in_flight_status_updates > 0) {
+ if (m_on_update_status_finish == nullptr) {
+ dout(15) << "waiting for in-flight status update" << dendl;
+ m_on_update_status_finish = new FunctionContext(
+ [this, r](int _r) {
+ shut_down(r);
+ });
+ }
+ return;
+ }
+ }
+
+ // NOTE: it's important to ensure that the local image is fully
+ // closed before attempting to close the remote journal in
+ // case the remote cluster is unreachable
+
+ // chain the shut down sequence (reverse order)
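+  // (each step wraps the previous ctx, so at run time the order is:
+  // shut down the local replay -> drain in-flight events -> detach from
+  // the local journal -> close the local image -> stop remote journal
+  // replay -> shut down the remote journaler -> final status update)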
+ Context *ctx = new FunctionContext(
+ [this, r](int _r) {
+ if (m_local_ioctx) {
+ update_mirror_image_status(true, STATE_STOPPED);
+ }
+ handle_shut_down(r);
+ });
+
+ // close the remote journal
+ if (m_remote_journaler != nullptr) {
+ ctx = new FunctionContext([this, ctx](int r) {
+ delete m_remote_journaler;
+ m_remote_journaler = nullptr;
+ ctx->complete(0);
+ });
+ ctx = new FunctionContext([this, ctx](int r) {
+ m_remote_journaler->remove_listener(&m_remote_listener);
+ m_remote_journaler->shut_down(ctx);
+ });
+ }
+
+ // stop the replay of remote journal events
+ if (m_replay_handler != nullptr) {
+ ctx = new FunctionContext([this, ctx](int r) {
+ delete m_replay_handler;
+ m_replay_handler = nullptr;
+
+ m_event_replay_tracker.wait_for_ops(ctx);
+ });
+ ctx = new FunctionContext([this, ctx](int r) {
+ m_remote_journaler->stop_replay(ctx);
+ });
+ }
+
+ // close the local image (release exclusive lock)
+ if (m_local_image_ctx) {
+ ctx = new FunctionContext([this, ctx](int r) {
+ CloseImageRequest<I> *request = CloseImageRequest<I>::create(
+ &m_local_image_ctx, ctx);
+ request->send();
+ });
+ }
+
+ // shut down event replay into the local image
+ if (m_local_journal != nullptr) {
+ ctx = new FunctionContext([this, ctx](int r) {
+ m_local_journal = nullptr;
+ ctx->complete(0);
+ });
+ if (m_local_replay != nullptr) {
+ ctx = new FunctionContext([this, ctx](int r) {
+ m_local_journal->stop_external_replay();
+ m_local_replay = nullptr;
+
+ EventPreprocessor<I>::destroy(m_event_preprocessor);
+ m_event_preprocessor = nullptr;
+ ctx->complete(0);
+ });
+ }
+ ctx = new FunctionContext([this, ctx](int r) {
+ // blocks if listener notification is in-progress
+ m_local_journal->remove_listener(m_journal_listener);
+ ctx->complete(0);
+ });
+ }
+
+ // wait for all local in-flight replay events to complete
+ ctx = new FunctionContext([this, ctx](int r) {
+ if (r < 0) {
+ derr << "error shutting down journal replay: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ m_event_replay_tracker.wait_for_ops(ctx);
+ });
+
+ // flush any local in-flight replay events
+ if (m_local_replay != nullptr) {
+ ctx = new FunctionContext([this, ctx](int r) {
+ m_local_replay->shut_down(true, ctx);
+ });
+ }
+
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_shut_down(int r) {
+ reschedule_update_status_task(-1);
+
+ bool resync_requested = false;
+ bool delete_requested = false;
+ bool unregister_asok_hook = false;
+ {
+ Mutex::Locker locker(m_lock);
+
+ // if status updates are in-flight, wait for them to complete
+ // before proceeding
+ if (m_in_flight_status_updates > 0) {
+ if (m_on_update_status_finish == nullptr) {
+ dout(15) << "waiting for in-flight status update" << dendl;
+ m_on_update_status_finish = new FunctionContext(
+ [this, r](int _r) {
+ handle_shut_down(r);
+ });
+ }
+ return;
+ }
+
+ if (m_delete_requested && !m_local_image_id.empty()) {
+ ceph_assert(m_remote_image.image_id.empty());
+ dout(0) << "remote image no longer exists: scheduling deletion" << dendl;
+ unregister_asok_hook = true;
+ std::swap(delete_requested, m_delete_requested);
+ }
+
+ std::swap(resync_requested, m_resync_requested);
+ if (delete_requested || resync_requested) {
+ m_local_image_id = "";
+ } else if (m_last_r == -ENOENT &&
+ m_local_image_id.empty() && m_remote_image.image_id.empty()) {
+ dout(0) << "mirror image no longer exists" << dendl;
+ unregister_asok_hook = true;
+ m_finished = true;
+ }
+ }
+
+ if (unregister_asok_hook) {
+ unregister_admin_socket_hook();
+ }
+
+ if (delete_requested || resync_requested) {
+ dout(5) << "moving image to trash" << dendl;
+ auto ctx = new FunctionContext([this, r](int) {
+ handle_shut_down(r);
+ });
+ ImageDeleter<I>::trash_move(*m_local_ioctx, m_global_image_id,
+ resync_requested, m_threads->work_queue, ctx);
+ return;
+ }
+
+ dout(10) << "stop complete" << dendl;
+ ReplayStatusFormatter<I>::destroy(m_replay_status_formatter);
+ m_replay_status_formatter = nullptr;
+
+ Context *on_start = nullptr;
+ Context *on_stop = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ std::swap(on_start, m_on_start_finish);
+ std::swap(on_stop, m_on_stop_finish);
+ m_stop_requested = false;
+ ceph_assert(m_delayed_preprocess_task == nullptr);
+ ceph_assert(m_state == STATE_STOPPING);
+ m_state = STATE_STOPPED;
+ }
+
+ if (on_start != nullptr) {
+ dout(10) << "on start finish complete, r=" << r << dendl;
+ on_start->complete(r);
+ r = 0;
+ }
+ if (on_stop != nullptr) {
+ dout(10) << "on stop finish complete, r=" << r << dendl;
+ on_stop->complete(r);
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_remote_journal_metadata_updated() {
+ dout(20) << dendl;
+
+ cls::journal::Client client;
+ {
+ Mutex::Locker locker(m_lock);
+ if (!is_running_()) {
+ return;
+ }
+
+ int r = m_remote_journaler->get_cached_client(m_local_mirror_uuid, &client);
+ if (r < 0) {
+ derr << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+ return;
+ }
+ }
+
+ if (client.state != cls::journal::CLIENT_STATE_CONNECTED) {
+ dout(0) << "client flagged disconnected, stopping image replay" << dendl;
+ stop(nullptr, false, -ENOTCONN, "disconnected");
+ }
+}
+
+template <typename I>
+std::string ImageReplayer<I>::to_string(const State state) {
+ switch (state) {
+ case ImageReplayer<I>::STATE_STARTING:
+ return "Starting";
+ case ImageReplayer<I>::STATE_REPLAYING:
+ return "Replaying";
+ case ImageReplayer<I>::STATE_REPLAY_FLUSHING:
+ return "ReplayFlushing";
+ case ImageReplayer<I>::STATE_STOPPING:
+ return "Stopping";
+ case ImageReplayer<I>::STATE_STOPPED:
+ return "Stopped";
+ default:
+ break;
+ }
+ return "Unknown(" + stringify(state) + ")";
+}
+
+template <typename I>
+void ImageReplayer<I>::resync_image(Context *on_finish) {
+ dout(10) << dendl;
+
+ m_resync_requested = true;
+ stop(on_finish);
+}
+
+template <typename I>
+void ImageReplayer<I>::register_admin_socket_hook() {
+ ImageReplayerAdminSocketHook<I> *asok_hook;
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_asok_hook != nullptr) {
+ return;
+ }
+
+ ceph_assert(m_perf_counters == nullptr);
+
+ dout(15) << "registered asok hook: " << m_name << dendl;
+ asok_hook = new ImageReplayerAdminSocketHook<I>(g_ceph_context, m_name,
+ this);
+ int r = asok_hook->register_commands();
+ if (r == 0) {
+ m_asok_hook = asok_hook;
+
+ CephContext *cct = static_cast<CephContext *>(m_local->cct());
+ auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio");
+ PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_" + m_name,
+ l_rbd_mirror_first, l_rbd_mirror_last);
+ plb.add_u64_counter(l_rbd_mirror_replay, "replay", "Replays", "r", prio);
+ plb.add_u64_counter(l_rbd_mirror_replay_bytes, "replay_bytes",
+ "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
+ plb.add_time_avg(l_rbd_mirror_replay_latency, "replay_latency",
+ "Replay latency", "rl", prio);
+ m_perf_counters = plb.create_perf_counters();
+ g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
+
+ return;
+ }
+ derr << "error registering admin socket commands" << dendl;
+ }
+ delete asok_hook;
+}
+
+template <typename I>
+void ImageReplayer<I>::unregister_admin_socket_hook() {
+ dout(15) << dendl;
+
+ AdminSocketHook *asok_hook = nullptr;
+ PerfCounters *perf_counters = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ std::swap(asok_hook, m_asok_hook);
+ std::swap(perf_counters, m_perf_counters);
+ }
+ delete asok_hook;
+ if (perf_counters != nullptr) {
+ g_ceph_context->get_perfcounters_collection()->remove(perf_counters);
+ delete perf_counters;
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::reregister_admin_socket_hook() {
+ {
+ Mutex::Locker locker(m_lock);
+ auto name = m_local_ioctx->get_pool_name() + "/" + m_local_image_name;
+ if (m_asok_hook != nullptr && m_name == name) {
+ return;
+ }
+ m_name = name;
+ }
+ unregister_admin_socket_hook();
+ register_admin_socket_hook();
+}
+
+template <typename I>
+std::ostream &operator<<(std::ostream &os, const ImageReplayer<I> &replayer)
+{
+ os << "ImageReplayer: " << &replayer << " [" << replayer.get_local_pool_id()
+ << "/" << replayer.get_global_image_id() << "]";
+ return os;
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h
new file mode 100644
index 00000000..9af3e961
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageReplayer.h
@@ -0,0 +1,438 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
+
+#include "common/AsyncOpTracker.h"
+#include "common/Mutex.h"
+#include "common/WorkQueue.h"
+#include "include/rados/librados.hpp"
+#include "cls/journal/cls_journal_types.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "journal/JournalMetadataListener.h"
+#include "journal/ReplayEntry.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include "ProgressContext.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_replayer/Types.h"
+
+#include <boost/noncopyable.hpp>
+#include <boost/optional.hpp>
+
+#include <set>
+#include <map>
+#include <atomic>
+#include <string>
+#include <vector>
+
+class AdminSocketHook;
+class PerfCounters;
+
+namespace journal {
+
+class Journaler;
+class ReplayHandler;
+
+}
+
+namespace librbd {
+
+class ImageCtx;
+namespace journal { template <typename> class Replay; }
+
+}
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct InstanceWatcher;
+template <typename> struct Threads;
+
+namespace image_replayer { template <typename> class BootstrapRequest; }
+namespace image_replayer { template <typename> class EventPreprocessor; }
+namespace image_replayer { template <typename> class ReplayStatusFormatter; }
+
+/**
+ * Replays changes from a remote cluster for a single image.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageReplayer {
+public:
+ static ImageReplayer *create(
+ Threads<ImageCtxT> *threads, InstanceWatcher<ImageCtxT> *instance_watcher,
+ RadosRef local, const std::string &local_mirror_uuid, int64_t local_pool_id,
+ const std::string &global_image_id) {
+ return new ImageReplayer(threads, instance_watcher, local,
+ local_mirror_uuid, local_pool_id, global_image_id);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ ImageReplayer(Threads<ImageCtxT> *threads,
+ InstanceWatcher<ImageCtxT> *instance_watcher,
+ RadosRef local, const std::string &local_mirror_uuid,
+ int64_t local_pool_id, const std::string &global_image_id);
+ virtual ~ImageReplayer();
+ ImageReplayer(const ImageReplayer&) = delete;
+ ImageReplayer& operator=(const ImageReplayer&) = delete;
+
+ bool is_stopped() { Mutex::Locker l(m_lock); return is_stopped_(); }
+ bool is_running() { Mutex::Locker l(m_lock); return is_running_(); }
+ bool is_replaying() { Mutex::Locker l(m_lock); return is_replaying_(); }
+
+ std::string get_name() { Mutex::Locker l(m_lock); return m_name; }
+ void set_state_description(int r, const std::string &desc);
+
+ // TODO temporary until policy handles release of image replayers
+ inline bool is_finished() const {
+ Mutex::Locker locker(m_lock);
+ return m_finished;
+ }
+ inline void set_finished(bool finished) {
+ Mutex::Locker locker(m_lock);
+ m_finished = finished;
+ }
+
+ inline bool is_blacklisted() const {
+ Mutex::Locker locker(m_lock);
+ return (m_last_r == -EBLACKLISTED);
+ }
+
+ image_replayer::HealthState get_health_state() const;
+
+ void add_peer(const std::string &peer_uuid, librados::IoCtx &remote_io_ctx);
+
+ inline int64_t get_local_pool_id() const {
+ return m_local_pool_id;
+ }
+ inline const std::string& get_global_image_id() const {
+ return m_global_image_id;
+ }
+
+ void start(Context *on_finish = nullptr, bool manual = false);
+ void stop(Context *on_finish = nullptr, bool manual = false,
+ int r = 0, const std::string& desc = "");
+ void restart(Context *on_finish = nullptr);
+ void flush();
+
+ void resync_image(Context *on_finish=nullptr);
+
+ void print_status(Formatter *f, std::stringstream *ss);
+
+ virtual void handle_replay_ready();
+ virtual void handle_replay_complete(int r, const std::string &error_desc);
+
+protected:
+ /**
+ * @verbatim
+ * (error)
+ * <uninitialized> <------------------------------------ FAIL
+ * | ^
+ * v *
+ * <starting> *
+ * | *
+ * v (error) *
+ * PREPARE_LOCAL_IMAGE * * * * * * * * * * * * * * * * * *
+ * | *
+ * v (error) *
+ * PREPARE_REMOTE_IMAGE * * * * * * * * * * * * * * * * * *
+ * | *
+ * v (error) *
+ * BOOTSTRAP_IMAGE * * * * * * * * * * * * * * * * * * * *
+ * | *
+ * v (error) *
+ * INIT_REMOTE_JOURNALER * * * * * * * * * * * * * * * * *
+ * | *
+ * v (error) *
+ * START_REPLAY * * * * * * * * * * * * * * * * * * * * * *
+ * |
+ * | /--------------------------------------------\
+ * | | |
+ * v v (asok flush) |
+ * REPLAYING -------------> LOCAL_REPLAY_FLUSH |
+ * | \ | |
+ * | | v |
+ * | | FLUSH_COMMIT_POSITION |
+ * | | | |
+ * | | \--------------------/|
+ * | | |
+ * | | (entries available) |
+ * | \-----------> REPLAY_READY |
+ * | | |
+ * | | (skip if not |
+ * | v needed) (error)
+ * | REPLAY_FLUSH * * * * * * * * *
+ * | | | *
+ * | | (skip if not | *
+ * | v needed) (error) *
+ * | GET_REMOTE_TAG * * * * * * * *
+ * | | | *
+ * | | (skip if not | *
+ * | v needed) (error) *
+ * | ALLOCATE_LOCAL_TAG * * * * * *
+ * | | | *
+ * | v (error) *
+ * | PREPROCESS_ENTRY * * * * * * *
+ * | | | *
+ * | v (error) *
+ * | PROCESS_ENTRY * * * * * * * * *
+ * | | | *
+ * | \---------------------/ *
+ * v *
+ * REPLAY_COMPLETE < * * * * * * * * * * * * * * * * * * *
+ * |
+ * v
+ * JOURNAL_REPLAY_SHUT_DOWN
+ * |
+ * v
+ * LOCAL_IMAGE_CLOSE
+ * |
+ * v
+ * <stopped>
+ *
+ * @endverbatim
+ */
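+
+ // The diagram above follows the convention used throughout this class:
+ // each transition is an initiating method (e.g. replay_flush(),
+ // get_remote_tag()) paired with a handle_*(int r) completion callback
+ // that selects the next state, with errors short-circuiting to
+ // REPLAY_COMPLETE and the shut_down() path.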
+
+ virtual void on_start_fail(int r, const std::string &desc);
+ virtual bool on_start_interrupted();
+ virtual bool on_start_interrupted(Mutex& lock);
+
+ virtual void on_stop_journal_replay(int r = 0, const std::string &desc = "");
+
+ bool on_replay_interrupted();
+
+private:
+ typedef typename librbd::journal::TypeTraits<ImageCtxT>::ReplayEntry ReplayEntry;
+
+ enum State {
+ STATE_UNKNOWN,
+ STATE_STARTING,
+ STATE_REPLAYING,
+ STATE_REPLAY_FLUSHING,
+ STATE_STOPPING,
+ STATE_STOPPED,
+ };
+
+ struct RemoteImage {
+ std::string mirror_uuid;
+ std::string image_id;
+ librados::IoCtx io_ctx;
+
+ RemoteImage() {
+ }
+ RemoteImage(const Peer& peer) : io_ctx(peer.io_ctx) {
+ }
+ };
+
+ typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler;
+ typedef boost::optional<State> OptionalState;
+ typedef boost::optional<cls::rbd::MirrorImageStatusState>
+ OptionalMirrorImageStatusState;
+
+ struct JournalListener : public librbd::journal::Listener {
+ ImageReplayer *img_replayer;
+
+ JournalListener(ImageReplayer *img_replayer)
+ : img_replayer(img_replayer) {
+ }
+
+ void handle_close() override {
+ img_replayer->on_stop_journal_replay();
+ }
+
+ void handle_promoted() override {
+ img_replayer->on_stop_journal_replay(0, "force promoted");
+ }
+
+ void handle_resync() override {
+ img_replayer->resync_image();
+ }
+ };
+
+ class BootstrapProgressContext : public ProgressContext {
+ public:
+ BootstrapProgressContext(ImageReplayer<ImageCtxT> *replayer) :
+ replayer(replayer) {
+ }
+
+ void update_progress(const std::string &description,
+ bool flush = true) override;
+ private:
+ ImageReplayer<ImageCtxT> *replayer;
+ };
+
+ Threads<ImageCtxT> *m_threads;
+ InstanceWatcher<ImageCtxT> *m_instance_watcher;
+
+ Peers m_peers;
+ RemoteImage m_remote_image;
+
+ RadosRef m_local;
+ std::string m_local_mirror_uuid;
+ int64_t m_local_pool_id;
+ std::string m_local_image_id;
+ std::string m_global_image_id;
+ std::string m_local_image_name;
+ std::string m_name;
+
+ mutable Mutex m_lock;
+ State m_state = STATE_STOPPED;
+ std::string m_state_desc;
+
+ OptionalMirrorImageStatusState m_mirror_image_status_state =
+ boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
+ int m_last_r = 0;
+
+ BootstrapProgressContext m_progress_cxt;
+
+ bool m_finished = false;
+ bool m_delete_requested = false;
+ bool m_resync_requested = false;
+
+ image_replayer::EventPreprocessor<ImageCtxT> *m_event_preprocessor = nullptr;
+ image_replayer::ReplayStatusFormatter<ImageCtxT> *m_replay_status_formatter =
+ nullptr;
+ IoCtxRef m_local_ioctx;
+ ImageCtxT *m_local_image_ctx = nullptr;
+ std::string m_local_image_tag_owner;
+
+ decltype(ImageCtxT::journal) m_local_journal = nullptr;
+ librbd::journal::Replay<ImageCtxT> *m_local_replay = nullptr;
+ Journaler* m_remote_journaler = nullptr;
+ ::journal::ReplayHandler *m_replay_handler = nullptr;
+ librbd::journal::Listener *m_journal_listener;
+
+ Context *m_on_start_finish = nullptr;
+ Context *m_on_stop_finish = nullptr;
+ Context *m_update_status_task = nullptr;
+ int m_update_status_interval = 0;
+ librados::AioCompletion *m_update_status_comp = nullptr;
+ bool m_stop_requested = false;
+ bool m_manual_stop = false;
+
+ AdminSocketHook *m_asok_hook = nullptr;
+ PerfCounters *m_perf_counters = nullptr;
+
+ image_replayer::BootstrapRequest<ImageCtxT> *m_bootstrap_request = nullptr;
+
+ uint32_t m_in_flight_status_updates = 0;
+ bool m_update_status_requested = false;
+ Context *m_on_update_status_finish = nullptr;
+
+ cls::journal::ClientState m_client_state =
+ cls::journal::CLIENT_STATE_DISCONNECTED;
+ librbd::journal::MirrorPeerClientMeta m_client_meta;
+
+ ReplayEntry m_replay_entry;
+ utime_t m_replay_start_time;
+ bool m_replay_tag_valid = false;
+ uint64_t m_replay_tag_tid = 0;
+ cls::journal::Tag m_replay_tag;
+ librbd::journal::TagData m_replay_tag_data;
+ librbd::journal::EventEntry m_event_entry;
+ AsyncOpTracker m_event_replay_tracker;
+ Context *m_delayed_preprocess_task = nullptr;
+
+ struct RemoteJournalerListener : public ::journal::JournalMetadataListener {
+ ImageReplayer *replayer;
+
+ RemoteJournalerListener(ImageReplayer *replayer) : replayer(replayer) { }
+
+ void handle_update(::journal::JournalMetadata *) override;
+ } m_remote_listener;
+
+ struct C_ReplayCommitted : public Context {
+ ImageReplayer *replayer;
+ ReplayEntry replay_entry;
+ utime_t replay_start_time;
+
+ C_ReplayCommitted(ImageReplayer *replayer,
+ ReplayEntry &&replay_entry,
+ const utime_t &replay_start_time)
+ : replayer(replayer), replay_entry(std::move(replay_entry)),
+ replay_start_time(replay_start_time) {
+ }
+ void finish(int r) override {
+ replayer->handle_process_entry_safe(replay_entry, replay_start_time, r);
+ }
+ };
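+
+ // Each in-flight entry gets its own C_ReplayCommitted carrying the entry
+ // and its start timestamp, so handle_process_entry_safe() can attribute
+ // latency (cf. l_rbd_mirror_replay_latency) to that specific entry once
+ // it is safely committed.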
+
+ static std::string to_string(const State state);
+
+ bool is_stopped_() const {
+ return m_state == STATE_STOPPED;
+ }
+ bool is_running_() const {
+ return !is_stopped_() && m_state != STATE_STOPPING && !m_stop_requested;
+ }
+ bool is_replaying_() const {
+ return (m_state == STATE_REPLAYING ||
+ m_state == STATE_REPLAY_FLUSHING);
+ }
+
+ void flush_local_replay(Context* on_flush);
+ void handle_flush_local_replay(Context* on_flush, int r);
+
+ void flush_commit_position(Context* on_flush);
+ void handle_flush_commit_position(Context* on_flush, int r);
+
+ bool update_mirror_image_status(bool force, const OptionalState &state);
+ bool start_mirror_image_status_update(bool force, bool restarting);
+ void finish_mirror_image_status_update();
+ void queue_mirror_image_status_update(const OptionalState &state);
+ void send_mirror_status_update(const OptionalState &state);
+ void handle_mirror_status_update(int r);
+ void reschedule_update_status_task(int new_interval);
+
+ void shut_down(int r);
+ void handle_shut_down(int r);
+ void handle_remote_journal_metadata_updated();
+
+ void prepare_local_image();
+ void handle_prepare_local_image(int r);
+
+ void prepare_remote_image();
+ void handle_prepare_remote_image(int r);
+
+ void bootstrap();
+ void handle_bootstrap(int r);
+
+ void init_remote_journaler();
+ void handle_init_remote_journaler(int r);
+
+ void start_replay();
+ void handle_start_replay(int r);
+
+ void replay_flush();
+ void handle_replay_flush(int r);
+
+ void get_remote_tag();
+ void handle_get_remote_tag(int r);
+
+ void allocate_local_tag();
+ void handle_allocate_local_tag(int r);
+
+ void preprocess_entry();
+ void handle_preprocess_entry_ready(int r);
+ void handle_preprocess_entry_safe(int r);
+
+ void process_entry();
+ void handle_process_entry_ready(int r);
+ void handle_process_entry_safe(const ReplayEntry& replay_entry,
+ const utime_t &replay_start_time, int r);
+
+ void register_admin_socket_hook();
+ void unregister_admin_socket_hook();
+ void reregister_admin_socket_hook();
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageReplayer<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/ImageSync.cc b/src/tools/rbd_mirror/ImageSync.cc
new file mode 100644
index 00000000..929d75c2
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSync.cc
@@ -0,0 +1,481 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ImageSync.h"
+#include "InstanceWatcher.h"
+#include "ProgressContext.h"
+#include "common/debug.h"
+#include "common/Timer.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/DeepCopyRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include "librbd/internal.h"
+#include "librbd/journal/Types.h"
+#include "tools/rbd_mirror/image_sync/SyncPointCreateRequest.h"
+#include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageSync: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+
+using namespace image_sync;
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::unique_lock_name;
+
+template <typename I>
+class ImageSync<I>::ImageCopyProgressContext : public librbd::ProgressContext {
+public:
+ ImageCopyProgressContext(ImageSync *image_sync) : image_sync(image_sync) {
+ }
+
+ int update_progress(uint64_t object_no, uint64_t object_count) override {
+ image_sync->handle_copy_image_update_progress(object_no, object_count);
+ return 0;
+ }
+
+ ImageSync *image_sync;
+};
+
+template <typename I>
+ImageSync<I>::ImageSync(I *local_image_ctx, I *remote_image_ctx,
+ SafeTimer *timer, Mutex *timer_lock,
+ const std::string &mirror_uuid, Journaler *journaler,
+ MirrorPeerClientMeta *client_meta,
+ ContextWQ *work_queue,
+ InstanceWatcher<I> *instance_watcher,
+ Context *on_finish, ProgressContext *progress_ctx)
+ : BaseRequest("rbd::mirror::ImageSync", local_image_ctx->cct, on_finish),
+ m_local_image_ctx(local_image_ctx), m_remote_image_ctx(remote_image_ctx),
+ m_timer(timer), m_timer_lock(timer_lock), m_mirror_uuid(mirror_uuid),
+ m_journaler(journaler), m_client_meta(client_meta),
+ m_work_queue(work_queue), m_instance_watcher(instance_watcher),
+ m_progress_ctx(progress_ctx),
+ m_lock(unique_lock_name("ImageSync::m_lock", this)),
+ m_update_sync_point_interval(m_local_image_ctx->cct->_conf.template get_val<double>(
+ "rbd_mirror_sync_point_update_age")), m_client_meta_copy(*client_meta) {
+}
+
+template <typename I>
+ImageSync<I>::~ImageSync() {
+ ceph_assert(m_image_copy_request == nullptr);
+ ceph_assert(m_image_copy_prog_ctx == nullptr);
+ ceph_assert(m_update_sync_ctx == nullptr);
+}
+
+template <typename I>
+void ImageSync<I>::send() {
+ send_notify_sync_request();
+}
+
+template <typename I>
+void ImageSync<I>::cancel() {
+ Mutex::Locker locker(m_lock);
+
+ dout(10) << dendl;
+
+ m_canceled = true;
+
+ if (m_instance_watcher->cancel_sync_request(m_local_image_ctx->id)) {
+ return;
+ }
+
+ if (m_image_copy_request != nullptr) {
+ m_image_copy_request->cancel();
+ }
+}
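+
+// Cancellation is two-stage: if the sync is still waiting for a throttler
+// slot, cancel_sync_request() completes the queued context with -ECANCELED
+// and nothing else is needed; otherwise the in-flight deep-copy request is
+// canceled and the -ECANCELED result surfaces via handle_copy_image().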
+
+template <typename I>
+void ImageSync<I>::send_notify_sync_request() {
+ update_progress("NOTIFY_SYNC_REQUEST");
+
+ dout(10) << dendl;
+
+ m_lock.Lock();
+ if (m_canceled) {
+ m_lock.Unlock();
+ BaseRequest::finish(-ECANCELED);
+ return;
+ }
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_notify_sync_request>(this));
+ m_instance_watcher->notify_sync_request(m_local_image_ctx->id, ctx);
+ m_lock.Unlock();
+}
+
+template <typename I>
+void ImageSync<I>::handle_notify_sync_request(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ m_lock.Lock();
+ if (r == 0 && m_canceled) {
+ r = -ECANCELED;
+ }
+ m_lock.Unlock();
+
+ if (r < 0) {
+ BaseRequest::finish(r);
+ return;
+ }
+
+ send_prune_catch_up_sync_point();
+}
+
+template <typename I>
+void ImageSync<I>::send_prune_catch_up_sync_point() {
+ update_progress("PRUNE_CATCH_UP_SYNC_POINT");
+
+ if (m_client_meta->sync_points.empty()) {
+ send_create_sync_point();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ // prune removes sync points that reference missing snapshots and
+ // ensures we have at most one sync point (in case we restarted)
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_prune_catch_up_sync_point>(this);
+ SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create(
+ m_remote_image_ctx, false, m_journaler, m_client_meta, ctx);
+ request->send();
+}
+
+template <typename I>
+void ImageSync<I>::handle_prune_catch_up_sync_point(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to prune catch-up sync point: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_create_sync_point();
+}
+
+template <typename I>
+void ImageSync<I>::send_create_sync_point() {
+ update_progress("CREATE_SYNC_POINT");
+
+ // TODO: when support for disconnecting laggy clients is added,
+ // re-connect and create catch-up sync point
+ if (m_client_meta->sync_points.size() > 0) {
+ send_copy_image();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_create_sync_point>(this);
+ SyncPointCreateRequest<I> *request = SyncPointCreateRequest<I>::create(
+ m_remote_image_ctx, m_mirror_uuid, m_journaler, m_client_meta, ctx);
+ request->send();
+}
+
+template <typename I>
+void ImageSync<I>::handle_create_sync_point(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to create sync point: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ send_copy_image();
+}
+
+template <typename I>
+void ImageSync<I>::send_copy_image() {
+ librados::snap_t snap_id_start = 0;
+ librados::snap_t snap_id_end;
+ librbd::deep_copy::ObjectNumber object_number;
+ int r = 0;
+ {
+ RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock);
+ ceph_assert(!m_client_meta->sync_points.empty());
+ auto &sync_point = m_client_meta->sync_points.front();
+ snap_id_end = m_remote_image_ctx->get_snap_id(
+ cls::rbd::UserSnapshotNamespace(), sync_point.snap_name);
+ if (snap_id_end == CEPH_NOSNAP) {
+ derr << ": failed to locate snapshot: " << sync_point.snap_name << dendl;
+ r = -ENOENT;
+ } else if (!sync_point.from_snap_name.empty()) {
+ snap_id_start = m_remote_image_ctx->get_snap_id(
+ cls::rbd::UserSnapshotNamespace(), sync_point.from_snap_name);
+ if (snap_id_start == CEPH_NOSNAP) {
+ derr << ": failed to locate from snapshot: "
+ << sync_point.from_snap_name << dendl;
+ r = -ENOENT;
+ }
+ }
+ object_number = sync_point.object_number;
+ }
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ m_lock.Lock();
+ if (m_canceled) {
+ m_lock.Unlock();
+ finish(-ECANCELED);
+ return;
+ }
+
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_copy_image>(this);
+ m_image_copy_prog_ctx = new ImageCopyProgressContext(this);
+ m_image_copy_request = librbd::DeepCopyRequest<I>::create(
+ m_remote_image_ctx, m_local_image_ctx, snap_id_start, snap_id_end,
+ 0, false, object_number, m_work_queue, &m_client_meta->snap_seqs,
+ m_image_copy_prog_ctx, ctx);
+ m_image_copy_request->get();
+ m_lock.Unlock();
+
+ update_progress("COPY_IMAGE");
+
+ m_image_copy_request->send();
+}
+
+template <typename I>
+void ImageSync<I>::handle_copy_image(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ {
+ Mutex::Locker timer_locker(*m_timer_lock);
+ Mutex::Locker locker(m_lock);
+ m_image_copy_request->put();
+ m_image_copy_request = nullptr;
+ delete m_image_copy_prog_ctx;
+ m_image_copy_prog_ctx = nullptr;
+ if (r == 0 && m_canceled) {
+ r = -ECANCELED;
+ }
+
+ if (m_update_sync_ctx != nullptr) {
+ m_timer->cancel_event(m_update_sync_ctx);
+ m_update_sync_ctx = nullptr;
+ }
+
+ if (m_updating_sync_point) {
+ m_ret_val = r;
+ return;
+ }
+ }
+
+ if (r == -ECANCELED) {
+ dout(10) << ": image copy canceled" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << ": failed to copy image: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_flush_sync_point();
+}
+
+template <typename I>
+void ImageSync<I>::handle_copy_image_update_progress(uint64_t object_no,
+ uint64_t object_count) {
+ int percent = 100 * object_no / object_count;
+ update_progress("COPY_IMAGE " + stringify(percent) + "%");
+
+ Mutex::Locker locker(m_lock);
+ m_image_copy_object_no = object_no;
+ m_image_copy_object_count = object_count;
+
+ if (m_update_sync_ctx == nullptr && !m_updating_sync_point) {
+ send_update_sync_point();
+ }
+}
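+
+// Progress callbacks serve two purposes: they refresh the human-readable
+// "COPY_IMAGE <percent>%" status and, throttled to roughly one update per
+// rbd_mirror_sync_point_update_age interval, persist the last-copied object
+// number into the sync point so an interrupted sync can resume near the
+// recorded offset.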
+
+template <typename I>
+void ImageSync<I>::send_update_sync_point() {
+ ceph_assert(m_lock.is_locked());
+
+ m_update_sync_ctx = nullptr;
+
+ if (m_canceled) {
+ return;
+ }
+
+ auto sync_point = &m_client_meta->sync_points.front();
+
+ if (m_client_meta->sync_object_count == m_image_copy_object_count &&
+ sync_point->object_number &&
+ (m_image_copy_object_no - 1) == sync_point->object_number.get()) {
+ // the sync point has not progressed since the last update; skip it
+ return;
+ }
+
+ m_updating_sync_point = true;
+
+ m_client_meta_copy = *m_client_meta;
+ m_client_meta->sync_object_count = m_image_copy_object_count;
+ if (m_image_copy_object_no > 0) {
+ sync_point->object_number = m_image_copy_object_no - 1;
+ }
+
+ CephContext *cct = m_local_image_ctx->cct;
+ ldout(cct, 20) << ": sync_point=" << *sync_point << dendl;
+
+ bufferlist client_data_bl;
+ librbd::journal::ClientData client_data(*m_client_meta);
+ encode(client_data, client_data_bl);
+
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_update_sync_point>(
+ this);
+ m_journaler->update_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void ImageSync<I>::handle_update_sync_point(int r) {
+ CephContext *cct = m_local_image_ctx->cct;
+ ldout(cct, 20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ *m_client_meta = m_client_meta_copy;
+ lderr(cct) << ": failed to update client data: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ {
+ Mutex::Locker timer_locker(*m_timer_lock);
+ Mutex::Locker locker(m_lock);
+ m_updating_sync_point = false;
+
+ if (m_image_copy_request != nullptr) {
+ m_update_sync_ctx = new FunctionContext(
+ [this](int r) {
+ Mutex::Locker locker(m_lock);
+ this->send_update_sync_point();
+ });
+ m_timer->add_event_after(m_update_sync_point_interval,
+ m_update_sync_ctx);
+ return;
+ }
+ }
+
+ send_flush_sync_point();
+}
+
+template <typename I>
+void ImageSync<I>::send_flush_sync_point() {
+ if (m_ret_val < 0) {
+ finish(m_ret_val);
+ return;
+ }
+
+ update_progress("FLUSH_SYNC_POINT");
+
+ m_client_meta_copy = *m_client_meta;
+ m_client_meta->sync_object_count = m_image_copy_object_count;
+ auto sync_point = &m_client_meta->sync_points.front();
+ if (m_image_copy_object_no > 0) {
+ sync_point->object_number = m_image_copy_object_no - 1;
+ } else {
+ sync_point->object_number = boost::none;
+ }
+
+ dout(10) << ": sync_point=" << *sync_point << dendl;
+
+ bufferlist client_data_bl;
+ librbd::journal::ClientData client_data(*m_client_meta);
+ encode(client_data, client_data_bl);
+
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_flush_sync_point>(
+ this);
+ m_journaler->update_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void ImageSync<I>::handle_flush_sync_point(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ *m_client_meta = m_client_meta_copy;
+
+ derr << ": failed to update client data: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ send_prune_sync_points();
+}
+
+template <typename I>
+void ImageSync<I>::send_prune_sync_points() {
+ dout(10) << dendl;
+
+ update_progress("PRUNE_SYNC_POINTS");
+
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_prune_sync_points>(this);
+ SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create(
+ m_remote_image_ctx, true, m_journaler, m_client_meta, ctx);
+ request->send();
+}
+
+template <typename I>
+void ImageSync<I>::handle_prune_sync_points(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to prune sync point: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (!m_client_meta->sync_points.empty()) {
+ send_copy_image();
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void ImageSync<I>::update_progress(const std::string &description) {
+ dout(20) << ": " << description << dendl;
+
+ if (m_progress_ctx) {
+ m_progress_ctx->update_progress("IMAGE_SYNC/" + description);
+ }
+}
+
+template <typename I>
+void ImageSync<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_instance_watcher->notify_sync_complete(m_local_image_ctx->id);
+ BaseRequest::finish(r);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageSync<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageSync.h b/src/tools/rbd_mirror/ImageSync.h
new file mode 100644
index 00000000..9e00c129
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSync.h
@@ -0,0 +1,160 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_H
+#define RBD_MIRROR_IMAGE_SYNC_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/journal/TypeTraits.h"
+#include "librbd/journal/Types.h"
+#include "common/Mutex.h"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include <map>
+#include <vector>
+
+class Context;
+class ContextWQ;
+namespace journal { class Journaler; }
+namespace librbd { class ProgressContext; }
+namespace librbd { template <typename> class DeepCopyRequest; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+
+template <typename> class InstanceWatcher;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageSync : public BaseRequest {
+public:
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+ typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta;
+
+ static ImageSync* create(ImageCtxT *local_image_ctx,
+ ImageCtxT *remote_image_ctx,
+ SafeTimer *timer, Mutex *timer_lock,
+ const std::string &mirror_uuid,
+ Journaler *journaler,
+ MirrorPeerClientMeta *client_meta,
+ ContextWQ *work_queue,
+ InstanceWatcher<ImageCtxT> *instance_watcher,
+ Context *on_finish,
+ ProgressContext *progress_ctx = nullptr) {
+ return new ImageSync(local_image_ctx, remote_image_ctx, timer, timer_lock,
+ mirror_uuid, journaler, client_meta, work_queue,
+ instance_watcher, on_finish, progress_ctx);
+ }
+
+ ImageSync(ImageCtxT *local_image_ctx, ImageCtxT *remote_image_ctx,
+ SafeTimer *timer, Mutex *timer_lock, const std::string &mirror_uuid,
+ Journaler *journaler, MirrorPeerClientMeta *client_meta,
+ ContextWQ *work_queue, InstanceWatcher<ImageCtxT> *instance_watcher,
+ Context *on_finish, ProgressContext *progress_ctx = nullptr);
+ ~ImageSync() override;
+
+ void send() override;
+ void cancel() override;
+
+protected:
+ void finish(int r) override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * NOTIFY_SYNC_REQUEST
+ * |
+ * v
+ * PRUNE_CATCH_UP_SYNC_POINT
+ * |
+ * v
+ * CREATE_SYNC_POINT (skip if already exists and
+ * | not disconnected)
+ * v
+ * COPY_IMAGE . . . . . . . . . . . . . .
+ * | .
+ * v .
+ * FLUSH_SYNC_POINT .
+ * | . (image sync canceled)
+ * v .
+ * PRUNE_SYNC_POINTS .
+ * | .
+ * v .
+ * <finish> < . . . . . . . . . . . . . .
+ *
+ * @endverbatim
+ */
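+
+ // Not shown above: if the client metadata still holds a sync point after
+ // PRUNE_SYNC_POINTS, control loops back to COPY_IMAGE (see
+ // handle_prune_sync_points() in ImageSync.cc).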
+
+ typedef std::vector<librados::snap_t> SnapIds;
+ typedef std::map<librados::snap_t, SnapIds> SnapMap;
+ class ImageCopyProgressContext;
+
+ ImageCtxT *m_local_image_ctx;
+ ImageCtxT *m_remote_image_ctx;
+ SafeTimer *m_timer;
+ Mutex *m_timer_lock;
+ std::string m_mirror_uuid;
+ Journaler *m_journaler;
+ MirrorPeerClientMeta *m_client_meta;
+ ContextWQ *m_work_queue;
+ InstanceWatcher<ImageCtxT> *m_instance_watcher;
+ ProgressContext *m_progress_ctx;
+
+ SnapMap m_snap_map;
+
+ Mutex m_lock;
+ bool m_canceled = false;
+
+ librbd::DeepCopyRequest<ImageCtxT> *m_image_copy_request = nullptr;
+ librbd::ProgressContext *m_image_copy_prog_ctx = nullptr;
+
+ bool m_updating_sync_point = false;
+ Context *m_update_sync_ctx = nullptr;
+ double m_update_sync_point_interval;
+ uint64_t m_image_copy_object_no = 0;
+ uint64_t m_image_copy_object_count = 0;
+ MirrorPeerClientMeta m_client_meta_copy;
+
+ int m_ret_val = 0;
+
+ void send_notify_sync_request();
+ void handle_notify_sync_request(int r);
+
+ void send_prune_catch_up_sync_point();
+ void handle_prune_catch_up_sync_point(int r);
+
+ void send_create_sync_point();
+ void handle_create_sync_point(int r);
+
+ void send_update_max_object_count();
+ void handle_update_max_object_count(int r);
+
+ void send_copy_image();
+ void handle_copy_image(int r);
+ void handle_copy_image_update_progress(uint64_t object_no,
+ uint64_t object_count);
+ void send_update_sync_point();
+ void handle_update_sync_point(int r);
+
+ void send_flush_sync_point();
+ void handle_flush_sync_point(int r);
+
+ void send_prune_sync_points();
+ void handle_prune_sync_points(int r);
+
+ void update_progress(const std::string &description);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageSync<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_H
diff --git a/src/tools/rbd_mirror/ImageSyncThrottler.cc b/src/tools/rbd_mirror/ImageSyncThrottler.cc
new file mode 100644
index 00000000..b395a012
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSyncThrottler.cc
@@ -0,0 +1,227 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "ImageSyncThrottler.h"
+#include "common/Formatter.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageSyncThrottler:: " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+template <typename I>
+ImageSyncThrottler<I>::ImageSyncThrottler(CephContext *cct)
+ : m_cct(cct),
+ m_lock(librbd::util::unique_lock_name("rbd::mirror::ImageSyncThrottler",
+ this)),
+ m_max_concurrent_syncs(cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_concurrent_image_syncs")) {
+ dout(20) << "max_concurrent_syncs=" << m_max_concurrent_syncs << dendl;
+ m_cct->_conf.add_observer(this);
+}
+
+template <typename I>
+ImageSyncThrottler<I>::~ImageSyncThrottler() {
+ m_cct->_conf.remove_observer(this);
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_inflight_ops.empty());
+ ceph_assert(m_queue.empty());
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::start_op(const std::string &id, Context *on_start) {
+ dout(20) << "id=" << id << dendl;
+
+ int r = 0;
+ {
+ Mutex::Locker locker(m_lock);
+
+ if (m_inflight_ops.count(id) > 0) {
+ dout(20) << "duplicate for already started op " << id << dendl;
+ } else if (m_queued_ops.count(id) > 0) {
+ dout(20) << "duplicate for already queued op " << id << dendl;
+ std::swap(m_queued_ops[id], on_start);
+ r = -ENOENT;
+ } else if (m_max_concurrent_syncs == 0 ||
+ m_inflight_ops.size() < m_max_concurrent_syncs) {
+ ceph_assert(m_queue.empty());
+ m_inflight_ops.insert(id);
+ dout(20) << "ready to start sync for " << id << " ["
+ << m_inflight_ops.size() << "/" << m_max_concurrent_syncs << "]"
+ << dendl;
+ } else {
+ m_queue.push_back(id);
+ std::swap(m_queued_ops[id], on_start);
+ dout(20) << "image sync for " << id << " has been queued" << dendl;
+ }
+ }
+
+ if (on_start != nullptr) {
+ on_start->complete(r);
+ }
+}
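+
+// Minimal caller sketch (illustrative only): the Context handed to
+// start_op() fires with 0 once a slot is granted, or with -ECANCELED if the
+// op is canceled or drained while still queued.
+//
+// throttler->start_op(image_id, new FunctionContext([](int r) {
+// if (r == 0) {
+// // slot acquired: run the image sync, then call finish_op()
+// }
+// }));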
+
+template <typename I>
+bool ImageSyncThrottler<I>::cancel_op(const std::string &id) {
+ dout(20) << "id=" << id << dendl;
+
+ Context *on_start = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ auto it = m_queued_ops.find(id);
+ if (it != m_queued_ops.end()) {
+ dout(20) << "canceled queued sync for " << id << dendl;
+ m_queue.remove(id);
+ on_start = it->second;
+ m_queued_ops.erase(it);
+ }
+ }
+
+ if (on_start == nullptr) {
+ return false;
+ }
+
+ on_start->complete(-ECANCELED);
+ return true;
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::finish_op(const std::string &id) {
+ dout(20) << "id=" << id << dendl;
+
+ if (cancel_op(id)) {
+ return;
+ }
+
+ Context *on_start = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+
+ m_inflight_ops.erase(id);
+
+ if (m_inflight_ops.size() < m_max_concurrent_syncs && !m_queue.empty()) {
+ auto next_id = m_queue.front();
+ auto it = m_queued_ops.find(next_id);
+ ceph_assert(it != m_queued_ops.end());
+ m_inflight_ops.insert(next_id);
+ dout(20) << "ready to start sync for " << next_id << " ["
+ << m_inflight_ops.size() << "/" << m_max_concurrent_syncs << "]"
+ << dendl;
+ on_start = it->second;
+ m_queued_ops.erase(it);
+ m_queue.pop_front();
+ }
+ }
+
+ if (on_start != nullptr) {
+ on_start->complete(0);
+ }
+}
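+
+// finish_op() first tries cancel_op(): if the id is still queued (finished
+// before ever being granted a slot), the queued context is completed with
+// -ECANCELED rather than promoted. Otherwise the id leaves the in-flight
+// set and the oldest queued op, if any, is started in its place.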
+
+template <typename I>
+void ImageSyncThrottler<I>::drain(int r) {
+ dout(20) << dendl;
+
+ std::map<std::string, Context *> queued_ops;
+ {
+ Mutex::Locker locker(m_lock);
+ std::swap(m_queued_ops, queued_ops);
+ m_queue.clear();
+ m_inflight_ops.clear();
+ }
+
+ for (auto &it : queued_ops) {
+ it.second->complete(r);
+ }
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::set_max_concurrent_syncs(uint32_t max) {
+ dout(20) << "max=" << max << dendl;
+
+ std::list<Context *> ops;
+ {
+ Mutex::Locker locker(m_lock);
+ m_max_concurrent_syncs = max;
+
+ // Start queued ops while free slots are available
+ while ((m_max_concurrent_syncs == 0 ||
+ m_inflight_ops.size() < m_max_concurrent_syncs) &&
+ !m_queue.empty()) {
+ auto id = m_queue.front();
+ m_inflight_ops.insert(id);
+ dout(20) << "ready to start sync for " << id << " ["
+ << m_inflight_ops.size() << "/" << m_max_concurrent_syncs << "]"
+ << dendl;
+ auto it = m_queued_ops.find(id);
+ ceph_assert(it != m_queued_ops.end());
+ ops.push_back(it->second);
+ m_queued_ops.erase(it);
+ m_queue.pop_front();
+ }
+ }
+
+ for (const auto& ctx : ops) {
+ ctx->complete(0);
+ }
+}
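+
+// A limit of 0 is treated as "unlimited": start_op() then admits every sync
+// immediately, and this method drains any backlog that accumulated under a
+// previous finite limit.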
+
+template <typename I>
+void ImageSyncThrottler<I>::print_status(Formatter *f, std::stringstream *ss) {
+ dout(20) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (f) {
+ f->dump_int("max_parallel_syncs", m_max_concurrent_syncs);
+ f->dump_int("running_syncs", m_inflight_ops.size());
+ f->dump_int("waiting_syncs", m_queue.size());
+ f->flush(*ss);
+ } else {
+ *ss << "[ ";
+ *ss << "max_parallel_syncs=" << m_max_concurrent_syncs << ", ";
+ *ss << "running_syncs=" << m_inflight_ops.size() << ", ";
+ *ss << "waiting_syncs=" << m_queue.size() << " ]";
+ }
+}
+
+template <typename I>
+const char** ImageSyncThrottler<I>::get_tracked_conf_keys() const {
+ static const char* KEYS[] = {
+ "rbd_mirror_concurrent_image_syncs",
+ NULL
+ };
+ return KEYS;
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::handle_conf_change(const ConfigProxy& conf,
+ const set<string> &changed) {
+ if (changed.count("rbd_mirror_concurrent_image_syncs")) {
+ set_max_concurrent_syncs(conf.get_val<uint64_t>("rbd_mirror_concurrent_image_syncs"));
+ }
+}
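+
+// Registering as an md_config_obs_t (see the constructor) makes the limit
+// tunable at runtime: updating rbd_mirror_concurrent_image_syncs on the
+// running daemon, for example through the cluster's central config, lands
+// here and resizes the window without a restart.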
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageSyncThrottler<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageSyncThrottler.h b/src/tools/rbd_mirror/ImageSyncThrottler.h
new file mode 100644
index 00000000..c0cda61e
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSyncThrottler.h
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_THROTTLER_H
+#define RBD_MIRROR_IMAGE_SYNC_THROTTLER_H
+
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "common/Mutex.h"
+#include "common/config_obs.h"
+
+class CephContext;
+class Context;
+
+namespace ceph { class Formatter; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageSyncThrottler : public md_config_obs_t {
+public:
+ static ImageSyncThrottler *create(CephContext *cct) {
+ return new ImageSyncThrottler(cct);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ ImageSyncThrottler(CephContext *cct);
+ ~ImageSyncThrottler() override;
+
+ void set_max_concurrent_syncs(uint32_t max);
+ void start_op(const std::string &id, Context *on_start);
+ bool cancel_op(const std::string &id);
+ void finish_op(const std::string &id);
+ void drain(int r);
+
+ void print_status(Formatter *f, std::stringstream *ss);
+
+private:
+ CephContext *m_cct;
+ Mutex m_lock;
+ uint32_t m_max_concurrent_syncs;
+ std::list<std::string> m_queue;
+ std::map<std::string, Context *> m_queued_ops;
+ std::set<std::string> m_inflight_ops;
+
+ const char **get_tracked_conf_keys() const override;
+ void handle_conf_change(const ConfigProxy& conf,
+ const std::set<std::string> &changed) override;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageSyncThrottler<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_THROTTLER_H
diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc
new file mode 100644
index 00000000..c0086a48
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceReplayer.cc
@@ -0,0 +1,510 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/stringify.h"
+#include "common/Timer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+#include "ImageReplayer.h"
+#include "InstanceReplayer.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::InstanceReplayer: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+const std::string SERVICE_DAEMON_ASSIGNED_COUNT_KEY("image_assigned_count");
+const std::string SERVICE_DAEMON_WARNING_COUNT_KEY("image_warning_count");
+const std::string SERVICE_DAEMON_ERROR_COUNT_KEY("image_error_count");
+
+} // anonymous namespace
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+template <typename I>
+InstanceReplayer<I>::InstanceReplayer(
+ Threads<I> *threads, ServiceDaemon<I>* service_daemon,
+ RadosRef local_rados, const std::string &local_mirror_uuid,
+ int64_t local_pool_id)
+ : m_threads(threads), m_service_daemon(service_daemon),
+ m_local_rados(local_rados), m_local_mirror_uuid(local_mirror_uuid),
+ m_local_pool_id(local_pool_id),
+ m_lock("rbd::mirror::InstanceReplayer " + stringify(local_pool_id)) {
+}
+
+template <typename I>
+InstanceReplayer<I>::~InstanceReplayer() {
+ ceph_assert(m_image_state_check_task == nullptr);
+ ceph_assert(m_async_op_tracker.empty());
+ ceph_assert(m_image_replayers.empty());
+}
+
+template <typename I>
+bool InstanceReplayer<I>::is_blacklisted() const {
+ Mutex::Locker locker(m_lock);
+ return m_blacklisted;
+}
+
+template <typename I>
+int InstanceReplayer<I>::init() {
+ C_SaferCond init_ctx;
+ init(&init_ctx);
+ return init_ctx.wait();
+}
+
+template <typename I>
+void InstanceReplayer<I>::init(Context *on_finish) {
+ dout(10) << dendl;
+
+ Context *ctx = new FunctionContext(
+ [this, on_finish] (int r) {
+ {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ schedule_image_state_check_task();
+ }
+ on_finish->complete(0);
+ });
+
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::shut_down() {
+ C_SaferCond shut_down_ctx;
+ shut_down(&shut_down_ctx);
+ int r = shut_down_ctx.wait();
+ ceph_assert(r == 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::shut_down(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_on_shut_down == nullptr);
+ m_on_shut_down = on_finish;
+
+ Context *ctx = new FunctionContext(
+ [this] (int r) {
+ cancel_image_state_check_task();
+ wait_for_ops();
+ });
+
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::add_peer(std::string peer_uuid,
+ librados::IoCtx io_ctx) {
+ dout(10) << peer_uuid << dendl;
+
+ Mutex::Locker locker(m_lock);
+ auto result = m_peers.insert(Peer(peer_uuid, io_ctx)).second;
+ ceph_assert(result);
+}
+
+template <typename I>
+void InstanceReplayer<I>::release_all(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ C_Gather *gather_ctx = new C_Gather(g_ceph_context, on_finish);
+ for (auto it = m_image_replayers.begin(); it != m_image_replayers.end();
+ it = m_image_replayers.erase(it)) {
+ auto image_replayer = it->second;
+ auto ctx = gather_ctx->new_sub();
+ ctx = new FunctionContext(
+ [image_replayer, ctx] (int r) {
+ image_replayer->destroy();
+ ctx->complete(0);
+ });
+ stop_image_replayer(image_replayer, ctx);
+ }
+ gather_ctx->activate();
+}
+
+template <typename I>
+void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> *instance_watcher,
+ const std::string &global_image_id,
+ Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_on_shut_down == nullptr);
+
+ auto it = m_image_replayers.find(global_image_id);
+ if (it == m_image_replayers.end()) {
+ auto image_replayer = ImageReplayer<I>::create(
+ m_threads, instance_watcher, m_local_rados,
+ m_local_mirror_uuid, m_local_pool_id, global_image_id);
+
+ dout(10) << global_image_id << ": creating replayer " << image_replayer
+ << dendl;
+
+ it = m_image_replayers.insert(std::make_pair(global_image_id,
+ image_replayer)).first;
+
+ // TODO only a single peer is currently supported
+ ceph_assert(m_peers.size() == 1);
+ auto peer = *m_peers.begin();
+ image_replayer->add_peer(peer.peer_uuid, peer.io_ctx);
+ start_image_replayer(image_replayer);
+ } else {
+ // A duplicate acquire notification implies either (1) a connection
+ // hiccup or (2) a new leader election. For the second case, restart
+ // the replayer to detect if the image was deleted while the leader
+ // was offline
+ auto& image_replayer = it->second;
+ image_replayer->set_finished(false);
+ image_replayer->restart();
+ }
+
+ m_threads->work_queue->queue(on_finish, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::release_image(const std::string &global_image_id,
+ Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_shut_down == nullptr);
+
+ auto it = m_image_replayers.find(global_image_id);
+ if (it == m_image_replayers.end()) {
+ dout(5) << global_image_id << ": not found" << dendl;
+ m_threads->work_queue->queue(on_finish, 0);
+ return;
+ }
+
+ auto image_replayer = it->second;
+ m_image_replayers.erase(it);
+
+ on_finish = new FunctionContext(
+ [image_replayer, on_finish] (int r) {
+ image_replayer->destroy();
+ on_finish->complete(0);
+ });
+ stop_image_replayer(image_replayer, on_finish);
+}
+
+template <typename I>
+void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id,
+ const std::string &peer_mirror_uuid,
+ Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << ", "
+ << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_shut_down == nullptr);
+
+ auto it = m_image_replayers.find(global_image_id);
+ if (it != m_image_replayers.end()) {
+ // TODO: only a single peer is currently supported, so we can simply
+ // restart the image replayer; it will eventually detect that the
+ // peer image is missing and determine whether delete propagation
+ // is required.
+ auto image_replayer = it->second;
+ image_replayer->restart();
+ }
+ m_threads->work_queue->queue(on_finish, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::print_status(Formatter *f, std::stringstream *ss) {
+ dout(10) << dendl;
+
+ if (!f) {
+ return;
+ }
+
+ Mutex::Locker locker(m_lock);
+
+ f->open_array_section("image_replayers");
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->print_status(f, ss);
+ }
+ f->close_section();
+}
+
+template <typename I>
+void InstanceReplayer<I>::start()
+{
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ m_manual_stop = false;
+
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->start(nullptr, true);
+ }
+}
+
+template <typename I>
+void InstanceReplayer<I>::stop()
+{
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ m_manual_stop = true;
+
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->stop(nullptr, true);
+ }
+}
+
+template <typename I>
+void InstanceReplayer<I>::restart()
+{
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ m_manual_stop = false;
+
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->restart();
+ }
+}
+
+template <typename I>
+void InstanceReplayer<I>::flush()
+{
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->flush();
+ }
+}
+
+template <typename I>
+void InstanceReplayer<I>::start_image_replayer(
+ ImageReplayer<I> *image_replayer) {
+ ceph_assert(m_lock.is_locked());
+
+ std::string global_image_id = image_replayer->get_global_image_id();
+ if (!image_replayer->is_stopped()) {
+ return;
+ } else if (image_replayer->is_blacklisted()) {
+ derr << "global_image_id=" << global_image_id << ": blacklisted detected "
+ << "during image replay" << dendl;
+ m_blacklisted = true;
+ return;
+ } else if (image_replayer->is_finished()) {
+ // TODO: temporary until the policy is integrated
+ dout(5) << "removing image replayer for global_image_id="
+ << global_image_id << dendl;
+ m_image_replayers.erase(image_replayer->get_global_image_id());
+ image_replayer->destroy();
+ return;
+ } else if (m_manual_stop) {
+ return;
+ }
+
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+ image_replayer->start(nullptr, false);
+}
+
+template <typename I>
+void InstanceReplayer<I>::queue_start_image_replayers() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ InstanceReplayer, &InstanceReplayer<I>::start_image_replayers>(this);
+ m_async_op_tracker.start_op();
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::start_image_replayers(int r) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ if (m_on_shut_down != nullptr) {
+ return;
+ }
+
+ uint64_t image_count = 0;
+ uint64_t warning_count = 0;
+ uint64_t error_count = 0;
+ for (auto it = m_image_replayers.begin();
+ it != m_image_replayers.end();) {
+ auto current_it(it);
+ ++it;
+
+ ++image_count;
+ auto health_state = current_it->second->get_health_state();
+ if (health_state == image_replayer::HEALTH_STATE_WARNING) {
+ ++warning_count;
+ } else if (health_state == image_replayer::HEALTH_STATE_ERROR) {
+ ++error_count;
+ }
+
+ start_image_replayer(current_it->second);
+ }
+
+ m_service_daemon->add_or_update_attribute(
+ m_local_pool_id, SERVICE_DAEMON_ASSIGNED_COUNT_KEY, image_count);
+ m_service_daemon->add_or_update_attribute(
+ m_local_pool_id, SERVICE_DAEMON_WARNING_COUNT_KEY, warning_count);
+ m_service_daemon->add_or_update_attribute(
+ m_local_pool_id, SERVICE_DAEMON_ERROR_COUNT_KEY, error_count);
+
+ m_async_op_tracker.finish_op();
+}
+
+template <typename I>
+void InstanceReplayer<I>::stop_image_replayer(ImageReplayer<I> *image_replayer,
+ Context *on_finish) {
+ dout(10) << image_replayer << " global_image_id="
+ << image_replayer->get_global_image_id() << ", on_finish="
+ << on_finish << dendl;
+
+ if (image_replayer->is_stopped()) {
+ m_threads->work_queue->queue(on_finish, 0);
+ return;
+ }
+
+ m_async_op_tracker.start_op();
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, new FunctionContext(
+ [this, image_replayer, on_finish] (int r) {
+ stop_image_replayer(image_replayer, on_finish);
+ m_async_op_tracker.finish_op();
+ }));
+
+ if (image_replayer->is_running()) {
+ image_replayer->stop(ctx, false);
+ } else {
+ int after = 1;
+ dout(10) << "scheduling image replayer " << image_replayer << " stop after "
+ << after << " sec (task " << ctx << ")" << dendl;
+ ctx = new FunctionContext(
+ [this, after, ctx] (int r) {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ m_threads->timer->add_event_after(after, ctx);
+ });
+ m_threads->work_queue->queue(ctx, 0);
+ }
+}
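+
+// If the replayer is neither stopped nor running (i.e. mid start or stop),
+// the callback above re-enters stop_image_replayer() after a one-second
+// timer delay, polling until stop() can be issued or the replayer reports
+// stopped.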
+
+template <typename I>
+void InstanceReplayer<I>::wait_for_ops() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ InstanceReplayer, &InstanceReplayer<I>::handle_wait_for_ops>(this);
+
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void InstanceReplayer<I>::handle_wait_for_ops(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r == 0);
+
+ Mutex::Locker locker(m_lock);
+ stop_image_replayers();
+}
+
+template <typename I>
+void InstanceReplayer<I>::stop_image_replayers() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<InstanceReplayer<I>,
+ &InstanceReplayer<I>::handle_stop_image_replayers>(this));
+
+ C_Gather *gather_ctx = new C_Gather(g_ceph_context, ctx);
+ for (auto &it : m_image_replayers) {
+ stop_image_replayer(it.second, gather_ctx->new_sub());
+ }
+ gather_ctx->activate();
+}
+
+template <typename I>
+void InstanceReplayer<I>::handle_stop_image_replayers(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r == 0);
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+
+ for (auto &it : m_image_replayers) {
+ ceph_assert(it.second->is_stopped());
+ it.second->destroy();
+ }
+ m_image_replayers.clear();
+
+ ceph_assert(m_on_shut_down != nullptr);
+ std::swap(on_finish, m_on_shut_down);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void InstanceReplayer<I>::cancel_image_state_check_task() {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+
+ if (m_image_state_check_task == nullptr) {
+ return;
+ }
+
+ dout(10) << m_image_state_check_task << dendl;
+ bool canceled = m_threads->timer->cancel_event(m_image_state_check_task);
+ ceph_assert(canceled);
+ m_image_state_check_task = nullptr;
+}
+
+template <typename I>
+void InstanceReplayer<I>::schedule_image_state_check_task() {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_image_state_check_task == nullptr);
+
+ m_image_state_check_task = new FunctionContext(
+ [this](int r) {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ m_image_state_check_task = nullptr;
+ schedule_image_state_check_task();
+ queue_start_image_replayers();
+ });
+
+ auto cct = static_cast<CephContext *>(m_local_rados->cct());
+ int after = cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_image_state_check_interval");
+
+ dout(10) << "scheduling image state check after " << after << " sec (task "
+ << m_image_state_check_task << ")" << dendl;
+ m_threads->timer->add_event_after(after, m_image_state_check_task);
+}
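+
+// The check task re-arms itself from within its own callback, forming a
+// periodic poll (every rbd_mirror_image_state_check_interval seconds) that
+// re-evaluates all image replayers via queue_start_image_replayers();
+// cancel_image_state_check_task() breaks the cycle at shutdown.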
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/InstanceReplayer.h b/src/tools/rbd_mirror/InstanceReplayer.h
new file mode 100644
index 00000000..efbdde02
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceReplayer.h
@@ -0,0 +1,123 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_INSTANCE_REPLAYER_H
+#define RBD_MIRROR_INSTANCE_REPLAYER_H
+
+#include <map>
+#include <sstream>
+
+#include "common/AsyncOpTracker.h"
+#include "common/Formatter.h"
+#include "common/Mutex.h"
+#include "tools/rbd_mirror/Types.h"
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ImageReplayer;
+template <typename> class InstanceWatcher;
+template <typename> class ServiceDaemon;
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class InstanceReplayer {
+public:
+ static InstanceReplayer* create(
+ Threads<ImageCtxT> *threads,
+ ServiceDaemon<ImageCtxT>* service_daemon,
+ RadosRef local_rados, const std::string &local_mirror_uuid,
+ int64_t local_pool_id) {
+ return new InstanceReplayer(threads, service_daemon, local_rados,
+ local_mirror_uuid, local_pool_id);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ InstanceReplayer(Threads<ImageCtxT> *threads,
+ ServiceDaemon<ImageCtxT>* service_daemon,
+ RadosRef local_rados, const std::string &local_mirror_uuid,
+ int64_t local_pool_id);
+ ~InstanceReplayer();
+
+ bool is_blacklisted() const;
+
+ int init();
+ void shut_down();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+ void add_peer(std::string peer_uuid, librados::IoCtx io_ctx);
+
+ void acquire_image(InstanceWatcher<ImageCtxT> *instance_watcher,
+ const std::string &global_image_id, Context *on_finish);
+ void release_image(const std::string &global_image_id, Context *on_finish);
+ void remove_peer_image(const std::string &global_image_id,
+ const std::string &peer_mirror_uuid,
+ Context *on_finish);
+
+ void release_all(Context *on_finish);
+
+ void print_status(Formatter *f, std::stringstream *ss);
+ void start();
+ void stop();
+ void restart();
+ void flush();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <-------------------\
+ * | (init) | (repeat for each
+ * v STOP_IMAGE_REPLAYER ---\ image replayer)
+ * SCHEDULE_IMAGE_STATE_CHECK_TASK ^ ^ |
+ * | | | |
+ * v (shut_down) | \---------/
+ * <initialized> -----------------> WAIT_FOR_OPS
+ *
+ * @endverbatim
+ */
+
+ Threads<ImageCtxT> *m_threads;
+ ServiceDaemon<ImageCtxT>* m_service_daemon;
+ RadosRef m_local_rados;
+ std::string m_local_mirror_uuid;
+ int64_t m_local_pool_id;
+
+ mutable Mutex m_lock;
+ AsyncOpTracker m_async_op_tracker;
+ std::map<std::string, ImageReplayer<ImageCtxT> *> m_image_replayers;
+ Peers m_peers;
+ Context *m_image_state_check_task = nullptr;
+ Context *m_on_shut_down = nullptr;
+ bool m_manual_stop = false;
+ bool m_blacklisted = false;
+
+ void wait_for_ops();
+ void handle_wait_for_ops(int r);
+
+ void start_image_replayer(ImageReplayer<ImageCtxT> *image_replayer);
+ void queue_start_image_replayers();
+ void start_image_replayers(int r);
+
+ void stop_image_replayer(ImageReplayer<ImageCtxT> *image_replayer,
+ Context *on_finish);
+
+ void stop_image_replayers();
+ void handle_stop_image_replayers(int r);
+
+ void schedule_image_state_check_task();
+ void cancel_image_state_check_task();
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_INSTANCE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/InstanceWatcher.cc b/src/tools/rbd_mirror/InstanceWatcher.cc
new file mode 100644
index 00000000..d9e1ba23
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceWatcher.cc
@@ -0,0 +1,1299 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "InstanceWatcher.h"
+#include "include/stringify.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ManagedLock.h"
+#include "librbd/Utils.h"
+#include "InstanceReplayer.h"
+#include "ImageSyncThrottler.h"
+#include "common/Cond.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: "
+
+namespace rbd {
+namespace mirror {
+
+using namespace instance_watcher;
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+using librbd::util::unique_lock_name;
+
+namespace {
+
+struct C_GetInstances : public Context {
+ std::vector<std::string> *instance_ids;
+ Context *on_finish;
+ bufferlist out_bl;
+
+ C_GetInstances(std::vector<std::string> *instance_ids, Context *on_finish)
+ : instance_ids(instance_ids), on_finish(on_finish) {
+ }
+
+ void finish(int r) override {
+ dout(10) << "C_GetInstances: " << this << " " << __func__ << ": r=" << r
+ << dendl;
+
+ if (r == 0) {
+ auto it = out_bl.cbegin();
+ r = librbd::cls_client::mirror_instances_list_finish(&it, instance_ids);
+ } else if (r == -ENOENT) {
+ r = 0;
+ }
+ on_finish->complete(r);
+ }
+};
+
+template <typename I>
+struct C_RemoveInstanceRequest : public Context {
+ InstanceWatcher<I> instance_watcher;
+ Context *on_finish;
+
+ C_RemoveInstanceRequest(librados::IoCtx &io_ctx, ContextWQ *work_queue,
+ const std::string &instance_id, Context *on_finish)
+ : instance_watcher(io_ctx, work_queue, nullptr, instance_id),
+ on_finish(on_finish) {
+ }
+
+ void send() {
+ dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << dendl;
+
+ instance_watcher.remove(this);
+ }
+
+ void finish(int r) override {
+ dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << ": r="
+ << r << dendl;
+ ceph_assert(r == 0);
+
+ on_finish->complete(r);
+ }
+};
+
+} // anonymous namespace
+
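+// Tracks a single outbound notification. An empty instance_id means "send
+// to the current leader": the request is suspended while no leader is
+// known and re-targeted in send() whenever the leader changes.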
+template <typename I>
+struct InstanceWatcher<I>::C_NotifyInstanceRequest : public Context {
+ InstanceWatcher<I> *instance_watcher;
+ std::string instance_id;
+ uint64_t request_id;
+ bufferlist bl;
+ Context *on_finish;
+ bool send_to_leader;
+ std::unique_ptr<librbd::watcher::Notifier> notifier;
+ librbd::watcher::NotifyResponse response;
+ bool canceling = false;
+
+ C_NotifyInstanceRequest(InstanceWatcher<I> *instance_watcher,
+ const std::string &instance_id, uint64_t request_id,
+ bufferlist &&bl, Context *on_finish)
+ : instance_watcher(instance_watcher), instance_id(instance_id),
+ request_id(request_id), bl(bl), on_finish(on_finish),
+ send_to_leader(instance_id.empty()) {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": instance_watcher=" << instance_watcher << ", instance_id="
+ << instance_id << ", request_id=" << request_id << dendl;
+
+ ceph_assert(instance_watcher->m_lock.is_locked());
+
+ if (!send_to_leader) {
+ ceph_assert(!instance_id.empty());
+ notifier.reset(new librbd::watcher::Notifier(
+ instance_watcher->m_work_queue,
+ instance_watcher->m_ioctx,
+ RBD_MIRROR_INSTANCE_PREFIX + instance_id));
+ }
+
+ instance_watcher->m_notify_op_tracker.start_op();
+ auto result = instance_watcher->m_notify_ops.insert(
+ std::make_pair(instance_id, this)).second;
+ ceph_assert(result);
+ }
+
+ void send() {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl;
+
+ ceph_assert(instance_watcher->m_lock.is_locked());
+
+ if (canceling) {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": canceling" << dendl;
+ instance_watcher->m_work_queue->queue(this, -ECANCELED);
+ return;
+ }
+
+ if (send_to_leader) {
+ if (instance_watcher->m_leader_instance_id.empty()) {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": suspending" << dendl;
+ instance_watcher->suspend_notify_request(this);
+ return;
+ }
+
+ if (instance_watcher->m_leader_instance_id != instance_id) {
+ auto count = instance_watcher->m_notify_ops.erase(
+ std::make_pair(instance_id, this));
+ ceph_assert(count > 0);
+
+ instance_id = instance_watcher->m_leader_instance_id;
+
+ auto result = instance_watcher->m_notify_ops.insert(
+ std::make_pair(instance_id, this)).second;
+ ceph_assert(result);
+
+ notifier.reset(new librbd::watcher::Notifier(
+ instance_watcher->m_work_queue,
+ instance_watcher->m_ioctx,
+ RBD_MIRROR_INSTANCE_PREFIX + instance_id));
+ }
+ }
+
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": sending to " << instance_id << dendl;
+ notifier->notify(bl, &response, this);
+ }
+
+ void cancel() {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl;
+
+ ceph_assert(instance_watcher->m_lock.is_locked());
+
+ canceling = true;
+ instance_watcher->unsuspend_notify_request(this);
+ }
+
+ void finish(int r) override {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << ": r="
+ << r << dendl;
+
+ if (r == 0 || r == -ETIMEDOUT) {
+ bool found = false;
+ for (auto &it : response.acks) {
+ auto &bl = it.second;
+ if (bl.length() == 0) {
+ dout(5) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": no payload in ack, ignoring" << dendl;
+ continue;
+ }
+ try {
+ auto iter = bl.cbegin();
+ NotifyAckPayload ack;
+ decode(ack, iter);
+ if (ack.instance_id != instance_watcher->get_instance_id()) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": ack instance_id (" << ack.instance_id << ") "
+ << "does not match, ignoring" << dendl;
+ continue;
+ }
+ if (ack.request_id != request_id) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": ack request_id (" << ack.request_id << ") "
+ << "does not match, ignoring" << dendl;
+ continue;
+ }
+ r = ack.ret_val;
+ found = true;
+ break;
+ } catch (const buffer::error &err) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": failed to decode ack: " << err.what() << dendl;
+ continue;
+ }
+ }
+
+ if (!found) {
+ if (r == -ETIMEDOUT) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": resending after timeout" << dendl;
+ Mutex::Locker locker(instance_watcher->m_lock);
+ send();
+ return;
+ } else {
+ r = -EINVAL;
+ }
+ } else {
+ if (r == -ESTALE && send_to_leader) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": resending due to leader change" << dendl;
+ Mutex::Locker locker(instance_watcher->m_lock);
+ send();
+ return;
+ }
+ }
+ }
+
+ on_finish->complete(r);
+
+ {
+ Mutex::Locker locker(instance_watcher->m_lock);
+ auto result = instance_watcher->m_notify_ops.erase(
+ std::make_pair(instance_id, this));
+ ceph_assert(result > 0);
+ instance_watcher->m_notify_op_tracker.finish_op();
+ }
+
+ delete this;
+ }
+
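+ // finish() deletes this request, so bypass the base Context::complete(),
+ // which would otherwise delete it a second time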
+ void complete(int r) override {
+ finish(r);
+ }
+};
+
+template <typename I>
+struct InstanceWatcher<I>::C_SyncRequest : public Context {
+ InstanceWatcher<I> *instance_watcher;
+ std::string sync_id;
+ Context *on_start;
+ Context *on_complete = nullptr;
+ C_NotifyInstanceRequest *req = nullptr;
+
+ C_SyncRequest(InstanceWatcher<I> *instance_watcher,
+ const std::string &sync_id, Context *on_start)
+ : instance_watcher(instance_watcher), sync_id(sync_id),
+ on_start(on_start) {
+ dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": sync_id="
+ << sync_id << dendl;
+ }
+
+ void finish(int r) override {
+ dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": r="
+ << r << dendl;
+
+ if (on_start != nullptr) {
+ instance_watcher->handle_notify_sync_request(this, r);
+ } else {
+ instance_watcher->handle_notify_sync_complete(this, r);
+ delete this;
+ }
+ }
+
+ // invoked twice (once when the sync is started, once when it completes);
+ // finish() only deletes this on the second call, so bypass the base
+ // Context::complete() and its unconditional delete
+ void complete(int r) override {
+ finish(r);
+ }
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " \
+ << this << " " << __func__ << ": "
+template <typename I>
+void InstanceWatcher<I>::get_instances(librados::IoCtx &io_ctx,
+ std::vector<std::string> *instance_ids,
+ Context *on_finish) {
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_instances_list_start(&op);
+ C_GetInstances *ctx = new C_GetInstances(instance_ids, on_finish);
+ librados::AioCompletion *aio_comp = create_rados_callback(ctx);
+
+ int r = io_ctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &ctx->out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::remove_instance(librados::IoCtx &io_ctx,
+ ContextWQ *work_queue,
+ const std::string &instance_id,
+ Context *on_finish) {
+ auto req = new C_RemoveInstanceRequest<I>(io_ctx, work_queue, instance_id,
+ on_finish);
+ req->send();
+}
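+
+// Hedged usage sketch for the two static helpers above (illustrative only;
+// Instances<I>, added later in this change, is the real caller):
+//
+//   std::vector<std::string> ids;
+//   C_SaferCond list_ctx;
+//   InstanceWatcher<librbd::ImageCtx>::get_instances(io_ctx, &ids, &list_ctx);
+//   if (list_ctx.wait() == 0) {
+//     for (auto &id : ids) {  // e.g. clean up instances that stopped acking
+//       C_SaferCond remove_ctx;
+//       InstanceWatcher<librbd::ImageCtx>::remove_instance(
+//         io_ctx, work_queue, id, &remove_ctx);
+//       remove_ctx.wait();
+//     }
+//   }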
+
+template <typename I>
+InstanceWatcher<I> *InstanceWatcher<I>::create(
+ librados::IoCtx &io_ctx, ContextWQ *work_queue,
+ InstanceReplayer<I> *instance_replayer) {
+ return new InstanceWatcher<I>(io_ctx, work_queue, instance_replayer,
+ stringify(io_ctx.get_instance_id()));
+}
+
+template <typename I>
+InstanceWatcher<I>::InstanceWatcher(librados::IoCtx &io_ctx,
+ ContextWQ *work_queue,
+ InstanceReplayer<I> *instance_replayer,
+ const std::string &instance_id)
+ : Watcher(io_ctx, work_queue, RBD_MIRROR_INSTANCE_PREFIX + instance_id),
+ m_instance_replayer(instance_replayer), m_instance_id(instance_id),
+ m_lock(unique_lock_name("rbd::mirror::InstanceWatcher::m_lock", this)),
+ m_instance_lock(librbd::ManagedLock<I>::create(
+ m_ioctx, m_work_queue, m_oid, this, librbd::managed_lock::EXCLUSIVE, true,
+ m_cct->_conf.get_val<uint64_t>("rbd_blacklist_expire_seconds"))) {
+}
+
+template <typename I>
+InstanceWatcher<I>::~InstanceWatcher() {
+ ceph_assert(m_requests.empty());
+ ceph_assert(m_notify_ops.empty());
+ ceph_assert(m_notify_op_tracker.empty());
+ ceph_assert(m_suspended_ops.empty());
+ ceph_assert(m_inflight_sync_reqs.empty());
+ ceph_assert(m_image_sync_throttler == nullptr);
+ m_instance_lock->destroy();
+}
+
+template <typename I>
+int InstanceWatcher<I>::init() {
+ C_SaferCond init_ctx;
+ init(&init_ctx);
+ return init_ctx.wait();
+}
+
+template <typename I>
+void InstanceWatcher<I>::init(Context *on_finish) {
+ dout(10) << "instance_id=" << m_instance_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ register_instance();
+}
+
+template <typename I>
+void InstanceWatcher<I>::shut_down() {
+ C_SaferCond shut_down_ctx;
+ shut_down(&shut_down_ctx);
+ int r = shut_down_ctx.wait();
+ ceph_assert(r == 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::shut_down(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ release_lock();
+}
+
+template <typename I>
+void InstanceWatcher<I>::remove(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ get_instance_locker();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_image_acquire(
+ const std::string &instance_id, const std::string &global_image_id,
+ Context *on_notify_ack) {
+ dout(10) << "instance_id=" << instance_id << ", global_image_id="
+ << global_image_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_on_finish == nullptr);
+
+ uint64_t request_id = ++m_request_seq;
+ bufferlist bl;
+ encode(NotifyMessage{ImageAcquirePayload{request_id, global_image_id}}, bl);
+ auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+ std::move(bl), on_notify_ack);
+ req->send();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_image_release(
+ const std::string &instance_id, const std::string &global_image_id,
+ Context *on_notify_ack) {
+ dout(10) << "instance_id=" << instance_id << ", global_image_id="
+ << global_image_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_on_finish == nullptr);
+
+ uint64_t request_id = ++m_request_seq;
+ bufferlist bl;
+ encode(NotifyMessage{ImageReleasePayload{request_id, global_image_id}}, bl);
+ auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+ std::move(bl), on_notify_ack);
+ req->send();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_peer_image_removed(
+ const std::string &instance_id, const std::string &global_image_id,
+ const std::string &peer_mirror_uuid, Context *on_notify_ack) {
+ dout(10) << "instance_id=" << instance_id << ", "
+ << "global_image_id=" << global_image_id << ", "
+ << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_finish == nullptr);
+
+ uint64_t request_id = ++m_request_seq;
+ bufferlist bl;
+ encode(NotifyMessage{PeerImageRemovedPayload{request_id, global_image_id,
+ peer_mirror_uuid}}, bl);
+ auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+ std::move(bl), on_notify_ack);
+ req->send();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_sync_request(const std::string &sync_id,
+ Context *on_sync_start) {
+ dout(10) << "sync_id=" << sync_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_inflight_sync_reqs.count(sync_id) == 0);
+
+ uint64_t request_id = ++m_request_seq;
+
+ bufferlist bl;
+ encode(NotifyMessage{SyncRequestPayload{request_id, sync_id}}, bl);
+
+ auto sync_ctx = new C_SyncRequest(this, sync_id, on_sync_start);
+ sync_ctx->req = new C_NotifyInstanceRequest(this, "", request_id,
+ std::move(bl), sync_ctx);
+
+ m_inflight_sync_reqs[sync_id] = sync_ctx;
+ sync_ctx->req->send();
+}
+
+template <typename I>
+bool InstanceWatcher<I>::cancel_sync_request(const std::string &sync_id) {
+ dout(10) << "sync_id=" << sync_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ auto it = m_inflight_sync_reqs.find(sync_id);
+ if (it == m_inflight_sync_reqs.end()) {
+ return false;
+ }
+
+ auto sync_ctx = it->second;
+
+ if (sync_ctx->on_start == nullptr) {
+ return false;
+ }
+
+ ceph_assert(sync_ctx->req != nullptr);
+ sync_ctx->req->cancel();
+ return true;
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_sync_start(const std::string &instance_id,
+ const std::string &sync_id) {
+ dout(10) << "sync_id=" << sync_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ uint64_t request_id = ++m_request_seq;
+
+ bufferlist bl;
+ encode(NotifyMessage{SyncStartPayload{request_id, sync_id}}, bl);
+
+ auto ctx = new FunctionContext(
+ [this, sync_id] (int r) {
+ dout(10) << "finish: sync_id=" << sync_id << ", r=" << r << dendl;
+ Mutex::Locker locker(m_lock);
+ if (r != -ESTALE && m_image_sync_throttler != nullptr) {
+ m_image_sync_throttler->finish_op(sync_id);
+ }
+ });
+ auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+ std::move(bl), ctx);
+ req->send();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_sync_complete(const std::string &sync_id) {
+ Mutex::Locker locker(m_lock);
+ notify_sync_complete(m_lock, sync_id);
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_sync_complete(const Mutex&,
+ const std::string &sync_id) {
+ dout(10) << "sync_id=" << sync_id << dendl;
+ ceph_assert(m_lock.is_locked());
+
+ auto it = m_inflight_sync_reqs.find(sync_id);
+ ceph_assert(it != m_inflight_sync_reqs.end());
+
+ auto sync_ctx = it->second;
+ ceph_assert(sync_ctx->req == nullptr);
+
+ m_inflight_sync_reqs.erase(it);
+ m_work_queue->queue(sync_ctx, 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_notify_sync_request(C_SyncRequest *sync_ctx,
+ int r) {
+ dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl;
+
+ Context *on_start = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(sync_ctx->req != nullptr);
+ ceph_assert(sync_ctx->on_start != nullptr);
+
+ if (sync_ctx->req->canceling) {
+ r = -ECANCELED;
+ }
+
+ std::swap(sync_ctx->on_start, on_start);
+ sync_ctx->req = nullptr;
+
+ if (r == -ECANCELED) {
+ notify_sync_complete(m_lock, sync_ctx->sync_id);
+ }
+ }
+
+ on_start->complete(r == -ECANCELED ? r : 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_notify_sync_complete(C_SyncRequest *sync_ctx,
+ int r) {
+ dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl;
+
+ if (sync_ctx->on_complete != nullptr) {
+ sync_ctx->on_complete->complete(r);
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::print_sync_status(Formatter *f, stringstream *ss) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ if (m_image_sync_throttler != nullptr) {
+ m_image_sync_throttler->print_status(f, ss);
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_acquire_leader() {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_image_sync_throttler == nullptr);
+ m_image_sync_throttler = ImageSyncThrottler<I>::create(m_cct);
+
+ m_leader_instance_id = m_instance_id;
+ unsuspend_notify_requests();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_release_leader() {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_image_sync_throttler != nullptr);
+
+ m_leader_instance_id.clear();
+
+ m_image_sync_throttler->drain(-ESTALE);
+ m_image_sync_throttler->destroy();
+ m_image_sync_throttler = nullptr;
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_update_leader(
+ const std::string &leader_instance_id) {
+ dout(10) << "leader_instance_id=" << leader_instance_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ m_leader_instance_id = leader_instance_id;
+
+ if (!m_leader_instance_id.empty()) {
+ unsuspend_notify_requests();
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::cancel_notify_requests(
+ const std::string &instance_id) {
+ dout(10) << "instance_id=" << instance_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ for (auto op : m_notify_ops) {
+ if (op.first == instance_id && !op.second->send_to_leader) {
+ op.second->cancel();
+ }
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::register_instance() {
+ ceph_assert(m_lock.is_locked());
+
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_instances_add(&op, m_instance_id);
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_instance>(this);
+
+ int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_register_instance(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+
+ if (r == 0) {
+ create_instance_object();
+ return;
+ }
+
+ derr << "error registering instance: " << cpp_strerror(r) << dendl;
+
+ std::swap(on_finish, m_on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void InstanceWatcher<I>::create_instance_object() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ librados::ObjectWriteOperation op;
+ op.create(true);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ InstanceWatcher<I>,
+ &InstanceWatcher<I>::handle_create_instance_object>(this);
+ int r = m_ioctx.aio_operate(m_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_create_instance_object(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+ derr << "error creating " << m_oid << " object: " << cpp_strerror(r)
+ << dendl;
+
+ m_ret_val = r;
+ unregister_instance();
+ return;
+ }
+
+ register_watch();
+}
+
+template <typename I>
+void InstanceWatcher<I>::register_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_watch>(this));
+
+ librbd::Watcher::register_watch(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_register_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+ derr << "error registering instance watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+
+ m_ret_val = r;
+ remove_instance_object();
+ return;
+ }
+
+ acquire_lock();
+}
+
+template <typename I>
+void InstanceWatcher<I>::acquire_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_acquire_lock>(this));
+
+ m_instance_lock->acquire_lock(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_acquire_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+
+ derr << "error acquiring instance lock: " << cpp_strerror(r) << dendl;
+
+ m_ret_val = r;
+ unregister_watch();
+ return;
+ }
+
+ std::swap(on_finish, m_on_finish);
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void InstanceWatcher<I>::release_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_release_lock>(this));
+
+ m_instance_lock->shut_down(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_release_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+ derr << "error releasing instance lock: " << cpp_strerror(r) << dendl;
+ }
+
+ unregister_watch();
+}
+
+template <typename I>
+void InstanceWatcher<I>::unregister_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_watch>(this));
+
+ librbd::Watcher::unregister_watch(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_unregister_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error unregistering instance watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ Mutex::Locker locker(m_lock);
+ remove_instance_object();
+}
+
+template <typename I>
+void InstanceWatcher<I>::remove_instance_object() {
+ ceph_assert(m_lock.is_locked());
+
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ op.remove();
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ InstanceWatcher<I>,
+ &InstanceWatcher<I>::handle_remove_instance_object>(this);
+ int r = m_ioctx.aio_operate(m_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_remove_instance_object(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ r = 0;
+ }
+
+ if (r < 0) {
+ derr << "error removing " << m_oid << " object: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ Mutex::Locker locker(m_lock);
+ unregister_instance();
+}
+
+template <typename I>
+void InstanceWatcher<I>::unregister_instance() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_instances_remove(&op, m_instance_id);
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_instance>(this);
+
+ int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_unregister_instance(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error unregistering instance: " << cpp_strerror(r) << dendl;
+ }
+
+ Mutex::Locker locker(m_lock);
+ wait_for_notify_ops();
+}
+
+template <typename I>
+void InstanceWatcher<I>::wait_for_notify_ops() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ for (auto op : m_notify_ops) {
+ op.second->cancel();
+ }
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_wait_for_notify_ops>(this));
+
+ m_notify_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_wait_for_notify_ops(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r == 0);
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_notify_ops.empty());
+
+ std::swap(on_finish, m_on_finish);
+ r = m_ret_val;
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void InstanceWatcher<I>::get_instance_locker() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_get_instance_locker>(this));
+
+ m_instance_lock->get_locker(&m_instance_locker, ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_get_instance_locker(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+ if (r != -ENOENT) {
+ derr << "error retrieving instance locker: " << cpp_strerror(r) << dendl;
+ }
+ remove_instance_object();
+ return;
+ }
+
+ break_instance_lock();
+}
+
+template <typename I>
+void InstanceWatcher<I>::break_instance_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_break_instance_lock>(this));
+
+ m_instance_lock->break_lock(m_instance_locker, true, ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_break_instance_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+ if (r != -ENOENT) {
+ derr << "error breaking instance lock: " << cpp_strerror(r) << dendl;
+ }
+ remove_instance_object();
+ return;
+ }
+
+ remove_instance_object();
+}
+
+template <typename I>
+void InstanceWatcher<I>::suspend_notify_request(C_NotifyInstanceRequest *req) {
+ dout(10) << req << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ auto result = m_suspended_ops.insert(req).second;
+ ceph_assert(result);
+}
+
+template <typename I>
+bool InstanceWatcher<I>::unsuspend_notify_request(
+ C_NotifyInstanceRequest *req) {
+ dout(10) << req << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ auto result = m_suspended_ops.erase(req);
+ if (result == 0) {
+ return false;
+ }
+
+ req->send();
+ return true;
+}
+
+template <typename I>
+void InstanceWatcher<I>::unsuspend_notify_requests() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ std::set<C_NotifyInstanceRequest *> suspended_ops;
+ std::swap(m_suspended_ops, suspended_ops);
+
+ for (auto op : suspended_ops) {
+ op->send();
+ }
+}
+
+template <typename I>
+Context *InstanceWatcher<I>::prepare_request(const std::string &instance_id,
+ uint64_t request_id,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id
+ << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ Context *ctx = nullptr;
+ Request request(instance_id, request_id);
+ auto it = m_requests.find(request);
+
+ if (it != m_requests.end()) {
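+ // rados notifications can be re-delivered (e.g. after a timeout), so a
+ // second notify may arrive while the first is still in progress; drop the
+ // stale ack and let the new one take its place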
+ dout(10) << "duplicate for in-progress request" << dendl;
+ delete it->on_notify_ack;
+ m_requests.erase(it);
+ } else {
+ ctx = create_async_context_callback(
+ m_work_queue, new FunctionContext(
+ [this, instance_id, request_id] (int r) {
+ complete_request(instance_id, request_id, r);
+ }));
+ }
+
+ request.on_notify_ack = on_notify_ack;
+ m_requests.insert(request);
+ return ctx;
+}
+
+template <typename I>
+void InstanceWatcher<I>::complete_request(const std::string &instance_id,
+ uint64_t request_id, int r) {
+ dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id
+ << dendl;
+
+ C_NotifyAck *on_notify_ack;
+ {
+ Mutex::Locker locker(m_lock);
+ Request request(instance_id, request_id);
+ auto it = m_requests.find(request);
+ ceph_assert(it != m_requests.end());
+ on_notify_ack = it->on_notify_ack;
+ m_requests.erase(it);
+ }
+
+ encode(NotifyAckPayload(instance_id, request_id, r), on_notify_ack->out);
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) {
+ dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", "
+ << "notifier_id=" << notifier_id << dendl;
+
+ auto ctx = new C_NotifyAck(this, notify_id, handle);
+
+ NotifyMessage notify_message;
+ try {
+ auto iter = bl.cbegin();
+ decode(notify_message, iter);
+ } catch (const buffer::error &err) {
+ derr << "error decoding image notification: " << err.what() << dendl;
+ ctx->complete(0);
+ return;
+ }
+
+ apply_visitor(HandlePayloadVisitor(this, stringify(notifier_id), ctx),
+ notify_message.payload);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_image_acquire(
+ const std::string &global_image_id, Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+
+ auto ctx = new FunctionContext(
+ [this, global_image_id, on_finish] (int r) {
+ m_instance_replayer->acquire_image(this, global_image_id, on_finish);
+ m_notify_op_tracker.finish_op();
+ });
+
+ m_notify_op_tracker.start_op();
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_image_release(
+ const std::string &global_image_id, Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+
+ auto ctx = new FunctionContext(
+ [this, global_image_id, on_finish] (int r) {
+ m_instance_replayer->release_image(global_image_id, on_finish);
+ m_notify_op_tracker.finish_op();
+ });
+
+ m_notify_op_tracker.start_op();
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_peer_image_removed(
+ const std::string &global_image_id, const std::string &peer_mirror_uuid,
+ Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << ", "
+ << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+ auto ctx = new FunctionContext(
+ [this, peer_mirror_uuid, global_image_id, on_finish] (int r) {
+ m_instance_replayer->remove_peer_image(global_image_id,
+ peer_mirror_uuid, on_finish);
+ m_notify_op_tracker.finish_op();
+ });
+
+ m_notify_op_tracker.start_op();
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_sync_request(const std::string &instance_id,
+ const std::string &sync_id,
+ Context *on_finish) {
+ dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (m_image_sync_throttler == nullptr) {
+ dout(10) << "sync request for non-leader" << dendl;
+ m_work_queue->queue(on_finish, -ESTALE);
+ return;
+ }
+
+ Context *on_start = create_async_context_callback(
+ m_work_queue, new FunctionContext(
+ [this, instance_id, sync_id, on_finish] (int r) {
+ dout(10) << "handle_sync_request: finish: instance_id=" << instance_id
+ << ", sync_id=" << sync_id << ", r=" << r << dendl;
+ if (r == 0) {
+ notify_sync_start(instance_id, sync_id);
+ }
+ if (r == -ENOENT) {
+ r = 0;
+ }
+ on_finish->complete(r);
+ }));
+ m_image_sync_throttler->start_op(sync_id, on_start);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_sync_start(const std::string &instance_id,
+ const std::string &sync_id,
+ Context *on_finish) {
+ dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ auto it = m_inflight_sync_reqs.find(sync_id);
+ if (it == m_inflight_sync_reqs.end()) {
+ dout(5) << "not found" << dendl;
+ m_work_queue->queue(on_finish, 0);
+ return;
+ }
+
+ auto sync_ctx = it->second;
+
+ if (sync_ctx->on_complete != nullptr) {
+ dout(5) << "duplicate request" << dendl;
+ m_work_queue->queue(sync_ctx->on_complete, -ESTALE);
+ }
+
+ sync_ctx->on_complete = on_finish;
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const ImageAcquirePayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "image_acquire: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish != nullptr) {
+ handle_image_acquire(payload.global_image_id, on_finish);
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const ImageReleasePayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "image_release: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish != nullptr) {
+ handle_image_release(payload.global_image_id, on_finish);
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const PeerImageRemovedPayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "remove_peer_image: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish != nullptr) {
+ handle_peer_image_removed(payload.global_image_id, payload.peer_mirror_uuid,
+ on_finish);
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const SyncRequestPayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "sync_request: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish == nullptr) {
+ return;
+ }
+
+ handle_sync_request(instance_id, payload.sync_id, on_finish);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const SyncStartPayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "sync_start: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish == nullptr) {
+ return;
+ }
+
+ handle_sync_start(instance_id, payload.sync_id, on_finish);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const UnknownPayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(5) << "unknown: instance_id=" << instance_id << dendl;
+
+ on_notify_ack->complete(0);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::InstanceWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/InstanceWatcher.h b/src/tools/rbd_mirror/InstanceWatcher.h
new file mode 100644
index 00000000..5ec1aef0
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceWatcher.h
@@ -0,0 +1,264 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCE_WATCHER_H
+#define CEPH_RBD_MIRROR_INSTANCE_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "common/AsyncOpTracker.h"
+#include "librbd/Watcher.h"
+#include "librbd/managed_lock/Types.h"
+#include "tools/rbd_mirror/instance_watcher/Types.h"
+
+namespace librbd {
+
+class ImageCtx;
+template <typename> class ManagedLock;
+
+}
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ImageSyncThrottler;
+template <typename> class InstanceReplayer;
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class InstanceWatcher : protected librbd::Watcher {
+ using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning
+public:
+ static void get_instances(librados::IoCtx &io_ctx,
+ std::vector<std::string> *instance_ids,
+ Context *on_finish);
+ static void remove_instance(librados::IoCtx &io_ctx,
+ ContextWQ *work_queue,
+ const std::string &instance_id,
+ Context *on_finish);
+
+ static InstanceWatcher *create(
+ librados::IoCtx &io_ctx, ContextWQ *work_queue,
+ InstanceReplayer<ImageCtxT> *instance_replayer);
+ void destroy() {
+ delete this;
+ }
+
+ InstanceWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue,
+ InstanceReplayer<ImageCtxT> *instance_replayer,
+ const std::string &instance_id);
+ ~InstanceWatcher() override;
+
+ inline std::string &get_instance_id() {
+ return m_instance_id;
+ }
+
+ int init();
+ void shut_down();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+ void remove(Context *on_finish);
+
+ void notify_image_acquire(const std::string &instance_id,
+ const std::string &global_image_id,
+ Context *on_notify_ack);
+ void notify_image_release(const std::string &instance_id,
+ const std::string &global_image_id,
+ Context *on_notify_ack);
+ void notify_peer_image_removed(const std::string &instance_id,
+ const std::string &global_image_id,
+ const std::string &peer_mirror_uuid,
+ Context *on_notify_ack);
+
+ void notify_sync_request(const std::string &sync_id, Context *on_sync_start);
+ bool cancel_sync_request(const std::string &sync_id);
+ void notify_sync_complete(const std::string &sync_id);
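+
+  // Hedged sketch of the leader-throttled sync protocol implied by the
+  // three calls above (ImageSync is the assumed caller):
+  //
+  //   watcher->notify_sync_request(sync_id, on_start); // ask leader for slot
+  //   // ... on_start completes with 0 once the throttler grants the slot,
+  //   // the image sync runs, and then ...
+  //   watcher->notify_sync_complete(sync_id);          // release the slot
+  //
+  // cancel_sync_request() aborts a not-yet-granted request; its on_start
+  // context is then completed with -ECANCELED.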
+
+ void print_sync_status(Formatter *f, stringstream *ss);
+
+ void cancel_notify_requests(const std::string &instance_id);
+
+ void handle_acquire_leader();
+ void handle_release_leader();
+ void handle_update_leader(const std::string &leader_instance_id);
+
+private:
+ /**
+ * @verbatim
+ *
+ * BREAK_INSTANCE_LOCK -------\
+ * ^ |
+ * | (error) |
+ * GET_INSTANCE_LOCKER * * *>|
+ * ^ (remove) |
+ * | |
+ * <uninitialized> <----------------+---- WAIT_FOR_NOTIFY_OPS
+ * | (init) ^ | ^
+ * v (error) * | |
+ * REGISTER_INSTANCE * * * * * *|* *> UNREGISTER_INSTANCE
+ * | * | ^
+ * v (error) * v |
+ * CREATE_INSTANCE_OBJECT * * * * * *> REMOVE_INSTANCE_OBJECT
+ * | * ^
+ * v (error) * |
+ * REGISTER_WATCH * * * * * * * * * *> UNREGISTER_WATCH
+ * | * ^
+ * v (error) * |
+ * ACQUIRE_LOCK * * * * * * * * * * * RELEASE_LOCK
+ * | ^
+ * v (shut_down) |
+ * <watching> -------------------------------/
+ *
+ * @endverbatim
+ */
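+
+  // Reading the diagram: init() descends the left column to <watching>,
+  // shut_down() unwinds the right column, and remove() (used to clean up
+  // another, no-longer-responding instance) breaks that instance's lock
+  // before deleting its object and directory entry.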
+
+ struct C_NotifyInstanceRequest;
+ struct C_SyncRequest;
+
+ typedef std::pair<std::string, std::string> Id;
+
+ struct HandlePayloadVisitor : public boost::static_visitor<void> {
+ InstanceWatcher *instance_watcher;
+ std::string instance_id;
+ C_NotifyAck *on_notify_ack;
+
+ HandlePayloadVisitor(InstanceWatcher *instance_watcher,
+ const std::string &instance_id,
+ C_NotifyAck *on_notify_ack)
+ : instance_watcher(instance_watcher), instance_id(instance_id),
+ on_notify_ack(on_notify_ack) {
+ }
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ instance_watcher->handle_payload(instance_id, payload, on_notify_ack);
+ }
+ };
+
+ struct Request {
+ std::string instance_id;
+ uint64_t request_id;
+ C_NotifyAck *on_notify_ack = nullptr;
+
+ Request(const std::string &instance_id, uint64_t request_id)
+ : instance_id(instance_id), request_id(request_id) {
+ }
+
+ inline bool operator<(const Request &rhs) const {
+ return instance_id < rhs.instance_id ||
+ (instance_id == rhs.instance_id && request_id < rhs.request_id);
+ }
+ };
+
+ Threads<ImageCtxT> *m_threads;
+ InstanceReplayer<ImageCtxT> *m_instance_replayer;
+ std::string m_instance_id;
+
+ mutable Mutex m_lock;
+ librbd::ManagedLock<ImageCtxT> *m_instance_lock;
+ Context *m_on_finish = nullptr;
+ int m_ret_val = 0;
+ std::string m_leader_instance_id;
+ librbd::managed_lock::Locker m_instance_locker;
+ std::set<std::pair<std::string, C_NotifyInstanceRequest *>> m_notify_ops;
+ AsyncOpTracker m_notify_op_tracker;
+ uint64_t m_request_seq = 0;
+ std::set<Request> m_requests;
+ std::set<C_NotifyInstanceRequest *> m_suspended_ops;
+ std::map<std::string, C_SyncRequest *> m_inflight_sync_reqs;
+ ImageSyncThrottler<ImageCtxT> *m_image_sync_throttler = nullptr;
+
+ void register_instance();
+ void handle_register_instance(int r);
+
+ void create_instance_object();
+ void handle_create_instance_object(int r);
+
+ void register_watch();
+ void handle_register_watch(int r);
+
+ void acquire_lock();
+ void handle_acquire_lock(int r);
+
+ void release_lock();
+ void handle_release_lock(int r);
+
+ void unregister_watch();
+ void handle_unregister_watch(int r);
+
+ void remove_instance_object();
+ void handle_remove_instance_object(int r);
+
+ void unregister_instance();
+ void handle_unregister_instance(int r);
+
+ void wait_for_notify_ops();
+ void handle_wait_for_notify_ops(int r);
+
+ void get_instance_locker();
+ void handle_get_instance_locker(int r);
+
+ void break_instance_lock();
+ void handle_break_instance_lock(int r);
+
+ void suspend_notify_request(C_NotifyInstanceRequest *req);
+ bool unsuspend_notify_request(C_NotifyInstanceRequest *req);
+ void unsuspend_notify_requests();
+
+ void notify_sync_complete(const Mutex& lock, const std::string &sync_id);
+ void handle_notify_sync_request(C_SyncRequest *sync_ctx, int r);
+ void handle_notify_sync_complete(C_SyncRequest *sync_ctx, int r);
+
+ void notify_sync_start(const std::string &instance_id,
+ const std::string &sync_id);
+
+ Context *prepare_request(const std::string &instance_id, uint64_t request_id,
+ C_NotifyAck *on_notify_ack);
+ void complete_request(const std::string &instance_id, uint64_t request_id,
+ int r);
+
+ void handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) override;
+
+ void handle_image_acquire(const std::string &global_image_id,
+ Context *on_finish);
+ void handle_image_release(const std::string &global_image_id,
+ Context *on_finish);
+ void handle_peer_image_removed(const std::string &global_image_id,
+ const std::string &peer_mirror_uuid,
+ Context *on_finish);
+
+ void handle_sync_request(const std::string &instance_id,
+ const std::string &sync_id, Context *on_finish);
+ void handle_sync_start(const std::string &instance_id,
+ const std::string &sync_id, Context *on_finish);
+
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::ImageAcquirePayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::ImageReleasePayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::PeerImageRemovedPayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::SyncRequestPayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::SyncStartPayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::UnknownPayload &payload,
+ C_NotifyAck *on_notify_ack);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCE_WATCHER_H
diff --git a/src/tools/rbd_mirror/Instances.cc b/src/tools/rbd_mirror/Instances.cc
new file mode 100644
index 00000000..b7a6cf11
--- /dev/null
+++ b/src/tools/rbd_mirror/Instances.cc
@@ -0,0 +1,359 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/stringify.h"
+#include "common/Timer.h"
+#include "common/WorkQueue.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+#include "InstanceWatcher.h"
+#include "Instances.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::Instances: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+Instances<I>::Instances(Threads<I> *threads, librados::IoCtx &ioctx,
+ const std::string& instance_id,
+ instances::Listener& listener) :
+ m_threads(threads), m_ioctx(ioctx), m_instance_id(instance_id),
+ m_listener(listener), m_cct(reinterpret_cast<CephContext *>(ioctx.cct())),
+ m_lock("rbd::mirror::Instances " + ioctx.get_pool_name()) {
+}
+
+template <typename I>
+Instances<I>::~Instances() {
+}
+
+template <typename I>
+void Instances<I>::init(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ get_instances();
+}
+
+template <typename I>
+void Instances<I>::shut_down(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ Context *ctx = new FunctionContext(
+ [this](int r) {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ cancel_remove_task();
+ wait_for_ops();
+ });
+
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void Instances<I>::unblock_listener() {
+ dout(5) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_listener_blocked);
+ m_listener_blocked = false;
+
+ InstanceIds added_instance_ids;
+ for (auto& pair : m_instances) {
+ if (pair.second.state == INSTANCE_STATE_ADDING) {
+ added_instance_ids.push_back(pair.first);
+ }
+ }
+
+ if (!added_instance_ids.empty()) {
+ m_threads->work_queue->queue(
+ new C_NotifyInstancesAdded(this, added_instance_ids), 0);
+ }
+}
+
+template <typename I>
+void Instances<I>::acked(const InstanceIds& instance_ids) {
+ dout(10) << "instance_ids=" << instance_ids << dendl;
+
+ Mutex::Locker locker(m_lock);
+ if (m_on_finish != nullptr) {
+ dout(5) << "received on shut down, ignoring" << dendl;
+ return;
+ }
+
+ Context *ctx = new C_HandleAcked(this, instance_ids);
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void Instances<I>::handle_acked(const InstanceIds& instance_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ if (m_on_finish != nullptr) {
+ dout(5) << "handled on shut down, ignoring" << dendl;
+ return;
+ }
+
+ InstanceIds added_instance_ids;
+ auto time = ceph_clock_now();
+ for (auto& instance_id : instance_ids) {
+ auto &instance = m_instances.insert(
+ std::make_pair(instance_id, Instance{})).first->second;
+ instance.acked_time = time;
+ if (instance.state == INSTANCE_STATE_ADDING) {
+ added_instance_ids.push_back(instance_id);
+ }
+ }
+
+ schedule_remove_task(time);
+ if (!m_listener_blocked && !added_instance_ids.empty()) {
+ m_threads->work_queue->queue(
+ new C_NotifyInstancesAdded(this, added_instance_ids), 0);
+ }
+}
+
+template <typename I>
+void Instances<I>::notify_instances_added(const InstanceIds& instance_ids) {
+ Mutex::Locker locker(m_lock);
+ InstanceIds added_instance_ids;
+ for (auto& instance_id : instance_ids) {
+ auto it = m_instances.find(instance_id);
+ if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) {
+ added_instance_ids.push_back(instance_id);
+ }
+ }
+
+ if (added_instance_ids.empty()) {
+ return;
+ }
+
+ dout(5) << "instance_ids=" << added_instance_ids << dendl;
+ m_lock.Unlock();
+ m_listener.handle_added(added_instance_ids);
+ m_lock.Lock();
+
+ for (auto& instance_id : added_instance_ids) {
+ auto it = m_instances.find(instance_id);
+ if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) {
+ it->second.state = INSTANCE_STATE_IDLE;
+ }
+ }
+}
+
+template <typename I>
+void Instances<I>::notify_instances_removed(const InstanceIds& instance_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+ m_listener.handle_removed(instance_ids);
+
+ Mutex::Locker locker(m_lock);
+ for (auto& instance_id : instance_ids) {
+ m_instances.erase(instance_id);
+ }
+}
+
+template <typename I>
+void Instances<I>::list(std::vector<std::string> *instance_ids) {
+ dout(20) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ for (auto it : m_instances) {
+ instance_ids->push_back(it.first);
+ }
+}
+
+template <typename I>
+void Instances<I>::get_instances() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_context_callback<
+ Instances, &Instances<I>::handle_get_instances>(this);
+
+ InstanceWatcher<I>::get_instances(m_ioctx, &m_instance_ids, ctx);
+}
+
+template <typename I>
+void Instances<I>::handle_get_instances(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ std::swap(on_finish, m_on_finish);
+ }
+
+ if (r < 0) {
+ derr << "error retrieving instances: " << cpp_strerror(r) << dendl;
+ } else {
+ handle_acked(m_instance_ids);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void Instances<I>::wait_for_ops() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ Instances, &Instances<I>::handle_wait_for_ops>(this));
+
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void Instances<I>::handle_wait_for_ops(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r == 0);
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ std::swap(on_finish, m_on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void Instances<I>::remove_instances(const utime_t& time) {
+ ceph_assert(m_lock.is_locked());
+
+ InstanceIds instance_ids;
+ for (auto& instance_pair : m_instances) {
+ if (instance_pair.first == m_instance_id) {
+ continue;
+ }
+ auto& instance = instance_pair.second;
+ if (instance.state != INSTANCE_STATE_REMOVING &&
+ instance.acked_time <= time) {
+ instance.state = INSTANCE_STATE_REMOVING;
+ instance_ids.push_back(instance_pair.first);
+ }
+ }
+ ceph_assert(!instance_ids.empty());
+
+ dout(10) << "instance_ids=" << instance_ids << dendl;
+ Context* ctx = new FunctionContext([this, instance_ids](int r) {
+ handle_remove_instances(r, instance_ids);
+ });
+ ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+ auto gather_ctx = new C_Gather(m_cct, ctx);
+ for (auto& instance_id : instance_ids) {
+ InstanceWatcher<I>::remove_instance(m_ioctx, m_threads->work_queue,
+ instance_id, gather_ctx->new_sub());
+ }
+
+ m_async_op_tracker.start_op();
+ gather_ctx->activate();
+}
+
+template <typename I>
+void Instances<I>::handle_remove_instances(
+ int r, const InstanceIds& instance_ids) {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+
+ dout(10) << "r=" << r << ", instance_ids=" << instance_ids << dendl;
+ ceph_assert(r == 0);
+
+ // fire removed notification now that instances have been blacklisted
+ m_threads->work_queue->queue(
+ new C_NotifyInstancesRemoved(this, instance_ids), 0);
+
+ // reschedule the timer for the next batch
+ schedule_remove_task(ceph_clock_now());
+ m_async_op_tracker.finish_op();
+}
+
+template <typename I>
+void Instances<I>::cancel_remove_task() {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+
+ if (m_timer_task == nullptr) {
+ return;
+ }
+
+ dout(10) << dendl;
+
+ bool canceled = m_threads->timer->cancel_event(m_timer_task);
+ ceph_assert(canceled);
+ m_timer_task = nullptr;
+}
+
+template <typename I>
+void Instances<I>::schedule_remove_task(const utime_t& time) {
+ cancel_remove_task();
+ if (m_on_finish != nullptr) {
+ dout(10) << "received on shut down, ignoring" << dendl;
+ return;
+ }
+
+ int after = m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_heartbeat_interval") *
+ (1 + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats") +
+ m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_acquire_attempts_before_break"));
+
+ bool schedule = false;
+ utime_t oldest_time = time;
+ for (auto& instance : m_instances) {
+ if (instance.first == m_instance_id) {
+ continue;
+ }
+ if (instance.second.state == INSTANCE_STATE_REMOVING) {
+ // removal is already in-flight
+ continue;
+ }
+
+ oldest_time = std::min(oldest_time, instance.second.acked_time);
+ schedule = true;
+ }
+
+ if (!schedule) {
+ return;
+ }
+
+ dout(10) << dendl;
+
+ // schedule a time to fire when the oldest instance should be removed
+ m_timer_task = new FunctionContext(
+ [this, oldest_time](int r) {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ Mutex::Locker locker(m_lock);
+ m_timer_task = nullptr;
+
+ remove_instances(oldest_time);
+ });
+
+ oldest_time += after;
+ m_threads->timer->add_event_at(oldest_time, m_timer_task);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::Instances<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/Instances.h b/src/tools/rbd_mirror/Instances.h
new file mode 100644
index 00000000..dbfb16df
--- /dev/null
+++ b/src/tools/rbd_mirror/Instances.h
@@ -0,0 +1,167 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCES_H
+#define CEPH_RBD_MIRROR_INSTANCES_H
+
+#include <map>
+#include <vector>
+
+#include "include/buffer_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+#include "common/AsyncOpTracker.h"
+#include "common/Mutex.h"
+#include "librbd/Watcher.h"
+#include "tools/rbd_mirror/instances/Types.h"
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Instances {
+public:
+ typedef std::vector<std::string> InstanceIds;
+
+ static Instances *create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &ioctx,
+ const std::string& instance_id,
+ instances::Listener& listener) {
+ return new Instances(threads, ioctx, instance_id, listener);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ Instances(Threads<ImageCtxT> *threads, librados::IoCtx &ioctx,
+ const std::string& instance_id, instances::Listener& listener);
+ virtual ~Instances();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+ void unblock_listener();
+
+ void acked(const InstanceIds& instance_ids);
+
+ void list(std::vector<std::string> *instance_ids);
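+
+  // Hedged usage sketch (the leader is the assumed driver, via
+  // LeaderWatcher elsewhere in this change):
+  //
+  //   auto *instances = Instances<>::create(threads, io_ctx, id, listener);
+  //   instances->init(on_init);       // seeds the map via get_instances()
+  //   instances->acked(ids);          // called for each heartbeat ack
+  //   instances->unblock_listener();  // start delivering added notifications
+  //   ...
+  //   instances->shut_down(on_shut_down);
+  //   instances->destroy();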
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <---------------------\
+ * | (init) ^ |
+ * v (error) * |
+ * GET_INSTANCES * * * * * WAIT_FOR_OPS
+ * | ^
+ * v (shut_down) |
+ * <initialized> ------------------------/
+ * .
+ * . (remove_instance)
+ * v
+ * REMOVE_INSTANCE
+ *
+ * @endverbatim
+ */
+
+ enum InstanceState {
+ INSTANCE_STATE_ADDING,
+ INSTANCE_STATE_IDLE,
+ INSTANCE_STATE_REMOVING
+ };
+
+ struct Instance {
+ utime_t acked_time{};
+ InstanceState state = INSTANCE_STATE_ADDING;
+ };
+
+ struct C_NotifyBase : public Context {
+ Instances *instances;
+ InstanceIds instance_ids;
+
+ C_NotifyBase(Instances *instances, const InstanceIds& instance_ids)
+ : instances(instances), instance_ids(instance_ids) {
+ instances->m_async_op_tracker.start_op();
+ }
+
+ void finish(int r) override {
+ execute();
+ instances->m_async_op_tracker.finish_op();
+ }
+
+ virtual void execute() = 0;
+ };
+
+ struct C_HandleAcked : public C_NotifyBase {
+ C_HandleAcked(Instances *instances, const InstanceIds& instance_ids)
+ : C_NotifyBase(instances, instance_ids) {
+ }
+
+ void execute() override {
+ this->instances->handle_acked(this->instance_ids);
+ }
+ };
+
+ struct C_NotifyInstancesAdded : public C_NotifyBase {
+ C_NotifyInstancesAdded(Instances *instances,
+ const InstanceIds& instance_ids)
+ : C_NotifyBase(instances, instance_ids) {
+ }
+
+ void execute() override {
+ this->instances->notify_instances_added(this->instance_ids);
+ }
+ };
+
+ struct C_NotifyInstancesRemoved : public C_NotifyBase {
+ C_NotifyInstancesRemoved(Instances *instances,
+ const InstanceIds& instance_ids)
+ : C_NotifyBase(instances, instance_ids) {
+ }
+
+ void execute() override {
+ this->instances->notify_instances_removed(this->instance_ids);
+ }
+ };
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx &m_ioctx;
+ std::string m_instance_id;
+ instances::Listener& m_listener;
+ CephContext *m_cct;
+
+ Mutex m_lock;
+ InstanceIds m_instance_ids;
+ std::map<std::string, Instance> m_instances;
+ Context *m_on_finish = nullptr;
+ AsyncOpTracker m_async_op_tracker;
+
+ Context *m_timer_task = nullptr;
+
+ bool m_listener_blocked = true;
+
+ void handle_acked(const InstanceIds& instance_ids);
+ void notify_instances_added(const InstanceIds& instance_ids);
+ void notify_instances_removed(const InstanceIds& instance_ids);
+
+ void get_instances();
+ void handle_get_instances(int r);
+
+ void wait_for_ops();
+ void handle_wait_for_ops(int r);
+
+ void remove_instances(const utime_t& time);
+ void handle_remove_instances(int r, const InstanceIds& instance_ids);
+
+ void cancel_remove_task();
+ void schedule_remove_task(const utime_t& time);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCES_H
diff --git a/src/tools/rbd_mirror/LeaderWatcher.cc b/src/tools/rbd_mirror/LeaderWatcher.cc
new file mode 100644
index 00000000..0d4bde6f
--- /dev/null
+++ b/src/tools/rbd_mirror/LeaderWatcher.cc
@@ -0,0 +1,1145 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "LeaderWatcher.h"
+#include "common/Timer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "include/stringify.h"
+#include "librbd/Utils.h"
+#include "librbd/watcher/Types.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::LeaderWatcher: " \
+                           << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using namespace leader_watcher;
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+LeaderWatcher<I>::LeaderWatcher(Threads<I> *threads, librados::IoCtx &io_ctx,
+ leader_watcher::Listener *listener)
+ : Watcher(io_ctx, threads->work_queue, RBD_MIRROR_LEADER),
+ m_threads(threads), m_listener(listener), m_instances_listener(this),
+ m_lock("rbd::mirror::LeaderWatcher " + io_ctx.get_pool_name()),
+ m_notifier_id(librados::Rados(io_ctx).get_instance_id()),
+ m_instance_id(stringify(m_notifier_id)),
+ m_leader_lock(new LeaderLock(m_ioctx, m_work_queue, m_oid, this, true,
+ m_cct->_conf.get_val<uint64_t>(
+ "rbd_blacklist_expire_seconds"))) {
+}
+
+template <typename I>
+LeaderWatcher<I>::~LeaderWatcher() {
+ ceph_assert(m_status_watcher == nullptr);
+ ceph_assert(m_instances == nullptr);
+ ceph_assert(m_timer_task == nullptr);
+
+ delete m_leader_lock;
+}
+
+template <typename I>
+std::string LeaderWatcher<I>::get_instance_id() {
+ return m_instance_id;
+}
+
+template <typename I>
+int LeaderWatcher<I>::init() {
+ C_SaferCond init_ctx;
+ init(&init_ctx);
+ return init_ctx.wait();
+}
+
+template <typename I>
+void LeaderWatcher<I>::init(Context *on_finish) {
+ dout(10) << "notifier_id=" << m_notifier_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ create_leader_object();
+}
+
+template <typename I>
+void LeaderWatcher<I>::create_leader_object() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ librados::ObjectWriteOperation op;
+ op.create(false);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_create_leader_object>(this);
+ int r = m_ioctx.aio_operate(m_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_create_leader_object(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+
+ if (r == 0) {
+ register_watch();
+ return;
+ }
+
+ derr << "error creating " << m_oid << " object: " << cpp_strerror(r)
+ << dendl;
+
+ std::swap(on_finish, m_on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::register_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_register_watch>(this));
+
+ librbd::Watcher::register_watch(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_register_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ if (r < 0) {
+ Mutex::Locker locker(m_lock);
+ derr << "error registering leader watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(on_finish, m_on_finish);
+ } else {
+ Mutex::Locker locker(m_lock);
+ init_status_watcher();
+ return;
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down() {
+ C_SaferCond shut_down_ctx;
+ shut_down(&shut_down_ctx);
+ int r = shut_down_ctx.wait();
+ ceph_assert(r == 0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(m_on_shut_down_finish == nullptr);
+ m_on_shut_down_finish = on_finish;
+ cancel_timer_task();
+ shut_down_leader_lock();
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down_leader_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_shut_down_leader_lock>(this));
+
+ m_leader_lock->shut_down(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_shut_down_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+ derr << "error shutting down leader lock: " << cpp_strerror(r) << dendl;
+ }
+
+ shut_down_status_watcher();
+}
+
+template <typename I>
+void LeaderWatcher<I>::unregister_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_unregister_watch>(this));
+
+ librbd::Watcher::unregister_watch(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_unregister_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error unregistering leader watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+ }
+ wait_for_tasks();
+}
+
+template <typename I>
+void LeaderWatcher<I>::wait_for_tasks() {
+ dout(10) << dendl;
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ schedule_timer_task("wait for tasks", 0, false,
+ &LeaderWatcher<I>::handle_wait_for_tasks, true);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_wait_for_tasks() {
+ dout(10) << dendl;
+
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(m_on_shut_down_finish != nullptr);
+
+ ceph_assert(!m_timer_op_tracker.empty());
+ m_timer_op_tracker.finish_op();
+
+ auto ctx = new FunctionContext([this](int r) {
+ Context *on_finish;
+ {
+ // ensure lock isn't held when completing shut down
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_shut_down_finish != nullptr);
+ on_finish = m_on_shut_down_finish;
+ }
+ on_finish->complete(0);
+ });
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_blacklisted() const {
+  Mutex::Locker locker(m_lock);
+ return m_blacklisted;
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_leader() const {
+ Mutex::Locker locker(m_lock);
+
+ return is_leader(m_lock);
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_leader(Mutex &lock) const {
+ ceph_assert(m_lock.is_locked());
+
+ bool leader = m_leader_lock->is_leader();
+ dout(10) << leader << dendl;
+ return leader;
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_releasing_leader() const {
+ Mutex::Locker locker(m_lock);
+
+ return is_releasing_leader(m_lock);
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_releasing_leader(Mutex &lock) const {
+ ceph_assert(m_lock.is_locked());
+
+ bool releasing = m_leader_lock->is_releasing_leader();
+ dout(10) << releasing << dendl;
+ return releasing;
+}
+
+template <typename I>
+bool LeaderWatcher<I>::get_leader_instance_id(std::string *instance_id) const {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (is_leader(m_lock) || is_releasing_leader(m_lock)) {
+ *instance_id = m_instance_id;
+ return true;
+ }
+
+ if (!m_locker.cookie.empty()) {
+ *instance_id = stringify(m_locker.entity.num());
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+void LeaderWatcher<I>::release_leader() {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ if (!is_leader(m_lock)) {
+ return;
+ }
+
+ release_leader_lock();
+}
+
+template <typename I>
+void LeaderWatcher<I>::list_instances(std::vector<std::string> *instance_ids) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ instance_ids->clear();
+ if (m_instances != nullptr) {
+ m_instances->list(instance_ids);
+ }
+}
+
+template <typename I>
+void LeaderWatcher<I>::cancel_timer_task() {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+
+ if (m_timer_task == nullptr) {
+ return;
+ }
+
+ dout(10) << m_timer_task << dendl;
+ bool canceled = m_threads->timer->cancel_event(m_timer_task);
+ ceph_assert(canceled);
+ m_timer_task = nullptr;
+}
+
+template <typename I>
+void LeaderWatcher<I>::schedule_timer_task(const std::string &name,
+ int delay_factor, bool leader,
+ TimerCallback timer_callback,
+ bool shutting_down) {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+
+ if (!shutting_down && m_on_shut_down_finish != nullptr) {
+ return;
+ }
+
+ cancel_timer_task();
+
+ m_timer_task = new FunctionContext(
+ [this, leader, timer_callback](int r) {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ m_timer_task = nullptr;
+
+ if (m_timer_op_tracker.empty()) {
+ Mutex::Locker locker(m_lock);
+ execute_timer_task(leader, timer_callback);
+ return;
+ }
+
+ // old timer task is still running -- do not start next
+ // task until the previous task completes
+ if (m_timer_gate == nullptr) {
+ m_timer_gate = new C_TimerGate(this);
+ m_timer_op_tracker.wait_for_ops(m_timer_gate);
+ }
+ m_timer_gate->leader = leader;
+ m_timer_gate->timer_callback = timer_callback;
+ });
+
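+  // the delay is expressed in multiples of the configured heartbeat interval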
+ int after = delay_factor * m_cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_leader_heartbeat_interval");
+
+ dout(10) << "scheduling " << name << " after " << after << " sec (task "
+ << m_timer_task << ")" << dendl;
+ m_threads->timer->add_event_after(after, m_timer_task);
+}
+
+template <typename I>
+void LeaderWatcher<I>::execute_timer_task(bool leader,
+ TimerCallback timer_callback) {
+ dout(10) << dendl;
+
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(m_timer_op_tracker.empty());
+
+ if (is_leader(m_lock) != leader) {
+ return;
+ }
+
+ m_timer_op_tracker.start_op();
+ (this->*timer_callback)();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_post_acquire_leader_lock(int r,
+ Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ if (r == -EAGAIN) {
+ dout(10) << "already locked" << dendl;
+ } else {
+ derr << "error acquiring leader lock: " << cpp_strerror(r) << dendl;
+ }
+ on_finish->complete(r);
+ return;
+ }
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ init_instances();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_pre_release_leader_lock(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ notify_listener();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_post_release_leader_lock(int r,
+ Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ on_finish->complete(r);
+ return;
+ }
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ notify_lock_released();
+}
+
+template <typename I>
+void LeaderWatcher<I>::break_leader_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_locker.cookie.empty()) {
+ get_locker();
+ return;
+ }
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_break_leader_lock>(this));
+
+ m_leader_lock->break_lock(m_locker, true, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_break_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "error breaking leader lock: " << cpp_strerror(r) << dendl;
+ schedule_acquire_leader_lock(1);
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ m_locker = {};
+ m_acquire_attempts = 0;
+ acquire_leader_lock();
+}
+
+template <typename I>
+void LeaderWatcher<I>::schedule_get_locker(bool reset_leader,
+ uint32_t delay_factor) {
+ dout(10) << dendl;
+
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+
+ if (reset_leader) {
+ m_locker = {};
+ m_acquire_attempts = 0;
+ }
+
+ schedule_timer_task("get locker", delay_factor, false,
+ &LeaderWatcher<I>::get_locker, false);
+}
+
+template <typename I>
+void LeaderWatcher<I>::get_locker() {
+ dout(10) << dendl;
+
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ C_GetLocker *get_locker_ctx = new C_GetLocker(this);
+ Context *ctx = create_async_context_callback(m_work_queue, get_locker_ctx);
+
+ m_leader_lock->get_locker(&get_locker_ctx->locker, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_get_locker(int r,
+ librbd::managed_lock::Locker& locker) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker mutex_locker(m_lock);
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (is_leader(m_lock)) {
+ m_locker = {};
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (r == -ENOENT) {
+ m_locker = {};
+ m_acquire_attempts = 0;
+ acquire_leader_lock();
+ return;
+ } else if (r < 0) {
+ derr << "error retrieving leader locker: " << cpp_strerror(r) << dendl;
+ schedule_get_locker(true, 1);
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ bool notify_listener = false;
+ if (m_locker != locker) {
+ m_locker = locker;
+ notify_listener = true;
+ if (m_acquire_attempts > 1) {
+ dout(10) << "new lock owner detected -- resetting heartbeat counter"
+ << dendl;
+ m_acquire_attempts = 0;
+ }
+ }
+
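+  // if the lock owner has stopped heartbeating, acquire attempts keep
+  // failing while the lock is still held -- break the lock once the
+  // configured attempt threshold is reached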
+ if (m_acquire_attempts >= m_cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_leader_max_acquire_attempts_before_break")) {
+ dout(0) << "breaking leader lock after " << m_acquire_attempts << " "
+ << "failed attempts to acquire" << dendl;
+ break_leader_lock();
+ return;
+ }
+
+ schedule_acquire_leader_lock(1);
+
+ if (!notify_listener) {
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ auto ctx = new FunctionContext(
+ [this](int r) {
+ std::string instance_id;
+ if (get_leader_instance_id(&instance_id)) {
+ m_listener->update_leader_handler(instance_id);
+ }
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ m_timer_op_tracker.finish_op();
+ });
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::schedule_acquire_leader_lock(uint32_t delay_factor) {
+ dout(10) << dendl;
+
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+
+ schedule_timer_task("acquire leader lock",
+ delay_factor *
+ m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats"),
+ false, &LeaderWatcher<I>::acquire_leader_lock, false);
+}
+
+template <typename I>
+void LeaderWatcher<I>::acquire_leader_lock() {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ ++m_acquire_attempts;
+ dout(10) << "acquire_attempts=" << m_acquire_attempts << dendl;
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_acquire_leader_lock>(this));
+ m_leader_lock->try_acquire_lock(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_acquire_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (r < 0) {
+ if (r == -EAGAIN) {
+ dout(10) << "already locked" << dendl;
+ } else {
+ derr << "error acquiring lock: " << cpp_strerror(r) << dendl;
+ }
+
+ get_locker();
+ return;
+ }
+
+ m_locker = {};
+ m_acquire_attempts = 0;
+
+ if (m_ret_val) {
+ dout(5) << "releasing due to error on notify" << dendl;
+ release_leader_lock();
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ notify_heartbeat();
+}
+
+template <typename I>
+void LeaderWatcher<I>::release_leader_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_release_leader_lock>(this));
+
+ m_leader_lock->release_lock(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_release_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+ derr << "error releasing lock: " << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ schedule_acquire_leader_lock(1);
+}
+
+template <typename I>
+void LeaderWatcher<I>::init_status_watcher() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(m_status_watcher == nullptr);
+
+ m_status_watcher = MirrorStatusWatcher<I>::create(m_ioctx, m_work_queue);
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_status_watcher>(this);
+
+ m_status_watcher->init(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_init_status_watcher(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+ derr << "error initializing mirror status watcher: " << cpp_strerror(r)
+ << cpp_strerror(r) << dendl;
+ } else {
+ schedule_acquire_leader_lock(0);
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(on_finish, m_on_finish);
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down_status_watcher() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(m_status_watcher != nullptr);
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<LeaderWatcher<I>,
+ &LeaderWatcher<I>::handle_shut_down_status_watcher>(this));
+
+ m_status_watcher->shut_down(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_shut_down_status_watcher(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker locker(m_lock);
+ m_status_watcher->destroy();
+ m_status_watcher = nullptr;
+
+ if (r < 0) {
+ derr << "error shutting mirror status watcher down: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ unregister_watch();
+}
+
+template <typename I>
+void LeaderWatcher<I>::init_instances() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(m_instances == nullptr);
+
+ m_instances = Instances<I>::create(m_threads, m_ioctx, m_instance_id,
+ m_instances_listener);
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_instances>(this);
+
+ m_instances->init(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_init_instances(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ if (r < 0) {
+ Mutex::Locker locker(m_lock);
+ derr << "error initializing instances: " << cpp_strerror(r) << dendl;
+ m_instances->destroy();
+ m_instances = nullptr;
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+ } else {
+ Mutex::Locker locker(m_lock);
+ notify_listener();
+ return;
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down_instances() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(m_instances != nullptr);
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<LeaderWatcher<I>,
+ &LeaderWatcher<I>::handle_shut_down_instances>(this));
+
+ m_instances->shut_down(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_shut_down_instances(int r) {
+ dout(10) << "r=" << r << dendl;
+ ceph_assert(r == 0);
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+
+ m_instances->destroy();
+ m_instances = nullptr;
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_listener() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_listener>(this));
+
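+  // invoke the listener on the work queue: post_acquire if we just became
+  // leader, pre_release if we are about to give up leadership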
+ if (is_leader(m_lock)) {
+ ctx = new FunctionContext(
+ [this, ctx](int r) {
+ m_listener->post_acquire_handler(ctx);
+ });
+ } else {
+ ctx = new FunctionContext(
+ [this, ctx](int r) {
+ m_listener->pre_release_handler(ctx);
+ });
+ }
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_listener(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ if (r < 0) {
+ derr << "error notifying listener: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ }
+
+ if (is_leader(m_lock)) {
+ notify_lock_acquired();
+ } else {
+ shut_down_instances();
+ }
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_lock_acquired() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_acquired>(this);
+
+ bufferlist bl;
+ encode(NotifyMessage{LockAcquiredPayload{}}, bl);
+
+ send_notify(bl, nullptr, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_lock_acquired(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ if (r < 0 && r != -ETIMEDOUT) {
+ derr << "error notifying leader lock acquired: " << cpp_strerror(r)
+ << dendl;
+ m_ret_val = r;
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+
+ if (m_ret_val == 0) {
+ // listener should be ready for instance add/remove events now
+ m_instances->unblock_listener();
+ }
+ }
+ on_finish->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_lock_released() {
+ dout(10) << dendl;
+
+ ceph_assert(m_lock.is_locked());
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_released>(this);
+
+ bufferlist bl;
+ encode(NotifyMessage{LockReleasedPayload{}}, bl);
+
+ send_notify(bl, nullptr, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_lock_released(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ if (r < 0 && r != -ETIMEDOUT) {
+ derr << "error notifying leader lock released: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_heartbeat() {
+ dout(10) << dendl;
+
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_lock.is_locked());
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (!is_leader(m_lock)) {
+ dout(5) << "not leader, canceling" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_heartbeat>(this);
+
+ bufferlist bl;
+ encode(NotifyMessage{HeartbeatPayload{}}, bl);
+
+ m_heartbeat_response.acks.clear();
+ send_notify(bl, &m_heartbeat_response, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_heartbeat(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ m_timer_op_tracker.finish_op();
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ return;
+ } else if (!is_leader(m_lock)) {
+ return;
+ }
+
+ if (r < 0 && r != -ETIMEDOUT) {
+ derr << "error notifying heartbeat: " << cpp_strerror(r)
+ << ", releasing leader" << dendl;
+ release_leader_lock();
+ return;
+ }
+
+ dout(10) << m_heartbeat_response.acks.size() << " acks received, "
+ << m_heartbeat_response.timeouts.size() << " timed out" << dendl;
+
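+  // every notifier that acked the heartbeat corresponds to a live instance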
+ std::vector<std::string> instance_ids;
+ for (auto &it: m_heartbeat_response.acks) {
+ uint64_t notifier_id = it.first.gid;
+ instance_ids.push_back(stringify(notifier_id));
+ }
+ if (!instance_ids.empty()) {
+ m_instances->acked(instance_ids);
+ }
+
+ schedule_timer_task("heartbeat", 1, true,
+ &LeaderWatcher<I>::notify_heartbeat, false);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_heartbeat(Context *on_notify_ack) {
+ dout(10) << dendl;
+
+ {
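+    // a heartbeat from the current leader resets the acquire back-off: the
+    // re-armed timer only fires if heartbeats stop arriving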
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ if (is_leader(m_lock)) {
+ dout(5) << "got another leader heartbeat, ignoring" << dendl;
+ } else {
+ cancel_timer_task();
+ m_acquire_attempts = 0;
+ schedule_acquire_leader_lock(1);
+ }
+ }
+
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_lock_acquired(Context *on_notify_ack) {
+ dout(10) << dendl;
+
+ {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ if (is_leader(m_lock)) {
+ dout(5) << "got another leader lock_acquired, ignoring" << dendl;
+ } else {
+ cancel_timer_task();
+ schedule_get_locker(true, 0);
+ }
+ }
+
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_lock_released(Context *on_notify_ack) {
+ dout(10) << dendl;
+
+ {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ if (is_leader(m_lock)) {
+ dout(5) << "got another leader lock_released, ignoring" << dendl;
+ } else {
+ cancel_timer_task();
+ schedule_get_locker(true, 0);
+ }
+ }
+
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) {
+ dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", "
+ << "notifier_id=" << notifier_id << dendl;
+
+ Context *ctx = new C_NotifyAck(this, notify_id, handle);
+
+ if (notifier_id == m_notifier_id) {
+ dout(10) << "our own notification, ignoring" << dendl;
+ ctx->complete(0);
+ return;
+ }
+
+ NotifyMessage notify_message;
+ try {
+ auto iter = bl.cbegin();
+ decode(notify_message, iter);
+ } catch (const buffer::error &err) {
+ derr << "error decoding image notification: " << err.what() << dendl;
+ ctx->complete(0);
+ return;
+ }
+
+ apply_visitor(HandlePayloadVisitor(this, ctx), notify_message.payload);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_rewatch_complete(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ if (r == -EBLACKLISTED) {
+ dout(1) << "blacklisted detected" << dendl;
+ m_blacklisted = true;
+ return;
+ }
+
+ m_leader_lock->reacquire_lock(nullptr);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const HeartbeatPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "heartbeat" << dendl;
+
+ handle_heartbeat(on_notify_ack);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const LockAcquiredPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "lock_acquired" << dendl;
+
+ handle_lock_acquired(on_notify_ack);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const LockReleasedPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "lock_released" << dendl;
+
+ handle_lock_released(on_notify_ack);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const UnknownPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "unknown" << dendl;
+
+ on_notify_ack->complete(0);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::LeaderWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/LeaderWatcher.h b/src/tools/rbd_mirror/LeaderWatcher.h
new file mode 100644
index 00000000..01ee0565
--- /dev/null
+++ b/src/tools/rbd_mirror/LeaderWatcher.h
@@ -0,0 +1,320 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_LEADER_WATCHER_H
+#define CEPH_RBD_MIRROR_LEADER_WATCHER_H
+
+#include <list>
+#include <memory>
+#include <string>
+
+#include "common/AsyncOpTracker.h"
+#include "librbd/ManagedLock.h"
+#include "librbd/Watcher.h"
+#include "librbd/managed_lock/Types.h"
+#include "librbd/watcher/Types.h"
+#include "Instances.h"
+#include "MirrorStatusWatcher.h"
+#include "tools/rbd_mirror/instances/Types.h"
+#include "tools/rbd_mirror/leader_watcher/Types.h"
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class LeaderWatcher : protected librbd::Watcher {
+ using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning
+public:
+ static LeaderWatcher* create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &io_ctx,
+ leader_watcher::Listener *listener) {
+ return new LeaderWatcher(threads, io_ctx, listener);
+ }
+
+ LeaderWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &io_ctx,
+ leader_watcher::Listener *listener);
+ ~LeaderWatcher() override;
+
+ int init();
+ void shut_down();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+ bool is_blacklisted() const;
+ bool is_leader() const;
+ bool is_releasing_leader() const;
+ bool get_leader_instance_id(std::string *instance_id) const;
+ void release_leader();
+ void list_instances(std::vector<std::string> *instance_ids);
+
+ std::string get_instance_id();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <------------------------------ WAIT_FOR_TASKS
+ * | (init) ^ ^
+ * v * |
+ * CREATE_OBJECT * * * * * (error) UNREGISTER_WATCH
+ * | * ^
+ * v * |
+ * REGISTER_WATCH * * * * * SHUT_DOWN_STATUS_WATCHER
+ * | * ^
+ * v * |
+ * INIT_STATUS_WATCHER * * SHUT_DOWN_LEADER_LOCK
+ * | |
+ * | (no leader heartbeat and acquire failed) |
+ * | BREAK_LOCK <-------------------------------------\ |
+ * | | (no leader heartbeat) | | (shut down)
+ * | | /----------------------------------------\ | |
+ * | | | (lock_released received) | |
+ * | | | /-------------------------------------\ | |
+ * | | | | (lock_acquired or | | |
+ * | | | | heartbeat received) | | |
+ * | | | | (ENOENT) /-----------\ | | |
+ * | | | | * * * * * * * * * * | | | | |
+ * v v v v v (error) * v | | | |
+ * ACQUIRE_LEADER_LOCK * * * * *> GET_LOCKER ---> <secondary>
+ * | * ^
+ * ....|...................*.................... .....|.....................
+ * . v * . . | post_release .
+ * .INIT_INSTANCES * * * * * . .NOTIFY_LOCK_RELEASED .
+ * . | . .....^.....................
+ * . v . |
+ * .NOTIFY_LISTENER . RELEASE_LEADER_LOCK
+ * . | . ^
+ * . v . .....|.....................
+ * .NOTIFY_LOCK_ACQUIRED . . | .
+ * . | post_acquire . .SHUT_DOWN_INSTANCES .
+ * ....|........................................ . ^ .
+ * v . | .
+ * <leader> -----------------------------------> .NOTIFY_LISTENER .
+ * (shut_down, release_leader, . pre_release .
+ * notify error) ...........................
+ * @endverbatim
+ */
+
+ struct InstancesListener : public instances::Listener {
+ LeaderWatcher* leader_watcher;
+
+ InstancesListener(LeaderWatcher* leader_watcher)
+ : leader_watcher(leader_watcher) {
+ }
+
+ void handle_added(const InstanceIds& instance_ids) override {
+ leader_watcher->m_listener->handle_instances_added(instance_ids);
+ }
+
+ void handle_removed(const InstanceIds& instance_ids) override {
+ leader_watcher->m_listener->handle_instances_removed(instance_ids);
+ }
+ };
+
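+  // maps ManagedLock states onto leadership: this instance is the leader
+  // while the lock is held (or is in the post-acquiring state)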
+ class LeaderLock : public librbd::ManagedLock<ImageCtxT> {
+ public:
+ typedef librbd::ManagedLock<ImageCtxT> Parent;
+
+ LeaderLock(librados::IoCtx& ioctx, ContextWQ *work_queue,
+ const std::string& oid, LeaderWatcher *watcher,
+ bool blacklist_on_break_lock,
+ uint32_t blacklist_expire_seconds)
+ : Parent(ioctx, work_queue, oid, watcher, librbd::managed_lock::EXCLUSIVE,
+ blacklist_on_break_lock, blacklist_expire_seconds),
+ watcher(watcher) {
+ }
+
+ bool is_leader() const {
+ Mutex::Locker locker(Parent::m_lock);
+ return Parent::is_state_post_acquiring() || Parent::is_state_locked();
+ }
+
+ bool is_releasing_leader() const {
+ Mutex::Locker locker(Parent::m_lock);
+ return Parent::is_state_pre_releasing();
+ }
+
+ protected:
+    void post_acquire_lock_handler(int r, Context *on_finish) override {
+ if (r == 0) {
+ // lock is owned at this point
+ Mutex::Locker locker(Parent::m_lock);
+ Parent::set_state_post_acquiring();
+ }
+ watcher->handle_post_acquire_leader_lock(r, on_finish);
+ }
+    void pre_release_lock_handler(bool shutting_down,
+                                  Context *on_finish) override {
+ watcher->handle_pre_release_leader_lock(on_finish);
+ }
+    void post_release_lock_handler(bool shutting_down, int r,
+                                   Context *on_finish) override {
+ watcher->handle_post_release_leader_lock(r, on_finish);
+ }
+ private:
+ LeaderWatcher *watcher;
+ };
+
+ struct HandlePayloadVisitor : public boost::static_visitor<void> {
+ LeaderWatcher *leader_watcher;
+ Context *on_notify_ack;
+
+ HandlePayloadVisitor(LeaderWatcher *leader_watcher, Context *on_notify_ack)
+ : leader_watcher(leader_watcher), on_notify_ack(on_notify_ack) {
+ }
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ leader_watcher->handle_payload(payload, on_notify_ack);
+ }
+ };
+
+ struct C_GetLocker : public Context {
+ LeaderWatcher *leader_watcher;
+ librbd::managed_lock::Locker locker;
+
+ C_GetLocker(LeaderWatcher *leader_watcher)
+ : leader_watcher(leader_watcher) {
+ }
+
+ void finish(int r) override {
+ leader_watcher->handle_get_locker(r, locker);
+ }
+ };
+
+ typedef void (LeaderWatcher<ImageCtxT>::*TimerCallback)();
+
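+  // gates a newly scheduled timer task while a previous timer-driven
+  // operation is still in flight (see schedule_timer_task)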
+ struct C_TimerGate : public Context {
+ LeaderWatcher *leader_watcher;
+
+ bool leader = false;
+ TimerCallback timer_callback = nullptr;
+
+ C_TimerGate(LeaderWatcher *leader_watcher)
+ : leader_watcher(leader_watcher) {
+ }
+
+ void finish(int r) override {
+ leader_watcher->m_timer_gate = nullptr;
+ leader_watcher->execute_timer_task(leader, timer_callback);
+ }
+ };
+
+ Threads<ImageCtxT> *m_threads;
+ leader_watcher::Listener *m_listener;
+
+ InstancesListener m_instances_listener;
+ mutable Mutex m_lock;
+ uint64_t m_notifier_id;
+ std::string m_instance_id;
+ LeaderLock *m_leader_lock;
+ Context *m_on_finish = nullptr;
+ Context *m_on_shut_down_finish = nullptr;
+ uint64_t m_acquire_attempts = 0;
+ int m_ret_val = 0;
+ MirrorStatusWatcher<ImageCtxT> *m_status_watcher = nullptr;
+ Instances<ImageCtxT> *m_instances = nullptr;
+ librbd::managed_lock::Locker m_locker;
+
+ bool m_blacklisted = false;
+
+ AsyncOpTracker m_timer_op_tracker;
+ Context *m_timer_task = nullptr;
+ C_TimerGate *m_timer_gate = nullptr;
+
+ librbd::watcher::NotifyResponse m_heartbeat_response;
+
+  bool is_leader(Mutex &lock) const;
+  bool is_releasing_leader(Mutex &lock) const;
+
+ void cancel_timer_task();
+ void schedule_timer_task(const std::string &name,
+ int delay_factor, bool leader,
+ TimerCallback callback, bool shutting_down);
+ void execute_timer_task(bool leader, TimerCallback timer_callback);
+
+ void create_leader_object();
+ void handle_create_leader_object(int r);
+
+ void register_watch();
+ void handle_register_watch(int r);
+
+ void shut_down_leader_lock();
+ void handle_shut_down_leader_lock(int r);
+
+ void unregister_watch();
+ void handle_unregister_watch(int r);
+
+ void wait_for_tasks();
+ void handle_wait_for_tasks();
+
+ void break_leader_lock();
+ void handle_break_leader_lock(int r);
+
+ void schedule_get_locker(bool reset_leader, uint32_t delay_factor);
+ void get_locker();
+ void handle_get_locker(int r, librbd::managed_lock::Locker& locker);
+
+ void schedule_acquire_leader_lock(uint32_t delay_factor);
+ void acquire_leader_lock();
+ void handle_acquire_leader_lock(int r);
+
+ void release_leader_lock();
+ void handle_release_leader_lock(int r);
+
+ void init_status_watcher();
+ void handle_init_status_watcher(int r);
+
+ void shut_down_status_watcher();
+ void handle_shut_down_status_watcher(int r);
+
+ void init_instances();
+ void handle_init_instances(int r);
+
+ void shut_down_instances();
+ void handle_shut_down_instances(int r);
+
+ void notify_listener();
+ void handle_notify_listener(int r);
+
+ void notify_lock_acquired();
+ void handle_notify_lock_acquired(int r);
+
+ void notify_lock_released();
+ void handle_notify_lock_released(int r);
+
+ void notify_heartbeat();
+ void handle_notify_heartbeat(int r);
+
+ void handle_post_acquire_leader_lock(int r, Context *on_finish);
+ void handle_pre_release_leader_lock(Context *on_finish);
+ void handle_post_release_leader_lock(int r, Context *on_finish);
+
+ void handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) override;
+
+ void handle_rewatch_complete(int r) override;
+
+ void handle_heartbeat(Context *on_ack);
+ void handle_lock_acquired(Context *on_ack);
+ void handle_lock_released(Context *on_ack);
+
+ void handle_payload(const leader_watcher::HeartbeatPayload &payload,
+ Context *on_notify_ack);
+ void handle_payload(const leader_watcher::LockAcquiredPayload &payload,
+ Context *on_notify_ack);
+ void handle_payload(const leader_watcher::LockReleasedPayload &payload,
+ Context *on_notify_ack);
+ void handle_payload(const leader_watcher::UnknownPayload &payload,
+ Context *on_notify_ack);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_LEADER_WATCHER_H
diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc
new file mode 100644
index 00000000..ef18a0b6
--- /dev/null
+++ b/src/tools/rbd_mirror/Mirror.cc
@@ -0,0 +1,448 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <signal.h>
+
+#include <boost/range/adaptor/map.hpp>
+
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "Mirror.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::Mirror: " << this << " " \
+ << __func__ << ": "
+
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librados::Rados;
+using librados::IoCtx;
+using librbd::mirror_peer_t;
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+class MirrorAdminSocketCommand {
+public:
+ virtual ~MirrorAdminSocketCommand() {}
+ virtual bool call(Formatter *f, stringstream *ss) = 0;
+};
+
+class StatusCommand : public MirrorAdminSocketCommand {
+public:
+ explicit StatusCommand(Mirror *mirror) : mirror(mirror) {}
+
+ bool call(Formatter *f, stringstream *ss) override {
+ mirror->print_status(f, ss);
+ return true;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class StartCommand : public MirrorAdminSocketCommand {
+public:
+ explicit StartCommand(Mirror *mirror) : mirror(mirror) {}
+
+ bool call(Formatter *f, stringstream *ss) override {
+ mirror->start();
+ return true;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class StopCommand : public MirrorAdminSocketCommand {
+public:
+ explicit StopCommand(Mirror *mirror) : mirror(mirror) {}
+
+ bool call(Formatter *f, stringstream *ss) override {
+ mirror->stop();
+ return true;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class RestartCommand : public MirrorAdminSocketCommand {
+public:
+ explicit RestartCommand(Mirror *mirror) : mirror(mirror) {}
+
+ bool call(Formatter *f, stringstream *ss) override {
+ mirror->restart();
+ return true;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class FlushCommand : public MirrorAdminSocketCommand {
+public:
+ explicit FlushCommand(Mirror *mirror) : mirror(mirror) {}
+
+ bool call(Formatter *f, stringstream *ss) override {
+ mirror->flush();
+ return true;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class LeaderReleaseCommand : public MirrorAdminSocketCommand {
+public:
+ explicit LeaderReleaseCommand(Mirror *mirror) : mirror(mirror) {}
+
+ bool call(Formatter *f, stringstream *ss) override {
+ mirror->release_leader();
+ return true;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+} // anonymous namespace
+
+class MirrorAdminSocketHook : public AdminSocketHook {
+public:
+ MirrorAdminSocketHook(CephContext *cct, Mirror *mirror) :
+ admin_socket(cct->get_admin_socket()) {
+ std::string command;
+ int r;
+
+ command = "rbd mirror status";
+ r = admin_socket->register_command(command, command, this,
+ "get status for rbd mirror");
+ if (r == 0) {
+ commands[command] = new StatusCommand(mirror);
+ }
+
+ command = "rbd mirror start";
+ r = admin_socket->register_command(command, command, this,
+ "start rbd mirror");
+ if (r == 0) {
+ commands[command] = new StartCommand(mirror);
+ }
+
+ command = "rbd mirror stop";
+ r = admin_socket->register_command(command, command, this,
+ "stop rbd mirror");
+ if (r == 0) {
+ commands[command] = new StopCommand(mirror);
+ }
+
+ command = "rbd mirror restart";
+ r = admin_socket->register_command(command, command, this,
+ "restart rbd mirror");
+ if (r == 0) {
+ commands[command] = new RestartCommand(mirror);
+ }
+
+ command = "rbd mirror flush";
+ r = admin_socket->register_command(command, command, this,
+ "flush rbd mirror");
+ if (r == 0) {
+ commands[command] = new FlushCommand(mirror);
+ }
+
+ command = "rbd mirror leader release";
+ r = admin_socket->register_command(command, command, this,
+ "release rbd mirror leader");
+ if (r == 0) {
+ commands[command] = new LeaderReleaseCommand(mirror);
+ }
+ }
+
+ ~MirrorAdminSocketHook() override {
+ for (Commands::const_iterator i = commands.begin(); i != commands.end();
+ ++i) {
+ (void)admin_socket->unregister_command(i->first);
+ delete i->second;
+ }
+ }
+
+ bool call(std::string_view command, const cmdmap_t& cmdmap,
+ std::string_view format, bufferlist& out) override {
+ Commands::const_iterator i = commands.find(command);
+ ceph_assert(i != commands.end());
+ Formatter *f = Formatter::create(format);
+ stringstream ss;
+ bool r = i->second->call(f, &ss);
+ delete f;
+ out.append(ss);
+ return r;
+ }
+
+private:
+ typedef std::map<std::string, MirrorAdminSocketCommand*, std::less<>> Commands;
+
+ AdminSocket *admin_socket;
+ Commands commands;
+};
+
+Mirror::Mirror(CephContext *cct, const std::vector<const char*> &args) :
+ m_cct(cct),
+ m_args(args),
+ m_lock("rbd::mirror::Mirror"),
+ m_local(new librados::Rados()),
+ m_asok_hook(new MirrorAdminSocketHook(cct, this))
+{
+ m_threads =
+ &(cct->lookup_or_create_singleton_object<Threads<librbd::ImageCtx>>(
+ "rbd_mirror::threads", false, cct));
+ m_service_daemon.reset(new ServiceDaemon<>(m_cct, m_local, m_threads));
+}
+
+Mirror::~Mirror()
+{
+ delete m_asok_hook;
+}
+
+void Mirror::handle_signal(int signum)
+{
+ dout(20) << signum << dendl;
+
+ Mutex::Locker l(m_lock);
+
+ switch (signum) {
+ case SIGHUP:
+ for (auto &it : m_pool_replayers) {
+ it.second->reopen_logs();
+ }
+ g_ceph_context->reopen_logs();
+ break;
+
+ case SIGINT:
+ case SIGTERM:
+ m_stopping = true;
+ m_cond.Signal();
+ break;
+
+ default:
+ ceph_abort_msgf("unexpected signal %d", signum);
+ }
+}
+
+int Mirror::init()
+{
+ int r = m_local->init_with_context(m_cct);
+ if (r < 0) {
+ derr << "could not initialize rados handle" << dendl;
+ return r;
+ }
+
+ r = m_local->connect();
+ if (r < 0) {
+ derr << "error connecting to local cluster" << dendl;
+ return r;
+ }
+
+ r = m_service_daemon->init();
+ if (r < 0) {
+ derr << "error registering service daemon: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ m_local_cluster_watcher.reset(new ClusterWatcher(m_local, m_lock,
+ m_service_daemon.get()));
+ return r;
+}
+
+void Mirror::run()
+{
+ dout(20) << "enter" << dendl;
+ while (!m_stopping) {
+ m_local_cluster_watcher->refresh_pools();
+ Mutex::Locker l(m_lock);
+ if (!m_manual_stop) {
+ update_pool_replayers(m_local_cluster_watcher->get_pool_peers());
+ }
+ m_cond.WaitInterval(
+ m_lock,
+ utime_t(m_cct->_conf.get_val<uint64_t>("rbd_mirror_pool_replayers_refresh_interval"), 0));
+ }
+
+ // stop all pool replayers in parallel
+ Mutex::Locker locker(m_lock);
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->stop(false);
+ }
+ dout(20) << "return" << dendl;
+}
+
+void Mirror::print_status(Formatter *f, stringstream *ss)
+{
+ dout(20) << "enter" << dendl;
+
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping) {
+ return;
+ }
+
+ if (f) {
+ f->open_object_section("mirror_status");
+ f->open_array_section("pool_replayers");
+  }
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->print_status(f, ss);
+ }
+
+ if (f) {
+ f->close_section();
+ f->close_section();
+ f->flush(*ss);
+ }
+}
+
+void Mirror::start()
+{
+ dout(20) << "enter" << dendl;
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = false;
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->start();
+ }
+}
+
+void Mirror::stop()
+{
+ dout(20) << "enter" << dendl;
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = true;
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->stop(true);
+ }
+}
+
+void Mirror::restart()
+{
+ dout(20) << "enter" << dendl;
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = false;
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->restart();
+ }
+}
+
+void Mirror::flush()
+{
+ dout(20) << "enter" << dendl;
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping || m_manual_stop) {
+ return;
+ }
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->flush();
+ }
+}
+
+void Mirror::release_leader()
+{
+ dout(20) << "enter" << dendl;
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping) {
+ return;
+ }
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->release_leader();
+ }
+}
+
+void Mirror::update_pool_replayers(const PoolPeers &pool_peers)
+{
+ dout(20) << "enter" << dendl;
+ ceph_assert(m_lock.is_locked());
+
+ // remove stale pool replayers before creating new pool replayers
+ for (auto it = m_pool_replayers.begin(); it != m_pool_replayers.end();) {
+ auto &peer = it->first.second;
+ auto pool_peer_it = pool_peers.find(it->first.first);
+ if (pool_peer_it == pool_peers.end() ||
+ pool_peer_it->second.find(peer) == pool_peer_it->second.end()) {
+ dout(20) << "removing pool replayer for " << peer << dendl;
+ // TODO: make async
+ it->second->shut_down();
+ it = m_pool_replayers.erase(it);
+ } else {
+ ++it;
+ }
+ }
+
+ for (auto &kv : pool_peers) {
+ for (auto &peer : kv.second) {
+ PoolPeer pool_peer(kv.first, peer);
+
+ auto pool_replayers_it = m_pool_replayers.find(pool_peer);
+ if (pool_replayers_it != m_pool_replayers.end()) {
+ auto& pool_replayer = pool_replayers_it->second;
+ if (pool_replayer->is_blacklisted()) {
+ derr << "restarting blacklisted pool replayer for " << peer << dendl;
+ // TODO: make async
+ pool_replayer->shut_down();
+ pool_replayer->init();
+ } else if (!pool_replayer->is_running()) {
+ derr << "restarting failed pool replayer for " << peer << dendl;
+ // TODO: make async
+ pool_replayer->shut_down();
+ pool_replayer->init();
+ }
+ } else {
+ dout(20) << "starting pool replayer for " << peer << dendl;
+ unique_ptr<PoolReplayer<>> pool_replayer(new PoolReplayer<>(
+ m_threads, m_service_daemon.get(), kv.first, peer, m_args));
+
+ // TODO: make async
+ pool_replayer->init();
+ m_pool_replayers.emplace(pool_peer, std::move(pool_replayer));
+ }
+ }
+
+ // TODO currently only support a single peer
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/Mirror.h b/src/tools/rbd_mirror/Mirror.h
new file mode 100644
index 00000000..153c0bc5
--- /dev/null
+++ b/src/tools/rbd_mirror/Mirror.h
@@ -0,0 +1,77 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_H
+#define CEPH_RBD_MIRROR_H
+
+#include "common/ceph_context.h"
+#include "common/Mutex.h"
+#include "include/rados/librados.hpp"
+#include "ClusterWatcher.h"
+#include "PoolReplayer.h"
+#include "tools/rbd_mirror/Types.h"
+
+#include <set>
+#include <map>
+#include <memory>
+#include <atomic>
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct ServiceDaemon;
+template <typename> struct Threads;
+class MirrorAdminSocketHook;
+
+/**
+ * Contains the main loop and overall state for rbd-mirror.
+ *
+ * Sets up mirroring, and coordinates between noticing config
+ * changes and applying them.
+ */
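+//
+// A minimal lifecycle sketch (the daemon's main() drives this; `cmd_args`
+// stands in for the daemon's parsed command-line arguments):
+//
+//   Mirror mirror(g_ceph_context, cmd_args);
+//   if (mirror.init() == 0) {
+//     mirror.run();  // loops until handle_signal() sees SIGINT/SIGTERM
+//   }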
+class Mirror {
+public:
+ Mirror(CephContext *cct, const std::vector<const char*> &args);
+ Mirror(const Mirror&) = delete;
+ Mirror& operator=(const Mirror&) = delete;
+ ~Mirror();
+
+ int init();
+ void run();
+ void handle_signal(int signum);
+
+ void print_status(Formatter *f, stringstream *ss);
+ void start();
+ void stop();
+ void restart();
+ void flush();
+ void release_leader();
+
+private:
+ typedef ClusterWatcher::PoolPeers PoolPeers;
+ typedef std::pair<int64_t, PeerSpec> PoolPeer;
+
+ void update_pool_replayers(const PoolPeers &pool_peers);
+
+ CephContext *m_cct;
+ std::vector<const char*> m_args;
+ Threads<librbd::ImageCtx> *m_threads = nullptr;
+ Mutex m_lock;
+ Cond m_cond;
+ RadosRef m_local;
+ std::unique_ptr<ServiceDaemon<librbd::ImageCtx>> m_service_daemon;
+
+ // monitor local cluster for config changes in peers
+ std::unique_ptr<ClusterWatcher> m_local_cluster_watcher;
+ std::map<PoolPeer, std::unique_ptr<PoolReplayer<>>> m_pool_replayers;
+ std::atomic<bool> m_stopping = { false };
+ bool m_manual_stop = false;
+ MirrorAdminSocketHook *m_asok_hook;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_H
diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.cc b/src/tools/rbd_mirror/MirrorStatusWatcher.cc
new file mode 100644
index 00000000..b935bc5c
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusWatcher.cc
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "MirrorStatusWatcher.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::MirrorStatusWatcher: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+MirrorStatusWatcher<I>::MirrorStatusWatcher(librados::IoCtx &io_ctx,
+ ContextWQ *work_queue)
+ : Watcher(io_ctx, work_queue, RBD_MIRRORING) {
+}
+
+template <typename I>
+MirrorStatusWatcher<I>::~MirrorStatusWatcher() {
+}
+
+template <typename I>
+void MirrorStatusWatcher<I>::init(Context *on_finish) {
+ dout(20) << dendl;
+
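+  // first purge any stale "down" statuses from the mirroring object, then
+  // register the watch once the cleanup completes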
+ on_finish = new FunctionContext(
+ [this, on_finish] (int r) {
+ if (r < 0) {
+ derr << "error removing down statuses: " << cpp_strerror(r) << dendl;
+ on_finish->complete(r);
+ return;
+ }
+ register_watch(on_finish);
+ });
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_status_remove_down(&op);
+ librados::AioCompletion *aio_comp = create_rados_callback(on_finish);
+
+ int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void MirrorStatusWatcher<I>::shut_down(Context *on_finish) {
+ dout(20) << dendl;
+
+ unregister_watch(on_finish);
+}
+
+template <typename I>
+void MirrorStatusWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id,
+ bufferlist &bl) {
+ dout(20) << dendl;
+
+ bufferlist out;
+ acknowledge_notify(notify_id, handle, out);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::MirrorStatusWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.h b/src/tools/rbd_mirror/MirrorStatusWatcher.h
new file mode 100644
index 00000000..155f8cc8
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusWatcher.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H
+#define CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H
+
+#include "librbd/Watcher.h"
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class MirrorStatusWatcher : protected librbd::Watcher {
+public:
+ static MirrorStatusWatcher *create(librados::IoCtx &io_ctx,
+ ContextWQ *work_queue) {
+ return new MirrorStatusWatcher(io_ctx, work_queue);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ MirrorStatusWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue);
+ ~MirrorStatusWatcher() override;
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+protected:
+ void handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) override;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H
diff --git a/src/tools/rbd_mirror/PoolReplayer.cc b/src/tools/rbd_mirror/PoolReplayer.cc
new file mode 100644
index 00000000..35d32eb5
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolReplayer.cc
@@ -0,0 +1,1133 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "PoolReplayer.h"
+#include <boost/bind.hpp>
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/ceph_argparse.h"
+#include "common/code_environment.h"
+#include "common/common_init.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "global/global_context.h"
+#include "librbd/internal.h"
+#include "librbd/Utils.h"
+#include "librbd/Watcher.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Mirror.h"
+#include "ImageMap.h"
+#include "InstanceReplayer.h"
+#include "InstanceWatcher.h"
+#include "LeaderWatcher.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PoolReplayer: " \
+ << this << " " << __func__ << ": "
+
+using std::chrono::seconds;
+using std::map;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librbd::cls_client::dir_get_name;
+using librbd::util::create_async_context_callback;
+
+namespace rbd {
+namespace mirror {
+
+using ::operator<<;
+
+namespace {
+
+const std::string SERVICE_DAEMON_INSTANCE_ID_KEY("instance_id");
+const std::string SERVICE_DAEMON_LEADER_KEY("leader");
+const std::string SERVICE_DAEMON_LOCAL_COUNT_KEY("image_local_count");
+const std::string SERVICE_DAEMON_REMOTE_COUNT_KEY("image_remote_count");
+
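+// connection- and auth-related settings expected to be unique to each peer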
+const std::vector<std::string> UNIQUE_PEER_CONFIG_KEYS {
+ {"monmap", "mon_host", "mon_dns_srv_name", "key", "keyfile", "keyring"}};
+
+template <typename I>
+class PoolReplayerAdminSocketCommand {
+public:
+ PoolReplayerAdminSocketCommand(PoolReplayer<I> *pool_replayer)
+ : pool_replayer(pool_replayer) {
+ }
+ virtual ~PoolReplayerAdminSocketCommand() {}
+ virtual bool call(Formatter *f, stringstream *ss) = 0;
+protected:
+ PoolReplayer<I> *pool_replayer;
+};
+
+template <typename I>
+class StatusCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit StatusCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->pool_replayer->print_status(f, ss);
+ return true;
+ }
+};
+
+template <typename I>
+class StartCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit StartCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->pool_replayer->start();
+ return true;
+ }
+};
+
+template <typename I>
+class StopCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit StopCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->pool_replayer->stop(true);
+ return true;
+ }
+};
+
+template <typename I>
+class RestartCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit RestartCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->pool_replayer->restart();
+ return true;
+ }
+};
+
+template <typename I>
+class FlushCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit FlushCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->pool_replayer->flush();
+ return true;
+ }
+};
+
+template <typename I>
+class LeaderReleaseCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit LeaderReleaseCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ bool call(Formatter *f, stringstream *ss) override {
+ this->pool_replayer->release_leader();
+ return true;
+ }
+};
+
+template <typename I>
+class PoolReplayerAdminSocketHook : public AdminSocketHook {
+public:
+ PoolReplayerAdminSocketHook(CephContext *cct, const std::string &name,
+ PoolReplayer<I> *pool_replayer)
+ : admin_socket(cct->get_admin_socket()) {
+ std::string command;
+ int r;
+
+ command = "rbd mirror status " + name;
+ r = admin_socket->register_command(command, command, this,
+ "get status for rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new StatusCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror start " + name;
+ r = admin_socket->register_command(command, command, this,
+ "start rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new StartCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror stop " + name;
+ r = admin_socket->register_command(command, command, this,
+ "stop rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new StopCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror restart " + name;
+ r = admin_socket->register_command(command, command, this,
+ "restart rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new RestartCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror flush " + name;
+ r = admin_socket->register_command(command, command, this,
+ "flush rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new FlushCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror leader release " + name;
+ r = admin_socket->register_command(command, command, this,
+ "release rbd mirror leader " + name);
+ if (r == 0) {
+ commands[command] = new LeaderReleaseCommand<I>(pool_replayer);
+ }
+ }
+
+ ~PoolReplayerAdminSocketHook() override {
+ for (auto i = commands.begin(); i != commands.end(); ++i) {
+ (void)admin_socket->unregister_command(i->first);
+ delete i->second;
+ }
+ }
+
+ bool call(std::string_view command, const cmdmap_t& cmdmap,
+ std::string_view format, bufferlist& out) override {
+ auto i = commands.find(command);
+ ceph_assert(i != commands.end());
+ Formatter *f = Formatter::create(format);
+ stringstream ss;
+ bool r = i->second->call(f, &ss);
+ delete f;
+ out.append(ss);
+ return r;
+ }
+
+private:
+ typedef std::map<std::string, PoolReplayerAdminSocketCommand<I>*,
+ std::less<>> Commands;
+
+ AdminSocket *admin_socket;
+ Commands commands;
+};
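+
+// Example invocation of the commands registered above (the socket path is
+// illustrative; the actual path depends on the admin_socket configuration):
+//
+//   ceph --admin-daemon /var/run/ceph/ceph-client.rbd-mirror.a.asok \
+//       rbd mirror status <pool name> <remote cluster name>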
+
+} // anonymous namespace
+
+template <typename I>
+PoolReplayer<I>::PoolReplayer(Threads<I> *threads,
+ ServiceDaemon<I>* service_daemon,
+ int64_t local_pool_id, const PeerSpec &peer,
+ const std::vector<const char*> &args) :
+ m_threads(threads),
+ m_service_daemon(service_daemon),
+ m_local_pool_id(local_pool_id),
+ m_peer(peer),
+ m_args(args),
+ m_lock(stringify("rbd::mirror::PoolReplayer ") + stringify(peer)),
+ m_local_pool_watcher_listener(this, true),
+ m_remote_pool_watcher_listener(this, false),
+ m_image_map_listener(this),
+ m_pool_replayer_thread(this),
+ m_leader_listener(this)
+{
+}
+
+template <typename I>
+PoolReplayer<I>::~PoolReplayer()
+{
+ delete m_asok_hook;
+ shut_down();
+}
+
+template <typename I>
+bool PoolReplayer<I>::is_blacklisted() const {
+ Mutex::Locker locker(m_lock);
+ return m_blacklisted;
+}
+
+template <typename I>
+bool PoolReplayer<I>::is_leader() const {
+ Mutex::Locker locker(m_lock);
+ return m_leader_watcher && m_leader_watcher->is_leader();
+}
+
+template <typename I>
+bool PoolReplayer<I>::is_running() const {
+ return m_pool_replayer_thread.is_started();
+}
+
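+// Bring up the pool replayer: connect to the local cluster, connect to the
+// remote peer, open the pool IoCtxs, fetch the local mirror uuid, then
+// create the instance replayer, instance watcher and leader watcher before
+// starting the run() thread. On failure init() returns early (most paths
+// also raise a service daemon callout) and the thread is never started.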
+template <typename I>
+void PoolReplayer<I>::init()
+{
+ Mutex::Locker l(m_lock);
+
+ ceph_assert(!m_pool_replayer_thread.is_started());
+
+ // reset state
+ m_stopping = false;
+ m_blacklisted = false;
+
+ dout(10) << "replaying for " << m_peer << dendl;
+ int r = init_rados(g_ceph_context->_conf->cluster,
+ g_ceph_context->_conf->name.to_str(),
+ "", "", "local cluster", &m_local_rados, false);
+ if (r < 0) {
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to connect to local cluster");
+ return;
+ }
+
+ r = init_rados(m_peer.cluster_name, m_peer.client_name,
+ m_peer.mon_host, m_peer.key,
+ std::string("remote peer ") + stringify(m_peer),
+ &m_remote_rados, true);
+ if (r < 0) {
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to connect to remote cluster");
+ return;
+ }
+
+ r = m_local_rados->ioctx_create2(m_local_pool_id, m_local_io_ctx);
+ if (r < 0) {
+ derr << "error accessing local pool " << m_local_pool_id << ": "
+ << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+ librbd::api::Config<I>::apply_pool_overrides(m_local_io_ctx, &cct->_conf);
+
+ std::string local_mirror_uuid;
+ r = librbd::cls_client::mirror_uuid_get(&m_local_io_ctx,
+ &local_mirror_uuid);
+ if (r < 0) {
+ derr << "failed to retrieve local mirror uuid from pool "
+ << m_local_io_ctx.get_pool_name() << ": " << cpp_strerror(r) << dendl;
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to query local mirror uuid");
+ return;
+ }
+
+ r = m_remote_rados->ioctx_create(m_local_io_ctx.get_pool_name().c_str(),
+ m_remote_io_ctx);
+ if (r < 0) {
+ derr << "error accessing remote pool " << m_local_io_ctx.get_pool_name()
+ << ": " << cpp_strerror(r) << dendl;
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_WARNING,
+ "unable to access remote pool");
+ return;
+ }
+
+ dout(10) << "connected to " << m_peer << dendl;
+
+ m_instance_replayer.reset(InstanceReplayer<I>::create(
+ m_threads, m_service_daemon, m_local_rados, local_mirror_uuid,
+ m_local_pool_id));
+ m_instance_replayer->init();
+ m_instance_replayer->add_peer(m_peer.uuid, m_remote_io_ctx);
+
+ m_instance_watcher.reset(InstanceWatcher<I>::create(
+ m_local_io_ctx, m_threads->work_queue, m_instance_replayer.get()));
+ r = m_instance_watcher->init();
+ if (r < 0) {
+ derr << "error initializing instance watcher: " << cpp_strerror(r) << dendl;
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to initialize instance messenger object");
+ return;
+ }
+ m_service_daemon->add_or_update_attribute(
+ m_local_pool_id, SERVICE_DAEMON_INSTANCE_ID_KEY,
+ m_instance_watcher->get_instance_id());
+
+ m_leader_watcher.reset(LeaderWatcher<I>::create(m_threads, m_local_io_ctx,
+ &m_leader_listener));
+ r = m_leader_watcher->init();
+ if (r < 0) {
+ derr << "error initializing leader watcher: " << cpp_strerror(r) << dendl;
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to initialize leader messenger object");
+ return;
+ }
+
+ if (m_callout_id != service_daemon::CALLOUT_ID_NONE) {
+ m_service_daemon->remove_callout(m_local_pool_id, m_callout_id);
+ m_callout_id = service_daemon::CALLOUT_ID_NONE;
+ }
+
+ m_pool_replayer_thread.create("pool replayer");
+}
+
+template <typename I>
+void PoolReplayer<I>::shut_down() {
+ m_stopping = true;
+ {
+ Mutex::Locker l(m_lock);
+ m_cond.Signal();
+ }
+ if (m_pool_replayer_thread.is_started()) {
+ m_pool_replayer_thread.join();
+ }
+ if (m_leader_watcher) {
+ m_leader_watcher->shut_down();
+ }
+ if (m_instance_watcher) {
+ m_instance_watcher->shut_down();
+ }
+ if (m_instance_replayer) {
+ m_instance_replayer->shut_down();
+ }
+
+ m_leader_watcher.reset();
+ m_instance_watcher.reset();
+ m_instance_replayer.reset();
+
+ ceph_assert(!m_image_map);
+ ceph_assert(!m_image_deleter);
+ ceph_assert(!m_local_pool_watcher);
+ ceph_assert(!m_remote_pool_watcher);
+ m_local_rados.reset();
+ m_remote_rados.reset();
+}
+
+template <typename I>
+int PoolReplayer<I>::init_rados(const std::string &cluster_name,
+ const std::string &client_name,
+ const std::string &mon_host,
+ const std::string &key,
+ const std::string &description,
+ RadosRef *rados_ref,
+ bool strip_cluster_overrides) {
+ // NOTE: manually bootstrap a CephContext here instead of via
+ // the librados API to avoid mixing global singletons between
+ // the librados shared library and the daemon
+ // TODO: eliminate intermingling of global singletons within Ceph APIs
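+  // config precedence applied below (later steps win): config file ->
+  // environment -> peer command line args -> forced overrides
+  // (admin_socket, mon_host, key, rbd_cache); for remote peers the
+  // cluster-specific keys in UNIQUE_PEER_CONFIG_KEYS are snapshotted after
+  // the config file load and restored after command line parsing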
+ CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT);
+ if (client_name.empty() || !iparams.name.from_str(client_name)) {
+ derr << "error initializing cluster handle for " << description << dendl;
+ return -EINVAL;
+ }
+
+ CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY,
+ CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+ cct->_conf->cluster = cluster_name;
+
+ // librados::Rados::conf_read_file
+ int r = cct->_conf.parse_config_files(nullptr, nullptr, 0);
+ if (r < 0 && r != -ENOENT) {
+    // do not treat this as fatal; the daemon might still be able to connect
+ derr << "could not read ceph conf for " << description << ": "
+ << cpp_strerror(r) << dendl;
+ }
+
+ // preserve cluster-specific config settings before applying environment/cli
+ // overrides
+ std::map<std::string, std::string> config_values;
+ if (strip_cluster_overrides) {
+ // remote peer connections shouldn't apply cluster-specific
+ // configuration settings
+ for (auto& key : UNIQUE_PEER_CONFIG_KEYS) {
+ config_values[key] = cct->_conf.get_val<std::string>(key);
+ }
+ }
+
+ cct->_conf.parse_env(cct->get_module_type());
+
+ // librados::Rados::conf_parse_env
+ std::vector<const char*> args;
+ r = cct->_conf.parse_argv(args);
+ if (r < 0) {
+ derr << "could not parse environment for " << description << ":"
+ << cpp_strerror(r) << dendl;
+ cct->put();
+ return r;
+ }
+ cct->_conf.parse_env(cct->get_module_type());
+
+ if (!m_args.empty()) {
+ // librados::Rados::conf_parse_argv
+ args = m_args;
+ r = cct->_conf.parse_argv(args);
+ if (r < 0) {
+ derr << "could not parse command line args for " << description << ": "
+ << cpp_strerror(r) << dendl;
+ cct->put();
+ return r;
+ }
+ }
+
+ if (strip_cluster_overrides) {
+ // remote peer connections shouldn't apply cluster-specific
+ // configuration settings
+ for (auto& pair : config_values) {
+ auto value = cct->_conf.get_val<std::string>(pair.first);
+ if (pair.second != value) {
+ dout(0) << "reverting global config option override: "
+ << pair.first << ": " << value << " -> " << pair.second
+ << dendl;
+ cct->_conf.set_val_or_die(pair.first, pair.second);
+ }
+ }
+ }
+
+ if (!g_ceph_context->_conf->admin_socket.empty()) {
+ cct->_conf.set_val_or_die("admin_socket",
+ "$run_dir/$name.$pid.$cluster.$cctid.asok");
+ }
+
+ if (!mon_host.empty()) {
+ r = cct->_conf.set_val("mon_host", mon_host);
+ if (r < 0) {
+ derr << "failed to set mon_host config for " << description << ": "
+ << cpp_strerror(r) << dendl;
+ cct->put();
+ return r;
+ }
+ }
+
+ if (!key.empty()) {
+ r = cct->_conf.set_val("key", key);
+ if (r < 0) {
+ derr << "failed to set key config for " << description << ": "
+ << cpp_strerror(r) << dendl;
+ cct->put();
+ return r;
+ }
+ }
+
+ // disable unnecessary librbd cache
+ cct->_conf.set_val_or_die("rbd_cache", "false");
+ cct->_conf.apply_changes(nullptr);
+ cct->_conf.complain_about_parse_errors(cct);
+
+ rados_ref->reset(new librados::Rados());
+
+ r = (*rados_ref)->init_with_context(cct);
+ ceph_assert(r == 0);
+ cct->put();
+
+ r = (*rados_ref)->connect();
+ if (r < 0) {
+ derr << "error connecting to " << description << ": "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
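+// Body of the pool replayer thread: (re-)creates the admin socket hook when
+// the pool/peer name changes and polls roughly once per second for a
+// blacklisted local or remote connection, breaking out of the loop (and
+// stopping the instance replayer) once blacklisted or asked to stop.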
+template <typename I>
+void PoolReplayer<I>::run()
+{
+ dout(20) << "enter" << dendl;
+
+ while (!m_stopping) {
+ std::string asok_hook_name = m_local_io_ctx.get_pool_name() + " " +
+ m_peer.cluster_name;
+ if (m_asok_hook_name != asok_hook_name || m_asok_hook == nullptr) {
+ m_asok_hook_name = asok_hook_name;
+ delete m_asok_hook;
+
+ m_asok_hook = new PoolReplayerAdminSocketHook<I>(g_ceph_context,
+ m_asok_hook_name, this);
+ }
+
+ Mutex::Locker locker(m_lock);
+ if (m_leader_watcher->is_blacklisted() ||
+ m_instance_replayer->is_blacklisted() ||
+ (m_local_pool_watcher && m_local_pool_watcher->is_blacklisted()) ||
+ (m_remote_pool_watcher && m_remote_pool_watcher->is_blacklisted())) {
+ m_blacklisted = true;
+ m_stopping = true;
+ break;
+ }
+
+ if (!m_stopping) {
+ m_cond.WaitInterval(m_lock, utime_t(1, 0));
+ }
+ }
+
+ m_instance_replayer->stop();
+}
+
+template <typename I>
+void PoolReplayer<I>::reopen_logs()
+{
+ Mutex::Locker l(m_lock);
+
+ if (m_local_rados) {
+ reinterpret_cast<CephContext *>(m_local_rados->cct())->reopen_logs();
+ }
+ if (m_remote_rados) {
+ reinterpret_cast<CephContext *>(m_remote_rados->cct())->reopen_logs();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::print_status(Formatter *f, stringstream *ss)
+{
+ dout(20) << "enter" << dendl;
+
+ if (!f) {
+ return;
+ }
+
+ Mutex::Locker l(m_lock);
+
+ f->open_object_section("pool_replayer_status");
+ f->dump_stream("peer") << m_peer;
+ if (m_local_io_ctx.is_valid()) {
+ f->dump_string("pool", m_local_io_ctx.get_pool_name());
+ f->dump_stream("instance_id") << m_instance_watcher->get_instance_id();
+ }
+
+ std::string state("running");
+ if (m_manual_stop) {
+ state = "stopped (manual)";
+ } else if (m_stopping) {
+ state = "stopped";
+ }
+ f->dump_string("state", state);
+
+ std::string leader_instance_id;
+ m_leader_watcher->get_leader_instance_id(&leader_instance_id);
+ f->dump_string("leader_instance_id", leader_instance_id);
+
+ bool leader = m_leader_watcher->is_leader();
+ f->dump_bool("leader", leader);
+ if (leader) {
+ std::vector<std::string> instance_ids;
+ m_leader_watcher->list_instances(&instance_ids);
+ f->open_array_section("instances");
+ for (auto instance_id : instance_ids) {
+ f->dump_string("instance_id", instance_id);
+ }
+ f->close_section();
+ }
+
+ f->dump_string("local_cluster_admin_socket",
+ reinterpret_cast<CephContext *>(m_local_io_ctx.cct())->_conf.
+ get_val<std::string>("admin_socket"));
+ f->dump_string("remote_cluster_admin_socket",
+ reinterpret_cast<CephContext *>(m_remote_io_ctx.cct())->_conf.
+ get_val<std::string>("admin_socket"));
+
+ f->open_object_section("sync_throttler");
+ m_instance_watcher->print_sync_status(f, ss);
+ f->close_section();
+
+ m_instance_replayer->print_status(f, ss);
+
+ if (m_image_deleter) {
+ f->open_object_section("image_deleter");
+ m_image_deleter->print_status(f, ss);
+ f->close_section();
+ }
+
+ f->close_section();
+ f->flush(*ss);
+}
+
+template <typename I>
+void PoolReplayer<I>::start()
+{
+ dout(20) << "enter" << dendl;
+
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = false;
+
+ if (m_instance_replayer) {
+ m_instance_replayer->start();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::stop(bool manual)
+{
+ dout(20) << "enter: manual=" << manual << dendl;
+
+ Mutex::Locker l(m_lock);
+ if (!manual) {
+ m_stopping = true;
+ m_cond.Signal();
+ return;
+ } else if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = true;
+
+ if (m_instance_replayer) {
+ m_instance_replayer->stop();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::restart()
+{
+ dout(20) << "enter" << dendl;
+
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping) {
+ return;
+ }
+
+ if (m_instance_replayer) {
+ m_instance_replayer->restart();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::flush()
+{
+ dout(20) << "enter" << dendl;
+
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping || m_manual_stop) {
+ return;
+ }
+
+ if (m_instance_replayer) {
+ m_instance_replayer->flush();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::release_leader()
+{
+ dout(20) << "enter" << dendl;
+
+ Mutex::Locker l(m_lock);
+
+ if (m_stopping || !m_leader_watcher) {
+ return;
+ }
+
+ m_leader_watcher->release_leader();
+}
+
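+// Callback from the local/remote pool watchers. By convention an empty
+// mirror_uuid denotes the local pool (see PoolWatcherListener in
+// PoolReplayer.h); only the current leader feeds these updates into the
+// image map.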
+template <typename I>
+void PoolReplayer<I>::handle_update(const std::string &mirror_uuid,
+ ImageIds &&added_image_ids,
+ ImageIds &&removed_image_ids) {
+ if (m_stopping) {
+ return;
+ }
+
+ dout(10) << "mirror_uuid=" << mirror_uuid << ", "
+ << "added_count=" << added_image_ids.size() << ", "
+ << "removed_count=" << removed_image_ids.size() << dendl;
+ Mutex::Locker locker(m_lock);
+ if (!m_leader_watcher->is_leader()) {
+ return;
+ }
+
+ m_service_daemon->add_or_update_attribute(
+ m_local_pool_id, SERVICE_DAEMON_LOCAL_COUNT_KEY,
+ m_local_pool_watcher->get_image_count());
+ if (m_remote_pool_watcher) {
+ m_service_daemon->add_or_update_attribute(
+ m_local_pool_id, SERVICE_DAEMON_REMOTE_COUNT_KEY,
+ m_remote_pool_watcher->get_image_count());
+ }
+
+ std::set<std::string> added_global_image_ids;
+ for (auto& image_id : added_image_ids) {
+ added_global_image_ids.insert(image_id.global_id);
+ }
+
+ std::set<std::string> removed_global_image_ids;
+ for (auto& image_id : removed_image_ids) {
+ removed_global_image_ids.insert(image_id.global_id);
+ }
+
+ m_image_map->update_images(mirror_uuid,
+ std::move(added_global_image_ids),
+ std::move(removed_global_image_ids));
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_post_acquire_leader(Context *on_finish) {
+ dout(10) << dendl;
+
+ m_service_daemon->add_or_update_attribute(m_local_pool_id,
+ SERVICE_DAEMON_LEADER_KEY, true);
+ m_instance_watcher->handle_acquire_leader();
+ init_image_map(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_pre_release_leader(Context *on_finish) {
+ dout(10) << dendl;
+
+ m_service_daemon->remove_attribute(m_local_pool_id,
+ SERVICE_DAEMON_LEADER_KEY);
+ m_instance_watcher->handle_release_leader();
+ shut_down_image_deleter(on_finish);
+}
+
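+// Acquiring the leader role kicks off an async init chain:
+// init_image_map -> init_local_pool_watcher -> init_remote_pool_watcher ->
+// init_image_deleter. Each handle_* step either advances the chain or, on
+// error, unwinds whatever was already initialized.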
+template <typename I>
+void PoolReplayer<I>::init_image_map(Context *on_finish) {
+ dout(5) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_image_map);
+ m_image_map.reset(ImageMap<I>::create(m_local_io_ctx, m_threads,
+ m_instance_watcher->get_instance_id(),
+ m_image_map_listener));
+
+ auto ctx = new FunctionContext([this, on_finish](int r) {
+ handle_init_image_map(r, on_finish);
+ });
+ m_image_map->init(create_async_context_callback(
+ m_threads->work_queue, ctx));
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_init_image_map(int r, Context *on_finish) {
+ dout(5) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to init image map: " << cpp_strerror(r) << dendl;
+ on_finish = new FunctionContext([on_finish, r](int) {
+ on_finish->complete(r);
+ });
+ shut_down_image_map(on_finish);
+ return;
+ }
+
+ init_local_pool_watcher(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::init_local_pool_watcher(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_local_pool_watcher);
+ m_local_pool_watcher.reset(PoolWatcher<I>::create(
+ m_threads, m_local_io_ctx, m_local_pool_watcher_listener));
+
+ // ensure the initial set of local images is up-to-date
+ // after acquiring the leader role
+ auto ctx = new FunctionContext([this, on_finish](int r) {
+ handle_init_local_pool_watcher(r, on_finish);
+ });
+ m_local_pool_watcher->init(create_async_context_callback(
+ m_threads->work_queue, ctx));
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_init_local_pool_watcher(
+ int r, Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to retrieve local images: " << cpp_strerror(r) << dendl;
+ on_finish = new FunctionContext([on_finish, r](int) {
+ on_finish->complete(r);
+ });
+ shut_down_pool_watchers(on_finish);
+ return;
+ }
+
+ init_remote_pool_watcher(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::init_remote_pool_watcher(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_remote_pool_watcher);
+ m_remote_pool_watcher.reset(PoolWatcher<I>::create(
+ m_threads, m_remote_io_ctx, m_remote_pool_watcher_listener));
+
+ auto ctx = new FunctionContext([this, on_finish](int r) {
+ handle_init_remote_pool_watcher(r, on_finish);
+ });
+ m_remote_pool_watcher->init(create_async_context_callback(
+ m_threads->work_queue, ctx));
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_init_remote_pool_watcher(
+ int r, Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+ if (r == -ENOENT) {
+ // Technically nothing to do since the other side doesn't
+ // have mirroring enabled. Eventually the remote pool watcher will
+ // detect images (if mirroring is enabled), so no point propagating
+ // an error which would just busy-spin the state machines.
+ dout(0) << "remote peer does not have mirroring configured" << dendl;
+ } else if (r < 0) {
+ derr << "failed to retrieve remote images: " << cpp_strerror(r) << dendl;
+ on_finish = new FunctionContext([on_finish, r](int) {
+ on_finish->complete(r);
+ });
+ shut_down_pool_watchers(on_finish);
+ return;
+ }
+
+ init_image_deleter(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::init_image_deleter(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_image_deleter);
+
+ on_finish = new FunctionContext([this, on_finish](int r) {
+ handle_init_image_deleter(r, on_finish);
+ });
+ m_image_deleter.reset(ImageDeleter<I>::create(m_local_io_ctx, m_threads,
+ m_service_daemon));
+ m_image_deleter->init(create_async_context_callback(
+ m_threads->work_queue, on_finish));
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_init_image_deleter(int r, Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to init image deleter: " << cpp_strerror(r) << dendl;
+ on_finish = new FunctionContext([on_finish, r](int) {
+ on_finish->complete(r);
+ });
+ shut_down_image_deleter(on_finish);
+ return;
+ }
+
+ on_finish->complete(0);
+
+ Mutex::Locker locker(m_lock);
+ m_cond.Signal();
+}
+
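+// Releasing the leader role (or a failed leader init) unwinds in reverse:
+// shut_down_image_deleter -> shut_down_pool_watchers ->
+// wait_for_update_ops -> shut_down_image_map, finishing by having the
+// instance replayer release all of its images.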
+template <typename I>
+void PoolReplayer<I>::shut_down_image_deleter(Context* on_finish) {
+ dout(10) << dendl;
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_image_deleter) {
+ Context *ctx = new FunctionContext([this, on_finish](int r) {
+ handle_shut_down_image_deleter(r, on_finish);
+ });
+ ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+ m_image_deleter->shut_down(ctx);
+ return;
+ }
+ }
+ shut_down_pool_watchers(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_shut_down_image_deleter(
+ int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_image_deleter);
+ m_image_deleter.reset();
+ }
+
+ shut_down_pool_watchers(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::shut_down_pool_watchers(Context *on_finish) {
+ dout(10) << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_local_pool_watcher) {
+ Context *ctx = new FunctionContext([this, on_finish](int r) {
+ handle_shut_down_pool_watchers(r, on_finish);
+ });
+ ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+ auto gather_ctx = new C_Gather(g_ceph_context, ctx);
+ m_local_pool_watcher->shut_down(gather_ctx->new_sub());
+ if (m_remote_pool_watcher) {
+ m_remote_pool_watcher->shut_down(gather_ctx->new_sub());
+ }
+ gather_ctx->activate();
+ return;
+ }
+ }
+
+ on_finish->complete(0);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_shut_down_pool_watchers(
+ int r, Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_local_pool_watcher);
+ m_local_pool_watcher.reset();
+
+ if (m_remote_pool_watcher) {
+ m_remote_pool_watcher.reset();
+ }
+ }
+ wait_for_update_ops(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::wait_for_update_ops(Context *on_finish) {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+
+ Context *ctx = new FunctionContext([this, on_finish](int r) {
+ handle_wait_for_update_ops(r, on_finish);
+ });
+ ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+ m_update_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_wait_for_update_ops(int r, Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+ ceph_assert(r == 0);
+
+ shut_down_image_map(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::shut_down_image_map(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_image_map) {
+ on_finish = new FunctionContext([this, on_finish](int r) {
+ handle_shut_down_image_map(r, on_finish);
+ });
+ m_image_map->shut_down(create_async_context_callback(
+ m_threads->work_queue, on_finish));
+ return;
+ }
+ }
+
+ on_finish->complete(0);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_shut_down_image_map(int r, Context *on_finish) {
+ dout(5) << "r=" << r << dendl;
+ if (r < 0 && r != -EBLACKLISTED) {
+ derr << "failed to shut down image map: " << cpp_strerror(r) << dendl;
+ }
+
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_image_map);
+ m_image_map.reset();
+
+ m_instance_replayer->release_all(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_update_leader(
+ const std::string &leader_instance_id) {
+ dout(10) << "leader_instance_id=" << leader_instance_id << dendl;
+
+ m_instance_watcher->handle_update_leader(leader_instance_id);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_acquire_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) {
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ m_instance_watcher->notify_image_acquire(instance_id, global_image_id,
+ on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_release_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) {
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ m_instance_watcher->notify_image_release(instance_id, global_image_id,
+ on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_remove_image(const std::string &mirror_uuid,
+ const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) {
+ ceph_assert(!mirror_uuid.empty());
+ dout(5) << "mirror_uuid=" << mirror_uuid << ", "
+ << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ m_instance_watcher->notify_peer_image_removed(instance_id, global_image_id,
+ mirror_uuid, on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_instances_added(const InstanceIds &instance_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+ Mutex::Locker locker(m_lock);
+ if (!m_leader_watcher->is_leader()) {
+ return;
+ }
+
+ ceph_assert(m_image_map);
+ m_image_map->update_instances_added(instance_ids);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_instances_removed(
+ const InstanceIds &instance_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+ Mutex::Locker locker(m_lock);
+ if (!m_leader_watcher->is_leader()) {
+ return;
+ }
+
+ ceph_assert(m_image_map);
+ m_image_map->update_instances_removed(instance_ids);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::PoolReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/PoolReplayer.h b/src/tools/rbd_mirror/PoolReplayer.h
new file mode 100644
index 00000000..43a4a0fc
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolReplayer.h
@@ -0,0 +1,303 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_REPLAYER_H
+#define CEPH_RBD_MIRROR_POOL_REPLAYER_H
+
+#include "common/AsyncOpTracker.h"
+#include "common/Cond.h"
+#include "common/Mutex.h"
+#include "common/WorkQueue.h"
+#include "include/rados/librados.hpp"
+
+#include "ClusterWatcher.h"
+#include "LeaderWatcher.h"
+#include "PoolWatcher.h"
+#include "ImageDeleter.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_map/Types.h"
+#include "tools/rbd_mirror/leader_watcher/Types.h"
+#include "tools/rbd_mirror/pool_watcher/Types.h"
+#include "tools/rbd_mirror/service_daemon/Types.h"
+
+#include <set>
+#include <map>
+#include <memory>
+#include <atomic>
+#include <string>
+#include <vector>
+
+class AdminSocketHook;
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ImageMap;
+template <typename> class InstanceReplayer;
+template <typename> class InstanceWatcher;
+template <typename> class ServiceDaemon;
+template <typename> struct Threads;
+
+/**
+ * Controls mirroring of a single local pool against a single remote
+ * peer cluster.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class PoolReplayer {
+public:
+ PoolReplayer(Threads<ImageCtxT> *threads,
+ ServiceDaemon<ImageCtxT>* service_daemon,
+ int64_t local_pool_id, const PeerSpec &peer,
+ const std::vector<const char*> &args);
+ ~PoolReplayer();
+ PoolReplayer(const PoolReplayer&) = delete;
+ PoolReplayer& operator=(const PoolReplayer&) = delete;
+
+ bool is_blacklisted() const;
+ bool is_leader() const;
+ bool is_running() const;
+
+ void init();
+ void shut_down();
+
+ void run();
+
+ void print_status(Formatter *f, stringstream *ss);
+ void start();
+ void stop(bool manual);
+ void restart();
+ void flush();
+ void release_leader();
+ void reopen_logs();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * INIT
+ * |
+ * v
+ * <follower> <-------------------------\
+ * . |
+ * . |
+ * v (leader acquired) |
+ * INIT_IMAGE_MAP SHUT_DOWN_IMAGE_MAP
+ * | ^
+ * v |
+ * INIT_LOCAL_POOL_WATCHER WAIT_FOR_NOTIFICATIONS
+ * | ^
+ * v |
+ * INIT_REMOTE_POOL_WATCHER SHUT_DOWN_POOL_WATCHERS
+ * | ^
+ * v |
+ * INIT_IMAGE_DELETER SHUT_DOWN_IMAGE_DELETER
+ * | ^
+ * v .
+ * <leader> <-----------\ .
+ * . | .
+ * . (image update) | .
+ * . . > NOTIFY_INSTANCE_WATCHER .
+ * . .
+ * . (leader lost / shut down) .
+ * . . . . . . . . . . . . . . . . . .
+ *
+ * @endverbatim
+ */
+
+ typedef std::vector<std::string> InstanceIds;
+
+ struct PoolWatcherListener : public pool_watcher::Listener {
+ PoolReplayer *pool_replayer;
+ bool local;
+
+ PoolWatcherListener(PoolReplayer *pool_replayer, bool local)
+ : pool_replayer(pool_replayer), local(local) {
+ }
+
+ void handle_update(const std::string &mirror_uuid,
+ ImageIds &&added_image_ids,
+ ImageIds &&removed_image_ids) override {
+ pool_replayer->handle_update((local ? "" : mirror_uuid),
+ std::move(added_image_ids),
+ std::move(removed_image_ids));
+ }
+ };
+
+ struct ImageMapListener : public image_map::Listener {
+ PoolReplayer *pool_replayer;
+
+ ImageMapListener(PoolReplayer *pool_replayer)
+ : pool_replayer(pool_replayer) {
+ }
+
+ void acquire_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) override {
+ pool_replayer->handle_acquire_image(global_image_id, instance_id,
+ on_finish);
+ }
+
+ void release_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) override {
+ pool_replayer->handle_release_image(global_image_id, instance_id,
+ on_finish);
+ }
+
+ void remove_image(const std::string &mirror_uuid,
+ const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) override {
+ pool_replayer->handle_remove_image(mirror_uuid, global_image_id,
+ instance_id, on_finish);
+ }
+ };
+
+ void handle_update(const std::string &mirror_uuid,
+ ImageIds &&added_image_ids,
+ ImageIds &&removed_image_ids);
+
+ int init_rados(const std::string &cluster_name,
+ const std::string &client_name,
+ const std::string &mon_host,
+ const std::string &key,
+ const std::string &description, RadosRef *rados_ref,
+ bool strip_cluster_overrides);
+
+ void handle_post_acquire_leader(Context *on_finish);
+ void handle_pre_release_leader(Context *on_finish);
+
+ void init_image_map(Context *on_finish);
+ void handle_init_image_map(int r, Context *on_finish);
+
+ void init_local_pool_watcher(Context *on_finish);
+ void handle_init_local_pool_watcher(int r, Context *on_finish);
+
+ void init_remote_pool_watcher(Context *on_finish);
+ void handle_init_remote_pool_watcher(int r, Context *on_finish);
+
+ void init_image_deleter(Context* on_finish);
+ void handle_init_image_deleter(int r, Context* on_finish);
+
+ void shut_down_image_deleter(Context* on_finish);
+ void handle_shut_down_image_deleter(int r, Context* on_finish);
+
+ void shut_down_pool_watchers(Context *on_finish);
+ void handle_shut_down_pool_watchers(int r, Context *on_finish);
+
+ void wait_for_update_ops(Context *on_finish);
+ void handle_wait_for_update_ops(int r, Context *on_finish);
+
+ void shut_down_image_map(Context *on_finish);
+ void handle_shut_down_image_map(int r, Context *on_finish);
+
+ void handle_update_leader(const std::string &leader_instance_id);
+
+ void handle_acquire_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish);
+ void handle_release_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish);
+ void handle_remove_image(const std::string &mirror_uuid,
+ const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish);
+
+ void handle_instances_added(const InstanceIds &instance_ids);
+ void handle_instances_removed(const InstanceIds &instance_ids);
+
+ Threads<ImageCtxT> *m_threads;
+ ServiceDaemon<ImageCtxT>* m_service_daemon;
+ int64_t m_local_pool_id = -1;
+ PeerSpec m_peer;
+ std::vector<const char*> m_args;
+
+ mutable Mutex m_lock;
+ Cond m_cond;
+ std::atomic<bool> m_stopping = { false };
+ bool m_manual_stop = false;
+ bool m_blacklisted = false;
+
+ RadosRef m_local_rados;
+ RadosRef m_remote_rados;
+
+ librados::IoCtx m_local_io_ctx;
+ librados::IoCtx m_remote_io_ctx;
+
+ PoolWatcherListener m_local_pool_watcher_listener;
+ std::unique_ptr<PoolWatcher<ImageCtxT>> m_local_pool_watcher;
+
+ PoolWatcherListener m_remote_pool_watcher_listener;
+ std::unique_ptr<PoolWatcher<ImageCtxT>> m_remote_pool_watcher;
+
+ std::unique_ptr<InstanceReplayer<ImageCtxT>> m_instance_replayer;
+ std::unique_ptr<ImageDeleter<ImageCtxT>> m_image_deleter;
+
+ ImageMapListener m_image_map_listener;
+ std::unique_ptr<ImageMap<ImageCtxT>> m_image_map;
+
+ std::string m_asok_hook_name;
+ AdminSocketHook *m_asok_hook = nullptr;
+
+ service_daemon::CalloutId m_callout_id = service_daemon::CALLOUT_ID_NONE;
+
+ class PoolReplayerThread : public Thread {
+ PoolReplayer *m_pool_replayer;
+ public:
+ PoolReplayerThread(PoolReplayer *pool_replayer)
+ : m_pool_replayer(pool_replayer) {
+ }
+ void *entry() override {
+ m_pool_replayer->run();
+ return 0;
+ }
+ } m_pool_replayer_thread;
+
+ class LeaderListener : public leader_watcher::Listener {
+ public:
+ LeaderListener(PoolReplayer *pool_replayer)
+ : m_pool_replayer(pool_replayer) {
+ }
+
+ protected:
+ void post_acquire_handler(Context *on_finish) override {
+ m_pool_replayer->handle_post_acquire_leader(on_finish);
+ }
+
+ void pre_release_handler(Context *on_finish) override {
+ m_pool_replayer->handle_pre_release_leader(on_finish);
+ }
+
+ void update_leader_handler(
+ const std::string &leader_instance_id) override {
+ m_pool_replayer->handle_update_leader(leader_instance_id);
+ }
+
+ void handle_instances_added(const InstanceIds& instance_ids) override {
+ m_pool_replayer->handle_instances_added(instance_ids);
+ }
+
+ void handle_instances_removed(const InstanceIds& instance_ids) override {
+ m_pool_replayer->handle_instances_removed(instance_ids);
+ }
+
+ private:
+ PoolReplayer *m_pool_replayer;
+ } m_leader_listener;
+
+ std::unique_ptr<LeaderWatcher<ImageCtxT>> m_leader_watcher;
+ std::unique_ptr<InstanceWatcher<ImageCtxT>> m_instance_watcher;
+ AsyncOpTracker m_update_op_tracker;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::PoolReplayer<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_REPLAYER_H
diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc
new file mode 100644
index 00000000..81810ea1
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolWatcher.cc
@@ -0,0 +1,553 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/PoolWatcher.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/MirroringWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Image.h"
+#include "librbd/api/Mirror.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h"
+#include <boost/bind.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PoolWatcher: " << this << " " \
+ << __func__ << ": "
+
+using std::list;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+namespace rbd {
+namespace mirror {
+
+template <typename I>
+class PoolWatcher<I>::MirroringWatcher : public librbd::MirroringWatcher<I> {
+public:
+ using ContextWQ = typename std::decay<
+ typename std::remove_pointer<
+ decltype(Threads<I>::work_queue)>::type>::type;
+
+ MirroringWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue,
+ PoolWatcher *pool_watcher)
+ : librbd::MirroringWatcher<I>(io_ctx, work_queue),
+ m_pool_watcher(pool_watcher) {
+ }
+
+ void handle_rewatch_complete(int r) override {
+ m_pool_watcher->handle_rewatch_complete(r);
+ }
+
+ void handle_mode_updated(cls::rbd::MirrorMode mirror_mode) override {
+ // invalidate all image state and refresh the pool contents
+ m_pool_watcher->schedule_refresh_images(5);
+ }
+
+ void handle_image_updated(cls::rbd::MirrorImageState state,
+ const std::string &remote_image_id,
+ const std::string &global_image_id) override {
+ bool enabled = (state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED);
+ m_pool_watcher->handle_image_updated(remote_image_id, global_image_id,
+ enabled);
+ }
+
+private:
+ PoolWatcher *m_pool_watcher;
+};
+
+template <typename I>
+PoolWatcher<I>::PoolWatcher(Threads<I> *threads, librados::IoCtx &remote_io_ctx,
+ pool_watcher::Listener &listener)
+ : m_threads(threads), m_remote_io_ctx(remote_io_ctx), m_listener(listener),
+ m_lock(librbd::util::unique_lock_name("rbd::mirror::PoolWatcher", this)) {
+ m_mirroring_watcher = new MirroringWatcher(m_remote_io_ctx,
+ m_threads->work_queue, this);
+}
+
+template <typename I>
+PoolWatcher<I>::~PoolWatcher() {
+ delete m_mirroring_watcher;
+}
+
+template <typename I>
+bool PoolWatcher<I>::is_blacklisted() const {
+ Mutex::Locker locker(m_lock);
+ return m_blacklisted;
+}
+
+template <typename I>
+void PoolWatcher<I>::init(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ m_on_init_finish = on_finish;
+
+ ceph_assert(!m_refresh_in_progress);
+ m_refresh_in_progress = true;
+ }
+
+ // start async updates for mirror image directory
+ register_watcher();
+}
+
+template <typename I>
+void PoolWatcher<I>::shut_down(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(!m_shutting_down);
+ m_shutting_down = true;
+ if (m_timer_ctx != nullptr) {
+ m_threads->timer->cancel_event(m_timer_ctx);
+ m_timer_ctx = nullptr;
+ }
+ }
+
+ // in-progress unregister tracked as async op
+ unregister_watcher();
+
+ m_async_op_tracker.wait_for_ops(on_finish);
+}
+
+template <typename I>
+void PoolWatcher<I>::register_watcher() {
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_image_ids_invalid);
+ ceph_assert(m_refresh_in_progress);
+ }
+
+ // if the watch registration is in-flight, let the watcher
+ // handle the transition -- only (re-)register if it's not registered
+ if (!m_mirroring_watcher->is_unregistered()) {
+ refresh_images();
+ return;
+ }
+
+ // first time registering or the watch failed
+ dout(5) << dendl;
+ m_async_op_tracker.start_op();
+
+ Context *ctx = create_context_callback<
+ PoolWatcher, &PoolWatcher<I>::handle_register_watcher>(this);
+ m_mirroring_watcher->register_watch(ctx);
+}
+
+template <typename I>
+void PoolWatcher<I>::handle_register_watcher(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_image_ids_invalid);
+ ceph_assert(m_refresh_in_progress);
+ if (r < 0) {
+ m_refresh_in_progress = false;
+ }
+ }
+
+ Context *on_init_finish = nullptr;
+ if (r >= 0) {
+ refresh_images();
+ } else if (r == -EBLACKLISTED) {
+ dout(0) << "detected client is blacklisted" << dendl;
+
+ Mutex::Locker locker(m_lock);
+ m_blacklisted = true;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r == -ENOENT) {
+ dout(5) << "mirroring directory does not exist" << dendl;
+ {
+ Mutex::Locker locker(m_lock);
+ std::swap(on_init_finish, m_on_init_finish);
+ }
+
+ schedule_refresh_images(30);
+ } else {
+ derr << "unexpected error registering mirroring directory watch: "
+ << cpp_strerror(r) << dendl;
+ schedule_refresh_images(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void PoolWatcher<I>::unregister_watcher() {
+ dout(5) << dendl;
+
+ m_async_op_tracker.start_op();
+ Context *ctx = new FunctionContext([this](int r) {
+ dout(5) << "unregister_watcher: r=" << r << dendl;
+ if (r < 0) {
+ derr << "error unregistering watcher for "
+ << m_mirroring_watcher->get_oid() << " object: " << cpp_strerror(r)
+ << dendl;
+ }
+ m_async_op_tracker.finish_op();
+ });
+
+ m_mirroring_watcher->unregister_watch(ctx);
+}
+
+template <typename I>
+void PoolWatcher<I>::refresh_images() {
+ dout(5) << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_image_ids_invalid);
+ ceph_assert(m_refresh_in_progress);
+
+ // clear all pending notification events since we need to perform
+ // a full image list refresh
+ m_pending_added_image_ids.clear();
+ m_pending_removed_image_ids.clear();
+ }
+
+ m_async_op_tracker.start_op();
+ m_refresh_image_ids.clear();
+ Context *ctx = create_context_callback<
+ PoolWatcher, &PoolWatcher<I>::handle_refresh_images>(this);
+ auto req = pool_watcher::RefreshImagesRequest<I>::create(m_remote_io_ctx,
+ &m_refresh_image_ids,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void PoolWatcher<I>::handle_refresh_images(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ bool retry_refresh = false;
+ Context *on_init_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_image_ids_invalid);
+ ceph_assert(m_refresh_in_progress);
+
+ if (r >= 0) {
+ m_pending_image_ids = std::move(m_refresh_image_ids);
+ } else if (r == -EBLACKLISTED) {
+ dout(0) << "detected client is blacklisted during image refresh" << dendl;
+
+ m_blacklisted = true;
+ m_refresh_in_progress = false;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r == -ENOENT) {
+ dout(5) << "mirroring directory not found" << dendl;
+ m_pending_image_ids.clear();
+ r = 0;
+ } else {
+ m_refresh_in_progress = false;
+ retry_refresh = true;
+ }
+ }
+
+ if (retry_refresh) {
+ derr << "failed to retrieve mirroring directory: " << cpp_strerror(r)
+ << dendl;
+ schedule_refresh_images(10);
+ } else if (r >= 0) {
+ get_mirror_uuid();
+ return;
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ ceph_assert(r == -EBLACKLISTED);
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void PoolWatcher<I>::get_mirror_uuid() {
+ dout(5) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_uuid_get_start(&op);
+
+ m_out_bl.clear();
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ PoolWatcher, &PoolWatcher<I>::handle_get_mirror_uuid>(this);
+ int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void PoolWatcher<I>::handle_get_mirror_uuid(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ bool deferred_refresh = false;
+ bool retry_refresh = false;
+ Context *on_init_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_image_ids_invalid);
+ ceph_assert(m_refresh_in_progress);
+ m_refresh_in_progress = false;
+
+ m_pending_mirror_uuid = "";
+ if (r >= 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_uuid_get_finish(
+ &it, &m_pending_mirror_uuid);
+ }
+ if (r >= 0 && m_pending_mirror_uuid.empty()) {
+ r = -ENOENT;
+ }
+
+ if (m_deferred_refresh) {
+ // need to refresh -- skip the notification
+ deferred_refresh = true;
+ } else if (r >= 0) {
+ dout(10) << "mirror_uuid=" << m_pending_mirror_uuid << dendl;
+ m_image_ids_invalid = false;
+ std::swap(on_init_finish, m_on_init_finish);
+ schedule_listener();
+ } else if (r == -EBLACKLISTED) {
+ dout(0) << "detected client is blacklisted during image refresh" << dendl;
+
+ m_blacklisted = true;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r == -ENOENT) {
+ dout(5) << "mirroring uuid not found" << dendl;
+ std::swap(on_init_finish, m_on_init_finish);
+ retry_refresh = true;
+ } else {
+ retry_refresh = true;
+ }
+ }
+
+ if (deferred_refresh) {
+ dout(5) << "scheduling deferred refresh" << dendl;
+ schedule_refresh_images(0);
+ } else if (retry_refresh) {
+ derr << "failed to retrieve mirror uuid: " << cpp_strerror(r)
+ << dendl;
+ schedule_refresh_images(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
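+// Schedule a full image list refresh after 'interval' seconds. No-op while
+// shutting down or when a timer event is already queued; if a refresh is
+// already in flight the request is remembered in m_deferred_refresh and
+// replayed once the current pass completes.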
+template <typename I>
+void PoolWatcher<I>::schedule_refresh_images(double interval) {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ if (m_shutting_down || m_refresh_in_progress || m_timer_ctx != nullptr) {
+ if (m_refresh_in_progress && !m_deferred_refresh) {
+ dout(5) << "deferring refresh until in-flight refresh completes" << dendl;
+ m_deferred_refresh = true;
+ }
+ return;
+ }
+
+ m_image_ids_invalid = true;
+ m_timer_ctx = m_threads->timer->add_event_after(
+ interval,
+ new FunctionContext([this](int r) {
+ process_refresh_images();
+ }));
+}
+
+template <typename I>
+void PoolWatcher<I>::handle_rewatch_complete(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ if (r == -EBLACKLISTED) {
+ dout(0) << "detected client is blacklisted" << dendl;
+
+ Mutex::Locker locker(m_lock);
+ m_blacklisted = true;
+ return;
+ } else if (r == -ENOENT) {
+ dout(5) << "mirroring directory deleted" << dendl;
+ } else if (r < 0) {
+ derr << "unexpected error re-registering mirroring directory watch: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ schedule_refresh_images(5);
+}
+
+template <typename I>
+void PoolWatcher<I>::handle_image_updated(const std::string &remote_image_id,
+ const std::string &global_image_id,
+ bool enabled) {
+ dout(10) << "remote_image_id=" << remote_image_id << ", "
+ << "global_image_id=" << global_image_id << ", "
+ << "enabled=" << enabled << dendl;
+
+ Mutex::Locker locker(m_lock);
+ ImageId image_id(global_image_id, remote_image_id);
+ m_pending_added_image_ids.erase(image_id);
+ m_pending_removed_image_ids.erase(image_id);
+
+ if (enabled) {
+ m_pending_added_image_ids.insert(image_id);
+ schedule_listener();
+ } else {
+ m_pending_removed_image_ids.insert(image_id);
+ schedule_listener();
+ }
+}
+
+template <typename I>
+void PoolWatcher<I>::process_refresh_images() {
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_timer_ctx != nullptr);
+ m_timer_ctx = nullptr;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_refresh_in_progress);
+ m_refresh_in_progress = true;
+ m_deferred_refresh = false;
+ }
+
+ // execute outside of the timer's lock
+ m_async_op_tracker.start_op();
+ Context *ctx = new FunctionContext([this](int r) {
+ register_watcher();
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void PoolWatcher<I>::schedule_listener() {
+ ceph_assert(m_lock.is_locked());
+ m_pending_updates = true;
+ if (m_shutting_down || m_image_ids_invalid || m_notify_listener_in_progress) {
+ return;
+ }
+
+ dout(20) << dendl;
+
+ m_async_op_tracker.start_op();
+ Context *ctx = new FunctionContext([this](int r) {
+ notify_listener();
+ m_async_op_tracker.finish_op();
+ });
+
+ m_notify_listener_in_progress = true;
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void PoolWatcher<I>::notify_listener() {
+ dout(10) << dendl;
+
+ std::string mirror_uuid;
+ ImageIds added_image_ids;
+ ImageIds removed_image_ids;
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_notify_listener_in_progress);
+
+ // if the mirror uuid is updated, treat it as the removal of all
+ // images in the pool
+ if (m_mirror_uuid != m_pending_mirror_uuid) {
+ if (!m_mirror_uuid.empty()) {
+ dout(0) << "mirror uuid updated:"
+ << "old=" << m_mirror_uuid << ", "
+ << "new=" << m_pending_mirror_uuid << dendl;
+ }
+
+ mirror_uuid = m_mirror_uuid;
+ removed_image_ids = std::move(m_image_ids);
+ m_image_ids.clear();
+ }
+ }
+
+ if (!removed_image_ids.empty()) {
+ m_listener.handle_update(mirror_uuid, {}, std::move(removed_image_ids));
+ removed_image_ids.clear();
+ }
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_notify_listener_in_progress);
+
+ // if the watch failed while we didn't own the lock, we are going
+ // to need to perform a full refresh
+ if (m_image_ids_invalid) {
+ m_notify_listener_in_progress = false;
+ return;
+ }
+
+ // merge add/remove notifications into pending set (a given image
+ // can only be in one set or another)
+ for (auto &image_id : m_pending_removed_image_ids) {
+ dout(20) << "image_id=" << image_id << dendl;
+ m_pending_image_ids.erase(image_id);
+ }
+
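+    // ImageId compares by global image id only, so erase-then-insert
+    // replaces any stale entry and refreshes the associated remote image id
+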
+ for (auto &image_id : m_pending_added_image_ids) {
+ dout(20) << "image_id=" << image_id << dendl;
+ m_pending_image_ids.erase(image_id);
+ m_pending_image_ids.insert(image_id);
+ }
+ m_pending_added_image_ids.clear();
+
+ // compute added/removed images
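+    // an image whose remote image id changed shows up in both sets: the old
+    // entry is reported as removed and the new one as added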
+ for (auto &image_id : m_image_ids) {
+ auto it = m_pending_image_ids.find(image_id);
+ if (it == m_pending_image_ids.end() || it->id != image_id.id) {
+ removed_image_ids.insert(image_id);
+ }
+ }
+ for (auto &image_id : m_pending_image_ids) {
+ auto it = m_image_ids.find(image_id);
+ if (it == m_image_ids.end() || it->id != image_id.id) {
+ added_image_ids.insert(image_id);
+ }
+ }
+
+ m_pending_updates = false;
+ m_image_ids = m_pending_image_ids;
+
+ m_mirror_uuid = m_pending_mirror_uuid;
+ mirror_uuid = m_mirror_uuid;
+ }
+
+ m_listener.handle_update(mirror_uuid, std::move(added_image_ids),
+ std::move(removed_image_ids));
+
+ {
+ Mutex::Locker locker(m_lock);
+ m_notify_listener_in_progress = false;
+ if (m_pending_updates) {
+ schedule_listener();
+ }
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::PoolWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h
new file mode 100644
index 00000000..1136a319
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolWatcher.h
@@ -0,0 +1,166 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_context.h"
+#include "common/Mutex.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include <boost/functional/hash.hpp>
+#include <boost/optional.hpp>
+#include "include/ceph_assert.h"
+#include "tools/rbd_mirror/pool_watcher/Types.h"
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+/**
+ * Keeps track of images that have mirroring enabled within a
+ * single pool.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class PoolWatcher {
+public:
+ static PoolWatcher* create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &remote_io_ctx,
+ pool_watcher::Listener &listener) {
+ return new PoolWatcher(threads, remote_io_ctx, listener);
+ }
+
+ PoolWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &remote_io_ctx,
+ pool_watcher::Listener &listener);
+ ~PoolWatcher();
+ PoolWatcher(const PoolWatcher&) = delete;
+ PoolWatcher& operator=(const PoolWatcher&) = delete;
+
+ bool is_blacklisted() const;
+
+ void init(Context *on_finish = nullptr);
+ void shut_down(Context *on_finish);
+
+ inline uint64_t get_image_count() const {
+ Mutex::Locker locker(m_lock);
+ return m_image_ids.size();
+ }
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * INIT
+ * |
+ * v
+ * REGISTER_WATCHER
+ * |
+ * |/--------------------------------\
+ * | |
+ * v |
+ * REFRESH_IMAGES |
+ * | |
+ * |/----------------------------\ |
+ * | | |
+ * v | |
+ * GET_MIRROR_UUID | |
+ * | | |
+ * v | |
+ * NOTIFY_LISTENER | |
+ * | | |
+ * v | |
+ * IDLE ---\ | |
+ * | | | |
+ * | |\---> IMAGE_UPDATED | |
+ * | | | | |
+ * | | v | |
+ * | | GET_IMAGE_NAME --/ |
+ * | | |
+ * | \----> WATCH_ERROR ---------/
+ * v
+ * SHUT_DOWN
+ * |
+ * v
+ * UNREGISTER_WATCHER
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ class MirroringWatcher;
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx m_remote_io_ctx;
+ pool_watcher::Listener &m_listener;
+
+ ImageIds m_refresh_image_ids;
+ bufferlist m_out_bl;
+
+ mutable Mutex m_lock;
+
+ Context *m_on_init_finish = nullptr;
+
+ ImageIds m_image_ids;
+ std::string m_mirror_uuid;
+
+ bool m_pending_updates = false;
+ bool m_notify_listener_in_progress = false;
+ ImageIds m_pending_image_ids;
+ ImageIds m_pending_added_image_ids;
+ ImageIds m_pending_removed_image_ids;
+
+ std::string m_pending_mirror_uuid;
+
+ MirroringWatcher *m_mirroring_watcher;
+
+ Context *m_timer_ctx = nullptr;
+
+ AsyncOpTracker m_async_op_tracker;
+ bool m_blacklisted = false;
+ bool m_shutting_down = false;
+ bool m_image_ids_invalid = true;
+ bool m_refresh_in_progress = false;
+ bool m_deferred_refresh = false;
+
+ void register_watcher();
+ void handle_register_watcher(int r);
+ void unregister_watcher();
+
+ void refresh_images();
+ void handle_refresh_images(int r);
+
+ void schedule_refresh_images(double interval);
+ void process_refresh_images();
+
+ void get_mirror_uuid();
+ void handle_get_mirror_uuid(int r);
+
+ void handle_rewatch_complete(int r);
+ void handle_image_updated(const std::string &remote_image_id,
+ const std::string &global_image_id,
+ bool enabled);
+
+ void schedule_listener();
+ void notify_listener();
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::PoolWatcher<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_H
diff --git a/src/tools/rbd_mirror/ProgressContext.h b/src/tools/rbd_mirror/ProgressContext.h
new file mode 100644
index 00000000..e4430ee6
--- /dev/null
+++ b/src/tools/rbd_mirror/ProgressContext.h
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_PROGRESS_CONTEXT_H
+#define RBD_MIRROR_PROGRESS_CONTEXT_H
+
+#include <string>
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext
+{
+public:
+ virtual ~ProgressContext() {}
+ virtual void update_progress(const std::string &description,
+ bool flush = true) = 0;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_PROGRESS_CONTEXT_H
diff --git a/src/tools/rbd_mirror/ServiceDaemon.cc b/src/tools/rbd_mirror/ServiceDaemon.cc
new file mode 100644
index 00000000..f3b549b8
--- /dev/null
+++ b/src/tools/rbd_mirror/ServiceDaemon.cc
@@ -0,0 +1,251 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/ServiceDaemon.h"
+#include "include/Context.h"
+#include "include/stringify.h"
+#include "common/ceph_context.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/Timer.h"
+#include "tools/rbd_mirror/Threads.h"
+#include <sstream>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ServiceDaemon: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+const std::string RBD_MIRROR_AUTH_ID_PREFIX("rbd-mirror.");
+
+struct AttributeDumpVisitor : public boost::static_visitor<void> {
+ ceph::Formatter *f;
+ const std::string& name;
+
+ AttributeDumpVisitor(ceph::Formatter *f, const std::string& name)
+ : f(f), name(name) {
+ }
+
+ void operator()(bool val) const {
+ f->dump_bool(name.c_str(), val);
+ }
+ void operator()(uint64_t val) const {
+ f->dump_unsigned(name.c_str(), val);
+ }
+ void operator()(const std::string& val) const {
+ f->dump_string(name.c_str(), val);
+ }
+};
+
+} // anonymous namespace
+
+using namespace service_daemon;
+
+template <typename I>
+ServiceDaemon<I>::ServiceDaemon(CephContext *cct, RadosRef rados,
+ Threads<I>* threads)
+ : m_cct(cct), m_rados(rados), m_threads(threads),
+ m_lock("rbd::mirror::ServiceDaemon") {
+ dout(20) << dendl;
+}
+
+template <typename I>
+ServiceDaemon<I>::~ServiceDaemon() {
+ dout(20) << dendl;
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ if (m_timer_ctx != nullptr) {
+ m_threads->timer->cancel_event(m_timer_ctx);
+ update_status();
+ }
+}
+
+template <typename I>
+int ServiceDaemon<I>::init() {
+ dout(20) << dendl;
+
+ std::string id = m_cct->_conf->name.get_id();
+ if (id.find(RBD_MIRROR_AUTH_ID_PREFIX) == 0) {
+ id = id.substr(RBD_MIRROR_AUTH_ID_PREFIX.size());
+ }
+
+ std::string instance_id = stringify(m_rados->get_instance_id());
+ std::map<std::string, std::string> service_metadata = {
+ {"id", id}, {"instance_id", instance_id}};
+ int r = m_rados->service_daemon_register("rbd-mirror", instance_id,
+ service_metadata);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+void ServiceDaemon<I>::add_pool(int64_t pool_id, const std::string& pool_name) {
+ dout(20) << "pool_id=" << pool_id << ", pool_name=" << pool_name << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ m_pools.insert({pool_id, {pool_name}});
+ }
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::remove_pool(int64_t pool_id) {
+ dout(20) << "pool_id=" << pool_id << dendl;
+ {
+ Mutex::Locker locker(m_lock);
+ m_pools.erase(pool_id);
+ }
+ schedule_update_status();
+}
+
+template <typename I>
+uint64_t ServiceDaemon<I>::add_or_update_callout(int64_t pool_id,
+ uint64_t callout_id,
+ CalloutLevel callout_level,
+ const std::string& text) {
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "callout_id=" << callout_id << ", "
+ << "callout_level=" << callout_level << ", "
+ << "text=" << text << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return CALLOUT_ID_NONE;
+ }
+
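+    // CALLOUT_ID_NONE requests a new callout entry; ids are allocated from a
+    // monotonically increasing per-daemon counter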
+ if (callout_id == CALLOUT_ID_NONE) {
+ callout_id = ++m_callout_id;
+ }
+ pool_it->second.callouts[callout_id] = {callout_level, text};
+ }
+
+ schedule_update_status();
+ return callout_id;
+}
+
+template <typename I>
+void ServiceDaemon<I>::remove_callout(int64_t pool_id, uint64_t callout_id) {
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "callout_id=" << callout_id << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+ pool_it->second.callouts.erase(callout_id);
+ }
+
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::add_or_update_attribute(int64_t pool_id,
+ const std::string& key,
+ const AttributeValue& value) {
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "key=" << key << ", "
+ << "value=" << value << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+ pool_it->second.attributes[key] = value;
+ }
+
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::remove_attribute(int64_t pool_id,
+ const std::string& key) {
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "key=" << key << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+ pool_it->second.attributes.erase(key);
+ }
+
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::schedule_update_status() {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ if (m_timer_ctx != nullptr) {
+ return;
+ }
+
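+  // coalesce bursts of pool/callout/attribute changes: a single timer event
+  // one second from now flushes the combined state via update_status()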
+ m_timer_ctx = new FunctionContext([this](int) {
+ m_timer_ctx = nullptr;
+ update_status();
+ });
+ m_threads->timer->add_event_after(1, m_timer_ctx);
+}
+
+template <typename I>
+void ServiceDaemon<I>::update_status() {
+ dout(20) << dendl;
+ ceph_assert(m_threads->timer_lock.is_locked());
+
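+  // the status payload is a JSON document keyed by pool id, roughly (sketch):
+  //   "<pool id>": {"name": "...",
+  //                 "callouts": {"<id>": {"level": "...", "text": "..."}},
+  //                 "<attribute key>": <attribute value>}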
+ ceph::JSONFormatter f;
+ {
+ Mutex::Locker locker(m_lock);
+ f.open_object_section("pools");
+ for (auto& pool_pair : m_pools) {
+ f.open_object_section(stringify(pool_pair.first).c_str());
+ f.dump_string("name", pool_pair.second.name);
+ f.open_object_section("callouts");
+ for (auto& callout : pool_pair.second.callouts) {
+ f.open_object_section(stringify(callout.first).c_str());
+        f.dump_string("level", stringify(callout.second.level));
+        f.dump_string("text", callout.second.text);
+ f.close_section();
+ }
+ f.close_section(); // callouts
+
+ for (auto& attribute : pool_pair.second.attributes) {
+ AttributeDumpVisitor attribute_dump_visitor(&f, attribute.first);
+ boost::apply_visitor(attribute_dump_visitor, attribute.second);
+ }
+ f.close_section(); // pool
+ }
+ f.close_section(); // pools
+ }
+
+ std::stringstream ss;
+ f.flush(ss);
+
+ int r = m_rados->service_daemon_update_status({{"json", ss.str()}});
+ if (r < 0) {
+ derr << "failed to update service daemon status: " << cpp_strerror(r)
+ << dendl;
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ServiceDaemon.h b/src/tools/rbd_mirror/ServiceDaemon.h
new file mode 100644
index 00000000..1de7e20b
--- /dev/null
+++ b/src/tools/rbd_mirror/ServiceDaemon.h
@@ -0,0 +1,86 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_H
+#define CEPH_RBD_MIRROR_SERVICE_DAEMON_H
+
+#include "common/Mutex.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <map>
+#include <string>
+
+struct CephContext;
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ServiceDaemon {
+public:
+ ServiceDaemon(CephContext *cct, RadosRef rados, Threads<ImageCtxT>* threads);
+ ~ServiceDaemon();
+
+ int init();
+
+ void add_pool(int64_t pool_id, const std::string& pool_name);
+ void remove_pool(int64_t pool_id);
+
+ uint64_t add_or_update_callout(int64_t pool_id, uint64_t callout_id,
+ service_daemon::CalloutLevel callout_level,
+ const std::string& text);
+ void remove_callout(int64_t pool_id, uint64_t callout_id);
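+
+  // typical usage (sketch): callers keep the returned id and pass it back on
+  // later updates so the callout is overwritten instead of duplicated, e.g.
+  //   m_callout_id = service_daemon->add_or_update_callout(
+  //     pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, "msg");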
+
+ void add_or_update_attribute(int64_t pool_id, const std::string& key,
+ const service_daemon::AttributeValue& value);
+ void remove_attribute(int64_t pool_id, const std::string& key);
+
+private:
+ struct Callout {
+ service_daemon::CalloutLevel level;
+ std::string text;
+
+ Callout() : level(service_daemon::CALLOUT_LEVEL_INFO) {
+ }
+ Callout(service_daemon::CalloutLevel level, const std::string& text)
+ : level(level), text(text) {
+ }
+ };
+ typedef std::map<uint64_t, Callout> Callouts;
+ typedef std::map<std::string, service_daemon::AttributeValue> Attributes;
+
+ struct Pool {
+ std::string name;
+ Callouts callouts;
+ Attributes attributes;
+
+ Pool(const std::string& name) : name(name) {
+ }
+ };
+
+ typedef std::map<int64_t, Pool> Pools;
+
+ CephContext *m_cct;
+ RadosRef m_rados;
+ Threads<ImageCtxT>* m_threads;
+
+ Mutex m_lock;
+ Pools m_pools;
+ uint64_t m_callout_id = service_daemon::CALLOUT_ID_NONE;
+
+ Context* m_timer_ctx = nullptr;
+
+ void schedule_update_status();
+ void update_status();
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_H
diff --git a/src/tools/rbd_mirror/Threads.cc b/src/tools/rbd_mirror/Threads.cc
new file mode 100644
index 00000000..ca0a8b0f
--- /dev/null
+++ b/src/tools/rbd_mirror/Threads.cc
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/Threads.h"
+#include "common/Timer.h"
+#include "common/WorkQueue.h"
+#include "librbd/ImageCtx.h"
+
+namespace rbd {
+namespace mirror {
+
+template <typename I>
+Threads<I>::Threads(CephContext *cct) : timer_lock("Threads::timer_lock") {
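+  // one shared pool for all rbd-mirror ops, sized by the rbd_op_threads
+  // option (the "Journaler" names appear to be historical); passing the
+  // option name presumably lets the pool resize when the setting changes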
+ thread_pool = new ThreadPool(cct, "Journaler::thread_pool", "tp_journal",
+ cct->_conf.get_val<uint64_t>("rbd_op_threads"),
+ "rbd_op_threads");
+ thread_pool->start();
+
+ work_queue = new ContextWQ("Journaler::work_queue",
+ cct->_conf.get_val<uint64_t>("rbd_op_thread_timeout"),
+ thread_pool);
+
+ timer = new SafeTimer(cct, timer_lock, true);
+ timer->init();
+}
+
+template <typename I>
+Threads<I>::~Threads() {
+ {
+ Mutex::Locker timer_locker(timer_lock);
+ timer->shutdown();
+ }
+ delete timer;
+
+ work_queue->drain();
+ delete work_queue;
+
+ thread_pool->stop();
+ delete thread_pool;
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::Threads<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/Threads.h b/src/tools/rbd_mirror/Threads.h
new file mode 100644
index 00000000..f52e8837
--- /dev/null
+++ b/src/tools/rbd_mirror/Threads.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_THREADS_H
+#define CEPH_RBD_MIRROR_THREADS_H
+
+#include "common/Mutex.h"
+
+class CephContext;
+class ContextWQ;
+class SafeTimer;
+class ThreadPool;
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+struct Threads {
+ ThreadPool *thread_pool = nullptr;
+ ContextWQ *work_queue = nullptr;
+
+ SafeTimer *timer = nullptr;
+ Mutex timer_lock;
+
+ explicit Threads(CephContext *cct);
+ Threads(const Threads&) = delete;
+ Threads& operator=(const Threads&) = delete;
+
+ ~Threads();
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::Threads<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_THREADS_H
diff --git a/src/tools/rbd_mirror/Types.cc b/src/tools/rbd_mirror/Types.cc
new file mode 100644
index 00000000..74fe318e
--- /dev/null
+++ b/src/tools/rbd_mirror/Types.cc
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/Types.h"
+
+namespace rbd {
+namespace mirror {
+
+std::ostream &operator<<(std::ostream &os, const ImageId &image_id) {
+ return os << "global id=" << image_id.global_id << ", "
+ << "id=" << image_id.id;
+}
+
+std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer) {
+ return lhs << "uuid: " << peer.uuid
+ << " cluster: " << peer.cluster_name
+ << " client: " << peer.client_name;
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h
new file mode 100644
index 00000000..ed3b9d8a
--- /dev/null
+++ b/src/tools/rbd_mirror/Types.h
@@ -0,0 +1,123 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_TYPES_H
+#define CEPH_RBD_MIRROR_TYPES_H
+
+#include <iostream>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+
+namespace rbd {
+namespace mirror {
+
+// Performance counters
+enum {
+ l_rbd_mirror_first = 27000,
+ l_rbd_mirror_replay,
+ l_rbd_mirror_replay_bytes,
+ l_rbd_mirror_replay_latency,
+ l_rbd_mirror_last,
+};
+
+typedef std::shared_ptr<librados::Rados> RadosRef;
+typedef std::shared_ptr<librados::IoCtx> IoCtxRef;
+typedef std::shared_ptr<librbd::Image> ImageRef;
+
+struct ImageId {
+ std::string global_id;
+ std::string id;
+
+ explicit ImageId(const std::string &global_id) : global_id(global_id) {
+ }
+ ImageId(const std::string &global_id, const std::string &id)
+ : global_id(global_id), id(id) {
+ }
+
+ inline bool operator==(const ImageId &rhs) const {
+ return (global_id == rhs.global_id && id == rhs.id);
+ }
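+  // note: ordering (and thus ImageIds set membership) considers only the
+  // global_id, so a set holds at most one entry per global image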
+ inline bool operator<(const ImageId &rhs) const {
+ return global_id < rhs.global_id;
+ }
+};
+
+std::ostream &operator<<(std::ostream &, const ImageId &image_id);
+
+typedef std::set<ImageId> ImageIds;
+
+struct Peer {
+ std::string peer_uuid;
+ librados::IoCtx io_ctx;
+
+ Peer() {
+ }
+ Peer(const std::string &peer_uuid) : peer_uuid(peer_uuid) {
+ }
+ Peer(const std::string &peer_uuid, librados::IoCtx& io_ctx)
+ : peer_uuid(peer_uuid), io_ctx(io_ctx) {
+ }
+
+ inline bool operator<(const Peer &rhs) const {
+ return peer_uuid < rhs.peer_uuid;
+ }
+};
+
+typedef std::set<Peer> Peers;
+
+struct PeerSpec {
+ PeerSpec() = default;
+ PeerSpec(const std::string &uuid, const std::string &cluster_name,
+ const std::string &client_name)
+ : uuid(uuid), cluster_name(cluster_name), client_name(client_name)
+ {
+ }
+ PeerSpec(const librbd::mirror_peer_t &peer) :
+ uuid(peer.uuid),
+ cluster_name(peer.cluster_name),
+ client_name(peer.client_name)
+ {
+ }
+
+ std::string uuid;
+ std::string cluster_name;
+ std::string client_name;
+
+ /// optional config properties
+ std::string mon_host;
+ std::string key;
+
+ bool operator==(const PeerSpec& rhs) const {
+ return (uuid == rhs.uuid &&
+ cluster_name == rhs.cluster_name &&
+ client_name == rhs.client_name &&
+ mon_host == rhs.mon_host &&
+ key == rhs.key);
+ }
+ bool operator<(const PeerSpec& rhs) const {
+ if (uuid != rhs.uuid) {
+ return uuid < rhs.uuid;
+ } else if (cluster_name != rhs.cluster_name) {
+ return cluster_name < rhs.cluster_name;
+ } else if (client_name != rhs.client_name) {
+ return client_name < rhs.client_name;
+    } else if (mon_host != rhs.mon_host) {
+ return mon_host < rhs.mon_host;
+ } else {
+ return key < rhs.key;
+ }
+ }
+};
+
+std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer);
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_TYPES_H
diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc
new file mode 100644
index 00000000..a0e9fd90
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc
@@ -0,0 +1,290 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Policy.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::SnapshotPurgeRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+void SnapshotPurgeRequest<I>::send() {
+ open_image();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::open_image() {
+ dout(10) << dendl;
+ m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false);
+
+ {
+ RWLock::WLocker snap_locker(m_image_ctx->snap_lock);
+ m_image_ctx->set_journal_policy(new JournalPolicy());
+ }
+
+ Context *ctx = create_context_callback<
+ SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_open_image>(
+ this);
+ m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx);
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_open_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to open image '" << m_image_id << "': " << cpp_strerror(r)
+ << dendl;
+ m_image_ctx->destroy();
+ m_image_ctx = nullptr;
+
+ finish(r);
+ return;
+ }
+
+ acquire_lock();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::acquire_lock() {
+ dout(10) << dendl;
+
+ m_image_ctx->owner_lock.get_read();
+ if (m_image_ctx->exclusive_lock == nullptr) {
+ m_image_ctx->owner_lock.put_read();
+
+ derr << "exclusive lock not enabled" << dendl;
+ m_ret_val = -EINVAL;
+ close_image();
+ return;
+ }
+
+ m_image_ctx->exclusive_lock->acquire_lock(create_context_callback<
+ SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_acquire_lock>(
+ this));
+ m_image_ctx->owner_lock.put_read();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_acquire_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ {
+ RWLock::RLocker snap_locker(m_image_ctx->snap_lock);
+ m_snaps = m_image_ctx->snaps;
+ }
+ snap_unprotect();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::snap_unprotect() {
+ if (m_snaps.empty()) {
+ close_image();
+ return;
+ }
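+
+  // operate on the back of m_snaps; each completed removal pops that entry
+  // and re-enters this state until the vector is drained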
+
+ librados::snap_t snap_id = m_snaps.back();
+ m_image_ctx->snap_lock.get_read();
+ int r = m_image_ctx->get_snap_namespace(snap_id, &m_snap_namespace);
+ if (r < 0) {
+ m_image_ctx->snap_lock.put_read();
+
+ derr << "failed to get snap namespace: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ r = m_image_ctx->get_snap_name(snap_id, &m_snap_name);
+ if (r < 0) {
+ m_image_ctx->snap_lock.put_read();
+
+ derr << "failed to get snap name: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ bool is_protected;
+ r = m_image_ctx->is_snap_protected(snap_id, &is_protected);
+ if (r < 0) {
+ m_image_ctx->snap_lock.put_read();
+
+ derr << "failed to get snap protection status: " << cpp_strerror(r)
+ << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+ m_image_ctx->snap_lock.put_read();
+
+ if (!is_protected) {
+ snap_remove();
+ return;
+ }
+
+ dout(10) << "snap_id=" << snap_id << ", "
+ << "snap_namespace=" << m_snap_namespace << ", "
+ << "snap_name=" << m_snap_name << dendl;
+
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ derr << "lost exclusive lock" << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_snap_unprotect(r);
+ finish_op_ctx->complete(0);
+ });
+ RWLock::RLocker owner_locker(m_image_ctx->owner_lock);
+ m_image_ctx->operations->execute_snap_unprotect(
+ m_snap_namespace, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_snap_unprotect(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBUSY) {
+ dout(10) << "snapshot in-use" << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ } else if (r < 0) {
+ derr << "failed to unprotect snapshot: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ {
+ // avoid the need to refresh to delete the newly unprotected snapshot
+ RWLock::RLocker snap_locker(m_image_ctx->snap_lock);
+ librados::snap_t snap_id = m_snaps.back();
+ auto snap_info_it = m_image_ctx->snap_info.find(snap_id);
+ if (snap_info_it != m_image_ctx->snap_info.end()) {
+ snap_info_it->second.protection_status =
+ RBD_PROTECTION_STATUS_UNPROTECTED;
+ }
+ }
+
+ snap_remove();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::snap_remove() {
+ librados::snap_t snap_id = m_snaps.back();
+ dout(10) << "snap_id=" << snap_id << ", "
+ << "snap_namespace=" << m_snap_namespace << ", "
+ << "snap_name=" << m_snap_name << dendl;
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ derr << "lost exclusive lock" << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
+ handle_snap_remove(r);
+ finish_op_ctx->complete(0);
+ });
+ RWLock::RLocker owner_locker(m_image_ctx->owner_lock);
+ m_image_ctx->operations->execute_snap_remove(
+ m_snap_namespace, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_snap_remove(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBUSY) {
+ dout(10) << "snapshot in-use" << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ } else if (r < 0) {
+ derr << "failed to remove snapshot: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ m_snaps.pop_back();
+ snap_unprotect();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::close_image() {
+ dout(10) << dendl;
+
+ m_image_ctx->state->close(create_context_callback<
+ SnapshotPurgeRequest<I>,
+ &SnapshotPurgeRequest<I>::handle_close_image>(this));
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_close_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_image_ctx->destroy();
+ m_image_ctx = nullptr;
+
+ if (r < 0) {
+ derr << "failed to close: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+ finish(0);
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::finish(int r) {
+ if (m_ret_val < 0) {
+ r = m_ret_val;
+ }
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+template <typename I>
+Context *SnapshotPurgeRequest<I>::start_lock_op(int* r) {
+ RWLock::RLocker owner_locker(m_image_ctx->owner_lock);
+ return m_image_ctx->exclusive_lock->start_op(r);
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h
new file mode 100644
index 00000000..b8b635fe
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h
@@ -0,0 +1,104 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H
+
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include <string>
+#include <vector>
+
+class Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SnapshotPurgeRequest {
+public:
+ static SnapshotPurgeRequest* create(librados::IoCtx &io_ctx,
+ const std::string &image_id,
+ Context *on_finish) {
+ return new SnapshotPurgeRequest(io_ctx, image_id, on_finish);
+ }
+
+ SnapshotPurgeRequest(librados::IoCtx &io_ctx, const std::string &image_id,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_image_id(image_id), m_on_finish(on_finish) {
+ }
+
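+  // the request deletes itself after completing on_finish (see finish()), so
+  // a typical caller just does (sketch):
+  //   SnapshotPurgeRequest<>::create(io_ctx, image_id, on_finish)->send();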
+ void send();
+
+private:
+ /*
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * OPEN_IMAGE
+ * |
+ * v
+ * ACQUIRE_LOCK
+ * |
+ * | (repeat for each snapshot)
+ * |/------------------------\
+ * | |
+ * v (skip if not needed) |
+ * SNAP_UNPROTECT |
+ * | |
+ * v (skip if not needed) |
+ * SNAP_REMOVE -----------------/
+ * |
+ * v
+ * CLOSE_IMAGE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_image_id;
+ Context *m_on_finish;
+
+ ImageCtxT *m_image_ctx = nullptr;
+ int m_ret_val = 0;
+
+ std::vector<librados::snap_t> m_snaps;
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+ std::string m_snap_name;
+
+ void open_image();
+ void handle_open_image(int r);
+
+ void acquire_lock();
+ void handle_acquire_lock(int r);
+
+ void snap_unprotect();
+ void handle_snap_unprotect(int r);
+
+ void snap_remove();
+ void handle_snap_remove(int r);
+
+ void close_image();
+ void handle_close_image(int r);
+
+ void finish(int r);
+
+ Context *start_lock_op(int* r);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc
new file mode 100644
index 00000000..92db22ca
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc
@@ -0,0 +1,384 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/TrashWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/ResetRequest.h"
+#include "librbd/trash/MoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashMoveRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void TrashMoveRequest<I>::send() {
+ get_mirror_image_id();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::get_mirror_image_id() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashMoveRequest<I>,
+ &TrashMoveRequest<I>::handle_get_mirror_image_id>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_get_mirror_image_id(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_image_id_finish(&bl_it,
+ &m_image_id);
+ }
+ if (r == -ENOENT) {
+ dout(10) << "image " << m_global_image_id << " is not mirrored" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "error retrieving local id for image " << m_global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ get_tag_owner();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::get_tag_owner() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_get_tag_owner>(this);
+ librbd::Journal<I>::get_tag_owner(m_io_ctx, m_image_id, &m_mirror_uuid,
+ m_op_work_queue, ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_get_tag_owner(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "error retrieving image primary info for image "
+ << m_global_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ } else if (r != -ENOENT) {
+ if (m_mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ dout(10) << "image " << m_global_image_id << " is local primary" << dendl;
+ finish(-EPERM);
+ return;
+ } else if (m_mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID &&
+ !m_resync) {
+ dout(10) << "image " << m_global_image_id << " is orphaned" << dendl;
+ finish(-EPERM);
+ return;
+ }
+ }
+
+ disable_mirror_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::disable_mirror_image() {
+ dout(10) << dendl;
+
+ cls::rbd::MirrorImage mirror_image;
+ mirror_image.global_image_id = m_global_image_id;
+ mirror_image.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_set(&op, m_image_id, mirror_image);
+
+ auto aio_comp = create_rados_callback<
+ TrashMoveRequest<I>,
+ &TrashMoveRequest<I>::handle_disable_mirror_image>(this);
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_disable_mirror_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "local image is not mirrored, aborting deletion." << dendl;
+ finish(r);
+ return;
+ } else if (r == -EEXIST || r == -EINVAL) {
+ derr << "cannot disable mirroring for image " << m_global_image_id
+ << ": global_image_id has changed/reused: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "cannot disable mirroring for image " << m_global_image_id
+ << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ reset_journal();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::reset_journal() {
+ dout(10) << dendl;
+
+ // ensure that if the image is recovered any peers will split-brain
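+  // (the journal is recreated with a fresh tag owned by the local cluster;
+  // a peer replaying against it can no longer match tags and should detect
+  // split-brain rather than silently diverge)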
+ auto ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_reset_journal>(this);
+ auto req = librbd::journal::ResetRequest<I>::create(
+ m_io_ctx, m_image_id, librbd::Journal<>::IMAGE_CLIENT_ID,
+ librbd::Journal<>::LOCAL_MIRROR_UUID, m_op_work_queue, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_reset_journal(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to reset journal: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ open_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::open_image() {
+ dout(10) << dendl;
+
+ m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false);
+
+ {
+ // don't attempt to open the journal
+ RWLock::WLocker snap_locker(m_image_ctx->snap_lock);
+ m_image_ctx->set_journal_policy(new JournalPolicy());
+ }
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_open_image>(this);
+ m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_open_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to open image: " << cpp_strerror(r) << dendl;
+ m_image_ctx->destroy();
+ m_image_ctx = nullptr;
+ finish(r);
+ return;
+ }
+
+ if (m_image_ctx->old_format) {
+ derr << "cannot move v1 image to trash" << dendl;
+ m_ret_val = -EINVAL;
+ close_image();
+ return;
+ }
+
+ acquire_lock();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::acquire_lock() {
+ m_image_ctx->owner_lock.get_read();
+ if (m_image_ctx->exclusive_lock == nullptr) {
+ derr << "exclusive lock feature not enabled" << dendl;
+ m_image_ctx->owner_lock.put_read();
+ m_ret_val = -EINVAL;
+ close_image();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_acquire_lock>(this);
+ m_image_ctx->exclusive_lock->block_requests(0);
+ m_image_ctx->exclusive_lock->acquire_lock(ctx);
+ m_image_ctx->owner_lock.put_read();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_acquire_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ trash_move();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::trash_move() {
+ dout(10) << dendl;
+
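+  // defer the actual deletion by the rbd_mirroring_delete_delay window so an
+  // accidentally disabled image can still be restored from the trash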
+ utime_t delete_time{ceph_clock_now()};
+ utime_t deferment_end_time{delete_time};
+ deferment_end_time +=
+ m_image_ctx->config.template get_val<uint64_t>("rbd_mirroring_delete_delay");
+
+ m_trash_image_spec = {
+ cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING, m_image_ctx->name, delete_time,
+ deferment_end_time};
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_trash_move>(this);
+ auto req = librbd::trash::MoveRequest<I>::create(
+ m_io_ctx, m_image_id, m_trash_image_spec, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_trash_move(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to move image to trash: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ m_moved_to_trash = true;
+ remove_mirror_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::remove_mirror_image() {
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_remove(&op, m_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashMoveRequest<I>,
+ &TrashMoveRequest<I>::handle_remove_mirror_image>(this);
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_remove_mirror_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "local image is not mirrored" << dendl;
+ } else if (r < 0) {
+ derr << "failed to remove mirror image state for " << m_global_image_id
+ << ": " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ }
+
+ close_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::close_image() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_close_image>(this);
+ m_image_ctx->state->close(ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_close_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_image_ctx->destroy();
+ m_image_ctx = nullptr;
+
+ if (r < 0) {
+ derr << "failed to close image: " << cpp_strerror(r) << dendl;
+ }
+
+ // don't send notification if we failed
+ if (!m_moved_to_trash) {
+ finish(0);
+ return;
+ }
+
+ notify_trash_add();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::notify_trash_add() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_notify_trash_add>(this);
+ librbd::TrashWatcher<I>::notify_image_added(m_io_ctx, m_image_id,
+ m_trash_image_spec, ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_notify_trash_add(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::finish(int r) {
+ if (m_ret_val < 0) {
+ r = m_ret_val;
+ }
+
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h
new file mode 100644
index 00000000..07b7432e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h
@@ -0,0 +1,136 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include <boost/optional.hpp>
+#include <string>
+
+struct Context;
+class ContextWQ;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class TrashMoveRequest {
+public:
+ static TrashMoveRequest* create(librados::IoCtx& io_ctx,
+ const std::string& global_image_id,
+ bool resync, ContextWQ* op_work_queue,
+ Context* on_finish) {
+ return new TrashMoveRequest(io_ctx, global_image_id, resync, op_work_queue,
+ on_finish);
+ }
+
+ TrashMoveRequest(librados::IoCtx& io_ctx, const std::string& global_image_id,
+ bool resync, ContextWQ* op_work_queue, Context* on_finish)
+ : m_io_ctx(io_ctx), m_global_image_id(global_image_id), m_resync(resync),
+ m_op_work_queue(op_work_queue), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /*
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_MIRROR_IMAGE_ID
+ * |
+ * v
+ * GET_TAG_OWNER
+ * |
+ * v
+ * DISABLE_MIRROR_IMAGE
+ * |
+ * v
+ * RESET_JOURNAL
+ * |
+ * v
+ * OPEN_IMAGE
+ * |
+ * v
+ * ACQUIRE_LOCK
+ * |
+ * v
+ * TRASH_MOVE
+ * |
+ * v
+ * REMOVE_MIRROR_IMAGE
+ * |
+ * v
+ * CLOSE_IMAGE
+ * |
+ * v
+ * NOTIFY_TRASH_ADD
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_global_image_id;
+ bool m_resync;
+ ContextWQ *m_op_work_queue;
+ Context *m_on_finish;
+
+ ceph::bufferlist m_out_bl;
+ std::string m_image_id;
+ std::string m_mirror_uuid;
+ cls::rbd::TrashImageSpec m_trash_image_spec;
+  ImageCtxT *m_image_ctx = nullptr;
+ int m_ret_val = 0;
+ bool m_moved_to_trash = false;
+
+ void get_mirror_image_id();
+ void handle_get_mirror_image_id(int r);
+
+ void get_tag_owner();
+ void handle_get_tag_owner(int r);
+
+ void disable_mirror_image();
+ void handle_disable_mirror_image(int r);
+
+ void reset_journal();
+ void handle_reset_journal(int r);
+
+ void open_image();
+ void handle_open_image(int r);
+
+ void acquire_lock();
+ void handle_acquire_lock(int r);
+
+ void trash_move();
+ void handle_trash_move(int r);
+
+ void remove_mirror_image();
+ void handle_remove_mirror_image(int r);
+
+ void close_image();
+ void handle_close_image(int r);
+
+ void notify_trash_add();
+ void handle_notify_trash_add(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc
new file mode 100644
index 00000000..e7c725dc
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc
@@ -0,0 +1,265 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h"
+#include "include/ceph_assert.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/TrashWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/trash/RemoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashRemoveRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void TrashRemoveRequest<I>::send() {
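+  // start from a retryable error; individual states switch this to
+  // ERROR_RESULT_RETRY_IMMEDIATELY when the image is only transiently busy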
+ *m_error_result = ERROR_RESULT_RETRY;
+
+ get_trash_image_spec();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::get_trash_image_spec() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::trash_get_start(&op, m_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_get_trash_image_spec>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_get_trash_image_spec(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::trash_get_finish(&bl_it, &m_trash_image_spec);
+ }
+
+ if (r == -ENOENT || (r >= 0 && m_trash_image_spec.source !=
+ cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING)) {
+ dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl;
+ finish(0);
+ return;
+ } else if (r < 0) {
+ derr << "error getting image id " << m_image_id << " info from trash: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL &&
+ m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) {
+ dout(10) << "image " << m_image_id << " is not in an expected trash state: "
+ << m_trash_image_spec.state << dendl;
+ *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY;
+ finish(-EBUSY);
+ return;
+ }
+
+ set_trash_state();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::set_trash_state() {
+ if (m_trash_image_spec.state == cls::rbd::TRASH_IMAGE_STATE_REMOVING) {
+ get_snap_context();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::trash_state_set(&op, m_image_id,
+ cls::rbd::TRASH_IMAGE_STATE_REMOVING,
+ cls::rbd::TRASH_IMAGE_STATE_NORMAL);
+
+ auto aio_comp = create_rados_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_set_trash_state>(this);
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_set_trash_state(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl;
+ finish(0);
+ return;
+ } else if (r < 0 && r != -EOPNOTSUPP) {
+ derr << "error setting trash image state for image id " << m_image_id
+ << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ get_snap_context();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::get_snap_context() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::get_snapcontext_start(&op);
+
+ std::string header_oid = librbd::util::header_name(m_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_get_snap_context>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(header_oid, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_get_snap_context(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ::SnapContext snapc;
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::get_snapcontext_finish(&bl_it, &snapc);
+ }
+ if (r < 0 && r != -ENOENT) {
+ derr << "error retrieving snapshot context for image "
+ << m_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_has_snapshots = (!snapc.empty());
+ purge_snapshots();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::purge_snapshots() {
+ if (!m_has_snapshots) {
+ remove_image();
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = create_context_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_purge_snapshots>(this);
+ auto req = SnapshotPurgeRequest<I>::create(m_io_ctx, m_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_purge_snapshots(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBUSY) {
+ dout(10) << "snapshots still in-use" << dendl;
+ *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to purge image snapshots: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ remove_image();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::remove_image() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_remove_image>(this);
+ auto req = librbd::trash::RemoveRequest<I>::create(
+ m_io_ctx, m_image_id, m_op_work_queue, true, m_progress_ctx,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_remove_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r == -ENOTEMPTY) {
+    // the image must still have a clone v2 snapshot associated with a child
+ dout(10) << "snapshots still in-use" << dendl;
+ *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY;
+ finish(-EBUSY);
+ return;
+ }
+
+ if (r < 0 && r != -ENOENT) {
+    derr << "error removing image " << m_image_id << " from local pool: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ notify_trash_removed();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::notify_trash_removed() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_notify_trash_removed>(this);
+ librbd::TrashWatcher<I>::notify_image_removed(m_io_ctx, m_image_id, ctx);
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_notify_trash_removed(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h
new file mode 100644
index 00000000..d2295e8e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h
@@ -0,0 +1,113 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
+
+#include "include/rados/librados.hpp"
+#include "include/buffer.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/internal.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+#include <string>
+#include <vector>
+
+class Context;
+class ContextWQ;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class TrashRemoveRequest {
+public:
+ static TrashRemoveRequest* create(librados::IoCtx &io_ctx,
+ const std::string &image_id,
+ ErrorResult *error_result,
+ ContextWQ *op_work_queue,
+ Context *on_finish) {
+ return new TrashRemoveRequest(io_ctx, image_id, error_result, op_work_queue,
+ on_finish);
+ }
+
+ TrashRemoveRequest(librados::IoCtx &io_ctx, const std::string &image_id,
+ ErrorResult *error_result, ContextWQ *op_work_queue,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_image_id(image_id), m_error_result(error_result),
+ m_op_work_queue(op_work_queue), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /*
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_TRASH_IMAGE_SPEC
+ * |
+ * v
+ * SET_TRASH_STATE
+ * |
+ * v
+ * GET_SNAP_CONTEXT
+ * |
+ * v
+ * PURGE_SNAPSHOTS
+ * |
+ * v
+ * TRASH_REMOVE
+ * |
+ * v
+ * NOTIFY_TRASH_REMOVE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_image_id;
+ ErrorResult *m_error_result;
+ ContextWQ *m_op_work_queue;
+ Context *m_on_finish;
+
+ ceph::bufferlist m_out_bl;
+ cls::rbd::TrashImageSpec m_trash_image_spec;
+ bool m_has_snapshots = false;
+ librbd::NoOpProgressContext m_progress_ctx;
+
+ void get_trash_image_spec();
+ void handle_get_trash_image_spec(int r);
+
+ void set_trash_state();
+ void handle_set_trash_state(int r);
+
+ void get_snap_context();
+ void handle_get_snap_context(int r);
+
+ void purge_snapshots();
+ void handle_purge_snapshots(int r);
+
+ void remove_image();
+ void handle_remove_image(int r);
+
+ void notify_trash_removed();
+ void handle_notify_trash_removed(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc
new file mode 100644
index 00000000..8735dfb7
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc
@@ -0,0 +1,384 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/TrashWatcher.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashWatcher: " \
+ << this << " " << __func__ << ": "
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+namespace {
+
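+// page size for trash directory listings; handle_trash_list() keeps
+// requesting pages until fewer than MAX_RETURN entries come back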
+const size_t MAX_RETURN = 1024;
+
+} // anonymous namespace
+
+template <typename I>
+TrashWatcher<I>::TrashWatcher(librados::IoCtx &io_ctx, Threads<I> *threads,
+ TrashListener& trash_listener)
+ : librbd::TrashWatcher<I>(io_ctx, threads->work_queue),
+ m_io_ctx(io_ctx), m_threads(threads), m_trash_listener(trash_listener),
+ m_lock(librbd::util::unique_lock_name(
+ "rbd::mirror::image_deleter::TrashWatcher", this)) {
+}
+
+template <typename I>
+void TrashWatcher<I>::init(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ m_on_init_finish = on_finish;
+
+ ceph_assert(!m_trash_list_in_progress);
+ m_trash_list_in_progress = true;
+ }
+
+ create_trash();
+}
+
+template <typename I>
+void TrashWatcher<I>::shut_down(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+
+ ceph_assert(!m_shutting_down);
+ m_shutting_down = true;
+ if (m_timer_ctx != nullptr) {
+ m_threads->timer->cancel_event(m_timer_ctx);
+ m_timer_ctx = nullptr;
+ }
+ }
+
+ auto ctx = new FunctionContext([this, on_finish](int r) {
+ unregister_watcher(on_finish);
+ });
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_image_added(const std::string &image_id,
+ const cls::rbd::TrashImageSpec& spec) {
+ dout(10) << "image_id=" << image_id << dendl;
+
+ Mutex::Locker locker(m_lock);
+ add_image(image_id, spec);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_image_removed(const std::string &image_id) {
+ // ignore removals -- the image deleter will ignore -ENOENTs
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_rewatch_complete(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ if (r == -EBLACKLISTED) {
+ dout(0) << "detected client is blacklisted" << dendl;
+ return;
+ } else if (r == -ENOENT) {
+ dout(5) << "trash directory deleted" << dendl;
+ } else if (r < 0) {
+ derr << "unexpected error re-registering trash directory watch: "
+ << cpp_strerror(r) << dendl;
+ }
+ schedule_trash_list(30);
+}
+
+template <typename I>
+void TrashWatcher<I>::create_trash() {
+ dout(20) << dendl;
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ librados::ObjectWriteOperation op;
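+  // exclusive=false: creating the trash directory object is a no-op if it
+  // already exists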
+ op.create(false);
+
+ m_async_op_tracker.start_op();
+ auto aio_comp = create_rados_callback<
+ TrashWatcher<I>, &TrashWatcher<I>::handle_create_trash>(this);
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_create_trash(int r) {
+ dout(20) << "r=" << r << dendl;
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ Context* on_init_finish = nullptr;
+ if (r == -EBLACKLISTED || r == -ENOENT) {
+ if (r == -EBLACKLISTED) {
+ dout(0) << "detected client is blacklisted" << dendl;
+ } else {
+ dout(0) << "detected pool no longer exists" << dendl;
+ }
+
+ Mutex::Locker locker(m_lock);
+ std::swap(on_init_finish, m_on_init_finish);
+ m_trash_list_in_progress = false;
+ } else if (r < 0 && r != -EEXIST) {
+ derr << "failed to create trash object: " << cpp_strerror(r) << dendl;
+ {
+ Mutex::Locker locker(m_lock);
+ m_trash_list_in_progress = false;
+ }
+
+ schedule_trash_list(30);
+ } else {
+ register_watcher();
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void TrashWatcher<I>::register_watcher() {
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ // if the watch registration is in-flight, let the watcher
+ // handle the transition -- only (re-)register if it's not registered
+ if (!this->is_unregistered()) {
+ trash_list(true);
+ return;
+ }
+
+ // first time registering or the watch failed
+ dout(5) << dendl;
+ m_async_op_tracker.start_op();
+
+ Context *ctx = create_context_callback<
+ TrashWatcher, &TrashWatcher<I>::handle_register_watcher>(this);
+ this->register_watch(ctx);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_register_watcher(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_trash_list_in_progress);
+ if (r < 0) {
+ m_trash_list_in_progress = false;
+ }
+ }
+
+ Context *on_init_finish = nullptr;
+ if (r >= 0) {
+ trash_list(true);
+ } else if (r == -EBLACKLISTED) {
+ dout(0) << "detected client is blacklisted" << dendl;
+
+ Mutex::Locker locker(m_lock);
+ std::swap(on_init_finish, m_on_init_finish);
+ } else {
+ derr << "unexpected error registering trash directory watch: "
+ << cpp_strerror(r) << dendl;
+ schedule_trash_list(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void TrashWatcher<I>::unregister_watcher(Context* on_finish) {
+ dout(5) << dendl;
+
+ m_async_op_tracker.start_op();
+ Context *ctx = new FunctionContext([this, on_finish](int r) {
+ handle_unregister_watcher(r, on_finish);
+ });
+ this->unregister_watch(ctx);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_unregister_watcher(int r, Context* on_finish) {
+  dout(5) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "error unregistering watcher for trash directory: "
+ << cpp_strerror(r) << dendl;
+ }
+ m_async_op_tracker.finish_op();
+ on_finish->complete(0);
+}
+
+template <typename I>
+void TrashWatcher<I>::trash_list(bool initial_request) {
+ if (initial_request) {
+ m_async_op_tracker.start_op();
+ m_last_image_id = "";
+ }
+
+ dout(5) << "last_image_id=" << m_last_image_id << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::trash_list_start(&op, m_last_image_id, MAX_RETURN);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ TrashWatcher<I>, &TrashWatcher<I>::handle_trash_list>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_trash_list(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ std::map<std::string, cls::rbd::TrashImageSpec> images;
+ if (r >= 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::trash_list_finish(&bl_it, &images);
+ }
+
+ Context *on_init_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_trash_list_in_progress);
+ if (r >= 0) {
+ for (auto& image : images) {
+ add_image(image.first, image.second);
+ }
+ } else if (r == -ENOENT) {
+ r = 0;
+ }
+
+ if (r == -EBLACKLISTED) {
+ dout(0) << "detected client is blacklisted during trash refresh" << dendl;
+ m_trash_list_in_progress = false;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r >= 0 && images.size() < MAX_RETURN) {
+ m_trash_list_in_progress = false;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r < 0) {
+ m_trash_list_in_progress = false;
+ }
+ }
+
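+  // a full page means more entries may remain: resume the listing from the
+  // last key returned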
+ if (r >= 0 && images.size() == MAX_RETURN) {
+ m_last_image_id = images.rbegin()->first;
+ trash_list(false);
+ return;
+ } else if (r < 0 && r != -EBLACKLISTED) {
+ derr << "failed to retrieve trash directory: " << cpp_strerror(r) << dendl;
+ schedule_trash_list(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void TrashWatcher<I>::schedule_trash_list(double interval) {
+ Mutex::Locker timer_locker(m_threads->timer_lock);
+ Mutex::Locker locker(m_lock);
+ if (m_shutting_down || m_trash_list_in_progress || m_timer_ctx != nullptr) {
+ if (m_trash_list_in_progress && !m_deferred_trash_list) {
+ dout(5) << "deferring refresh until in-flight refresh completes" << dendl;
+ m_deferred_trash_list = true;
+ }
+ return;
+ }
+
+ dout(5) << dendl;
+ m_timer_ctx = m_threads->timer->add_event_after(
+ interval,
+ new FunctionContext([this](int r) {
+ process_trash_list();
+ }));
+}
+
+template <typename I>
+void TrashWatcher<I>::process_trash_list() {
+ dout(5) << dendl;
+
+ ceph_assert(m_threads->timer_lock.is_locked());
+ ceph_assert(m_timer_ctx != nullptr);
+ m_timer_ctx = nullptr;
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(!m_trash_list_in_progress);
+ m_trash_list_in_progress = true;
+ }
+
+ // execute outside of the timer's lock
+ m_async_op_tracker.start_op();
+ Context *ctx = new FunctionContext([this](int r) {
+ create_trash();
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
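+// Only trash entries created by mirroring are surfaced; the listener
+// callback is dispatched on the work queue so it runs outside m_lock.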
+template <typename I>
+void TrashWatcher<I>::add_image(const std::string& image_id,
+ const cls::rbd::TrashImageSpec& spec) {
+ if (spec.source != cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING) {
+ return;
+ }
+
+ ceph_assert(m_lock.is_locked());
+ auto& deferment_end_time = spec.deferment_end_time;
+ dout(10) << "image_id=" << image_id << ", "
+ << "deferment_end_time=" << deferment_end_time << dendl;
+
+ m_async_op_tracker.start_op();
+ auto ctx = new FunctionContext([this, image_id, deferment_end_time](int r) {
+ m_trash_listener.handle_trash_image(image_id, deferment_end_time);
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.h b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h
new file mode 100644
index 00000000..b6f69833
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h
@@ -0,0 +1,139 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_WATCHER_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_WATCHER_H
+
+#include "include/rados/librados.hpp"
+#include "common/AsyncOpTracker.h"
+#include "common/Mutex.h"
+#include "librbd/TrashWatcher.h"
+#include <set>
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace image_deleter {
+
+struct TrashListener;
+
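+// Illustrative lifecycle sketch (ownership and cleanup rest with the
+// caller):
+//
+//   auto *watcher = TrashWatcher<>::create(io_ctx, threads, listener);
+//   watcher->init(on_init);      // create trash object, watch and list it
+//   // ... listener receives handle_trash_image() callbacks ...
+//   watcher->shut_down(on_shut_down);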
+template <typename ImageCtxT = librbd::ImageCtx>
+class TrashWatcher : public librbd::TrashWatcher<ImageCtxT> {
+public:
+ static TrashWatcher* create(librados::IoCtx &io_ctx,
+ Threads<ImageCtxT> *threads,
+ TrashListener& trash_listener) {
+ return new TrashWatcher(io_ctx, threads, trash_listener);
+ }
+
+ TrashWatcher(librados::IoCtx &io_ctx, Threads<ImageCtxT> *threads,
+ TrashListener& trash_listener);
+ TrashWatcher(const TrashWatcher&) = delete;
+ TrashWatcher& operator=(const TrashWatcher&) = delete;
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+protected:
+ void handle_image_added(const std::string &image_id,
+ const cls::rbd::TrashImageSpec& spec) override;
+
+ void handle_image_removed(const std::string &image_id) override;
+
+ void handle_rewatch_complete(int r) override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * INIT
+ * |
+ * v
+ * CREATE_TRASH
+ * |
+ * v
+ * REGISTER_WATCHER
+ * |
+ * |/--------------------------------\
+ * | |
+ * |/---------\ |
+ * | | |
+ * v | (more images) |
+ * TRASH_LIST ---/ |
+ * | |
+ * |/----------------------------\ |
+ * | | |
+ * v | |
+ * <idle> --\ | |
+ * | | | |
+ * | |\---> IMAGE_ADDED -----/ |
+ * | | |
+ * | \----> WATCH_ERROR ---------/
+ * v
+ * SHUT_DOWN
+ * |
+ * v
+ * UNREGISTER_WATCHER
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx m_io_ctx;
+ Threads<ImageCtxT> *m_threads;
+ TrashListener& m_trash_listener;
+
+ std::string m_last_image_id;
+ bufferlist m_out_bl;
+
+ mutable Mutex m_lock;
+
+ Context *m_on_init_finish = nullptr;
+ Context *m_timer_ctx = nullptr;
+
+ AsyncOpTracker m_async_op_tracker;
+ bool m_trash_list_in_progress = false;
+ bool m_deferred_trash_list = false;
+ bool m_shutting_down = false;
+
+ void register_watcher();
+ void handle_register_watcher(int r);
+
+ void create_trash();
+ void handle_create_trash(int r);
+
+ void unregister_watcher(Context* on_finish);
+ void handle_unregister_watcher(int r, Context* on_finish);
+
+ void trash_list(bool initial_request);
+ void handle_trash_list(int r);
+
+ void schedule_trash_list(double interval);
+ void process_trash_list();
+
+ void get_mirror_uuid();
+ void handle_get_mirror_uuid(int r);
+
+ void add_image(const std::string& image_id,
+ const cls::rbd::TrashImageSpec& spec);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_WATCHER_H
diff --git a/src/tools/rbd_mirror/image_deleter/Types.h b/src/tools/rbd_mirror/image_deleter/Types.h
new file mode 100644
index 00000000..ac3bc64a
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/Types.h
@@ -0,0 +1,54 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
+
+#include "include/Context.h"
+#include "librbd/journal/Policy.h"
+#include <string>
+
+struct utime_t;
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+enum ErrorResult {
+ ERROR_RESULT_COMPLETE,
+ ERROR_RESULT_RETRY,
+ ERROR_RESULT_RETRY_IMMEDIATELY
+};
+
+struct TrashListener {
+ TrashListener() {
+ }
+ TrashListener(const TrashListener&) = delete;
+ TrashListener& operator=(const TrashListener&) = delete;
+
+ virtual ~TrashListener() {
+ }
+
+ virtual void handle_trash_image(const std::string& image_id,
+ const utime_t& deferment_end_time) = 0;
+
+};
+
+struct JournalPolicy : public librbd::journal::Policy {
+ bool append_disabled() const override {
+ return true;
+ }
+ bool journal_disabled() const override {
+ return true;
+ }
+
+ void allocate_tag_on_lock(Context *on_finish) override {
+ on_finish->complete(0);
+ }
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.cc b/src/tools/rbd_mirror/image_map/LoadRequest.cc
new file mode 100644
index 00000000..7387b476
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/LoadRequest.cc
@@ -0,0 +1,98 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+
+#include "LoadRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::LoadRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+static const uint32_t MAX_RETURN = 1024;
+
+using librbd::util::create_rados_callback;
+
+template<typename I>
+LoadRequest<I>::LoadRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
+ Context *on_finish)
+ : m_ioctx(ioctx),
+ m_image_mapping(image_mapping),
+ m_on_finish(on_finish) {
+}
+
+template<typename I>
+void LoadRequest<I>::send() {
+ dout(20) << dendl;
+
+ image_map_list();
+}
+
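+// Reads the persisted image map from the RBD_MIRROR_LEADER object in pages
+// of MAX_RETURN entries; m_start_after carries the continuation cursor
+// between requests.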
+template<typename I>
+void LoadRequest<I>::image_map_list() {
+ dout(20) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_map_list_start(&op, m_start_after, MAX_RETURN);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ LoadRequest, &LoadRequest::handle_image_map_list>(this);
+
+ m_out_bl.clear();
+ int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template<typename I>
+void LoadRequest<I>::handle_image_map_list(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ std::map<std::string, cls::rbd::MirrorImageMap> image_mapping;
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_map_list_finish(&it, &image_mapping);
+ }
+
+ if (r < 0) {
+ derr << ": failed to get image map: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_image_mapping->insert(image_mapping.begin(), image_mapping.end());
+
+ if (image_mapping.size() == MAX_RETURN) {
+ m_start_after = image_mapping.rbegin()->first;
+ image_map_list();
+ return;
+ }
+
+ finish(0);
+}
+
+template<typename I>
+void LoadRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_map::LoadRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.h b/src/tools/rbd_mirror/image_map/LoadRequest.h
new file mode 100644
index 00000000..7657e110
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/LoadRequest.h
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+
+class Context;
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
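+// Illustrative call sequence (the io_ctx and completion are supplied by the
+// caller):
+//
+//   std::map<std::string, cls::rbd::MirrorImageMap> mapping;
+//   auto *req = LoadRequest<>::create(io_ctx, &mapping, on_finish);
+//   req->send();  // self-deletes after completing on_finish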
+template<typename ImageCtxT = librbd::ImageCtx>
+class LoadRequest {
+public:
+ static LoadRequest *create(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
+ Context *on_finish) {
+ return new LoadRequest(ioctx, image_mapping, on_finish);
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * | . . . . . . . .
+ * v v . MAX_RETURN
+ * IMAGE_MAP_LIST. . . . . . .
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ LoadRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
+ Context *on_finish);
+
+ librados::IoCtx &m_ioctx;
+ std::map<std::string, cls::rbd::MirrorImageMap> *m_image_mapping;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+ std::string m_start_after;
+
+ void image_map_list();
+ void handle_image_map_list(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc
new file mode 100644
index 00000000..6fababdd
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Policy.cc
@@ -0,0 +1,406 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "Policy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+namespace {
+
+bool is_instance_action(ActionType action_type) {
+ switch (action_type) {
+ case ACTION_TYPE_ACQUIRE:
+ case ACTION_TYPE_RELEASE:
+ return true;
+ case ACTION_TYPE_NONE:
+ case ACTION_TYPE_MAP_UPDATE:
+ case ACTION_TYPE_MAP_REMOVE:
+ break;
+ }
+ return false;
+}
+
+} // anonymous namespace
+
+using ::operator<<;
+using librbd::util::unique_lock_name;
+
+Policy::Policy(librados::IoCtx &ioctx)
+ : m_ioctx(ioctx),
+ m_map_lock(unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock",
+ this)) {
+
+ // the map should contain at least one instance
+ std::string instance_id = stringify(ioctx.get_instance_id());
+ m_map.emplace(instance_id, std::set<std::string>{});
+}
+
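+// Seeds the in-memory instance-to-image map from the persisted mapping and
+// schedules (re)ACQUIRE actions so instances re-claim their images after a
+// leader restart.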
+void Policy::init(
+ const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) {
+ dout(20) << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+ for (auto& it : image_mapping) {
+ ceph_assert(!it.second.instance_id.empty());
+ auto map_result = m_map[it.second.instance_id].emplace(it.first);
+ ceph_assert(map_result.second);
+
+ auto image_state_result = m_image_states.emplace(
+ it.first, ImageState{it.second.instance_id, it.second.mapped_time});
+ ceph_assert(image_state_result.second);
+
+ // ensure we (re)send image acquire actions to the instance
+ auto& image_state = image_state_result.first->second;
+ auto start_action = set_state(&image_state,
+ StateTransition::STATE_INITIALIZING, false);
+ ceph_assert(start_action);
+ }
+}
+
+LookupInfo Policy::lookup(const std::string &global_image_id) {
+ dout(20) << "global_image_id=" << global_image_id << dendl;
+
+ RWLock::RLocker map_lock(m_map_lock);
+ LookupInfo info;
+
+ auto it = m_image_states.find(global_image_id);
+ if (it != m_image_states.end()) {
+ info.instance_id = it->second.instance_id;
+ info.mapped_time = it->second.mapped_time;
+ }
+ return info;
+}
+
+bool Policy::add_image(const std::string &global_image_id) {
+ dout(5) << "global_image_id=" << global_image_id << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+ auto image_state_result = m_image_states.emplace(global_image_id,
+ ImageState{});
+ auto& image_state = image_state_result.first->second;
+ if (image_state.state == StateTransition::STATE_INITIALIZING) {
+ // avoid duplicate acquire notifications upon leader startup
+ return false;
+ }
+
+ return set_state(&image_state, StateTransition::STATE_ASSOCIATING, false);
+}
+
+bool Policy::remove_image(const std::string &global_image_id) {
+ dout(5) << "global_image_id=" << global_image_id << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+ auto it = m_image_states.find(global_image_id);
+ if (it == m_image_states.end()) {
+ return false;
+ }
+
+ auto& image_state = it->second;
+ return set_state(&image_state, StateTransition::STATE_DISSOCIATING, false);
+}
+
+void Policy::add_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ RWLock::WLocker map_lock(m_map_lock);
+ for (auto& instance : instance_ids) {
+ ceph_assert(!instance.empty());
+ m_map.emplace(instance, std::set<std::string>{});
+ }
+
+ // post-failover, remove any dead instances and re-shuffle their images
+ if (m_initial_update) {
+ dout(5) << "initial instance update" << dendl;
+ m_initial_update = false;
+
+ std::set<std::string> alive_instances(instance_ids.begin(),
+ instance_ids.end());
+ InstanceIds dead_instances;
+ for (auto& map_pair : m_map) {
+ if (alive_instances.find(map_pair.first) == alive_instances.end()) {
+ dead_instances.push_back(map_pair.first);
+ }
+ }
+
+ if (!dead_instances.empty()) {
+ remove_instances(m_map_lock, dead_instances, global_image_ids);
+ }
+ }
+
+ GlobalImageIds shuffle_global_image_ids;
+ do_shuffle_add_instances(m_map, m_image_states.size(), &shuffle_global_image_ids);
+ dout(5) << "shuffling global_image_ids=[" << shuffle_global_image_ids
+ << "]" << dendl;
+ for (auto& global_image_id : shuffle_global_image_ids) {
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ if (set_state(&image_state, StateTransition::STATE_SHUFFLING, false)) {
+ global_image_ids->emplace(global_image_id);
+ }
+ }
+}
+
+void Policy::remove_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids) {
+ RWLock::WLocker map_lock(m_map_lock);
+ remove_instances(m_map_lock, instance_ids, global_image_ids);
+}
+
+void Policy::remove_instances(const RWLock& lock,
+ const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids) {
+ ceph_assert(m_map_lock.is_wlocked());
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ for (auto& instance_id : instance_ids) {
+ auto map_it = m_map.find(instance_id);
+ if (map_it == m_map.end()) {
+ continue;
+ }
+
+ auto& instance_global_image_ids = map_it->second;
+ if (instance_global_image_ids.empty()) {
+ m_map.erase(map_it);
+ continue;
+ }
+
+ m_dead_instances.insert(instance_id);
+ dout(5) << "force shuffling: instance_id=" << instance_id << ", "
+ << "global_image_ids=[" << instance_global_image_ids << "]" << dendl;
+ for (auto& global_image_id : instance_global_image_ids) {
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ if (is_state_scheduled(image_state,
+ StateTransition::STATE_DISSOCIATING)) {
+ // don't shuffle images that no longer exist
+ continue;
+ }
+
+ if (set_state(&image_state, StateTransition::STATE_SHUFFLING, true)) {
+ global_image_ids->emplace(global_image_id);
+ }
+ }
+ }
+}
+
+ActionType Policy::start_action(const std::string &global_image_id) {
+ RWLock::WLocker map_lock(m_map_lock);
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ auto& transition = image_state.transition;
+ ceph_assert(transition.action_type != ACTION_TYPE_NONE);
+
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "state=" << image_state.state << ", "
+ << "action_type=" << transition.action_type << dendl;
+ if (transition.start_policy_action) {
+ execute_policy_action(global_image_id, &image_state,
+ *transition.start_policy_action);
+ transition.start_policy_action = boost::none;
+ }
+ return transition.action_type;
+}
+
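+// Advances the image's state machine after an action completes. Returns
+// true when more work remains for this image: either the failed action
+// should be retried or a follow-up action is pending.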
+bool Policy::finish_action(const std::string &global_image_id, int r) {
+ RWLock::WLocker map_lock(m_map_lock);
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ auto& transition = image_state.transition;
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "state=" << image_state.state << ", "
+ << "action_type=" << transition.action_type << ", "
+ << "r=" << r << dendl;
+
+ // retry on failure unless it's an RPC message to an instance that is dead
+ if (r < 0 &&
+ (!is_instance_action(image_state.transition.action_type) ||
+ image_state.instance_id == UNMAPPED_INSTANCE_ID ||
+ m_dead_instances.find(image_state.instance_id) ==
+ m_dead_instances.end())) {
+ return true;
+ }
+
+ auto finish_policy_action = transition.finish_policy_action;
+ StateTransition::transit(image_state.state, &image_state.transition);
+ if (transition.finish_state) {
+ // in-progress state machine complete
+ ceph_assert(StateTransition::is_idle(*transition.finish_state));
+ image_state.state = *transition.finish_state;
+ image_state.transition = {};
+ }
+
+ if (StateTransition::is_idle(image_state.state) && image_state.next_state) {
+ // advance to pending state machine
+ bool start_action = set_state(&image_state, *image_state.next_state, false);
+ ceph_assert(start_action);
+ }
+
+ // image state may get purged in execute_policy_action()
+ bool pending_action = image_state.transition.action_type != ACTION_TYPE_NONE;
+ if (finish_policy_action) {
+ execute_policy_action(global_image_id, &image_state, *finish_policy_action);
+ }
+
+ return pending_action;
+}
+
+void Policy::execute_policy_action(
+ const std::string& global_image_id, ImageState* image_state,
+ StateTransition::PolicyAction policy_action) {
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "policy_action=" << policy_action << dendl;
+
+ switch (policy_action) {
+ case StateTransition::POLICY_ACTION_MAP:
+ map(global_image_id, image_state);
+ break;
+ case StateTransition::POLICY_ACTION_UNMAP:
+ unmap(global_image_id, image_state);
+ break;
+ case StateTransition::POLICY_ACTION_REMOVE:
+ if (image_state->state == StateTransition::STATE_UNASSOCIATED) {
+ ceph_assert(image_state->instance_id == UNMAPPED_INSTANCE_ID);
+ ceph_assert(!image_state->next_state);
+ m_image_states.erase(global_image_id);
+ }
+ break;
+ }
+}
+
+void Policy::map(const std::string& global_image_id, ImageState* image_state) {
+ ceph_assert(m_map_lock.is_wlocked());
+
+ std::string instance_id = image_state->instance_id;
+ if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) {
+ return;
+ }
+ if (is_dead_instance(instance_id)) {
+ unmap(global_image_id, image_state);
+ }
+
+ instance_id = do_map(m_map, global_image_id);
+ ceph_assert(!instance_id.empty());
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ image_state->instance_id = instance_id;
+ image_state->mapped_time = ceph_clock_now();
+
+ auto ins = m_map[instance_id].emplace(global_image_id);
+ ceph_assert(ins.second);
+}
+
+void Policy::unmap(const std::string &global_image_id,
+ ImageState* image_state) {
+ ceph_assert(m_map_lock.is_wlocked());
+
+ std::string instance_id = image_state->instance_id;
+ if (instance_id == UNMAPPED_INSTANCE_ID) {
+ return;
+ }
+
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ ceph_assert(!instance_id.empty());
+ m_map[instance_id].erase(global_image_id);
+ image_state->instance_id = UNMAPPED_INSTANCE_ID;
+ image_state->mapped_time = {};
+
+ if (is_dead_instance(instance_id) && m_map[instance_id].empty()) {
+ dout(5) << "removing dead instance_id=" << instance_id << dendl;
+ m_map.erase(instance_id);
+ m_dead_instances.erase(instance_id);
+ }
+}
+
+bool Policy::is_image_shuffling(const std::string &global_image_id) {
+ ceph_assert(m_map_lock.is_locked());
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+ auto& image_state = it->second;
+
+ // avoid attempting to re-shuffle a pending shuffle
+ auto result = is_state_scheduled(image_state,
+ StateTransition::STATE_SHUFFLING);
+ dout(20) << "global_image_id=" << global_image_id << ", "
+ << "result=" << result << dendl;
+ return result;
+}
+
+bool Policy::can_shuffle_image(const std::string &global_image_id) {
+ ceph_assert(m_map_lock.is_locked());
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ int migration_throttle = cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_image_policy_migration_throttle");
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+ auto& image_state = it->second;
+
+ utime_t last_shuffled_time = image_state.mapped_time;
+
+ // idle images that haven't been recently remapped can shuffle
+ utime_t now = ceph_clock_now();
+ auto result = (StateTransition::is_idle(image_state.state) &&
+ ((migration_throttle <= 0) ||
+ (now - last_shuffled_time >= migration_throttle)));
+ dout(10) << "global_image_id=" << global_image_id << ", "
+ << "migration_throttle=" << migration_throttle << ", "
+ << "last_shuffled_time=" << last_shuffled_time << ", "
+ << "result=" << result << dendl;
+ return result;
+}
+
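+// Starts the requested state machine immediately when the image is idle;
+// otherwise records it in next_state for finish_action() to pick up later.
+// Returns true only when a new action was started.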
+bool Policy::set_state(ImageState* image_state, StateTransition::State state,
+ bool ignore_current_state) {
+ if (!ignore_current_state && image_state->state == state) {
+ return false;
+ } else if (StateTransition::is_idle(image_state->state)) {
+ image_state->state = state;
+ image_state->next_state = boost::none;
+
+ StateTransition::transit(image_state->state, &image_state->transition);
+ ceph_assert(image_state->transition.action_type != ACTION_TYPE_NONE);
+ ceph_assert(!image_state->transition.finish_state);
+ return true;
+ }
+
+ image_state->next_state = state;
+ return false;
+}
+
+bool Policy::is_state_scheduled(const ImageState& image_state,
+ StateTransition::State state) const {
+ return (image_state.state == state ||
+ (image_state.next_state && *image_state.next_state == state));
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/Policy.h b/src/tools/rbd_mirror/image_map/Policy.h
new file mode 100644
index 00000000..590fdbfe
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Policy.h
@@ -0,0 +1,122 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+
+#include <map>
+#include <tuple>
+#include <boost/optional.hpp>
+
+#include "common/RWLock.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/image_map/StateTransition.h"
+#include "tools/rbd_mirror/image_map/Types.h"
+
+class Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class Policy {
+public:
+ Policy(librados::IoCtx &ioctx);
+
+ virtual ~Policy() {
+ }
+
+ // init -- called during initialization
+ void init(
+ const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping);
+
+ // lookup an image from the map
+ LookupInfo lookup(const std::string &global_image_id);
+
+ // add, remove
+ bool add_image(const std::string &global_image_id);
+ bool remove_image(const std::string &global_image_id);
+
+ // shuffle images when instances are added/removed
+ void add_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids);
+ void remove_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids);
+
+ ActionType start_action(const std::string &global_image_id);
+ bool finish_action(const std::string &global_image_id, int r);
+
+protected:
+ typedef std::map<std::string, std::set<std::string> > InstanceToImageMap;
+
+ bool is_dead_instance(const std::string instance_id) {
+ ceph_assert(m_map_lock.is_locked());
+ return m_dead_instances.find(instance_id) != m_dead_instances.end();
+ }
+
+ bool is_image_shuffling(const std::string &global_image_id);
+ bool can_shuffle_image(const std::string &global_image_id);
+
+ // map an image (global image id) to an instance
+ virtual std::string do_map(const InstanceToImageMap& map,
+ const std::string &global_image_id) = 0;
+
+ // shuffle images when instances are added/removed
+ virtual void do_shuffle_add_instances(
+ const InstanceToImageMap& map, size_t image_count,
+ std::set<std::string> *remap_global_image_ids) = 0;
+
+private:
+ struct ImageState {
+ std::string instance_id = UNMAPPED_INSTANCE_ID;
+ utime_t mapped_time;
+
+ ImageState() {}
+ ImageState(const std::string& instance_id, const utime_t& mapped_time)
+ : instance_id(instance_id), mapped_time(mapped_time) {
+ }
+
+ // active state and action
+ StateTransition::State state = StateTransition::STATE_UNASSOCIATED;
+ StateTransition::Transition transition;
+
+ // next scheduled state
+ boost::optional<StateTransition::State> next_state = boost::none;
+ };
+
+ typedef std::map<std::string, ImageState> ImageStates;
+
+ librados::IoCtx &m_ioctx;
+
+ RWLock m_map_lock; // protects m_map
+ InstanceToImageMap m_map; // instance_id -> global_id map
+
+ ImageStates m_image_states;
+ std::set<std::string> m_dead_instances;
+
+ bool m_initial_update = true;
+
+ void remove_instances(const RWLock& lock, const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids);
+
+ bool set_state(ImageState* image_state, StateTransition::State state,
+ bool ignore_current_state);
+
+ void execute_policy_action(const std::string& global_image_id,
+ ImageState* image_state,
+ StateTransition::PolicyAction policy_action);
+
+ void map(const std::string& global_image_id, ImageState* image_state);
+ void unmap(const std::string &global_image_id, ImageState* image_state);
+
+ bool is_state_scheduled(const ImageState& image_state,
+ StateTransition::State state) const;
+
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.cc b/src/tools/rbd_mirror/image_map/SimplePolicy.cc
new file mode 100644
index 00000000..f2680581
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/SimplePolicy.cc
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "SimplePolicy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::SimplePolicy: " << this \
+ << " " << __func__ << ": "
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+SimplePolicy::SimplePolicy(librados::IoCtx &ioctx)
+ : Policy(ioctx) {
+}
+
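+// Even-split target: the image count divided across live (non-dead)
+// instances, clamped to a minimum of one image per instance.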
+size_t SimplePolicy::calc_images_per_instance(const InstanceToImageMap& map,
+ size_t image_count) {
+ size_t nr_instances = 0;
+ for (auto const &it : map) {
+ if (!Policy::is_dead_instance(it.first)) {
+ ++nr_instances;
+ }
+ }
+ ceph_assert(nr_instances > 0);
+
+ size_t images_per_instance = image_count / nr_instances;
+ if (images_per_instance == 0) {
+ ++images_per_instance;
+ }
+
+ return images_per_instance;
+}
+
+void SimplePolicy::do_shuffle_add_instances(
+ const InstanceToImageMap& map, size_t image_count,
+ std::set<std::string> *remap_global_image_ids) {
+ uint64_t images_per_instance = calc_images_per_instance(map, image_count);
+ dout(5) << "images per instance=" << images_per_instance << dendl;
+
+ for (auto const &instance : map) {
+ if (instance.second.size() <= images_per_instance) {
+ continue;
+ }
+
+ auto it = instance.second.begin();
+ uint64_t cut_off = instance.second.size() - images_per_instance;
+
+ while (it != instance.second.end() && cut_off > 0) {
+ if (Policy::is_image_shuffling(*it)) {
+ --cut_off;
+ } else if (Policy::can_shuffle_image(*it)) {
+ --cut_off;
+ remap_global_image_ids->emplace(*it);
+ }
+
+ ++it;
+ }
+ }
+}
+
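+// Least-loaded placement: map the image to the live instance that currently
+// owns the fewest images.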
+std::string SimplePolicy::do_map(const InstanceToImageMap& map,
+ const std::string &global_image_id) {
+ auto min_it = map.end();
+ for (auto it = map.begin(); it != map.end(); ++it) {
+ ceph_assert(it->second.find(global_image_id) == it->second.end());
+ if (Policy::is_dead_instance(it->first)) {
+ continue;
+ } else if (min_it == map.end()) {
+ min_it = it;
+ } else if (it->second.size() < min_it->second.size()) {
+ min_it = it;
+ }
+ }
+
+ ceph_assert(min_it != map.end());
+ dout(20) << "global_image_id=" << global_image_id << " maps to instance_id="
+ << min_it->first << dendl;
+ return min_it->first;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.h b/src/tools/rbd_mirror/image_map/SimplePolicy.h
new file mode 100644
index 00000000..ad2071b2
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/SimplePolicy.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+
+#include "Policy.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class SimplePolicy : public Policy {
+public:
+ static SimplePolicy *create(librados::IoCtx &ioctx) {
+ return new SimplePolicy(ioctx);
+ }
+
+protected:
+ SimplePolicy(librados::IoCtx &ioctx);
+
+ std::string do_map(const InstanceToImageMap& map,
+ const std::string &global_image_id) override;
+
+ void do_shuffle_add_instances(
+ const InstanceToImageMap& map, size_t image_count,
+ std::set<std::string> *remap_global_image_ids) override;
+
+private:
+ size_t calc_images_per_instance(const InstanceToImageMap& map,
+ size_t image_count);
+
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
diff --git a/src/tools/rbd_mirror/image_map/StateTransition.cc b/src/tools/rbd_mirror/image_map/StateTransition.cc
new file mode 100644
index 00000000..ec5f07ff
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/StateTransition.cc
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <ostream>
+#include "include/ceph_assert.h"
+#include "StateTransition.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+std::ostream &operator<<(std::ostream &os,
+ const StateTransition::State &state) {
+ switch(state) {
+ case StateTransition::STATE_INITIALIZING:
+ os << "INITIALIZING";
+ break;
+ case StateTransition::STATE_ASSOCIATING:
+ os << "ASSOCIATING";
+ break;
+ case StateTransition::STATE_ASSOCIATED:
+ os << "ASSOCIATED";
+ break;
+ case StateTransition::STATE_SHUFFLING:
+ os << "SHUFFLING";
+ break;
+ case StateTransition::STATE_DISSOCIATING:
+ os << "DISSOCIATING";
+ break;
+ case StateTransition::STATE_UNASSOCIATED:
+ os << "UNASSOCIATED";
+ break;
+ }
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os,
+ const StateTransition::PolicyAction &policy_action) {
+ switch(policy_action) {
+ case StateTransition::POLICY_ACTION_MAP:
+ os << "MAP";
+ break;
+ case StateTransition::POLICY_ACTION_UNMAP:
+ os << "UNMAP";
+ break;
+ case StateTransition::POLICY_ACTION_REMOVE:
+ os << "REMOVE";
+ break;
+ }
+ return os;
+}
+
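+// Each entry maps (state, current action) to the follow-up Transition:
+// {next action, start policy hook, finish policy hook, terminal state}.
+// For example, {STATE_SHUFFLING, ACTION_TYPE_RELEASE} issues
+// ACTION_TYPE_MAP_UPDATE and runs POLICY_ACTION_MAP when that action starts.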
+const StateTransition::TransitionTable StateTransition::s_transition_table {
+ // state current_action Transition
+ // ---------------------------------------------------------------------------
+ {{STATE_INITIALIZING, ACTION_TYPE_NONE}, {ACTION_TYPE_ACQUIRE, {}, {},
+ {}}},
+ {{STATE_INITIALIZING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {},
+ {STATE_ASSOCIATED}}},
+
+ {{STATE_ASSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_MAP_UPDATE,
+ {POLICY_ACTION_MAP}, {}, {}}},
+ {{STATE_ASSOCIATING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {},
+ {}}},
+ {{STATE_ASSOCIATING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {},
+ {STATE_ASSOCIATED}}},
+
+ {{STATE_DISSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {},
+ {POLICY_ACTION_UNMAP}, {}}},
+ {{STATE_DISSOCIATING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_REMOVE, {},
+ {POLICY_ACTION_REMOVE}, {}}},
+ {{STATE_DISSOCIATING, ACTION_TYPE_MAP_REMOVE}, {ACTION_TYPE_NONE, {},
+ {}, {STATE_UNASSOCIATED}}},
+
+ {{STATE_SHUFFLING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {},
+ {POLICY_ACTION_UNMAP}, {}}},
+ {{STATE_SHUFFLING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_UPDATE,
+ {POLICY_ACTION_MAP}, {}, {}}},
+ {{STATE_SHUFFLING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {},
+ {}}},
+ {{STATE_SHUFFLING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {},
+ {STATE_ASSOCIATED}}}
+};
+
+void StateTransition::transit(State state, Transition* transition) {
+ auto it = s_transition_table.find({state, transition->action_type});
+ ceph_assert(it != s_transition_table.end());
+
+ *transition = it->second;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/StateTransition.h b/src/tools/rbd_mirror/image_map/StateTransition.h
new file mode 100644
index 00000000..02a5ce4e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/StateTransition.h
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+
+#include "tools/rbd_mirror/image_map/Types.h"
+#include <boost/optional.hpp>
+#include <map>
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class StateTransition {
+public:
+ enum State {
+ STATE_UNASSOCIATED,
+ STATE_INITIALIZING,
+ STATE_ASSOCIATING,
+ STATE_ASSOCIATED,
+ STATE_SHUFFLING,
+ STATE_DISSOCIATING
+ };
+
+ enum PolicyAction {
+ POLICY_ACTION_MAP,
+ POLICY_ACTION_UNMAP,
+ POLICY_ACTION_REMOVE
+ };
+
+ struct Transition {
+ // image map action
+ ActionType action_type = ACTION_TYPE_NONE;
+
+ // policy internal action
+ boost::optional<PolicyAction> start_policy_action;
+ boost::optional<PolicyAction> finish_policy_action;
+
+ // state machine complete
+ boost::optional<State> finish_state;
+
+ Transition() {
+ }
+ Transition(ActionType action_type,
+ const boost::optional<PolicyAction>& start_policy_action,
+ const boost::optional<PolicyAction>& finish_policy_action,
+ const boost::optional<State>& finish_state)
+ : action_type(action_type), start_policy_action(start_policy_action),
+ finish_policy_action(finish_policy_action), finish_state(finish_state) {
+ }
+ };
+
+ static bool is_idle(State state) {
+ return (state == STATE_UNASSOCIATED || state == STATE_ASSOCIATED);
+ }
+
+ static void transit(State state, Transition* transition);
+
+private:
+ typedef std::pair<State, ActionType> TransitionKey;
+ typedef std::map<TransitionKey, Transition> TransitionTable;
+
+ // image transition table
+ static const TransitionTable s_transition_table;
+};
+
+std::ostream &operator<<(std::ostream &os, const StateTransition::State &state);
+std::ostream &operator<<(std::ostream &os,
+ const StateTransition::PolicyAction &policy_action);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
diff --git a/src/tools/rbd_mirror/image_map/Types.cc b/src/tools/rbd_mirror/image_map/Types.cc
new file mode 100644
index 00000000..47de9c3c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Types.cc
@@ -0,0 +1,138 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+#include <iostream>
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+const std::string UNMAPPED_INSTANCE_ID("");
+
+namespace {
+
+template <typename E>
+class GetTypeVisitor : public boost::static_visitor<E> {
+public:
+ template <typename T>
+ inline E operator()(const T&) const {
+ return T::TYPE;
+ }
+};
+
+class EncodeVisitor : public boost::static_visitor<void> {
+public:
+ explicit EncodeVisitor(bufferlist &bl) : m_bl(bl) {
+ }
+
+ template <typename T>
+ inline void operator()(const T& t) const {
+ using ceph::encode;
+ encode(static_cast<uint32_t>(T::TYPE), m_bl);
+ t.encode(m_bl);
+ }
+private:
+ bufferlist &m_bl;
+};
+
+class DecodeVisitor : public boost::static_visitor<void> {
+public:
+ DecodeVisitor(__u8 version, bufferlist::const_iterator &iter)
+ : m_version(version), m_iter(iter) {
+ }
+
+ template <typename T>
+ inline void operator()(T& t) const {
+ t.decode(m_version, m_iter);
+ }
+private:
+ __u8 m_version;
+ bufferlist::const_iterator &m_iter;
+};
+
+class DumpVisitor : public boost::static_visitor<void> {
+public:
+ explicit DumpVisitor(Formatter *formatter, const std::string &key)
+ : m_formatter(formatter), m_key(key) {}
+
+ template <typename T>
+ inline void operator()(const T& t) const {
+ auto type = T::TYPE;
+ m_formatter->dump_string(m_key.c_str(), stringify(type));
+ t.dump(m_formatter);
+ }
+private:
+ ceph::Formatter *m_formatter;
+ std::string m_key;
+};
+
+} // anonymous namespace
+
+PolicyMetaType PolicyData::get_policy_meta_type() const {
+ return boost::apply_visitor(GetTypeVisitor<PolicyMetaType>(), policy_meta);
+}
+
+void PolicyData::encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ boost::apply_visitor(EncodeVisitor(bl), policy_meta);
+ ENCODE_FINISH(bl);
+}
+
+void PolicyData::decode(bufferlist::const_iterator& it) {
+ DECODE_START(1, it);
+
+ uint32_t policy_meta_type;
+ decode(policy_meta_type, it);
+
+ switch (policy_meta_type) {
+ case POLICY_META_TYPE_NONE:
+ policy_meta = PolicyMetaNone();
+ break;
+ default:
+ policy_meta = PolicyMetaUnknown();
+ break;
+ }
+
+ boost::apply_visitor(DecodeVisitor(struct_v, it), policy_meta);
+ DECODE_FINISH(it);
+}
+
+void PolicyData::dump(Formatter *f) const {
+ boost::apply_visitor(DumpVisitor(f, "policy_meta_type"), policy_meta);
+}
+
+void PolicyData::generate_test_instances(std::list<PolicyData *> &o) {
+ o.push_back(new PolicyData(PolicyMetaNone()));
+}
+
+std::ostream &operator<<(std::ostream &os, const ActionType& action_type) {
+ switch (action_type) {
+ case ACTION_TYPE_NONE:
+ os << "NONE";
+ break;
+ case ACTION_TYPE_MAP_UPDATE:
+ os << "MAP_UPDATE";
+ break;
+ case ACTION_TYPE_MAP_REMOVE:
+ os << "MAP_REMOVE";
+ break;
+ case ACTION_TYPE_ACQUIRE:
+ os << "ACQUIRE";
+ break;
+ case ACTION_TYPE_RELEASE:
+ os << "RELEASE";
+ break;
+ default:
+ os << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")";
+ break;
+ }
+ return os;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/Types.h b/src/tools/rbd_mirror/image_map/Types.h
new file mode 100644
index 00000000..5a97430f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Types.h
@@ -0,0 +1,130 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H
+
+#include <iosfwd>
+#include <map>
+#include <set>
+#include <string>
+#include <boost/variant.hpp>
+
+#include "include/buffer.h"
+#include "include/encoding.h"
+#include "include/utime.h"
+#include "tools/rbd_mirror/Types.h"
+
+struct Context;
+
+namespace ceph {
+class Formatter;
+}
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+extern const std::string UNMAPPED_INSTANCE_ID;
+
+struct Listener {
+ virtual ~Listener() {
+ }
+
+ virtual void acquire_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) = 0;
+ virtual void release_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) = 0;
+ virtual void remove_image(const std::string &mirror_uuid,
+ const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) = 0;
+};
+
+struct LookupInfo {
+ std::string instance_id = UNMAPPED_INSTANCE_ID;
+ utime_t mapped_time;
+};
+
+enum ActionType {
+ ACTION_TYPE_NONE,
+ ACTION_TYPE_MAP_UPDATE,
+ ACTION_TYPE_MAP_REMOVE,
+ ACTION_TYPE_ACQUIRE,
+ ACTION_TYPE_RELEASE
+};
+
+typedef std::vector<std::string> InstanceIds;
+typedef std::set<std::string> GlobalImageIds;
+typedef std::map<std::string, ActionType> ImageActionTypes;
+
+enum PolicyMetaType {
+ POLICY_META_TYPE_NONE = 0,
+};
+
+struct PolicyMetaNone {
+ static const PolicyMetaType TYPE = POLICY_META_TYPE_NONE;
+
+ PolicyMetaNone() {
+ }
+
+ void encode(bufferlist& bl) const {
+ }
+
+ void decode(__u8 version, bufferlist::const_iterator& it) {
+ }
+
+ void dump(Formatter *f) const {
+ }
+};
+
+struct PolicyMetaUnknown {
+ static const PolicyMetaType TYPE = static_cast<PolicyMetaType>(-1);
+
+ PolicyMetaUnknown() {
+ }
+
+ void encode(bufferlist& bl) const {
+ ceph_abort();
+ }
+
+ void decode(__u8 version, bufferlist::const_iterator& it) {
+ }
+
+ void dump(Formatter *f) const {
+ }
+};
+
+typedef boost::variant<PolicyMetaNone,
+ PolicyMetaUnknown> PolicyMeta;
+
+struct PolicyData {
+ PolicyData()
+ : policy_meta(PolicyMetaUnknown()) {
+ }
+ PolicyData(const PolicyMeta &policy_meta)
+ : policy_meta(policy_meta) {
+ }
+
+ PolicyMeta policy_meta;
+
+ PolicyMetaType get_policy_meta_type() const;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+
+ static void generate_test_instances(std::list<PolicyData *> &o);
+};
+
+WRITE_CLASS_ENCODER(PolicyData);
+
+std::ostream &operator<<(std::ostream &os, const ActionType &action_type);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H
diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.cc b/src/tools/rbd_mirror/image_map/UpdateRequest.cc
new file mode 100644
index 00000000..799c5670
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/UpdateRequest.cc
@@ -0,0 +1,100 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+
+#include "UpdateRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::UpdateRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+using librbd::util::create_rados_callback;
+
+static const uint32_t MAX_UPDATE = 256;
+
+template <typename I>
+UpdateRequest<I>::UpdateRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
+ std::set<std::string> &&remove_global_image_ids, Context *on_finish)
+ : m_ioctx(ioctx),
+ m_update_mapping(update_mapping),
+ m_remove_global_image_ids(remove_global_image_ids),
+ m_on_finish(on_finish) {
+}
+
+template <typename I>
+void UpdateRequest<I>::send() {
+ dout(20) << dendl;
+
+ update_image_map();
+}
+
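+// Drains the update and removal queues in batches of at most MAX_UPDATE ops
+// per librados write; handle_update_image_map() loops back here until both
+// queues are empty.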
+template <typename I>
+void UpdateRequest<I>::update_image_map() {
+ dout(20) << dendl;
+
+ if (m_update_mapping.empty() && m_remove_global_image_ids.empty()) {
+ finish(0);
+ return;
+ }
+
+ uint32_t nr_updates = 0;
+ librados::ObjectWriteOperation op;
+
+ auto it1 = m_update_mapping.begin();
+ while (it1 != m_update_mapping.end() && nr_updates++ < MAX_UPDATE) {
+ librbd::cls_client::mirror_image_map_update(&op, it1->first, it1->second);
+ it1 = m_update_mapping.erase(it1);
+ }
+
+ auto it2 = m_remove_global_image_ids.begin();
+ while (it2 != m_remove_global_image_ids.end() && nr_updates++ < MAX_UPDATE) {
+ librbd::cls_client::mirror_image_map_remove(&op, *it2);
+ it2 = m_remove_global_image_ids.erase(it2);
+ }
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ UpdateRequest, &UpdateRequest::handle_update_image_map>(this);
+ int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void UpdateRequest<I>::handle_update_image_map(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to update image map: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ update_image_map();
+}
+
+template <typename I>
+void UpdateRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_map::UpdateRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.h b/src/tools/rbd_mirror/image_map/UpdateRequest.h
new file mode 100644
index 00000000..841cc6f9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/UpdateRequest.h
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+
+class Context;
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
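+// Illustrative call sequence (the inputs are supplied by the caller):
+//
+//   auto *req = UpdateRequest<>::create(io_ctx, std::move(updates),
+//                                       std::move(removals), on_finish);
+//   req->send();  // batches the ops and self-deletes after on_finish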
+template<typename ImageCtxT = librbd::ImageCtx>
+class UpdateRequest {
+public:
+ // accepts an image map to update and a collection of
+ // global image ids to purge.
+ static UpdateRequest *create(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
+ std::set<std::string> &&remove_global_image_ids, Context *on_finish) {
+ return new UpdateRequest(ioctx, std::move(update_mapping), std::move(remove_global_image_ids),
+ on_finish);
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * | . . . . . . . .
+ * v v . MAX_UPDATE
+ * UPDATE_IMAGE_MAP. . . . . . .
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ UpdateRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
+ std::set<std::string> &&remove_global_image_ids, Context *on_finish);
+
+ librados::IoCtx &m_ioctx;
+ std::map<std::string, cls::rbd::MirrorImageMap> m_update_mapping;
+ std::set<std::string> m_remove_global_image_ids;
+ Context *m_on_finish;
+
+ void update_image_map();
+ void handle_update_image_map(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
new file mode 100644
index 00000000..7ce21b4b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
@@ -0,0 +1,785 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "BootstrapRequest.h"
+#include "CloseImageRequest.h"
+#include "CreateImageRequest.h"
+#include "IsPrimaryRequest.h"
+#include "OpenImageRequest.h"
+#include "OpenLocalImageRequest.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Types.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/ImageSync.h"
+#include "tools/rbd_mirror/Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::BootstrapRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+using librbd::util::unique_lock_name;
+
+template <typename I>
+BootstrapRequest<I>::BootstrapRequest(
+ Threads<I>* threads,
+ librados::IoCtx &local_io_ctx,
+ librados::IoCtx &remote_io_ctx,
+ InstanceWatcher<I> *instance_watcher,
+ I **local_image_ctx,
+ const std::string &local_image_id,
+ const std::string &remote_image_id,
+ const std::string &global_image_id,
+ const std::string &local_mirror_uuid,
+ const std::string &remote_mirror_uuid,
+ Journaler *journaler,
+ cls::journal::ClientState *client_state,
+ MirrorPeerClientMeta *client_meta,
+ Context *on_finish,
+ bool *do_resync,
+ rbd::mirror::ProgressContext *progress_ctx)
+ : BaseRequest("rbd::mirror::image_replayer::BootstrapRequest",
+ reinterpret_cast<CephContext*>(local_io_ctx.cct()), on_finish),
+ m_threads(threads), m_local_io_ctx(local_io_ctx),
+ m_remote_io_ctx(remote_io_ctx), m_instance_watcher(instance_watcher),
+ m_local_image_ctx(local_image_ctx), m_local_image_id(local_image_id),
+ m_remote_image_id(remote_image_id), m_global_image_id(global_image_id),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_remote_mirror_uuid(remote_mirror_uuid), m_journaler(journaler),
+ m_client_state(client_state), m_client_meta(client_meta),
+ m_progress_ctx(progress_ctx), m_do_resync(do_resync),
+ m_lock(unique_lock_name("BootstrapRequest::m_lock", this)) {
+ dout(10) << dendl;
+}
+
+template <typename I>
+BootstrapRequest<I>::~BootstrapRequest() {
+ ceph_assert(m_remote_image_ctx == nullptr);
+}
+
+template <typename I>
+bool BootstrapRequest<I>::is_syncing() const {
+ Mutex::Locker locker(m_lock);
+ return (m_image_sync != nullptr);
+}
+
+template <typename I>
+void BootstrapRequest<I>::send() {
+ *m_do_resync = false;
+
+ get_remote_tag_class();
+}
+
+template <typename I>
+void BootstrapRequest<I>::cancel() {
+ dout(10) << dendl;
+
+ Mutex::Locker locker(m_lock);
+ m_canceled = true;
+
+ if (m_image_sync != nullptr) {
+ m_image_sync->cancel();
+ }
+}
+
+template <typename I>
+void BootstrapRequest<I>::get_remote_tag_class() {
+ dout(15) << dendl;
+
+ update_progress("GET_REMOTE_TAG_CLASS");
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_get_remote_tag_class>(
+ this);
+ m_journaler->get_client(librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx);
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_get_remote_tag_class(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ librbd::journal::ClientData client_data;
+ auto it = m_client.data.cbegin();
+ try {
+ decode(client_data, it);
+ } catch (const buffer::error &err) {
+ derr << "failed to decode remote client meta data: " << err.what()
+ << dendl;
+ finish(-EBADMSG);
+ return;
+ }
+
+ librbd::journal::ImageClientMeta *client_meta =
+ boost::get<librbd::journal::ImageClientMeta>(&client_data.client_meta);
+ if (client_meta == nullptr) {
+ derr << "unknown remote client registration" << dendl;
+ finish(-EINVAL);
+ return;
+ }
+
+ m_remote_tag_class = client_meta->tag_class;
+ dout(10) << "remote tag class=" << m_remote_tag_class << dendl;
+
+ open_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::open_remote_image() {
+ dout(15) << "remote_image_id=" << m_remote_image_id << dendl;
+
+ update_progress("OPEN_REMOTE_IMAGE");
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_remote_image>(
+ this);
+ OpenImageRequest<I> *request = OpenImageRequest<I>::create(
+ m_remote_io_ctx, &m_remote_image_ctx, m_remote_image_id, false,
+ ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_open_remote_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to open remote image: " << cpp_strerror(r) << dendl;
+ ceph_assert(m_remote_image_ctx == nullptr);
+ finish(r);
+ return;
+ }
+
+ is_primary();
+}
+
+template <typename I>
+void BootstrapRequest<I>::is_primary() {
+ dout(15) << dendl;
+
+ update_progress("IS_PRIMARY");
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_is_primary>(
+ this);
+ IsPrimaryRequest<I> *request = IsPrimaryRequest<I>::create(m_remote_image_ctx,
+ &m_primary, ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_is_primary(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(5) << "remote image is not mirrored" << dendl;
+ m_ret_val = -EREMOTEIO;
+ close_remote_image();
+ return;
+ } else if (r < 0) {
+ derr << "error querying remote image primary status: " << cpp_strerror(r)
+ << dendl;
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ }
+
+ if (!m_primary) {
+ if (m_local_image_id.empty()) {
+ // no local image and remote isn't primary -- don't sync it
+ dout(5) << "remote image is not primary -- not syncing"
+ << dendl;
+ m_ret_val = -EREMOTEIO;
+ close_remote_image();
+ return;
+ } else if (m_client_meta->state !=
+ librbd::journal::MIRROR_PEER_STATE_REPLAYING) {
+ // ensure we attempt to re-sync to remote if it's re-promoted
+ dout(5) << "remote image is not primary -- sync interrupted"
+ << dendl;
+ m_ret_val = -EREMOTEIO;
+ update_client_state();
+ return;
+ }
+ }
+
+ if (!m_client_meta->image_id.empty()) {
+ // have an image id -- use that to open the image since a deletion (resync)
+ // will leave the old image id registered in the peer
+ m_local_image_id = m_client_meta->image_id;
+ }
+
+ if (m_local_image_id.empty()) {
+ // prepare to create local image
+ update_client_image();
+ return;
+ }
+
+ open_local_image();
+}
+
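+// Persists MIRROR_PEER_STATE_REPLAYING in the remote journal's client
+// metadata before bailing out so that an interrupted sync is detected and
+// retried if the remote image is promoted again.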
+template <typename I>
+void BootstrapRequest<I>::update_client_state() {
+ dout(15) << dendl;
+ update_progress("UPDATE_CLIENT_STATE");
+
+ librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta);
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+
+ librbd::journal::ClientData client_data(client_meta);
+ bufferlist data_bl;
+ encode(client_data, data_bl);
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_update_client_state>(
+ this);
+ m_journaler->update_client(data_bl, ctx);
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_update_client_state(int r) {
+ dout(15) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to update client: " << cpp_strerror(r) << dendl;
+ } else {
+ m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+ }
+
+ close_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::open_local_image() {
+ dout(15) << "local_image_id=" << m_local_image_id << dendl;
+
+ update_progress("OPEN_LOCAL_IMAGE");
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_local_image>(
+ this);
+ OpenLocalImageRequest<I> *request = OpenLocalImageRequest<I>::create(
+ m_local_io_ctx, m_local_image_ctx, m_local_image_id, m_threads->work_queue,
+ ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_open_local_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ ceph_assert(*m_local_image_ctx == nullptr);
+ dout(10) << "local image missing" << dendl;
+ unregister_client();
+ return;
+ } else if (r == -EREMOTEIO) {
+ ceph_assert(*m_local_image_ctx == nullptr);
+ dout(10) << "local image is primary -- skipping image replay" << dendl;
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ } else if (r < 0) {
+ ceph_assert(*m_local_image_ctx == nullptr);
+ derr << "failed to open local image: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ }
+
+ I *local_image_ctx = (*m_local_image_ctx);
+ {
+ local_image_ctx->snap_lock.get_read();
+ if (local_image_ctx->journal == nullptr) {
+ local_image_ctx->snap_lock.put_read();
+
+ derr << "local image does not support journaling" << dendl;
+ m_ret_val = -EINVAL;
+ close_local_image();
+ return;
+ }
+
+ r = local_image_ctx->journal->is_resync_requested(m_do_resync);
+ if (r < 0) {
+ local_image_ctx->snap_lock.put_read();
+
+ derr << "failed to check if a resync was requested" << dendl;
+ m_ret_val = r;
+ close_local_image();
+ return;
+ }
+
+ m_local_tag_tid = local_image_ctx->journal->get_tag_tid();
+ m_local_tag_data = local_image_ctx->journal->get_tag_data();
+ dout(10) << "local tag=" << m_local_tag_tid << ", "
+ << "local tag data=" << m_local_tag_data << dendl;
+ local_image_ctx->snap_lock.put_read();
+ }
+
+ if (m_local_tag_data.mirror_uuid != m_remote_mirror_uuid && !m_primary) {
+ // if the local mirror is not linked to the (now) non-primary image,
+ // stop the replay. Otherwise, we ignore that the remote is non-primary
+ // so that we can replay the demotion
+ dout(5) << "remote image is not primary -- skipping image replay"
+ << dendl;
+ m_ret_val = -EREMOTEIO;
+ close_local_image();
+ return;
+ }
+
+ if (*m_do_resync) {
+ close_remote_image();
+ return;
+ }
+
+ if (*m_client_state == cls::journal::CLIENT_STATE_DISCONNECTED) {
+ dout(10) << "client flagged disconnected -- skipping bootstrap" << dendl;
+ // The caller is expected to detect the disconnect when initializing the
+ // remote journal.
+ m_ret_val = 0;
+ close_remote_image();
+ return;
+ }
+
+ get_remote_tags();
+}
+
+template <typename I>
+void BootstrapRequest<I>::unregister_client() {
+ dout(15) << dendl;
+ update_progress("UNREGISTER_CLIENT");
+
+ m_local_image_id = "";
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_unregister_client>(
+ this);
+ m_journaler->unregister_client(ctx);
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_unregister_client(int r) {
+ dout(15) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to unregister with remote journal: " << cpp_strerror(r)
+ << dendl;
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ }
+
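+ // reset the in-memory client meta to match the now-unregistered state
+ // before re-registering from scratch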
+ *m_client_meta = librbd::journal::MirrorPeerClientMeta("");
+ register_client();
+}
+
+template <typename I>
+void BootstrapRequest<I>::register_client() {
+ dout(15) << dendl;
+
+ update_progress("REGISTER_CLIENT");
+
+ ceph_assert(m_local_image_id.empty());
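+ // a fresh registration carries no local image id; once registered, the
+ // primary check is re-run and UPDATE_CLIENT_IMAGE allocates the id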
+ librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta;
+ mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+
+ librbd::journal::ClientData client_data{mirror_peer_client_meta};
+ bufferlist client_data_bl;
+ encode(client_data, client_data_bl);
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_register_client>(
+ this);
+ m_journaler->register_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_register_client(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to register with remote journal: " << cpp_strerror(r)
+ << dendl;
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ }
+
+ *m_client_state = cls::journal::CLIENT_STATE_CONNECTED;
+ *m_client_meta = librbd::journal::MirrorPeerClientMeta();
+ m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+
+ is_primary();
+}
+
+template <typename I>
+void BootstrapRequest<I>::update_client_image() {
+ ceph_assert(m_local_image_id.empty());
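+ // allocate the local image id up front and persist it in the remote
+ // journal client, so an interrupted creation can be resumed (or the id
+ // regenerated if it was already in use and creation returned -EBADF)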
+ m_local_image_id = librbd::util::generate_image_id<I>(m_local_io_ctx);
+
+ dout(15) << "local_image_id=" << m_local_image_id << dendl;
+ update_progress("UPDATE_CLIENT_IMAGE");
+
+ librbd::journal::MirrorPeerClientMeta client_meta{m_local_image_id};
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
+
+ librbd::journal::ClientData client_data(client_meta);
+ bufferlist data_bl;
+ encode(client_data, data_bl);
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_update_client_image>(
+ this);
+ m_journaler->update_client(data_bl, ctx);
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_update_client_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update client: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ }
+
+ if (m_canceled) {
+ dout(10) << "request canceled" << dendl;
+ m_ret_val = -ECANCELED;
+ close_remote_image();
+ return;
+ }
+
+ *m_client_meta = {m_local_image_id};
+ m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
+ create_local_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::create_local_image() {
+ dout(15) << "local_image_id=" << m_local_image_id << dendl;
+ update_progress("CREATE_LOCAL_IMAGE");
+
+ m_remote_image_ctx->snap_lock.get_read();
+ std::string image_name = m_remote_image_ctx->name;
+ m_remote_image_ctx->snap_lock.put_read();
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_create_local_image>(
+ this);
+ CreateImageRequest<I> *request = CreateImageRequest<I>::create(
+ m_threads, m_local_io_ctx, m_global_image_id, m_remote_mirror_uuid,
+ image_name, m_local_image_id, m_remote_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_create_local_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r == -EBADF) {
+ dout(5) << "image id " << m_local_image_id << " already in-use" << dendl;
+ m_local_image_id = "";
+ update_client_image();
+ return;
+ } else if (r < 0) {
+ if (r == -ENOENT) {
+ dout(10) << "parent image does not exist" << dendl;
+ } else {
+ derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+ }
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ }
+
+ open_local_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::get_remote_tags() {
+ if (m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_SYNCING) {
+ // optimization -- no need to compare remote tags if we just created
+ // the image locally or sync was interrupted
+ image_sync();
+ return;
+ }
+
+ dout(15) << dendl;
+ update_progress("GET_REMOTE_TAGS");
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_get_remote_tags>(this);
+ m_journaler->get_tags(m_remote_tag_class, &m_remote_tags, ctx);
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_get_remote_tags(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_local_image();
+ return;
+ }
+
+ if (m_canceled) {
+ dout(10) << "request canceled" << dendl;
+ m_ret_val = -ECANCELED;
+ close_local_image();
+ return;
+ }
+
+ // At this point, the local image exists, is non-primary, and is replaying;
+ // the remote image is primary. Attempt to link the local image's most
+ // recent tag to the remote image's tag chain.
+ bool remote_tag_data_valid = false;
+ librbd::journal::TagData remote_tag_data;
+ boost::optional<uint64_t> remote_orphan_tag_tid =
+ boost::make_optional<uint64_t>(false, 0U);
+ bool reconnect_orphan = false;
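+ // Illustrative example (not taken from live data): after the remote image
+ // is demoted and later re-promoted, its tag chain might look roughly like
+ //   tid=10 mirror_uuid=LOCAL                                (primary epoch)
+ //   tid=11 mirror_uuid=ORPHAN predecessor={LOCAL, tid=10}   (demotion)
+ //   tid=12 mirror_uuid=LOCAL  predecessor={ORPHAN, tid=11}  (promotion)
+ // The loop below searches for a demotion tag matching the local image's
+ // last epoch and then for a promotion tag chained to it (reconnect_orphan).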
+
+ // decode the remote tags
+ for (auto &remote_tag : m_remote_tags) {
+ if (m_local_tag_data.predecessor.commit_valid &&
+ m_local_tag_data.predecessor.mirror_uuid == m_remote_mirror_uuid &&
+ m_local_tag_data.predecessor.tag_tid > remote_tag.tid) {
+ dout(15) << "skipping processed predecessor remote tag "
+ << remote_tag.tid << dendl;
+ continue;
+ }
+
+ try {
+ auto it = remote_tag.data.cbegin();
+ decode(remote_tag_data, it);
+ remote_tag_data_valid = true;
+ } catch (const buffer::error &err) {
+ derr << "failed to decode remote tag " << remote_tag.tid << ": "
+ << err.what() << dendl;
+ m_ret_val = -EBADMSG;
+ close_local_image();
+ return;
+ }
+
+ dout(10) << "decoded remote tag " << remote_tag.tid << ": "
+ << remote_tag_data << dendl;
+
+ if (!m_local_tag_data.predecessor.commit_valid) {
+ // newly synced local image (no predecessor) replays from the first tag
+ if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ dout(15) << "skipping non-primary remote tag" << dendl;
+ continue;
+ }
+
+ dout(10) << "using initial primary remote tag" << dendl;
+ break;
+ }
+
+ if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
+ // the local image was demoted -- its demotion tag marks the last
+ // available local epoch
+
+ if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid &&
+ remote_tag_data.predecessor.commit_valid &&
+ remote_tag_data.predecessor.tag_tid ==
+ m_local_tag_data.predecessor.tag_tid) {
+ // demotion matches remote epoch
+
+ if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid &&
+ m_local_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ // local demoted and remote has matching event
+ dout(15) << "found matching local demotion tag" << dendl;
+ remote_orphan_tag_tid = remote_tag.tid;
+ continue;
+ }
+
+ if (m_local_tag_data.predecessor.mirror_uuid == m_remote_mirror_uuid &&
+ remote_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ // remote demoted and local has matching event
+ dout(15) << "found matching remote demotion tag" << dendl;
+ remote_orphan_tag_tid = remote_tag.tid;
+ continue;
+ }
+ }
+
+ if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID &&
+ remote_tag_data.predecessor.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID &&
+ remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid &&
+ remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) {
+ // remote promotion tag chained to remote/local demotion tag
+ dout(15) << "found chained remote promotion tag" << dendl;
+ reconnect_orphan = true;
+ break;
+ }
+
+ // promotion must follow demotion
+ remote_orphan_tag_tid = boost::none;
+ }
+ }
+
+ if (remote_tag_data_valid &&
+ m_local_tag_data.mirror_uuid == m_remote_mirror_uuid) {
+ dout(10) << "local image is in clean replay state" << dendl;
+ } else if (reconnect_orphan) {
+ dout(10) << "remote image was demoted/promoted" << dendl;
+ } else {
+ derr << "split-brain detected -- skipping image replay" << dendl;
+ m_ret_val = -EEXIST;
+ close_local_image();
+ return;
+ }
+
+ image_sync();
+}
+
+template <typename I>
+void BootstrapRequest<I>::image_sync() {
+ if (m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_REPLAYING) {
+ // clean replay state -- no image sync required
+ close_remote_image();
+ return;
+ }
+
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_canceled) {
+ m_ret_val = -ECANCELED;
+ } else {
+ dout(15) << dendl;
+ ceph_assert(m_image_sync == nullptr);
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_image_sync>(this);
+ m_image_sync = ImageSync<I>::create(
+ *m_local_image_ctx, m_remote_image_ctx, m_threads->timer,
+ &m_threads->timer_lock, m_local_mirror_uuid, m_journaler,
+ m_client_meta, m_threads->work_queue, m_instance_watcher, ctx,
+ m_progress_ctx);
+
+ m_image_sync->get();
+
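+ // temporarily drop the lock while invoking the progress callback, which
+ // may acquire locks of its own (an assumption suggested by this
+ // unlock/lock pattern)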
+ m_lock.Unlock();
+ update_progress("IMAGE_SYNC");
+ m_lock.Lock();
+
+ m_image_sync->send();
+ return;
+ }
+ }
+
+ dout(10) << "request canceled" << dendl;
+ close_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_image_sync(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ {
+ Mutex::Locker locker(m_lock);
+ m_image_sync->put();
+ m_image_sync = nullptr;
+
+ if (m_canceled) {
+ dout(10) << "request canceled" << dendl;
+ m_ret_val = -ECANCELED;
+ }
+
+ if (r < 0) {
+ derr << "failed to sync remote image: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ }
+ }
+
+ close_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::close_local_image() {
+ dout(15) << dendl;
+
+ update_progress("CLOSE_LOCAL_IMAGE");
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_close_local_image>(
+ this);
+ CloseImageRequest<I> *request = CloseImageRequest<I>::create(
+ m_local_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_close_local_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error encountered closing local image: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ close_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::close_remote_image() {
+ dout(15) << dendl;
+
+ update_progress("CLOSE_REMOTE_IMAGE");
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_close_remote_image>(
+ this);
+ CloseImageRequest<I> *request = CloseImageRequest<I>::create(
+ &m_remote_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_close_remote_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error encountered closing remote image: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ finish(m_ret_val);
+}
+
+template <typename I>
+void BootstrapRequest<I>::update_progress(const std::string &description) {
+ dout(15) << description << dendl;
+
+ if (m_progress_ctx) {
+ m_progress_ctx->update_progress(description);
+ }
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
new file mode 100644
index 00000000..ea9f8565
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
@@ -0,0 +1,230 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/Mutex.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include "tools/rbd_mirror/Types.h"
+#include <list>
+#include <string>
+
+class Context;
+class ContextWQ;
+class Mutex;
+class SafeTimer;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+
+template <typename> class ImageSync;
+template <typename> class InstanceWatcher;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class BootstrapRequest : public BaseRequest {
+public:
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+ typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta;
+ typedef rbd::mirror::ProgressContext ProgressContext;
+
+ static BootstrapRequest* create(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx &local_io_ctx,
+ librados::IoCtx &remote_io_ctx,
+ InstanceWatcher<ImageCtxT> *instance_watcher,
+ ImageCtxT **local_image_ctx,
+ const std::string &local_image_id,
+ const std::string &remote_image_id,
+ const std::string &global_image_id,
+ const std::string &local_mirror_uuid,
+ const std::string &remote_mirror_uuid,
+ Journaler *journaler,
+ cls::journal::ClientState *client_state,
+ MirrorPeerClientMeta *client_meta,
+ Context *on_finish,
+ bool *do_resync,
+ ProgressContext *progress_ctx = nullptr) {
+ return new BootstrapRequest(threads, local_io_ctx, remote_io_ctx,
+ instance_watcher, local_image_ctx,
+ local_image_id, remote_image_id,
+ global_image_id, local_mirror_uuid,
+ remote_mirror_uuid, journaler, client_state,
+ client_meta, on_finish, do_resync,
+ progress_ctx);
+ }
+
+ BootstrapRequest(Threads<ImageCtxT>* threads,
+ librados::IoCtx &local_io_ctx,
+ librados::IoCtx &remote_io_ctx,
+ InstanceWatcher<ImageCtxT> *instance_watcher,
+ ImageCtxT **local_image_ctx,
+ const std::string &local_image_id,
+ const std::string &remote_image_id,
+ const std::string &global_image_id,
+ const std::string &local_mirror_uuid,
+ const std::string &remote_mirror_uuid, Journaler *journaler,
+ cls::journal::ClientState *client_state,
+ MirrorPeerClientMeta *client_meta, Context *on_finish,
+ bool *do_resync, ProgressContext *progress_ctx = nullptr);
+ ~BootstrapRequest() override;
+
+ bool is_syncing() const;
+
+ void send() override;
+ void cancel() override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_REMOTE_TAG_CLASS * * * * * * * * * * * * * * * * * *
+ * | * (error)
+ * v *
+ * OPEN_REMOTE_IMAGE * * * * * * * * * * * * * * * * * * *
+ * | *
+ * |/--------------------------------------------------*---\
+ * v * |
+ * IS_PRIMARY * * * * * * * * * * * * * * * * * * * * * * |
+ * | * * |
+ * | (remote image primary, no local image id) * * |
+ * \----> UPDATE_CLIENT_IMAGE * * * * * * * * * * * * |
+ * | | ^ * * |
+ * | | * (duplicate image id) * * |
+ * | v * * * |
+ * \----> CREATE_LOCAL_IMAGE * * * * * * * * * * * * * |
+ * | | * * |
+ * | v * * |
+ * | (remote image primary) * * |
+ * \----> OPEN_LOCAL_IMAGE * * * * * * * * * * * * * * |
+ * | | . * * |
+ * | | . (image doesn't exist) * * |
+ * | | . . > UNREGISTER_CLIENT * * * * * * * |
+ * | | | * * |
+ * | | v * * |
+ * | | REGISTER_CLIENT * * * * * * * * |
+ * | | | * * |
+ * | | \-----------------------*---*---/
+ * | | * *
+ * | v (skip if not needed) * *
+ * | GET_REMOTE_TAGS * * * * * * * * *
+ * | | * * *
+ * | v (skip if not needed) v * *
+ * | IMAGE_SYNC * * * > CLOSE_LOCAL_IMAGE * *
+ * | | | * *
+ * | \-----------------\ /-----/ * *
+ * | | * *
+ * | | * *
+ * | (skip if not needed) | * *
+ * \----> UPDATE_CLIENT_STATE *|* * * * * * * * * * *
+ * | | * *
+ * /-----------/----------------/ * *
+ * | * *
+ * v * *
+ * CLOSE_REMOTE_IMAGE < * * * * * * * * * * * * * * * * *
+ * | *
+ * v *
+ * <finish> < * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ * @endverbatim
+ */
+ typedef std::list<cls::journal::Tag> Tags;
+
+ Threads<ImageCtxT>* m_threads;
+ librados::IoCtx &m_local_io_ctx;
+ librados::IoCtx &m_remote_io_ctx;
+ InstanceWatcher<ImageCtxT> *m_instance_watcher;
+ ImageCtxT **m_local_image_ctx;
+ std::string m_local_image_id;
+ std::string m_remote_image_id;
+ std::string m_global_image_id;
+ std::string m_local_mirror_uuid;
+ std::string m_remote_mirror_uuid;
+ Journaler *m_journaler;
+ cls::journal::ClientState *m_client_state;
+ MirrorPeerClientMeta *m_client_meta;
+ ProgressContext *m_progress_ctx;
+ bool *m_do_resync;
+
+ mutable Mutex m_lock;
+ bool m_canceled = false;
+
+ Tags m_remote_tags;
+ cls::journal::Client m_client;
+ uint64_t m_remote_tag_class = 0;
+ ImageCtxT *m_remote_image_ctx = nullptr;
+ bool m_primary = false;
+ int m_ret_val = 0;
+ ImageSync<ImageCtxT> *m_image_sync = nullptr;
+
+ uint64_t m_local_tag_tid = 0;
+ librbd::journal::TagData m_local_tag_data;
+
+ bufferlist m_out_bl;
+
+ void get_remote_tag_class();
+ void handle_get_remote_tag_class(int r);
+
+ void open_remote_image();
+ void handle_open_remote_image(int r);
+
+ void is_primary();
+ void handle_is_primary(int r);
+
+ void update_client_state();
+ void handle_update_client_state(int r);
+
+ void open_local_image();
+ void handle_open_local_image(int r);
+
+ void unregister_client();
+ void handle_unregister_client(int r);
+
+ void register_client();
+ void handle_register_client(int r);
+
+ void create_local_image();
+ void handle_create_local_image(int r);
+
+ void update_client_image();
+ void handle_update_client_image(int r);
+
+ void get_remote_tags();
+ void handle_get_remote_tags(int r);
+
+ void image_sync();
+ void handle_image_sync(int r);
+
+ void close_local_image();
+ void handle_close_local_image(int r);
+
+ void close_remote_image();
+ void handle_close_remote_image(int r);
+
+ void update_progress(const std::string &description);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc
new file mode 100644
index 00000000..5b754823
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CloseImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::CloseImageRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+CloseImageRequest<I>::CloseImageRequest(I **image_ctx, Context *on_finish)
+ : m_image_ctx(image_ctx), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void CloseImageRequest<I>::send() {
+ close_image();
+}
+
+template <typename I>
+void CloseImageRequest<I>::close_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ CloseImageRequest<I>, &CloseImageRequest<I>::handle_close_image>(this);
+ (*m_image_ctx)->state->close(ctx);
+}
+
+template <typename I>
+void CloseImageRequest<I>::handle_close_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": error encountered while closing image: " << cpp_strerror(r)
+ << dendl;
+ }
+
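+ // the image ctx is destroyed regardless of the close result; the error
+ // was logged above and is intentionally not propagated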
+ delete *m_image_ctx;
+ *m_image_ctx = nullptr;
+
+ m_on_finish->complete(0);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;
+
diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h
new file mode 100644
index 00000000..02481369
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class CloseImageRequest {
+public:
+ static CloseImageRequest* create(ImageCtxT **image_ctx, Context *on_finish) {
+ return new CloseImageRequest(image_ctx, on_finish);
+ }
+
+ CloseImageRequest(ImageCtxT **image_ctx, Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * CLOSE_IMAGE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ ImageCtxT **m_image_ctx;
+ Context *m_on_finish;
+
+ void close_image();
+ void handle_close_image(int r);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc
new file mode 100644
index 00000000..8d8236b2
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc
@@ -0,0 +1,506 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CreateImageRequest.h"
+#include "CloseImageRequest.h"
+#include "OpenImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "journal/Journaler.h"
+#include "journal/Settings.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Utils.h"
+#include "librbd/image/CreateRequest.h"
+#include "librbd/image/CloneRequest.h"
+#include "librbd/journal/Types.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::CreateImageRequest: " \
+ << this << " " << __func__ << ": "
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename I>
+CreateImageRequest<I>::CreateImageRequest(Threads<I>* threads,
+ librados::IoCtx &local_io_ctx,
+ const std::string &global_image_id,
+ const std::string &remote_mirror_uuid,
+ const std::string &local_image_name,
+ const std::string &local_image_id,
+ I *remote_image_ctx,
+ Context *on_finish)
+ : m_threads(threads), m_local_io_ctx(local_io_ctx),
+ m_global_image_id(global_image_id),
+ m_remote_mirror_uuid(remote_mirror_uuid),
+ m_local_image_name(local_image_name), m_local_image_id(local_image_id),
+ m_remote_image_ctx(remote_image_ctx), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void CreateImageRequest<I>::send() {
+ int r = validate_parent();
+ if (r < 0) {
+ error(r);
+ return;
+ }
+
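+ // a parent pool id of -1 means the remote image is not a clone, so a
+ // plain image can be created directly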
+ if (m_remote_parent_spec.pool_id == -1) {
+ create_image();
+ } else {
+ get_local_parent_mirror_uuid();
+ }
+}
+
+template <typename I>
+void CreateImageRequest<I>::create_image() {
+ dout(10) << dendl;
+
+ using klass = CreateImageRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_create_image>(this);
+
+ RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock);
+
+ auto& config{
+ reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf};
+
+ librbd::ImageOptions image_options;
+ populate_image_options(&image_options);
+
+ auto req = librbd::image::CreateRequest<I>::create(
+ config, m_local_io_ctx, m_local_image_name, m_local_image_id,
+ m_remote_image_ctx->size, image_options, m_global_image_id,
+ m_remote_mirror_uuid, false, m_remote_image_ctx->op_work_queue, ctx);
+ req->send();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_create_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r == -EBADF) {
+ dout(5) << "image id " << m_local_image_id << " already in-use" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void CreateImageRequest<I>::get_local_parent_mirror_uuid() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_uuid_get_start(&op);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_get_local_parent_mirror_uuid>(this);
+ m_out_bl.clear();
+ int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_get_local_parent_mirror_uuid(int r) {
+ if (r >= 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_uuid_get_finish(
+ &it, &m_local_parent_mirror_uuid);
+ if (r >= 0 && m_local_parent_mirror_uuid.empty()) {
+ r = -ENOENT;
+ }
+ }
+
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ if (r == -ENOENT) {
+ dout(5) << "local parent mirror uuid missing" << dendl;
+ } else {
+ derr << "failed to retrieve local parent mirror uuid: " << cpp_strerror(r)
+ << dendl;
+ }
+ finish(r);
+ return;
+ }
+
+ dout(15) << "local_parent_mirror_uuid=" << m_local_parent_mirror_uuid
+ << dendl;
+ get_remote_parent_client_state();
+}
+
+template <typename I>
+void CreateImageRequest<I>::get_remote_parent_client_state() {
+ dout(10) << dendl;
+
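+ // look up the local cluster's client registered in the remote parent
+ // image's journal to verify the parent has been fully synced (it must
+ // be in the REPLAYING state below)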
+ m_remote_journaler = new Journaler(m_threads->work_queue, m_threads->timer,
+ &m_threads->timer_lock,
+ m_remote_parent_io_ctx,
+ m_remote_parent_spec.image_id,
+ m_local_parent_mirror_uuid, {});
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_get_remote_parent_client_state>(this));
+ m_remote_journaler->get_client(m_local_parent_mirror_uuid, &m_client, ctx);
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_get_remote_parent_client_state(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ delete m_remote_journaler;
+ m_remote_journaler = nullptr;
+
+ librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta;
+ if (r == -ENOENT) {
+ dout(15) << "client not registered to parent image" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to retrieve parent client: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ } else if (!util::decode_client_meta(m_client, &mirror_peer_client_meta)) {
+ // require operator intervention since the data is corrupt
+ derr << "failed to decode parent client: " << cpp_strerror(r) << dendl;
+ finish(-EBADMSG);
+ return;
+ } else if (mirror_peer_client_meta.state !=
+ librbd::journal::MIRROR_PEER_STATE_REPLAYING) {
+ // avoid possible race w/ incomplete parent image since the parent snapshot
+ // might be deleted if the sync restarts
+ dout(15) << "parent image still syncing" << dendl;
+ finish(-ENOENT);
+ return;
+ }
+
+ get_parent_global_image_id();
+}
+
+template <typename I>
+void CreateImageRequest<I>::get_parent_global_image_id() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_start(&op,
+ m_remote_parent_spec.image_id);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_get_parent_global_image_id>(this);
+ m_out_bl.clear();
+ int r = m_remote_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_get_parent_global_image_id(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r == 0) {
+ cls::rbd::MirrorImage mirror_image;
+ auto iter = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image);
+ if (r == 0) {
+ m_parent_global_image_id = mirror_image.global_image_id;
+ dout(15) << "parent_global_image_id=" << m_parent_global_image_id
+ << dendl;
+ }
+ }
+
+ if (r == -ENOENT) {
+ dout(10) << "parent image " << m_remote_parent_spec.image_id
+ << " not mirrored" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to retrieve global image id for parent image "
+ << m_remote_parent_spec.image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ get_local_parent_image_id();
+}
+
+template <typename I>
+void CreateImageRequest<I>::get_local_parent_image_id() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_image_id_start(
+ &op, m_parent_global_image_id);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_get_local_parent_image_id>(this);
+ m_out_bl.clear();
+ int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_get_local_parent_image_id(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto iter = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_image_id_finish(
+ &iter, &m_local_parent_spec.image_id);
+ }
+
+ if (r == -ENOENT) {
+ dout(10) << "parent image " << m_parent_global_image_id << " not "
+ << "registered locally" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to retrieve local image id for parent image "
+ << m_parent_global_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ open_remote_parent_image();
+}
+
+template <typename I>
+void CreateImageRequest<I>::open_remote_parent_image() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_open_remote_parent_image>(this);
+ OpenImageRequest<I> *request = OpenImageRequest<I>::create(
+ m_remote_parent_io_ctx, &m_remote_parent_image_ctx,
+ m_remote_parent_spec.image_id, true, ctx);
+ request->send();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_open_remote_parent_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to open remote parent image " << m_parent_pool_name << "/"
+ << m_remote_parent_spec.image_id << dendl;
+ finish(r);
+ return;
+ }
+
+ clone_image();
+}
+
+template <typename I>
+void CreateImageRequest<I>::clone_image() {
+ dout(10) << dendl;
+
+ std::string snap_name;
+ cls::rbd::SnapshotNamespace snap_namespace;
+ {
+ RWLock::RLocker remote_snap_locker(m_remote_parent_image_ctx->snap_lock);
+ auto it = m_remote_parent_image_ctx->snap_info.find(
+ m_remote_parent_spec.snap_id);
+ if (it != m_remote_parent_image_ctx->snap_info.end()) {
+ snap_name = it->second.name;
+ snap_namespace = it->second.snap_namespace;
+ }
+ }
+
+ librbd::ImageOptions opts;
+ populate_image_options(&opts);
+
+ auto& config{
+ reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf};
+
+ using klass = CreateImageRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_clone_image>(this);
+
+ librbd::image::CloneRequest<I> *req = librbd::image::CloneRequest<I>::create(
+ config, m_local_parent_io_ctx, m_local_parent_spec.image_id, snap_name,
+ CEPH_NOSNAP, m_local_io_ctx, m_local_image_name, m_local_image_id, opts,
+ m_global_image_id, m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_clone_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r == -EBADF) {
+ dout(5) << "image id " << m_local_image_id << " already in-use" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to clone image " << m_parent_pool_name << "/"
+ << m_remote_parent_spec.image_id << " to "
+ << m_local_image_name << dendl;
+ m_ret_val = r;
+ }
+
+ close_remote_parent_image();
+}
+
+template <typename I>
+void CreateImageRequest<I>::close_remote_parent_image() {
+ dout(10) << dendl;
+ Context *ctx = create_context_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_close_remote_parent_image>(this);
+ CloseImageRequest<I> *request = CloseImageRequest<I>::create(
+ &m_remote_parent_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_close_remote_parent_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "error encountered closing remote parent image: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ finish(m_ret_val);
+}
+
+template <typename I>
+void CreateImageRequest<I>::error(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_threads->work_queue->queue(create_context_callback<
+ CreateImageRequest<I>, &CreateImageRequest<I>::finish>(this), r);
+}
+
+template <typename I>
+void CreateImageRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+ m_on_finish->complete(r);
+ delete this;
+}
+
+template <typename I>
+int CreateImageRequest<I>::validate_parent() {
+ RWLock::RLocker owner_locker(m_remote_image_ctx->owner_lock);
+ RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock);
+
+ m_remote_parent_spec = m_remote_image_ctx->parent_md.spec;
+
+ // scan all remote snapshots for a linked parent
+ for (auto &snap_info_pair : m_remote_image_ctx->snap_info) {
+ auto &parent_spec = snap_info_pair.second.parent.spec;
+ if (parent_spec.pool_id == -1) {
+ continue;
+ } else if (m_remote_parent_spec.pool_id == -1) {
+ m_remote_parent_spec = parent_spec;
+ continue;
+ }
+
+ if (m_remote_parent_spec != parent_spec) {
+ derr << "remote image parent spec mismatch" << dendl;
+ return -EINVAL;
+ }
+ }
+
+ if (m_remote_parent_spec.pool_id == -1) {
+ return 0;
+ }
+
+ // map remote parent pool to local parent pool
+ librados::Rados remote_rados(m_remote_image_ctx->md_ctx);
+ int r = remote_rados.ioctx_create2(m_remote_parent_spec.pool_id,
+ m_remote_parent_io_ctx);
+ if (r < 0) {
+ derr << "failed to open remote parent pool " << m_remote_parent_spec.pool_id
+ << ": " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ m_parent_pool_name = m_remote_parent_io_ctx.get_pool_name();
+
+ librados::Rados local_rados(m_local_io_ctx);
+ r = local_rados.ioctx_create(m_parent_pool_name.c_str(),
+ m_local_parent_io_ctx);
+ if (r < 0) {
+ derr << "failed to open local parent pool " << m_parent_pool_name << ": "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+void CreateImageRequest<I>::populate_image_options(
+ librbd::ImageOptions* image_options) {
+ image_options->set(RBD_IMAGE_OPTION_FEATURES,
+ m_remote_image_ctx->features);
+ image_options->set(RBD_IMAGE_OPTION_ORDER, m_remote_image_ctx->order);
+ image_options->set(RBD_IMAGE_OPTION_STRIPE_UNIT,
+ m_remote_image_ctx->stripe_unit);
+ image_options->set(RBD_IMAGE_OPTION_STRIPE_COUNT,
+ m_remote_image_ctx->stripe_count);
+
+ // Determine the data pool for the local image as follows:
+ // 1. If the local pool has a default data pool, use it.
+ // 2. If the remote image has a data pool different from its metadata pool and
+ // a pool with the same name exists locally, use it.
+ // 3. Don't set the data pool explicitly.
+ std::string data_pool;
+ librados::Rados local_rados(m_local_io_ctx);
+ auto default_data_pool = g_ceph_context->_conf.get_val<std::string>("rbd_default_data_pool");
+ auto remote_md_pool = m_remote_image_ctx->md_ctx.get_pool_name();
+ auto remote_data_pool = m_remote_image_ctx->data_ctx.get_pool_name();
+
+ if (default_data_pool != "") {
+ data_pool = default_data_pool;
+ } else if (remote_data_pool != remote_md_pool) {
+ if (local_rados.pool_lookup(remote_data_pool.c_str()) >= 0) {
+ data_pool = remote_data_pool;
+ }
+ }
+
+ if (data_pool != "") {
+ image_options->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool);
+ }
+
+ if (m_remote_parent_spec.pool_id != -1) {
+ uint64_t clone_format = 1;
+ if (m_remote_image_ctx->test_op_features(
+ RBD_OPERATION_FEATURE_CLONE_CHILD)) {
+ clone_format = 2;
+ }
+ image_options->set(RBD_IMAGE_OPTION_CLONE_FORMAT, clone_format);
+ }
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h
new file mode 100644
index 00000000..0b20da52
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h
@@ -0,0 +1,154 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/types.h"
+#include "include/rados/librados.hpp"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <string>
+
+class Context;
+class ContextWQ;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+namespace librbd { class ImageOptions; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class CreateImageRequest {
+public:
+ static CreateImageRequest *create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &local_io_ctx,
+ const std::string &global_image_id,
+ const std::string &remote_mirror_uuid,
+ const std::string &local_image_name,
+ const std::string &local_image_id,
+ ImageCtxT *remote_image_ctx,
+ Context *on_finish) {
+ return new CreateImageRequest(threads, local_io_ctx, global_image_id,
+ remote_mirror_uuid, local_image_name,
+ local_image_id, remote_image_ctx, on_finish);
+ }
+
+ CreateImageRequest(Threads<ImageCtxT> *threads, librados::IoCtx &local_io_ctx,
+ const std::string &global_image_id,
+ const std::string &remote_mirror_uuid,
+ const std::string &local_image_name,
+ const std::string &local_image_id,
+ ImageCtxT *remote_image_ctx,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start> * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * | *
+ * | (non-clone) *
+ * |\------------> CREATE_IMAGE ---------------------\ * (error)
+ * | | *
+ * | (clone) | *
+ * \-------------> GET_LOCAL_PARENT_MIRROR_UUID * * | * * * *
+ * | | * *
+ * v | *
+ * GET_REMOTE_PARENT_CLIENT_STATE * | * * * *
+ * | | * *
+ * v | *
+ * GET_PARENT_GLOBAL_IMAGE_ID * * * | * * * *
+ * | | * *
+ * v | *
+ * GET_LOCAL_PARENT_IMAGE_ID * * * * | * * * *
+ * | | * *
+ * v | *
+ * OPEN_REMOTE_PARENT * * * * * * * | * * * *
+ * | | * *
+ * v | *
+ * CLONE_IMAGE | *
+ * | | *
+ * v | *
+ * CLOSE_REMOTE_PARENT | *
+ * | v *
+ * \------------------------> <finish> < * *
+ * @endverbatim
+ */
+
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx &m_local_io_ctx;
+ std::string m_global_image_id;
+ std::string m_remote_mirror_uuid;
+ std::string m_local_image_name;
+ std::string m_local_image_id;
+ ImageCtxT *m_remote_image_ctx;
+ Context *m_on_finish;
+
+ librados::IoCtx m_remote_parent_io_ctx;
+ std::string m_local_parent_mirror_uuid;
+ Journaler *m_remote_journaler = nullptr;
+ ImageCtxT *m_remote_parent_image_ctx = nullptr;
+ cls::rbd::ParentImageSpec m_remote_parent_spec;
+
+ librados::IoCtx m_local_parent_io_ctx;
+ cls::rbd::ParentImageSpec m_local_parent_spec;
+
+ bufferlist m_out_bl;
+ std::string m_parent_global_image_id;
+ std::string m_parent_pool_name;
+ cls::journal::Client m_client;
+ int m_ret_val = 0;
+
+ void create_image();
+ void handle_create_image(int r);
+
+ void get_local_parent_mirror_uuid();
+ void handle_get_local_parent_mirror_uuid(int r);
+
+ void get_remote_parent_client_state();
+ void handle_get_remote_parent_client_state(int r);
+
+ void get_parent_global_image_id();
+ void handle_get_parent_global_image_id(int r);
+
+ void get_local_parent_image_id();
+ void handle_get_local_parent_image_id(int r);
+
+ void open_remote_parent_image();
+ void handle_open_remote_parent_image(int r);
+
+ void clone_image();
+ void handle_clone_image(int r);
+
+ void close_remote_parent_image();
+ void handle_close_remote_parent_image(int r);
+
+ void error(int r);
+ void finish(int r);
+
+ int validate_parent();
+
+ void populate_image_options(librbd::ImageOptions* image_options);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc
new file mode 100644
index 00000000..6314eb7d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc
@@ -0,0 +1,204 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "EventPreprocessor.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Types.h"
+#include <boost/variant.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::EventPreprocessor: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+EventPreprocessor<I>::EventPreprocessor(I &local_image_ctx,
+ Journaler &remote_journaler,
+ const std::string &local_mirror_uuid,
+ MirrorPeerClientMeta *client_meta,
+ ContextWQ *work_queue)
+ : m_local_image_ctx(local_image_ctx), m_remote_journaler(remote_journaler),
+ m_local_mirror_uuid(local_mirror_uuid), m_client_meta(client_meta),
+ m_work_queue(work_queue) {
+}
+
+template <typename I>
+EventPreprocessor<I>::~EventPreprocessor() {
+ ceph_assert(!m_in_progress);
+}
+
+template <typename I>
+bool EventPreprocessor<I>::is_required(const EventEntry &event_entry) {
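+ // preprocessing is required if stale snapshot mappings need pruning or
+ // the event is a snap rename (whose remote snap id must be remapped)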
+ SnapSeqs snap_seqs(m_client_meta->snap_seqs);
+ return (prune_snap_map(&snap_seqs) ||
+ event_entry.get_event_type() ==
+ librbd::journal::EVENT_TYPE_SNAP_RENAME);
+}
+
+template <typename I>
+void EventPreprocessor<I>::preprocess(EventEntry *event_entry,
+ Context *on_finish) {
+ ceph_assert(!m_in_progress);
+ m_in_progress = true;
+ m_event_entry = event_entry;
+ m_on_finish = on_finish;
+
+ refresh_image();
+}
+
+template <typename I>
+void EventPreprocessor<I>::refresh_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ EventPreprocessor<I>, &EventPreprocessor<I>::handle_refresh_image>(this);
+ m_local_image_ctx.state->refresh(ctx);
+}
+
+template <typename I>
+void EventPreprocessor<I>::handle_refresh_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error encountered during image refresh: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ preprocess_event();
+}
+
+template <typename I>
+void EventPreprocessor<I>::preprocess_event() {
+ dout(20) << dendl;
+
+ m_snap_seqs = m_client_meta->snap_seqs;
+ m_snap_seqs_updated = prune_snap_map(&m_snap_seqs);
+
+ int r = boost::apply_visitor(PreprocessEventVisitor(this),
+ m_event_entry->event);
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ update_client();
+}
+
+template <typename I>
+int EventPreprocessor<I>::preprocess_snap_rename(
+ librbd::journal::SnapRenameEvent &event) {
+ dout(20) << ": "
+ << "remote_snap_id=" << event.snap_id << ", "
+ << "src_snap_name=" << event.src_snap_name << ", "
+ << "dest_snap_name=" << event.dst_snap_name << dendl;
+
+ auto snap_seq_it = m_snap_seqs.find(event.snap_id);
+ if (snap_seq_it != m_snap_seqs.end()) {
+ dout(20) << ": remapping remote snap id " << snap_seq_it->first << " "
+ << "to local snap id " << snap_seq_it->second << dendl;
+ event.snap_id = snap_seq_it->second;
+ return 0;
+ }
+
+ auto snap_id_it = m_local_image_ctx.snap_ids.find({cls::rbd::UserSnapshotNamespace(),
+ event.src_snap_name});
+ if (snap_id_it == m_local_image_ctx.snap_ids.end()) {
+ dout(20) << ": cannot map remote snapshot '" << event.src_snap_name << "' "
+ << "to local snapshot" << dendl;
+ event.snap_id = CEPH_NOSNAP;
+ return -ENOENT;
+ }
+
+ dout(20) << ": mapping remote snap id " << event.snap_id << " "
+ << "to local snap id " << snap_id_it->second << dendl;
+ m_snap_seqs_updated = true;
+ m_snap_seqs[event.snap_id] = snap_id_it->second;
+ event.snap_id = snap_id_it->second;
+ return 0;
+}
+
+template <typename I>
+void EventPreprocessor<I>::update_client() {
+ if (!m_snap_seqs_updated) {
+ finish(0);
+ return;
+ }
+
+ dout(20) << dendl;
+ librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta);
+ client_meta.snap_seqs = m_snap_seqs;
+
+ librbd::journal::ClientData client_data(client_meta);
+ bufferlist data_bl;
+ encode(client_data, data_bl);
+
+ Context *ctx = create_context_callback<
+ EventPreprocessor<I>, &EventPreprocessor<I>::handle_update_client>(
+ this);
+ m_remote_journaler.update_client(data_bl, ctx);
+}
+
+template <typename I>
+void EventPreprocessor<I>::handle_update_client(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update mirror peer journal client: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_client_meta->snap_seqs = m_snap_seqs;
+ finish(0);
+}
+
+template <typename I>
+bool EventPreprocessor<I>::prune_snap_map(SnapSeqs *snap_seqs) {
+ bool pruned = false;
+
+ RWLock::RLocker snap_locker(m_local_image_ctx.snap_lock);
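+ // drop any mapping whose target local snapshot no longer exists (e.g. it
+ // was deleted locally after the mapping was recorded)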
+ for (auto it = snap_seqs->begin(); it != snap_seqs->end(); ) {
+ auto current_it(it++);
+ if (m_local_image_ctx.snap_info.count(current_it->second) == 0) {
+ snap_seqs->erase(current_it);
+ pruned = true;
+ }
+ }
+ return pruned;
+}
+
+template <typename I>
+void EventPreprocessor<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ Context *on_finish = m_on_finish;
+ m_on_finish = nullptr;
+ m_event_entry = nullptr;
+ m_in_progress = false;
+ m_snap_seqs_updated = false;
+ m_work_queue->queue(on_finish, r);
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::EventPreprocessor<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h
new file mode 100644
index 00000000..67aeea0b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h
@@ -0,0 +1,122 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
+#define RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
+
+#include "include/int_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <map>
+#include <string>
+#include <boost/variant/static_visitor.hpp>
+
+struct Context;
+struct ContextWQ;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class EventPreprocessor {
+public:
+ using Journaler = typename librbd::journal::TypeTraits<ImageCtxT>::Journaler;
+ using EventEntry = librbd::journal::EventEntry;
+ using MirrorPeerClientMeta = librbd::journal::MirrorPeerClientMeta;
+
+ static EventPreprocessor *create(ImageCtxT &local_image_ctx,
+ Journaler &remote_journaler,
+ const std::string &local_mirror_uuid,
+ MirrorPeerClientMeta *client_meta,
+ ContextWQ *work_queue) {
+ return new EventPreprocessor(local_image_ctx, remote_journaler,
+ local_mirror_uuid, client_meta, work_queue);
+ }
+
+ static void destroy(EventPreprocessor* processor) {
+ delete processor;
+ }
+
+ EventPreprocessor(ImageCtxT &local_image_ctx, Journaler &remote_journaler,
+ const std::string &local_mirror_uuid,
+ MirrorPeerClientMeta *client_meta, ContextWQ *work_queue);
+ ~EventPreprocessor();
+
+ bool is_required(const EventEntry &event_entry);
+ void preprocess(EventEntry *event_entry, Context *on_finish);
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v (skip if not required)
+ * REFRESH_IMAGE
+ * |
+ * v (skip if not required)
+ * PREPROCESS_EVENT
+ * |
+ * v (skip if not required)
+ * UPDATE_CLIENT
+ *
+ * @endverbatim
+ */
+
+ typedef std::map<uint64_t, uint64_t> SnapSeqs;
+
+ class PreprocessEventVisitor : public boost::static_visitor<int> {
+ public:
+ EventPreprocessor *event_preprocessor;
+
+ PreprocessEventVisitor(EventPreprocessor *event_preprocessor)
+ : event_preprocessor(event_preprocessor) {
+ }
+
+ template <typename T>
+ inline int operator()(T&) const {
+ return 0;
+ }
+ inline int operator()(librbd::journal::SnapRenameEvent &event) const {
+ return event_preprocessor->preprocess_snap_rename(event);
+ }
+ };
+
+ ImageCtxT &m_local_image_ctx;
+ Journaler &m_remote_journaler;
+ std::string m_local_mirror_uuid;
+ MirrorPeerClientMeta *m_client_meta;
+ ContextWQ *m_work_queue;
+
+ bool m_in_progress = false;
+ EventEntry *m_event_entry = nullptr;
+ Context *m_on_finish = nullptr;
+
+ SnapSeqs m_snap_seqs;
+ bool m_snap_seqs_updated = false;
+
+ bool prune_snap_map(SnapSeqs *snap_seqs);
+
+ void refresh_image();
+ void handle_refresh_image(int r);
+
+ void preprocess_event();
+ int preprocess_snap_rename(librbd::journal::SnapRenameEvent &event);
+
+ void update_client();
+ void handle_update_client(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::EventPreprocessor<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc
new file mode 100644
index 00000000..74e97537
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc
@@ -0,0 +1,85 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "GetMirrorImageIdRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::send() {
+ dout(20) << dendl;
+ get_image_id();
+}
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::get_image_id() {
+ dout(20) << dendl;
+
+ // attempt to cross-reference an image id by the global image id
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ GetMirrorImageIdRequest<I>,
+ &GetMirrorImageIdRequest<I>::handle_get_image_id>(
+ this);
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::handle_get_image_id(int r) {
+ if (r == 0) {
+ auto iter = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_image_id_finish(
+ &iter, m_image_id);
+ }
+
+ dout(20) << "r=" << r << ", "
+ << "image_id=" << *m_image_id << dendl;
+
+ if (r < 0) {
+ if (r == -ENOENT) {
+ dout(10) << "global image " << m_global_image_id << " not registered"
+ << dendl;
+ } else {
+ derr << "failed to retrieve image id: " << cpp_strerror(r) << dendl;
+ }
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::finish(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h
new file mode 100644
index 00000000..b2664513
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados_fwd.hpp"
+#include <string>
+
+namespace librbd { struct ImageCtx; }
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class GetMirrorImageIdRequest {
+public:
+ static GetMirrorImageIdRequest *create(librados::IoCtx &io_ctx,
+ const std::string &global_image_id,
+ std::string *image_id,
+ Context *on_finish) {
+ return new GetMirrorImageIdRequest(io_ctx, global_image_id, image_id,
+ on_finish);
+ }
+
+ GetMirrorImageIdRequest(librados::IoCtx &io_ctx,
+ const std::string &global_image_id,
+ std::string *image_id,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_global_image_id(global_image_id),
+ m_image_id(image_id), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_IMAGE_ID
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_global_image_id;
+ std::string *m_image_id;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+
+ void get_image_id();
+ void handle_get_image_id(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc
new file mode 100644
index 00000000..54636fdb
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc
@@ -0,0 +1,125 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "IsPrimaryRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::IsPrimaryRequest: " \
+ << this << " " << __func__ << " "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+IsPrimaryRequest<I>::IsPrimaryRequest(I *image_ctx, bool *primary,
+ Context *on_finish)
+ : m_image_ctx(image_ctx), m_primary(primary), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void IsPrimaryRequest<I>::send() {
+ send_get_mirror_state();
+}
+
+template <typename I>
+void IsPrimaryRequest<I>::send_get_mirror_state() {
+ dout(20) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_start(&op, m_image_ctx->id);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ IsPrimaryRequest<I>, &IsPrimaryRequest<I>::handle_get_mirror_state>(this);
+ int r = m_image_ctx->md_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void IsPrimaryRequest<I>::handle_get_mirror_state(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ cls::rbd::MirrorImage mirror_image;
+ if (r == 0) {
+ auto iter = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image);
+ if (r == 0) {
+ if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
+ send_is_tag_owner();
+ return;
+ } else if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) {
+ dout(5) << ": image mirroring is being disabled" << dendl;
+ r = -ENOENT;
+ } else {
+ derr << ": image mirroring is disabled" << dendl;
+ r = -EINVAL;
+ }
+ } else {
+ derr << ": failed to decode image mirror state: " << cpp_strerror(r)
+ << dendl;
+ }
+ } else if (r == -ENOENT) {
+ dout(5) << ": image is not mirrored" << dendl;
+ } else {
+ derr << ": failed to retrieve image mirror state: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ finish(r);
+}
+
+template <typename I>
+void IsPrimaryRequest<I>::send_is_tag_owner() {
+ // deduce the class type for the journal to support unit tests
+ using Journal = typename std::decay<
+ typename std::remove_pointer<decltype(std::declval<I>().journal)>
+ ::type>::type;
+
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ IsPrimaryRequest<I>, &IsPrimaryRequest<I>::handle_is_tag_owner>(this);
+
+ Journal::is_tag_owner(m_image_ctx, m_primary, ctx);
+}
+
+template <typename I>
+void IsPrimaryRequest<I>::handle_is_tag_owner(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to query remote image tag owner: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ finish(r);
+}
+
+template <typename I>
+void IsPrimaryRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::IsPrimaryRequest<librbd::ImageCtx>;
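+
+// Hedged usage sketch (`image_ctx` and `ctx` are caller-supplied): the
+// request self-deletes after completing the callback, and `primary` is
+// only meaningful when the callback observes r == 0:
+//
+//   bool primary = false;
+//   auto *req = IsPrimaryRequest<>::create(image_ctx, &primary, ctx);
+//   req->send();  // primary == true iff the image journal owns its tag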
diff --git a/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h
new file mode 100644
index 00000000..ddb332cb
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h
@@ -0,0 +1,67 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_IS_PRIMARY_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_IS_PRIMARY_REQUEST_H
+
+#include "include/buffer.h"
+
+class Context;
+class ContextWQ;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class IsPrimaryRequest {
+public:
+ static IsPrimaryRequest* create(ImageCtxT *image_ctx, bool *primary,
+ Context *on_finish) {
+ return new IsPrimaryRequest(image_ctx, primary, on_finish);
+ }
+
+ IsPrimaryRequest(ImageCtxT *image_ctx, bool *primary, Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_MIRROR_STATE * * * * *
+ * | *
+ * v *
+ * IS_TAG_OWNER * * * * * * * (error)
+ * | *
+ * v *
+ * <finish> < * * * * * * * *
+ *
+ * @endverbatim
+ */
+ ImageCtxT *m_image_ctx;
+ bool *m_primary;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+
+ void send_get_mirror_state();
+ void handle_get_mirror_state(int r);
+
+ void send_is_tag_owner();
+ void handle_is_tag_owner(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::IsPrimaryRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_IS_PRIMARY_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc
new file mode 100644
index 00000000..7f55745e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "OpenImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenImageRequest: " \
+ << this << " " << __func__ << " "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+OpenImageRequest<I>::OpenImageRequest(librados::IoCtx &io_ctx, I **image_ctx,
+ const std::string &image_id,
+ bool read_only, Context *on_finish)
+ : m_io_ctx(io_ctx), m_image_ctx(image_ctx), m_image_id(image_id),
+ m_read_only(read_only), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void OpenImageRequest<I>::send() {
+ send_open_image();
+}
+
+template <typename I>
+void OpenImageRequest<I>::send_open_image() {
+ dout(20) << dendl;
+
+ *m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, m_read_only);
+
+ Context *ctx = create_context_callback<
+ OpenImageRequest<I>, &OpenImageRequest<I>::handle_open_image>(
+ this);
+ (*m_image_ctx)->state->open(0, ctx);
+}
+
+template <typename I>
+void OpenImageRequest<I>::handle_open_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to open image '" << m_image_id << "': "
+ << cpp_strerror(r) << dendl;
+ (*m_image_ctx)->destroy();
+ *m_image_ctx = nullptr;
+ }
+
+ finish(r);
+}
+
+template <typename I>
+void OpenImageRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>;
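+
+// Hedged usage sketch: the out-parameter receives a freshly created
+// ImageCtx on success; on failure handle_open_image() destroys it and
+// resets the pointer to nullptr, so callers can simply test the pointer:
+//
+//   librbd::ImageCtx *image_ctx = nullptr;
+//   auto *req = OpenImageRequest<>::create(
+//     io_ctx, &image_ctx, image_id, true /* read_only */, on_finish);
+//   req->send();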
diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h
new file mode 100644
index 00000000..01ab3117
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h
@@ -0,0 +1,71 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class OpenImageRequest {
+public:
+ static OpenImageRequest* create(librados::IoCtx &io_ctx,
+ ImageCtxT **image_ctx,
+ const std::string &image_id,
+ bool read_only, Context *on_finish) {
+ return new OpenImageRequest(io_ctx, image_ctx, image_id, read_only,
+ on_finish);
+ }
+
+ OpenImageRequest(librados::IoCtx &io_ctx, ImageCtxT **image_ctx,
+ const std::string &image_id, bool read_only,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * OPEN_IMAGE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ librados::IoCtx &m_io_ctx;
+ ImageCtxT **m_image_ctx;
+ std::string m_image_id;
+ bool m_read_only;
+ Context *m_on_finish;
+
+ void send_open_image();
+ void handle_open_image(int r);
+
+ void send_close_image(int r);
+ void handle_close_image(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc
new file mode 100644
index 00000000..87b141ca
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc
@@ -0,0 +1,271 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "CloseImageRequest.h"
+#include "IsPrimaryRequest.h"
+#include "OpenLocalImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/exclusive_lock/Policy.h"
+#include "librbd/journal/Policy.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenLocalImageRequest: " \
+ << this << " " << __func__ << " "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+namespace {
+
+template <typename I>
+struct MirrorExclusiveLockPolicy : public librbd::exclusive_lock::Policy {
+ I *image_ctx;
+
+ MirrorExclusiveLockPolicy(I *image_ctx) : image_ctx(image_ctx) {
+ }
+
+ bool may_auto_request_lock() override {
+ return false;
+ }
+
+ int lock_requested(bool force) override {
+ int r = -EROFS;
+ {
+ RWLock::RLocker owner_locker(image_ctx->owner_lock);
+ RWLock::RLocker snap_locker(image_ctx->snap_lock);
+ if (image_ctx->journal == nullptr || image_ctx->journal->is_tag_owner()) {
+ r = 0;
+ }
+ }
+
+ if (r == 0) {
+ // if the local image journal has been closed or if it was (force)
+ // promoted, allow the lock to be released to another client
+ image_ctx->exclusive_lock->release_lock(nullptr);
+ }
+ return r;
+ }
+
+ bool accept_blocked_request(
+ librbd::exclusive_lock::OperationRequestType request_type) override {
+ if (request_type ==
+ librbd::exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE) {
+ return true;
+ }
+ return false;
+ }
+};
+
+struct MirrorJournalPolicy : public librbd::journal::Policy {
+ ContextWQ *work_queue;
+
+ MirrorJournalPolicy(ContextWQ *work_queue) : work_queue(work_queue) {
+ }
+
+ bool append_disabled() const override {
+ // avoid recording any events to the local journal
+ return true;
+ }
+ bool journal_disabled() const override {
+ return false;
+ }
+
+ void allocate_tag_on_lock(Context *on_finish) override {
+ // rbd-mirror will manually create tags by copying them from the peer
+ work_queue->queue(on_finish, 0);
+ }
+};
+
+} // anonymous namespace
+
+template <typename I>
+OpenLocalImageRequest<I>::OpenLocalImageRequest(librados::IoCtx &local_io_ctx,
+ I **local_image_ctx,
+ const std::string &local_image_id,
+ ContextWQ *work_queue,
+ Context *on_finish)
+ : m_local_io_ctx(local_io_ctx), m_local_image_ctx(local_image_ctx),
+ m_local_image_id(local_image_id), m_work_queue(work_queue),
+ m_on_finish(on_finish) {
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send() {
+ send_open_image();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send_open_image() {
+ dout(20) << dendl;
+
+ *m_local_image_ctx = I::create("", m_local_image_id, nullptr,
+ m_local_io_ctx, false);
+ {
+ RWLock::WLocker owner_locker((*m_local_image_ctx)->owner_lock);
+ RWLock::WLocker snap_locker((*m_local_image_ctx)->snap_lock);
+ (*m_local_image_ctx)->set_exclusive_lock_policy(
+ new MirrorExclusiveLockPolicy<I>(*m_local_image_ctx));
+ (*m_local_image_ctx)->set_journal_policy(
+ new MirrorJournalPolicy(m_work_queue));
+ }
+
+ Context *ctx = create_context_callback<
+ OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_open_image>(
+ this);
+ (*m_local_image_ctx)->state->open(0, ctx);
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::handle_open_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ if (r == -ENOENT) {
+ dout(10) << ": local image does not exist" << dendl;
+ } else {
+ derr << ": failed to open image '" << m_local_image_id << "': "
+ << cpp_strerror(r) << dendl;
+ }
+ (*m_local_image_ctx)->destroy();
+ *m_local_image_ctx = nullptr;
+ finish(r);
+ return;
+ }
+
+ send_is_primary();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send_is_primary() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_is_primary>(
+ this);
+ IsPrimaryRequest<I> *request = IsPrimaryRequest<I>::create(*m_local_image_ctx,
+ &m_primary, ctx);
+ request->send();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::handle_is_primary(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(5) << ": local image is not mirrored" << dendl;
+ send_close_image(r);
+ return;
+ } else if (r < 0) {
+ derr << ": error querying local image primary status: " << cpp_strerror(r)
+ << dendl;
+ send_close_image(r);
+ return;
+ }
+
+ // if the local image owns the tag -- don't steal the lock since
+ // we aren't going to mirror peer data into this image anyway
+ if (m_primary) {
+ dout(10) << ": local image is primary -- skipping image replay" << dendl;
+ send_close_image(-EREMOTEIO);
+ return;
+ }
+
+ send_lock_image();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send_lock_image() {
+ dout(20) << dendl;
+
+ RWLock::RLocker owner_locker((*m_local_image_ctx)->owner_lock);
+ if ((*m_local_image_ctx)->exclusive_lock == nullptr) {
+ derr << ": image does not support exclusive lock" << dendl;
+ send_close_image(-EINVAL);
+ return;
+ }
+
+ // disallow any proxied maintenance operations before grabbing lock
+ (*m_local_image_ctx)->exclusive_lock->block_requests(-EROFS);
+
+ Context *ctx = create_context_callback<
+ OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_lock_image>(
+ this);
+
+ (*m_local_image_ctx)->exclusive_lock->acquire_lock(ctx);
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::handle_lock_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to lock image '" << m_local_image_id << "': "
+ << cpp_strerror(r) << dendl;
+ send_close_image(r);
+ return;
+ }
+
+ {
+ RWLock::RLocker owner_locker((*m_local_image_ctx)->owner_lock);
+ if ((*m_local_image_ctx)->exclusive_lock == nullptr ||
+ !(*m_local_image_ctx)->exclusive_lock->is_lock_owner()) {
+ derr << ": image is not locked" << dendl;
+ send_close_image(-EBUSY);
+ return;
+ }
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send_close_image(int r) {
+ dout(20) << dendl;
+
+ if (m_ret_val == 0 && r < 0) {
+ m_ret_val = r;
+ }
+
+ Context *ctx = create_context_callback<
+ OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_close_image>(
+ this);
+ CloseImageRequest<I> *request = CloseImageRequest<I>::create(
+ m_local_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::handle_close_image(int r) {
+ dout(20) << dendl;
+
+ ceph_assert(r == 0);
+ finish(m_ret_val);
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>;
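+
+// Hedged usage note: completion codes follow the state machine in the
+// header -- 0 leaves *local_image_ctx open with the exclusive lock
+// acquired, -ENOENT means the image is missing or not mirrored, and
+// -EREMOTEIO means the local image is primary and must not be replayed
+// into. A minimal sketch (all names caller-supplied):
+//
+//   librbd::ImageCtx *local_image_ctx = nullptr;
+//   auto *req = OpenLocalImageRequest<>::create(
+//     local_io_ctx, &local_image_ctx, local_image_id, work_queue, ctx);
+//   req->send();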
diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h
new file mode 100644
index 00000000..58de545f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h
@@ -0,0 +1,90 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class Context;
+class ContextWQ;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class OpenLocalImageRequest {
+public:
+ static OpenLocalImageRequest* create(librados::IoCtx &local_io_ctx,
+ ImageCtxT **local_image_ctx,
+ const std::string &local_image_id,
+ ContextWQ *work_queue,
+ Context *on_finish) {
+ return new OpenLocalImageRequest(local_io_ctx, local_image_ctx,
+ local_image_id, work_queue, on_finish);
+ }
+
+ OpenLocalImageRequest(librados::IoCtx &local_io_ctx,
+ ImageCtxT **local_image_ctx,
+ const std::string &local_image_id,
+ ContextWQ *work_queue,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * OPEN_IMAGE * * * * * * * *
+ * | *
+ * v *
+ * IS_PRIMARY * * * * * * * *
+ * | *
+ * v (skip if primary) v
+ * LOCK_IMAGE * * * > CLOSE_IMAGE
+ * | |
+ * v |
+ * <finish> <---------------/
+ *
+ * @endverbatim
+ */
+ librados::IoCtx &m_local_io_ctx;
+ ImageCtxT **m_local_image_ctx;
+ std::string m_local_image_id;
+ ContextWQ *m_work_queue;
+ Context *m_on_finish;
+
+ bool m_primary = false;
+ int m_ret_val = 0;
+
+ void send_open_image();
+ void handle_open_image(int r);
+
+ void send_is_primary();
+ void handle_is_primary(int r);
+
+ void send_lock_image();
+ void handle_lock_image(int r);
+
+ void send_close_image(int r);
+ void handle_close_image(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc
new file mode 100644
index 00000000..8e0ea837
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc
@@ -0,0 +1,180 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "PrepareLocalImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void PrepareLocalImageRequest<I>::send() {
+ dout(20) << dendl;
+ get_local_image_id();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::get_local_image_id() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ PrepareLocalImageRequest<I>,
+ &PrepareLocalImageRequest<I>::handle_get_local_image_id>(this);
+ auto req = GetMirrorImageIdRequest<I>::create(m_io_ctx, m_global_image_id,
+ m_local_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_local_image_id(int r) {
+ dout(20) << "r=" << r << ", "
+ << "local_image_id=" << *m_local_image_id << dendl;
+
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ get_local_image_name();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::get_local_image_name() {
+ dout(20) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::dir_get_name_start(&op, *m_local_image_id);
+
+ m_out_bl.clear();
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ PrepareLocalImageRequest<I>,
+ &PrepareLocalImageRequest<I>::handle_get_local_image_name>(this);
+ int r = m_io_ctx.aio_operate(RBD_DIRECTORY, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_local_image_name(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::dir_get_name_finish(&it, m_local_image_name);
+ }
+
+ if (r < 0) {
+ if (r != -ENOENT) {
+ derr << "failed to retrieve image name: " << cpp_strerror(r) << dendl;
+ }
+ finish(r);
+ return;
+ }
+
+ get_mirror_state();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::get_mirror_state() {
+ dout(20) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_start(&op, *m_local_image_id);
+
+ m_out_bl.clear();
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ PrepareLocalImageRequest<I>,
+ &PrepareLocalImageRequest<I>::handle_get_mirror_state>(this);
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_mirror_state(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ cls::rbd::MirrorImage mirror_image;
+ if (r == 0) {
+ auto iter = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image);
+ }
+
+ if (r < 0) {
+ derr << "failed to retrieve image mirror state: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ // TODO save current mirror state to determine if we should
+ // delete a partially formed image
+ // (e.g. MIRROR_IMAGE_STATE_CREATING/DELETING)
+
+ get_tag_owner();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::get_tag_owner() {
+ // deduce the class type for the journal to support unit tests
+ using Journal = typename std::decay<
+ typename std::remove_pointer<decltype(std::declval<I>().journal)>
+ ::type>::type;
+
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ PrepareLocalImageRequest<I>,
+ &PrepareLocalImageRequest<I>::handle_get_tag_owner>(this);
+ Journal::get_tag_owner(m_io_ctx, *m_local_image_id, m_tag_owner,
+ m_work_queue, ctx);
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_tag_owner(int r) {
+ dout(20) << "r=" << r << ", "
+ << "tag_owner=" << *m_tag_owner << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve journal tag owner: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::finish(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>;
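+
+// Hedged usage sketch: on success all three out-parameters are populated
+// (the local image id resolved from the global id, its name from the rbd
+// directory, and the journal tag owner); -ENOENT means no local image is
+// registered for the global image id:
+//
+//   std::string image_id, image_name, tag_owner;
+//   auto *req = PrepareLocalImageRequest<>::create(
+//     io_ctx, global_image_id, &image_id, &image_name, &tag_owner,
+//     work_queue, on_finish);
+//   req->send();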
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h
new file mode 100644
index 00000000..3417dd96
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h
@@ -0,0 +1,102 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados_fwd.hpp"
+#include <string>
+
+namespace librbd { struct ImageCtx; }
+
+struct Context;
+struct ContextWQ;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class PrepareLocalImageRequest {
+public:
+ static PrepareLocalImageRequest *create(librados::IoCtx &io_ctx,
+ const std::string &global_image_id,
+ std::string *local_image_id,
+ std::string *local_image_name,
+ std::string *tag_owner,
+ ContextWQ *work_queue,
+ Context *on_finish) {
+ return new PrepareLocalImageRequest(io_ctx, global_image_id, local_image_id,
+ local_image_name, tag_owner, work_queue,
+ on_finish);
+ }
+
+ PrepareLocalImageRequest(librados::IoCtx &io_ctx,
+ const std::string &global_image_id,
+ std::string *local_image_id,
+ std::string *local_image_name,
+ std::string *tag_owner,
+ ContextWQ *work_queue,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_global_image_id(global_image_id),
+ m_local_image_id(local_image_id), m_local_image_name(local_image_name),
+ m_tag_owner(tag_owner), m_work_queue(work_queue), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_LOCAL_IMAGE_ID
+ * |
+ * v
+ * GET_LOCAL_IMAGE_NAME
+ * |
+ * v
+ * GET_MIRROR_STATE
+ * |
+ * v
+ * GET_TAG_OWNER
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_global_image_id;
+ std::string *m_local_image_id;
+ std::string *m_local_image_name;
+ std::string *m_tag_owner;
+ ContextWQ *m_work_queue;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+
+ void get_local_image_id();
+ void handle_get_local_image_id(int r);
+
+ void get_local_image_name();
+ void handle_get_local_image_name(int r);
+
+ void get_mirror_state();
+ void handle_get_mirror_state(int r);
+
+ void get_tag_owner();
+ void handle_get_tag_owner(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc
new file mode 100644
index 00000000..00c141e0
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc
@@ -0,0 +1,195 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Types.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "PrepareRemoteImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::send() {
+ get_remote_mirror_uuid();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_remote_mirror_uuid() {
+ dout(20) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_uuid_get_start(&op);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_get_remote_mirror_uuid>(this);
+ int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_remote_mirror_uuid(int r) {
+ if (r >= 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_uuid_get_finish(&it, m_remote_mirror_uuid);
+ if (r >= 0 && m_remote_mirror_uuid->empty()) {
+ r = -ENOENT;
+ }
+ }
+
+ dout(20) << "r=" << r << dendl;
+ if (r < 0) {
+ if (r == -ENOENT) {
+ dout(5) << "remote mirror uuid missing" << dendl;
+ } else {
+ derr << "failed to retrieve remote mirror uuid: " << cpp_strerror(r)
+ << dendl;
+ }
+ finish(r);
+ return;
+ }
+
+ get_remote_image_id();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_remote_image_id() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_get_remote_image_id>(this);
+ auto req = GetMirrorImageIdRequest<I>::create(m_remote_io_ctx,
+ m_global_image_id,
+ m_remote_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_remote_image_id(int r) {
+ dout(20) << "r=" << r << ", "
+ << "remote_image_id=" << *m_remote_image_id << dendl;
+
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ get_client();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_client() {
+ dout(20) << dendl;
+
+ ceph_assert(*m_remote_journaler == nullptr);
+ *m_remote_journaler = new Journaler(m_threads->work_queue, m_threads->timer,
+ &m_threads->timer_lock, m_remote_io_ctx,
+ *m_remote_image_id, m_local_mirror_uuid,
+ m_journal_settings);
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_get_client>(this));
+ (*m_remote_journaler)->get_client(m_local_mirror_uuid, &m_client, ctx);
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_client(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "client not registered" << dendl;
+ register_client();
+ } else if (r < 0) {
+ derr << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+ finish(r);
+ } else if (!util::decode_client_meta(m_client, m_client_meta)) {
+ // require operator intervention since the data is corrupt
+ finish(-EBADMSG);
+ } else {
+ // skip registration if it already exists
+ *m_client_state = m_client.state;
+ finish(0);
+ }
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::register_client() {
+ dout(20) << dendl;
+
+ librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{
+ m_local_image_id};
+ mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+
+ librbd::journal::ClientData client_data{mirror_peer_client_meta};
+ bufferlist client_data_bl;
+ encode(client_data, client_data_bl);
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_register_client>(this));
+ (*m_remote_journaler)->register_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_register_client(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to register with remote journal: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ *m_client_state = cls::journal::CLIENT_STATE_CONNECTED;
+ *m_client_meta = librbd::journal::MirrorPeerClientMeta(m_local_image_id);
+ m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+
+ finish(0);
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::finish(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ delete *m_remote_journaler;
+ *m_remote_journaler = nullptr;
+ }
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>;
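+
+// Hedged ownership note: on success the request hands back a newly
+// allocated Journaler through the out-parameter and the caller assumes
+// ownership; on any failure finish() deletes it and resets the pointer to
+// nullptr, so callers never see a partially prepared journaler.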
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h
new file mode 100644
index 00000000..100a066b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h
@@ -0,0 +1,141 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
+
+#include "include/buffer_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+#include "cls/journal/cls_journal_types.h"
+#include "journal/Settings.h"
+#include "librbd/journal/TypeTraits.h"
+#include <string>
+
+namespace journal { class Journaler; }
+namespace journal { class Settings; }
+namespace librbd { struct ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+struct Context;
+struct ContextWQ;
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class PrepareRemoteImageRequest {
+public:
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+ typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta;
+
+ static PrepareRemoteImageRequest *create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &remote_io_ctx,
+ const std::string &global_image_id,
+ const std::string &local_mirror_uuid,
+ const std::string &local_image_id,
+ const journal::Settings &settings,
+ std::string *remote_mirror_uuid,
+ std::string *remote_image_id,
+ Journaler **remote_journaler,
+ cls::journal::ClientState *client_state,
+ MirrorPeerClientMeta *client_meta,
+ Context *on_finish) {
+ return new PrepareRemoteImageRequest(threads, remote_io_ctx,
+ global_image_id, local_mirror_uuid,
+ local_image_id, settings,
+ remote_mirror_uuid, remote_image_id,
+ remote_journaler, client_state,
+ client_meta, on_finish);
+ }
+
+ PrepareRemoteImageRequest(Threads<ImageCtxT> *threads,
+ librados::IoCtx &remote_io_ctx,
+ const std::string &global_image_id,
+ const std::string &local_mirror_uuid,
+ const std::string &local_image_id,
+ const journal::Settings &journal_settings,
+ std::string *remote_mirror_uuid,
+ std::string *remote_image_id,
+ Journaler **remote_journaler,
+ cls::journal::ClientState *client_state,
+ MirrorPeerClientMeta *client_meta,
+ Context *on_finish)
+ : m_threads(threads), m_remote_io_ctx(remote_io_ctx),
+ m_global_image_id(global_image_id),
+ m_local_mirror_uuid(local_mirror_uuid), m_local_image_id(local_image_id),
+ m_journal_settings(journal_settings),
+ m_remote_mirror_uuid(remote_mirror_uuid),
+ m_remote_image_id(remote_image_id),
+ m_remote_journaler(remote_journaler), m_client_state(client_state),
+ m_client_meta(client_meta), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_REMOTE_MIRROR_UUID
+ * |
+ * v
+ * GET_REMOTE_IMAGE_ID
+ * |
+ * v
+ * GET_CLIENT
+ * |
+ * v (skip if not needed)
+ * REGISTER_CLIENT
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx &m_remote_io_ctx;
+ std::string m_global_image_id;
+ std::string m_local_mirror_uuid;
+ std::string m_local_image_id;
+ journal::Settings m_journal_settings;
+ std::string *m_remote_mirror_uuid;
+ std::string *m_remote_image_id;
+ Journaler **m_remote_journaler;
+ cls::journal::ClientState *m_client_state;
+ MirrorPeerClientMeta *m_client_meta;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+ cls::journal::Client m_client;
+
+ void get_remote_mirror_uuid();
+ void handle_get_remote_mirror_uuid(int r);
+
+ void get_remote_image_id();
+ void handle_get_remote_image_id(int r);
+
+ void get_client();
+ void handle_get_client(int r);
+
+ void register_client();
+ void handle_register_client(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc
new file mode 100644
index 00000000..f514d749
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc
@@ -0,0 +1,246 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ReplayStatusFormatter.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::ReplayStatusFormatter: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::unique_lock_name;
+
+template <typename I>
+ReplayStatusFormatter<I>::ReplayStatusFormatter(Journaler *journaler,
+ const std::string &mirror_uuid)
+ : m_journaler(journaler),
+ m_mirror_uuid(mirror_uuid),
+ m_lock(unique_lock_name("ReplayStatusFormatter::m_lock", this)) {
+}
+
+template <typename I>
+bool ReplayStatusFormatter<I>::get_or_send_update(std::string *description,
+ Context *on_finish) {
+ dout(20) << dendl;
+
+ bool in_progress = false;
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_on_finish) {
+ in_progress = true;
+ } else {
+ m_on_finish = on_finish;
+ }
+ }
+
+ if (in_progress) {
+ dout(10) << "previous request is still in progress, ignoring" << dendl;
+ on_finish->complete(-EAGAIN);
+ return false;
+ }
+
+ m_master_position = cls::journal::ObjectPosition();
+ m_mirror_position = cls::journal::ObjectPosition();
+
+ cls::journal::Client master_client, mirror_client;
+ int r;
+
+ r = m_journaler->get_cached_client(librbd::Journal<>::IMAGE_CLIENT_ID,
+ &master_client);
+ if (r < 0) {
+ derr << "error retrieving registered master client: "
+ << cpp_strerror(r) << dendl;
+ } else {
+ r = m_journaler->get_cached_client(m_mirror_uuid, &mirror_client);
+ if (r < 0) {
+ derr << "error retrieving registered mirror client: "
+ << cpp_strerror(r) << dendl;
+ }
+ }
+
+ if (!master_client.commit_position.object_positions.empty()) {
+ m_master_position =
+ *(master_client.commit_position.object_positions.begin());
+ }
+
+ if (!mirror_client.commit_position.object_positions.empty()) {
+ m_mirror_position =
+ *(mirror_client.commit_position.object_positions.begin());
+ }
+
+ if (!calculate_behind_master_or_send_update()) {
+ dout(20) << "need to update tag cache" << dendl;
+ return false;
+ }
+
+ format(description);
+
+ {
+ Mutex::Locker locker(m_lock);
+ ceph_assert(m_on_finish == on_finish);
+ m_on_finish = nullptr;
+ }
+
+ on_finish->complete(-EEXIST);
+ return true;
+}
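+
+// Hedged contract note for callers of get_or_send_update(): a true return
+// means *description was produced synchronously and on_finish was
+// completed with -EEXIST; a false return means either a previous request
+// is still in flight (on_finish completed with -EAGAIN) or a tag-cache
+// refresh was scheduled (on_finish completed with 0 once ready).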
+
+template <typename I>
+bool ReplayStatusFormatter<I>::calculate_behind_master_or_send_update() {
+ dout(20) << "m_master_position=" << m_master_position
+ << ", m_mirror_position=" << m_mirror_position << dendl;
+
+ m_entries_behind_master = 0;
+
+ if (m_master_position == cls::journal::ObjectPosition() ||
+ m_master_position.tag_tid < m_mirror_position.tag_tid) {
+ return true;
+ }
+
+ cls::journal::ObjectPosition master = m_master_position;
+ uint64_t mirror_tag_tid = m_mirror_position.tag_tid;
+
+ while (master.tag_tid > mirror_tag_tid) {
+ auto tag_it = m_tag_cache.find(master.tag_tid);
+ if (tag_it == m_tag_cache.end()) {
+ send_update_tag_cache(master.tag_tid, mirror_tag_tid);
+ return false;
+ }
+ librbd::journal::TagData &tag_data = tag_it->second;
+ m_entries_behind_master += master.entry_tid;
+ master = {0, tag_data.predecessor.tag_tid, tag_data.predecessor.entry_tid};
+ }
+ if (master.tag_tid == mirror_tag_tid &&
+ master.entry_tid > m_mirror_position.entry_tid) {
+ m_entries_behind_master += master.entry_tid - m_mirror_position.entry_tid;
+ }
+
+ dout(20) << "clearing tags not needed any more (below mirror position)"
+ << dendl;
+
+ uint64_t tag_tid = mirror_tag_tid;
+ size_t old_size = m_tag_cache.size();
+ while (tag_tid != 0) {
+ auto tag_it = m_tag_cache.find(tag_tid);
+ if (tag_it == m_tag_cache.end()) {
+ break;
+ }
+ librbd::journal::TagData &tag_data = tag_it->second;
+
+ dout(20) << "erasing tag " << tag_data << "for tag_tid " << tag_tid
+ << dendl;
+
+ tag_tid = tag_data.predecessor.tag_tid;
+ m_tag_cache.erase(tag_it);
+ }
+
+ dout(20) << old_size - m_tag_cache.size() << " entries cleared" << dendl;
+
+ return true;
+}
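+
+// Worked example with illustrative numbers: master at
+// {tag_tid=3, entry_tid=10}, mirror at {tag_tid=1, entry_tid=4}, cached
+// predecessors tag 3 -> {tag_tid=2, entry_tid=7} and
+// tag 2 -> {tag_tid=1, entry_tid=6}. The walk adds 10 (tag 3) and 7
+// (tag 2), then 6 - 4 = 2 on the shared tag, giving
+// m_entries_behind_master = 19.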
+
+template <typename I>
+void ReplayStatusFormatter<I>::send_update_tag_cache(uint64_t master_tag_tid,
+ uint64_t mirror_tag_tid) {
+ if (master_tag_tid <= mirror_tag_tid ||
+ m_tag_cache.find(master_tag_tid) != m_tag_cache.end()) {
+ Context *on_finish = nullptr;
+ {
+ Mutex::Locker locker(m_lock);
+ std::swap(m_on_finish, on_finish);
+ }
+
+ ceph_assert(on_finish);
+ on_finish->complete(0);
+ return;
+ }
+
+ dout(20) << "master_tag_tid=" << master_tag_tid << ", mirror_tag_tid="
+ << mirror_tag_tid << dendl;
+
+ FunctionContext *ctx = new FunctionContext(
+ [this, master_tag_tid, mirror_tag_tid](int r) {
+ handle_update_tag_cache(master_tag_tid, mirror_tag_tid, r);
+ });
+ m_journaler->get_tag(master_tag_tid, &m_tag, ctx);
+}
+
+template <typename I>
+void ReplayStatusFormatter<I>::handle_update_tag_cache(uint64_t master_tag_tid,
+ uint64_t mirror_tag_tid,
+ int r) {
+ librbd::journal::TagData tag_data;
+
+ if (r < 0) {
+ derr << "error retrieving tag " << master_tag_tid << ": " << cpp_strerror(r)
+ << dendl;
+ } else {
+ dout(20) << "retrieved tag " << master_tag_tid << ": " << m_tag << dendl;
+
+ auto it = m_tag.data.cbegin();
+ try {
+ decode(tag_data, it);
+ } catch (const buffer::error &err) {
+ derr << "error decoding tag " << master_tag_tid << ": " << err.what()
+ << dendl;
+ }
+ }
+
+ if (tag_data.predecessor.mirror_uuid !=
+ librbd::Journal<>::LOCAL_MIRROR_UUID &&
+ tag_data.predecessor.mirror_uuid !=
+ librbd::Journal<>::ORPHAN_MIRROR_UUID) {
+ dout(20) << "hit remote image non-primary epoch" << dendl;
+ tag_data.predecessor = {};
+ }
+
+ dout(20) << "decoded tag " << master_tag_tid << ": " << tag_data << dendl;
+
+ m_tag_cache[master_tag_tid] = tag_data;
+ send_update_tag_cache(tag_data.predecessor.tag_tid, mirror_tag_tid);
+}
+
+template <typename I>
+void ReplayStatusFormatter<I>::format(std::string *description) {
+
+ dout(20) << "m_master_position=" << m_master_position
+ << ", m_mirror_position=" << m_mirror_position
+ << ", m_entries_behind_master=" << m_entries_behind_master << dendl;
+
+ std::stringstream ss;
+ ss << "master_position=";
+ if (m_master_position == cls::journal::ObjectPosition()) {
+ ss << "[]";
+ } else {
+ ss << m_master_position;
+ }
+ ss << ", mirror_position=";
+ if (m_mirror_position == cls::journal::ObjectPosition()) {
+ ss << "[]";
+ } else {
+ ss << m_mirror_position;
+ }
+ ss << ", entries_behind_master="
+ << (m_entries_behind_master > 0 ? m_entries_behind_master : 0);
+
+ *description = ss.str();
+}
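+
+// Illustrative description string (the exact ObjectPosition rendering is
+// assumed from cls/journal/cls_journal_types.cc):
+//
+//   master_position=[object_number=1, tag_tid=3, entry_tid=10],
+//   mirror_position=[object_number=0, tag_tid=3, entry_tid=8],
+//   entries_behind_master=2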
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class
+rbd::mirror::image_replayer::ReplayStatusFormatter<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h
new file mode 100644
index 00000000..59940a65
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H
+
+#include "include/Context.h"
+#include "common/Mutex.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ReplayStatusFormatter {
+public:
+ typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler;
+
+ static ReplayStatusFormatter* create(Journaler *journaler,
+ const std::string &mirror_uuid) {
+ return new ReplayStatusFormatter(journaler, mirror_uuid);
+ }
+
+ static void destroy(ReplayStatusFormatter* formatter) {
+ delete formatter;
+ }
+
+ ReplayStatusFormatter(Journaler *journaler, const std::string &mirror_uuid);
+
+ bool get_or_send_update(std::string *description, Context *on_finish);
+
+private:
+ Journaler *m_journaler;
+ std::string m_mirror_uuid;
+ Mutex m_lock;
+ Context *m_on_finish = nullptr;
+ cls::journal::ObjectPosition m_master_position;
+ cls::journal::ObjectPosition m_mirror_position;
+ int m_entries_behind_master = 0;
+ cls::journal::Tag m_tag;
+ std::map<uint64_t, librbd::journal::TagData> m_tag_cache;
+
+ bool calculate_behind_master_or_send_update();
+ void send_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid);
+ void handle_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid,
+ int r);
+ void format(std::string *description);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H
diff --git a/src/tools/rbd_mirror/image_replayer/Types.h b/src/tools/rbd_mirror/image_replayer/Types.h
new file mode 100644
index 00000000..6ab988a7
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Types.h
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+enum HealthState {
+ HEALTH_STATE_OK,
+ HEALTH_STATE_WARNING,
+ HEALTH_STATE_ERROR
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H
diff --git a/src/tools/rbd_mirror/image_replayer/Utils.cc b/src/tools/rbd_mirror/image_replayer/Utils.cc
new file mode 100644
index 00000000..eda0179f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Utils.cc
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::util::" \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace util {
+
+bool decode_client_meta(const cls::journal::Client& client,
+ librbd::journal::MirrorPeerClientMeta* client_meta) {
+ dout(15) << dendl;
+
+ librbd::journal::ClientData client_data;
+ auto it = client.data.cbegin();
+ try {
+ decode(client_data, it);
+ } catch (const buffer::error &err) {
+ derr << "failed to decode client meta data: " << err.what() << dendl;
+ return false;
+ }
+
+ auto local_client_meta = boost::get<librbd::journal::MirrorPeerClientMeta>(
+ &client_data.client_meta);
+ if (local_client_meta == nullptr) {
+ derr << "unknown peer registration" << dendl;
+ return false;
+ }
+
+ *client_meta = *local_client_meta;
+ dout(15) << "client found: client_meta=" << *client_meta << dendl;
+ return true;
+}
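+
+// Hedged usage sketch (mirrors the call site in
+// PrepareRemoteImageRequest): a false return means the registration
+// payload was undecodable or not a mirror-peer record, which callers
+// treat as unrecoverable:
+//
+//   librbd::journal::MirrorPeerClientMeta client_meta;
+//   if (!util::decode_client_meta(registered_client, &client_meta)) {
+//     on_finish->complete(-EBADMSG);  // operator intervention required
+//   }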
+
+} // namespace util
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
diff --git a/src/tools/rbd_mirror/image_replayer/Utils.h b/src/tools/rbd_mirror/image_replayer/Utils.h
new file mode 100644
index 00000000..d42146d1
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Utils.h
@@ -0,0 +1,23 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_UTILS_H
+#define RBD_MIRROR_IMAGE_REPLAYER_UTILS_H
+
+namespace cls { namespace journal { struct Client; } }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace util {
+
+bool decode_client_meta(const cls::journal::Client& client,
+ librbd::journal::MirrorPeerClientMeta* client_meta);
+
+} // namespace util
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_UTILS_H
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc
new file mode 100644
index 00000000..ffe2eca9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc
@@ -0,0 +1,182 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SyncPointCreateRequest.h"
+#include "include/uuid.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointCreateRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+namespace {
+
+static const std::string SNAP_NAME_PREFIX(".rbd-mirror");
+
+} // anonymous namespace
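+
+// With this prefix, sync-point snapshot names built in send_update_client()
+// take the form ".rbd-mirror.<mirror_uuid>.<random uuid>", scoping each
+// snapshot to the requesting peer and keeping repeated sync attempts
+// collision-free.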
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+SyncPointCreateRequest<I>::SyncPointCreateRequest(I *remote_image_ctx,
+ const std::string &mirror_uuid,
+ Journaler *journaler,
+ MirrorPeerClientMeta *client_meta,
+ Context *on_finish)
+ : m_remote_image_ctx(remote_image_ctx), m_mirror_uuid(mirror_uuid),
+ m_journaler(journaler), m_client_meta(client_meta), m_on_finish(on_finish),
+ m_client_meta_copy(*client_meta) {
+ ceph_assert(m_client_meta->sync_points.size() < 2);
+
+ // initialize the updated client meta with the new sync point
+ m_client_meta_copy.sync_points.emplace_back();
+ if (m_client_meta_copy.sync_points.size() > 1) {
+ m_client_meta_copy.sync_points.back().from_snap_name =
+ m_client_meta_copy.sync_points.front().snap_name;
+ }
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send() {
+ send_update_client();
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send_update_client() {
+ uuid_d uuid_gen;
+ uuid_gen.generate_random();
+
+ MirrorPeerSyncPoint &sync_point = m_client_meta_copy.sync_points.back();
+ sync_point.snap_name = SNAP_NAME_PREFIX + "." + m_mirror_uuid + "." +
+ uuid_gen.to_string();
+
+ dout(20) << ": sync_point=" << sync_point << dendl;
+
+ bufferlist client_data_bl;
+ librbd::journal::ClientData client_data(m_client_meta_copy);
+ encode(client_data, client_data_bl);
+
+ Context *ctx = create_context_callback<
+ SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_update_client>(
+ this);
+ m_journaler->update_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_update_client(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to update client data: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ // update the provided meta structure to reflect reality
+ *m_client_meta = m_client_meta_copy;
+
+ send_refresh_image();
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send_refresh_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_refresh_image>(
+ this);
+ m_remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_refresh_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_create_snap();
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send_create_snap() {
+ dout(20) << dendl;
+
+ MirrorPeerSyncPoint &sync_point = m_client_meta_copy.sync_points.back();
+
+ Context *ctx = create_context_callback<
+ SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_create_snap>(
+ this);
+ m_remote_image_ctx->operations->snap_create(
+ cls::rbd::UserSnapshotNamespace(), sync_point.snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_create_snap(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r == -EEXIST) {
+ send_update_client();
+ return;
+ } else if (r < 0) {
+ derr << ": failed to create snapshot: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_final_refresh_image();
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send_final_refresh_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ SyncPointCreateRequest<I>,
+ &SyncPointCreateRequest<I>::handle_final_refresh_image>(this);
+ m_remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_final_refresh_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to refresh image for snapshot: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h
new file mode 100644
index 00000000..45275ec4
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h
@@ -0,0 +1,96 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H
+#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H
+
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <string>
+
+class Context;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SyncPointCreateRequest {
+public:
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+ typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta;
+ typedef librbd::journal::MirrorPeerSyncPoint MirrorPeerSyncPoint;
+
+ static SyncPointCreateRequest* create(ImageCtxT *remote_image_ctx,
+ const std::string &mirror_uuid,
+ Journaler *journaler,
+ MirrorPeerClientMeta *client_meta,
+ Context *on_finish) {
+ return new SyncPointCreateRequest(remote_image_ctx, mirror_uuid, journaler,
+ client_meta, on_finish);
+ }
+
+ SyncPointCreateRequest(ImageCtxT *remote_image_ctx,
+ const std::string &mirror_uuid, Journaler *journaler,
+ MirrorPeerClientMeta *client_meta, Context *on_finish);
+
+ void send();
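+
+ // Illustrative call site (a sketch only -- ImageSync is the real caller):
+ //
+ //   auto *req = SyncPointCreateRequest<>::create(
+ //       remote_image_ctx, mirror_uuid, journaler, &client_meta, on_finish);
+ //   req->send(); // completes on_finish and deletes itself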
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * UPDATE_CLIENT < . .
+ * | .
+ * v .
+ * REFRESH_IMAGE .
+ * | . (repeat on EEXIST)
+ * v .
+ * CREATE_SNAP . . . .
+ * |
+ * v
+ * REFRESH_IMAGE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT *m_remote_image_ctx;
+ std::string m_mirror_uuid;
+ Journaler *m_journaler;
+ MirrorPeerClientMeta *m_client_meta;
+ Context *m_on_finish;
+
+ MirrorPeerClientMeta m_client_meta_copy;
+
+ void send_update_client();
+ void handle_update_client(int r);
+
+ void send_refresh_image();
+ void handle_refresh_image(int r);
+
+ void send_create_snap();
+ void handle_create_snap(int r);
+
+ void send_final_refresh_image();
+ void handle_final_refresh_image(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc
new file mode 100644
index 00000000..2cfed5e6
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc
@@ -0,0 +1,220 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SyncPointPruneRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include <set>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointPruneRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+SyncPointPruneRequest<I>::SyncPointPruneRequest(I *remote_image_ctx,
+ bool sync_complete,
+ Journaler *journaler,
+ MirrorPeerClientMeta *client_meta,
+ Context *on_finish)
+ : m_remote_image_ctx(remote_image_ctx), m_sync_complete(sync_complete),
+ m_journaler(journaler), m_client_meta(client_meta), m_on_finish(on_finish),
+ m_client_meta_copy(*client_meta) {
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::send() {
+ if (m_client_meta->sync_points.empty()) {
+ send_remove_snap();
+ return;
+ }
+
+ if (m_sync_complete) {
+ // if sync is complete, we can remove the master sync point
+ auto it = m_client_meta_copy.sync_points.begin();
+ MirrorPeerSyncPoint &sync_point = *it;
+
+ ++it;
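+ // prune the front sync point's snapshot only if the next sync point
+ // (if any) is not based on it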
+ if (it == m_client_meta_copy.sync_points.end() ||
+ it->from_snap_name != sync_point.snap_name) {
+ m_snap_names.push_back(sync_point.snap_name);
+ }
+
+ if (!sync_point.from_snap_name.empty()) {
+ m_snap_names.push_back(sync_point.from_snap_name);
+ }
+ } else {
+ // if we have more than one sync point or invalid sync points,
+ // trim them off
+ RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock);
+ std::set<std::string> snap_names;
+ for (auto it = m_client_meta_copy.sync_points.rbegin();
+ it != m_client_meta_copy.sync_points.rend(); ++it) {
+ MirrorPeerSyncPoint &sync_point = *it;
+ if (&sync_point == &m_client_meta_copy.sync_points.front()) {
+ if (m_remote_image_ctx->get_snap_id(
+ cls::rbd::UserSnapshotNamespace(), sync_point.snap_name) ==
+ CEPH_NOSNAP) {
+ derr << ": failed to locate sync point snapshot: "
+ << sync_point.snap_name << dendl;
+ } else if (!sync_point.from_snap_name.empty()) {
+ derr << ": unexpected from_snap_name in primary sync point: "
+ << sync_point.from_snap_name << dendl;
+ } else {
+ // first sync point is OK -- keep it
+ break;
+ }
+ m_invalid_master_sync_point = true;
+ }
+
+ if (snap_names.count(sync_point.snap_name) == 0) {
+ snap_names.insert(sync_point.snap_name);
+ m_snap_names.push_back(sync_point.snap_name);
+ }
+
+ MirrorPeerSyncPoint &front_sync_point =
+ m_client_meta_copy.sync_points.front();
+ if (!sync_point.from_snap_name.empty() &&
+ snap_names.count(sync_point.from_snap_name) == 0 &&
+ sync_point.from_snap_name != front_sync_point.snap_name) {
+ snap_names.insert(sync_point.from_snap_name);
+ m_snap_names.push_back(sync_point.from_snap_name);
+ }
+ }
+ }
+
+ send_remove_snap();
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::send_remove_snap() {
+ if (m_snap_names.empty()) {
+ send_refresh_image();
+ return;
+ }
+
+ const std::string &snap_name = m_snap_names.front();
+
+ dout(20) << ": snap_name=" << snap_name << dendl;
+
+ Context *ctx = create_context_callback<
+ SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_remove_snap>(
+ this);
+ m_remote_image_ctx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(),
+ snap_name.c_str(),
+ ctx);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::handle_remove_snap(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ ceph_assert(!m_snap_names.empty());
+ std::string snap_name = m_snap_names.front();
+ m_snap_names.pop_front();
+
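+ // a missing snapshot is not an error -- it may already have been
+ // removed by an earlier, interrupted prune attempt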
+ if (r == -ENOENT) {
+ r = 0;
+ }
+ if (r < 0) {
+ derr << ": failed to remove snapshot '" << snap_name << "': "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_remove_snap();
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::send_refresh_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_refresh_image>(
+ this);
+ m_remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::handle_refresh_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_update_client();
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::send_update_client() {
+ dout(20) << dendl;
+
+ if (m_sync_complete) {
+ m_client_meta_copy.sync_points.pop_front();
+ if (m_client_meta_copy.sync_points.empty()) {
+ m_client_meta_copy.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+ }
+ } else {
+ while (m_client_meta_copy.sync_points.size() > 1) {
+ m_client_meta_copy.sync_points.pop_back();
+ }
+ if (m_invalid_master_sync_point) {
+ // all subsequent sync points would have been pruned
+ m_client_meta_copy.sync_points.clear();
+ }
+ }
+
+ bufferlist client_data_bl;
+ librbd::journal::ClientData client_data(m_client_meta_copy);
+ encode(client_data, client_data_bl);
+
+ Context *ctx = create_context_callback<
+ SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_update_client>(
+ this);
+ m_journaler->update_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::handle_update_client(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to update client data: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ // update provided meta structure to reflect reality
+ *m_client_meta = m_client_meta_copy;
+ finish(0);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
new file mode 100644
index 00000000..65e13ef5
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
@@ -0,0 +1,96 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H
+#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H
+
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <list>
+#include <string>
+
+class Context;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SyncPointPruneRequest {
+public:
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+ typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta;
+ typedef librbd::journal::MirrorPeerSyncPoint MirrorPeerSyncPoint;
+
+ static SyncPointPruneRequest* create(ImageCtxT *remote_image_ctx,
+ bool sync_complete,
+ Journaler *journaler,
+ MirrorPeerClientMeta *client_meta,
+ Context *on_finish) {
+ return new SyncPointPruneRequest(remote_image_ctx, sync_complete, journaler,
+ client_meta, on_finish);
+ }
+
+ SyncPointPruneRequest(ImageCtxT *remote_image_ctx, bool sync_complete,
+ Journaler *journaler, MirrorPeerClientMeta *client_meta,
+ Context *on_finish);
+
+ void send();
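+
+ // Illustrative call site (a sketch only -- ImageSync drives this request):
+ //
+ //   auto *req = SyncPointPruneRequest<>::create(
+ //       remote_image_ctx, sync_complete, journaler, &client_meta, on_finish);
+ //   req->send(); // completes on_finish and deletes itself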
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * | . . . . .
+ * | . .
+ * v v . (repeat if from snap
+ * REMOVE_SNAP . . . unused by other sync)
+ * |
+ * v
+ * REFRESH_IMAGE
+ * |
+ * v
+ * UPDATE_CLIENT
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT *m_remote_image_ctx;
+ bool m_sync_complete;
+ Journaler *m_journaler;
+ MirrorPeerClientMeta *m_client_meta;
+ Context *m_on_finish;
+
+ MirrorPeerClientMeta m_client_meta_copy;
+ std::list<std::string> m_snap_names;
+
+ bool m_invalid_master_sync_point = false;
+
+ void send_remove_snap();
+ void handle_remove_snap(int r);
+
+ void send_refresh_image();
+ void handle_refresh_image(int r);
+
+ void send_update_client();
+ void handle_update_client(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H
diff --git a/src/tools/rbd_mirror/instance_watcher/Types.cc b/src/tools/rbd_mirror/instance_watcher/Types.cc
new file mode 100644
index 00000000..0e992273
--- /dev/null
+++ b/src/tools/rbd_mirror/instance_watcher/Types.cc
@@ -0,0 +1,245 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+
+namespace rbd {
+namespace mirror {
+namespace instance_watcher {
+
+namespace {
+
+class EncodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ using ceph::encode;
+ encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl);
+ payload.encode(m_bl);
+ }
+
+private:
+ bufferlist &m_bl;
+};
+
+class DecodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter)
+ : m_version(version), m_iter(iter) {}
+
+ template <typename Payload>
+ inline void operator()(Payload &payload) const {
+ payload.decode(m_version, m_iter);
+ }
+
+private:
+ __u8 m_version;
+ bufferlist::const_iterator &m_iter;
+};
+
+class DumpPayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ NotifyOp notify_op = Payload::NOTIFY_OP;
+ m_formatter->dump_string("notify_op", stringify(notify_op));
+ payload.dump(m_formatter);
+ }
+
+private:
+ ceph::Formatter *m_formatter;
+};
+
+} // anonymous namespace
+
+void PayloadBase::encode(bufferlist &bl) const {
+ using ceph::encode;
+ encode(request_id, bl);
+}
+
+void PayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ decode(request_id, iter);
+}
+
+void PayloadBase::dump(Formatter *f) const {
+ f->dump_unsigned("request_id", request_id);
+}
+
+void ImagePayloadBase::encode(bufferlist &bl) const {
+ using ceph::encode;
+ PayloadBase::encode(bl);
+ encode(global_image_id, bl);
+}
+
+void ImagePayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ PayloadBase::decode(version, iter);
+ decode(global_image_id, iter);
+}
+
+void ImagePayloadBase::dump(Formatter *f) const {
+ PayloadBase::dump(f);
+ f->dump_string("global_image_id", global_image_id);
+}
+
+void PeerImageRemovedPayload::encode(bufferlist &bl) const {
+ using ceph::encode;
+ PayloadBase::encode(bl);
+ encode(global_image_id, bl);
+ encode(peer_mirror_uuid, bl);
+}
+
+void PeerImageRemovedPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ PayloadBase::decode(version, iter);
+ decode(global_image_id, iter);
+ decode(peer_mirror_uuid, iter);
+}
+
+void PeerImageRemovedPayload::dump(Formatter *f) const {
+ PayloadBase::dump(f);
+ f->dump_string("global_image_id", global_image_id);
+ f->dump_string("peer_mirror_uuid", peer_mirror_uuid);
+}
+
+void SyncPayloadBase::encode(bufferlist &bl) const {
+ using ceph::encode;
+ PayloadBase::encode(bl);
+ encode(sync_id, bl);
+}
+
+void SyncPayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ PayloadBase::decode(version, iter);
+ decode(sync_id, iter);
+}
+
+void SyncPayloadBase::dump(Formatter *f) const {
+ PayloadBase::dump(f);
+ f->dump_string("sync_id", sync_id);
+}
+
+void UnknownPayload::encode(bufferlist &bl) const {
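+ // an UnknownPayload is only produced while decoding a newer peer's
+ // message; it must never be re-encoded locally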
+ ceph_abort();
+}
+
+void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void UnknownPayload::dump(Formatter *f) const {
+}
+
+void NotifyMessage::encode(bufferlist& bl) const {
+ ENCODE_START(2, 2, bl);
+ boost::apply_visitor(EncodePayloadVisitor(bl), payload);
+ ENCODE_FINISH(bl);
+}
+
+void NotifyMessage::decode(bufferlist::const_iterator& iter) {
+ DECODE_START(2, iter);
+
+ uint32_t notify_op;
+ decode(notify_op, iter);
+
+ // select the correct payload variant based upon the encoded op
+ switch (notify_op) {
+ case NOTIFY_OP_IMAGE_ACQUIRE:
+ payload = ImageAcquirePayload();
+ break;
+ case NOTIFY_OP_IMAGE_RELEASE:
+ payload = ImageReleasePayload();
+ break;
+ case NOTIFY_OP_PEER_IMAGE_REMOVED:
+ payload = PeerImageRemovedPayload();
+ break;
+ case NOTIFY_OP_SYNC_REQUEST:
+ payload = SyncRequestPayload();
+ break;
+ case NOTIFY_OP_SYNC_START:
+ payload = SyncStartPayload();
+ break;
+ default:
+ payload = UnknownPayload();
+ break;
+ }
+
+ apply_visitor(DecodePayloadVisitor(struct_v, iter), payload);
+ DECODE_FINISH(iter);
+}
+
+void NotifyMessage::dump(Formatter *f) const {
+ apply_visitor(DumpPayloadVisitor(f), payload);
+}
+
+void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
+ o.push_back(new NotifyMessage(ImageAcquirePayload()));
+ o.push_back(new NotifyMessage(ImageAcquirePayload(1, "gid")));
+
+ o.push_back(new NotifyMessage(ImageReleasePayload()));
+ o.push_back(new NotifyMessage(ImageReleasePayload(1, "gid")));
+
+ o.push_back(new NotifyMessage(PeerImageRemovedPayload()));
+ o.push_back(new NotifyMessage(PeerImageRemovedPayload(1, "gid", "uuid")));
+
+ o.push_back(new NotifyMessage(SyncRequestPayload()));
+ o.push_back(new NotifyMessage(SyncRequestPayload(1, "sync_id")));
+
+ o.push_back(new NotifyMessage(SyncStartPayload()));
+ o.push_back(new NotifyMessage(SyncStartPayload(1, "sync_id")));
+}
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op) {
+ switch (op) {
+ case NOTIFY_OP_IMAGE_ACQUIRE:
+ out << "ImageAcquire";
+ break;
+ case NOTIFY_OP_IMAGE_RELEASE:
+ out << "ImageRelease";
+ break;
+ case NOTIFY_OP_PEER_IMAGE_REMOVED:
+ out << "PeerImageRemoved";
+ break;
+ case NOTIFY_OP_SYNC_REQUEST:
+ out << "SyncRequest";
+ break;
+ case NOTIFY_OP_SYNC_START:
+ out << "SyncStart";
+ break;
+ default:
+ out << "Unknown (" << static_cast<uint32_t>(op) << ")";
+ break;
+ }
+ return out;
+}
+
+void NotifyAckPayload::encode(bufferlist &bl) const {
+ using ceph::encode;
+ encode(instance_id, bl);
+ encode(request_id, bl);
+ encode(ret_val, bl);
+}
+
+void NotifyAckPayload::decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ decode(instance_id, iter);
+ decode(request_id, iter);
+ decode(ret_val, iter);
+}
+
+void NotifyAckPayload::dump(Formatter *f) const {
+ f->dump_string("instance_id", instance_id);
+ f->dump_unsigned("request_id", request_id);
+ f->dump_int("request_id", ret_val);
+}
+
+} // namespace instance_watcher
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/instance_watcher/Types.h b/src/tools/rbd_mirror/instance_watcher/Types.h
new file mode 100644
index 00000000..b0b7b779
--- /dev/null
+++ b/src/tools/rbd_mirror/instance_watcher/Types.h
@@ -0,0 +1,197 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
+#define RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
+
+#include <string>
+#include <set>
+#include <list>
+#include <boost/variant.hpp>
+
+#include "include/buffer_fwd.h"
+#include "include/encoding.h"
+#include "include/int_types.h"
+
+namespace ceph { class Formatter; }
+
+namespace rbd {
+namespace mirror {
+namespace instance_watcher {
+
+enum NotifyOp {
+ NOTIFY_OP_IMAGE_ACQUIRE = 0,
+ NOTIFY_OP_IMAGE_RELEASE = 1,
+ NOTIFY_OP_PEER_IMAGE_REMOVED = 2,
+ NOTIFY_OP_SYNC_REQUEST = 3,
+ NOTIFY_OP_SYNC_START = 4
+};
+
+struct PayloadBase {
+ uint64_t request_id;
+
+ PayloadBase() : request_id(0) {
+ }
+
+ PayloadBase(uint64_t request_id) : request_id(request_id) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct ImagePayloadBase : public PayloadBase {
+ std::string global_image_id;
+
+ ImagePayloadBase() : PayloadBase() {
+ }
+
+ ImagePayloadBase(uint64_t request_id, const std::string &global_image_id)
+ : PayloadBase(request_id), global_image_id(global_image_id) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct ImageAcquirePayload : public ImagePayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_ACQUIRE;
+
+ ImageAcquirePayload() {
+ }
+ ImageAcquirePayload(uint64_t request_id, const std::string &global_image_id)
+ : ImagePayloadBase(request_id, global_image_id) {
+ }
+};
+
+struct ImageReleasePayload : public ImagePayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_RELEASE;
+
+ ImageReleasePayload() {
+ }
+ ImageReleasePayload(uint64_t request_id, const std::string &global_image_id)
+ : ImagePayloadBase(request_id, global_image_id) {
+ }
+};
+
+struct PeerImageRemovedPayload : public PayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_PEER_IMAGE_REMOVED;
+
+ std::string global_image_id;
+ std::string peer_mirror_uuid;
+
+ PeerImageRemovedPayload() {
+ }
+ PeerImageRemovedPayload(uint64_t request_id,
+ const std::string& global_image_id,
+ const std::string& peer_mirror_uuid)
+ : PayloadBase(request_id),
+ global_image_id(global_image_id), peer_mirror_uuid(peer_mirror_uuid) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct SyncPayloadBase : public PayloadBase {
+ std::string sync_id;
+
+ SyncPayloadBase() : PayloadBase() {
+ }
+
+ SyncPayloadBase(uint64_t request_id, const std::string &sync_id)
+ : PayloadBase(request_id), sync_id(sync_id) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct SyncRequestPayload : public SyncPayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_REQUEST;
+
+ SyncRequestPayload() : SyncPayloadBase() {
+ }
+
+ SyncRequestPayload(uint64_t request_id, const std::string &sync_id)
+ : SyncPayloadBase(request_id, sync_id) {
+ }
+};
+
+struct SyncStartPayload : public SyncPayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_START;
+
+ SyncStartPayload() : SyncPayloadBase() {
+ }
+
+ SyncStartPayload(uint64_t request_id, const std::string &sync_id)
+ : SyncPayloadBase(request_id, sync_id) {
+ }
+};
+
+struct UnknownPayload {
+ static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1);
+
+ UnknownPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+typedef boost::variant<ImageAcquirePayload,
+ ImageReleasePayload,
+ PeerImageRemovedPayload,
+ SyncRequestPayload,
+ SyncStartPayload,
+ UnknownPayload> Payload;
+
+struct NotifyMessage {
+ NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) {
+ }
+
+ Payload payload;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+
+ static void generate_test_instances(std::list<NotifyMessage *> &o);
+};
+
+WRITE_CLASS_ENCODER(NotifyMessage);
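+
+// Encode/decode round trip (illustrative sketch; the free functions come
+// from WRITE_CLASS_ENCODER above):
+//
+//   NotifyMessage msg{ImageAcquirePayload(1, "global image id")};
+//   bufferlist bl;
+//   encode(msg, bl);
+//   auto it = bl.cbegin();
+//   NotifyMessage out;
+//   decode(out, it); // out.payload now holds an ImageAcquirePayload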
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op);
+
+struct NotifyAckPayload {
+ std::string instance_id;
+ uint64_t request_id;
+ int ret_val;
+
+ NotifyAckPayload() : request_id(0), ret_val(0) {
+ }
+
+ NotifyAckPayload(const std::string &instance_id, uint64_t request_id,
+ int ret_val)
+ : instance_id(instance_id), request_id(request_id), ret_val(ret_val) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+};
+
+WRITE_CLASS_ENCODER(NotifyAckPayload);
+
+} // namespace instance_watcher
+} // namespace mirror
+} // namespace rbd
+
+using rbd::mirror::instance_watcher::encode;
+using rbd::mirror::instance_watcher::decode;
+
+#endif // RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/instances/Types.h b/src/tools/rbd_mirror/instances/Types.h
new file mode 100644
index 00000000..8b0a68fc
--- /dev/null
+++ b/src/tools/rbd_mirror/instances/Types.h
@@ -0,0 +1,28 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCES_TYPES_H
+#define CEPH_RBD_MIRROR_INSTANCES_TYPES_H
+
+#include <string>
+#include <vector>
+
+namespace rbd {
+namespace mirror {
+namespace instances {
+
+struct Listener {
+ typedef std::vector<std::string> InstanceIds;
+
+ virtual ~Listener() {
+ }
+
+ virtual void handle_added(const InstanceIds& instance_ids) = 0;
+ virtual void handle_removed(const InstanceIds& instance_ids) = 0;
+};
+
+} // namespace instances
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCES_TYPES_H
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.cc b/src/tools/rbd_mirror/leader_watcher/Types.cc
new file mode 100644
index 00000000..d2fb7908
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.cc
@@ -0,0 +1,161 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+
+namespace rbd {
+namespace mirror {
+namespace leader_watcher {
+
+namespace {
+
+class EncodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ using ceph::encode;
+ encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl);
+ payload.encode(m_bl);
+ }
+
+private:
+ bufferlist &m_bl;
+};
+
+class DecodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter)
+ : m_version(version), m_iter(iter) {}
+
+ template <typename Payload>
+ inline void operator()(Payload &payload) const {
+ payload.decode(m_version, m_iter);
+ }
+
+private:
+ __u8 m_version;
+ bufferlist::const_iterator &m_iter;
+};
+
+class DumpPayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ NotifyOp notify_op = Payload::NOTIFY_OP;
+ m_formatter->dump_string("notify_op", stringify(notify_op));
+ payload.dump(m_formatter);
+ }
+
+private:
+ ceph::Formatter *m_formatter;
+};
+
+} // anonymous namespace
+
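+// the leader messages are pure signals: their payloads are intentionally
+// empty, and only the encoded NotifyOp distinguishes them on the wire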
+void HeartbeatPayload::encode(bufferlist &bl) const {
+}
+
+void HeartbeatPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void HeartbeatPayload::dump(Formatter *f) const {
+}
+
+void LockAcquiredPayload::encode(bufferlist &bl) const {
+}
+
+void LockAcquiredPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void LockAcquiredPayload::dump(Formatter *f) const {
+}
+
+void LockReleasedPayload::encode(bufferlist &bl) const {
+}
+
+void LockReleasedPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void LockReleasedPayload::dump(Formatter *f) const {
+}
+
+void UnknownPayload::encode(bufferlist &bl) const {
+ ceph_abort();
+}
+
+void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void UnknownPayload::dump(Formatter *f) const {
+}
+
+void NotifyMessage::encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ boost::apply_visitor(EncodePayloadVisitor(bl), payload);
+ ENCODE_FINISH(bl);
+}
+
+void NotifyMessage::decode(bufferlist::const_iterator& iter) {
+ DECODE_START(1, iter);
+
+ uint32_t notify_op;
+ decode(notify_op, iter);
+
+ // select the correct payload variant based upon the encoded op
+ switch (notify_op) {
+ case NOTIFY_OP_HEARTBEAT:
+ payload = HeartbeatPayload();
+ break;
+ case NOTIFY_OP_LOCK_ACQUIRED:
+ payload = LockAcquiredPayload();
+ break;
+ case NOTIFY_OP_LOCK_RELEASED:
+ payload = LockReleasedPayload();
+ break;
+ default:
+ payload = UnknownPayload();
+ break;
+ }
+
+ apply_visitor(DecodePayloadVisitor(struct_v, iter), payload);
+ DECODE_FINISH(iter);
+}
+
+void NotifyMessage::dump(Formatter *f) const {
+ apply_visitor(DumpPayloadVisitor(f), payload);
+}
+
+void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
+ o.push_back(new NotifyMessage(HeartbeatPayload()));
+ o.push_back(new NotifyMessage(LockAcquiredPayload()));
+ o.push_back(new NotifyMessage(LockReleasedPayload()));
+}
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op) {
+ switch (op) {
+ case NOTIFY_OP_HEARTBEAT:
+ out << "Heartbeat";
+ break;
+ case NOTIFY_OP_LOCK_ACQUIRED:
+ out << "LockAcquired";
+ break;
+ case NOTIFY_OP_LOCK_RELEASED:
+ out << "LockReleased";
+ break;
+ default:
+ out << "Unknown (" << static_cast<uint32_t>(op) << ")";
+ break;
+ }
+ return out;
+}
+
+} // namespace leader_watcher
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.h b/src/tools/rbd_mirror/leader_watcher/Types.h
new file mode 100644
index 00000000..1278e54b
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.h
@@ -0,0 +1,117 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_LEADER_WATCHER_TYPES_H
+#define RBD_MIRROR_LEADER_WATCHER_TYPES_H
+
+#include "include/int_types.h"
+#include "include/buffer_fwd.h"
+#include "include/encoding.h"
+#include <string>
+#include <vector>
+#include <list>
+#include <boost/variant.hpp>
+
+struct Context;
+
+namespace ceph { class Formatter; }
+
+namespace rbd {
+namespace mirror {
+namespace leader_watcher {
+
+struct Listener {
+ typedef std::vector<std::string> InstanceIds;
+
+ virtual ~Listener() {
+ }
+
+ virtual void post_acquire_handler(Context *on_finish) = 0;
+ virtual void pre_release_handler(Context *on_finish) = 0;
+
+ virtual void update_leader_handler(
+ const std::string &leader_instance_id) = 0;
+
+ virtual void handle_instances_added(const InstanceIds& instance_ids) = 0;
+ virtual void handle_instances_removed(const InstanceIds& instance_ids) = 0;
+};
+
+enum NotifyOp {
+ NOTIFY_OP_HEARTBEAT = 0,
+ NOTIFY_OP_LOCK_ACQUIRED = 1,
+ NOTIFY_OP_LOCK_RELEASED = 2,
+};
+
+struct HeartbeatPayload {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_HEARTBEAT;
+
+ HeartbeatPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct LockAcquiredPayload {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_ACQUIRED;
+
+ LockAcquiredPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct LockReleasedPayload {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_RELEASED;
+
+ LockReleasedPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct UnknownPayload {
+ static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1);
+
+ UnknownPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+typedef boost::variant<HeartbeatPayload,
+ LockAcquiredPayload,
+ LockReleasedPayload,
+ UnknownPayload> Payload;
+
+struct NotifyMessage {
+ NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) {
+ }
+
+ Payload payload;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+
+ static void generate_test_instances(std::list<NotifyMessage *> &o);
+};
+
+WRITE_CLASS_ENCODER(NotifyMessage);
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op);
+
+} // namespace leader_watcher
+} // namespace mirror
+} // namespace rbd
+
+using rbd::mirror::leader_watcher::encode;
+using rbd::mirror::leader_watcher::decode;
+
+#endif // RBD_MIRROR_LEADER_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/main.cc b/src/tools/rbd_mirror/main.cc
new file mode 100644
index 00000000..ab350a01
--- /dev/null
+++ b/src/tools/rbd_mirror/main.cc
@@ -0,0 +1,104 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/perf_counters.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+#include "Mirror.h"
+#include "Types.h"
+
+#include <vector>
+
+rbd::mirror::Mirror *mirror = nullptr;
+PerfCounters *g_perf_counters = nullptr;
+
+void usage() {
+ std::cout << "usage: rbd-mirror [options...]" << std::endl;
+ std::cout << "options:\n";
+ std::cout << " -m monaddress[:port] connect to specified monitor\n";
+ std::cout << " --keyring=<path> path to keyring for local cluster\n";
+ std::cout << " --log-file=<logfile> file to log debug output\n";
+ std::cout << " --debug-rbd-mirror=<log-level>/<memory-level> set rbd-mirror debug level\n";
+ generic_server_usage();
+}
+
+static void handle_signal(int signum)
+{
+ if (mirror)
+ mirror->handle_signal(signum);
+}
+
+int main(int argc, const char **argv)
+{
+ std::vector<const char*> args;
+ argv_to_vec(argc, argv, args);
+ if (args.empty()) {
+ cerr << argv[0] << ": -h or --help for usage" << std::endl;
+ exit(1);
+ }
+ if (ceph_argparse_need_usage(args)) {
+ usage();
+ exit(0);
+ }
+
+ auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_DAEMON,
+ CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+
+ if (g_conf()->daemonize) {
+ global_init_daemonize(g_ceph_context);
+ }
+
+ common_init_finish(g_ceph_context);
+
+ init_async_signal_handler();
+ register_async_signal_handler(SIGHUP, handle_signal);
+ register_async_signal_handler_oneshot(SIGINT, handle_signal);
+ register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+ std::vector<const char*> cmd_args;
+ argv_to_vec(argc, argv, cmd_args);
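+ // keep an unconsumed copy of the arguments; Mirror passes them along so
+ // per-peer cluster connections can re-apply the command-line settings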
+
+ // disable unnecessary librbd cache
+ g_ceph_context->_conf.set_val_or_die("rbd_cache", "false");
+
+ auto prio =
+ g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio");
+ PerfCountersBuilder plb(g_ceph_context, "rbd_mirror",
+ rbd::mirror::l_rbd_mirror_first,
+ rbd::mirror::l_rbd_mirror_last);
+ plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays",
+ "r", prio);
+ plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes",
+ "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
+ plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency",
+ "Replay latency", "rl", prio);
+ g_perf_counters = plb.create_perf_counters();
+ g_ceph_context->get_perfcounters_collection()->add(g_perf_counters);
+
+ mirror = new rbd::mirror::Mirror(g_ceph_context, cmd_args);
+ int r = mirror->init();
+ if (r < 0) {
+ std::cerr << "failed to initialize: " << cpp_strerror(r) << std::endl;
+ goto cleanup;
+ }
+
+ mirror->run();
+
+ cleanup:
+ unregister_async_signal_handler(SIGHUP, handle_signal);
+ unregister_async_signal_handler(SIGINT, handle_signal);
+ unregister_async_signal_handler(SIGTERM, handle_signal);
+ shutdown_async_signal_handler();
+
+ g_ceph_context->get_perfcounters_collection()->remove(g_perf_counters);
+
+ delete mirror;
+ delete g_perf_counters;
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc
new file mode 100644
index 00000000..a1d9c1b5
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+#include <map>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::pool_watcher::RefreshImagesRequest " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+static const uint32_t MAX_RETURN = 1024;
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void RefreshImagesRequest<I>::send() {
+ m_image_ids->clear();
+ mirror_image_list();
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::mirror_image_list() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN);
+
+ m_out_bl.clear();
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ RefreshImagesRequest<I>,
+ &RefreshImagesRequest<I>::handle_mirror_image_list>(this);
+ int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::handle_mirror_image_list(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::map<std::string, std::string> ids;
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_list_finish(&it, &ids);
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ // store as global -> local image ids
+ for (auto &id : ids) {
+ m_image_ids->emplace(id.second, id.first);
+ }
+
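+ // a full page means more images may remain -- resume after the last id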
+ if (ids.size() == MAX_RETURN) {
+ m_start_after = ids.rbegin()->first;
+ mirror_image_list();
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h
new file mode 100644
index 00000000..8bfeabe2
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+struct Context;
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class RefreshImagesRequest {
+public:
+ static RefreshImagesRequest *create(librados::IoCtx &remote_io_ctx,
+ ImageIds *image_ids, Context *on_finish) {
+ return new RefreshImagesRequest(remote_io_ctx, image_ids, on_finish);
+ }
+
+ RefreshImagesRequest(librados::IoCtx &remote_io_ctx, ImageIds *image_ids,
+ Context *on_finish)
+ : m_remote_io_ctx(remote_io_ctx), m_image_ids(image_ids),
+ m_on_finish(on_finish) {
+ }
+
+ void send();
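+
+ // Illustrative usage (a sketch only -- PoolWatcher is the actual caller):
+ //
+ //   ImageIds image_ids;
+ //   auto *req = RefreshImagesRequest<>::create(remote_io_ctx, &image_ids,
+ //                                              on_finish);
+ //   req->send(); // fills image_ids, completes on_finish, deletes itself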
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * | /-------------\
+ * | | |
+ * v v | (more images)
+ * MIRROR_IMAGE_LIST ---/
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_remote_io_ctx;
+ ImageIds *m_image_ids;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+ std::string m_start_after;
+
+ void mirror_image_list();
+ void handle_mirror_image_list(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
diff --git a/src/tools/rbd_mirror/pool_watcher/Types.h b/src/tools/rbd_mirror/pool_watcher/Types.h
new file mode 100644
index 00000000..52dfc342
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/Types.h
@@ -0,0 +1,27 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
+
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+struct Listener {
+ virtual ~Listener() {
+ }
+
+ virtual void handle_update(const std::string &mirror_uuid,
+ ImageIds &&added_image_ids,
+ ImageIds &&removed_image_ids) = 0;
+};
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/service_daemon/Types.cc b/src/tools/rbd_mirror/service_daemon/Types.cc
new file mode 100644
index 00000000..7dc6537c
--- /dev/null
+++ b/src/tools/rbd_mirror/service_daemon/Types.cc
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <iostream>
+
+namespace rbd {
+namespace mirror {
+namespace service_daemon {
+
+std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level) {
+ switch (callout_level) {
+ case CALLOUT_LEVEL_INFO:
+ os << "info";
+ break;
+ case CALLOUT_LEVEL_WARNING:
+ os << "warning";
+ break;
+ case CALLOUT_LEVEL_ERROR:
+ os << "error";
+ break;
+ }
+ return os;
+}
+
+} // namespace service_daemon
+} // namespace mirror
+} // namespace rbd
+
diff --git a/src/tools/rbd_mirror/service_daemon/Types.h b/src/tools/rbd_mirror/service_daemon/Types.h
new file mode 100644
index 00000000..3aab7201
--- /dev/null
+++ b/src/tools/rbd_mirror/service_daemon/Types.h
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
+#define CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
+
+#include "include/int_types.h"
+#include <iosfwd>
+#include <string>
+#include <boost/variant.hpp>
+
+namespace rbd {
+namespace mirror {
+namespace service_daemon {
+
+typedef uint64_t CalloutId;
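+// sentinel indicating that no callout has been allocated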
+const uint64_t CALLOUT_ID_NONE {0};
+
+enum CalloutLevel {
+ CALLOUT_LEVEL_INFO,
+ CALLOUT_LEVEL_WARNING,
+ CALLOUT_LEVEL_ERROR
+};
+
+std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level);
+
+typedef boost::variant<bool, uint64_t, std::string> AttributeValue;
+
+} // namespace service_daemon
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H