summaryrefslogtreecommitdiffstats
path: root/src/tools/rbd_mirror
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/rbd_mirror')
-rw-r--r--src/tools/rbd_mirror/BaseRequest.h33
-rw-r--r--src/tools/rbd_mirror/CMakeLists.txt90
-rw-r--r--src/tools/rbd_mirror/CancelableRequest.h44
-rw-r--r--src/tools/rbd_mirror/ClusterWatcher.cc251
-rw-r--r--src/tools/rbd_mirror/ClusterWatcher.h73
-rw-r--r--src/tools/rbd_mirror/ImageDeleter.cc548
-rw-r--r--src/tools/rbd_mirror/ImageDeleter.h189
-rw-r--r--src/tools/rbd_mirror/ImageMap.cc602
-rw-r--r--src/tools/rbd_mirror/ImageMap.h175
-rw-r--r--src/tools/rbd_mirror/ImageReplayer.cc1190
-rw-r--r--src/tools/rbd_mirror/ImageReplayer.h273
-rw-r--r--src/tools/rbd_mirror/ImageSync.cc469
-rw-r--r--src/tools/rbd_mirror/ImageSync.h151
-rw-r--r--src/tools/rbd_mirror/InstanceReplayer.cc543
-rw-r--r--src/tools/rbd_mirror/InstanceReplayer.h138
-rw-r--r--src/tools/rbd_mirror/InstanceWatcher.cc1290
-rw-r--r--src/tools/rbd_mirror/InstanceWatcher.h269
-rw-r--r--src/tools/rbd_mirror/Instances.cc356
-rw-r--r--src/tools/rbd_mirror/Instances.h168
-rw-r--r--src/tools/rbd_mirror/LeaderWatcher.cc1069
-rw-r--r--src/tools/rbd_mirror/LeaderWatcher.h313
-rw-r--r--src/tools/rbd_mirror/Mirror.cc748
-rw-r--r--src/tools/rbd_mirror/Mirror.h89
-rw-r--r--src/tools/rbd_mirror/MirrorStatusUpdater.cc397
-rw-r--r--src/tools/rbd_mirror/MirrorStatusUpdater.h119
-rw-r--r--src/tools/rbd_mirror/MirrorStatusWatcher.cc74
-rw-r--r--src/tools/rbd_mirror/MirrorStatusWatcher.h43
-rw-r--r--src/tools/rbd_mirror/NamespaceReplayer.cc862
-rw-r--r--src/tools/rbd_mirror/NamespaceReplayer.h308
-rw-r--r--src/tools/rbd_mirror/PoolMetaCache.cc83
-rw-r--r--src/tools/rbd_mirror/PoolMetaCache.h47
-rw-r--r--src/tools/rbd_mirror/PoolReplayer.cc1109
-rw-r--r--src/tools/rbd_mirror/PoolReplayer.h288
-rw-r--r--src/tools/rbd_mirror/PoolWatcher.cc473
-rw-r--r--src/tools/rbd_mirror/PoolWatcher.h161
-rw-r--r--src/tools/rbd_mirror/ProgressContext.h21
-rw-r--r--src/tools/rbd_mirror/RemotePoolPoller.cc267
-rw-r--r--src/tools/rbd_mirror/RemotePoolPoller.h133
-rw-r--r--src/tools/rbd_mirror/ServiceDaemon.cc327
-rw-r--r--src/tools/rbd_mirror/ServiceDaemon.h94
-rw-r--r--src/tools/rbd_mirror/Threads.cc38
-rw-r--r--src/tools/rbd_mirror/Threads.h45
-rw-r--r--src/tools/rbd_mirror/Throttler.cc240
-rw-r--r--src/tools/rbd_mirror/Throttler.h74
-rw-r--r--src/tools/rbd_mirror/Types.cc32
-rw-r--r--src/tools/rbd_mirror/Types.h166
-rw-r--r--src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc299
-rw-r--r--src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h105
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc419
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h142
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc265
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h117
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashWatcher.cc384
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashWatcher.h139
-rw-r--r--src/tools/rbd_mirror/image_deleter/Types.h54
-rw-r--r--src/tools/rbd_mirror/image_map/LoadRequest.cc174
-rw-r--r--src/tools/rbd_mirror/image_map/LoadRequest.h77
-rw-r--r--src/tools/rbd_mirror/image_map/Policy.cc407
-rw-r--r--src/tools/rbd_mirror/image_map/Policy.h123
-rw-r--r--src/tools/rbd_mirror/image_map/SimplePolicy.cc89
-rw-r--r--src/tools/rbd_mirror/image_map/SimplePolicy.h39
-rw-r--r--src/tools/rbd_mirror/image_map/StateTransition.cc94
-rw-r--r--src/tools/rbd_mirror/image_map/StateTransition.h76
-rw-r--r--src/tools/rbd_mirror/image_map/Types.cc138
-rw-r--r--src/tools/rbd_mirror/image_map/Types.h130
-rw-r--r--src/tools/rbd_mirror/image_map/UpdateRequest.cc100
-rw-r--r--src/tools/rbd_mirror/image_map/UpdateRequest.h65
-rw-r--r--src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc485
-rw-r--r--src/tools/rbd_mirror/image_replayer/BootstrapRequest.h181
-rw-r--r--src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc62
-rw-r--r--src/tools/rbd_mirror/image_replayer/CloseImageRequest.h56
-rw-r--r--src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc451
-rw-r--r--src/tools/rbd_mirror/image_replayer/CreateImageRequest.h144
-rw-r--r--src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc85
-rw-r--r--src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h75
-rw-r--r--src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc79
-rw-r--r--src/tools/rbd_mirror/image_replayer/OpenImageRequest.h71
-rw-r--r--src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc292
-rw-r--r--src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h97
-rw-r--r--src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc197
-rw-r--r--src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h115
-rw-r--r--src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc283
-rw-r--r--src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h153
-rw-r--r--src/tools/rbd_mirror/image_replayer/Replayer.h39
-rw-r--r--src/tools/rbd_mirror/image_replayer/ReplayerListener.h21
-rw-r--r--src/tools/rbd_mirror/image_replayer/StateBuilder.cc138
-rw-r--r--src/tools/rbd_mirror/image_replayer/StateBuilder.h114
-rw-r--r--src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc34
-rw-r--r--src/tools/rbd_mirror/image_replayer/TimeRollingMean.h40
-rw-r--r--src/tools/rbd_mirror/image_replayer/Types.h21
-rw-r--r--src/tools/rbd_mirror/image_replayer/Utils.cc61
-rw-r--r--src/tools/rbd_mirror/image_replayer/Utils.h29
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc162
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h116
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc206
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h127
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.cc316
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h115
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc284
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h70
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/Replayer.cc1303
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/Replayer.h323
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc149
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h94
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc109
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h55
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc658
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h155
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc204
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h121
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc70
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h92
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc1586
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h346
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc120
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h93
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc65
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/Utils.h30
-rw-r--r--src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc172
-rw-r--r--src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h93
-rw-r--r--src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc213
-rw-r--r--src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h91
-rw-r--r--src/tools/rbd_mirror/image_sync/Types.h74
-rw-r--r--src/tools/rbd_mirror/image_sync/Utils.cc24
-rw-r--r--src/tools/rbd_mirror/image_sync/Utils.h16
-rw-r--r--src/tools/rbd_mirror/instance_watcher/Types.cc245
-rw-r--r--src/tools/rbd_mirror/instance_watcher/Types.h197
-rw-r--r--src/tools/rbd_mirror/instances/Types.h28
-rw-r--r--src/tools/rbd_mirror/leader_watcher/Types.cc161
-rw-r--r--src/tools/rbd_mirror/leader_watcher/Types.h117
-rw-r--r--src/tools/rbd_mirror/main.cc123
-rw-r--r--src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc89
-rw-r--r--src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h73
-rw-r--r--src/tools/rbd_mirror/pool_watcher/Types.h27
-rw-r--r--src/tools/rbd_mirror/service_daemon/Types.cc29
-rw-r--r--src/tools/rbd_mirror/service_daemon/Types.h33
136 files changed, 30005 insertions, 0 deletions
diff --git a/src/tools/rbd_mirror/BaseRequest.h b/src/tools/rbd_mirror/BaseRequest.h
new file mode 100644
index 000000000..0da98651d
--- /dev/null
+++ b/src/tools/rbd_mirror/BaseRequest.h
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_BASE_REQUEST_H
+#define CEPH_RBD_MIRROR_BASE_REQUEST_H
+
+#include "include/Context.h"
+
+namespace rbd {
+namespace mirror {
+
+// Abstract base for one-shot asynchronous request state machines. The
+// request is started with send() and deletes itself after invoking the
+// user-supplied completion callback in finish().
+class BaseRequest {
+public:
+  BaseRequest(Context *on_finish) : m_on_finish(on_finish) {
+  }
+  virtual ~BaseRequest() {}
+
+  // Kick off the request; implementations eventually call finish().
+  virtual void send() = 0;
+
+protected:
+  // Complete the callback with result r and self-destruct; the object
+  // must not be touched after this returns.
+  virtual void finish(int r) {
+    m_on_finish->complete(r);
+    delete this;
+  }
+
+private:
+  Context *m_on_finish;  // completion callback, fired exactly once
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_BASE_REQUEST_H
diff --git a/src/tools/rbd_mirror/CMakeLists.txt b/src/tools/rbd_mirror/CMakeLists.txt
new file mode 100644
index 000000000..f260d9786
--- /dev/null
+++ b/src/tools/rbd_mirror/CMakeLists.txt
@@ -0,0 +1,90 @@
+# On-wire/encodable type definitions, split out so other targets (e.g.
+# tests) can link the types without the full daemon internals.
+add_library(rbd_mirror_types STATIC
+  image_map/Types.cc
+  instance_watcher/Types.cc
+  leader_watcher/Types.cc)
+
+# All daemon-internal sources; compiled into rbd_mirror_internal below.
+set(rbd_mirror_internal
+  ClusterWatcher.cc
+  ImageDeleter.cc
+  ImageMap.cc
+  ImageReplayer.cc
+  ImageSync.cc
+  InstanceReplayer.cc
+  InstanceWatcher.cc
+  Instances.cc
+  LeaderWatcher.cc
+  Mirror.cc
+  MirrorStatusUpdater.cc
+  MirrorStatusWatcher.cc
+  NamespaceReplayer.cc
+  PoolMetaCache.cc
+  PoolReplayer.cc
+  PoolWatcher.cc
+  RemotePoolPoller.cc
+  ServiceDaemon.cc
+  Threads.cc
+  Throttler.cc
+  Types.cc
+  image_deleter/SnapshotPurgeRequest.cc
+  image_deleter/TrashMoveRequest.cc
+  image_deleter/TrashRemoveRequest.cc
+  image_deleter/TrashWatcher.cc
+  image_map/LoadRequest.cc
+  image_map/Policy.cc
+  image_map/SimplePolicy.cc
+  image_map/StateTransition.cc
+  image_map/UpdateRequest.cc
+  image_replayer/BootstrapRequest.cc
+  image_replayer/CloseImageRequest.cc
+  image_replayer/CreateImageRequest.cc
+  image_replayer/GetMirrorImageIdRequest.cc
+  image_replayer/OpenImageRequest.cc
+  image_replayer/OpenLocalImageRequest.cc
+  image_replayer/PrepareLocalImageRequest.cc
+  image_replayer/PrepareRemoteImageRequest.cc
+  image_replayer/StateBuilder.cc
+  image_replayer/TimeRollingMean.cc
+  image_replayer/Utils.cc
+  image_replayer/journal/CreateLocalImageRequest.cc
+  image_replayer/journal/EventPreprocessor.cc
+  image_replayer/journal/PrepareReplayRequest.cc
+  image_replayer/journal/Replayer.cc
+  image_replayer/journal/ReplayStatusFormatter.cc
+  image_replayer/journal/StateBuilder.cc
+  image_replayer/journal/SyncPointHandler.cc
+  image_replayer/snapshot/ApplyImageStateRequest.cc
+  image_replayer/snapshot/CreateLocalImageRequest.cc
+  image_replayer/snapshot/PrepareReplayRequest.cc
+  image_replayer/snapshot/Replayer.cc
+  image_replayer/snapshot/StateBuilder.cc
+  image_replayer/snapshot/Utils.cc
+  image_sync/SyncPointCreateRequest.cc
+  image_sync/SyncPointPruneRequest.cc
+  image_sync/Utils.cc
+  pool_watcher/RefreshImagesRequest.cc
+  service_daemon/Types.cc)
+
+# Also pull in the priority-cache objects compiled elsewhere in the tree.
+add_library(rbd_mirror_internal STATIC
+  ${rbd_mirror_internal}
+  $<TARGET_OBJECTS:common_prioritycache_obj>)
+
+# The rbd-mirror daemon executable itself.
+add_executable(rbd-mirror
+  main.cc)
+target_link_libraries(rbd-mirror
+  rbd_mirror_internal
+  rbd_mirror_types
+  rbd_api
+  rbd_internal
+  rbd_types
+  journal
+  libneorados
+  librados
+  osdc
+  cls_rbd_client
+  cls_lock_client
+  cls_journal_client
+  global
+  heap_profiler
+  ${ALLOC_LIBS}
+  OpenSSL::SSL)
+install(TARGETS rbd-mirror DESTINATION bin)
diff --git a/src/tools/rbd_mirror/CancelableRequest.h b/src/tools/rbd_mirror/CancelableRequest.h
new file mode 100644
index 000000000..26e8dcb5b
--- /dev/null
+++ b/src/tools/rbd_mirror/CancelableRequest.h
@@ -0,0 +1,44 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H
+#define CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H
+
+#include "common/RefCountedObj.h"
+#include "include/Context.h"
+
+namespace rbd {
+namespace mirror {
+
+// Ref-counted asynchronous request that may optionally support
+// cancellation. finish() drops the reference taken for the in-flight
+// operation, so the object may be destroyed when finish() returns.
+class CancelableRequest : public RefCountedObject {
+public:
+  CancelableRequest(const std::string& name, CephContext *cct,
+                    Context *on_finish)
+    : RefCountedObject(cct), m_name(name), m_cct(cct),
+      m_on_finish(on_finish) {
+  }
+
+  // Start the request; implementations eventually call finish().
+  virtual void send() = 0;
+  // Best-effort cancel; the default implementation is a no-op.
+  virtual void cancel() {}
+
+protected:
+  // Log completion, fire the (optional) callback with r, and release the
+  // request's self-reference.
+  virtual void finish(int r) {
+    if (m_cct) {
+      lsubdout(m_cct, rbd_mirror, 20) << m_name << "::finish: r=" << r << dendl;
+    }
+    if (m_on_finish) {
+      m_on_finish->complete(r);
+    }
+    put();
+  }
+
+private:
+  const std::string m_name;  // used only for log messages
+  CephContext *m_cct;        // may be null; logging is skipped then
+  Context *m_on_finish;      // may be null; completion is skipped then
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H
diff --git a/src/tools/rbd_mirror/ClusterWatcher.cc b/src/tools/rbd_mirror/ClusterWatcher.cc
new file mode 100644
index 000000000..2ae1306be
--- /dev/null
+++ b/src/tools/rbd_mirror/ClusterWatcher.cc
@@ -0,0 +1,251 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ClusterWatcher.h"
+#include "include/stringify.h"
+#include "common/ceph_json.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/internal.h"
+#include "librbd/api/Mirror.h"
+#include "tools/rbd_mirror/ServiceDaemon.h"
+#include "json_spirit/json_spirit.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ClusterWatcher:" << this << " " \
+ << __func__ << ": "
+
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librados::Rados;
+using librados::IoCtx;
+
+namespace rbd {
+namespace mirror {
+
+// The mutex is owned by the caller and shared with it; it guards
+// m_pool_peers and m_site_name (see the getters below).
+ClusterWatcher::ClusterWatcher(RadosRef cluster, ceph::mutex &lock,
+                               ServiceDaemon<librbd::ImageCtx>* service_daemon)
+  : m_cluster(cluster), m_lock(lock), m_service_daemon(service_daemon)
+{
+}
+
+// Caller must hold m_lock; the returned reference is only valid while
+// the lock remains held (refresh_pools() replaces the map).
+const ClusterWatcher::PoolPeers& ClusterWatcher::get_pool_peers() const
+{
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  return m_pool_peers;
+}
+
+// Caller must hold m_lock; returns the last site name successfully read
+// by refresh_pools() (returned by value, so safe to keep after unlock).
+std::string ClusterWatcher::get_site_name() const {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  return m_site_name;
+}
+
+// Re-scan the cluster's pools and site name. The expensive cluster I/O
+// happens before taking m_lock; the lock is only held to publish the
+// results.
+void ClusterWatcher::refresh_pools()
+{
+  dout(20) << "enter" << dendl;
+
+  PoolPeers pool_peers;
+  read_pool_peers(&pool_peers);
+
+  std::string site_name;
+  int r = read_site_name(&site_name);
+
+  std::lock_guard l{m_lock};
+  m_pool_peers = pool_peers;
+
+  if (r >= 0) {
+    // keep the previous site name on read failure
+    m_site_name = site_name;
+  }
+
+  // TODO: perhaps use a workqueue instead, once we get notifications
+  // about config changes for existing pools
+}
+
+// Scan every base (non-cache) pool, collect the mirror peer configuration
+// for each pool with mirroring enabled, and keep the service daemon's
+// pool registrations and health callouts in sync with what was found.
+void ClusterWatcher::read_pool_peers(PoolPeers *pool_peers)
+{
+  int r = m_cluster->wait_for_latest_osdmap();
+  if (r < 0) {
+    derr << "error waiting for OSD map: " << cpp_strerror(r) << dendl;
+    return;
+  }
+
+  list<pair<int64_t, string> > pools;
+  r = m_cluster->pool_list2(pools);
+  if (r < 0) {
+    derr << "error listing pools: " << cpp_strerror(r) << dendl;
+    return;
+  }
+
+  // pools seen this pass; used below to prune stale service registrations
+  std::set<int64_t> service_pool_ids;
+  for (auto& kv : pools) {
+    int64_t pool_id = kv.first;
+    auto& pool_name = kv.second;
+    int64_t base_tier;
+    r = m_cluster->pool_get_base_tier(pool_id, &base_tier);
+    if (r == -ENOENT) {
+      dout(10) << "pool " << pool_name << " no longer exists" << dendl;
+      continue;
+    } else if (r < 0) {
+      derr << "Error retrieving base tier for pool " << pool_name << dendl;
+      continue;
+    }
+    if (pool_id != base_tier) {
+      // pool is a cache; skip it
+      continue;
+    }
+
+    IoCtx ioctx;
+    r = m_cluster->ioctx_create2(pool_id, ioctx);
+    if (r == -ENOENT) {
+      dout(10) << "pool " << pool_id << " no longer exists" << dendl;
+      continue;
+    } else if (r < 0) {
+      derr << "Error accessing pool " << pool_name << cpp_strerror(r) << dendl;
+      continue;
+    }
+
+    cls::rbd::MirrorMode mirror_mode_internal;
+    r = librbd::cls_client::mirror_mode_get(&ioctx, &mirror_mode_internal);
+    if (r == 0 && mirror_mode_internal == cls::rbd::MIRROR_MODE_DISABLED) {
+      dout(10) << "mirroring is disabled for pool " << pool_name << dendl;
+      continue;
+    }
+
+    // register the pool with the service daemon even if the mirror-mode
+    // query failed (r < 0), so a health callout can be attached below
+    service_pool_ids.insert(pool_id);
+    if (m_service_pools.find(pool_id) == m_service_pools.end()) {
+      m_service_pools[pool_id] = {};
+      m_service_daemon->add_pool(pool_id, pool_name);
+    }
+
+    if (r == -EPERM) {
+      dout(10) << "access denied querying pool " << pool_name << dendl;
+      m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+        pool_id, m_service_pools[pool_id],
+        service_daemon::CALLOUT_LEVEL_WARNING, "access denied");
+      continue;
+    } else if (r < 0) {
+      derr << "could not tell whether mirroring was enabled for " << pool_name
+           << " : " << cpp_strerror(r) << dendl;
+      m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+        pool_id, m_service_pools[pool_id],
+        service_daemon::CALLOUT_LEVEL_WARNING, "mirroring mode query failed");
+      continue;
+    }
+
+    vector<librbd::mirror_peer_site_t> configs;
+    r = librbd::api::Mirror<>::peer_site_list(ioctx, &configs);
+    if (r < 0) {
+      derr << "error reading mirroring config for pool " << pool_name
+           << cpp_strerror(r) << dendl;
+      m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+        pool_id, m_service_pools[pool_id],
+        service_daemon::CALLOUT_LEVEL_ERROR, "mirroring peer list failed");
+      continue;
+    }
+
+    // keep only peers we can replay from (i.e. skip TX-only peers)
+    std::vector<PeerSpec> peers;
+    peers.reserve(configs.size());
+    for (auto& peer : configs) {
+      if (peer.direction != RBD_MIRROR_PEER_DIRECTION_TX) {
+        peers.push_back(peer);
+      }
+    }
+
+    // overlay any mon config-key stored overrides (mon host / key) onto
+    // each peer; a failure aborts resolution for the remaining peers but
+    // the pool is still published with what was resolved so far
+    for (auto& peer : peers) {
+      r = resolve_peer_site_config_keys(pool_id, pool_name, &peer);
+      if (r < 0) {
+        break;
+      }
+    }
+
+    // the pool is healthy again -- clear any previously raised callout
+    if (m_service_pools[pool_id] != service_daemon::CALLOUT_ID_NONE) {
+      m_service_daemon->remove_callout(pool_id, m_service_pools[pool_id]);
+      m_service_pools[pool_id] = service_daemon::CALLOUT_ID_NONE;
+    }
+
+    pool_peers->emplace(pool_id, Peers{peers.begin(), peers.end()});
+  }
+
+  // drop service-daemon registrations for pools that disappeared or had
+  // mirroring disabled since the last scan
+  for (auto it = m_service_pools.begin(); it != m_service_pools.end(); ) {
+    auto current_it(it++);
+    if (service_pool_ids.find(current_it->first) == service_pool_ids.end()) {
+      m_service_daemon->remove_pool(current_it->first);
+      m_service_pools.erase(current_it->first);
+    }
+  }
+}
+
+// Fetch the cluster's mirroring site name via the librbd API.
+// Returns 0 on success or a negative errno from librbd.
+int ClusterWatcher::read_site_name(std::string* site_name) {
+  dout(10) << dendl;
+
+  librbd::RBD rbd;
+  return rbd.mirror_site_name_get(*m_cluster, site_name);
+}
+
+// Look up the peer's stored overrides (mon_host / key) in the mon
+// config-key store and apply them to *peer. A missing key or an empty
+// value means "no overrides" and is not an error; a decode failure
+// raises a service-daemon callout but still returns 0 (non-fatal).
+int ClusterWatcher::resolve_peer_site_config_keys(int64_t pool_id,
+                                                  const std::string& pool_name,
+                                                  PeerSpec* peer) {
+  dout(10) << "retrieving config-key: pool_id=" << pool_id << ", "
+           << "pool_name=" << pool_name << ", "
+           << "peer_uuid=" << peer->uuid << dendl;
+
+  // mon command JSON: config-key get <prefix><pool_id>/<peer_uuid>
+  std::string cmd =
+    "{"
+      "\"prefix\": \"config-key get\", "
+      "\"key\": \"" RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) +
+        "/" + peer->uuid + "\""
+    "}";
+
+  bufferlist in_bl;
+  bufferlist out_bl;
+  int r = m_cluster->mon_command(cmd, in_bl, &out_bl, nullptr);
+  if (r == -ENOENT || out_bl.length() == 0) {
+    // no stored overrides for this peer
+    return 0;
+  } else if (r < 0) {
+    derr << "error reading mirroring peer config for pool " << pool_name << ": "
+         << cpp_strerror(r) << dendl;
+    m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+      pool_id, m_service_pools[pool_id],
+      service_daemon::CALLOUT_LEVEL_WARNING,
+      "mirroring peer config-key query failed");
+    return r;
+  }
+
+  // parse {"mon_host": ..., "key": ...}; both fields are optional
+  bool json_valid = false;
+  json_spirit::mValue json_root;
+  if(json_spirit::read(out_bl.to_str(), json_root)) {
+    try {
+      auto& json_obj = json_root.get_obj();
+      if (json_obj.count("mon_host")) {
+        peer->mon_host = json_obj["mon_host"].get_str();
+      }
+      if (json_obj.count("key")) {
+        peer->key = json_obj["key"].get_str();
+      }
+      json_valid = true;
+    } catch (std::runtime_error&) {
+    }
+  }
+
+  if (!json_valid) {
+    derr << "error parsing mirroring peer config for pool " << pool_name << ", "
+         << "peer " << peer->uuid << dendl;
+    m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+      pool_id, m_service_pools[pool_id],
+      service_daemon::CALLOUT_LEVEL_WARNING,
+      "mirroring peer config-key decode failed");
+  }
+
+  return 0;
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/ClusterWatcher.h b/src/tools/rbd_mirror/ClusterWatcher.h
new file mode 100644
index 000000000..93356fec6
--- /dev/null
+++ b/src/tools/rbd_mirror/ClusterWatcher.h
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
+#define CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+
+#include "common/ceph_context.h"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <unordered_map>
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ServiceDaemon;
+
+/**
+ * Tracks mirroring configuration for pools in a single
+ * cluster.
+ */
+class ClusterWatcher {
+public:
+  // Orders peers by uuid so a Peers set keeps one entry per peer uuid.
+  struct PeerSpecCompare {
+    bool operator()(const PeerSpec& lhs, const PeerSpec& rhs) const {
+      return (lhs.uuid < rhs.uuid);
+    }
+  };
+  typedef std::set<PeerSpec, PeerSpecCompare> Peers;
+  typedef std::map<int64_t, Peers> PoolPeers;
+
+  // lock is shared with the caller and must be held when calling the
+  // getters below; service_daemon must outlive this watcher.
+  ClusterWatcher(RadosRef cluster, ceph::mutex &lock,
+                 ServiceDaemon<librbd::ImageCtx>* service_daemon);
+  ~ClusterWatcher() = default;
+  ClusterWatcher(const ClusterWatcher&) = delete;
+  ClusterWatcher& operator=(const ClusterWatcher&) = delete;
+
+  // Caller controls frequency of calls
+  void refresh_pools();
+  // Requires m_lock; the reference is only valid while the lock is held.
+  const PoolPeers& get_pool_peers() const;
+  // Requires m_lock.
+  std::string get_site_name() const;
+
+private:
+  // pool id -> currently raised service-daemon callout (or none)
+  typedef std::unordered_map<int64_t, service_daemon::CalloutId> ServicePools;
+
+  RadosRef m_cluster;
+  ceph::mutex &m_lock;  // externally owned; guards the two fields below
+  ServiceDaemon<librbd::ImageCtx>* m_service_daemon;
+
+  ServicePools m_service_pools;
+  PoolPeers m_pool_peers;    // guarded by m_lock
+  std::string m_site_name;   // guarded by m_lock
+
+  // Build the pool -> peers map and sync service-daemon registrations.
+  void read_pool_peers(PoolPeers *pool_peers);
+
+  int read_site_name(std::string* site_name);
+
+  // Overlay mon config-key stored overrides (mon_host/key) onto *peer.
+  int resolve_peer_site_config_keys(
+    int64_t pool_id, const std::string& pool_name, PeerSpec* peer);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
diff --git a/src/tools/rbd_mirror/ImageDeleter.cc b/src/tools/rbd_mirror/ImageDeleter.cc
new file mode 100644
index 000000000..fcdd1baad
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageDeleter.cc
@@ -0,0 +1,548 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "include/rados/librados.hpp"
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "global/global_context.h"
+#include "librbd/internal.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/asio/ContextWQ.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/Utils.h"
+#include "ImageDeleter.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/Throttler.h"
+#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/TrashWatcher.h"
+#include <map>
+#include <sstream>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+
+using std::string;
+using std::stringstream;
+using std::vector;
+using std::pair;
+using std::make_pair;
+
+using librados::IoCtx;
+using namespace librbd;
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_async_context_callback;
+
+namespace {
+
+// Interface for admin-socket commands registered by the image deleter.
+class ImageDeleterAdminSocketCommand {
+public:
+  virtual ~ImageDeleterAdminSocketCommand() {}
+  // Execute the command, writing output through the formatter.
+  virtual int call(Formatter *f) = 0;
+};
+
+// Admin-socket command that dumps the deleter's queue status.
+template <typename I>
+class StatusCommand : public ImageDeleterAdminSocketCommand {
+public:
+  explicit StatusCommand(ImageDeleter<I> *image_del) : image_del(image_del) {}
+
+  int call(Formatter *f) override {
+    image_del->print_status(f);
+    return 0;
+  }
+
+private:
+  ImageDeleter<I> *image_del;  // not owned; must outlive this command
+};
+
+} // anonymous namespace
+
+// Registers the per-pool "rbd mirror deletion status" admin-socket
+// command and dispatches invocations to the matching command object.
+template <typename I>
+class ImageDeleterAdminSocketHook : public AdminSocketHook {
+public:
+  ImageDeleterAdminSocketHook(CephContext *cct, const std::string& pool_name,
+                              ImageDeleter<I> *image_del) :
+    admin_socket(cct->get_admin_socket()) {
+
+    std::string command;
+    int r;
+
+    command = "rbd mirror deletion status " + pool_name;
+    r = admin_socket->register_command(command, this,
+                                       "get status for image deleter");
+    if (r == 0) {
+      // only track commands that registered successfully
+      commands[command] = new StatusCommand<I>(image_del);
+    }
+
+  }
+
+  ~ImageDeleterAdminSocketHook() override {
+    (void)admin_socket->unregister_commands(this);
+    for (Commands::const_iterator i = commands.begin(); i != commands.end();
+         ++i) {
+      delete i->second;
+    }
+  }
+
+  int call(std::string_view command, const cmdmap_t& cmdmap,
+	   Formatter *f,
+	   std::ostream& errss,
+	   bufferlist& out) override {
+    // only registered commands can reach this hook
+    Commands::const_iterator i = commands.find(command);
+    ceph_assert(i != commands.end());
+    return i->second->call(f);
+  }
+
+private:
+  // transparent comparator allows lookup by string_view without a copy
+  typedef std::map<std::string, ImageDeleterAdminSocketCommand*,
+		   std::less<>> Commands;
+  AdminSocket *admin_socket;
+  Commands commands;  // owned command objects, freed in the destructor
+};
+
+// All collaborators (threads, throttler, service daemon) are borrowed
+// and must outlive this deleter.
+template <typename I>
+ImageDeleter<I>::ImageDeleter(
+    librados::IoCtx& local_io_ctx, Threads<librbd::ImageCtx>* threads,
+    Throttler<librbd::ImageCtx>* image_deletion_throttler,
+    ServiceDaemon<librbd::ImageCtx>* service_daemon)
+  : m_local_io_ctx(local_io_ctx), m_threads(threads),
+    m_image_deletion_throttler(image_deletion_throttler),
+    m_service_daemon(service_daemon), m_trash_listener(this),
+    m_lock(ceph::make_mutex(
+      librbd::util::unique_lock_name("rbd::mirror::ImageDeleter::m_lock",
+                                     this))) {
+}
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << " " \
+ << __func__ << ": "
+
+// Static helper: asynchronously move a mirrored image (identified by its
+// global image id) to the trash; fire-and-forget via TrashMoveRequest.
+template <typename I>
+void ImageDeleter<I>::trash_move(librados::IoCtx& local_io_ctx,
+                                 const std::string& global_image_id,
+                                 bool resync,
+                                 librbd::asio::ContextWQ* work_queue,
+                                 Context* on_finish) {
+  dout(10) << "global_image_id=" << global_image_id << ", "
+           << "resync=" << resync << dendl;
+
+  // request self-deletes after completing on_finish
+  auto req = rbd::mirror::image_deleter::TrashMoveRequest<>::create(
+    local_io_ctx, global_image_id, resync, work_queue, on_finish);
+  req->send();
+}
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << this << " " \
+ << __func__ << ": "
+
+// Register the admin-socket hook and start watching the pool's trash;
+// on_finish completes once the trash watcher is initialized.
+template <typename I>
+void ImageDeleter<I>::init(Context* on_finish) {
+  dout(10) << dendl;
+
+  m_asok_hook = new ImageDeleterAdminSocketHook<I>(
+    g_ceph_context, m_local_io_ctx.get_pool_name(), this);
+
+  m_trash_watcher = image_deleter::TrashWatcher<I>::create(m_local_io_ctx,
+                                                           m_threads,
+                                                           m_trash_listener);
+  m_trash_watcher->init(on_finish);
+}
+
+// Begin shutdown: remove the admin-socket hook, fail any throttled ops
+// with -ESTALE, then tear down the trash watcher (continues in
+// shut_down_trash_watcher / wait_for_ops / cancel_all_deletions).
+template <typename I>
+void ImageDeleter<I>::shut_down(Context* on_finish) {
+  dout(10) << dendl;
+
+  delete m_asok_hook;
+  m_asok_hook = nullptr;
+
+  m_image_deletion_throttler->drain(m_local_io_ctx.get_namespace(),
+                                    -ESTALE);
+
+  shut_down_trash_watcher(on_finish);
+}
+
+// Shut down and destroy the trash watcher, then proceed to waiting for
+// in-flight operations.
+template <typename I>
+void ImageDeleter<I>::shut_down_trash_watcher(Context* on_finish) {
+  dout(10) << dendl;
+  ceph_assert(m_trash_watcher);
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+      delete m_trash_watcher;
+      m_trash_watcher = nullptr;
+
+      wait_for_ops(on_finish);
+    });
+  m_trash_watcher->shut_down(ctx);
+}
+
+// Stop scheduling new deletions (m_running = false), cancel any pending
+// retry timer, then wait for tracked async ops before canceling the
+// queued deletions.
+template <typename I>
+void ImageDeleter<I>::wait_for_ops(Context* on_finish) {
+  {
+    // timer lock must be acquired before m_lock for the retry timer
+    std::scoped_lock locker{m_threads->timer_lock, m_lock};
+    m_running = false;
+    cancel_retry_timer();
+  }
+
+  auto ctx = new LambdaContext([this, on_finish](int) {
+      cancel_all_deletions(on_finish);
+    });
+  m_async_op_tracker.wait_for_ops(ctx);
+}
+
+// Fail all queued (not yet started) deletions with -ECANCELED and
+// complete shutdown; in-flight deletions must already have drained.
+template <typename I>
+void ImageDeleter<I>::cancel_all_deletions(Context* on_finish) {
+  m_image_deletion_throttler->drain(m_local_io_ctx.get_namespace(),
+                                    -ECANCELED);
+  {
+    std::lock_guard locker{m_lock};
+    // wake up any external state machines waiting on deletions
+    ceph_assert(m_in_flight_delete_queue.empty());
+    for (auto& queue : {&m_delete_queue, &m_retry_delete_queue}) {
+      for (auto& info : *queue) {
+        notify_on_delete(info->image_id, -ECANCELED);
+      }
+      queue->clear();
+    }
+  }
+  on_finish->complete(0);
+}
+
+// Register a callback fired when the given image's deletion completes.
+// If scheduled_only and no deletion is queued, completes immediately
+// with 0. Any previously registered waiter for the same image is
+// completed with -ESTALE before the new one is stored.
+template <typename I>
+void ImageDeleter<I>::wait_for_deletion(const std::string& image_id,
+                                        bool scheduled_only,
+                                        Context* on_finish) {
+  dout(5) << "image_id=" << image_id << dendl;
+
+  // always complete the caller's context via the work queue
+  on_finish = new LambdaContext([this, on_finish](int r) {
+      m_threads->work_queue->queue(on_finish, r);
+    });
+
+  std::lock_guard locker{m_lock};
+  auto del_info = find_delete_info(image_id);
+  if (!del_info && scheduled_only) {
+    // image not scheduled for deletion
+    on_finish->complete(0);
+    return;
+  }
+
+  notify_on_delete(image_id, -ESTALE);
+  m_on_delete_contexts[image_id] = on_finish;
+}
+
+// Finish an in-flight deletion: notify waiters with result r and drop
+// the caller's reference to the DeleteInfo.
+template <typename I>
+void ImageDeleter<I>::complete_active_delete(DeleteInfoRef* delete_info,
+                                             int r) {
+  dout(20) << "info=" << *delete_info << ", r=" << r << dendl;
+  std::lock_guard locker{m_lock};
+  notify_on_delete((*delete_info)->image_id, r);
+  delete_info->reset();
+}
+
+// Record a failed deletion for later retry after retry_delay seconds.
+// A blocklisted client cannot make progress, so that case completes the
+// deletion immediately with the error instead of retrying.
+template <typename I>
+void ImageDeleter<I>::enqueue_failed_delete(DeleteInfoRef* delete_info,
+                                            int error_code,
+                                            double retry_delay) {
+  dout(20) << "info=" << *delete_info << ", r=" << error_code << dendl;
+  if (error_code == -EBLOCKLISTED) {
+    std::lock_guard locker{m_lock};
+    derr << "blocklisted while deleting local image" << dendl;
+    complete_active_delete(delete_info, error_code);
+    return;
+  }
+
+  // timer lock must be acquired before m_lock (see schedule_retry_timer)
+  std::scoped_lock locker{m_threads->timer_lock, m_lock};
+  auto& delete_info_ref = *delete_info;
+  notify_on_delete(delete_info_ref->image_id, error_code);
+  delete_info_ref->error_code = error_code;
+  ++delete_info_ref->retries;
+  delete_info_ref->retry_time = (clock_t::now() +
+                                 ceph::make_timespan(retry_delay));
+  m_retry_delete_queue.push_back(delete_info_ref);
+
+  schedule_retry_timer();
+}
+
+// Locate the DeleteInfo for image_id in any of the three queues
+// (in-flight first, then retry, then pending); returns an empty ref if
+// not found. Caller must hold m_lock.
+// NOTE(review): the initializer copies all three queues by value; an
+// array of pointers would avoid the copies -- confirm before changing.
+template <typename I>
+typename ImageDeleter<I>::DeleteInfoRef
+ImageDeleter<I>::find_delete_info(const std::string &image_id) {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  DeleteQueue delete_queues[] = {m_in_flight_delete_queue,
+                                 m_retry_delete_queue,
+                                 m_delete_queue};
+
+  DeleteInfo delete_info{image_id};
+  for (auto& queue : delete_queues) {
+    auto it = std::find_if(queue.begin(), queue.end(),
+                           [&delete_info](const DeleteInfoRef& ref) {
+                             return delete_info == *ref;
+                           });
+    if (it != queue.end()) {
+      return *it;
+    }
+  }
+  return {};
+}
+
+// Dump the pending and retry queues for the admin-socket status command.
+template <typename I>
+void ImageDeleter<I>::print_status(Formatter *f) {
+  dout(20) << dendl;
+
+  f->open_object_section("image_deleter_status");
+  f->open_array_section("delete_images_queue");
+
+  std::lock_guard l{m_lock};
+  for (const auto& image : m_delete_queue) {
+    image->print_status(f);
+  }
+
+  f->close_section();
+  f->open_array_section("failed_deletes_queue");
+  for (const auto& image : m_retry_delete_queue) {
+    // print_failure_info=true adds the error details
+    image->print_status(f, true);
+  }
+
+  f->close_section();
+  f->close_section();
+}
+
+// Snapshot the image ids currently awaiting deletion (test/status aid).
+template <typename I>
+vector<string> ImageDeleter<I>::get_delete_queue_items() {
+  vector<string> items;
+
+  std::lock_guard l{m_lock};
+  for (const auto& del_info : m_delete_queue) {
+    items.push_back(del_info->image_id);
+  }
+
+  return items;
+}
+
+template <typename I>
+vector<pair<string, int> > ImageDeleter<I>::get_failed_queue_items() {
+ vector<pair<string, int> > items;
+
+ std::lock_guard l{m_lock};
+ for (const auto& del_info : m_retry_delete_queue) {
+ items.push_back(make_pair(del_info->image_id,
+ del_info->error_code));
+ }
+
+ return items;
+}
+
+template <typename I>
+void ImageDeleter<I>::remove_images() {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ while (m_running && !m_delete_queue.empty()) {
+
+ DeleteInfoRef delete_info = m_delete_queue.front();
+ m_delete_queue.pop_front();
+
+ ceph_assert(delete_info);
+
+ auto on_start = create_async_context_callback(
+ m_threads->work_queue, new LambdaContext(
+ [this, delete_info](int r) {
+ if (r < 0) {
+ notify_on_delete(delete_info->image_id, r);
+ return;
+ }
+ remove_image(delete_info);
+ }));
+
+ m_image_deletion_throttler->start_op(m_local_io_ctx.get_namespace(),
+ delete_info->image_id, on_start);
+ }
+}
+
+template <typename I>
+void ImageDeleter<I>::remove_image(DeleteInfoRef delete_info) {
+ dout(10) << "info=" << *delete_info << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_in_flight_delete_queue.push_back(delete_info);
+ m_async_op_tracker.start_op();
+
+ auto ctx = new LambdaContext([this, delete_info](int r) {
+ handle_remove_image(delete_info, r);
+ m_async_op_tracker.finish_op();
+ });
+
+ auto req = image_deleter::TrashRemoveRequest<I>::create(
+ m_local_io_ctx, delete_info->image_id, &delete_info->error_result,
+ m_threads->work_queue, ctx);
+ req->send();
+}
+
+template <typename I>
+void ImageDeleter<I>::handle_remove_image(DeleteInfoRef delete_info,
+ int r) {
+ dout(10) << "info=" << *delete_info << ", r=" << r << dendl;
+
+ m_image_deletion_throttler->finish_op(m_local_io_ctx.get_namespace(),
+ delete_info->image_id);
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ auto it = std::find(m_in_flight_delete_queue.begin(),
+ m_in_flight_delete_queue.end(), delete_info);
+ ceph_assert(it != m_in_flight_delete_queue.end());
+ m_in_flight_delete_queue.erase(it);
+ }
+
+ if (r < 0) {
+ if (delete_info->error_result == image_deleter::ERROR_RESULT_COMPLETE) {
+ complete_active_delete(&delete_info, r);
+ } else if (delete_info->error_result ==
+ image_deleter::ERROR_RESULT_RETRY_IMMEDIATELY) {
+ enqueue_failed_delete(&delete_info, r, m_busy_interval);
+ } else {
+ auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+ double failed_interval = cct->_conf.get_val<double>(
+ "rbd_mirror_delete_retry_interval");
+ enqueue_failed_delete(&delete_info, r, failed_interval);
+ }
+ } else {
+ complete_active_delete(&delete_info, 0);
+ }
+
+ // process the next queued image to delete
+ remove_images();
+}
+
+template <typename I>
+void ImageDeleter<I>::schedule_retry_timer() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ if (!m_running || m_timer_ctx != nullptr || m_retry_delete_queue.empty()) {
+ return;
+ }
+
+ dout(10) << dendl;
+ auto &delete_info = m_retry_delete_queue.front();
+ m_timer_ctx = new LambdaContext([this](int r) {
+ handle_retry_timer();
+ });
+ m_threads->timer->add_event_at(delete_info->retry_time, m_timer_ctx);
+}
+
+template <typename I>
+void ImageDeleter<I>::cancel_retry_timer() {
+ dout(10) << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ if (m_timer_ctx != nullptr) {
+ bool canceled = m_threads->timer->cancel_event(m_timer_ctx);
+ m_timer_ctx = nullptr;
+ ceph_assert(canceled);
+ }
+}
+
+template <typename I>
+void ImageDeleter<I>::handle_retry_timer() {
+ dout(10) << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_timer_ctx != nullptr);
+ m_timer_ctx = nullptr;
+
+ ceph_assert(m_running);
+ ceph_assert(!m_retry_delete_queue.empty());
+
+ // move all ready-to-ready items back to main queue
+ auto now = clock_t::now();
+ while (!m_retry_delete_queue.empty()) {
+ auto &delete_info = m_retry_delete_queue.front();
+ if (delete_info->retry_time > now) {
+ break;
+ }
+
+ m_delete_queue.push_back(delete_info);
+ m_retry_delete_queue.pop_front();
+ }
+
+ // schedule wake up for any future retries
+ schedule_retry_timer();
+
+ // start (concurrent) removal of images
+ m_async_op_tracker.start_op();
+ auto ctx = new LambdaContext([this](int r) {
+ remove_images();
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void ImageDeleter<I>::handle_trash_image(const std::string& image_id,
+ const ImageDeleter<I>::clock_t::time_point& deferment_end_time) {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ auto del_info = find_delete_info(image_id);
+ if (del_info != nullptr) {
+ dout(20) << "image " << image_id << " "
+ << "was already scheduled for deletion" << dendl;
+ return;
+ }
+
+ dout(10) << "image_id=" << image_id << ", "
+ << "deferment_end_time=" << utime_t{deferment_end_time} << dendl;
+
+ del_info.reset(new DeleteInfo(image_id));
+ del_info->retry_time = deferment_end_time;
+ m_retry_delete_queue.push_back(del_info);
+
+ schedule_retry_timer();
+}
+
+template <typename I>
+void ImageDeleter<I>::notify_on_delete(const std::string& image_id,
+ int r) {
+ dout(10) << "image_id=" << image_id << ", r=" << r << dendl;
+ auto it = m_on_delete_contexts.find(image_id);
+ if (it == m_on_delete_contexts.end()) {
+ return;
+ }
+
+ it->second->complete(r);
+ m_on_delete_contexts.erase(it);
+}
+
+template <typename I>
+void ImageDeleter<I>::DeleteInfo::print_status(Formatter *f,
+ bool print_failure_info) {
+ f->open_object_section("delete_info");
+ f->dump_string("image_id", image_id);
+ if (print_failure_info) {
+ f->dump_string("error_code", cpp_strerror(error_code));
+ f->dump_int("retries", retries);
+ }
+ f->close_section();
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageDeleter<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageDeleter.h b/src/tools/rbd_mirror/ImageDeleter.h
new file mode 100644
index 000000000..5fe79496b
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageDeleter.h
@@ -0,0 +1,189 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_H
+
+#include "include/utime.h"
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+#include <atomic>
+#include <deque>
+#include <iosfwd>
+#include <map>
+#include <memory>
+#include <vector>
+
+class AdminSocketHook;
+class Context;
+namespace librbd {
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ServiceDaemon;
+template <typename> class Threads;
+template <typename> class Throttler;
+
+namespace image_deleter { template <typename> struct TrashWatcher; }
+
+/**
+ * Manage deletion of non-primary images.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageDeleter {
+public:
+ static ImageDeleter* create(
+ librados::IoCtx& local_io_ctx, Threads<librbd::ImageCtx>* threads,
+ Throttler<librbd::ImageCtx>* image_deletion_throttler,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon) {
+ return new ImageDeleter(local_io_ctx, threads, image_deletion_throttler,
+ service_daemon);
+ }
+
+ ImageDeleter(librados::IoCtx& local_io_ctx,
+ Threads<librbd::ImageCtx>* threads,
+ Throttler<librbd::ImageCtx>* image_deletion_throttler,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon);
+
+ ImageDeleter(const ImageDeleter&) = delete;
+ ImageDeleter& operator=(const ImageDeleter&) = delete;
+
+ static void trash_move(librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id, bool resync,
+ librbd::asio::ContextWQ* work_queue,
+ Context* on_finish);
+
+ void init(Context* on_finish);
+ void shut_down(Context* on_finish);
+
+ void print_status(Formatter *f);
+
+ // for testing purposes
+ void wait_for_deletion(const std::string &image_id,
+ bool scheduled_only, Context* on_finish);
+
+ std::vector<std::string> get_delete_queue_items();
+ std::vector<std::pair<std::string, int> > get_failed_queue_items();
+
+ inline void set_busy_timer_interval(double interval) {
+ m_busy_interval = interval;
+ }
+
+private:
+ using clock_t = ceph::real_clock;
+ struct TrashListener : public image_deleter::TrashListener {
+ ImageDeleter *image_deleter;
+
+ TrashListener(ImageDeleter *image_deleter) : image_deleter(image_deleter) {
+ }
+
+ void handle_trash_image(const std::string& image_id,
+ const ceph::real_clock::time_point& deferment_end_time) override {
+ image_deleter->handle_trash_image(image_id, deferment_end_time);
+ }
+ };
+
+ struct DeleteInfo {
+ std::string image_id;
+
+ image_deleter::ErrorResult error_result = {};
+ int error_code = 0;
+ clock_t::time_point retry_time;
+ int retries = 0;
+
+ DeleteInfo(const std::string& image_id)
+ : image_id(image_id) {
+ }
+
+ inline bool operator==(const DeleteInfo& delete_info) const {
+ return (image_id == delete_info.image_id);
+ }
+
+ friend std::ostream& operator<<(std::ostream& os, DeleteInfo& delete_info) {
+ os << "[image_id=" << delete_info.image_id << "]";
+ return os;
+ }
+
+ void print_status(Formatter *f,
+ bool print_failure_info=false);
+ };
+ typedef std::shared_ptr<DeleteInfo> DeleteInfoRef;
+ typedef std::deque<DeleteInfoRef> DeleteQueue;
+ typedef std::map<std::string, Context*> OnDeleteContexts;
+
+ librados::IoCtx& m_local_io_ctx;
+ Threads<librbd::ImageCtx>* m_threads;
+ Throttler<librbd::ImageCtx>* m_image_deletion_throttler;
+ ServiceDaemon<librbd::ImageCtx>* m_service_daemon;
+
+ image_deleter::TrashWatcher<ImageCtxT>* m_trash_watcher = nullptr;
+ TrashListener m_trash_listener;
+
+ std::atomic<unsigned> m_running { 1 };
+
+ double m_busy_interval = 1;
+
+ AsyncOpTracker m_async_op_tracker;
+
+ ceph::mutex m_lock;
+ DeleteQueue m_delete_queue;
+ DeleteQueue m_retry_delete_queue;
+ DeleteQueue m_in_flight_delete_queue;
+
+ OnDeleteContexts m_on_delete_contexts;
+
+ AdminSocketHook *m_asok_hook = nullptr;
+
+ Context *m_timer_ctx = nullptr;
+
+ bool process_image_delete();
+
+ void complete_active_delete(DeleteInfoRef* delete_info, int r);
+ void enqueue_failed_delete(DeleteInfoRef* delete_info, int error_code,
+ double retry_delay);
+
+ DeleteInfoRef find_delete_info(const std::string &image_id);
+
+ void remove_images();
+ void remove_image(DeleteInfoRef delete_info);
+ void handle_remove_image(DeleteInfoRef delete_info, int r);
+
+ void schedule_retry_timer();
+ void cancel_retry_timer();
+ void handle_retry_timer();
+
+ void handle_trash_image(const std::string& image_id,
+ const clock_t::time_point& deferment_end_time);
+
+ void shut_down_trash_watcher(Context* on_finish);
+ void wait_for_ops(Context* on_finish);
+ void cancel_all_deletions(Context* on_finish);
+
+ void notify_on_delete(const std::string& image_id, int r);
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageDeleter<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_H
diff --git a/src/tools/rbd_mirror/ImageMap.cc b/src/tools/rbd_mirror/ImageMap.cc
new file mode 100644
index 000000000..d352fcb2c
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageMap.cc
@@ -0,0 +1,602 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/Threads.h"
+
+#include "ImageMap.h"
+#include "image_map/LoadRequest.h"
+#include "image_map/SimplePolicy.h"
+#include "image_map/UpdateRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageMap: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using ::operator<<;
+using image_map::Policy;
+
+using librbd::util::unique_lock_name;
+using librbd::util::create_async_context_callback;
+
+template <typename I>
+struct ImageMap<I>::C_NotifyInstance : public Context {
+ ImageMap* image_map;
+ std::string global_image_id;
+ bool acquire_release;
+
+ C_NotifyInstance(ImageMap* image_map, const std::string& global_image_id,
+ bool acquire_release)
+ : image_map(image_map), global_image_id(global_image_id),
+ acquire_release(acquire_release) {
+ image_map->start_async_op();
+ }
+
+ void finish(int r) override {
+ if (acquire_release) {
+ image_map->handle_peer_ack(global_image_id, r);
+ } else {
+ image_map->handle_peer_ack_remove(global_image_id, r);
+ }
+ image_map->finish_async_op();
+ }
+};
+
+template <typename I>
+ImageMap<I>::ImageMap(librados::IoCtx &ioctx, Threads<I> *threads,
+ const std::string& instance_id,
+ image_map::Listener &listener)
+ : m_ioctx(ioctx), m_threads(threads), m_instance_id(instance_id),
+ m_listener(listener),
+ m_lock(ceph::make_mutex(
+ unique_lock_name("rbd::mirror::ImageMap::m_lock", this))) {
+}
+
+template <typename I>
+ImageMap<I>::~ImageMap() {
+ ceph_assert(m_async_op_tracker.empty());
+ ceph_assert(m_timer_task == nullptr);
+ ceph_assert(m_rebalance_task == nullptr);
+}
+
+template <typename I>
+void ImageMap<I>::continue_action(const std::set<std::string> &global_image_ids,
+ int r) {
+ dout(20) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_shutting_down) {
+ return;
+ }
+
+ for (auto const &global_image_id : global_image_ids) {
+ bool schedule = m_policy->finish_action(global_image_id, r);
+ if (schedule) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::handle_update_request(
+ const Updates &updates,
+ const std::set<std::string> &remove_global_image_ids, int r) {
+ dout(20) << "r=" << r << dendl;
+
+ std::set<std::string> global_image_ids;
+
+ global_image_ids.insert(remove_global_image_ids.begin(),
+ remove_global_image_ids.end());
+ for (auto const &update : updates) {
+ global_image_ids.insert(update.global_image_id);
+ }
+
+ continue_action(global_image_ids, r);
+}
+
+template <typename I>
+void ImageMap<I>::update_image_mapping(Updates&& map_updates,
+ std::set<std::string>&& map_removals) {
+ if (map_updates.empty() && map_removals.empty()) {
+ return;
+ }
+
+ dout(5) << "updates=[" << map_updates << "], "
+ << "removes=[" << map_removals << "]" << dendl;
+
+ Context *on_finish = new LambdaContext(
+ [this, map_updates, map_removals](int r) {
+ handle_update_request(map_updates, map_removals, r);
+ finish_async_op();
+ });
+ on_finish = create_async_context_callback(m_threads->work_queue, on_finish);
+
+ // empty meta policy for now..
+ image_map::PolicyMetaNone policy_meta;
+
+ bufferlist bl;
+ encode(image_map::PolicyData(policy_meta), bl);
+
+ // prepare update map
+ std::map<std::string, cls::rbd::MirrorImageMap> update_mapping;
+ for (auto const &update : map_updates) {
+ update_mapping.emplace(
+ update.global_image_id, cls::rbd::MirrorImageMap(update.instance_id,
+ update.mapped_time, bl));
+ }
+
+ start_async_op();
+ image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create(
+ m_ioctx, std::move(update_mapping), std::move(map_removals), on_finish);
+ req->send();
+}
+
+template <typename I>
+void ImageMap<I>::process_updates() {
+ dout(20) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(m_timer_task == nullptr);
+
+ Updates map_updates;
+ std::set<std::string> map_removals;
+ Updates acquire_updates;
+ Updates release_updates;
+
+ // gather updates by advancing the state machine
+ m_lock.lock();
+ for (auto const &global_image_id : m_global_image_ids) {
+ image_map::ActionType action_type =
+ m_policy->start_action(global_image_id);
+ image_map::LookupInfo info = m_policy->lookup(global_image_id);
+
+ dout(15) << "global_image_id=" << global_image_id << ", "
+ << "action=" << action_type << ", "
+ << "instance=" << info.instance_id << dendl;
+ switch (action_type) {
+ case image_map::ACTION_TYPE_NONE:
+ continue;
+ case image_map::ACTION_TYPE_MAP_UPDATE:
+ ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+ map_updates.emplace_back(global_image_id, info.instance_id,
+ info.mapped_time);
+ break;
+ case image_map::ACTION_TYPE_MAP_REMOVE:
+ map_removals.emplace(global_image_id);
+ break;
+ case image_map::ACTION_TYPE_ACQUIRE:
+ ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+ acquire_updates.emplace_back(global_image_id, info.instance_id);
+ break;
+ case image_map::ACTION_TYPE_RELEASE:
+ ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+ release_updates.emplace_back(global_image_id, info.instance_id);
+ break;
+ }
+ }
+ m_global_image_ids.clear();
+ m_lock.unlock();
+
+ // notify listener (acquire, release) and update on-disk map. note
+ // that its safe to process this outside m_lock as we still hold
+ // timer lock.
+ notify_listener_acquire_release_images(acquire_updates, release_updates);
+ update_image_mapping(std::move(map_updates), std::move(map_removals));
+}
+
+template <typename I>
+void ImageMap<I>::schedule_update_task() {
+ std::lock_guard timer_lock{m_threads->timer_lock};
+ schedule_update_task(m_threads->timer_lock);
+}
+
+template <typename I>
+void ImageMap<I>::schedule_update_task(const ceph::mutex &timer_lock) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+
+ schedule_rebalance_task();
+
+ if (m_timer_task != nullptr) {
+ return;
+ }
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_global_image_ids.empty()) {
+ return;
+ }
+ }
+
+ m_timer_task = new LambdaContext([this](int r) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ m_timer_task = nullptr;
+
+ process_updates();
+ });
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ double after = cct->_conf.get_val<double>("rbd_mirror_image_policy_update_throttle_interval");
+
+ dout(20) << "scheduling image check update (" << m_timer_task << ")"
+ << " after " << after << " second(s)" << dendl;
+ m_threads->timer->add_event_after(after, m_timer_task);
+}
+
+template <typename I>
+void ImageMap<I>::rebalance() {
+ ceph_assert(m_rebalance_task == nullptr);
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_async_op_tracker.empty() && m_global_image_ids.empty()){
+ dout(20) << "starting rebalance" << dendl;
+
+ std::set<std::string> remap_global_image_ids;
+ m_policy->add_instances({}, &remap_global_image_ids);
+
+ for (auto const &global_image_id : remap_global_image_ids) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+
+ schedule_update_task(m_threads->timer_lock);
+}
+
+template <typename I>
+void ImageMap<I>::schedule_rebalance_task() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+
+ // fetch the updated value of idle timeout for (re)scheduling
+ double resched_after = cct->_conf.get_val<double>(
+ "rbd_mirror_image_policy_rebalance_timeout");
+ if (!resched_after) {
+ return;
+ }
+
+ // cancel existing rebalance task if any before scheduling
+ if (m_rebalance_task != nullptr) {
+ m_threads->timer->cancel_event(m_rebalance_task);
+ }
+
+ m_rebalance_task = new LambdaContext([this](int _) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ m_rebalance_task = nullptr;
+
+ rebalance();
+ });
+
+ dout(20) << "scheduling rebalance (" << m_rebalance_task << ")"
+ << " after " << resched_after << " second(s)" << dendl;
+ m_threads->timer->add_event_after(resched_after, m_rebalance_task);
+}
+
+template <typename I>
+void ImageMap<I>::schedule_action(const std::string &global_image_id) {
+ dout(20) << "global_image_id=" << global_image_id << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ m_global_image_ids.emplace(global_image_id);
+}
+
+template <typename I>
+void ImageMap<I>::notify_listener_acquire_release_images(
+ const Updates &acquire, const Updates &release) {
+ if (acquire.empty() && release.empty()) {
+ return;
+ }
+
+ dout(5) << "acquire=[" << acquire << "], "
+ << "release=[" << release << "]" << dendl;
+
+ for (auto const &update : acquire) {
+ m_listener.acquire_image(
+ update.global_image_id, update.instance_id,
+ create_async_context_callback(
+ m_threads->work_queue,
+ new C_NotifyInstance(this, update.global_image_id, true)));
+ }
+
+ for (auto const &update : release) {
+ m_listener.release_image(
+ update.global_image_id, update.instance_id,
+ create_async_context_callback(
+ m_threads->work_queue,
+ new C_NotifyInstance(this, update.global_image_id, true)));
+ }
+}
+
+template <typename I>
+void ImageMap<I>::notify_listener_remove_images(const std::string &peer_uuid,
+ const Updates &remove) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", "
+ << "remove=[" << remove << "]" << dendl;
+
+ for (auto const &update : remove) {
+ m_listener.remove_image(
+ peer_uuid, update.global_image_id, update.instance_id,
+ create_async_context_callback(
+ m_threads->work_queue,
+ new C_NotifyInstance(this, update.global_image_id, false)));
+ }
+}
+
+template <typename I>
+void ImageMap<I>::handle_load(const std::map<std::string,
+ cls::rbd::MirrorImageMap> &image_mapping) {
+ dout(20) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ m_policy->init(image_mapping);
+
+ for (auto& pair : image_mapping) {
+ schedule_action(pair.first);
+ }
+ }
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::handle_peer_ack_remove(const std::string &global_image_id,
+ int r) {
+ std::lock_guard locker{m_lock};
+ dout(5) << "global_image_id=" << global_image_id << dendl;
+
+ if (r < 0) {
+ derr << "failed to remove global_image_id=" << global_image_id << dendl;
+ }
+
+ auto peer_it = m_peer_map.find(global_image_id);
+ if (peer_it == m_peer_map.end()) {
+ return;
+ }
+
+ m_peer_map.erase(peer_it);
+}
+
+template <typename I>
+void ImageMap<I>::update_images_added(
+ const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", "
+ << "global_image_ids=[" << global_image_ids << "]" << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ for (auto const &global_image_id : global_image_ids) {
+ auto result = m_peer_map[global_image_id].insert(peer_uuid);
+ if (result.second && m_peer_map[global_image_id].size() == 1) {
+ if (m_policy->add_image(global_image_id)) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+}
+
+template <typename I>
+void ImageMap<I>::update_images_removed(
+ const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", "
+ << "global_image_ids=[" << global_image_ids << "]" << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Updates to_remove;
+ for (auto const &global_image_id : global_image_ids) {
+ image_map::LookupInfo info = m_policy->lookup(global_image_id);
+ bool image_mapped = (info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+
+ bool image_removed = image_mapped;
+ bool peer_removed = false;
+ auto peer_it = m_peer_map.find(global_image_id);
+ if (peer_it != m_peer_map.end()) {
+ auto& peer_set = peer_it->second;
+ peer_removed = peer_set.erase(peer_uuid);
+ image_removed = peer_removed && peer_set.empty();
+ }
+
+ if (image_mapped && peer_removed && !peer_uuid.empty()) {
+ // peer image has been deleted
+ to_remove.emplace_back(global_image_id, info.instance_id);
+ }
+
+ if (image_removed) {
+ // local and peer images have been deleted
+ if (m_policy->remove_image(global_image_id)) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+
+ if (!to_remove.empty()) {
+ // removal notification will be notified instantly. this is safe
+ // even after scheduling action for images as we still hold m_lock
+ notify_listener_remove_images(peer_uuid, to_remove);
+ }
+}
+
+template <typename I>
+void ImageMap<I>::update_instances_added(
+ const std::vector<std::string> &instance_ids) {
+ {
+ std::lock_guard locker{m_lock};
+ if (m_shutting_down) {
+ return;
+ }
+
+ std::vector<std::string> filtered_instance_ids;
+ filter_instance_ids(instance_ids, &filtered_instance_ids, false);
+ if (filtered_instance_ids.empty()) {
+ return;
+ }
+
+ dout(20) << "instance_ids=" << filtered_instance_ids << dendl;
+
+ std::set<std::string> remap_global_image_ids;
+ m_policy->add_instances(filtered_instance_ids, &remap_global_image_ids);
+
+ for (auto const &global_image_id : remap_global_image_ids) {
+ schedule_action(global_image_id);
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::update_instances_removed(
+ const std::vector<std::string> &instance_ids) {
+ {
+ std::lock_guard locker{m_lock};
+ if (m_shutting_down) {
+ return;
+ }
+
+ std::vector<std::string> filtered_instance_ids;
+ filter_instance_ids(instance_ids, &filtered_instance_ids, true);
+ if (filtered_instance_ids.empty()) {
+ return;
+ }
+
+ dout(20) << "instance_ids=" << filtered_instance_ids << dendl;
+
+ std::set<std::string> remap_global_image_ids;
+ m_policy->remove_instances(filtered_instance_ids, &remap_global_image_ids);
+
+ for (auto const &global_image_id : remap_global_image_ids) {
+ schedule_action(global_image_id);
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::update_images(const std::string &peer_uuid,
+ std::set<std::string> &&added_global_image_ids,
+ std::set<std::string> &&removed_global_image_ids) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", " << "added_count="
+ << added_global_image_ids.size() << ", " << "removed_count="
+ << removed_global_image_ids.size() << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_shutting_down) {
+ return;
+ }
+
+ if (!removed_global_image_ids.empty()) {
+ update_images_removed(peer_uuid, removed_global_image_ids);
+ }
+ if (!added_global_image_ids.empty()) {
+ update_images_added(peer_uuid, added_global_image_ids);
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::handle_peer_ack(const std::string &global_image_id, int r) {
+ dout (20) << "global_image_id=" << global_image_id << ", r=" << r
+ << dendl;
+
+ continue_action({global_image_id}, r);
+}
+
+template <typename I>
+void ImageMap<I>::init(Context *on_finish) {
+ dout(20) << dendl;
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type");
+
+ if (policy_type == "none" || policy_type == "simple") {
+ m_policy.reset(image_map::SimplePolicy::create(m_ioctx));
+ } else {
+ ceph_abort(); // not really needed as such, but catch it.
+ }
+
+ dout(20) << "mapping policy=" << policy_type << dendl;
+
+ start_async_op();
+ C_LoadMap *ctx = new C_LoadMap(this, on_finish);
+ image_map::LoadRequest<I> *req = image_map::LoadRequest<I>::create(
+ m_ioctx, &ctx->image_mapping, ctx);
+ req->send();
+}
+
+template <typename I>
+void ImageMap<I>::shut_down(Context *on_finish) {
+ dout(20) << dendl;
+
+ {
+ std::lock_guard timer_lock{m_threads->timer_lock};
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(!m_shutting_down);
+
+ m_shutting_down = true;
+ m_policy.reset();
+ }
+
+ if (m_timer_task != nullptr) {
+ m_threads->timer->cancel_event(m_timer_task);
+ m_timer_task = nullptr;
+ }
+ if (m_rebalance_task != nullptr) {
+ m_threads->timer->cancel_event(m_rebalance_task);
+ m_rebalance_task = nullptr;
+ }
+ }
+
+ wait_for_async_ops(on_finish);
+}
+
+template <typename I>
+void ImageMap<I>::filter_instance_ids(
+ const std::vector<std::string> &instance_ids,
+ std::vector<std::string> *filtered_instance_ids, bool removal) const {
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type");
+
+ if (policy_type != "none") {
+ *filtered_instance_ids = instance_ids;
+ return;
+ }
+
+ if (removal) {
+ // propagate removals for external instances
+ for (auto& instance_id : instance_ids) {
+ if (instance_id != m_instance_id) {
+ filtered_instance_ids->push_back(instance_id);
+ }
+ }
+ } else if (std::find(instance_ids.begin(), instance_ids.end(),
+ m_instance_id) != instance_ids.end()) {
+ // propagate addition only for local instance
+ filtered_instance_ids->push_back(m_instance_id);
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageMap<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageMap.h b/src/tools/rbd_mirror/ImageMap.h
new file mode 100644
index 000000000..9dd61ee0d
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageMap.h
@@ -0,0 +1,175 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_H
+
+#include <vector>
+
+#include "common/ceph_mutex.h"
+#include "include/Context.h"
+#include "common/AsyncOpTracker.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+
+#include "image_map/Policy.h"
+#include "image_map/Types.h"
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageMap {
+public:
+ static ImageMap *create(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads,
+ const std::string& instance_id,
+ image_map::Listener &listener) {
+ return new ImageMap(ioctx, threads, instance_id, listener);
+ }
+
+ ~ImageMap();
+
+ // init (load) the instance map from disk
+ void init(Context *on_finish);
+
+ // shut down map operations
+ void shut_down(Context *on_finish);
+
+ // update (add/remove) images
+ void update_images(const std::string &peer_uuid,
+ std::set<std::string> &&added_global_image_ids,
+ std::set<std::string> &&removed_global_image_ids);
+
+ // add/remove instances
+ void update_instances_added(const std::vector<std::string> &instances);
+ void update_instances_removed(const std::vector<std::string> &instances);
+
+private:
+ struct C_NotifyInstance;
+
+ ImageMap(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads,
+ const std::string& instance_id, image_map::Listener &listener);
+
+ struct Update {
+ std::string global_image_id;
+ std::string instance_id;
+ utime_t mapped_time;
+
+ Update(const std::string &global_image_id, const std::string &instance_id,
+ utime_t mapped_time)
+ : global_image_id(global_image_id),
+ instance_id(instance_id),
+ mapped_time(mapped_time) {
+ }
+ Update(const std::string &global_image_id, const std::string &instance_id)
+ : Update(global_image_id, instance_id, ceph_clock_now()) {
+ }
+
+ friend std::ostream& operator<<(std::ostream& os,
+ const Update& update) {
+ os << "{global_image_id=" << update.global_image_id << ", "
+ << "instance_id=" << update.instance_id << "}";
+ return os;
+ }
+
+ };
+ typedef std::list<Update> Updates;
+
+ // Lock ordering: m_threads->timer_lock, m_lock
+
+ librados::IoCtx &m_ioctx;
+ Threads<ImageCtxT> *m_threads;
+ std::string m_instance_id;
+ image_map::Listener &m_listener;
+
+ std::unique_ptr<image_map::Policy> m_policy; // our mapping policy
+
+ Context *m_timer_task = nullptr;
+ ceph::mutex m_lock;
+ bool m_shutting_down = false;
+ AsyncOpTracker m_async_op_tracker;
+
+ // global_image_id -> registered peers ("" == local, remote otherwise)
+ std::map<std::string, std::set<std::string> > m_peer_map;
+
+ std::set<std::string> m_global_image_ids;
+
+ Context *m_rebalance_task = nullptr;
+
+ struct C_LoadMap : Context {
+ ImageMap *image_map;
+ Context *on_finish;
+
+ std::map<std::string, cls::rbd::MirrorImageMap> image_mapping;
+
+ C_LoadMap(ImageMap *image_map, Context *on_finish)
+ : image_map(image_map),
+ on_finish(on_finish) {
+ }
+
+ void finish(int r) override {
+ if (r == 0) {
+ image_map->handle_load(image_mapping);
+ }
+
+ image_map->finish_async_op();
+ on_finish->complete(r);
+ }
+ };
+
+ // async op-tracker helper routines
+ void start_async_op() {
+ m_async_op_tracker.start_op();
+ }
+ void finish_async_op() {
+ m_async_op_tracker.finish_op();
+ }
+ void wait_for_async_ops(Context *on_finish) {
+ m_async_op_tracker.wait_for_ops(on_finish);
+ }
+
+ void handle_peer_ack(const std::string &global_image_id, int r);
+ void handle_peer_ack_remove(const std::string &global_image_id, int r);
+
+ void handle_load(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping);
+ void handle_update_request(const Updates &updates,
+ const std::set<std::string> &remove_global_image_ids, int r);
+
+ // continue (retry or resume depending on state machine) processing
+ // current action.
+ void continue_action(const std::set<std::string> &global_image_ids, int r);
+
+ // schedule an image for update
+ void schedule_action(const std::string &global_image_id);
+
+ void schedule_update_task();
+ void schedule_update_task(const ceph::mutex &timer_lock);
+ void process_updates();
+ void update_image_mapping(Updates&& map_updates,
+ std::set<std::string>&& map_removals);
+
+ void rebalance();
+ void schedule_rebalance_task();
+
+ void notify_listener_acquire_release_images(const Updates &acquire, const Updates &release);
+ void notify_listener_remove_images(const std::string &peer_uuid, const Updates &remove);
+
+ void update_images_added(const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids);
+ void update_images_removed(const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids);
+
+ void filter_instance_ids(const std::vector<std::string> &instance_ids,
+ std::vector<std::string> *filtered_instance_ids,
+ bool removal) const;
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_H
diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc
new file mode 100644
index 000000000..ee22b8d34
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageReplayer.cc
@@ -0,0 +1,1190 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/Timer.h"
+#include "global/global_context.h"
+#include "journal/Journaler.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "ImageDeleter.h"
+#include "ImageReplayer.h"
+#include "MirrorStatusUpdater.h"
+#include "Threads.h"
+#include "tools/rbd_mirror/image_replayer/BootstrapRequest.h"
+#include "tools/rbd_mirror/image_replayer/ReplayerListener.h"
+#include "tools/rbd_mirror/image_replayer/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/journal/Replayer.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+#include <map>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::" << *this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+std::ostream &operator<<(std::ostream &os,
+ const typename ImageReplayer<I>::State &state);
+
+namespace {
+
+// Base for "rbd mirror <verb>" admin-socket commands bound to a single
+// ImageReplayer instance.  Holds the human-readable description used at
+// registration time and whether registration succeeded.
+template <typename I>
+class ImageReplayerAdminSocketCommand {
+public:
+  ImageReplayerAdminSocketCommand(const std::string &desc,
+                                  ImageReplayer<I> *replayer)
+    : desc(desc), replayer(replayer) {
+  }
+  virtual ~ImageReplayerAdminSocketCommand() {}
+  virtual int call(Formatter *f) = 0;
+
+  std::string desc;
+  ImageReplayer<I> *replayer;
+  bool registered = false;
+};
+
+// "rbd mirror status": dump the replayer's current state to the formatter.
+template <typename I>
+class StatusCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+  explicit StatusCommand(const std::string &desc, ImageReplayer<I> *replayer)
+    : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->replayer->print_status(f);
+    return 0;
+  }
+};
+
+// "rbd mirror start": manual start (second argument 'true' marks it manual).
+template <typename I>
+class StartCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+  explicit StartCommand(const std::string &desc, ImageReplayer<I> *replayer)
+    : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->replayer->start(nullptr, true);
+    return 0;
+  }
+};
+
+// "rbd mirror stop": manual stop.
+template <typename I>
+class StopCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+  explicit StopCommand(const std::string &desc, ImageReplayer<I> *replayer)
+    : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->replayer->stop(nullptr, true);
+    return 0;
+  }
+};
+
+// "rbd mirror restart": stop-then-start cycle.
+template <typename I>
+class RestartCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+  explicit RestartCommand(const std::string &desc, ImageReplayer<I> *replayer)
+    : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->replayer->restart();
+    return 0;
+  }
+};
+
+// "rbd mirror flush": flush pending replay.
+template <typename I>
+class FlushCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+  explicit FlushCommand(const std::string &desc, ImageReplayer<I> *replayer)
+    : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->replayer->flush();
+    return 0;
+  }
+};
+
+// Owns the per-image command set and dispatches admin-socket calls to the
+// matching command object.  Commands are keyed by the full command string,
+// which embeds the image name.
+template <typename I>
+class ImageReplayerAdminSocketHook : public AdminSocketHook {
+public:
+  ImageReplayerAdminSocketHook(CephContext *cct, const std::string &name,
+			       ImageReplayer<I> *replayer)
+    : admin_socket(cct->get_admin_socket()),
+      commands{{"rbd mirror flush " + name,
+		new FlushCommand<I>("flush rbd mirror " + name, replayer)},
+	       {"rbd mirror restart " + name,
+		new RestartCommand<I>("restart rbd mirror " + name, replayer)},
+	       {"rbd mirror start " + name,
+		new StartCommand<I>("start rbd mirror " + name, replayer)},
+	       {"rbd mirror status " + name,
+		new StatusCommand<I>("get status for rbd mirror " + name, replayer)},
+	       {"rbd mirror stop " + name,
+		new StopCommand<I>("stop rbd mirror " + name, replayer)}} {
+  }
+
+  // Register all commands; stops at (and returns) the first failure.
+  int register_commands() {
+    for (auto &it : commands) {
+      int r = admin_socket->register_command(it.first, this,
+                                             it.second->desc);
+      if (r < 0) {
+        return r;
+      }
+      it.second->registered = true;
+    }
+    return 0;
+  }
+
+  ~ImageReplayerAdminSocketHook() override {
+    admin_socket->unregister_commands(this);
+    for (auto &it : commands) {
+      delete it.second;
+    }
+    commands.clear();
+  }
+
+  int call(std::string_view command, const cmdmap_t& cmdmap,
+	   Formatter *f,
+	   std::ostream& errss,
+	   bufferlist& out) override {
+    auto i = commands.find(command);
+    ceph_assert(i != commands.end());
+    return i->second->call(f);
+  }
+
+private:
+  // std::less<> enables heterogeneous lookup with the string_view command.
+  typedef std::map<std::string, ImageReplayerAdminSocketCommand<I>*,
+		   std::less<>> Commands;
+
+  AdminSocket *admin_socket;
+  Commands commands;
+};
+
+} // anonymous namespace
+
+// Relay bootstrap progress into the replayer's state description; optionally
+// push an immediate (non-forced) mirror status update.
+template <typename I>
+void ImageReplayer<I>::BootstrapProgressContext::update_progress(
+  const std::string &description, bool flush)
+{
+  const std::string desc = "bootstrapping, " + description;
+  replayer->set_state_description(0, desc);
+  if (flush) {
+    replayer->update_mirror_image_status(false, boost::none);
+  }
+}
+
+// Adapter forwarding replayer notifications back to the owning ImageReplayer.
+template <typename I>
+struct ImageReplayer<I>::ReplayerListener
+  : public image_replayer::ReplayerListener {
+  ImageReplayer<I>* image_replayer;
+
+  ReplayerListener(ImageReplayer<I>* image_replayer)
+    : image_replayer(image_replayer) {
+  }
+
+  void handle_notification() override {
+    image_replayer->handle_replayer_notification();
+  }
+};
+
+template <typename I>
+ImageReplayer<I>::ImageReplayer(
+    librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid,
+    const std::string &global_image_id, Threads<I> *threads,
+    InstanceWatcher<I> *instance_watcher,
+    MirrorStatusUpdater<I>* local_status_updater,
+    journal::CacheManagerHandler *cache_manager_handler,
+    PoolMetaCache* pool_meta_cache) :
+  m_local_io_ctx(local_io_ctx), m_local_mirror_uuid(local_mirror_uuid),
+  m_global_image_id(global_image_id), m_threads(threads),
+  m_instance_watcher(instance_watcher),
+  m_local_status_updater(local_status_updater),
+  m_cache_manager_handler(cache_manager_handler),
+  m_pool_meta_cache(pool_meta_cache),
+  m_local_image_name(global_image_id),
+  m_lock(ceph::make_mutex("rbd::mirror::ImageReplayer " +
+      stringify(local_io_ctx.get_id()) + " " + global_image_id)),
+  m_progress_cxt(this),
+  m_replayer_listener(new ReplayerListener(this))
+{
+  // Register asok commands using a temporary "remote_pool_name/global_image_id"
+  // name. When the image name becomes known on start the asok commands will be
+  // re-registered using "remote_pool_name/remote_image_name" name.
+
+  m_image_spec = image_replayer::util::compute_image_spec(
+    local_io_ctx, global_image_id);
+  register_admin_socket_hook();
+}
+
+template <typename I>
+ImageReplayer<I>::~ImageReplayer()
+{
+  unregister_admin_socket_hook();
+  // the replayer must be fully shut down before destruction: no pending
+  // state builder, start/stop callbacks, bootstrap request or status task
+  ceph_assert(m_state_builder == nullptr);
+  ceph_assert(m_on_start_finish == nullptr);
+  ceph_assert(m_on_stop_contexts.empty());
+  ceph_assert(m_bootstrap_request == nullptr);
+  ceph_assert(m_update_status_task == nullptr);
+  delete m_replayer_listener;
+}
+
+// Map the last reported mirror status to a coarse health state:
+// no status -> OK, SYNCING/UNKNOWN -> WARNING, anything else -> ERROR.
+template <typename I>
+image_replayer::HealthState ImageReplayer<I>::get_health_state() const {
+  std::lock_guard locker{m_lock};
+
+  if (!m_mirror_image_status_state) {
+    return image_replayer::HEALTH_STATE_OK;
+  } else if (*m_mirror_image_status_state ==
+               cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING ||
+             *m_mirror_image_status_state ==
+               cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN) {
+    return image_replayer::HEALTH_STATE_WARNING;
+  }
+  return image_replayer::HEALTH_STATE_ERROR;
+}
+
+// Record a remote peer for this image; duplicate inserts are ignored.
+template <typename I>
+void ImageReplayer<I>::add_peer(const Peer<I>& peer) {
+  dout(10) << "peer=" << peer << dendl;
+
+  std::lock_guard locker{m_lock};
+  auto it = m_peers.find(peer);
+  if (it == m_peers.end()) {
+    m_peers.insert(peer);
+  }
+}
+
+// Store the last result code and human-readable state description used by
+// subsequent mirror status updates.
+template <typename I>
+void ImageReplayer<I>::set_state_description(int r, const std::string &desc) {
+  dout(10) << "r=" << r << ", desc=" << desc << dendl;
+
+  std::lock_guard l{m_lock};
+  m_last_r = r;
+  m_state_desc = desc;
+}
+
+// Begin replaying this image.  Fails with -EINVAL if already running,
+// -EPERM if previously stopped manually and 'manual' is not set, and
+// -ECANCELED if 'restart' is set but the restart was since canceled.
+// On success transitions to STATE_STARTING and kicks off bootstrap.
+template <typename I>
+void ImageReplayer<I>::start(Context *on_finish, bool manual, bool restart)
+{
+  dout(10) << "on_finish=" << on_finish << dendl;
+
+  int r = 0;
+  {
+    std::lock_guard locker{m_lock};
+    if (!is_stopped_()) {
+      derr << "already running" << dendl;
+      r = -EINVAL;
+    } else if (m_manual_stop && !manual) {
+      dout(5) << "stopped manually, ignoring start without manual flag"
+	      << dendl;
+      r = -EPERM;
+    } else if (restart && !m_restart_requested) {
+      dout(10) << "canceled restart" << dendl;
+      r = -ECANCELED;
+    } else {
+      // reset per-run state before starting
+      m_state = STATE_STARTING;
+      m_last_r = 0;
+      m_state_desc.clear();
+      m_manual_stop = false;
+      m_delete_requested = false;
+      m_restart_requested = false;
+      m_status_removed = false;
+
+      if (on_finish != nullptr) {
+        ceph_assert(m_on_start_finish == nullptr);
+        m_on_start_finish = on_finish;
+      }
+      ceph_assert(m_on_stop_contexts.empty());
+    }
+  }
+
+  if (r < 0) {
+    if (on_finish) {
+      on_finish->complete(r);
+    }
+    return;
+  }
+
+  bootstrap();
+}
+
+// Create and send the bootstrap request against the first registered peer.
+// The request is ref-counted so stop() can cancel it concurrently.
+template <typename I>
+void ImageReplayer<I>::bootstrap() {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (m_peers.empty()) {
+    locker.unlock();
+
+    dout(5) << "no peer clusters" << dendl;
+    on_start_fail(-ENOENT, "no peer clusters");
+    return;
+  }
+
+  // TODO need to support multiple remote images
+  ceph_assert(!m_peers.empty());
+  m_remote_image_peer = *m_peers.begin();
+
+  if (on_start_interrupted(m_lock)) {
+    return;
+  }
+
+  ceph_assert(m_state_builder == nullptr);
+  auto ctx = create_context_callback<
+      ImageReplayer, &ImageReplayer<I>::handle_bootstrap>(this);
+  auto request = image_replayer::BootstrapRequest<I>::create(
+      m_threads, m_local_io_ctx, m_remote_image_peer.io_ctx, m_instance_watcher,
+      m_global_image_id, m_local_mirror_uuid,
+      m_remote_image_peer.remote_pool_meta, m_cache_manager_handler,
+      m_pool_meta_cache, &m_progress_cxt, &m_state_builder, &m_resync_requested,
+      ctx);
+
+  // keep an extra ref so stop() can safely cancel the in-flight request
+  request->get();
+  m_bootstrap_request = request;
+  locker.unlock();
+
+  update_mirror_image_status(false, boost::none);
+  request->send();
+}
+
+// Bootstrap completion: translate special error codes into descriptive
+// start failures, otherwise proceed to start the replayer.
+template <typename I>
+void ImageReplayer<I>::handle_bootstrap(int r) {
+  dout(10) << "r=" << r << dendl;
+  {
+    std::lock_guard locker{m_lock};
+    m_bootstrap_request->put();
+    m_bootstrap_request = nullptr;
+  }
+
+  if (on_start_interrupted()) {
+    return;
+  } else if (r == -ENOMSG) {
+    dout(5) << "local image is primary" << dendl;
+    on_start_fail(0, "local image is primary");
+    return;
+  } else if (r == -EREMOTEIO) {
+    dout(5) << "remote image is not primary" << dendl;
+    on_start_fail(-EREMOTEIO, "remote image is not primary");
+    return;
+  } else if (r == -EEXIST) {
+    on_start_fail(r, "split-brain detected");
+    return;
+  } else if (r == -ENOLINK) {
+    // remote side vanished: flag for deletion during shut down
+    m_delete_requested = true;
+    on_start_fail(0, "remote image no longer exists");
+    return;
+  } else if (r == -ERESTART) {
+    on_start_fail(r, "image in transient state, try again");
+    return;
+  } else if (r < 0) {
+    on_start_fail(r, "error bootstrapping replay");
+    return;
+  } else if (m_resync_requested) {
+    on_start_fail(0, "resync requested");
+    return;
+  }
+
+  start_replay();
+}
+
+// Instantiate the replayer produced by the state builder and initialize it.
+template <typename I>
+void ImageReplayer<I>::start_replay() {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  ceph_assert(m_replayer == nullptr);
+  m_replayer = m_state_builder->create_replayer(m_threads, m_instance_watcher,
+                                                m_local_mirror_uuid,
+                                                m_pool_meta_cache,
+                                                m_replayer_listener);
+
+  auto ctx = create_context_callback<
+      ImageReplayer<I>, &ImageReplayer<I>::handle_start_replay>(this);
+  m_replayer->init(ctx);
+}
+
+// Replayer init completion: on error tear the replayer back down and fail
+// the start; on success transition to STATE_REPLAYING, arm the periodic
+// status task and complete the start callback.
+template <typename I>
+void ImageReplayer<I>::handle_start_replay(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (on_start_interrupted()) {
+    return;
+  } else if (r < 0) {
+    std::string error_description = m_replayer->get_error_description();
+    if (r == -ENOTCONN && m_replayer->is_resync_requested()) {
+      std::unique_lock locker{m_lock};
+      m_resync_requested = true;
+    }
+
+    // shut down not required if init failed
+    m_replayer->destroy();
+    m_replayer = nullptr;
+
+    derr << "error starting replay: " << cpp_strerror(r) << dendl;
+    on_start_fail(r, error_description);
+    return;
+  }
+
+  Context *on_finish = nullptr;
+  {
+    std::unique_lock locker{m_lock};
+    ceph_assert(m_state == STATE_STARTING);
+    m_state = STATE_REPLAYING;
+    std::swap(m_on_start_finish, on_finish);
+
+    std::unique_lock timer_locker{m_threads->timer_lock};
+    schedule_update_mirror_image_replay_status();
+  }
+
+  update_mirror_image_status(true, boost::none);
+  if (on_replay_interrupted()) {
+    if (on_finish != nullptr) {
+      on_finish->complete(r);
+    }
+    return;
+  }
+
+  dout(10) << "start succeeded" << dendl;
+  if (on_finish != nullptr) {
+    dout(10) << "on finish complete, r=" << r << dendl;
+    on_finish->complete(r);
+  }
+}
+
+// Fail an in-progress start: asynchronously flip STARTING -> STOPPING,
+// record the error and run the normal shut-down sequence.
+template <typename I>
+void ImageReplayer<I>::on_start_fail(int r, const std::string &desc)
+{
+  dout(10) << "r=" << r << ", desc=" << desc << dendl;
+  Context *ctx = new LambdaContext([this, r, desc](int _r) {
+      {
+        std::lock_guard locker{m_lock};
+        ceph_assert(m_state == STATE_STARTING);
+        m_state = STATE_STOPPING;
+        if (r < 0 && r != -ECANCELED && r != -EREMOTEIO && r != -ENOENT) {
+          derr << "start failed: " << cpp_strerror(r) << dendl;
+        } else {
+          dout(10) << "start canceled" << dendl;
+        }
+      }
+
+      set_state_description(r, desc);
+      update_mirror_image_status(false, boost::none);
+      shut_down(r);
+    });
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Convenience overload: acquire m_lock and delegate to the locked variant.
+template <typename I>
+bool ImageReplayer<I>::on_start_interrupted() {
+  std::lock_guard locker{m_lock};
+  return on_start_interrupted(m_lock);
+}
+
+// Check whether a stop was requested while starting; if so, fail the start
+// with -ECANCELED.  Returns true when the start was interrupted.
+// The caller must hold the lock it passes in (always m_lock today); assert
+// on the parameter so the contract is self-documenting and the parameter is
+// not left unused.
+template <typename I>
+bool ImageReplayer<I>::on_start_interrupted(ceph::mutex& lock) {
+  ceph_assert(ceph_mutex_is_locked(lock));
+  ceph_assert(m_state == STATE_STARTING);
+  if (!m_stop_requested) {
+    return false;
+  }
+
+  on_start_fail(-ECANCELED, "");
+  return true;
+}
+
+// Request a stop.  If not running, optionally marks a manual stop / cancels
+// a pending restart and either completes immediately (-EINVAL when already
+// stopped) or joins an in-flight stop.  If starting, cancels the bootstrap
+// request; if replaying, interrupts the replay.
+template <typename I>
+void ImageReplayer<I>::stop(Context *on_finish, bool manual, bool restart)
+{
+  dout(10) << "on_finish=" << on_finish << ", manual=" << manual
+	   << ", restart=" << restart << dendl;
+
+  image_replayer::BootstrapRequest<I> *bootstrap_request = nullptr;
+  bool shut_down_replay = false;
+  bool is_stopped = false;
+  {
+    std::lock_guard locker{m_lock};
+
+    if (!is_running_()) {
+      if (manual && !m_manual_stop) {
+        dout(10) << "marking manual" << dendl;
+        m_manual_stop = true;
+      }
+      if (!restart && m_restart_requested) {
+        dout(10) << "canceling restart" << dendl;
+        m_restart_requested = false;
+      }
+      if (is_stopped_()) {
+        dout(10) << "already stopped" << dendl;
+        is_stopped = true;
+      } else {
+        dout(10) << "joining in-flight stop" << dendl;
+        if (on_finish != nullptr) {
+          m_on_stop_contexts.push_back(on_finish);
+        }
+      }
+    } else {
+      if (m_state == STATE_STARTING) {
+        dout(10) << "canceling start" << dendl;
+        if (m_bootstrap_request != nullptr) {
+          // take a ref so the request stays valid after dropping m_lock
+          bootstrap_request = m_bootstrap_request;
+          bootstrap_request->get();
+        }
+      } else {
+        dout(10) << "interrupting replay" << dendl;
+        shut_down_replay = true;
+      }
+
+      ceph_assert(m_on_stop_contexts.empty());
+      if (on_finish != nullptr) {
+        m_on_stop_contexts.push_back(on_finish);
+      }
+      m_stop_requested = true;
+      m_manual_stop = manual;
+    }
+  }
+
+  if (is_stopped) {
+    if (on_finish) {
+      on_finish->complete(-EINVAL);
+    }
+    return;
+  }
+
+  // avoid holding lock since bootstrap request will update status
+  if (bootstrap_request != nullptr) {
+    dout(10) << "canceling bootstrap" << dendl;
+    bootstrap_request->cancel();
+    bootstrap_request->put();
+  }
+
+  if (shut_down_replay) {
+    on_stop_journal_replay();
+  }
+}
+
+// Transition REPLAYING -> STOPPING and run the shut-down sequence; no-op if
+// a stop is already in progress (may be invoked multiple times).
+template <typename I>
+void ImageReplayer<I>::on_stop_journal_replay(int r, const std::string &desc)
+{
+  dout(10) << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    if (m_state != STATE_REPLAYING) {
+      // might be invoked multiple times while stopping
+      return;
+    }
+
+    m_stop_requested = true;
+    m_state = STATE_STOPPING;
+  }
+
+  cancel_update_mirror_image_replay_status();
+  set_state_description(r, desc);
+  update_mirror_image_status(true, boost::none);
+  shut_down(0);
+}
+
+// Restart the replayer: flag the restart, stop, then start again from the
+// stop callback (start is attempted even if the stop reported an error).
+template <typename I>
+void ImageReplayer<I>::restart(Context *on_finish)
+{
+  {
+    std::lock_guard locker{m_lock};
+    m_restart_requested = true;
+  }
+
+  auto ctx = new LambdaContext(
+    [this, on_finish](int r) {
+      if (r < 0) {
+	// Try start anyway.
+      }
+      start(on_finish, true, true);
+    });
+  stop(ctx, false, true);
+}
+
+// Synchronously flush the replayer (no-op unless replaying) and, on success,
+// push a fresh mirror status update.
+template <typename I>
+void ImageReplayer<I>::flush()
+{
+  C_SaferCond ctx;
+
+  {
+    std::unique_lock locker{m_lock};
+    if (m_state != STATE_REPLAYING) {
+      return;
+    }
+
+    dout(10) << dendl;
+    ceph_assert(m_replayer != nullptr);
+    m_replayer->flush(&ctx);
+  }
+
+  int r = ctx.wait();
+  if (r >= 0) {
+    update_mirror_image_status(false, boost::none);
+  }
+}
+
+// If a stop was requested, initiate the stop sequence; returns whether the
+// replay was interrupted.
+template <typename I>
+bool ImageReplayer<I>::on_replay_interrupted()
+{
+  bool shut_down;
+  {
+    std::lock_guard locker{m_lock};
+    shut_down = m_stop_requested;
+  }
+
+  if (shut_down) {
+    on_stop_journal_replay();
+  }
+  return shut_down;
+}
+
+// Admin-socket "status": dump image spec and current state.
+template <typename I>
+void ImageReplayer<I>::print_status(Formatter *f)
+{
+  dout(10) << dendl;
+
+  std::lock_guard l{m_lock};
+
+  f->open_object_section("image_replayer");
+  f->dump_string("name", m_image_spec);
+  f->dump_string("state", to_string(m_state));
+  f->close_section();
+}
+
+// Arm the 10-second periodic status-refresh timer; requires both m_lock and
+// the timer lock and only runs while replaying.
+template <typename I>
+void ImageReplayer<I>::schedule_update_mirror_image_replay_status() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+  if (m_state != STATE_REPLAYING) {
+    return;
+  }
+
+  dout(10) << dendl;
+
+  // periodically update the replaying status even if nothing changes
+  // so that we can adjust our performance stats
+  ceph_assert(m_update_status_task == nullptr);
+  m_update_status_task = create_context_callback<
+    ImageReplayer<I>,
+    &ImageReplayer<I>::handle_update_mirror_image_replay_status>(this);
+  m_threads->timer->add_event_after(10, m_update_status_task);
+}
+
+// Timer callback: push a status update off the timer thread (via the work
+// queue) and re-arm the timer; tracked by m_in_flight_op_tracker.
+template <typename I>
+void ImageReplayer<I>::handle_update_mirror_image_replay_status(int r) {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+
+  ceph_assert(m_update_status_task != nullptr);
+  m_update_status_task = nullptr;
+
+  auto ctx = new LambdaContext([this](int) {
+      update_mirror_image_status(false, boost::none);
+
+      std::unique_lock locker{m_lock};
+      std::unique_lock timer_locker{m_threads->timer_lock};
+
+      schedule_update_mirror_image_replay_status();
+      m_in_flight_op_tracker.finish_op();
+    });
+
+  m_in_flight_op_tracker.start_op();
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Cancel the pending status-refresh timer event, if any.  If cancel_event()
+// fails the event is already firing and will clear the task itself.
+template <typename I>
+void ImageReplayer<I>::cancel_update_mirror_image_replay_status() {
+  std::unique_lock timer_locker{m_threads->timer_lock};
+  if (m_update_status_task != nullptr) {
+    dout(10) << dendl;
+
+    if (m_threads->timer->cancel_event(m_update_status_task)) {
+      m_update_status_task = nullptr;
+    }
+  }
+}
+
+// Queue an asynchronous mirror status update.  Unless 'force' is set the
+// update is dropped while a shut down is in progress.  'opt_state' can
+// override the reported state (e.g. STOPPED at the end of shut down).
+template <typename I>
+void ImageReplayer<I>::update_mirror_image_status(
+    bool force, const OptionalState &opt_state) {
+  dout(15) << "force=" << force << ", "
+           << "state=" << opt_state << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    if (!force && !is_stopped_() && !is_running_()) {
+      dout(15) << "shut down in-progress: ignoring update" << dendl;
+      return;
+    }
+  }
+
+  // op tracked until set_mirror_image_status_update() finishes
+  m_in_flight_op_tracker.start_op();
+  auto ctx = new LambdaContext(
+    [this, force, opt_state](int r) {
+      set_mirror_image_status_update(force, opt_state);
+    });
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Build the cls::rbd::MirrorImageSiteStatus for the current state and push
+// it to the local (and, if present, remote) status updater.  May return
+// early while waiting for the replayer's replay-status description, in which
+// case it re-invokes itself from that callback; the in-flight op started by
+// update_mirror_image_status() is finished on every terminating path.
+template <typename I>
+void ImageReplayer<I>::set_mirror_image_status_update(
+    bool force, const OptionalState &opt_state) {
+  dout(15) << "force=" << force << ", "
+           << "state=" << opt_state << dendl;
+
+  reregister_admin_socket_hook();
+
+  State state;
+  std::string state_desc;
+  int last_r;
+  bool stopping_replay;
+
+  auto mirror_image_status_state = boost::make_optional(
+    false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
+  image_replayer::BootstrapRequest<I>* bootstrap_request = nullptr;
+  {
+    // snapshot the mutable state under the lock
+    std::lock_guard locker{m_lock};
+    state = m_state;
+    state_desc = m_state_desc;
+    mirror_image_status_state = m_mirror_image_status_state;
+    last_r = m_last_r;
+    stopping_replay = (m_replayer != nullptr);
+
+    if (m_bootstrap_request != nullptr) {
+      bootstrap_request = m_bootstrap_request;
+      bootstrap_request->get();
+    }
+  }
+
+  bool syncing = false;
+  if (bootstrap_request != nullptr) {
+    syncing = bootstrap_request->is_syncing();
+    bootstrap_request->put();
+    bootstrap_request = nullptr;
+  }
+
+  if (opt_state) {
+    state = *opt_state;
+  }
+
+  cls::rbd::MirrorImageSiteStatus status;
+  status.up = true;
+  switch (state) {
+  case STATE_STARTING:
+    if (syncing) {
+      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING;
+      status.description = state_desc.empty() ? "syncing" : state_desc;
+      mirror_image_status_state = status.state;
+    } else {
+      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY;
+      status.description = "starting replay";
+    }
+    break;
+  case STATE_REPLAYING:
+    status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING;
+    {
+      std::string desc;
+      auto on_req_finish = new LambdaContext(
+        [this, force](int r) {
+          dout(15) << "replay status ready: r=" << r << dendl;
+          if (r >= 0) {
+            set_mirror_image_status_update(force, boost::none);
+          } else if (r == -EAGAIN) {
+            m_in_flight_op_tracker.finish_op();
+          }
+        });
+
+      ceph_assert(m_replayer != nullptr);
+      if (!m_replayer->get_replay_status(&desc, on_req_finish)) {
+        dout(15) << "waiting for replay status" << dendl;
+        return;
+      }
+
+      status.description = "replaying, " + desc;
+      mirror_image_status_state = boost::make_optional(
+        false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
+    }
+    break;
+  case STATE_STOPPING:
+    if (stopping_replay) {
+      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY;
+      status.description = state_desc.empty() ? "stopping replay" : state_desc;
+      break;
+    }
+    // FALLTHROUGH
+  case STATE_STOPPED:
+    if (last_r == -EREMOTEIO) {
+      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN;
+      status.description = state_desc;
+      mirror_image_status_state = status.state;
+    } else if (last_r < 0 && last_r != -ECANCELED) {
+      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR;
+      status.description = state_desc;
+      mirror_image_status_state = status.state;
+    } else {
+      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPED;
+      status.description = state_desc.empty() ? "stopped" : state_desc;
+      mirror_image_status_state = boost::none;
+    }
+    break;
+  default:
+    ceph_assert(!"invalid state");
+  }
+
+  {
+    std::lock_guard locker{m_lock};
+    m_mirror_image_status_state = mirror_image_status_state;
+  }
+
+  // prevent the status from ping-ponging when failed replays are restarted
+  if (mirror_image_status_state &&
+      *mirror_image_status_state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR) {
+    status.state = *mirror_image_status_state;
+  }
+
+  dout(15) << "status=" << status << dendl;
+  m_local_status_updater->set_mirror_image_status(m_global_image_id, status,
+                                                  force);
+  if (m_remote_image_peer.mirror_status_updater != nullptr) {
+    m_remote_image_peer.mirror_status_updater->set_mirror_image_status(
+      m_global_image_id, status, force);
+  }
+
+  m_in_flight_op_tracker.finish_op();
+}
+
+// Run the shut-down sequence: wait for in-flight ops, then (in execution
+// order) shut down and destroy the replayer, close the state builder, and
+// finally report STOPPED and finish via handle_shut_down().  The Context
+// chain below is built in reverse order of execution.
+template <typename I>
+void ImageReplayer<I>::shut_down(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_state == STATE_STOPPING);
+  }
+
+  if (!m_in_flight_op_tracker.empty()) {
+    dout(15) << "waiting for in-flight operations to complete" << dendl;
+    m_in_flight_op_tracker.wait_for_ops(new LambdaContext([this, r](int) {
+        shut_down(r);
+      }));
+    return;
+  }
+
+  // chain the shut down sequence (reverse order)
+  Context *ctx = new LambdaContext(
+    [this, r](int _r) {
+      update_mirror_image_status(true, STATE_STOPPED);
+      handle_shut_down(r);
+    });
+
+  // destruct the state builder
+  if (m_state_builder != nullptr) {
+    ctx = new LambdaContext([this, ctx](int r) {
+      m_state_builder->close(ctx);
+    });
+  }
+
+  // close the replayer
+  if (m_replayer != nullptr) {
+    ctx = new LambdaContext([this, ctx](int r) {
+      m_replayer->destroy();
+      m_replayer = nullptr;
+      ctx->complete(0);
+    });
+    ctx = new LambdaContext([this, ctx](int r) {
+      m_replayer->shut_down(ctx);
+    });
+  }
+
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Finalize the stop.  Re-entered (via callbacks) until: any requested trash
+// move has run, in-flight ops have drained, and the mirror status records
+// have been removed.  Then destroys the state builder, flips to
+// STATE_STOPPED and completes the queued start/stop callbacks.
+template <typename I>
+void ImageReplayer<I>::handle_shut_down(int r) {
+  bool resync_requested = false;
+  bool delete_requested = false;
+  bool unregister_asok_hook = false;
+  {
+    std::lock_guard locker{m_lock};
+
+    if (m_delete_requested && m_state_builder != nullptr &&
+        !m_state_builder->local_image_id.empty()) {
+      ceph_assert(m_state_builder->remote_image_id.empty());
+      dout(0) << "remote image no longer exists: scheduling deletion" << dendl;
+      unregister_asok_hook = true;
+      std::swap(delete_requested, m_delete_requested);
+      m_delete_in_progress = true;
+    }
+
+    std::swap(resync_requested, m_resync_requested);
+    if (!delete_requested && !resync_requested && m_last_r == -ENOENT &&
+        ((m_state_builder == nullptr) ||
+         (m_state_builder->local_image_id.empty() &&
+          m_state_builder->remote_image_id.empty()))) {
+      dout(0) << "mirror image no longer exists" << dendl;
+      unregister_asok_hook = true;
+      m_finished = true;
+    }
+  }
+
+  if (unregister_asok_hook) {
+    unregister_admin_socket_hook();
+  }
+
+  if (delete_requested || resync_requested) {
+    dout(5) << "moving image to trash" << dendl;
+    auto ctx = new LambdaContext([this, r](int) {
+      handle_shut_down(r);
+    });
+    ImageDeleter<I>::trash_move(m_local_io_ctx, m_global_image_id,
+                                resync_requested, m_threads->work_queue, ctx);
+    return;
+  }
+
+  if (!m_in_flight_op_tracker.empty()) {
+    dout(15) << "waiting for in-flight operations to complete" << dendl;
+    m_in_flight_op_tracker.wait_for_ops(new LambdaContext([this, r](int) {
+        handle_shut_down(r);
+      }));
+    return;
+  }
+
+  if (!m_status_removed) {
+    auto ctx = new LambdaContext([this, r](int) {
+      m_status_removed = true;
+      handle_shut_down(r);
+    });
+    remove_image_status(m_delete_in_progress, ctx);
+    return;
+  }
+
+  if (m_state_builder != nullptr) {
+    m_state_builder->destroy();
+    m_state_builder = nullptr;
+  }
+
+  dout(10) << "stop complete" << dendl;
+  Context *on_start = nullptr;
+  Contexts on_stop_contexts;
+  {
+    std::lock_guard locker{m_lock};
+    std::swap(on_start, m_on_start_finish);
+    on_stop_contexts = std::move(m_on_stop_contexts);
+    m_stop_requested = false;
+    ceph_assert(m_state == STATE_STOPPING);
+    m_state = STATE_STOPPED;
+  }
+
+  if (on_start != nullptr) {
+    dout(10) << "on start finish complete, r=" << r << dendl;
+    on_start->complete(r);
+    // start callback consumed the error; stop callbacks report success
+    r = 0;
+  }
+  for (auto ctx : on_stop_contexts) {
+    dout(10) << "on stop finish " << ctx << " complete, r=" << r << dendl;
+    ctx->complete(r);
+  }
+}
+
+// Replayer notification: detect a local image rename, then either stop the
+// replay (resync requested or replay interrupted) or refresh the status.
+template <typename I>
+void ImageReplayer<I>::handle_replayer_notification() {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (m_state != STATE_REPLAYING) {
+    // might be attempting to shut down
+    return;
+  }
+
+  {
+    // detect a rename of the local image
+    ceph_assert(m_state_builder != nullptr &&
+                m_state_builder->local_image_ctx != nullptr);
+    std::shared_lock image_locker{m_state_builder->local_image_ctx->image_lock};
+    if (m_local_image_name != m_state_builder->local_image_ctx->name) {
+      // will re-register with new name after next status update
+      dout(10) << "image renamed" << dendl;
+      m_local_image_name = m_state_builder->local_image_ctx->name;
+    }
+  }
+
+  // replayer cannot be shut down while notification is in-flight
+  ceph_assert(m_replayer != nullptr);
+  locker.unlock();
+
+  if (m_replayer->is_resync_requested()) {
+    dout(10) << "resync requested" << dendl;
+    m_resync_requested = true;
+    on_stop_journal_replay(0, "resync requested");
+    return;
+  }
+
+  if (!m_replayer->is_replaying()) {
+    auto error_code = m_replayer->get_error_code();
+    auto error_description = m_replayer->get_error_description();
+    dout(10) << "replay interrupted: "
+             << "r=" << error_code << ", "
+             << "error=" << error_description << dendl;
+    on_stop_journal_replay(error_code, error_description);
+    return;
+  }
+
+  update_mirror_image_status(false, {});
+}
+
+// Human-readable name for a State value (used by print_status).
+template <typename I>
+std::string ImageReplayer<I>::to_string(const State state) {
+  switch (state) {
+  case ImageReplayer<I>::STATE_STARTING:
+    return "Starting";
+  case ImageReplayer<I>::STATE_REPLAYING:
+    return "Replaying";
+  case ImageReplayer<I>::STATE_STOPPING:
+    return "Stopping";
+  case ImageReplayer<I>::STATE_STOPPED:
+    return "Stopped";
+  default:
+    break;
+  }
+  return "Unknown(" + stringify(state) + ")";
+}
+
+// Register the per-image asok hook under the current image spec; no-op if a
+// hook already exists.  On registration failure the hook is deleted outside
+// the lock.
+template <typename I>
+void ImageReplayer<I>::register_admin_socket_hook() {
+  ImageReplayerAdminSocketHook<I> *asok_hook;
+  {
+    std::lock_guard locker{m_lock};
+    if (m_asok_hook != nullptr) {
+      return;
+    }
+
+    dout(15) << "registered asok hook: " << m_image_spec << dendl;
+    asok_hook = new ImageReplayerAdminSocketHook<I>(
+      g_ceph_context, m_image_spec, this);
+    int r = asok_hook->register_commands();
+    if (r == 0) {
+      m_asok_hook = asok_hook;
+      return;
+    }
+    derr << "error registering admin socket commands" << dendl;
+  }
+  delete asok_hook;
+}
+
+// Detach and delete the asok hook (deletion happens outside the lock).
+template <typename I>
+void ImageReplayer<I>::unregister_admin_socket_hook() {
+  dout(15) << dendl;
+
+  AdminSocketHook *asok_hook = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+    std::swap(asok_hook, m_asok_hook);
+  }
+  delete asok_hook;
+}
+
+// Re-register the asok hook when the image spec changed (e.g. local image
+// name learned during bootstrap or after a rename); skipped while stopping.
+template <typename I>
+void ImageReplayer<I>::reregister_admin_socket_hook() {
+  std::unique_lock locker{m_lock};
+  if (m_state == STATE_STARTING && m_bootstrap_request != nullptr) {
+    m_local_image_name = m_bootstrap_request->get_local_image_name();
+  }
+
+  auto image_spec = image_replayer::util::compute_image_spec(
+    m_local_io_ctx, m_local_image_name);
+  if (m_asok_hook != nullptr && m_image_spec == image_spec) {
+    return;
+  }
+
+  dout(15) << "old_image_spec=" << m_image_spec << ", "
+           << "new_image_spec=" << image_spec << dendl;
+  m_image_spec = image_spec;
+
+  if (m_state == STATE_STOPPING || m_state == STATE_STOPPED) {
+    // no need to re-register if stopping
+    return;
+  }
+  locker.unlock();
+
+  unregister_admin_socket_hook();
+  register_admin_socket_hook();
+}
+
+// Remove the local mirror image status record ('force' removes outright,
+// otherwise remove-and-refresh), then chain to the remote-side removal.
+template <typename I>
+void ImageReplayer<I>::remove_image_status(bool force, Context *on_finish)
+{
+  auto ctx = new LambdaContext([this, force, on_finish](int) {
+    remove_image_status_remote(force, on_finish);
+  });
+
+  if (m_local_status_updater->exists(m_global_image_id)) {
+    dout(15) << "removing local mirror image status" << dendl;
+    if (force) {
+      m_local_status_updater->remove_mirror_image_status(
+        m_global_image_id, true, ctx);
+    } else {
+      m_local_status_updater->remove_refresh_mirror_image_status(
+        m_global_image_id, ctx);
+    }
+    return;
+  }
+
+  ctx->complete(0);
+}
+
+// Remove the remote-peer mirror image status record (same force semantics as
+// the local variant); completes on_finish immediately when nothing to do.
+template <typename I>
+void ImageReplayer<I>::remove_image_status_remote(bool force, Context *on_finish)
+{
+  if (m_remote_image_peer.mirror_status_updater != nullptr &&
+      m_remote_image_peer.mirror_status_updater->exists(m_global_image_id)) {
+    dout(15) << "removing remote mirror image status" << dendl;
+    if (force) {
+      m_remote_image_peer.mirror_status_updater->remove_mirror_image_status(
+        m_global_image_id, true, on_finish);
+    } else {
+      m_remote_image_peer.mirror_status_updater->remove_refresh_mirror_image_status(
+        m_global_image_id, on_finish);
+    }
+    return;
+  }
+  if (on_finish) {
+    on_finish->complete(0);
+  }
+}
+
+// Debug-log formatting: "ImageReplayer: <ptr> [pool_id/global_image_id]".
+template <typename I>
+std::ostream &operator<<(std::ostream &os, const ImageReplayer<I> &replayer)
+{
+  os << "ImageReplayer: " << &replayer << " [" << replayer.get_local_pool_id()
+     << "/" << replayer.get_global_image_id() << "]";
+  return os;
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h
new file mode 100644
index 000000000..432fdf225
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageReplayer.h
@@ -0,0 +1,273 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
+
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "ProgressContext.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_replayer/Types.h"
+#include <boost/optional.hpp>
+#include <string>
+
+class AdminSocketHook;
+
+namespace journal { struct CacheManagerHandler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct InstanceWatcher;
+template <typename> struct MirrorStatusUpdater;
+struct PoolMetaCache;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+class Replayer;
+template <typename> class BootstrapRequest;
+template <typename> class StateBuilder;
+
+} // namespace image_replayer
+
+/**
+ * Replays changes from a remote cluster for a single image.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageReplayer {
+public:
+  // Factory/destroy pair: instances are heap-allocated and released via
+  // destroy() rather than direct delete.
+  static ImageReplayer *create(
+    librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid,
+    const std::string &global_image_id, Threads<ImageCtxT> *threads,
+    InstanceWatcher<ImageCtxT> *instance_watcher,
+    MirrorStatusUpdater<ImageCtxT>* local_status_updater,
+    journal::CacheManagerHandler *cache_manager_handler,
+    PoolMetaCache* pool_meta_cache) {
+    return new ImageReplayer(local_io_ctx, local_mirror_uuid, global_image_id,
+                             threads, instance_watcher, local_status_updater,
+                             cache_manager_handler, pool_meta_cache);
+  }
+  void destroy() {
+    delete this;
+  }
+
+  ImageReplayer(librados::IoCtx &local_io_ctx,
+                const std::string &local_mirror_uuid,
+                const std::string &global_image_id,
+                Threads<ImageCtxT> *threads,
+                InstanceWatcher<ImageCtxT> *instance_watcher,
+                MirrorStatusUpdater<ImageCtxT>* local_status_updater,
+                journal::CacheManagerHandler *cache_manager_handler,
+                PoolMetaCache* pool_meta_cache);
+  virtual ~ImageReplayer();
+  ImageReplayer(const ImageReplayer&) = delete;
+  ImageReplayer& operator=(const ImageReplayer&) = delete;
+
+  // Thread-safe state predicates; each takes m_lock for the query.
+  bool is_stopped() { std::lock_guard l{m_lock}; return is_stopped_(); }
+  bool is_running() { std::lock_guard l{m_lock}; return is_running_(); }
+  bool is_replaying() { std::lock_guard l{m_lock}; return is_replaying_(); }
+
+  std::string get_name() { std::lock_guard l{m_lock}; return m_image_spec; };
+  void set_state_description(int r, const std::string &desc);
+
+  // TODO temporary until policy handles release of image replayers
+  inline bool is_finished() const {
+    std::lock_guard locker{m_lock};
+    return m_finished;
+  }
+  inline void set_finished(bool finished) {
+    std::lock_guard locker{m_lock};
+    m_finished = finished;
+  }
+
+  // true when the last recorded replay error indicates this client was
+  // blocklisted by the cluster
+  inline bool is_blocklisted() const {
+    std::lock_guard locker{m_lock};
+    return (m_last_r == -EBLOCKLISTED);
+  }
+
+  image_replayer::HealthState get_health_state() const;
+
+  void add_peer(const Peer<ImageCtxT>& peer);
+
+  inline int64_t get_local_pool_id() const {
+    return m_local_io_ctx.get_id();
+  }
+  inline const std::string& get_global_image_id() const {
+    return m_global_image_id;
+  }
+
+  void start(Context *on_finish, bool manual = false, bool restart = false);
+  void stop(Context *on_finish, bool manual = false, bool restart = false);
+  void restart(Context *on_finish = nullptr);
+  void flush();
+
+  void print_status(Formatter *f);
+
+protected:
+  /**
+   * @verbatim
+   *                   (error)
+   * <uninitialized> <------------------------------------ FAIL
+   *    |                                                   ^
+   *    v                                                   *
+   * <starting>                                             *
+   *    |                                                   *
+   *    v                                         (error)   *
+   * BOOTSTRAP_IMAGE * * * * * * * * * * * * * * * * * * * **
+   *    |                                                   *
+   *    v                                         (error)   *
+   * START_REPLAY * * * * * * * * * * * * * * * * * * * * **
+   *    |
+   *    v
+   * REPLAYING
+   *    |
+   *    v
+   * JOURNAL_REPLAY_SHUT_DOWN
+   *    |
+   *    v
+   * LOCAL_IMAGE_CLOSE
+   *    |
+   *    v
+   * <stopped>
+   *
+   * @endverbatim
+   */
+
+  void on_start_fail(int r, const std::string &desc);
+  bool on_start_interrupted();
+  bool on_start_interrupted(ceph::mutex& lock);
+
+  void on_stop_journal_replay(int r = 0, const std::string &desc = "");
+
+  bool on_replay_interrupted();
+
+private:
+  typedef std::set<Peer<ImageCtxT>> Peers;
+  typedef std::list<Context *> Contexts;
+
+  enum State {
+    STATE_UNKNOWN,
+    STATE_STARTING,
+    STATE_REPLAYING,
+    STATE_STOPPING,
+    STATE_STOPPED,
+  };
+
+  struct ReplayerListener;
+
+  typedef boost::optional<State> OptionalState;
+  typedef boost::optional<cls::rbd::MirrorImageStatusState>
+    OptionalMirrorImageStatusState;
+
+  // reports bootstrap progress back into this replayer's status
+  class BootstrapProgressContext : public ProgressContext {
+  public:
+    BootstrapProgressContext(ImageReplayer<ImageCtxT> *replayer) :
+      replayer(replayer) {
+    }
+
+    void update_progress(const std::string &description,
+                         bool flush = true) override;
+
+  private:
+    ImageReplayer<ImageCtxT> *replayer;
+  };
+
+  librados::IoCtx &m_local_io_ctx;
+  std::string m_local_mirror_uuid;
+  std::string m_global_image_id;
+  Threads<ImageCtxT> *m_threads;
+  InstanceWatcher<ImageCtxT> *m_instance_watcher;
+  MirrorStatusUpdater<ImageCtxT>* m_local_status_updater;
+  journal::CacheManagerHandler *m_cache_manager_handler;
+  PoolMetaCache* m_pool_meta_cache;
+
+  Peers m_peers;
+  Peer<ImageCtxT> m_remote_image_peer;
+
+  std::string m_local_image_name;
+  std::string m_image_spec;
+
+  // serializes access to the mutable state below
+  mutable ceph::mutex m_lock;
+  State m_state = STATE_STOPPED;
+  std::string m_state_desc;
+
+  OptionalMirrorImageStatusState m_mirror_image_status_state =
+    boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
+  int m_last_r = 0;
+
+  BootstrapProgressContext m_progress_cxt;
+
+  bool m_finished = false;
+  bool m_delete_in_progress = false;
+  bool m_delete_requested = false;
+  bool m_resync_requested = false;
+  bool m_restart_requested = false;
+
+  bool m_status_removed = false;
+
+  image_replayer::StateBuilder<ImageCtxT>* m_state_builder = nullptr;
+  image_replayer::Replayer* m_replayer = nullptr;
+  ReplayerListener* m_replayer_listener = nullptr;
+
+  Context *m_on_start_finish = nullptr;
+  Contexts m_on_stop_contexts;
+  bool m_stop_requested = false;
+  bool m_manual_stop = false;
+
+  AdminSocketHook *m_asok_hook = nullptr;
+
+  image_replayer::BootstrapRequest<ImageCtxT> *m_bootstrap_request = nullptr;
+
+  AsyncOpTracker m_in_flight_op_tracker;
+
+  Context* m_update_status_task = nullptr;
+
+  static std::string to_string(const State state);
+
+  // lock-free variants; caller must hold m_lock
+  bool is_stopped_() const {
+    return m_state == STATE_STOPPED;
+  }
+  bool is_running_() const {
+    return !is_stopped_() && m_state != STATE_STOPPING && !m_stop_requested;
+  }
+  bool is_replaying_() const {
+    return (m_state == STATE_REPLAYING);
+  }
+
+  void schedule_update_mirror_image_replay_status();
+  void handle_update_mirror_image_replay_status(int r);
+  void cancel_update_mirror_image_replay_status();
+
+  void update_mirror_image_status(bool force, const OptionalState &state);
+  void set_mirror_image_status_update(bool force, const OptionalState &state);
+
+  void shut_down(int r);
+  void handle_shut_down(int r);
+
+  void bootstrap();
+  void handle_bootstrap(int r);
+
+  void start_replay();
+  void handle_start_replay(int r);
+
+  void handle_replayer_notification();
+
+  void register_admin_socket_hook();
+  void unregister_admin_socket_hook();
+  void reregister_admin_socket_hook();
+  void remove_image_status(bool force, Context *on_finish);
+  void remove_image_status_remote(bool force, Context *on_finish);
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageReplayer<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/ImageSync.cc b/src/tools/rbd_mirror/ImageSync.cc
new file mode 100644
index 000000000..43d0c6663
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSync.cc
@@ -0,0 +1,469 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ImageSync.h"
+#include "InstanceWatcher.h"
+#include "ProgressContext.h"
+#include "common/debug.h"
+#include "common/Timer.h"
+#include "common/errno.h"
+#include "librbd/DeepCopyRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include "librbd/internal.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/deep_copy/Handler.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_sync/SyncPointCreateRequest.h"
+#include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.h"
+#include "tools/rbd_mirror/image_sync/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageSync: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+
+using namespace image_sync;
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::unique_lock_name;
+
+// Adapter that forwards deep-copy per-object progress callbacks into the
+// owning ImageSync (non-owning back-pointer; lifetime managed by ImageSync).
+template <typename I>
+class ImageSync<I>::ImageCopyProgressHandler
+  : public librbd::deep_copy::NoOpHandler {
+public:
+  ImageCopyProgressHandler(ImageSync *image_sync) : image_sync(image_sync) {
+  }
+
+  int update_progress(uint64_t object_no, uint64_t object_count) override {
+    image_sync->handle_copy_image_update_progress(object_no, object_count);
+    return 0;
+  }
+
+  ImageSync *image_sync;
+};
+
+// Construct a sync request between a remote (source) and local (target)
+// image.  The sync-point update interval is read once from the local
+// cluster's "rbd_mirror_sync_point_update_age" config option.
+template <typename I>
+ImageSync<I>::ImageSync(
+    Threads<I>* threads,
+    I *local_image_ctx,
+    I *remote_image_ctx,
+    const std::string &local_mirror_uuid,
+    image_sync::SyncPointHandler* sync_point_handler,
+    InstanceWatcher<I> *instance_watcher,
+    ProgressContext *progress_ctx,
+    Context *on_finish)
+  : CancelableRequest("rbd::mirror::ImageSync", local_image_ctx->cct,
+                      on_finish),
+    m_threads(threads),
+    m_local_image_ctx(local_image_ctx),
+    m_remote_image_ctx(remote_image_ctx),
+    m_local_mirror_uuid(local_mirror_uuid),
+    m_sync_point_handler(sync_point_handler),
+    m_instance_watcher(instance_watcher),
+    m_progress_ctx(progress_ctx),
+    m_lock(ceph::make_mutex(unique_lock_name("ImageSync::m_lock", this))),
+    m_update_sync_point_interval(
+      m_local_image_ctx->cct->_conf.template get_val<double>(
+        "rbd_mirror_sync_point_update_age")) {
+}
+
+// Destructor: all in-flight copy state must already have been torn down
+// (see handle_copy_image / handle_update_sync_point).
+template <typename I>
+ImageSync<I>::~ImageSync() {
+  ceph_assert(m_image_copy_request == nullptr);
+  ceph_assert(m_image_copy_prog_handler == nullptr);
+  ceph_assert(m_update_sync_ctx == nullptr);
+}
+
+// Entry point of the state machine: begin by requesting a sync slot from
+// the instance watcher.
+template <typename I>
+void ImageSync<I>::send() {
+  send_notify_sync_request();
+}
+
+// Request cancellation of the sync.  Sets m_canceled (checked at each
+// state transition); if the sync request could be canceled at the
+// instance watcher it stops there, otherwise any in-flight deep-copy
+// request is canceled directly.
+template <typename I>
+void ImageSync<I>::cancel() {
+  std::lock_guard locker{m_lock};
+
+  dout(10) << dendl;
+
+  m_canceled = true;
+
+  if (m_instance_watcher->cancel_sync_request(m_local_image_ctx->id)) {
+    return;
+  }
+
+  if (m_image_copy_request != nullptr) {
+    m_image_copy_request->cancel();
+  }
+}
+
+// Ask the instance watcher for permission to start syncing (throttling).
+// Uses explicit lock/unlock so finish() is invoked outside m_lock.
+template <typename I>
+void ImageSync<I>::send_notify_sync_request() {
+  update_progress("NOTIFY_SYNC_REQUEST");
+
+  dout(10) << dendl;
+
+  m_lock.lock();
+  if (m_canceled) {
+    m_lock.unlock();
+    // bypass the local finish() override: no sync slot was granted, so
+    // there is nothing to release via notify_sync_complete()
+    CancelableRequest::finish(-ECANCELED);
+    return;
+  }
+
+  Context *ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      ImageSync<I>, &ImageSync<I>::handle_notify_sync_request>(this));
+  m_instance_watcher->notify_sync_request(m_local_image_ctx->id, ctx);
+  m_lock.unlock();
+}
+
+// Completion of the sync-request notification: a cancel that raced with
+// a successful grant is translated into -ECANCELED; on any error the
+// request finishes (bypassing the local slot-releasing finish()), else
+// we advance to pruning the catch-up sync point.
+template <typename I>
+void ImageSync<I>::handle_notify_sync_request(int r) {
+  dout(10) << ": r=" << r << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    if (m_canceled && r == 0) {
+      r = -ECANCELED;
+    }
+  }
+
+  if (r >= 0) {
+    send_prune_catch_up_sync_point();
+  } else {
+    CancelableRequest::finish(r);
+  }
+}
+
+// Prune stale sync points left over from a previous (interrupted) sync
+// before creating a fresh one; skipped when none exist.
+template <typename I>
+void ImageSync<I>::send_prune_catch_up_sync_point() {
+  update_progress("PRUNE_CATCH_UP_SYNC_POINT");
+
+  if (m_sync_point_handler->get_sync_points().empty()) {
+    send_create_sync_point();
+    return;
+  }
+
+  dout(10) << dendl;
+
+  // prune will remove sync points with missing snapshots and
+  // ensure we have a maximum of one sync point (in case we
+  // restarted)
+  Context *ctx = create_context_callback<
+    ImageSync<I>, &ImageSync<I>::handle_prune_catch_up_sync_point>(this);
+  SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create(
+    m_remote_image_ctx, false, m_sync_point_handler, ctx);
+  request->send();
+}
+
+// Completion of the catch-up prune: abort on error, else create (or
+// reuse) the sync point.
+template <typename I>
+void ImageSync<I>::handle_prune_catch_up_sync_point(int r) {
+  dout(10) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to prune catch-up sync point: "
+         << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  send_create_sync_point();
+}
+
+// Create a new sync point (remote snapshot) unless one survived the
+// catch-up prune, in which case the copy resumes from it.
+template <typename I>
+void ImageSync<I>::send_create_sync_point() {
+  update_progress("CREATE_SYNC_POINT");
+
+  // TODO: when support for disconnecting laggy clients is added,
+  // re-connect and create catch-up sync point
+  if (!m_sync_point_handler->get_sync_points().empty()) {
+    send_copy_image();
+    return;
+  }
+
+  dout(10) << dendl;
+
+  Context *ctx = create_context_callback<
+    ImageSync<I>, &ImageSync<I>::handle_create_sync_point>(this);
+  SyncPointCreateRequest<I> *request = SyncPointCreateRequest<I>::create(
+    m_remote_image_ctx, m_local_mirror_uuid, m_sync_point_handler, ctx);
+  request->send();
+}
+
+// Completion of sync-point creation: abort on error, else start the
+// image deep copy.
+template <typename I>
+void ImageSync<I>::handle_create_sync_point(int r) {
+  dout(10) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to create sync point: " << cpp_strerror(r)
+         << dendl;
+    finish(r);
+    return;
+  }
+
+  send_copy_image();
+}
+
+// Launch the deep copy from the remote image to the local image for the
+// front sync point.  Snapshot IDs are resolved under the remote image
+// lock; the copy request is created under m_lock so cancel() can reach
+// it, and an extra ref is taken so handle_copy_image() can safely drop
+// it afterwards.
+template <typename I>
+void ImageSync<I>::send_copy_image() {
+  librados::snap_t snap_id_start = 0;
+  librados::snap_t snap_id_end;
+  librbd::deep_copy::ObjectNumber object_number;
+  int r = 0;
+
+  // snapshot local copies of the handler state; updates are written back
+  // via update_sync_points() with these copies
+  m_snap_seqs_copy = m_sync_point_handler->get_snap_seqs();
+  m_sync_points_copy = m_sync_point_handler->get_sync_points();
+  ceph_assert(!m_sync_points_copy.empty());
+  auto &sync_point = m_sync_points_copy.front();
+
+  {
+    std::shared_lock image_locker{m_remote_image_ctx->image_lock};
+    snap_id_end = m_remote_image_ctx->get_snap_id(
+      cls::rbd::UserSnapshotNamespace(), sync_point.snap_name);
+    if (snap_id_end == CEPH_NOSNAP) {
+      derr << ": failed to locate snapshot: " << sync_point.snap_name << dendl;
+      r = -ENOENT;
+    } else if (!sync_point.from_snap_name.empty()) {
+      snap_id_start = m_remote_image_ctx->get_snap_id(
+        cls::rbd::UserSnapshotNamespace(), sync_point.from_snap_name);
+      if (snap_id_start == CEPH_NOSNAP) {
+        derr << ": failed to locate from snapshot: "
+             << sync_point.from_snap_name << dendl;
+        r = -ENOENT;
+      }
+    }
+    object_number = sync_point.object_number;
+  }
+  if (r < 0) {
+    finish(r);
+    return;
+  }
+
+  m_lock.lock();
+  if (m_canceled) {
+    m_lock.unlock();
+    finish(-ECANCELED);
+    return;
+  }
+
+  dout(10) << dendl;
+
+  Context *ctx = create_context_callback<
+    ImageSync<I>, &ImageSync<I>::handle_copy_image>(this);
+  m_image_copy_prog_handler = new ImageCopyProgressHandler(this);
+  m_image_copy_request = librbd::DeepCopyRequest<I>::create(
+    m_remote_image_ctx, m_local_image_ctx, snap_id_start, snap_id_end,
+    0, false, object_number, m_threads->work_queue, &m_snap_seqs_copy,
+    m_image_copy_prog_handler, ctx);
+  m_image_copy_request->get();
+  m_lock.unlock();
+
+  update_progress("COPY_IMAGE");
+
+  m_image_copy_request->send();
+}
+
+// Completion of the deep copy.  Tears down the copy request/handler and
+// any pending sync-point update timer.  If a sync-point update is still
+// in flight, the result is stashed in m_ret_val and the flow resumes
+// from handle_update_sync_point(); otherwise proceed to flushing the
+// sync point (or finish on error/cancel).
+template <typename I>
+void ImageSync<I>::handle_copy_image(int r) {
+  dout(10) << ": r=" << r << dendl;
+
+  {
+    // timer lock must be held to cancel the scheduled update event
+    std::scoped_lock locker{m_threads->timer_lock, m_lock};
+    m_image_copy_request->put();
+    m_image_copy_request = nullptr;
+    delete m_image_copy_prog_handler;
+    m_image_copy_prog_handler = nullptr;
+    if (r == 0 && m_canceled) {
+      r = -ECANCELED;
+    }
+
+    if (m_update_sync_ctx != nullptr) {
+      m_threads->timer->cancel_event(m_update_sync_ctx);
+      m_update_sync_ctx = nullptr;
+    }
+
+    if (m_updating_sync_point) {
+      // defer: handle_update_sync_point() will pick up m_ret_val
+      m_ret_val = r;
+      return;
+    }
+  }
+
+  if (r == -ECANCELED) {
+    dout(10) << ": image copy canceled" << dendl;
+    finish(r);
+    return;
+  } else if (r < 0) {
+    derr << ": failed to copy image: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  send_flush_sync_point();
+}
+
+// Progress callback from the deep-copy request: records the latest
+// object position/count (consumed by send_update_sync_point()) and
+// kicks off a sync-point update when one is neither scheduled nor in
+// flight.
+template <typename I>
+void ImageSync<I>::handle_copy_image_update_progress(uint64_t object_no,
+                                                     uint64_t object_count) {
+  // defensive: avoid integer division by zero should the copier ever
+  // report an empty object count
+  int percent = (object_count > 0) ? (100 * object_no / object_count) : 0;
+  update_progress("COPY_IMAGE " + stringify(percent) + "%");
+
+  std::lock_guard locker{m_lock};
+  m_image_copy_object_no = object_no;
+  m_image_copy_object_count = object_count;
+
+  if (m_update_sync_ctx == nullptr && !m_updating_sync_point) {
+    send_update_sync_point();
+  }
+}
+
+// Persist copy progress into the front sync point (object_number) via
+// the sync-point handler.  Caller must hold m_lock.  Skipped when
+// canceled or when no progress was made since the last recorded update.
+template <typename I>
+void ImageSync<I>::send_update_sync_point() {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  // consume the timer event (if this call came from the timer)
+  m_update_sync_ctx = nullptr;
+
+  if (m_canceled) {
+    return;
+  }
+
+  ceph_assert(!m_sync_points_copy.empty());
+  auto sync_point = &m_sync_points_copy.front();
+
+  // NOTE(review): assumes m_image_copy_object_no > 0 whenever
+  // object_number has been set (otherwise the subtraction wraps) --
+  // appears guaranteed since object_number is only set below from a
+  // positive object_no; confirm
+  if (sync_point->object_number &&
+      (m_image_copy_object_no - 1) == sync_point->object_number.get()) {
+    // update sync point did not progress since last sync
+    return;
+  }
+
+  m_updating_sync_point = true;
+
+  if (m_image_copy_object_no > 0) {
+    sync_point->object_number = m_image_copy_object_no - 1;
+  }
+
+  auto ctx = create_context_callback<
+    ImageSync<I>, &ImageSync<I>::handle_update_sync_point>(this);
+  m_sync_point_handler->update_sync_points(m_snap_seqs_copy,
+                                           m_sync_points_copy, false, ctx);
+}
+
+// Completion of a sync-point progress update.  While the copy is still
+// running, schedule the next periodic update; once the copy has
+// completed (m_image_copy_request cleared by handle_copy_image()),
+// continue to the flush step, which will pick up any deferred error in
+// m_ret_val.
+template <typename I>
+void ImageSync<I>::handle_update_sync_point(int r) {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  {
+    std::scoped_lock locker{m_threads->timer_lock, m_lock};
+    m_updating_sync_point = false;
+
+    if (m_image_copy_request != nullptr) {
+      m_update_sync_ctx = new LambdaContext(
+        [this](int r) {
+          std::lock_guard locker{m_lock};
+          this->send_update_sync_point();
+        });
+      m_threads->timer->add_event_after(
+        m_update_sync_point_interval, m_update_sync_ctx);
+      return;
+    }
+  }
+
+  send_flush_sync_point();
+}
+
+// Write the final copy position into the front sync point.  A deferred
+// copy failure (stashed in m_ret_val by handle_copy_image()) short-
+// circuits to finish() here.
+template <typename I>
+void ImageSync<I>::send_flush_sync_point() {
+  if (m_ret_val < 0) {
+    finish(m_ret_val);
+    return;
+  }
+
+  update_progress("FLUSH_SYNC_POINT");
+
+  ceph_assert(!m_sync_points_copy.empty());
+  auto sync_point = &m_sync_points_copy.front();
+
+  if (m_image_copy_object_no > 0) {
+    sync_point->object_number = m_image_copy_object_no - 1;
+  } else {
+    sync_point->object_number = boost::none;
+  }
+
+  auto ctx = create_context_callback<
+    ImageSync<I>, &ImageSync<I>::handle_flush_sync_point>(this);
+  m_sync_point_handler->update_sync_points(m_snap_seqs_copy,
+                                           m_sync_points_copy, false, ctx);
+}
+
+// Completion of the sync-point flush: abort on error, else prune the
+// consumed sync point(s).
+template <typename I>
+void ImageSync<I>::handle_flush_sync_point(int r) {
+  dout(10) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to update client data: " << cpp_strerror(r)
+         << dendl;
+    finish(r);
+    return;
+  }
+
+  send_prune_sync_points();
+}
+
+// Prune sync points now that the copy for the front one is complete
+// (prune-all variant: second argument 'true').
+template <typename I>
+void ImageSync<I>::send_prune_sync_points() {
+  dout(10) << dendl;
+
+  update_progress("PRUNE_SYNC_POINTS");
+
+  Context *ctx = create_context_callback<
+    ImageSync<I>, &ImageSync<I>::handle_prune_sync_points>(this);
+  SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create(
+    m_remote_image_ctx, true, m_sync_point_handler, ctx);
+  request->send();
+}
+
+// Completion of the final prune.  If sync points remain (e.g. a second
+// one queued behind the first), loop back and copy again; otherwise the
+// sync is done.
+template <typename I>
+void ImageSync<I>::handle_prune_sync_points(int r) {
+  dout(10) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to prune sync point: "
+         << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  if (!m_sync_point_handler->get_sync_points().empty()) {
+    send_copy_image();
+    return;
+  }
+
+  finish(0);
+}
+
+// Forward a progress description, prefixed with "IMAGE_SYNC/", to the
+// optional progress context; always traced at debug level 20.
+template <typename I>
+void ImageSync<I>::update_progress(const std::string &description) {
+  dout(20) << ": " << description << dendl;
+
+  if (m_progress_ctx == nullptr) {
+    return;
+  }
+  m_progress_ctx->update_progress("IMAGE_SYNC/" + description);
+}
+
+// Local finish() override: releases the sync slot acquired via
+// notify_sync_request() before completing the request.  Paths that never
+// acquired a slot call CancelableRequest::finish() directly instead.
+template <typename I>
+void ImageSync<I>::finish(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  m_instance_watcher->notify_sync_complete(m_local_image_ctx->id);
+  CancelableRequest::finish(r);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageSync<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageSync.h b/src/tools/rbd_mirror/ImageSync.h
new file mode 100644
index 000000000..b3389ce18
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSync.h
@@ -0,0 +1,151 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_H
+#define RBD_MIRROR_IMAGE_SYNC_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Types.h"
+#include "common/ceph_mutex.h"
+#include "tools/rbd_mirror/CancelableRequest.h"
+#include "tools/rbd_mirror/image_sync/Types.h"
+
+class Context;
+namespace journal { class Journaler; }
+namespace librbd { template <typename> class DeepCopyRequest; }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+template <typename> class InstanceWatcher;
+template <typename> class Threads;
+
+namespace image_sync { struct SyncPointHandler; }
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageSync : public CancelableRequest {
+public:
+  // Factory helper; ownership/lifetime follows CancelableRequest
+  // conventions.
+  static ImageSync* create(
+      Threads<ImageCtxT>* threads,
+      ImageCtxT *local_image_ctx,
+      ImageCtxT *remote_image_ctx,
+      const std::string &local_mirror_uuid,
+      image_sync::SyncPointHandler* sync_point_handler,
+      InstanceWatcher<ImageCtxT> *instance_watcher,
+      ProgressContext *progress_ctx,
+      Context *on_finish) {
+    return new ImageSync(threads, local_image_ctx, remote_image_ctx,
+                         local_mirror_uuid, sync_point_handler,
+                         instance_watcher, progress_ctx, on_finish);
+  }
+
+  ImageSync(
+      Threads<ImageCtxT>* threads,
+      ImageCtxT *local_image_ctx,
+      ImageCtxT *remote_image_ctx,
+      const std::string &local_mirror_uuid,
+      image_sync::SyncPointHandler* sync_point_handler,
+      InstanceWatcher<ImageCtxT> *instance_watcher,
+      ProgressContext *progress_ctx,
+      Context *on_finish);
+  ~ImageSync() override;
+
+  void send() override;
+  void cancel() override;
+
+protected:
+  void finish(int r) override;
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * NOTIFY_SYNC_REQUEST
+   *    |
+   *    v
+   * PRUNE_CATCH_UP_SYNC_POINT
+   *    |
+   *    v
+   * CREATE_SYNC_POINT (skip if already exists and
+   *    |               not disconnected)
+   *    v
+   * COPY_IMAGE . . . . . . . . . . . . . .
+   *    |                                 .
+   *    v                                 .
+   * FLUSH_SYNC_POINT                     .
+   *    |                                 . (image sync canceled)
+   *    v                                 .
+   * PRUNE_SYNC_POINTS                    .
+   *    |                                 .
+   *    v                                 .
+   * <finish> < . . . . . . . . . . . . . .
+   *
+   * @endverbatim
+   */
+
+  class ImageCopyProgressHandler;
+
+  Threads<ImageCtxT>* m_threads;
+  ImageCtxT *m_local_image_ctx;
+  ImageCtxT *m_remote_image_ctx;
+  std::string m_local_mirror_uuid;
+  image_sync::SyncPointHandler* m_sync_point_handler;
+  InstanceWatcher<ImageCtxT> *m_instance_watcher;
+  ProgressContext *m_progress_ctx;
+
+  // guards cancellation state and the in-flight copy request
+  ceph::mutex m_lock;
+  bool m_canceled = false;
+
+  librbd::DeepCopyRequest<ImageCtxT> *m_image_copy_request = nullptr;
+  ImageCopyProgressHandler *m_image_copy_prog_handler = nullptr;
+
+  bool m_updating_sync_point = false;
+  Context *m_update_sync_ctx = nullptr;
+  double m_update_sync_point_interval;
+  uint64_t m_image_copy_object_no = 0;
+  uint64_t m_image_copy_object_count = 0;
+
+  // local working copies of the handler state (see send_copy_image)
+  librbd::SnapSeqs m_snap_seqs_copy;
+  image_sync::SyncPoints m_sync_points_copy;
+
+  // deferred result from handle_copy_image while a sync-point update
+  // is in flight
+  int m_ret_val = 0;
+
+  void send_notify_sync_request();
+  void handle_notify_sync_request(int r);
+
+  void send_prune_catch_up_sync_point();
+  void handle_prune_catch_up_sync_point(int r);
+
+  void send_create_sync_point();
+  void handle_create_sync_point(int r);
+
+  // NOTE(review): declared but no definition appears in ImageSync.cc in
+  // this patch -- possibly vestigial; confirm before relying on them
+  void send_update_max_object_count();
+  void handle_update_max_object_count(int r);
+
+  void send_copy_image();
+  void handle_copy_image(int r);
+  void handle_copy_image_update_progress(uint64_t object_no,
+                                         uint64_t object_count);
+  void send_update_sync_point();
+  void handle_update_sync_point(int r);
+
+  void send_flush_sync_point();
+  void handle_flush_sync_point(int r);
+
+  void send_prune_sync_points();
+  void handle_prune_sync_points(int r);
+
+  void update_progress(const std::string &description);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageSync<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_H
diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc
new file mode 100644
index 000000000..e625bf365
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceReplayer.cc
@@ -0,0 +1,543 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/stringify.h"
+#include "common/Cond.h"
+#include "common/Timer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "ImageReplayer.h"
+#include "InstanceReplayer.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::InstanceReplayer: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+const std::string SERVICE_DAEMON_ASSIGNED_COUNT_KEY("image_assigned_count");
+const std::string SERVICE_DAEMON_WARNING_COUNT_KEY("image_warning_count");
+const std::string SERVICE_DAEMON_ERROR_COUNT_KEY("image_error_count");
+
+} // anonymous namespace
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+// Construct the per-pool replayer that owns all ImageReplayer instances
+// assigned to this daemon instance; all collaborators are borrowed
+// pointers owned by the caller.
+template <typename I>
+InstanceReplayer<I>::InstanceReplayer(
+    librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid,
+    Threads<I> *threads, ServiceDaemon<I>* service_daemon,
+    MirrorStatusUpdater<I>* local_status_updater,
+    journal::CacheManagerHandler *cache_manager_handler,
+    PoolMetaCache* pool_meta_cache)
+  : m_local_io_ctx(local_io_ctx), m_local_mirror_uuid(local_mirror_uuid),
+    m_threads(threads), m_service_daemon(service_daemon),
+    m_local_status_updater(local_status_updater),
+    m_cache_manager_handler(cache_manager_handler),
+    m_pool_meta_cache(pool_meta_cache),
+    m_lock(ceph::make_mutex("rbd::mirror::InstanceReplayer " +
+                            stringify(local_io_ctx.get_id()))) {
+}
+
+// Destructor: shut_down() must have completed first (no pending timer
+// task, no tracked ops, no remaining image replayers).
+template <typename I>
+InstanceReplayer<I>::~InstanceReplayer() {
+  ceph_assert(m_image_state_check_task == nullptr);
+  ceph_assert(m_async_op_tracker.empty());
+  ceph_assert(m_image_replayers.empty());
+}
+
+// True once any managed image replayer has observed a blocklisting
+// (set in start_image_replayer()).
+template <typename I>
+bool InstanceReplayer<I>::is_blocklisted() const {
+  std::lock_guard locker{m_lock};
+  return m_blocklisted;
+}
+
+// Synchronous wrapper around the asynchronous init(Context*).
+template <typename I>
+int InstanceReplayer<I>::init() {
+  C_SaferCond init_ctx;
+  init(&init_ctx);
+  return init_ctx.wait();
+}
+
+// Asynchronous init: schedules the periodic image-state check (under
+// the timer lock) from the work queue and then completes 'on_finish'
+// with 0 (the queued callback's own result is ignored).
+template <typename I>
+void InstanceReplayer<I>::init(Context *on_finish) {
+  dout(10) << dendl;
+
+  Context *ctx = new LambdaContext(
+    [this, on_finish] (int r) {
+      {
+        std::lock_guard timer_locker{m_threads->timer_lock};
+        schedule_image_state_check_task();
+      }
+      on_finish->complete(0);
+    });
+
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Synchronous wrapper around the asynchronous shut_down(Context*);
+// shutdown is expected to always succeed.
+template <typename I>
+void InstanceReplayer<I>::shut_down() {
+  C_SaferCond shut_down_ctx;
+  shut_down(&shut_down_ctx);
+  int r = shut_down_ctx.wait();
+  ceph_assert(r == 0);
+}
+
+// Asynchronous shutdown: records 'on_finish' (completed later in the
+// shutdown chain), then from the work queue cancels the periodic state
+// check and waits for outstanding tracked ops.
+template <typename I>
+void InstanceReplayer<I>::shut_down(Context *on_finish) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(m_on_shut_down == nullptr);
+  m_on_shut_down = on_finish;
+
+  Context *ctx = new LambdaContext(
+    [this] (int r) {
+      cancel_image_state_check_task();
+      wait_for_ops();
+    });
+
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Register a remote peer; inserting a duplicate peer is a logic error.
+template <typename I>
+void InstanceReplayer<I>::add_peer(const Peer<I>& peer) {
+  dout(10) << "peer=" << peer << dendl;
+
+  std::lock_guard locker{m_lock};
+  auto result = m_peers.insert(peer).second;
+  ceph_assert(result);
+}
+
+// Stop and destroy every managed image replayer, completing 'on_finish'
+// once all of them have been released.
+template <typename I>
+void InstanceReplayer<I>::release_all(Context *on_finish) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  C_Gather *gather_ctx = new C_Gather(g_ceph_context, on_finish);
+  for (auto it = m_image_replayers.begin(); it != m_image_replayers.end();
+       it = m_image_replayers.erase(it)) {
+    auto image_replayer = it->second;
+    auto ctx = gather_ctx->new_sub();
+    // destroy the replayer only after its stop sequence has finished
+    ctx = new LambdaContext(
+      [image_replayer, ctx] (int r) {
+        image_replayer->destroy();
+        ctx->complete(0);
+      });
+    stop_image_replayer(image_replayer, ctx);
+  }
+  gather_ctx->activate();
+}
+
+// Handle an image-acquire notification: create and start a replayer for
+// a newly assigned global image id, or restart the existing one on a
+// duplicate notification.  'on_finish' is completed immediately via the
+// work queue; the (re)start proceeds asynchronously.
+template <typename I>
+void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> *instance_watcher,
+                                        const std::string &global_image_id,
+                                        Context *on_finish) {
+  dout(10) << "global_image_id=" << global_image_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(m_on_shut_down == nullptr);
+
+  auto it = m_image_replayers.find(global_image_id);
+  if (it == m_image_replayers.end()) {
+    auto image_replayer = ImageReplayer<I>::create(
+      m_local_io_ctx, m_local_mirror_uuid, global_image_id,
+      m_threads, instance_watcher, m_local_status_updater,
+      m_cache_manager_handler, m_pool_meta_cache);
+
+    dout(10) << global_image_id << ": creating replayer " << image_replayer
+             << dendl;
+
+    it = m_image_replayers.insert(std::make_pair(global_image_id,
+                                                 image_replayer)).first;
+
+    // TODO only a single peer is currently supported
+    ceph_assert(m_peers.size() == 1);
+    auto peer = *m_peers.begin();
+    image_replayer->add_peer(peer);
+    start_image_replayer(image_replayer);
+  } else {
+    // A duplicate acquire notification implies (1) connection hiccup or
+    // (2) new leader election. For the second case, restart the replayer to
+    // detect if the image has been deleted while the leader was offline
+    auto& image_replayer = it->second;
+    image_replayer->set_finished(false);
+    image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
+  }
+
+  m_threads->work_queue->queue(on_finish, 0);
+}
+
+// Handle an image-release notification: remove the replayer from the
+// map, stop it, and destroy it once stopped.  A release for an unknown
+// image is a no-op.
+template <typename I>
+void InstanceReplayer<I>::release_image(const std::string &global_image_id,
+                                        Context *on_finish) {
+  dout(10) << "global_image_id=" << global_image_id << dendl;
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(m_on_shut_down == nullptr);
+
+  auto it = m_image_replayers.find(global_image_id);
+  if (it == m_image_replayers.end()) {
+    dout(5) << global_image_id << ": not found" << dendl;
+    m_threads->work_queue->queue(on_finish, 0);
+    return;
+  }
+
+  auto image_replayer = it->second;
+  m_image_replayers.erase(it);
+
+  // destroy only after the stop sequence completes
+  on_finish = new LambdaContext(
+    [image_replayer, on_finish] (int r) {
+      image_replayer->destroy();
+      on_finish->complete(0);
+    });
+  stop_image_replayer(image_replayer, on_finish);
+}
+
+// Handle removal of the peer's copy of an image: restart the affected
+// replayer so it can detect the missing peer image and decide whether a
+// delete needs to be propagated locally.
+template <typename I>
+void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id,
+                                            const std::string &peer_mirror_uuid,
+                                            Context *on_finish) {
+  dout(10) << "global_image_id=" << global_image_id << ", "
+           << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(m_on_shut_down == nullptr);
+
+  auto it = m_image_replayers.find(global_image_id);
+  if (it != m_image_replayers.end()) {
+    // TODO only a single peer is currently supported, therefore
+    // we can just interrupt the current image replayer and
+    // it will eventually detect that the peer image is missing and
+    // determine if a delete propagation is required.
+    auto image_replayer = it->second;
+    image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
+  }
+  m_threads->work_queue->queue(on_finish, 0);
+}
+
+// Dump the status of every managed image replayer into the formatter
+// under an "image_replayers" array section.
+template <typename I>
+void InstanceReplayer<I>::print_status(Formatter *f) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  f->open_array_section("image_replayers");
+  for (auto &[global_image_id, image_replayer] : m_image_replayers) {
+    image_replayer->print_status(f);
+  }
+  f->close_section();
+}
+
+// Start (manual) all managed image replayers, clearing any prior manual
+// stop; completion of the batch is tracked via the async op tracker.
+template <typename I>
+void InstanceReplayer<I>::start()
+{
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_manual_stop = false;
+
+  auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
+  auto gather_ctx = new C_Gather(
+    cct, new C_TrackedOp(m_async_op_tracker, nullptr));
+  for (auto &kv : m_image_replayers) {
+    auto &image_replayer = kv.second;
+    // second argument: manual start
+    image_replayer->start(gather_ctx->new_sub(), true);
+  }
+
+  gather_ctx->activate();
+}
+
+// Fire-and-forget stop: delegates to stop(Context*) with no user callback.
+template <typename I>
+void InstanceReplayer<I>::stop()
+{
+  stop(nullptr);
+}
+
+// Manually stop every managed image replayer.  on_finish (may be null) is
+// invoked only after all in-flight tracked async ops have drained in
+// addition to the per-replayer stops.
+template <typename I>
+void InstanceReplayer<I>::stop(Context *on_finish)
+{
+  dout(10) << dendl;
+
+  if (on_finish == nullptr) {
+    // no caller callback: just track the operation for shutdown draining
+    on_finish = new C_TrackedOp(m_async_op_tracker, on_finish);
+  } else {
+    // defer the caller's callback until all tracked ops complete
+    on_finish = new LambdaContext(
+      [this, on_finish] (int r) {
+        m_async_op_tracker.wait_for_ops(on_finish);
+      });
+  }
+
+  auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
+  auto gather_ctx = new C_Gather(cct, on_finish);
+  {
+    std::lock_guard locker{m_lock};
+
+    m_manual_stop = true;
+
+    for (auto &kv : m_image_replayers) {
+      auto &image_replayer = kv.second;
+      // second argument marks this as a manual stop
+      image_replayer->stop(gather_ctx->new_sub(), true);
+    }
+  }
+
+  gather_ctx->activate();
+}
+
+// Clear the manual-stop flag and restart every managed image replayer,
+// tracking each restart via the async op tracker.
+template <typename I>
+void InstanceReplayer<I>::restart()
+{
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_manual_stop = false;
+
+  for (auto &kv : m_image_replayers) {
+    auto &image_replayer = kv.second;
+    image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
+  }
+}
+
+// Synchronously request a flush from every managed image replayer.
+template <typename I>
+void InstanceReplayer<I>::flush()
+{
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  for (auto &kv : m_image_replayers) {
+    auto &image_replayer = kv.second;
+    image_replayer->flush();
+  }
+}
+
+// Start a single image replayer if it is eligible (stopped, not
+// blocklisted, not finished, and no manual stop in effect).  Caller must
+// hold m_lock.  A blocklisted replayer sets m_blocklisted; a finished one
+// is removed and destroyed.
+template <typename I>
+void InstanceReplayer<I>::start_image_replayer(
+    ImageReplayer<I> *image_replayer) {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  std::string global_image_id = image_replayer->get_global_image_id();
+  if (!image_replayer->is_stopped()) {
+    return;
+  } else if (image_replayer->is_blocklisted()) {
+    derr << "global_image_id=" << global_image_id << ": blocklisted detected "
+         << "during image replay" << dendl;
+    m_blocklisted = true;
+    return;
+  } else if (image_replayer->is_finished()) {
+    // TODO temporary until policy integrated
+    dout(5) << "removing image replayer for global_image_id="
+            << global_image_id << dendl;
+    m_image_replayers.erase(image_replayer->get_global_image_id());
+    image_replayer->destroy();
+    return;
+  } else if (m_manual_stop) {
+    return;
+  }
+
+  dout(10) << "global_image_id=" << global_image_id << dendl;
+  // non-manual (automatic) start
+  image_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr), false);
+}
+
+// Queue an asynchronous pass of start_image_replayers() on the work queue;
+// the matching finish_op() happens inside start_image_replayers().
+template <typename I>
+void InstanceReplayer<I>::queue_start_image_replayers() {
+  dout(10) << dendl;
+
+  Context *ctx = create_context_callback<
+    InstanceReplayer, &InstanceReplayer<I>::start_image_replayers>(this);
+  m_async_op_tracker.start_op();
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Periodic/queued pass over all image replayers: starts eligible replayers,
+// tallies per-image health, and publishes assigned/warning/error counts to
+// the service daemon.  No-op (beyond finishing the op) once shut down has
+// begun.
+template <typename I>
+void InstanceReplayer<I>::start_image_replayers(int r) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (m_on_shut_down != nullptr) {
+    m_async_op_tracker.finish_op();
+    return;
+  }
+
+  uint64_t image_count = 0;
+  uint64_t warning_count = 0;
+  uint64_t error_count = 0;
+  // iterator is advanced before use because start_image_replayer() may
+  // erase the current entry (finished replayers)
+  for (auto it = m_image_replayers.begin();
+       it != m_image_replayers.end();) {
+    auto current_it(it);
+    ++it;
+
+    ++image_count;
+    auto health_state = current_it->second->get_health_state();
+    if (health_state == image_replayer::HEALTH_STATE_WARNING) {
+      ++warning_count;
+    } else if (health_state == image_replayer::HEALTH_STATE_ERROR) {
+      ++error_count;
+    }
+
+    start_image_replayer(current_it->second);
+  }
+
+  m_service_daemon->add_or_update_namespace_attribute(
+    m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+    SERVICE_DAEMON_ASSIGNED_COUNT_KEY, image_count);
+  m_service_daemon->add_or_update_namespace_attribute(
+    m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+    SERVICE_DAEMON_WARNING_COUNT_KEY, warning_count);
+  m_service_daemon->add_or_update_namespace_attribute(
+    m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+    SERVICE_DAEMON_ERROR_COUNT_KEY, error_count);
+
+  m_async_op_tracker.finish_op();
+}
+
+// Stop a single image replayer, retrying until it reports stopped.  If the
+// replayer is running, a non-manual stop is issued; otherwise (it is in a
+// transitional state) the retry is rescheduled on the timer after 1 second.
+// on_finish is queued once the replayer is stopped.
+template <typename I>
+void InstanceReplayer<I>::stop_image_replayer(ImageReplayer<I> *image_replayer,
+                                              Context *on_finish) {
+  dout(10) << image_replayer << " global_image_id="
+           << image_replayer->get_global_image_id() << ", on_finish="
+           << on_finish << dendl;
+
+  if (image_replayer->is_stopped()) {
+    m_threads->work_queue->queue(on_finish, 0);
+    return;
+  }
+
+  m_async_op_tracker.start_op();
+  // re-enters this function until is_stopped() becomes true
+  Context *ctx = create_async_context_callback(
+    m_threads->work_queue, new LambdaContext(
+      [this, image_replayer, on_finish] (int r) {
+        stop_image_replayer(image_replayer, on_finish);
+        m_async_op_tracker.finish_op();
+      }));
+
+  if (image_replayer->is_running()) {
+    image_replayer->stop(ctx, false);
+  } else {
+    int after = 1;
+    dout(10) << "scheduling image replayer " << image_replayer << " stop after "
+             << after << " sec (task " << ctx << ")" << dendl;
+    ctx = new LambdaContext(
+      [this, after, ctx] (int r) {
+        std::lock_guard timer_locker{m_threads->timer_lock};
+        m_threads->timer->add_event_after(after, ctx);
+      });
+    m_threads->work_queue->queue(ctx, 0);
+  }
+}
+
+// Shut-down helper: wait for all tracked async ops to drain before
+// stopping the image replayers.
+template <typename I>
+void InstanceReplayer<I>::wait_for_ops() {
+  dout(10) << dendl;
+
+  Context *ctx = create_context_callback<
+    InstanceReplayer, &InstanceReplayer<I>::handle_wait_for_ops>(this);
+
+  m_async_op_tracker.wait_for_ops(ctx);
+}
+
+// Continuation of wait_for_ops(): all tracked ops have drained, so proceed
+// to stop the remaining image replayers.
+template <typename I>
+void InstanceReplayer<I>::handle_wait_for_ops(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  ceph_assert(r == 0);
+
+  std::lock_guard locker{m_lock};
+  stop_image_replayers();
+}
+
+// Shut-down helper: stop every managed image replayer, gathering the
+// per-replayer completions into handle_stop_image_replayers().  Caller
+// must hold m_lock.
+template <typename I>
+void InstanceReplayer<I>::stop_image_replayers() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<InstanceReplayer<I>,
+      &InstanceReplayer<I>::handle_stop_image_replayers>(this));
+
+  C_Gather *gather_ctx = new C_Gather(g_ceph_context, ctx);
+  for (auto &it : m_image_replayers) {
+    stop_image_replayer(it.second, gather_ctx->new_sub());
+  }
+  gather_ctx->activate();
+}
+
+// Final step of shut down: all replayers are stopped, so destroy them,
+// clear the map, and complete the deferred shut-down callback outside the
+// lock.
+template <typename I>
+void InstanceReplayer<I>::handle_stop_image_replayers(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  ceph_assert(r == 0);
+
+  Context *on_finish = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+
+    for (auto &it : m_image_replayers) {
+      ceph_assert(it.second->is_stopped());
+      it.second->destroy();
+    }
+    m_image_replayers.clear();
+
+    ceph_assert(m_on_shut_down != nullptr);
+    std::swap(on_finish, m_on_shut_down);
+  }
+  on_finish->complete(r);
+}
+
+// Cancel the pending periodic image-state-check timer task, if any.  The
+// cancellation is asserted to succeed since the task only clears itself
+// while holding the timer lock.
+template <typename I>
+void InstanceReplayer<I>::cancel_image_state_check_task() {
+  std::lock_guard timer_locker{m_threads->timer_lock};
+
+  if (m_image_state_check_task == nullptr) {
+    return;
+  }
+
+  dout(10) << m_image_state_check_task << dendl;
+  bool canceled = m_threads->timer->cancel_event(m_image_state_check_task);
+  ceph_assert(canceled);
+  m_image_state_check_task = nullptr;
+}
+
+// Arm the periodic image-state-check timer task.  The task re-arms itself
+// and queues a start_image_replayers() pass every
+// rbd_mirror_image_state_check_interval seconds.  Caller must hold the
+// timer lock.
+template <typename I>
+void InstanceReplayer<I>::schedule_image_state_check_task() {
+  ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+  ceph_assert(m_image_state_check_task == nullptr);
+
+  m_image_state_check_task = new LambdaContext(
+    [this](int r) {
+      ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+      m_image_state_check_task = nullptr;
+      schedule_image_state_check_task();
+      queue_start_image_replayers();
+    });
+
+  auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
+  int after = cct->_conf.get_val<uint64_t>(
+    "rbd_mirror_image_state_check_interval");
+
+  dout(10) << "scheduling image state check after " << after << " sec (task "
+           << m_image_state_check_task << ")" << dendl;
+  m_threads->timer->add_event_after(after, m_image_state_check_task);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/InstanceReplayer.h b/src/tools/rbd_mirror/InstanceReplayer.h
new file mode 100644
index 000000000..7a5c79723
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceReplayer.h
@@ -0,0 +1,138 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_INSTANCE_REPLAYER_H
+#define RBD_MIRROR_INSTANCE_REPLAYER_H
+
+#include <map>
+#include <sstream>
+
+#include "common/AsyncOpTracker.h"
+#include "common/Formatter.h"
+#include "common/ceph_mutex.h"
+#include "tools/rbd_mirror/Types.h"
+
+namespace journal { struct CacheManagerHandler; }
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ImageReplayer;
+template <typename> class InstanceWatcher;
+template <typename> class MirrorStatusUpdater;
+struct PoolMetaCache;
+template <typename> class ServiceDaemon;
+template <typename> struct Threads;
+
+// Owns and drives the set of ImageReplayer objects assigned to this
+// rbd-mirror instance for one pool/namespace: acquire/release of images,
+// start/stop/restart/flush of replay, periodic health checks, and
+// status/health reporting to the service daemon.
+template <typename ImageCtxT = librbd::ImageCtx>
+class InstanceReplayer {
+public:
+  // factory helper; pair with destroy()
+  static InstanceReplayer* create(
+    librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid,
+    Threads<ImageCtxT> *threads, ServiceDaemon<ImageCtxT> *service_daemon,
+    MirrorStatusUpdater<ImageCtxT>* local_status_updater,
+    journal::CacheManagerHandler *cache_manager_handler,
+    PoolMetaCache* pool_meta_cache) {
+    return new InstanceReplayer(local_io_ctx, local_mirror_uuid, threads,
+                                service_daemon, local_status_updater,
+                                cache_manager_handler, pool_meta_cache);
+  }
+  void destroy() {
+    delete this;
+  }
+
+  InstanceReplayer(librados::IoCtx &local_io_ctx,
+                   const std::string &local_mirror_uuid,
+                   Threads<ImageCtxT> *threads,
+                   ServiceDaemon<ImageCtxT> *service_daemon,
+                   MirrorStatusUpdater<ImageCtxT>* local_status_updater,
+                   journal::CacheManagerHandler *cache_manager_handler,
+                   PoolMetaCache* pool_meta_cache);
+  ~InstanceReplayer();
+
+  // true once any image replayer has observed a blocklisted client
+  bool is_blocklisted() const;
+
+  // synchronous init/shutdown wrappers around the async variants
+  int init();
+  void shut_down();
+
+  void init(Context *on_finish);
+  void shut_down(Context *on_finish);
+
+  void add_peer(const Peer<ImageCtxT>& peer);
+
+  // image ownership transitions driven by the leader/image map
+  void acquire_image(InstanceWatcher<ImageCtxT> *instance_watcher,
+                     const std::string &global_image_id, Context *on_finish);
+  void release_image(const std::string &global_image_id, Context *on_finish);
+  void remove_peer_image(const std::string &global_image_id,
+                         const std::string &peer_mirror_uuid,
+                         Context *on_finish);
+
+  void release_all(Context *on_finish);
+
+  // admin-socket style operations applied to all managed replayers
+  void print_status(Formatter *f);
+  void start();
+  void stop();
+  void restart();
+  void flush();
+
+  void stop(Context *on_finish);
+
+private:
+  /**
+   * @verbatim
+   *
+   * <uninitialized> <-------------------\
+   *    | (init)                         |                    (repeat for each
+   *    v                             STOP_IMAGE_REPLAYER ---\ image replayer)
+   * SCHEDULE_IMAGE_STATE_CHECK_TASK     ^         ^         |
+   *    |                                |         |         |
+   *    v          (shut_down)           |         \---------/
+   * <initialized> -----------------> WAIT_FOR_OPS
+   *
+   * @endverbatim
+   */
+
+  typedef std::set<Peer<ImageCtxT>> Peers;
+
+  librados::IoCtx &m_local_io_ctx;
+  std::string m_local_mirror_uuid;
+  Threads<ImageCtxT> *m_threads;
+  ServiceDaemon<ImageCtxT> *m_service_daemon;
+  MirrorStatusUpdater<ImageCtxT>* m_local_status_updater;
+  journal::CacheManagerHandler *m_cache_manager_handler;
+  PoolMetaCache* m_pool_meta_cache;
+
+  // protects all mutable state below
+  mutable ceph::mutex m_lock;
+  AsyncOpTracker m_async_op_tracker;
+  // image replayers keyed by global image id
+  std::map<std::string, ImageReplayer<ImageCtxT> *> m_image_replayers;
+  Peers m_peers;
+  // pending periodic health-check timer task (timer lock protected)
+  Context *m_image_state_check_task = nullptr;
+  // deferred shut-down callback; non-null once shut down has started
+  Context *m_on_shut_down = nullptr;
+  bool m_manual_stop = false;
+  bool m_blocklisted = false;
+
+  void wait_for_ops();
+  void handle_wait_for_ops(int r);
+
+  void start_image_replayer(ImageReplayer<ImageCtxT> *image_replayer);
+  void queue_start_image_replayers();
+  void start_image_replayers(int r);
+
+  void stop_image_replayer(ImageReplayer<ImageCtxT> *image_replayer,
+                           Context *on_finish);
+
+  void stop_image_replayers();
+  void handle_stop_image_replayers(int r);
+
+  void schedule_image_state_check_task();
+  void cancel_image_state_check_task();
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_INSTANCE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/InstanceWatcher.cc b/src/tools/rbd_mirror/InstanceWatcher.cc
new file mode 100644
index 000000000..7b531064d
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceWatcher.cc
@@ -0,0 +1,1290 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "InstanceWatcher.h"
+#include "include/stringify.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ManagedLock.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "InstanceReplayer.h"
+#include "Throttler.h"
+#include "common/Cond.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: "
+
+namespace rbd {
+namespace mirror {
+
+using namespace instance_watcher;
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+using librbd::util::unique_lock_name;
+
+namespace {
+
+// Rados completion for mirror_instances_list: on success decodes the
+// instance id list from out_bl; -ENOENT (leader object missing) is treated
+// as an empty list.
+struct C_GetInstances : public Context {
+  std::vector<std::string> *instance_ids;
+  Context *on_finish;
+  bufferlist out_bl;
+
+  C_GetInstances(std::vector<std::string> *instance_ids, Context *on_finish)
+    : instance_ids(instance_ids), on_finish(on_finish) {
+  }
+
+  void finish(int r) override {
+    dout(10) << "C_GetInstances: " << this << " " << __func__ << ": r=" << r
+             << dendl;
+
+    if (r == 0) {
+      auto it = out_bl.cbegin();
+      r = librbd::cls_client::mirror_instances_list_finish(&it, instance_ids);
+    } else if (r == -ENOENT) {
+      r = 0;
+    }
+    on_finish->complete(r);
+  }
+};
+
+// Self-contained request that constructs a throwaway InstanceWatcher (by
+// value, with no replayer/throttler) for the given instance id and uses it
+// to remove that instance's on-disk state.  Removal is expected to succeed.
+template <typename I>
+struct C_RemoveInstanceRequest : public Context {
+  InstanceWatcher<I> instance_watcher;
+  Context *on_finish;
+
+  C_RemoveInstanceRequest(librados::IoCtx &io_ctx,
+                          librbd::AsioEngine& asio_engine,
+                          const std::string &instance_id, Context *on_finish)
+    : instance_watcher(io_ctx, asio_engine, nullptr, nullptr, instance_id),
+      on_finish(on_finish) {
+  }
+
+  void send() {
+    dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << dendl;
+
+    instance_watcher.remove(this);
+  }
+
+  void finish(int r) override {
+    dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << ": r="
+             << r << dendl;
+    ceph_assert(r == 0);
+
+    on_finish->complete(r);
+  }
+};
+
+} // anonymous namespace
+
+// Self-deleting request that delivers an encoded notify message either to a
+// specific instance or (when instance_id is empty) to the current leader.
+// Leaderless requests are suspended until a leader appears; the request is
+// re-sent on notify timeout or when the leader changes (-ESTALE), and can
+// be canceled.  Registers itself in m_notify_ops / m_notify_op_tracker for
+// the duration of its lifetime.
+template <typename I>
+struct InstanceWatcher<I>::C_NotifyInstanceRequest : public Context {
+  InstanceWatcher<I> *instance_watcher;
+  std::string instance_id;
+  uint64_t request_id;
+  bufferlist bl;
+  Context *on_finish;
+  bool send_to_leader;
+  std::unique_ptr<librbd::watcher::Notifier> notifier;
+  librbd::watcher::NotifyResponse response;
+  bool canceling = false;
+
+  // must be constructed with instance_watcher->m_lock held
+  C_NotifyInstanceRequest(InstanceWatcher<I> *instance_watcher,
+                          const std::string &instance_id, uint64_t request_id,
+                          bufferlist &&bl, Context *on_finish)
+    : instance_watcher(instance_watcher), instance_id(instance_id),
+      request_id(request_id), bl(bl), on_finish(on_finish),
+      send_to_leader(instance_id.empty()) {
+    dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+             << ": instance_watcher=" << instance_watcher << ", instance_id="
+             << instance_id << ", request_id=" << request_id << dendl;
+
+    ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock));
+
+    if (!send_to_leader) {
+      ceph_assert((!instance_id.empty()));
+      notifier.reset(new librbd::watcher::Notifier(
+                         instance_watcher->m_work_queue,
+                         instance_watcher->m_ioctx,
+                         RBD_MIRROR_INSTANCE_PREFIX + instance_id));
+    }
+
+    instance_watcher->m_notify_op_tracker.start_op();
+    auto result = instance_watcher->m_notify_ops.insert(
+        std::make_pair(instance_id, this)).second;
+    ceph_assert(result);
+  }
+
+  // dispatch (or re-dispatch) the notify; requires m_lock
+  void send() {
+    dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl;
+
+    ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock));
+
+    if (canceling) {
+      dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+               << ": canceling" << dendl;
+      instance_watcher->m_work_queue->queue(this, -ECANCELED);
+      return;
+    }
+
+    if (send_to_leader) {
+      if (instance_watcher->m_leader_instance_id.empty()) {
+        // no known leader yet -- park until one is elected
+        dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+                 << ": suspending" << dendl;
+        instance_watcher->suspend_notify_request(this);
+        return;
+      }
+
+      if (instance_watcher->m_leader_instance_id != instance_id) {
+        // leader changed: re-key this op and rebuild the notifier
+        auto count = instance_watcher->m_notify_ops.erase(
+            std::make_pair(instance_id, this));
+        ceph_assert(count > 0);
+
+        instance_id = instance_watcher->m_leader_instance_id;
+
+        auto result = instance_watcher->m_notify_ops.insert(
+            std::make_pair(instance_id, this)).second;
+        ceph_assert(result);
+
+        notifier.reset(new librbd::watcher::Notifier(
+                           instance_watcher->m_work_queue,
+                           instance_watcher->m_ioctx,
+                           RBD_MIRROR_INSTANCE_PREFIX + instance_id));
+      }
+    }
+
+    dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+             << ": sending to " << instance_id << dendl;
+    notifier->notify(bl, &response, this);
+  }
+
+  // mark canceled; an in-flight/suspended request completes with -ECANCELED
+  void cancel() {
+    dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl;
+
+    ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock));
+
+    canceling = true;
+    instance_watcher->unsuspend_notify_request(this);
+  }
+
+  void finish(int r) override {
+    dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << ": r="
+             << r << dendl;
+
+    if (r == 0 || r == -ETIMEDOUT) {
+      // scan acks for a payload that matches our instance/request ids
+      bool found = false;
+      for (auto &it : response.acks) {
+        auto &bl = it.second;
+        if (it.second.length() == 0) {
+          dout(5) << "C_NotifyInstanceRequest: " << this << " " << __func__
+                  << ": no payload in ack, ignoring" << dendl;
+          continue;
+        }
+        try {
+          auto iter = bl.cbegin();
+          NotifyAckPayload ack;
+          decode(ack, iter);
+          if (ack.instance_id != instance_watcher->get_instance_id()) {
+            derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+                 << ": ack instance_id (" << ack.instance_id << ") "
+                 << "does not match, ignoring" << dendl;
+            continue;
+          }
+          if (ack.request_id != request_id) {
+            derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+                 << ": ack request_id (" << ack.request_id << ") "
+                 << "does not match, ignoring" << dendl;
+            continue;
+          }
+          r = ack.ret_val;
+          found = true;
+          break;
+        } catch (const buffer::error &err) {
+          derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+               << ": failed to decode ack: " << err.what() << dendl;
+          continue;
+        }
+      }
+
+      if (!found) {
+        if (r == -ETIMEDOUT) {
+          // peer did not ack in time: retry the notify
+          derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+               << ": resending after timeout" << dendl;
+          std::lock_guard locker{instance_watcher->m_lock};
+          send();
+          return;
+        } else {
+          r = -EINVAL;
+        }
+      } else {
+        if (r == -ESTALE && send_to_leader) {
+          // leader changed underneath us: retry against the new leader
+          derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+               << ": resending due to leader change" << dendl;
+          std::lock_guard locker{instance_watcher->m_lock};
+          send();
+          return;
+        }
+      }
+    }
+
+    on_finish->complete(r);
+
+    {
+      std::lock_guard locker{instance_watcher->m_lock};
+      auto result = instance_watcher->m_notify_ops.erase(
+        std::make_pair(instance_id, this));
+      ceph_assert(result > 0);
+      instance_watcher->m_notify_op_tracker.finish_op();
+    }
+
+    delete this;
+  }
+
+  // bypass Context::complete()'s auto-delete; finish() manages lifetime
+  void complete(int r) override {
+    finish(r);
+  }
+};
+
+// Tracks one image-sync slot request against the leader's throttler.  Its
+// finish() is intentionally invoked twice: first while on_start is set
+// (sync-start grant/denial), then again after sync completion, when it
+// deletes itself.
+template <typename I>
+struct InstanceWatcher<I>::C_SyncRequest : public Context {
+  InstanceWatcher<I> *instance_watcher;
+  std::string sync_id;
+  Context *on_start;
+  Context *on_complete = nullptr;
+  C_NotifyInstanceRequest *req = nullptr;
+
+  C_SyncRequest(InstanceWatcher<I> *instance_watcher,
+                const std::string &sync_id, Context *on_start)
+    : instance_watcher(instance_watcher), sync_id(sync_id),
+      on_start(on_start) {
+    dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": sync_id="
+             << sync_id << dendl;
+  }
+
+  void finish(int r) override {
+    dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": r="
+             << r << dendl;
+
+    if (on_start != nullptr) {
+      instance_watcher->handle_notify_sync_request(this, r);
+    } else {
+      instance_watcher->handle_notify_sync_complete(this, r);
+      delete this;
+    }
+  }
+
+  // called twice
+  void complete(int r) override {
+    finish(r);
+  }
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " \
+ << this << " " << __func__ << ": "
+// Asynchronously list the registered mirror instance ids from the
+// RBD_MIRROR_LEADER object; result is decoded by C_GetInstances.
+template <typename I>
+void InstanceWatcher<I>::get_instances(librados::IoCtx &io_ctx,
+                                       std::vector<std::string> *instance_ids,
+                                       Context *on_finish) {
+  librados::ObjectReadOperation op;
+  librbd::cls_client::mirror_instances_list_start(&op);
+  C_GetInstances *ctx = new C_GetInstances(instance_ids, on_finish);
+  librados::AioCompletion *aio_comp = create_rados_callback(ctx);
+
+  int r = io_ctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &ctx->out_bl);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// Asynchronously remove a (dead) instance's state via a temporary
+// C_RemoveInstanceRequest, which owns its own InstanceWatcher.
+template <typename I>
+void InstanceWatcher<I>::remove_instance(librados::IoCtx &io_ctx,
+                                         librbd::AsioEngine& asio_engine,
+                                         const std::string &instance_id,
+                                         Context *on_finish) {
+  auto req = new C_RemoveInstanceRequest<I>(io_ctx, asio_engine, instance_id,
+                                            on_finish);
+  req->send();
+}
+
+// Factory: build an InstanceWatcher keyed by this rados client's global
+// instance id.
+template <typename I>
+InstanceWatcher<I> *InstanceWatcher<I>::create(
+    librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine,
+    InstanceReplayer<I> *instance_replayer,
+    Throttler<I> *image_sync_throttler) {
+  return new InstanceWatcher<I>(io_ctx, asio_engine, instance_replayer,
+                                image_sync_throttler,
+                                stringify(io_ctx.get_instance_id()));
+}
+
+// Watches the per-instance object "<prefix><instance_id>" and guards it
+// with an exclusive ManagedLock whose break-lock blocklisting honors
+// rbd_blocklist_expire_seconds.
+template <typename I>
+InstanceWatcher<I>::InstanceWatcher(librados::IoCtx &io_ctx,
+                                    librbd::AsioEngine& asio_engine,
+                                    InstanceReplayer<I> *instance_replayer,
+                                    Throttler<I> *image_sync_throttler,
+                                    const std::string &instance_id)
+  : Watcher(io_ctx, asio_engine.get_work_queue(),
+            RBD_MIRROR_INSTANCE_PREFIX + instance_id),
+    m_instance_replayer(instance_replayer),
+    m_image_sync_throttler(image_sync_throttler), m_instance_id(instance_id),
+    m_lock(ceph::make_mutex(
+      unique_lock_name("rbd::mirror::InstanceWatcher::m_lock", this))),
+    m_instance_lock(librbd::ManagedLock<I>::create(
+      m_ioctx, asio_engine, m_oid, this, librbd::managed_lock::EXCLUSIVE, true,
+      m_cct->_conf.get_val<uint64_t>("rbd_blocklist_expire_seconds"))) {
+}
+
+// Destructor: all request/notify bookkeeping must already be drained;
+// releases the ManagedLock instance.
+template <typename I>
+InstanceWatcher<I>::~InstanceWatcher() {
+  ceph_assert(m_requests.empty());
+  ceph_assert(m_notify_ops.empty());
+  ceph_assert(m_notify_op_tracker.empty());
+  ceph_assert(m_suspended_ops.empty());
+  ceph_assert(m_inflight_sync_reqs.empty());
+  m_instance_lock->destroy();
+}
+
+// Synchronous init wrapper: blocks until the async init completes and
+// returns its result.
+template <typename I>
+int InstanceWatcher<I>::init() {
+  C_SaferCond init_ctx;
+  init(&init_ctx);
+  return init_ctx.wait();
+}
+
+// Begin the init state machine: register instance -> create object ->
+// register watch -> acquire lock; on_finish fires at the end.
+template <typename I>
+void InstanceWatcher<I>::init(Context *on_finish) {
+  dout(10) << "instance_id=" << m_instance_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(m_on_finish == nullptr);
+  m_on_finish = on_finish;
+  m_ret_val = 0;
+
+  register_instance();
+}
+
+// Synchronous shut-down wrapper; shut down is expected to succeed.
+template <typename I>
+void InstanceWatcher<I>::shut_down() {
+  C_SaferCond shut_down_ctx;
+  shut_down(&shut_down_ctx);
+  int r = shut_down_ctx.wait();
+  ceph_assert(r == 0);
+}
+
+// Begin the shut-down state machine: release lock -> unregister watch ->
+// (continues past this chunk); on_finish fires at the end.
+template <typename I>
+void InstanceWatcher<I>::shut_down(Context *on_finish) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(m_on_finish == nullptr);
+  m_on_finish = on_finish;
+  m_ret_val = 0;
+
+  release_lock();
+}
+
+// Begin removal of this instance's on-disk state, starting by breaking /
+// acquiring the instance lock.
+template <typename I>
+void InstanceWatcher<I>::remove(Context *on_finish) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(m_on_finish == nullptr);
+  m_on_finish = on_finish;
+  m_ret_val = 0;
+
+  get_instance_locker();
+}
+
+// Send an ImageAcquirePayload notify to the given instance; on_notify_ack
+// fires when the peer acks (or the request fails).
+template <typename I>
+void InstanceWatcher<I>::notify_image_acquire(
+    const std::string &instance_id, const std::string &global_image_id,
+    Context *on_notify_ack) {
+  dout(10) << "instance_id=" << instance_id << ", global_image_id="
+           << global_image_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(m_on_finish == nullptr);
+
+  uint64_t request_id = ++m_request_seq;
+  bufferlist bl;
+  encode(NotifyMessage{ImageAcquirePayload{request_id, global_image_id}}, bl);
+  auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+                                         std::move(bl), on_notify_ack);
+  req->send();
+}
+
+// Send an ImageReleasePayload notify to the given instance; on_notify_ack
+// fires when the peer acks (or the request fails).
+template <typename I>
+void InstanceWatcher<I>::notify_image_release(
+    const std::string &instance_id, const std::string &global_image_id,
+    Context *on_notify_ack) {
+  dout(10) << "instance_id=" << instance_id << ", global_image_id="
+           << global_image_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(m_on_finish == nullptr);
+
+  uint64_t request_id = ++m_request_seq;
+  bufferlist bl;
+  encode(NotifyMessage{ImageReleasePayload{request_id, global_image_id}}, bl);
+  auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+                                         std::move(bl), on_notify_ack);
+  req->send();
+}
+
+// Send a PeerImageRemovedPayload notify to the given instance; on_notify_ack
+// fires when the peer acks (or the request fails).
+template <typename I>
+void InstanceWatcher<I>::notify_peer_image_removed(
+    const std::string &instance_id, const std::string &global_image_id,
+    const std::string &peer_mirror_uuid, Context *on_notify_ack) {
+  dout(10) << "instance_id=" << instance_id << ", "
+           << "global_image_id=" << global_image_id << ", "
+           << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(m_on_finish == nullptr);
+
+  uint64_t request_id = ++m_request_seq;
+  bufferlist bl;
+  encode(NotifyMessage{PeerImageRemovedPayload{request_id, global_image_id,
+                                               peer_mirror_uuid}}, bl);
+  auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+                                         std::move(bl), on_notify_ack);
+  req->send();
+}
+
+// Request an image-sync slot from the leader (empty instance id routes the
+// notify to the leader).  on_sync_start fires once the leader grants the
+// slot; the request is tracked in m_inflight_sync_reqs until completion.
+template <typename I>
+void InstanceWatcher<I>::notify_sync_request(const std::string &sync_id,
+                                             Context *on_sync_start) {
+  dout(10) << "sync_id=" << sync_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(m_inflight_sync_reqs.count(sync_id) == 0);
+
+  uint64_t request_id = ++m_request_seq;
+
+  bufferlist bl;
+  encode(NotifyMessage{SyncRequestPayload{request_id, sync_id}}, bl);
+
+  auto sync_ctx = new C_SyncRequest(this, sync_id, on_sync_start);
+  sync_ctx->req = new C_NotifyInstanceRequest(this, "", request_id,
+                                              std::move(bl), sync_ctx);
+
+  m_inflight_sync_reqs[sync_id] = sync_ctx;
+  sync_ctx->req->send();
+}
+
+// Attempt to cancel a pending sync-slot request.  Returns false if there is
+// no such request or the sync has already been granted (on_start consumed);
+// otherwise cancels the underlying notify and returns true.
+template <typename I>
+bool InstanceWatcher<I>::cancel_sync_request(const std::string &sync_id) {
+  dout(10) << "sync_id=" << sync_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  auto it = m_inflight_sync_reqs.find(sync_id);
+  if (it == m_inflight_sync_reqs.end()) {
+    return false;
+  }
+
+  auto sync_ctx = it->second;
+
+  if (sync_ctx->on_start == nullptr) {
+    // sync already started -- too late to cancel
+    return false;
+  }
+
+  ceph_assert(sync_ctx->req != nullptr);
+  sync_ctx->req->cancel();
+  return true;
+}
+
+// Leader-side: notify an instance that its requested sync may start.  If
+// the notify fails (other than the peer going stale) while we are still
+// leader, the throttler slot is released.
+template <typename I>
+void InstanceWatcher<I>::notify_sync_start(const std::string &instance_id,
+                                           const std::string &sync_id) {
+  dout(10) << "sync_id=" << sync_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  uint64_t request_id = ++m_request_seq;
+
+  bufferlist bl;
+  encode(NotifyMessage{SyncStartPayload{request_id, sync_id}}, bl);
+
+  auto ctx = new LambdaContext(
+    [this, sync_id] (int r) {
+      dout(10) << "finish: sync_id=" << sync_id << ", r=" << r << dendl;
+      std::lock_guard locker{m_lock};
+      if (r != -ESTALE && is_leader()) {
+        m_image_sync_throttler->finish_op(m_ioctx.get_namespace(), sync_id);
+      }
+    });
+  auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+                                         std::move(bl), ctx);
+  req->send();
+}
+
+// Public entry point: take m_lock and delegate to the locked overload.
+template <typename I>
+void InstanceWatcher<I>::notify_sync_complete(const std::string &sync_id) {
+  std::lock_guard locker{m_lock};
+  notify_sync_complete(m_lock, sync_id);
+}
+
+// Locked overload (mutex parameter documents the lock requirement): retire
+// the in-flight sync request and queue its second completion pass.
+template <typename I>
+void InstanceWatcher<I>::notify_sync_complete(const ceph::mutex&,
+                                              const std::string &sync_id) {
+  dout(10) << "sync_id=" << sync_id << dendl;
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  auto it = m_inflight_sync_reqs.find(sync_id);
+  ceph_assert(it != m_inflight_sync_reqs.end());
+
+  auto sync_ctx = it->second;
+  ceph_assert(sync_ctx->req == nullptr);
+
+  m_inflight_sync_reqs.erase(it);
+  m_work_queue->queue(sync_ctx, 0);
+}
+
+// First completion pass of C_SyncRequest: the leader responded (or the
+// request was canceled).  Consumes on_start and completes it with 0 on
+// grant or -ECANCELED on cancellation.
+template <typename I>
+void InstanceWatcher<I>::handle_notify_sync_request(C_SyncRequest *sync_ctx,
+                                                    int r) {
+  dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl;
+
+  Context *on_start = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(sync_ctx->req != nullptr);
+    ceph_assert(sync_ctx->on_start != nullptr);
+
+    if (sync_ctx->req->canceling) {
+      r = -ECANCELED;
+    }
+
+    std::swap(sync_ctx->on_start, on_start);
+    sync_ctx->req = nullptr;
+
+    if (r == -ECANCELED) {
+      // canceled before start: retire the request immediately
+      notify_sync_complete(m_lock, sync_ctx->sync_id);
+    }
+  }
+
+  on_start->complete(r == -ECANCELED ? r : 0);
+}
+
+// Second completion pass of C_SyncRequest: forward the result to the
+// optional on_complete callback.
+template <typename I>
+void InstanceWatcher<I>::handle_notify_sync_complete(C_SyncRequest *sync_ctx,
+                                                     int r) {
+  dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl;
+
+  if (sync_ctx->on_complete != nullptr) {
+    sync_ctx->on_complete->complete(r);
+  }
+}
+
+// This instance became the leader: record it and resume any notify
+// requests that were suspended waiting for a leader.
+template <typename I>
+void InstanceWatcher<I>::handle_acquire_leader() {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_leader_instance_id = m_instance_id;
+  unsuspend_notify_requests();
+}
+
+// This instance is no longer the leader: clear the leader id and fail any
+// queued sync-throttler ops with -ESTALE so requesters retry elsewhere.
+template <typename I>
+void InstanceWatcher<I>::handle_release_leader() {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_leader_instance_id.clear();
+
+  m_image_sync_throttler->drain(m_ioctx.get_namespace(), -ESTALE);
+}
+
+// A (possibly new) leader was announced: record it and, if one exists,
+// resume suspended leader-bound notify requests.
+template <typename I>
+void InstanceWatcher<I>::handle_update_leader(
+    const std::string &leader_instance_id) {
+  dout(10) << "leader_instance_id=" << leader_instance_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_leader_instance_id = leader_instance_id;
+
+  if (!m_leader_instance_id.empty()) {
+    unsuspend_notify_requests();
+  }
+}
+
+// Cancel all direct (non-leader-bound) notify requests addressed to the
+// given instance, e.g. because that instance was removed.
+template <typename I>
+void InstanceWatcher<I>::cancel_notify_requests(
+    const std::string &instance_id) {
+  dout(10) << "instance_id=" << instance_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  for (auto op : m_notify_ops) {
+    if (op.first == instance_id && !op.second->send_to_leader) {
+      op.second->cancel();
+    }
+  }
+}
+
+// Init step 1: add this instance id to the RBD_MIRROR_LEADER object's
+// instance registry.
+template <typename I>
+void InstanceWatcher<I>::register_instance() {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  dout(10) << dendl;
+
+  librados::ObjectWriteOperation op;
+  librbd::cls_client::mirror_instances_add(&op, m_instance_id);
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_instance>(this);
+
+  int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// Init step 1 result: on success continue to creating the instance object;
+// on error abort init and complete the deferred callback with the error.
+template <typename I>
+void InstanceWatcher<I>::handle_register_instance(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  Context *on_finish = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+
+    if (r == 0) {
+      create_instance_object();
+      return;
+    }
+
+    derr << "error registering instance: " << cpp_strerror(r) << dendl;
+
+    std::swap(on_finish, m_on_finish);
+  }
+  on_finish->complete(r);
+}
+
+
+// Init step 2: exclusively create this instance's rados object (m_oid).
+template <typename I>
+void InstanceWatcher<I>::create_instance_object() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  librados::ObjectWriteOperation op;
+  op.create(true);
+
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    InstanceWatcher<I>,
+    &InstanceWatcher<I>::handle_create_instance_object>(this);
+  int r = m_ioctx.aio_operate(m_oid, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// Init step 2 result: on success register the watch; on error record the
+// failure and unwind by unregistering the instance.
+template <typename I>
+void InstanceWatcher<I>::handle_create_instance_object(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (r < 0) {
+    derr << "error creating " << m_oid << " object: " << cpp_strerror(r)
+         << dendl;
+
+    m_ret_val = r;
+    unregister_instance();
+    return;
+  }
+
+  register_watch();
+}
+
+// Init step 3: register the rados watch on the instance object.
+template <typename I>
+void InstanceWatcher<I>::register_watch() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_watch>(this));
+
+  librbd::Watcher::register_watch(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_register_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error registering instance watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+
+ m_ret_val = r;
+ remove_instance_object();
+ return;
+ }
+
+ acquire_lock();
+}
+
+template <typename I>
+// Acquire the managed (exclusive) lock on the instance object -- the final
+// step of the init state machine.
+void InstanceWatcher<I>::acquire_lock() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_acquire_lock>(this));
+
+  m_instance_lock->acquire_lock(ctx);
+}
+
+template <typename I>
+// Callback for acquire_lock(): on success init is complete and the saved
+// on-finish context fires; on error unwind via unregister_watch().
+void InstanceWatcher<I>::handle_acquire_lock(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  Context *on_finish = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+
+    if (r < 0) {
+
+      derr << "error acquiring instance lock: " << cpp_strerror(r) << dendl;
+
+      m_ret_val = r;
+      unregister_watch();
+      return;
+    }
+
+    // complete the init context outside the lock
+    std::swap(on_finish, m_on_finish);
+  }
+
+  on_finish->complete(r);
+}
+
+template <typename I>
+// First step of the shut-down state machine: shut down the managed lock
+// (which releases it if held).
+void InstanceWatcher<I>::release_lock() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_release_lock>(this));
+
+  m_instance_lock->shut_down(ctx);
+}
+
+template <typename I>
+// Callback for release_lock(): errors are logged but shut down proceeds
+// regardless, continuing with unregister_watch().
+void InstanceWatcher<I>::handle_release_lock(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (r < 0) {
+    derr << "error releasing instance lock: " << cpp_strerror(r) << dendl;
+  }
+
+  unregister_watch();
+}
+
+template <typename I>
+// Unregister the watch on the instance object (shut-down path and init
+// error-unwind path).
+void InstanceWatcher<I>::unregister_watch() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_watch>(this));
+
+  librbd::Watcher::unregister_watch(ctx);
+}
+
+template <typename I>
+// Callback for unregister_watch(): errors are logged only; always continue
+// by removing the instance object.
+void InstanceWatcher<I>::handle_unregister_watch(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "error unregistering instance watcher for " << m_oid << " object: "
+         << cpp_strerror(r) << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+  remove_instance_object();
+}
+
+template <typename I>
+// Remove the per-instance RADOS object (shut-down / remove / error-unwind
+// paths).
+void InstanceWatcher<I>::remove_instance_object() {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  dout(10) << dendl;
+
+  librados::ObjectWriteOperation op;
+  op.remove();
+
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    InstanceWatcher<I>,
+    &InstanceWatcher<I>::handle_remove_instance_object>(this);
+  int r = m_ioctx.aio_operate(m_oid, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+template <typename I>
+// Callback for remove_instance_object(): -ENOENT (already gone) is not an
+// error; always continue by unregistering the instance from the registry.
+void InstanceWatcher<I>::handle_remove_instance_object(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r == -ENOENT) {
+    r = 0;
+  }
+
+  if (r < 0) {
+    derr << "error removing " << m_oid << " object: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+  unregister_instance();
+}
+
+template <typename I>
+// Remove this instance's id from the mirror instances registry on the
+// RBD_MIRROR_LEADER object.
+void InstanceWatcher<I>::unregister_instance() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  librados::ObjectWriteOperation op;
+  librbd::cls_client::mirror_instances_remove(&op, m_instance_id);
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_instance>(this);
+
+  int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+template <typename I>
+// Callback for unregister_instance(): errors are logged only; always
+// continue by draining outstanding notify operations.
+void InstanceWatcher<I>::handle_unregister_instance(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "error unregistering instance: " << cpp_strerror(r) << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+  wait_for_notify_ops();
+}
+
+template <typename I>
+// Cancel all in-flight outbound notifications and wait for them to drain
+// before completing the shut-down / remove sequence.
+void InstanceWatcher<I>::wait_for_notify_ops() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  // iterate by const reference: each element is a
+  // (instance_id, C_NotifyInstanceRequest*) pair and the original
+  // by-value loop copied the std::string key on every iteration
+  for (const auto& op : m_notify_ops) {
+    op.second->cancel();
+  }
+
+  Context *ctx = create_async_context_callback(
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_wait_for_notify_ops>(this));
+
+  m_notify_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+// Callback for wait_for_notify_ops(): all notify ops have drained;
+// complete the saved on-finish context with the first recorded error.
+void InstanceWatcher<I>::handle_wait_for_notify_ops(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  ceph_assert(r == 0);
+
+  Context *on_finish = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+
+    ceph_assert(m_notify_ops.empty());
+
+    // complete outside the lock; report the sticky error (if any)
+    std::swap(on_finish, m_on_finish);
+    r = m_ret_val;
+  }
+  on_finish->complete(r);
+}
+
+template <typename I>
+// Remove-instance path: look up the current owner of the instance lock so
+// it can be forcibly broken.
+void InstanceWatcher<I>::get_instance_locker() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_get_instance_locker>(this));
+
+  m_instance_lock->get_locker(&m_instance_locker, ctx);
+}
+
+template <typename I>
+// Callback for get_instance_locker(): if no locker exists (or lookup
+// failed) skip straight to removing the instance object, otherwise break
+// the stale lock first.
+void InstanceWatcher<I>::handle_get_instance_locker(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (r < 0) {
+    if (r != -ENOENT) {
+      derr << "error retrieving instance locker: " << cpp_strerror(r) << dendl;
+    }
+    remove_instance_object();
+    return;
+  }
+
+  break_instance_lock();
+}
+
+template <typename I>
+// Remove-instance path: forcibly break the (dead) instance's lock so the
+// instance object can be removed.
+void InstanceWatcher<I>::break_instance_lock() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_break_instance_lock>(this));
+
+  m_instance_lock->break_lock(m_instance_locker, true, ctx);
+}
+
+template <typename I>
+// Callback for break_instance_lock(): -ENOENT (lock already gone) is not
+// an error.  All outcomes proceed to remove_instance_object(), so only the
+// logging differs -- the original duplicated the call in both branches.
+void InstanceWatcher<I>::handle_break_instance_lock(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (r < 0 && r != -ENOENT) {
+    derr << "error breaking instance lock: " << cpp_strerror(r) << dendl;
+  }
+
+  remove_instance_object();
+}
+
+template <typename I>
+// Park an outbound notify request until the peer becomes reachable again.
+void InstanceWatcher<I>::suspend_notify_request(C_NotifyInstanceRequest *req) {
+  dout(10) << req << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  // the request must not already be suspended
+  auto result = m_suspended_ops.insert(req).second;
+  ceph_assert(result);
+}
+
+template <typename I>
+// Resume a single suspended notify request.  Returns false if the request
+// was not suspended (e.g. already resumed or canceled).
+template <typename I>
+bool InstanceWatcher<I>::unsuspend_notify_request(
+    C_NotifyInstanceRequest *req) {
+  dout(10) << req << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  auto result = m_suspended_ops.erase(req);
+  if (result == 0) {
+    return false;
+  }
+
+  req->send();
+  return true;
+}
+
+template <typename I>
+// Resume all suspended notify requests (e.g. after a leader change).
+void InstanceWatcher<I>::unsuspend_notify_requests() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  // swap out the set first: req->send() may re-suspend a request, which
+  // would otherwise mutate the container being iterated
+  std::set<C_NotifyInstanceRequest *> suspended_ops;
+  std::swap(m_suspended_ops, suspended_ops);
+
+  for (auto op : suspended_ops) {
+    op->send();
+  }
+}
+
+template <typename I>
+// Register an inbound request keyed by (instance_id, request_id) and
+// return the context to run for it.  Returns nullptr for a duplicate
+// (re-sent) request: the work is already in progress, so only the ack
+// context is swapped for the new one.
+Context *InstanceWatcher<I>::prepare_request(const std::string &instance_id,
+                                             uint64_t request_id,
+                                             C_NotifyAck *on_notify_ack) {
+  dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id
+           << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  Context *ctx = nullptr;
+  Request request(instance_id, request_id);
+  auto it = m_requests.find(request);
+
+  if (it != m_requests.end()) {
+    dout(10) << "duplicate for in-progress request" << dendl;
+    // drop the stale ack; the re-sent notification's ack replaces it below
+    delete it->on_notify_ack;
+    m_requests.erase(it);
+  } else {
+    ctx = create_async_context_callback(
+      m_work_queue, new LambdaContext(
+        [this, instance_id, request_id] (int r) {
+          complete_request(instance_id, request_id, r);
+        }));
+  }
+
+  request.on_notify_ack = on_notify_ack;
+  m_requests.insert(request);
+  return ctx;
+}
+
+template <typename I>
+// Finish an inbound request: remove its tracking entry and send the ack
+// (with the result encoded in the payload) back to the requester.
+void InstanceWatcher<I>::complete_request(const std::string &instance_id,
+                                          uint64_t request_id, int r) {
+  dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id
+           << dendl;
+
+  C_NotifyAck *on_notify_ack;
+  {
+    std::lock_guard locker{m_lock};
+    Request request(instance_id, request_id);
+    auto it = m_requests.find(request);
+    ceph_assert(it != m_requests.end());
+    on_notify_ack = it->on_notify_ack;
+    m_requests.erase(it);
+  }
+
+  // the request result travels in the ack payload, not the notify rc
+  encode(NotifyAckPayload(instance_id, request_id, r), on_notify_ack->out);
+  on_notify_ack->complete(0);
+}
+
+template <typename I>
+// Watch/notify entry point: decode the instance notification and dispatch
+// it to the matching handle_payload() overload.  The ack context is always
+// completed, even on decode failure, so the notifier is never left waiting.
+void InstanceWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+                                       uint64_t notifier_id, bufferlist &bl) {
+  dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", "
+           << "notifier_id=" << notifier_id << dendl;
+
+  auto ctx = new C_NotifyAck(this, notify_id, handle);
+
+  NotifyMessage notify_message;
+  try {
+    auto iter = bl.cbegin();
+    decode(notify_message, iter);
+  } catch (const buffer::error &err) {
+    // fixed log text: this decodes an instance notification, not an image
+    // notification (the original message was a copy-paste artifact)
+    derr << "error decoding instance notification: " << err.what() << dendl;
+    ctx->complete(0);
+    return;
+  }
+
+  apply_visitor(HandlePayloadVisitor(this, stringify(notifier_id), ctx),
+                notify_message.payload);
+}
+
+template <typename I>
+// Queue an "acquire image" request to the instance replayer; the notify op
+// tracker keeps shut down from completing while the request is queued.
+void InstanceWatcher<I>::handle_image_acquire(
+    const std::string &global_image_id, Context *on_finish) {
+  dout(10) << "global_image_id=" << global_image_id << dendl;
+
+  auto ctx = new LambdaContext(
+    [this, global_image_id, on_finish] (int r) {
+      m_instance_replayer->acquire_image(this, global_image_id, on_finish);
+      m_notify_op_tracker.finish_op();
+    });
+
+  m_notify_op_tracker.start_op();
+  m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+// Queue a "release image" request to the instance replayer (same op
+// tracking pattern as handle_image_acquire).
+void InstanceWatcher<I>::handle_image_release(
+    const std::string &global_image_id, Context *on_finish) {
+  dout(10) << "global_image_id=" << global_image_id << dendl;
+
+  auto ctx = new LambdaContext(
+    [this, global_image_id, on_finish] (int r) {
+      m_instance_replayer->release_image(global_image_id, on_finish);
+      m_notify_op_tracker.finish_op();
+    });
+
+  m_notify_op_tracker.start_op();
+  m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+// Queue a "peer image removed" request to the instance replayer (same op
+// tracking pattern as handle_image_acquire).
+void InstanceWatcher<I>::handle_peer_image_removed(
+    const std::string &global_image_id, const std::string &peer_mirror_uuid,
+    Context *on_finish) {
+  dout(10) << "global_image_id=" << global_image_id << ", "
+           << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+  auto ctx = new LambdaContext(
+    [this, peer_mirror_uuid, global_image_id, on_finish] (int r) {
+      m_instance_replayer->remove_peer_image(global_image_id,
+                                             peer_mirror_uuid, on_finish);
+      m_notify_op_tracker.finish_op();
+    });
+
+  m_notify_op_tracker.start_op();
+  m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+// Handle a peer's request to start an image sync.  Only the leader owns
+// the sync throttler; non-leaders answer -ESTALE so the peer retries
+// against the current leader.
+void InstanceWatcher<I>::handle_sync_request(const std::string &instance_id,
+                                             const std::string &sync_id,
+                                             Context *on_finish) {
+  dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (!is_leader()) {
+    dout(10) << "sync request for non-leader" << dendl;
+    m_work_queue->queue(on_finish, -ESTALE);
+    return;
+  }
+
+  Context *on_start = create_async_context_callback(
+    m_work_queue, new LambdaContext(
+      [this, instance_id, sync_id, on_finish] (int r) {
+        dout(10) << "handle_sync_request: finish: instance_id=" << instance_id
+                 << ", sync_id=" << sync_id << ", r=" << r << dendl;
+        if (r == 0) {
+          // grant the slot: tell the requester it may start syncing
+          notify_sync_start(instance_id, sync_id);
+        }
+        // -ENOENT (op canceled/removed) is treated as success for the ack
+        if (r == -ENOENT) {
+          r = 0;
+        }
+        on_finish->complete(r);
+      }));
+  m_image_sync_throttler->start_op(m_ioctx.get_namespace(), sync_id, on_start);
+}
+
+template <typename I>
+// Handle the leader's "sync start" grant for a sync slot this instance
+// requested earlier via notify_sync_request().
+void InstanceWatcher<I>::handle_sync_start(const std::string &instance_id,
+                                           const std::string &sync_id,
+                                           Context *on_finish) {
+  dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  auto it = m_inflight_sync_reqs.find(sync_id);
+  if (it == m_inflight_sync_reqs.end()) {
+    dout(5) << "not found" << dendl;
+    m_work_queue->queue(on_finish, 0);
+    return;
+  }
+
+  auto sync_ctx = it->second;
+
+  if (sync_ctx->on_complete != nullptr) {
+    dout(5) << "duplicate request" << dendl;
+    // supersede the previous completion: it belongs to a stale grant
+    m_work_queue->queue(sync_ctx->on_complete, -ESTALE);
+  }
+
+  sync_ctx->on_complete = on_finish;
+}
+
+template <typename I>
+// Payload dispatcher: image acquire.  prepare_request() returns nullptr
+// for a duplicate notification, in which case the in-progress request is
+// left to complete on its own.
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+                                        const ImageAcquirePayload &payload,
+                                        C_NotifyAck *on_notify_ack) {
+  dout(10) << "image_acquire: instance_id=" << instance_id << ", "
+           << "request_id=" << payload.request_id << dendl;
+
+  auto on_finish = prepare_request(instance_id, payload.request_id,
+                                   on_notify_ack);
+  if (on_finish != nullptr) {
+    handle_image_acquire(payload.global_image_id, on_finish);
+  }
+}
+
+template <typename I>
+// Payload dispatcher: image release (same duplicate handling as above).
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+                                        const ImageReleasePayload &payload,
+                                        C_NotifyAck *on_notify_ack) {
+  dout(10) << "image_release: instance_id=" << instance_id << ", "
+           << "request_id=" << payload.request_id << dendl;
+
+  auto on_finish = prepare_request(instance_id, payload.request_id,
+                                   on_notify_ack);
+  if (on_finish != nullptr) {
+    handle_image_release(payload.global_image_id, on_finish);
+  }
+}
+
+template <typename I>
+// Payload dispatcher: peer image removed (same duplicate handling).
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+                                        const PeerImageRemovedPayload &payload,
+                                        C_NotifyAck *on_notify_ack) {
+  dout(10) << "remove_peer_image: instance_id=" << instance_id << ", "
+           << "request_id=" << payload.request_id << dendl;
+
+  auto on_finish = prepare_request(instance_id, payload.request_id,
+                                   on_notify_ack);
+  if (on_finish != nullptr) {
+    handle_peer_image_removed(payload.global_image_id, payload.peer_mirror_uuid,
+                              on_finish);
+  }
+}
+
+template <typename I>
+// Payload dispatcher: sync slot request (same duplicate handling).
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+                                        const SyncRequestPayload &payload,
+                                        C_NotifyAck *on_notify_ack) {
+  dout(10) << "sync_request: instance_id=" << instance_id << ", "
+           << "request_id=" << payload.request_id << dendl;
+
+  auto on_finish = prepare_request(instance_id, payload.request_id,
+                                   on_notify_ack);
+  if (on_finish == nullptr) {
+    return;
+  }
+
+  handle_sync_request(instance_id, payload.sync_id, on_finish);
+}
+
+template <typename I>
+// Payload dispatcher: sync start grant (same duplicate handling).
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+                                        const SyncStartPayload &payload,
+                                        C_NotifyAck *on_notify_ack) {
+  dout(10) << "sync_start: instance_id=" << instance_id << ", "
+           << "request_id=" << payload.request_id << dendl;
+
+  auto on_finish = prepare_request(instance_id, payload.request_id,
+                                   on_notify_ack);
+  if (on_finish == nullptr) {
+    return;
+  }
+
+  handle_sync_start(instance_id, payload.sync_id, on_finish);
+}
+
+template <typename I>
+// Payload dispatcher: unknown message type -- ack immediately so the
+// notifier (possibly a newer version) is not left waiting.
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+                                        const UnknownPayload &payload,
+                                        C_NotifyAck *on_notify_ack) {
+  dout(5) << "unknown: instance_id=" << instance_id << dendl;
+
+  on_notify_ack->complete(0);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::InstanceWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/InstanceWatcher.h b/src/tools/rbd_mirror/InstanceWatcher.h
new file mode 100644
index 000000000..08e40b40b
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceWatcher.h
@@ -0,0 +1,269 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCE_WATCHER_H
+#define CEPH_RBD_MIRROR_INSTANCE_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "common/AsyncOpTracker.h"
+#include "librbd/Watcher.h"
+#include "librbd/managed_lock/Types.h"
+#include "tools/rbd_mirror/instance_watcher/Types.h"
+
+namespace librbd {
+
+class AsioEngine;
+class ImageCtx;
+template <typename> class ManagedLock;
+
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class InstanceReplayer;
+template <typename> class Throttler;
+template <typename> struct Threads;
+
+// Per-instance watcher: each rbd-mirror daemon instance owns one RADOS
+// object that it watches and holds an exclusive lock on.  Peers (and the
+// leader) communicate with the instance by notifying that object; a dead
+// instance is detected by its stale lock and cleaned up via remove().
+template <typename ImageCtxT = librbd::ImageCtx>
+class InstanceWatcher : protected librbd::Watcher {
+  using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning
+public:
+  // list all registered instance ids from the on-disk registry
+  static void get_instances(librados::IoCtx &io_ctx,
+                            std::vector<std::string> *instance_ids,
+                            Context *on_finish);
+  // break a dead instance's lock and remove its object + registry entry
+  static void remove_instance(librados::IoCtx &io_ctx,
+                              librbd::AsioEngine& asio_engine,
+                              const std::string &instance_id,
+                              Context *on_finish);
+
+  static InstanceWatcher *create(
+    librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine,
+    InstanceReplayer<ImageCtxT> *instance_replayer,
+    Throttler<ImageCtxT> *image_sync_throttler);
+  void destroy() {
+    delete this;
+  }
+
+  InstanceWatcher(librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine,
+                  InstanceReplayer<ImageCtxT> *instance_replayer,
+                  Throttler<ImageCtxT> *image_sync_throttler,
+                  const std::string &instance_id);
+  ~InstanceWatcher() override;
+
+  inline std::string &get_instance_id() {
+    return m_instance_id;
+  }
+
+  // synchronous wrappers around the async init/shut_down below
+  int init();
+  void shut_down();
+
+  void init(Context *on_finish);
+  void shut_down(Context *on_finish);
+  void remove(Context *on_finish);
+
+  // outbound notifications to a (possibly remote) instance
+  void notify_image_acquire(const std::string &instance_id,
+                            const std::string &global_image_id,
+                            Context *on_notify_ack);
+  void notify_image_release(const std::string &instance_id,
+                            const std::string &global_image_id,
+                            Context *on_notify_ack);
+  void notify_peer_image_removed(const std::string &instance_id,
+                                 const std::string &global_image_id,
+                                 const std::string &peer_mirror_uuid,
+                                 Context *on_notify_ack);
+
+  // image-sync throttling handshake with the leader
+  void notify_sync_request(const std::string &sync_id, Context *on_sync_start);
+  bool cancel_sync_request(const std::string &sync_id);
+  void notify_sync_complete(const std::string &sync_id);
+
+  void cancel_notify_requests(const std::string &instance_id);
+
+  // leader state transitions (affect sync throttling ownership)
+  void handle_acquire_leader();
+  void handle_release_leader();
+  void handle_update_leader(const std::string &leader_instance_id);
+
+private:
+  /**
+   * @verbatim
+   *
+   *     BREAK_INSTANCE_LOCK -------\
+   *        ^                       |
+   *        |               (error) |
+   *     GET_INSTANCE_LOCKER  * * *>|
+   *        ^ (remove)              |
+   *        |                       |
+   * <uninitialized> <--------------+---- WAIT_FOR_NOTIFY_OPS
+   *    | (init)      ^             |        ^
+   *    v     (error) *             |        |
+   * REGISTER_INSTANCE  * * * * * * |* *> UNREGISTER_INSTANCE
+   *    |                     *     |        ^
+   *    v             (error) *     v        |
+   * CREATE_INSTANCE_OBJECT * * * * * *> REMOVE_INSTANCE_OBJECT
+   *    |                     *              ^
+   *    v             (error) *              |
+   * REGISTER_WATCH * * * * * * * * * *> UNREGISTER_WATCH
+   *    |                     *              ^
+   *    v             (error) *              |
+   * ACQUIRE_LOCK * * * * * * * * * * *  RELEASE_LOCK
+   *    |                                    ^
+   *    v (shut_down)                        |
+   * <watching> ----------------------------/
+   *
+   * @endverbatim
+   */
+
+  struct C_NotifyInstanceRequest;
+  struct C_SyncRequest;
+
+  typedef std::pair<std::string, std::string> Id;
+
+  // dispatches a decoded notification payload to the matching
+  // handle_payload() overload
+  struct HandlePayloadVisitor : public boost::static_visitor<void> {
+    InstanceWatcher *instance_watcher;
+    std::string instance_id;
+    C_NotifyAck *on_notify_ack;
+
+    HandlePayloadVisitor(InstanceWatcher *instance_watcher,
+                         const std::string &instance_id,
+                         C_NotifyAck *on_notify_ack)
+      : instance_watcher(instance_watcher), instance_id(instance_id),
+        on_notify_ack(on_notify_ack) {
+    }
+
+    template <typename Payload>
+    inline void operator()(const Payload &payload) const {
+      instance_watcher->handle_payload(instance_id, payload, on_notify_ack);
+    }
+  };
+
+  // tracking entry for an inbound request, keyed by the requester's
+  // instance id plus its request sequence number (for dedup of re-sent
+  // notifications)
+  struct Request {
+    std::string instance_id;
+    uint64_t request_id;
+    C_NotifyAck *on_notify_ack = nullptr;
+
+    Request(const std::string &instance_id, uint64_t request_id)
+      : instance_id(instance_id), request_id(request_id) {
+    }
+
+    inline bool operator<(const Request &rhs) const {
+      return instance_id < rhs.instance_id ||
+        (instance_id == rhs.instance_id && request_id < rhs.request_id);
+    }
+  };
+
+  Threads<ImageCtxT> *m_threads;
+  InstanceReplayer<ImageCtxT> *m_instance_replayer;
+  Throttler<ImageCtxT> *m_image_sync_throttler;
+  std::string m_instance_id;
+
+  mutable ceph::mutex m_lock;
+  librbd::ManagedLock<ImageCtxT> *m_instance_lock;
+  Context *m_on_finish = nullptr;
+  int m_ret_val = 0;           // first error recorded by a state machine
+  std::string m_leader_instance_id;
+  librbd::managed_lock::Locker m_instance_locker;
+  std::set<std::pair<std::string, C_NotifyInstanceRequest *>> m_notify_ops;
+  AsyncOpTracker m_notify_op_tracker;
+  uint64_t m_request_seq = 0;
+  std::set<Request> m_requests;
+  std::set<C_NotifyInstanceRequest *> m_suspended_ops;
+  std::map<std::string, C_SyncRequest *> m_inflight_sync_reqs;
+
+  inline bool is_leader() const {
+    return m_leader_instance_id == m_instance_id;
+  }
+
+  void register_instance();
+  void handle_register_instance(int r);
+
+  void create_instance_object();
+  void handle_create_instance_object(int r);
+
+  void register_watch();
+  void handle_register_watch(int r);
+
+  void acquire_lock();
+  void handle_acquire_lock(int r);
+
+  void release_lock();
+  void handle_release_lock(int r);
+
+  void unregister_watch();
+  void handle_unregister_watch(int r);
+
+  void remove_instance_object();
+  void handle_remove_instance_object(int r);
+
+  void unregister_instance();
+  void handle_unregister_instance(int r);
+
+  void wait_for_notify_ops();
+  void handle_wait_for_notify_ops(int r);
+
+  void get_instance_locker();
+  void handle_get_instance_locker(int r);
+
+  void break_instance_lock();
+  void handle_break_instance_lock(int r);
+
+  void suspend_notify_request(C_NotifyInstanceRequest *req);
+  bool unsuspend_notify_request(C_NotifyInstanceRequest *req);
+  void unsuspend_notify_requests();
+
+  void notify_sync_complete(const ceph::mutex& lock, const std::string &sync_id);
+  void handle_notify_sync_request(C_SyncRequest *sync_ctx, int r);
+  void handle_notify_sync_complete(C_SyncRequest *sync_ctx, int r);
+
+  void notify_sync_start(const std::string &instance_id,
+                         const std::string &sync_id);
+
+  Context *prepare_request(const std::string &instance_id, uint64_t request_id,
+                           C_NotifyAck *on_notify_ack);
+  void complete_request(const std::string &instance_id, uint64_t request_id,
+                        int r);
+
+  void handle_notify(uint64_t notify_id, uint64_t handle,
+                     uint64_t notifier_id, bufferlist &bl) override;
+
+  void handle_image_acquire(const std::string &global_image_id,
+                            Context *on_finish);
+  void handle_image_release(const std::string &global_image_id,
+                            Context *on_finish);
+  void handle_peer_image_removed(const std::string &global_image_id,
+                                 const std::string &peer_mirror_uuid,
+                                 Context *on_finish);
+
+  void handle_sync_request(const std::string &instance_id,
+                           const std::string &sync_id, Context *on_finish);
+  void handle_sync_start(const std::string &instance_id,
+                         const std::string &sync_id, Context *on_finish);
+
+  void handle_payload(const std::string &instance_id,
+                      const instance_watcher::ImageAcquirePayload &payload,
+                      C_NotifyAck *on_notify_ack);
+  void handle_payload(const std::string &instance_id,
+                      const instance_watcher::ImageReleasePayload &payload,
+                      C_NotifyAck *on_notify_ack);
+  void handle_payload(const std::string &instance_id,
+                      const instance_watcher::PeerImageRemovedPayload &payload,
+                      C_NotifyAck *on_notify_ack);
+  void handle_payload(const std::string &instance_id,
+                      const instance_watcher::SyncRequestPayload &payload,
+                      C_NotifyAck *on_notify_ack);
+  void handle_payload(const std::string &instance_id,
+                      const instance_watcher::SyncStartPayload &payload,
+                      C_NotifyAck *on_notify_ack);
+  void handle_payload(const std::string &instance_id,
+                      const instance_watcher::UnknownPayload &payload,
+                      C_NotifyAck *on_notify_ack);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCE_WATCHER_H
diff --git a/src/tools/rbd_mirror/Instances.cc b/src/tools/rbd_mirror/Instances.cc
new file mode 100644
index 000000000..ca291bb5f
--- /dev/null
+++ b/src/tools/rbd_mirror/Instances.cc
@@ -0,0 +1,356 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/stringify.h"
+#include "common/Timer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "InstanceWatcher.h"
+#include "Instances.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::Instances: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+// Tracks the set of live rbd-mirror instances for one pool; the mutex name
+// embeds the pool name to disambiguate lock-dependency reports.
+Instances<I>::Instances(Threads<I> *threads, librados::IoCtx &ioctx,
+                        const std::string& instance_id,
+                        instances::Listener& listener) :
+  m_threads(threads), m_ioctx(ioctx), m_instance_id(instance_id),
+  m_listener(listener), m_cct(reinterpret_cast<CephContext *>(ioctx.cct())),
+  m_lock(ceph::make_mutex("rbd::mirror::Instances " + ioctx.get_pool_name())) {
+}
+
+template <typename I>
+Instances<I>::~Instances() {
+}
+
+template <typename I>
+// Start up: fetch the currently registered instance ids; on_finish fires
+// once the initial listing has been processed.
+void Instances<I>::init(Context *on_finish) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(m_on_finish == nullptr);
+  m_on_finish = on_finish;
+  get_instances();
+}
+
+template <typename I>
+// Shut down: cancel the pending removal timer and wait for in-flight ops.
+// A non-null m_on_finish doubles as the "shutting down" flag checked by
+// acked()/handle_acked()/schedule_remove_task().
+void Instances<I>::shut_down(Context *on_finish) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(m_on_finish == nullptr);
+  m_on_finish = on_finish;
+
+  // hop to the work queue so both timer_lock and m_lock can be taken in
+  // the canonical order
+  Context *ctx = new LambdaContext(
+    [this](int r) {
+      std::scoped_lock locker{m_threads->timer_lock, m_lock};
+      cancel_remove_task();
+      wait_for_ops();
+    });
+
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+// Allow listener callbacks again and replay the "added" notifications that
+// were held back while the listener was blocked.
+void Instances<I>::unblock_listener() {
+  dout(5) << dendl;
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(m_listener_blocked);
+  m_listener_blocked = false;
+
+  InstanceIds added_instance_ids;
+  for (auto& pair : m_instances) {
+    if (pair.second.state == INSTANCE_STATE_ADDING) {
+      added_instance_ids.push_back(pair.first);
+    }
+  }
+
+  if (!added_instance_ids.empty()) {
+    m_threads->work_queue->queue(
+      new C_NotifyInstancesAdded(this, added_instance_ids), 0);
+  }
+}
+
+template <typename I>
+// Heartbeat-ack entry point: record that the given instances are alive.
+// Processing is deferred to the work queue (see handle_acked()).
+void Instances<I>::acked(const InstanceIds& instance_ids) {
+  dout(10) << "instance_ids=" << instance_ids << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (m_on_finish != nullptr) {
+    // shutting down -- drop the ack
+    dout(5) << "received on shut down, ignoring" << dendl;
+    return;
+  }
+
+  Context *ctx = new C_HandleAcked(this, instance_ids);
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+// Work-queue half of acked(): refresh each instance's last-ack timestamp,
+// (re)schedule the dead-instance removal timer, and queue "added"
+// notifications for newly seen instances (unless the listener is blocked).
+void Instances<I>::handle_acked(const InstanceIds& instance_ids) {
+  dout(5) << "instance_ids=" << instance_ids << dendl;
+
+  std::scoped_lock locker{m_threads->timer_lock, m_lock};
+  if (m_on_finish != nullptr) {
+    dout(5) << "handled on shut down, ignoring" << dendl;
+    return;
+  }
+
+  InstanceIds added_instance_ids;
+  auto time = clock_t::now();
+  for (auto& instance_id : instance_ids) {
+    // insert() is a no-op for known instances; new entries start in
+    // INSTANCE_STATE_ADDING
+    auto &instance = m_instances.insert(
+      std::make_pair(instance_id, Instance{})).first->second;
+    instance.acked_time = time;
+    if (instance.state == INSTANCE_STATE_ADDING) {
+      added_instance_ids.push_back(instance_id);
+    }
+  }
+
+  schedule_remove_task(time);
+  if (!m_listener_blocked && !added_instance_ids.empty()) {
+    m_threads->work_queue->queue(
+      new C_NotifyInstancesAdded(this, added_instance_ids), 0);
+  }
+}
+
+template <typename I>
+// Deliver "added" notifications to the listener.  The state is re-checked
+// both before (entries may have been removed since queuing) and after the
+// unlocked listener call (it may block and state may change meanwhile).
+void Instances<I>::notify_instances_added(const InstanceIds& instance_ids) {
+  std::unique_lock locker{m_lock};
+  InstanceIds added_instance_ids;
+  for (auto& instance_id : instance_ids) {
+    auto it = m_instances.find(instance_id);
+    if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) {
+      added_instance_ids.push_back(instance_id);
+    }
+  }
+
+  if (added_instance_ids.empty()) {
+    return;
+  }
+
+  dout(5) << "instance_ids=" << added_instance_ids << dendl;
+  // the listener callback may block; never invoke it under m_lock
+  locker.unlock();
+  m_listener.handle_added(added_instance_ids);
+  locker.lock();
+
+  for (auto& instance_id : added_instance_ids) {
+    auto it = m_instances.find(instance_id);
+    if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) {
+      it->second.state = INSTANCE_STATE_IDLE;
+    }
+  }
+}
+
+template <typename I>
+// Deliver "removed" notifications to the listener (outside m_lock), then
+// drop the corresponding bookkeeping entries.
+void Instances<I>::notify_instances_removed(const InstanceIds& instance_ids) {
+  dout(5) << "instance_ids=" << instance_ids << dendl;
+  m_listener.handle_removed(instance_ids);
+
+  std::lock_guard locker{m_lock};
+  for (auto& instance_id : instance_ids) {
+    m_instances.erase(instance_id);
+  }
+}
+
+template <typename I>
+// Append all currently known instance ids to *instance_ids.
+void Instances<I>::list(std::vector<std::string> *instance_ids) {
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  // reserve up front and iterate by const reference -- the original
+  // by-value loop copied every (std::string, Instance) map entry
+  instance_ids->reserve(instance_ids->size() + m_instances.size());
+  for (const auto& instance_pair : m_instances) {
+    instance_ids->push_back(instance_pair.first);
+  }
+}
+
+
+template <typename I>
+// Fetch the registered instance ids into m_instance_ids (init path).
+void Instances<I>::get_instances() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_context_callback<
+    Instances, &Instances<I>::handle_get_instances>(this);
+
+  InstanceWatcher<I>::get_instances(m_ioctx, &m_instance_ids, ctx);
+}
+
+template <typename I>
+// Callback for get_instances(): seed the instance table with the listing
+// (treated like a heartbeat ack) and complete the init context.
+void Instances<I>::handle_get_instances(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  Context *on_finish = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+    // clear m_on_finish first: handle_acked() skips its work while
+    // m_on_finish is set (shut-down guard)
+    std::swap(on_finish, m_on_finish);
+  }
+
+  if (r < 0) {
+    derr << "error retrieving instances: " << cpp_strerror(r) << dendl;
+  } else {
+    handle_acked(m_instance_ids);
+  }
+  on_finish->complete(r);
+}
+
+template <typename I>
+// Shut-down path: wait for in-flight removal operations to drain.
+void Instances<I>::wait_for_ops() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      Instances, &Instances<I>::handle_wait_for_ops>(this));
+
+  m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+// Callback for wait_for_ops(): complete the shut-down context.
+void Instances<I>::handle_wait_for_ops(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  ceph_assert(r == 0);
+
+  Context *on_finish = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+    std::swap(on_finish, m_on_finish);
+  }
+  on_finish->complete(r);
+}
+
+template <typename I>
+// Timer callback body: mark every peer instance whose last ack is at or
+// before 'time' as removing, then blocklist/remove them all in parallel.
+// Never removes this daemon's own instance entry.
+void Instances<I>::remove_instances(const Instances<I>::clock_t::time_point& time) {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  InstanceIds instance_ids;
+  for (auto& instance_pair : m_instances) {
+    if (instance_pair.first == m_instance_id) {
+      continue;
+    }
+    auto& instance = instance_pair.second;
+    if (instance.state != INSTANCE_STATE_REMOVING &&
+        instance.acked_time <= time) {
+      instance.state = INSTANCE_STATE_REMOVING;
+      instance_ids.push_back(instance_pair.first);
+    }
+  }
+  // the timer only fires when at least one instance is due for removal
+  ceph_assert(!instance_ids.empty());
+
+  dout(10) << "instance_ids=" << instance_ids << dendl;
+  Context* ctx = new LambdaContext([this, instance_ids](int r) {
+      handle_remove_instances(r, instance_ids);
+    });
+  ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+  auto gather_ctx = new C_Gather(m_cct, ctx);
+  for (auto& instance_id : instance_ids) {
+    InstanceWatcher<I>::remove_instance(m_ioctx, *m_threads->asio_engine,
+                                        instance_id, gather_ctx->new_sub());
+  }
+
+  // tracked so shut_down() can wait for the removals to finish
+  m_async_op_tracker.start_op();
+  gather_ctx->activate();
+}
+
+template <typename I>
+// Callback for remove_instances(): notify the listener and re-arm the
+// timer for the next batch of stale instances.
+void Instances<I>::handle_remove_instances(
+    int r, const InstanceIds& instance_ids) {
+  std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+  dout(10) << "r=" << r << ", instance_ids=" << instance_ids << dendl;
+  ceph_assert(r == 0);
+
+  // fire removed notification now that instances have been blocklisted
+  m_threads->work_queue->queue(
+    new C_NotifyInstancesRemoved(this, instance_ids), 0);
+
+  // reschedule the timer for the next batch
+  schedule_remove_task(clock_t::now());
+  m_async_op_tracker.finish_op();
+}
+
+template <typename I>
+// Cancel the pending dead-instance removal timer, if armed.  Requires both
+// the timer lock and m_lock.
+void Instances<I>::cancel_remove_task() {
+  ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  if (m_timer_task == nullptr) {
+    return;
+  }
+
+  dout(10) << dendl;
+
+  // must succeed: the task resets m_timer_task under the same locks held
+  // here, so it cannot be mid-flight
+  bool canceled = m_threads->timer->cancel_event(m_timer_task);
+  ceph_assert(canceled);
+  m_timer_task = nullptr;
+}
+
+template <typename I>
+// Re-arm the removal timer to fire when the oldest non-removing peer
+// instance will have exceeded the heartbeat-based liveness window.
+void Instances<I>::schedule_remove_task(const Instances<I>::clock_t::time_point& time) {
+  cancel_remove_task();
+  if (m_on_finish != nullptr) {
+    // shutting down -- do not re-arm
+    dout(10) << "received on shut down, ignoring" << dendl;
+    return;
+  }
+
+  // liveness window (seconds): heartbeat interval times the worst-case
+  // number of intervals before an instance is declared dead
+  int after = m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_heartbeat_interval") *
+    (1 + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats") +
+     m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_acquire_attempts_before_break"));
+
+  bool schedule = false;
+  auto oldest_time = time;
+  for (auto& instance : m_instances) {
+    if (instance.first == m_instance_id) {
+      // never schedule removal of our own instance
+      continue;
+    }
+    if (instance.second.state == INSTANCE_STATE_REMOVING) {
+      // removal is already in-flight
+      continue;
+    }
+
+    oldest_time = std::min(oldest_time, instance.second.acked_time);
+    schedule = true;
+  }
+
+  if (!schedule) {
+    return;
+  }
+
+  dout(10) << dendl;
+
+  // schedule a time to fire when the oldest instance should be removed
+  m_timer_task = new LambdaContext(
+    [this, oldest_time](int r) {
+      ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+      std::lock_guard locker{m_lock};
+      m_timer_task = nullptr;
+
+      remove_instances(oldest_time);
+    });
+
+  oldest_time += ceph::make_timespan(after);
+  m_threads->timer->add_event_at(oldest_time, m_timer_task);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::Instances<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/Instances.h b/src/tools/rbd_mirror/Instances.h
new file mode 100644
index 000000000..e6e104b73
--- /dev/null
+++ b/src/tools/rbd_mirror/Instances.h
@@ -0,0 +1,168 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCES_H
+#define CEPH_RBD_MIRROR_INSTANCES_H
+
+#include <map>
+#include <vector>
+
+#include "include/buffer_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "librbd/Watcher.h"
+#include "tools/rbd_mirror/instances/Types.h"
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Instances {
+public:
+ typedef std::vector<std::string> InstanceIds;
+
+ static Instances *create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &ioctx,
+ const std::string& instance_id,
+ instances::Listener& listener) {
+ return new Instances(threads, ioctx, instance_id, listener);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ Instances(Threads<ImageCtxT> *threads, librados::IoCtx &ioctx,
+ const std::string& instance_id, instances::Listener& listener);
+ virtual ~Instances();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+ void unblock_listener();
+
+ void acked(const InstanceIds& instance_ids);
+
+ void list(std::vector<std::string> *instance_ids);
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <---------------------\
+ * | (init) ^ |
+ * v (error) * |
+ * GET_INSTANCES * * * * * WAIT_FOR_OPS
+ * | ^
+ * v (shut_down) |
+ * <initialized> ------------------------/
+ * .
+ * . (remove_instance)
+ * v
+ * REMOVE_INSTANCE
+ *
+ * @endverbatim
+ */
+
+ enum InstanceState {
+ INSTANCE_STATE_ADDING,
+ INSTANCE_STATE_IDLE,
+ INSTANCE_STATE_REMOVING
+ };
+
+ using clock_t = ceph::real_clock;
+ struct Instance {
+ clock_t::time_point acked_time{};
+ InstanceState state = INSTANCE_STATE_ADDING;
+ };
+
+ struct C_NotifyBase : public Context {
+ Instances *instances;
+ InstanceIds instance_ids;
+
+ C_NotifyBase(Instances *instances, const InstanceIds& instance_ids)
+ : instances(instances), instance_ids(instance_ids) {
+ instances->m_async_op_tracker.start_op();
+ }
+
+ void finish(int r) override {
+ execute();
+ instances->m_async_op_tracker.finish_op();
+ }
+
+ virtual void execute() = 0;
+ };
+
+ struct C_HandleAcked : public C_NotifyBase {
+ C_HandleAcked(Instances *instances, const InstanceIds& instance_ids)
+ : C_NotifyBase(instances, instance_ids) {
+ }
+
+ void execute() override {
+ this->instances->handle_acked(this->instance_ids);
+ }
+ };
+
+ struct C_NotifyInstancesAdded : public C_NotifyBase {
+ C_NotifyInstancesAdded(Instances *instances,
+ const InstanceIds& instance_ids)
+ : C_NotifyBase(instances, instance_ids) {
+ }
+
+ void execute() override {
+ this->instances->notify_instances_added(this->instance_ids);
+ }
+ };
+
+ struct C_NotifyInstancesRemoved : public C_NotifyBase {
+ C_NotifyInstancesRemoved(Instances *instances,
+ const InstanceIds& instance_ids)
+ : C_NotifyBase(instances, instance_ids) {
+ }
+
+ void execute() override {
+ this->instances->notify_instances_removed(this->instance_ids);
+ }
+ };
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx &m_ioctx;
+ std::string m_instance_id;
+ instances::Listener& m_listener;
+ CephContext *m_cct;
+
+ ceph::mutex m_lock;
+ InstanceIds m_instance_ids;
+ std::map<std::string, Instance> m_instances;
+ Context *m_on_finish = nullptr;
+ AsyncOpTracker m_async_op_tracker;
+
+ Context *m_timer_task = nullptr;
+
+ bool m_listener_blocked = true;
+
+ void handle_acked(const InstanceIds& instance_ids);
+ void notify_instances_added(const InstanceIds& instance_ids);
+ void notify_instances_removed(const InstanceIds& instance_ids);
+
+ void get_instances();
+ void handle_get_instances(int r);
+
+ void wait_for_ops();
+ void handle_wait_for_ops(int r);
+
+ void remove_instances(const clock_t::time_point& time);
+ void handle_remove_instances(int r, const InstanceIds& instance_ids);
+
+ void cancel_remove_task();
+ void schedule_remove_task(const clock_t::time_point& time);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCES_H
diff --git a/src/tools/rbd_mirror/LeaderWatcher.cc b/src/tools/rbd_mirror/LeaderWatcher.cc
new file mode 100644
index 000000000..8f12af14c
--- /dev/null
+++ b/src/tools/rbd_mirror/LeaderWatcher.cc
@@ -0,0 +1,1069 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "LeaderWatcher.h"
+#include "common/Cond.h"
+#include "common/Timer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "include/stringify.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/watcher/Types.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::LeaderWatcher: " \
+ << this << " " << __func__ << ": "
+namespace rbd {
+namespace mirror {
+
+using namespace leader_watcher;
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+LeaderWatcher<I>::LeaderWatcher(Threads<I> *threads, librados::IoCtx &io_ctx,
+ leader_watcher::Listener *listener)
+ : Watcher(io_ctx, threads->work_queue, RBD_MIRROR_LEADER),
+ m_threads(threads), m_listener(listener), m_instances_listener(this),
+ m_lock(ceph::make_mutex("rbd::mirror::LeaderWatcher " +
+ io_ctx.get_pool_name())),
+ m_notifier_id(librados::Rados(io_ctx).get_instance_id()),
+ m_instance_id(stringify(m_notifier_id)),
+ m_leader_lock(new LeaderLock(m_ioctx, *m_threads->asio_engine, m_oid, this,
+ true, m_cct->_conf.get_val<uint64_t>(
+ "rbd_blocklist_expire_seconds"))) {
+}
+
+template <typename I>
+LeaderWatcher<I>::~LeaderWatcher() {
+ ceph_assert(m_instances == nullptr);
+ ceph_assert(m_timer_task == nullptr);
+
+ delete m_leader_lock;
+}
+
+template <typename I>
+std::string LeaderWatcher<I>::get_instance_id() {
+ return m_instance_id;
+}
+
+template <typename I>
+int LeaderWatcher<I>::init() {
+ C_SaferCond init_ctx;
+ init(&init_ctx);
+ return init_ctx.wait();
+}
+
+template <typename I>
+void LeaderWatcher<I>::init(Context *on_finish) {
+ dout(10) << "notifier_id=" << m_notifier_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ create_leader_object();
+}
+
+template <typename I>
+void LeaderWatcher<I>::create_leader_object() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ librados::ObjectWriteOperation op;
+ op.create(false);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_create_leader_object>(this);
+ int r = m_ioctx.aio_operate(m_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_create_leader_object(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+
+ if (r == 0) {
+ register_watch();
+ return;
+ }
+
+ derr << "error creating " << m_oid << " object: " << cpp_strerror(r)
+ << dendl;
+
+ std::swap(on_finish, m_on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::register_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_register_watch>(this));
+
+ librbd::Watcher::register_watch(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_register_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard timer_locker(m_threads->timer_lock);
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error registering leader watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+ } else {
+ schedule_acquire_leader_lock(0);
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(on_finish, m_on_finish);
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down() {
+ C_SaferCond shut_down_ctx;
+ shut_down(&shut_down_ctx);
+ int r = shut_down_ctx.wait();
+ ceph_assert(r == 0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ ceph_assert(m_on_shut_down_finish == nullptr);
+ m_on_shut_down_finish = on_finish;
+ cancel_timer_task();
+ shut_down_leader_lock();
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down_leader_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_shut_down_leader_lock>(this));
+
+ m_leader_lock->shut_down(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_shut_down_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error shutting down leader lock: " << cpp_strerror(r) << dendl;
+ }
+
+ unregister_watch();
+}
+
+template <typename I>
+void LeaderWatcher<I>::unregister_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_unregister_watch>(this));
+
+ librbd::Watcher::unregister_watch(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_unregister_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error unregistering leader watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+ }
+ wait_for_tasks();
+}
+
+template <typename I>
+void LeaderWatcher<I>::wait_for_tasks() {
+ dout(10) << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ schedule_timer_task("wait for tasks", 0, false,
+ &LeaderWatcher<I>::handle_wait_for_tasks, true);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_wait_for_tasks() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_on_shut_down_finish != nullptr);
+
+ ceph_assert(!m_timer_op_tracker.empty());
+ m_timer_op_tracker.finish_op();
+
+ auto ctx = new LambdaContext([this](int r) {
+ Context *on_finish;
+ {
+ // ensure lock isn't held when completing shut down
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_shut_down_finish != nullptr);
+ on_finish = m_on_shut_down_finish;
+ }
+ on_finish->complete(0);
+ });
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_blocklisted() const {
+ std::lock_guard locker{m_lock};
+ return m_blocklisted;
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_leader() const {
+ std::lock_guard locker{m_lock};
+ return is_leader(m_lock);
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_leader(ceph::mutex &lock) const {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ bool leader = m_leader_lock->is_leader();
+ dout(10) << leader << dendl;
+ return leader;
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_releasing_leader() const {
+ std::lock_guard locker{m_lock};
+ return is_releasing_leader(m_lock);
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_releasing_leader(ceph::mutex &lock) const {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ bool releasing = m_leader_lock->is_releasing_leader();
+ dout(10) << releasing << dendl;
+ return releasing;
+}
+
+template <typename I>
+bool LeaderWatcher<I>::get_leader_instance_id(std::string *instance_id) const {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (is_leader(m_lock) || is_releasing_leader(m_lock)) {
+ *instance_id = m_instance_id;
+ return true;
+ }
+
+ if (!m_locker.cookie.empty()) {
+ *instance_id = stringify(m_locker.entity.num());
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+void LeaderWatcher<I>::release_leader() {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ if (!is_leader(m_lock)) {
+ return;
+ }
+
+ release_leader_lock();
+}
+
+template <typename I>
+void LeaderWatcher<I>::list_instances(std::vector<std::string> *instance_ids) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ instance_ids->clear();
+ if (m_instances != nullptr) {
+ m_instances->list(instance_ids);
+ }
+}
+
+template <typename I>
+void LeaderWatcher<I>::cancel_timer_task() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ if (m_timer_task == nullptr) {
+ return;
+ }
+
+ dout(10) << m_timer_task << dendl;
+ bool canceled = m_threads->timer->cancel_event(m_timer_task);
+ ceph_assert(canceled);
+ m_timer_task = nullptr;
+}
+
+template <typename I>
+void LeaderWatcher<I>::schedule_timer_task(const std::string &name,
+ int delay_factor, bool leader,
+ TimerCallback timer_callback,
+ bool shutting_down) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ if (!shutting_down && m_on_shut_down_finish != nullptr) {
+ return;
+ }
+
+ cancel_timer_task();
+
+ m_timer_task = new LambdaContext(
+ [this, leader, timer_callback](int r) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ m_timer_task = nullptr;
+
+ if (m_timer_op_tracker.empty()) {
+ std::lock_guard locker{m_lock};
+ execute_timer_task(leader, timer_callback);
+ return;
+ }
+
+ // old timer task is still running -- do not start next
+ // task until the previous task completes
+ if (m_timer_gate == nullptr) {
+ m_timer_gate = new C_TimerGate(this);
+ m_timer_op_tracker.wait_for_ops(m_timer_gate);
+ }
+ m_timer_gate->leader = leader;
+ m_timer_gate->timer_callback = timer_callback;
+ });
+
+ int after = delay_factor * m_cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_leader_heartbeat_interval");
+
+ dout(10) << "scheduling " << name << " after " << after << " sec (task "
+ << m_timer_task << ")" << dendl;
+ m_threads->timer->add_event_after(after, m_timer_task);
+}
+
+template <typename I>
+void LeaderWatcher<I>::execute_timer_task(bool leader,
+ TimerCallback timer_callback) {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_timer_op_tracker.empty());
+
+ if (is_leader(m_lock) != leader) {
+ return;
+ }
+
+ m_timer_op_tracker.start_op();
+ (this->*timer_callback)();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_post_acquire_leader_lock(int r,
+ Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ if (r == -EAGAIN) {
+ dout(10) << "already locked" << dendl;
+ } else {
+ derr << "error acquiring leader lock: " << cpp_strerror(r) << dendl;
+ }
+ on_finish->complete(r);
+ return;
+ }
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ init_instances();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_pre_release_leader_lock(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ notify_listener();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_post_release_leader_lock(int r,
+ Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ on_finish->complete(r);
+ return;
+ }
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ notify_lock_released();
+}
+
+template <typename I>
+void LeaderWatcher<I>::break_leader_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_locker.cookie.empty()) {
+ get_locker();
+ return;
+ }
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_break_leader_lock>(this));
+
+ m_leader_lock->break_lock(m_locker, true, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_break_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "error breaking leader lock: " << cpp_strerror(r) << dendl;
+ schedule_acquire_leader_lock(1);
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ m_locker = {};
+ m_acquire_attempts = 0;
+ acquire_leader_lock();
+}
+
+template <typename I>
+void LeaderWatcher<I>::schedule_get_locker(bool reset_leader,
+ uint32_t delay_factor) {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ if (reset_leader) {
+ m_locker = {};
+ m_acquire_attempts = 0;
+ }
+
+ schedule_timer_task("get locker", delay_factor, false,
+ &LeaderWatcher<I>::get_locker, false);
+}
+
+template <typename I>
+void LeaderWatcher<I>::get_locker() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ C_GetLocker *get_locker_ctx = new C_GetLocker(this);
+ Context *ctx = create_async_context_callback(m_work_queue, get_locker_ctx);
+
+ m_leader_lock->get_locker(&get_locker_ctx->locker, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_get_locker(int r,
+ librbd::managed_lock::Locker& locker) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock l{m_threads->timer_lock, m_lock};
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (is_leader(m_lock)) {
+ m_locker = {};
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (r == -ENOENT) {
+ m_locker = {};
+ m_acquire_attempts = 0;
+ acquire_leader_lock();
+ return;
+ } else if (r < 0) {
+ derr << "error retrieving leader locker: " << cpp_strerror(r) << dendl;
+ schedule_get_locker(true, 1);
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ bool notify_listener = false;
+ if (m_locker != locker) {
+ m_locker = locker;
+ notify_listener = true;
+ if (m_acquire_attempts > 1) {
+ dout(10) << "new lock owner detected -- resetting heartbeat counter"
+ << dendl;
+ m_acquire_attempts = 0;
+ }
+ }
+
+ if (m_acquire_attempts >= m_cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_leader_max_acquire_attempts_before_break")) {
+ dout(0) << "breaking leader lock after " << m_acquire_attempts << " "
+ << "failed attempts to acquire" << dendl;
+ break_leader_lock();
+ return;
+ }
+
+ schedule_acquire_leader_lock(1);
+
+ if (!notify_listener) {
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ auto ctx = new LambdaContext(
+ [this](int r) {
+ std::string instance_id;
+ if (get_leader_instance_id(&instance_id)) {
+ m_listener->update_leader_handler(instance_id);
+ }
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ m_timer_op_tracker.finish_op();
+ });
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::schedule_acquire_leader_lock(uint32_t delay_factor) {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ schedule_timer_task("acquire leader lock",
+ delay_factor *
+ m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats"),
+ false, &LeaderWatcher<I>::acquire_leader_lock, false);
+}
+
+template <typename I>
+void LeaderWatcher<I>::acquire_leader_lock() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ ++m_acquire_attempts;
+ dout(10) << "acquire_attempts=" << m_acquire_attempts << dendl;
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_acquire_leader_lock>(this));
+ m_leader_lock->try_acquire_lock(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_acquire_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (r < 0) {
+ if (r == -EAGAIN) {
+ dout(10) << "already locked" << dendl;
+ } else {
+ derr << "error acquiring lock: " << cpp_strerror(r) << dendl;
+ }
+
+ get_locker();
+ return;
+ }
+
+ m_locker = {};
+ m_acquire_attempts = 0;
+
+ if (m_ret_val) {
+ dout(5) << "releasing due to error on notify" << dendl;
+ release_leader_lock();
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ notify_heartbeat();
+}
+
+template <typename I>
+void LeaderWatcher<I>::release_leader_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_release_leader_lock>(this));
+
+ m_leader_lock->release_lock(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_release_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ if (r < 0) {
+ derr << "error releasing lock: " << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ schedule_acquire_leader_lock(1);
+}
+
+template <typename I>
+void LeaderWatcher<I>::init_instances() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_instances == nullptr);
+
+ m_instances = Instances<I>::create(m_threads, m_ioctx, m_instance_id,
+ m_instances_listener);
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_instances>(this);
+
+ m_instances->init(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_init_instances(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ if (r < 0) {
+ std::lock_guard locker{m_lock};
+ derr << "error initializing instances: " << cpp_strerror(r) << dendl;
+ m_instances->destroy();
+ m_instances = nullptr;
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+ } else {
+ std::lock_guard locker{m_lock};
+ notify_listener();
+ return;
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down_instances() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_instances != nullptr);
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<LeaderWatcher<I>,
+ &LeaderWatcher<I>::handle_shut_down_instances>(this));
+
+ m_instances->shut_down(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_shut_down_instances(int r) {
+ dout(10) << "r=" << r << dendl;
+ ceph_assert(r == 0);
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+
+ m_instances->destroy();
+ m_instances = nullptr;
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_listener() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_listener>(this));
+
+ if (is_leader(m_lock)) {
+ ctx = new LambdaContext(
+ [this, ctx](int r) {
+ m_listener->post_acquire_handler(ctx);
+ });
+ } else {
+ ctx = new LambdaContext(
+ [this, ctx](int r) {
+ m_listener->pre_release_handler(ctx);
+ });
+ }
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_listener(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error notifying listener: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ }
+
+ if (is_leader(m_lock)) {
+ notify_lock_acquired();
+ } else {
+ shut_down_instances();
+ }
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_lock_acquired() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_acquired>(this);
+
+ bufferlist bl;
+ encode(NotifyMessage{LockAcquiredPayload{}}, bl);
+
+ send_notify(bl, nullptr, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_lock_acquired(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ if (r < 0 && r != -ETIMEDOUT) {
+ derr << "error notifying leader lock acquired: " << cpp_strerror(r)
+ << dendl;
+ m_ret_val = r;
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+
+ if (m_ret_val == 0) {
+ // listener should be ready for instance add/remove events now
+ m_instances->unblock_listener();
+ }
+ }
+ on_finish->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_lock_released() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_released>(this);
+
+ bufferlist bl;
+ encode(NotifyMessage{LockReleasedPayload{}}, bl);
+
+ send_notify(bl, nullptr, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_lock_released(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ if (r < 0 && r != -ETIMEDOUT) {
+ derr << "error notifying leader lock released: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_heartbeat() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (!is_leader(m_lock)) {
+ dout(5) << "not leader, canceling" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_heartbeat>(this);
+
+ bufferlist bl;
+ encode(NotifyMessage{HeartbeatPayload{}}, bl);
+
+ m_heartbeat_response.acks.clear();
+ send_notify(bl, &m_heartbeat_response, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_heartbeat(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ m_timer_op_tracker.finish_op();
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ return;
+ } else if (!is_leader(m_lock)) {
+ return;
+ }
+
+ if (r < 0 && r != -ETIMEDOUT) {
+ derr << "error notifying heartbeat: " << cpp_strerror(r)
+ << ", releasing leader" << dendl;
+ release_leader_lock();
+ return;
+ }
+
+ dout(10) << m_heartbeat_response.acks.size() << " acks received, "
+ << m_heartbeat_response.timeouts.size() << " timed out" << dendl;
+
+ std::vector<std::string> instance_ids;
+ for (auto &it: m_heartbeat_response.acks) {
+ uint64_t notifier_id = it.first.gid;
+ instance_ids.push_back(stringify(notifier_id));
+ }
+ if (!instance_ids.empty()) {
+ m_instances->acked(instance_ids);
+ }
+
+ schedule_timer_task("heartbeat", 1, true,
+ &LeaderWatcher<I>::notify_heartbeat, false);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_heartbeat(Context *on_notify_ack) {
+ dout(10) << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (is_leader(m_lock)) {
+ dout(5) << "got another leader heartbeat, ignoring" << dendl;
+ } else if (!m_locker.cookie.empty()) {
+ cancel_timer_task();
+ m_acquire_attempts = 0;
+ schedule_acquire_leader_lock(1);
+ }
+ }
+
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_lock_acquired(Context *on_notify_ack) {
+ dout(10) << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (is_leader(m_lock)) {
+ dout(5) << "got another leader lock_acquired, ignoring" << dendl;
+ } else {
+ cancel_timer_task();
+ schedule_get_locker(true, 0);
+ }
+ }
+
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_lock_released(Context *on_notify_ack) {
+ dout(10) << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (is_leader(m_lock)) {
+ dout(5) << "got another leader lock_released, ignoring" << dendl;
+ } else {
+ cancel_timer_task();
+ schedule_get_locker(true, 0);
+ }
+ }
+
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) {
+ dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", "
+ << "notifier_id=" << notifier_id << dendl;
+
+ Context *ctx = new C_NotifyAck(this, notify_id, handle);
+
+ if (notifier_id == m_notifier_id) {
+ dout(10) << "our own notification, ignoring" << dendl;
+ ctx->complete(0);
+ return;
+ }
+
+ NotifyMessage notify_message;
+ try {
+ auto iter = bl.cbegin();
+ decode(notify_message, iter);
+ } catch (const buffer::error &err) {
+ derr << "error decoding image notification: " << err.what() << dendl;
+ ctx->complete(0);
+ return;
+ }
+
+ apply_visitor(HandlePayloadVisitor(this, ctx), notify_message.payload);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_rewatch_complete(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ if (r == -EBLOCKLISTED) {
+ dout(1) << "blocklisted detected" << dendl;
+ m_blocklisted = true;
+ return;
+ }
+
+ m_leader_lock->reacquire_lock(nullptr);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const HeartbeatPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "heartbeat" << dendl;
+
+ handle_heartbeat(on_notify_ack);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const LockAcquiredPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "lock_acquired" << dendl;
+
+ handle_lock_acquired(on_notify_ack);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const LockReleasedPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "lock_released" << dendl;
+
+ handle_lock_released(on_notify_ack);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const UnknownPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "unknown" << dendl;
+
+ on_notify_ack->complete(0);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::LeaderWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/LeaderWatcher.h b/src/tools/rbd_mirror/LeaderWatcher.h
new file mode 100644
index 000000000..58f23148f
--- /dev/null
+++ b/src/tools/rbd_mirror/LeaderWatcher.h
@@ -0,0 +1,313 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_LEADER_WATCHER_H
+#define CEPH_RBD_MIRROR_LEADER_WATCHER_H
+
+#include <list>
+#include <memory>
+#include <string>
+
+#include "common/AsyncOpTracker.h"
+#include "librbd/ManagedLock.h"
+#include "librbd/Watcher.h"
+#include "librbd/managed_lock/Types.h"
+#include "librbd/watcher/Types.h"
+#include "Instances.h"
+#include "tools/rbd_mirror/instances/Types.h"
+#include "tools/rbd_mirror/leader_watcher/Types.h"
+
+namespace librbd {
+class ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+// Watch/notify-based leader election for rbd-mirror: at most one instance
+// per pool holds the leader lock; the leader broadcasts heartbeats and
+// lock-acquired/released events that the other (secondary) instances react to.
+template <typename ImageCtxT = librbd::ImageCtx>
+class LeaderWatcher : protected librbd::Watcher {
+ using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning
+public:
+ static LeaderWatcher* create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &io_ctx,
+ leader_watcher::Listener *listener) {
+ return new LeaderWatcher(threads, io_ctx, listener);
+ }
+
+ LeaderWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &io_ctx,
+ leader_watcher::Listener *listener);
+ ~LeaderWatcher() override;
+
+ // synchronous wrappers around the async init/shut_down variants below
+ int init();
+ void shut_down();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+ bool is_blocklisted() const;
+ bool is_leader() const;
+ bool is_releasing_leader() const;
+ bool get_leader_instance_id(std::string *instance_id) const;
+ void release_leader();
+ void list_instances(std::vector<std::string> *instance_ids);
+
+ std::string get_instance_id();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <------------------------------ WAIT_FOR_TASKS
+ * | (init) ^ ^
+ * v * |
+ * CREATE_OBJECT * * * * * (error) UNREGISTER_WATCH
+ * | * ^
+ * v * |
+ * REGISTER_WATCH * * * * * SHUT_DOWN_LEADER_LOCK
+ * | ^
+ * | (no leader heartbeat and acquire failed) |
+ * | BREAK_LOCK <-------------------------------------\ |
+ * | | (no leader heartbeat) | | (shut down)
+ * | | /----------------------------------------\ | |
+ * | | | (lock_released received) | |
+ * | | | /-------------------------------------\ | |
+ * | | | | (lock_acquired or | | |
+ * | | | | heartbeat received) | | |
+ * | | | | (ENOENT) /-----------\ | | |
+ * | | | | * * * * * * * * * * | | | | |
+ * v v v v v (error) * v | | | |
+ * ACQUIRE_LEADER_LOCK * * * * *> GET_LOCKER ---> <secondary>
+ * | * ^
+ * ....|...................*.................... .....|.....................
+ * . v * . . | post_release .
+ * .INIT_INSTANCES * * * * * . .NOTIFY_LOCK_RELEASED .
+ * . | . .....^.....................
+ * . v . |
+ * .NOTIFY_LISTENER . RELEASE_LEADER_LOCK
+ * . | . ^
+ * . v . .....|.....................
+ * .NOTIFY_LOCK_ACQUIRED . . | .
+ * . | post_acquire . .SHUT_DOWN_INSTANCES .
+ * ....|........................................ . ^ .
+ * v . | .
+ * <leader> -----------------------------------> .NOTIFY_LISTENER .
+ * (shut_down, release_leader, . pre_release .
+ * notify error) ...........................
+ * @endverbatim
+ */
+
+ // Forwards instance add/remove events from Instances<> to our listener.
+ struct InstancesListener : public instances::Listener {
+ LeaderWatcher* leader_watcher;
+
+ InstancesListener(LeaderWatcher* leader_watcher)
+ : leader_watcher(leader_watcher) {
+ }
+
+ void handle_added(const InstanceIds& instance_ids) override {
+ leader_watcher->m_listener->handle_instances_added(instance_ids);
+ }
+
+ void handle_removed(const InstanceIds& instance_ids) override {
+ leader_watcher->m_listener->handle_instances_removed(instance_ids);
+ }
+ };
+
+ // ManagedLock subclass that reports acquire/release state transitions back
+ // to the owning LeaderWatcher via the handle_*_leader_lock callbacks.
+ class LeaderLock : public librbd::ManagedLock<ImageCtxT> {
+ public:
+ typedef librbd::ManagedLock<ImageCtxT> Parent;
+
+ LeaderLock(librados::IoCtx& ioctx, librbd::AsioEngine& asio_engine,
+ const std::string& oid, LeaderWatcher *watcher,
+ bool blocklist_on_break_lock,
+ uint32_t blocklist_expire_seconds)
+ : Parent(ioctx, asio_engine, oid, watcher,
+ librbd::managed_lock::EXCLUSIVE, blocklist_on_break_lock,
+ blocklist_expire_seconds),
+ watcher(watcher) {
+ }
+
+ // leadership is considered held while post-acquire is in progress too
+ bool is_leader() const {
+ std::lock_guard locker{Parent::m_lock};
+ return Parent::is_state_post_acquiring() || Parent::is_state_locked();
+ }
+
+ bool is_releasing_leader() const {
+ std::lock_guard locker{Parent::m_lock};
+ return Parent::is_state_pre_releasing();
+ }
+
+ protected:
+ void post_acquire_lock_handler(int r, Context *on_finish) {
+ if (r == 0) {
+ // lock is owned at this point
+ std::lock_guard locker{Parent::m_lock};
+ Parent::set_state_post_acquiring();
+ }
+ watcher->handle_post_acquire_leader_lock(r, on_finish);
+ }
+ void pre_release_lock_handler(bool shutting_down,
+ Context *on_finish) {
+ watcher->handle_pre_release_leader_lock(on_finish);
+ }
+ void post_release_lock_handler(bool shutting_down, int r,
+ Context *on_finish) {
+ watcher->handle_post_release_leader_lock(r, on_finish);
+ }
+ private:
+ LeaderWatcher *watcher;
+ };
+
+ // boost::variant visitor dispatching a decoded notification payload to the
+ // matching handle_payload() overload.
+ struct HandlePayloadVisitor : public boost::static_visitor<void> {
+ LeaderWatcher *leader_watcher;
+ Context *on_notify_ack;
+
+ HandlePayloadVisitor(LeaderWatcher *leader_watcher, Context *on_notify_ack)
+ : leader_watcher(leader_watcher), on_notify_ack(on_notify_ack) {
+ }
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ leader_watcher->handle_payload(payload, on_notify_ack);
+ }
+ };
+
+ struct C_GetLocker : public Context {
+ LeaderWatcher *leader_watcher;
+ librbd::managed_lock::Locker locker;
+
+ C_GetLocker(LeaderWatcher *leader_watcher)
+ : leader_watcher(leader_watcher) {
+ }
+
+ void finish(int r) override {
+ leader_watcher->handle_get_locker(r, locker);
+ }
+ };
+
+ typedef void (LeaderWatcher<ImageCtxT>::*TimerCallback)();
+
+ // Timer completion that clears m_timer_gate before running the scheduled
+ // timer callback.
+ struct C_TimerGate : public Context {
+ LeaderWatcher *leader_watcher;
+
+ bool leader = false;
+ TimerCallback timer_callback = nullptr;
+
+ C_TimerGate(LeaderWatcher *leader_watcher)
+ : leader_watcher(leader_watcher) {
+ }
+
+ void finish(int r) override {
+ leader_watcher->m_timer_gate = nullptr;
+ leader_watcher->execute_timer_task(leader, timer_callback);
+ }
+ };
+
+ Threads<ImageCtxT> *m_threads;
+ leader_watcher::Listener *m_listener;
+
+ InstancesListener m_instances_listener;
+ mutable ceph::mutex m_lock;
+ uint64_t m_notifier_id;
+ std::string m_instance_id;
+ LeaderLock *m_leader_lock;
+ Context *m_on_finish = nullptr;
+ Context *m_on_shut_down_finish = nullptr;
+ uint64_t m_acquire_attempts = 0;
+ int m_ret_val = 0;
+ Instances<ImageCtxT> *m_instances = nullptr;
+ librbd::managed_lock::Locker m_locker;
+
+ bool m_blocklisted = false;
+
+ AsyncOpTracker m_timer_op_tracker;
+ Context *m_timer_task = nullptr;
+ C_TimerGate *m_timer_gate = nullptr;
+
+ librbd::watcher::NotifyResponse m_heartbeat_response;
+
+ // lock-already-held variants of the public predicates
+ bool is_leader(ceph::mutex &m_lock) const;
+ bool is_releasing_leader(ceph::mutex &m_lock) const;
+
+ void cancel_timer_task();
+ void schedule_timer_task(const std::string &name,
+ int delay_factor, bool leader,
+ TimerCallback callback, bool shutting_down);
+ void execute_timer_task(bool leader, TimerCallback timer_callback);
+
+ void create_leader_object();
+ void handle_create_leader_object(int r);
+
+ void register_watch();
+ void handle_register_watch(int r);
+
+ void shut_down_leader_lock();
+ void handle_shut_down_leader_lock(int r);
+
+ void unregister_watch();
+ void handle_unregister_watch(int r);
+
+ void wait_for_tasks();
+ void handle_wait_for_tasks();
+
+ void break_leader_lock();
+ void handle_break_leader_lock(int r);
+
+ void schedule_get_locker(bool reset_leader, uint32_t delay_factor);
+ void get_locker();
+ void handle_get_locker(int r, librbd::managed_lock::Locker& locker);
+
+ void schedule_acquire_leader_lock(uint32_t delay_factor);
+ void acquire_leader_lock();
+ void handle_acquire_leader_lock(int r);
+
+ void release_leader_lock();
+ void handle_release_leader_lock(int r);
+
+ void init_instances();
+ void handle_init_instances(int r);
+
+ void shut_down_instances();
+ void handle_shut_down_instances(int r);
+
+ void notify_listener();
+ void handle_notify_listener(int r);
+
+ void notify_lock_acquired();
+ void handle_notify_lock_acquired(int r);
+
+ void notify_lock_released();
+ void handle_notify_lock_released(int r);
+
+ void notify_heartbeat();
+ void handle_notify_heartbeat(int r);
+
+ void handle_post_acquire_leader_lock(int r, Context *on_finish);
+ void handle_pre_release_leader_lock(Context *on_finish);
+ void handle_post_release_leader_lock(int r, Context *on_finish);
+
+ void handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) override;
+
+ void handle_rewatch_complete(int r) override;
+
+ void handle_heartbeat(Context *on_ack);
+ void handle_lock_acquired(Context *on_ack);
+ void handle_lock_released(Context *on_ack);
+
+ void handle_payload(const leader_watcher::HeartbeatPayload &payload,
+ Context *on_notify_ack);
+ void handle_payload(const leader_watcher::LockAcquiredPayload &payload,
+ Context *on_notify_ack);
+ void handle_payload(const leader_watcher::LockReleasedPayload &payload,
+ Context *on_notify_ack);
+ void handle_payload(const leader_watcher::UnknownPayload &payload,
+ Context *on_notify_ack);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_LEADER_WATCHER_H
diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc
new file mode 100644
index 000000000..f02cfe65d
--- /dev/null
+++ b/src/tools/rbd_mirror/Mirror.cc
@@ -0,0 +1,748 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <signal.h>
+
+#include <boost/range/adaptor/map.hpp>
+
+#include "common/Formatter.h"
+#include "common/PriorityCache.h"
+#include "common/admin_socket.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Types.h"
+#include "librbd/ImageCtx.h"
+#include "perfglue/heap_profiler.h"
+#include "Mirror.h"
+#include "PoolMetaCache.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librados::Rados;
+using librados::IoCtx;
+using librbd::mirror_peer_t;
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+// Abstract base for "rbd mirror ..." admin socket commands.
+class MirrorAdminSocketCommand {
+public:
+ virtual ~MirrorAdminSocketCommand() {}
+ virtual int call(Formatter *f) = 0;
+};
+
+// "rbd mirror status" -- dump daemon status via Mirror::print_status().
+class StatusCommand : public MirrorAdminSocketCommand {
+public:
+ explicit StatusCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->print_status(f);
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+// "rbd mirror start" -- resume mirroring via Mirror::start().
+class StartCommand : public MirrorAdminSocketCommand {
+public:
+ explicit StartCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->start();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+// "rbd mirror stop" -- manually stop mirroring via Mirror::stop().
+class StopCommand : public MirrorAdminSocketCommand {
+public:
+ explicit StopCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->stop();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+// "rbd mirror restart" -- restart pool replayers via Mirror::restart().
+class RestartCommand : public MirrorAdminSocketCommand {
+public:
+ explicit RestartCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->restart();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+// "rbd mirror flush" -- flush all pool replayers via Mirror::flush().
+class FlushCommand : public MirrorAdminSocketCommand {
+public:
+ explicit FlushCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->flush();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+// "rbd mirror leader release" -- give up leadership via
+// Mirror::release_leader().
+class LeaderReleaseCommand : public MirrorAdminSocketCommand {
+public:
+ explicit LeaderReleaseCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->release_leader();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PriCache: " << this << " " \
+ << m_name << " " << __func__ << ": "
+
+// PriorityCache participant tracking a per-journal cache allocation split
+// into a "base" portion (up to min_size) and an "extra" portion (the rest,
+// up to max_size). Each portion is requested at its own priority, and
+// prioritize() gradually raises the base priority toward PRI0.
+struct PriCache : public PriorityCache::PriCache {
+ std::string m_name;
+ int64_t m_base_cache_max_size;
+ int64_t m_extra_cache_max_size;
+
+ PriorityCache::Priority m_base_cache_pri = PriorityCache::Priority::PRI10;
+ PriorityCache::Priority m_extra_cache_pri = PriorityCache::Priority::PRI10;
+ int64_t m_base_cache_bytes = 0;
+ int64_t m_extra_cache_bytes = 0;
+ int64_t m_committed_bytes = 0;
+ double m_cache_ratio = 0;
+
+ PriCache(const std::string &name, uint64_t min_size, uint64_t max_size)
+ : m_name(name), m_base_cache_max_size(min_size),
+ m_extra_cache_max_size(max_size - min_size) {
+ ceph_assert(max_size >= min_size);
+ }
+
+ // Bump the base portion one priority level higher (numerically lower),
+ // stopping at PRI0.
+ void prioritize() {
+ if (m_base_cache_pri == PriorityCache::Priority::PRI0) {
+ return;
+ }
+ auto pri = static_cast<uint8_t>(m_base_cache_pri);
+ m_base_cache_pri = static_cast<PriorityCache::Priority>(--pri);
+
+ dout(30) << m_base_cache_pri << dendl;
+ }
+
+ // Bytes we would like at priority `pri` (base and/or extra maximums).
+ int64_t request_cache_bytes(PriorityCache::Priority pri,
+ uint64_t total_cache) const override {
+ int64_t cache_bytes = 0;
+
+ if (pri == m_base_cache_pri) {
+ cache_bytes += m_base_cache_max_size;
+ }
+ if (pri == m_extra_cache_pri) {
+ cache_bytes += m_extra_cache_max_size;
+ }
+
+ dout(30) << cache_bytes << dendl;
+
+ return cache_bytes;
+ }
+
+ int64_t get_cache_bytes(PriorityCache::Priority pri) const override {
+ int64_t cache_bytes = 0;
+
+ if (pri == m_base_cache_pri) {
+ cache_bytes += m_base_cache_bytes;
+ }
+ if (pri == m_extra_cache_pri) {
+ cache_bytes += m_extra_cache_bytes;
+ }
+
+ dout(30) << "pri=" << pri << " " << cache_bytes << dendl;
+
+ return cache_bytes;
+ }
+
+ int64_t get_cache_bytes() const override {
+ auto cache_bytes = m_base_cache_bytes + m_extra_cache_bytes;
+
+ dout(30) << m_base_cache_bytes << "+" << m_extra_cache_bytes << "="
+ << cache_bytes << dendl;
+
+ return cache_bytes;
+ }
+
+ // Fill the base portion first (capped at its max); any remainder goes to
+ // the extra portion when `pri` matches it.
+ void set_cache_bytes(PriorityCache::Priority pri, int64_t bytes) override {
+ ceph_assert(bytes >= 0);
+ ceph_assert(pri == m_base_cache_pri || pri == m_extra_cache_pri ||
+ bytes == 0);
+
+ dout(30) << "pri=" << pri << " " << bytes << dendl;
+
+ if (pri == m_base_cache_pri) {
+ m_base_cache_bytes = std::min(m_base_cache_max_size, bytes);
+ bytes -= std::min(m_base_cache_bytes, bytes);
+ }
+
+ if (pri == m_extra_cache_pri) {
+ m_extra_cache_bytes = bytes;
+ }
+ }
+
+ void add_cache_bytes(PriorityCache::Priority pri, int64_t bytes) override {
+ ceph_assert(bytes >= 0);
+ ceph_assert(pri == m_base_cache_pri || pri == m_extra_cache_pri);
+
+ dout(30) << "pri=" << pri << " " << bytes << dendl;
+
+ if (pri == m_base_cache_pri) {
+ ceph_assert(m_base_cache_bytes <= m_base_cache_max_size);
+
+ auto chunk = std::min(m_base_cache_max_size - m_base_cache_bytes, bytes);
+ m_base_cache_bytes += chunk;
+ bytes -= chunk;
+ }
+
+ if (pri == m_extra_cache_pri) {
+ m_extra_cache_bytes += bytes;
+ }
+ }
+
+ // Commit the current total, rounded up to a 4 KiB boundary.
+ int64_t commit_cache_size(uint64_t total_cache) override {
+ m_committed_bytes = p2roundup<int64_t>(get_cache_bytes(), 4096);
+
+ dout(30) << m_committed_bytes << dendl;
+
+ return m_committed_bytes;
+ }
+
+ int64_t get_committed_size() const override {
+ dout(30) << m_committed_bytes << dendl;
+
+ return m_committed_bytes;
+ }
+
+ double get_cache_ratio() const override {
+ dout(30) << m_cache_ratio << dendl;
+
+ return m_cache_ratio;
+ }
+
+ void set_cache_ratio(double ratio) override {
+ dout(30) << m_cache_ratio << dendl;
+
+ m_cache_ratio = ratio;
+ }
+
+ std::string get_cache_name() const override {
+ return m_name;
+ }
+};
+
+} // anonymous namespace
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::Mirror: " << this << " " \
+ << __func__ << ": "
+
+// Registers the "rbd mirror ..." admin socket commands and dispatches
+// incoming calls to the matching MirrorAdminSocketCommand instance.
+class MirrorAdminSocketHook : public AdminSocketHook {
+public:
+ MirrorAdminSocketHook(CephContext *cct, Mirror *mirror) :
+ admin_socket(cct->get_admin_socket()) {
+ std::string command;
+ int r;
+
+ // each command is only tracked if registration succeeded
+ command = "rbd mirror status";
+ r = admin_socket->register_command(command, this,
+ "get status for rbd mirror");
+ if (r == 0) {
+ commands[command] = new StatusCommand(mirror);
+ }
+
+ command = "rbd mirror start";
+ r = admin_socket->register_command(command, this,
+ "start rbd mirror");
+ if (r == 0) {
+ commands[command] = new StartCommand(mirror);
+ }
+
+ command = "rbd mirror stop";
+ r = admin_socket->register_command(command, this,
+ "stop rbd mirror");
+ if (r == 0) {
+ commands[command] = new StopCommand(mirror);
+ }
+
+ command = "rbd mirror restart";
+ r = admin_socket->register_command(command, this,
+ "restart rbd mirror");
+ if (r == 0) {
+ commands[command] = new RestartCommand(mirror);
+ }
+
+ command = "rbd mirror flush";
+ r = admin_socket->register_command(command, this,
+ "flush rbd mirror");
+ if (r == 0) {
+ commands[command] = new FlushCommand(mirror);
+ }
+
+ command = "rbd mirror leader release";
+ r = admin_socket->register_command(command, this,
+ "release rbd mirror leader");
+ if (r == 0) {
+ commands[command] = new LeaderReleaseCommand(mirror);
+ }
+ }
+
+ ~MirrorAdminSocketHook() override {
+ (void)admin_socket->unregister_commands(this);
+ for (Commands::const_iterator i = commands.begin(); i != commands.end();
+ ++i) {
+ delete i->second;
+ }
+ }
+
+ int call(std::string_view command, const cmdmap_t& cmdmap,
+ Formatter *f,
+ std::ostream& errss,
+ bufferlist& out) override {
+ Commands::const_iterator i = commands.find(command);
+ // only registered commands can reach this hook
+ ceph_assert(i != commands.end());
+ return i->second->call(f);
+ }
+
+private:
+ // std::less<> enables heterogeneous (string_view) lookup in call()
+ typedef std::map<std::string, MirrorAdminSocketCommand*, std::less<>> Commands;
+
+ AdminSocket *admin_socket;
+ Commands commands;
+};
+
+// Manages memory-autotuned journal cache budgets. When
+// rbd_mirror_memory_autotune is disabled, register_cache() simply grants
+// each cache its maximum size and no manager is created.
+class CacheManagerHandler : public journal::CacheManagerHandler {
+public:
+ CacheManagerHandler(CephContext *cct)
+ : m_cct(cct) {
+
+ if (!m_cct->_conf.get_val<bool>("rbd_mirror_memory_autotune")) {
+ return;
+ }
+
+ uint64_t base = m_cct->_conf.get_val<Option::size_t>(
+ "rbd_mirror_memory_base");
+ double fragmentation = m_cct->_conf.get_val<double>(
+ "rbd_mirror_memory_expected_fragmentation");
+ uint64_t target = m_cct->_conf.get_val<Option::size_t>(
+ "rbd_mirror_memory_target");
+ uint64_t min = m_cct->_conf.get_val<Option::size_t>(
+ "rbd_mirror_memory_cache_min");
+ uint64_t max = min;
+
+ // When setting the maximum amount of memory to use for cache, first
+ // assume some base amount of memory for the daemon and then fudge in
+ // some overhead for fragmentation that scales with cache usage.
+ uint64_t ltarget = (1.0 - fragmentation) * target;
+ if (ltarget > base + min) {
+ max = ltarget - base;
+ }
+
+ m_next_balance = ceph_clock_now();
+ m_next_resize = ceph_clock_now();
+
+ m_cache_manager = std::make_unique<PriorityCache::Manager>(
+ m_cct, min, max, target, false);
+ }
+
+ ~CacheManagerHandler() {
+ std::lock_guard locker{m_lock};
+
+ // all caches must have been unregistered before destruction
+ ceph_assert(m_caches.empty());
+ }
+
+ void register_cache(const std::string &cache_name,
+ uint64_t min_size, uint64_t max_size,
+ journal::CacheRebalanceHandler* handler) override {
+ // autotune disabled: grant the maximum immediately
+ if (!m_cache_manager) {
+ handler->handle_cache_rebalanced(max_size);
+ return;
+ }
+
+ dout(20) << cache_name << " min_size=" << min_size << " max_size="
+ << max_size << " handler=" << handler << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ auto p = m_caches.insert(
+ {cache_name, {cache_name, min_size, max_size, handler}});
+ ceph_assert(p.second == true);
+
+ m_cache_manager->insert(cache_name, p.first->second.pri_cache, false);
+ // force a rebalance on the next run_cache_manager() pass
+ m_next_balance = ceph_clock_now();
+ }
+
+ void unregister_cache(const std::string &cache_name) override {
+ if (!m_cache_manager) {
+ return;
+ }
+
+ dout(20) << cache_name << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ auto it = m_caches.find(cache_name);
+ ceph_assert(it != m_caches.end());
+
+ m_cache_manager->erase(cache_name);
+ m_caches.erase(it);
+ m_next_balance = ceph_clock_now();
+ }
+
+ // Called periodically from Mirror::run(): rebalances the per-cache
+ // allocations and, when using tcmalloc, resizes process memory.
+ void run_cache_manager() {
+ if (!m_cache_manager) {
+ return;
+ }
+
+ std::lock_guard locker{m_lock};
+
+ // Before we trim, check and see if it's time to rebalance/resize.
+ auto autotune_interval = m_cct->_conf.get_val<double>(
+ "rbd_mirror_memory_cache_autotune_interval");
+ auto resize_interval = m_cct->_conf.get_val<double>(
+ "rbd_mirror_memory_cache_resize_interval");
+
+ utime_t now = ceph_clock_now();
+
+ if (autotune_interval > 0 && m_next_balance <= now) {
+ dout(20) << "balance" << dendl;
+ m_cache_manager->balance();
+
+ for (auto &it : m_caches) {
+ auto pri_cache = static_cast<PriCache *>(it.second.pri_cache.get());
+ auto new_cache_bytes = pri_cache->get_cache_bytes();
+ it.second.handler->handle_cache_rebalanced(new_cache_bytes);
+ pri_cache->prioritize();
+ }
+
+ m_next_balance = ceph_clock_now();
+ m_next_balance += autotune_interval;
+ }
+
+ if (resize_interval > 0 && m_next_resize < now) {
+ if (ceph_using_tcmalloc()) {
+ dout(20) << "tune memory" << dendl;
+ m_cache_manager->tune_memory();
+ }
+
+ m_next_resize = ceph_clock_now();
+ m_next_resize += resize_interval;
+ }
+ }
+
+private:
+ // Per-journal registration: the PriCache shared with the manager plus the
+ // handler notified after each rebalance.
+ struct Cache {
+ std::shared_ptr<PriorityCache::PriCache> pri_cache;
+ journal::CacheRebalanceHandler *handler;
+
+ Cache(const std::string name, uint64_t min_size, uint64_t max_size,
+ journal::CacheRebalanceHandler *handler)
+ : pri_cache(new PriCache(name, min_size, max_size)), handler(handler) {
+ }
+ };
+
+ CephContext *m_cct;
+
+ mutable ceph::mutex m_lock =
+ ceph::make_mutex("rbd::mirror::CacheManagerHandler");
+ std::unique_ptr<PriorityCache::Manager> m_cache_manager;
+ std::map<std::string, Cache> m_caches;
+
+ utime_t m_next_balance;
+ utime_t m_next_resize;
+};
+
+// Construct the daemon-wide state; cluster connection happens later in
+// init(). m_asok_hook is the only raw-owned member (freed in ~Mirror).
+Mirror::Mirror(CephContext *cct, const std::vector<const char*> &args) :
+ m_cct(cct),
+ m_args(args),
+ m_local(new librados::Rados()),
+ m_cache_manager_handler(new CacheManagerHandler(cct)),
+ m_pool_meta_cache(new PoolMetaCache(cct)),
+ m_asok_hook(new MirrorAdminSocketHook(cct, this)) {
+}
+
+Mirror::~Mirror()
+{
+ // only the admin socket hook is raw-owned; remaining members are
+ // unique_ptr/shared-ownership and clean themselves up
+ delete m_asok_hook;
+}
+
+// Signal dispatcher: SIGHUP reopens log files, SIGINT/SIGTERM request a
+// clean shutdown of the main loop; any other signal aborts.
+void Mirror::handle_signal(int signum)
+{
+ dout(20) << signum << dendl;
+
+ std::lock_guard l{m_lock};
+
+ switch (signum) {
+ case SIGHUP:
+ for (auto &it : m_pool_replayers) {
+ it.second->reopen_logs();
+ }
+ g_ceph_context->reopen_logs();
+ break;
+
+ case SIGINT:
+ case SIGTERM:
+ // wake run() so it can observe m_stopping and exit
+ m_stopping = true;
+ m_cond.notify_all();
+ break;
+
+ default:
+ ceph_abort_msgf("unexpected signal %d", signum);
+ }
+}
+
+// Connect to the local cluster, register the service daemon and start the
+// cluster watcher. Returns 0 on success or a negative errno.
+int Mirror::init()
+{
+ int r = m_local->init_with_context(m_cct);
+ if (r < 0) {
+ derr << "could not initialize rados handle" << dendl;
+ return r;
+ }
+
+ r = m_local->connect();
+ if (r < 0) {
+ derr << "error connecting to local cluster" << dendl;
+ return r;
+ }
+
+ // the Threads singleton is shared process-wide via the CephContext
+ m_threads = &(m_cct->lookup_or_create_singleton_object<
+ Threads<librbd::ImageCtx>>("rbd_mirror::threads", false, m_local));
+ m_service_daemon.reset(new ServiceDaemon<>(m_cct, m_local, m_threads));
+
+ r = m_service_daemon->init();
+ if (r < 0) {
+ derr << "error registering service daemon: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ m_local_cluster_watcher.reset(new ClusterWatcher(m_local, m_lock,
+ m_service_daemon.get()));
+ return r;
+}
+
+// Main daemon loop: periodically refreshes the pool/peer configuration,
+// reconciles the pool replayers and runs the cache manager, until a
+// SIGINT/SIGTERM sets m_stopping. On exit all pool replayers are stopped.
+void Mirror::run()
+{
+ dout(20) << "enter" << dendl;
+
+ utime_t next_refresh_pools = ceph_clock_now();
+
+ while (!m_stopping) {
+ utime_t now = ceph_clock_now();
+ bool refresh_pools = next_refresh_pools <= now;
+ if (refresh_pools) {
+ m_local_cluster_watcher->refresh_pools();
+ next_refresh_pools = ceph_clock_now();
+ next_refresh_pools += m_cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_pool_replayers_refresh_interval");
+ }
+ std::unique_lock l{m_lock};
+ if (!m_manual_stop) {
+ if (refresh_pools) {
+ update_pool_replayers(m_local_cluster_watcher->get_pool_peers(),
+ m_local_cluster_watcher->get_site_name());
+ }
+ m_cache_manager_handler->run_cache_manager();
+ }
+ // woken early by handle_signal() on shutdown
+ m_cond.wait_for(l, 1s);
+ }
+
+ // stop all pool replayers in parallel
+ std::lock_guard locker{m_lock};
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->stop(false);
+ }
+ dout(20) << "return" << dendl;
+}
+
+// Admin socket "rbd mirror status": emit the status of every pool replayer
+// into the supplied formatter. No-op once shutdown has begun.
+void Mirror::print_status(Formatter *f)
+{
+ dout(20) << "enter" << dendl;
+
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ f->open_object_section("mirror_status");
+ f->open_array_section("pool_replayers");
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->print_status(f);
+ }
+ f->close_section();
+ f->close_section();
+}
+
+// Admin socket "rbd mirror start": clear the manual-stop flag and start all
+// pool replayers. No-op once shutdown has begun.
+void Mirror::start()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = false;
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->start();
+ }
+}
+
+// Admin socket "rbd mirror stop": set the manual-stop flag (which pauses
+// the run() reconciliation) and stop all pool replayers.
+void Mirror::stop()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = true;
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->stop(true);
+ }
+}
+
+// Admin socket "rbd mirror restart": clear the manual-stop flag and restart
+// every pool replayer.
+void Mirror::restart()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = false;
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->restart();
+ }
+}
+
+// Admin socket "rbd mirror flush": flush all pool replayers. Skipped while
+// shutting down or manually stopped.
+void Mirror::flush()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping || m_manual_stop) {
+ return;
+ }
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->flush();
+ }
+}
+
+// Admin socket "rbd mirror leader release": ask every pool replayer to give
+// up pool leadership. No-op once shutdown has begun.
+void Mirror::release_leader()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->release_leader();
+ }
+}
+
+// Reconcile m_pool_replayers against the freshly-observed pool/peer config:
+// tear down replayers whose pool or peer disappeared, restart replayers
+// whose site name changed / that got blocklisted / that failed, and create
+// replayers for newly-seen (pool, peer) pairs. Caller must hold m_lock.
+void Mirror::update_pool_replayers(const PoolPeers &pool_peers,
+ const std::string& site_name)
+{
+ dout(20) << "enter" << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ // remove stale pool replayers before creating new pool replayers
+ for (auto it = m_pool_replayers.begin(); it != m_pool_replayers.end();) {
+ auto &peer = it->first.second;
+ auto pool_peer_it = pool_peers.find(it->first.first);
+ if (pool_peer_it == pool_peers.end() ||
+ pool_peer_it->second.find(peer) == pool_peer_it->second.end()) {
+ dout(20) << "removing pool replayer for " << peer << dendl;
+ // TODO: make async
+ it->second->shut_down();
+ it = m_pool_replayers.erase(it);
+ } else {
+ ++it;
+ }
+ }
+
+ for (auto &kv : pool_peers) {
+ for (auto &peer : kv.second) {
+ PoolPeer pool_peer(kv.first, peer);
+
+ auto pool_replayers_it = m_pool_replayers.find(pool_peer);
+ if (pool_replayers_it != m_pool_replayers.end()) {
+ auto& pool_replayer = pool_replayers_it->second;
+ // an existing replayer is bounced (shut_down + init) when the site
+ // name changed, it was blocklisted, or it stopped running
+ if (!m_site_name.empty() && !site_name.empty() &&
+ m_site_name != site_name) {
+ dout(0) << "restarting pool replayer for " << peer << " due to "
+ << "updated site name" << dendl;
+ // TODO: make async
+ pool_replayer->shut_down();
+ pool_replayer->init(site_name);
+ } else if (pool_replayer->is_blocklisted()) {
+ derr << "restarting blocklisted pool replayer for " << peer << dendl;
+ // TODO: make async
+ pool_replayer->shut_down();
+ pool_replayer->init(site_name);
+ } else if (!pool_replayer->is_running()) {
+ derr << "restarting failed pool replayer for " << peer << dendl;
+ // TODO: make async
+ pool_replayer->shut_down();
+ pool_replayer->init(site_name);
+ }
+ } else {
+ dout(20) << "starting pool replayer for " << peer << dendl;
+ unique_ptr<PoolReplayer<>> pool_replayer(
+ new PoolReplayer<>(m_threads, m_service_daemon.get(),
+ m_cache_manager_handler.get(),
+ m_pool_meta_cache.get(), kv.first, peer,
+ m_args));
+
+ // TODO: make async
+ pool_replayer->init(site_name);
+ m_pool_replayers.emplace(pool_peer, std::move(pool_replayer));
+ }
+ }
+
+ // TODO currently only support a single peer
+ }
+
+ m_site_name = site_name;
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/Mirror.h b/src/tools/rbd_mirror/Mirror.h
new file mode 100644
index 000000000..f92a63b68
--- /dev/null
+++ b/src/tools/rbd_mirror/Mirror.h
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_H
+#define CEPH_RBD_MIRROR_H
+
+#include "common/ceph_context.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+#include "include/utime.h"
+#include "ClusterWatcher.h"
+#include "PoolReplayer.h"
+#include "tools/rbd_mirror/Types.h"
+
+#include <set>
+#include <map>
+#include <memory>
+#include <atomic>
+
+namespace journal { class CacheManagerHandler; }
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct ServiceDaemon;
+template <typename> struct Threads;
+class CacheManagerHandler;
+class MirrorAdminSocketHook;
+class PoolMetaCache;
+
+/**
+ * Contains the main loop and overall state for rbd-mirror.
+ *
+ * Sets up mirroring, and coordinates between noticing config
+ * changes and applying them.
+ */
+class Mirror {
+public:
+ Mirror(CephContext *cct, const std::vector<const char*> &args);
+ Mirror(const Mirror&) = delete;
+ Mirror& operator=(const Mirror&) = delete;
+ ~Mirror();
+
+ int init();
+ void run();
+ void handle_signal(int signum);
+
+ // admin socket entry points (see MirrorAdminSocketHook in Mirror.cc)
+ void print_status(Formatter *f);
+ void start();
+ void stop();
+ void restart();
+ void flush();
+ void release_leader();
+
+private:
+ typedef ClusterWatcher::PoolPeers PoolPeers;
+ // (local pool id, remote peer) uniquely identifies a pool replayer
+ typedef std::pair<int64_t, PeerSpec> PoolPeer;
+
+ void update_pool_replayers(const PoolPeers &pool_peers,
+ const std::string& site_name);
+
+ void create_cache_manager();
+ void run_cache_manager(utime_t *next_run_interval);
+
+ CephContext *m_cct;
+ std::vector<const char*> m_args;
+ Threads<librbd::ImageCtx> *m_threads = nullptr;
+ ceph::mutex m_lock = ceph::make_mutex("rbd::mirror::Mirror");
+ ceph::condition_variable m_cond;
+ RadosRef m_local;
+ std::unique_ptr<ServiceDaemon<librbd::ImageCtx>> m_service_daemon;
+
+ // monitor local cluster for config changes in peers
+ std::unique_ptr<ClusterWatcher> m_local_cluster_watcher;
+ std::unique_ptr<CacheManagerHandler> m_cache_manager_handler;
+ std::unique_ptr<PoolMetaCache> m_pool_meta_cache;
+ std::map<PoolPeer, std::unique_ptr<PoolReplayer<>>> m_pool_replayers;
+ // set by handle_signal() on SIGINT/SIGTERM; checked by the run() loop
+ std::atomic<bool> m_stopping = { false };
+ bool m_manual_stop = false;
+ MirrorAdminSocketHook *m_asok_hook;
+ std::string m_site_name;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_H
diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.cc b/src/tools/rbd_mirror/MirrorStatusUpdater.cc
new file mode 100644
index 000000000..257cb1df2
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusUpdater.cc
@@ -0,0 +1,397 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/MirrorStatusUpdater.h"
+#include "include/Context.h"
+#include "include/stringify.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/MirrorStatusWatcher.h"
+#include "tools/rbd_mirror/Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::MirrorStatusUpdater " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+static const double UPDATE_INTERVAL_SECONDS = 30;
+static const uint32_t MAX_UPDATES_PER_OP = 100;
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+// Construct an updater bound to one pool's RBD_MIRRORING object.
+// local_mirror_uuid is later stamped into every status record that is
+// written out (see update_task()); callers pass "" for the local pool.
+template <typename I>
+MirrorStatusUpdater<I>::MirrorStatusUpdater(
+    librados::IoCtx& io_ctx, Threads<I> *threads,
+    const std::string& local_mirror_uuid)
+  : m_io_ctx(io_ctx), m_threads(threads),
+    m_local_mirror_uuid(local_mirror_uuid),
+    m_lock(ceph::make_mutex("rbd::mirror::MirrorStatusUpdater " +
+             stringify(m_io_ctx.get_id()))) {
+  dout(10) << "local_mirror_uuid=" << local_mirror_uuid << ", "
+           << "pool_id=" << m_io_ctx.get_id() << dendl;
+}
+
+template <typename I>
+MirrorStatusUpdater<I>::~MirrorStatusUpdater() {
+  // shut_down() must have completed before destruction
+  ceph_assert(!m_initialized);
+  delete m_mirror_status_watcher;
+}
+
+// Start the periodic flush timer and bring up the status watcher.
+// on_finish is completed once the watcher is initialized, or -- on
+// error -- after the partially-initialized state is torn down again.
+template <typename I>
+void MirrorStatusUpdater<I>::init(Context* on_finish) {
+  dout(10) << dendl;
+
+  ceph_assert(!m_initialized);
+  m_initialized = true;
+
+  {
+    std::lock_guard timer_locker{m_threads->timer_lock};
+    schedule_timer_task();
+  }
+
+  init_mirror_status_watcher(on_finish);
+}
+
+// Create and initialize the watcher on the pool's RBD_MIRRORING object.
+template <typename I>
+void MirrorStatusUpdater<I>::init_mirror_status_watcher(Context* on_finish) {
+  dout(10) << dendl;
+
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+      handle_init_mirror_status_watcher(r, on_finish);
+    });
+  m_mirror_status_watcher = MirrorStatusWatcher<I>::create(
+    m_io_ctx, m_threads->work_queue);
+  m_mirror_status_watcher->init(ctx);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::handle_init_mirror_status_watcher(
+    int r, Context* on_finish) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to init mirror status watcher: " << cpp_strerror(r)
+         << dendl;
+
+    delete m_mirror_status_watcher;
+    m_mirror_status_watcher = nullptr;
+
+    // unwind what init() already set up (timer task, initialized flag)
+    // while preserving the original error code for the caller
+    on_finish = new LambdaContext([r, on_finish](int) {
+        on_finish->complete(r);
+      });
+    shut_down(on_finish);
+    return;
+  }
+
+  m_threads->work_queue->queue(on_finish, 0);
+}
+
+// Cancel the flush timer, clear the initialized flag and tear down the
+// watcher.  Also invoked from init()'s error path, where the watcher
+// may already be null.
+template <typename I>
+void MirrorStatusUpdater<I>::shut_down(Context* on_finish) {
+  dout(10) << dendl;
+
+  {
+    std::lock_guard timer_locker{m_threads->timer_lock};
+    ceph_assert(m_timer_task != nullptr);
+    m_threads->timer->cancel_event(m_timer_task);
+  }
+
+  {
+    std::unique_lock locker(m_lock);
+    ceph_assert(m_initialized);
+    m_initialized = false;
+  }
+
+  shut_down_mirror_status_watcher(on_finish);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::shut_down_mirror_status_watcher(
+    Context* on_finish) {
+  // watcher is null when init_mirror_status_watcher() failed
+  if (m_mirror_status_watcher == nullptr) {
+    finalize_shutdown(0, on_finish);
+    return;
+  }
+
+  dout(10) << dendl;
+
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+      handle_shut_down_mirror_status_watcher(r, on_finish);
+    });
+  m_mirror_status_watcher->shut_down(ctx);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::handle_shut_down_mirror_status_watcher(
+    int r, Context* on_finish) {
+  dout(10) << "r=" << r << dendl;
+
+  // errors are logged but shutdown proceeds regardless
+  if (r < 0) {
+    derr << "failed to shut down mirror status watcher: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  finalize_shutdown(r, on_finish);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::finalize_shutdown(int r, Context* on_finish) {
+  dout(10) << dendl;
+
+  {
+    std::unique_lock locker(m_lock);
+    if (m_update_in_progress) {
+      // an update task is still running; defer completion until
+      // handle_update_task() fires the queued contexts
+      if (r < 0) {
+        // handle_update_task() completes contexts with 0 -- wrap so the
+        // shutdown error code is not lost
+        on_finish = new LambdaContext([r, on_finish](int) {
+          on_finish->complete(r);
+        });
+      }
+
+      m_update_on_finish_ctxs.push_back(on_finish);
+      return;
+    }
+  }
+
+  m_threads->work_queue->queue(on_finish, r);
+}
+
+// Report whether a status entry is currently cached for the given
+// global image id.
+template <typename I>
+bool MirrorStatusUpdater<I>::exists(const std::string& global_image_id) {
+  dout(15) << "global_image_id=" << global_image_id << dendl;
+
+  std::lock_guard locker{m_lock};
+  return m_global_image_status.find(global_image_id) !=
+           m_global_image_status.end();
+}
+
+// Cache the latest status for an image; with immediate_update the image
+// is also queued for an asynchronous flush (otherwise it waits for the
+// periodic timer).
+template <typename I>
+void MirrorStatusUpdater<I>::set_mirror_image_status(
+    const std::string& global_image_id,
+    const cls::rbd::MirrorImageSiteStatus& mirror_image_site_status,
+    bool immediate_update) {
+  dout(15) << "global_image_id=" << global_image_id << ", "
+           << "mirror_image_site_status=" << mirror_image_site_status << dendl;
+
+  std::unique_lock locker(m_lock);
+
+  m_global_image_status[global_image_id] = mirror_image_site_status;
+  if (immediate_update) {
+    m_update_global_image_ids.insert(global_image_id);
+    // consumes the lock (unlocks before queueing the work item)
+    queue_update_task(std::move(locker));
+  }
+}
+
+// Drop the cached status only (queue_update=false), without queueing a
+// removal of the on-disk record -- used when the status is about to be
+// refreshed rather than deleted.
+template <typename I>
+void MirrorStatusUpdater<I>::remove_refresh_mirror_image_status(
+    const std::string& global_image_id,
+    Context* on_finish) {
+  if (try_remove_mirror_image_status(global_image_id, false, false,
+                                     on_finish)) {
+    m_threads->work_queue->queue(on_finish, 0);
+  }
+}
+
+// Drop the cached status and queue removal of the on-disk record;
+// immediate_update flushes right away instead of waiting for the timer.
+template <typename I>
+void MirrorStatusUpdater<I>::remove_mirror_image_status(
+    const std::string& global_image_id, bool immediate_update,
+    Context* on_finish) {
+  if (try_remove_mirror_image_status(global_image_id, true, immediate_update,
+                                     on_finish)) {
+    m_threads->work_queue->queue(on_finish, 0);
+  }
+}
+
+// Attempt the removal.  Returns true when the caller should complete
+// on_finish itself; returns false when the image is part of a
+// scheduled/in-flight update, in which case the removal is retried (and
+// on_finish completed) after that update finishes.
+template <typename I>
+bool MirrorStatusUpdater<I>::try_remove_mirror_image_status(
+    const std::string& global_image_id, bool queue_update,
+    bool immediate_update, Context* on_finish) {
+  dout(15) << "global_image_id=" << global_image_id << ", "
+           << "queue_update=" << queue_update << ", "
+           << "immediate_update=" << immediate_update << dendl;
+
+  std::unique_lock locker(m_lock);
+  if ((m_update_in_flight &&
+       m_updating_global_image_ids.count(global_image_id) > 0) ||
+      ((m_update_in_progress || m_update_requested) &&
+       m_update_global_image_ids.count(global_image_id) > 0)) {
+    // if update is scheduled/in-progress, wait for it to complete
+    on_finish = new LambdaContext(
+      [this, global_image_id, queue_update, immediate_update,
+       on_finish](int r) {
+        if (try_remove_mirror_image_status(global_image_id, queue_update,
+                                           immediate_update, on_finish)) {
+          on_finish->complete(0);
+        }
+      });
+    m_update_on_finish_ctxs.push_back(on_finish);
+    return false;
+  }
+
+  // erasing the cache entry makes the next update_task() issue a
+  // status-remove op for this image (if queued below)
+  m_global_image_status.erase(global_image_id);
+  if (queue_update) {
+    m_update_global_image_ids.insert(global_image_id);
+    if (immediate_update) {
+      queue_update_task(std::move(locker));
+    }
+  }
+
+  return true;
+}
+
+// Arm the next periodic flush; caller must hold the timer lock.
+template <typename I>
+void MirrorStatusUpdater<I>::schedule_timer_task() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+  ceph_assert(m_timer_task == nullptr);
+  m_timer_task = create_context_callback<
+    MirrorStatusUpdater<I>,
+    &MirrorStatusUpdater<I>::handle_timer_task>(this);
+  m_threads->timer->add_event_after(UPDATE_INTERVAL_SECONDS, m_timer_task);
+}
+
+// Periodic timer callback (invoked with the timer lock held): queue all
+// cached statuses for a full flush and re-arm the timer.
+template <typename I>
+void MirrorStatusUpdater<I>::handle_timer_task(int r) {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+  ceph_assert(m_timer_task != nullptr);
+  m_timer_task = nullptr;
+  schedule_timer_task();
+
+  std::unique_lock locker(m_lock);
+  for (auto& pair : m_global_image_status) {
+    m_update_global_image_ids.insert(pair.first);
+  }
+
+  // consumes the lock
+  queue_update_task(std::move(locker));
+}
+
+// Queue an asynchronous update_task() on the work queue.  Takes
+// ownership of the caller's lock and releases it before queueing.
+template <typename I>
+void MirrorStatusUpdater<I>::queue_update_task(
+    std::unique_lock<ceph::mutex>&& locker) {
+  // silently drop updates once shut_down() has started
+  if (!m_initialized) {
+    return;
+  }
+
+  if (m_update_in_progress) {
+    if (m_update_in_flight) {
+      // ops already sent to RADOS won't see this change -- request a
+      // follow-up pass from handle_update_task()
+      dout(10) << "deferring update due to in-flight ops" << dendl;
+      m_update_requested = true;
+    }
+    // otherwise update_task() hasn't run yet and will pick up the
+    // newly-queued ids when it does
+    return;
+  }
+
+  m_update_in_progress = true;
+  ceph_assert(!m_update_in_flight);
+  ceph_assert(!m_update_requested);
+  locker.unlock();
+
+  dout(10) << dendl;
+  auto ctx = create_context_callback<
+    MirrorStatusUpdater<I>,
+    &MirrorStatusUpdater<I>::update_task>(this);
+  m_threads->work_queue->queue(ctx);
+}
+
+// Flush the queued image ids to the RBD_MIRRORING object, batching up to
+// MAX_UPDATES_PER_OP updates per write op.  The `r` argument from the
+// work-queue callback is unused.
+template <typename I>
+void MirrorStatusUpdater<I>::update_task(int r) {
+  dout(10) << dendl;
+
+  std::unique_lock locker(m_lock);
+  ceph_assert(m_update_in_progress);
+  ceph_assert(!m_update_in_flight);
+  m_update_in_flight = true;
+
+  // snapshot the work under the lock; I/O is issued unlocked below
+  std::swap(m_updating_global_image_ids, m_update_global_image_ids);
+  auto updating_global_image_ids = m_updating_global_image_ids;
+  auto global_image_status = m_global_image_status;
+  locker.unlock();
+
+  Context* ctx = create_context_callback<
+    MirrorStatusUpdater<I>,
+    &MirrorStatusUpdater<I>::handle_update_task>(this);
+  if (updating_global_image_ids.empty()) {
+    ctx->complete(0);
+    return;
+  }
+
+  auto gather = new C_Gather(g_ceph_context, ctx);
+
+  auto it = updating_global_image_ids.begin();
+  while (it != updating_global_image_ids.end()) {
+    librados::ObjectWriteOperation op;
+    uint32_t op_count = 0;
+
+    while (it != updating_global_image_ids.end() &&
+           op_count < MAX_UPDATES_PER_OP) {
+      auto& global_image_id = *it;
+      ++it;
+
+      // a queued id with no cached status means the image was removed
+      auto status_it = global_image_status.find(global_image_id);
+      if (status_it == global_image_status.end()) {
+        librbd::cls_client::mirror_image_status_remove(&op, global_image_id);
+        ++op_count;
+        continue;
+      }
+
+      // tag the status with our mirror uuid before writing it out
+      status_it->second.mirror_uuid = m_local_mirror_uuid;
+      librbd::cls_client::mirror_image_status_set(&op, global_image_id,
+                                                  status_it->second);
+      ++op_count;
+    }
+
+    auto aio_comp = create_rados_callback(gather->new_sub());
+    int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+    ceph_assert(r == 0);
+    aio_comp->release();
+  }
+
+  gather->activate();
+}
+
+// Completion for update_task(): reset the in-progress/in-flight state,
+// start a follow-up pass if one was requested while ops were in flight,
+// and fire any contexts that were waiting on this update.
+template <typename I>
+void MirrorStatusUpdater<I>::handle_update_task(int r) {
+  dout(10) << dendl;
+  if (r < 0) {
+    derr << "failed to update mirror image statuses: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  std::unique_lock locker(m_lock);
+
+  Contexts on_finish_ctxs;
+  std::swap(on_finish_ctxs, m_update_on_finish_ctxs);
+
+  ceph_assert(m_update_in_progress);
+  m_update_in_progress = false;
+
+  ceph_assert(m_update_in_flight);
+  m_update_in_flight = false;
+
+  m_updating_global_image_ids.clear();
+
+  if (m_update_requested) {
+    m_update_requested = false;
+    // consumes the lock
+    queue_update_task(std::move(locker));
+  } else {
+    locker.unlock();
+  }
+
+  // waiters are completed with 0; callers that need the real error code
+  // wrap their contexts (see finalize_shutdown / try_remove_...)
+  for (auto on_finish : on_finish_ctxs) {
+    on_finish->complete(0);
+  }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::MirrorStatusUpdater<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.h b/src/tools/rbd_mirror/MirrorStatusUpdater.h
new file mode 100644
index 000000000..783b818fc
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusUpdater.h
@@ -0,0 +1,119 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H
+#define CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H
+
+#include "include/rados/librados.hpp"
+#include "common/ceph_mutex.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+
+struct Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct MirrorStatusWatcher;
+template <typename> struct Threads;
+
+// Caches per-image mirror status records and periodically flushes them
+// in batches to the pool's RBD_MIRRORING object.
+template <typename ImageCtxT = librbd::ImageCtx>
+class MirrorStatusUpdater {
+public:
+
+  static MirrorStatusUpdater* create(librados::IoCtx& io_ctx,
+                                     Threads<ImageCtxT> *threads,
+                                     const std::string& local_mirror_uuid) {
+    return new MirrorStatusUpdater(io_ctx, threads, local_mirror_uuid);
+  }
+
+  MirrorStatusUpdater(librados::IoCtx& io_ctx, Threads<ImageCtxT> *threads,
+                      const std::string& local_mirror_uuid);
+  ~MirrorStatusUpdater();
+
+  void init(Context* on_finish);
+  void shut_down(Context* on_finish);
+
+  // true if a status is currently cached for the image
+  bool exists(const std::string& global_image_id);
+  void set_mirror_image_status(
+      const std::string& global_image_id,
+      const cls::rbd::MirrorImageSiteStatus& mirror_image_site_status,
+      bool immediate_update);
+  void remove_mirror_image_status(const std::string& global_image_id,
+                                  bool immediate_update, Context* on_finish);
+  void remove_refresh_mirror_image_status(const std::string& global_image_id,
+                                          Context* on_finish);
+
+private:
+  /**
+   * @verbatim
+   *
+   * <uninitialized> <----------------------\
+   *    | (init)            ^ (error)       |
+   *    v                   *               |
+   * INIT_STATUS_WATCHER  * * *             |
+   *    |                                   |
+   *    |                       SHUT_DOWN_STATUS_WATCHER
+   *    |                                   ^
+   *    |                                   |
+   *    |          (shutdown)               |
+   * <initialized> -------------------------/
+   *
+   * @endverbatim
+   */
+  typedef std::list<Context*> Contexts;
+  typedef std::set<std::string> GlobalImageIds;
+  typedef std::map<std::string, cls::rbd::MirrorImageSiteStatus>
+      GlobalImageStatus;
+
+  librados::IoCtx m_io_ctx;
+  Threads<ImageCtxT>* m_threads;
+  std::string m_local_mirror_uuid;
+
+  // pending periodic flush event; protected by m_threads->timer_lock
+  Context* m_timer_task = nullptr;
+
+  // protects all state below
+  ceph::mutex m_lock;
+
+  bool m_initialized = false;
+
+  MirrorStatusWatcher<ImageCtxT>* m_mirror_status_watcher = nullptr;
+
+  // ids queued for the next flush
+  GlobalImageIds m_update_global_image_ids;
+  // latest cached status per global image id
+  GlobalImageStatus m_global_image_status;
+
+  // update_task queued / RADOS ops issued / follow-up pass requested
+  bool m_update_in_progress = false;
+  bool m_update_in_flight = false;
+  bool m_update_requested = false;
+  // contexts to fire once the current update completes
+  Contexts m_update_on_finish_ctxs;
+  // snapshot of ids covered by the in-flight update
+  GlobalImageIds m_updating_global_image_ids;
+
+  bool try_remove_mirror_image_status(const std::string& global_image_id,
+                                      bool queue_update, bool immediate_update,
+                                      Context* on_finish);
+
+  void init_mirror_status_watcher(Context* on_finish);
+  void handle_init_mirror_status_watcher(int r, Context* on_finish);
+
+  void shut_down_mirror_status_watcher(Context* on_finish);
+  void handle_shut_down_mirror_status_watcher(int r, Context* on_finish);
+  void finalize_shutdown(int r, Context* on_finish);
+
+  void schedule_timer_task();
+  void handle_timer_task(int r);
+
+  void queue_update_task(std::unique_lock<ceph::mutex>&& locker);
+  void update_task(int r);
+  void handle_update_task(int r);
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::MirrorStatusUpdater<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H
diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.cc b/src/tools/rbd_mirror/MirrorStatusWatcher.cc
new file mode 100644
index 000000000..3e1564c5b
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusWatcher.cc
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "MirrorStatusWatcher.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::MirrorStatusWatcher: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_rados_callback;
+
+// Watches the pool's RBD_MIRRORING object for status notifications.
+template <typename I>
+MirrorStatusWatcher<I>::MirrorStatusWatcher(librados::IoCtx &io_ctx,
+                                            librbd::asio::ContextWQ *work_queue)
+  : Watcher(io_ctx, work_queue, RBD_MIRRORING) {
+}
+
+template <typename I>
+MirrorStatusWatcher<I>::~MirrorStatusWatcher() {
+}
+
+// Purge stale "down" status records left by previous daemon instances,
+// then register the watch; on_finish receives any error.
+template <typename I>
+void MirrorStatusWatcher<I>::init(Context *on_finish) {
+  dout(20) << dendl;
+
+  on_finish = new LambdaContext(
+    [this, on_finish] (int r) {
+      if (r < 0) {
+        derr << "error removing down statuses: " << cpp_strerror(r) << dendl;
+        on_finish->complete(r);
+        return;
+      }
+      register_watch(on_finish);
+    });
+
+  librados::ObjectWriteOperation op;
+  librbd::cls_client::mirror_image_status_remove_down(&op);
+  librados::AioCompletion *aio_comp = create_rados_callback(on_finish);
+
+  int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+template <typename I>
+void MirrorStatusWatcher<I>::shut_down(Context *on_finish) {
+  dout(20) << dendl;
+
+  unregister_watch(on_finish);
+}
+
+// Notifications carry no payload that needs handling here; just ack so
+// the notifier is not blocked.
+template <typename I>
+void MirrorStatusWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+                                           uint64_t notifier_id,
+                                           bufferlist &bl) {
+  dout(20) << dendl;
+
+  bufferlist out;
+  acknowledge_notify(notify_id, handle, out);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::MirrorStatusWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.h b/src/tools/rbd_mirror/MirrorStatusWatcher.h
new file mode 100644
index 000000000..3335e9e63
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusWatcher.h
@@ -0,0 +1,43 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H
+#define CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H
+
+#include "librbd/Watcher.h"
+
+namespace librbd {
+class ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+// Thin librbd::Watcher specialization for the RBD_MIRRORING object;
+// acknowledges notifications and purges stale "down" statuses on init.
+template <typename ImageCtxT = librbd::ImageCtx>
+class MirrorStatusWatcher : protected librbd::Watcher {
+public:
+  static MirrorStatusWatcher *create(librados::IoCtx &io_ctx,
+                                     librbd::asio::ContextWQ *work_queue) {
+    return new MirrorStatusWatcher(io_ctx, work_queue);
+  }
+  void destroy() {
+    delete this;
+  }
+
+  MirrorStatusWatcher(librados::IoCtx &io_ctx,
+                      librbd::asio::ContextWQ *work_queue);
+  ~MirrorStatusWatcher() override;
+
+  void init(Context *on_finish);
+  void shut_down(Context *on_finish);
+
+protected:
+  void handle_notify(uint64_t notify_id, uint64_t handle,
+                     uint64_t notifier_id, bufferlist &bl) override;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H
diff --git a/src/tools/rbd_mirror/NamespaceReplayer.cc b/src/tools/rbd_mirror/NamespaceReplayer.cc
new file mode 100644
index 000000000..d305d8472
--- /dev/null
+++ b/src/tools/rbd_mirror/NamespaceReplayer.cc
@@ -0,0 +1,862 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "NamespaceReplayer.h"
+#include "common/Formatter.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Mirror.h"
+#include "librbd/asio/ContextWQ.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::NamespaceReplayer: " \
+ << this << " " << __func__ << ": "
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+namespace rbd {
+namespace mirror {
+
+using ::operator<<;
+
+namespace {
+
+const std::string SERVICE_DAEMON_LOCAL_COUNT_KEY("image_local_count");
+const std::string SERVICE_DAEMON_REMOTE_COUNT_KEY("image_remote_count");
+
+} // anonymous namespace
+
+// Replays mirrored images within a single namespace of a pool pair.
+// The supplied IoCtxs are duplicated and re-scoped to the namespace so
+// the caller's contexts remain untouched.
+template <typename I>
+NamespaceReplayer<I>::NamespaceReplayer(
+    const std::string &name,
+    librados::IoCtx &local_io_ctx, librados::IoCtx &remote_io_ctx,
+    const std::string &local_mirror_uuid,
+    const std::string& local_mirror_peer_uuid,
+    const RemotePoolMeta& remote_pool_meta,
+    Threads<I> *threads,
+    Throttler<I> *image_sync_throttler,
+    Throttler<I> *image_deletion_throttler,
+    ServiceDaemon<I> *service_daemon,
+    journal::CacheManagerHandler *cache_manager_handler,
+    PoolMetaCache* pool_meta_cache) :
+  m_namespace_name(name),
+  m_local_mirror_uuid(local_mirror_uuid),
+  m_local_mirror_peer_uuid(local_mirror_peer_uuid),
+  m_remote_pool_meta(remote_pool_meta),
+  m_threads(threads), m_image_sync_throttler(image_sync_throttler),
+  m_image_deletion_throttler(image_deletion_throttler),
+  m_service_daemon(service_daemon),
+  m_cache_manager_handler(cache_manager_handler),
+  m_pool_meta_cache(pool_meta_cache),
+  m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+    "rbd::mirror::NamespaceReplayer " + name, this))),
+  m_local_pool_watcher_listener(this, true),
+  m_remote_pool_watcher_listener(this, false),
+  m_image_map_listener(this) {
+  dout(10) << name << dendl;
+
+  m_local_io_ctx.dup(local_io_ctx);
+  m_local_io_ctx.set_namespace(name);
+  m_remote_io_ctx.dup(remote_io_ctx);
+  m_remote_io_ctx.set_namespace(name);
+}
+
+// True if the instance replayer or either pool watcher has observed a
+// blocklist condition.
+template <typename I>
+bool NamespaceReplayer<I>::is_blocklisted() const {
+  std::lock_guard locker{m_lock};
+  return m_instance_replayer->is_blocklisted() ||
+         (m_local_pool_watcher &&
+          m_local_pool_watcher->is_blocklisted()) ||
+         (m_remote_pool_watcher &&
+          m_remote_pool_watcher->is_blocklisted());
+}
+
+// Start the init state machine: local status updater -> remote status
+// updater -> instance replayer -> instance watcher.  on_finish receives
+// the result once the chain completes (or unwinds on error).
+template <typename I>
+void NamespaceReplayer<I>::init(Context *on_finish) {
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(m_on_finish == nullptr);
+  m_on_finish = on_finish;
+
+  init_local_status_updater();
+}
+
+
+// Shut down in reverse order; if this instance is the leader
+// (m_image_map set), release leader-only components first.
+template <typename I>
+void NamespaceReplayer<I>::shut_down(Context *on_finish) {
+  dout(20) << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+
+    ceph_assert(m_on_finish == nullptr);
+    m_on_finish = on_finish;
+
+    if (!m_image_map) {
+      stop_instance_replayer();
+      return;
+    }
+  }
+
+  auto ctx = new LambdaContext(
+      [this] (int r) {
+        std::lock_guard locker{m_lock};
+        stop_instance_replayer();
+      });
+  handle_release_leader(ctx);
+}
+
+// Dump replayer (and, on the leader, image deleter) status into the
+// provided formatter.
+template <typename I>
+void NamespaceReplayer<I>::print_status(Formatter *f)
+{
+  dout(20) << dendl;
+
+  ceph_assert(f);
+
+  std::lock_guard locker{m_lock};
+
+  m_instance_replayer->print_status(f);
+
+  // image deleter only exists while this instance is the leader
+  if (m_image_deleter) {
+    f->open_object_section("image_deleter");
+    m_image_deleter->print_status(f);
+    f->close_section();
+  }
+}
+
+// The following four operations simply forward to the instance replayer
+// under m_lock.
+template <typename I>
+void NamespaceReplayer<I>::start()
+{
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_instance_replayer->start();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::stop()
+{
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_instance_replayer->stop();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::restart()
+{
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_instance_replayer->restart();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::flush()
+{
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_instance_replayer->flush();
+}
+
+// Pool watcher listener callback: push added/removed image sets into
+// the image map and refresh service-daemon image counts.  Ignored when
+// not the leader (m_image_map unset).
+template <typename I>
+void NamespaceReplayer<I>::handle_update(const std::string &mirror_uuid,
+                                         ImageIds &&added_image_ids,
+                                         ImageIds &&removed_image_ids) {
+  std::lock_guard locker{m_lock};
+
+  if (!m_image_map) {
+    dout(20) << "not leader" << dendl;
+    return;
+  }
+
+  dout(10) << "mirror_uuid=" << mirror_uuid << ", "
+           << "added_count=" << added_image_ids.size() << ", "
+           << "removed_count=" << removed_image_ids.size() << dendl;
+
+  m_service_daemon->add_or_update_namespace_attribute(
+    m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+    SERVICE_DAEMON_LOCAL_COUNT_KEY, m_local_pool_watcher->get_image_count());
+  if (m_remote_pool_watcher) {
+    m_service_daemon->add_or_update_namespace_attribute(
+      m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+      SERVICE_DAEMON_REMOTE_COUNT_KEY,
+      m_remote_pool_watcher->get_image_count());
+  }
+
+  // the image map tracks global ids only -- strip the local image ids
+  std::set<std::string> added_global_image_ids;
+  for (auto& image_id : added_image_ids) {
+    added_global_image_ids.insert(image_id.global_id);
+  }
+
+  std::set<std::string> removed_global_image_ids;
+  for (auto& image_id : removed_image_ids) {
+    removed_global_image_ids.insert(image_id.global_id);
+  }
+
+  m_image_map->update_images(mirror_uuid,
+                             std::move(added_global_image_ids),
+                             std::move(removed_global_image_ids));
+}
+
+// Leader acquired: notify the instance watcher, then bring up the
+// leader-only components starting with the image map.
+template <typename I>
+void NamespaceReplayer<I>::handle_acquire_leader(Context *on_finish) {
+  dout(10) << dendl;
+
+  m_instance_watcher->handle_acquire_leader();
+
+  init_image_map(on_finish);
+}
+
+// Leader released: notify the instance watcher, then begin tearing down
+// the leader-only components starting with the image deleter.
+template <typename I>
+void NamespaceReplayer<I>::handle_release_leader(Context *on_finish) {
+  dout(10) << dendl;
+
+  m_instance_watcher->handle_release_leader();
+  shut_down_image_deleter(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_update_leader(
+    const std::string &leader_instance_id) {
+  dout(10) << "leader_instance_id=" << leader_instance_id << dendl;
+
+  m_instance_watcher->handle_update_leader(leader_instance_id);
+}
+
+// Instance-membership callbacks: forwarded to the image map so it can
+// rebalance; no-ops when this instance is not the leader.
+template <typename I>
+void NamespaceReplayer<I>::handle_instances_added(
+    const std::vector<std::string> &instance_ids) {
+  dout(10) << "instance_ids=" << instance_ids << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (!m_image_map) {
+    return;
+  }
+
+  m_image_map->update_instances_added(instance_ids);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_instances_removed(
+    const std::vector<std::string> &instance_ids) {
+  dout(10) << "instance_ids=" << instance_ids << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (!m_image_map) {
+    return;
+  }
+
+  m_image_map->update_instances_removed(instance_ids);
+}
+
+// Init step 1: status updater for the local pool (empty mirror uuid).
+template <typename I>
+void NamespaceReplayer<I>::init_local_status_updater() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  ceph_assert(!m_local_status_updater);
+
+  m_local_status_updater.reset(MirrorStatusUpdater<I>::create(
+    m_local_io_ctx, m_threads, ""));
+  auto ctx = create_context_callback<
+      NamespaceReplayer<I>,
+      &NamespaceReplayer<I>::handle_init_local_status_updater>(this);
+
+  m_local_status_updater->init(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_local_status_updater(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (r < 0) {
+    derr << "error initializing local mirror status updater: "
+         << cpp_strerror(r) << dendl;
+
+    // first init step -- nothing else to unwind; fail the caller
+    m_local_status_updater.reset();
+    ceph_assert(m_on_finish != nullptr);
+    m_threads->work_queue->queue(m_on_finish, r);
+    m_on_finish = nullptr;
+    return;
+  }
+
+  init_remote_status_updater();
+}
+
+// Init step 2: status updater for the remote pool, tagged with our
+// local mirror uuid.
+template <typename I>
+void NamespaceReplayer<I>::init_remote_status_updater() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  ceph_assert(!m_remote_status_updater);
+
+  m_remote_status_updater.reset(MirrorStatusUpdater<I>::create(
+    m_remote_io_ctx, m_threads, m_local_mirror_uuid));
+  auto ctx = create_context_callback<
+      NamespaceReplayer<I>,
+      &NamespaceReplayer<I>::handle_init_remote_status_updater>(this);
+  m_remote_status_updater->init(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_remote_status_updater(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (r < 0) {
+    derr << "error initializing remote mirror status updater: "
+         << cpp_strerror(r) << dendl;
+
+    // unwind step 1, preserving the error in m_ret_val
+    m_remote_status_updater.reset();
+    m_ret_val = r;
+    shut_down_local_status_updater();
+    return;
+  }
+
+  init_instance_replayer();
+}
+
+// Init step 3: instance replayer; on success the remote peer is
+// registered with it before proceeding.
+template <typename I>
+void NamespaceReplayer<I>::init_instance_replayer() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  ceph_assert(!m_instance_replayer);
+
+  m_instance_replayer.reset(InstanceReplayer<I>::create(
+    m_local_io_ctx, m_local_mirror_uuid, m_threads, m_service_daemon,
+    m_local_status_updater.get(), m_cache_manager_handler,
+    m_pool_meta_cache));
+  auto ctx = create_context_callback<NamespaceReplayer<I>,
+      &NamespaceReplayer<I>::handle_init_instance_replayer>(this);
+
+  m_instance_replayer->init(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_instance_replayer(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (r < 0) {
+    derr << "error initializing instance replayer: " << cpp_strerror(r)
+         << dendl;
+
+    // unwind steps 1-2
+    m_instance_replayer.reset();
+    m_ret_val = r;
+    shut_down_remote_status_updater();
+    return;
+  }
+
+  m_instance_replayer->add_peer({m_local_mirror_peer_uuid, m_remote_io_ctx,
+                                 m_remote_pool_meta,
+                                 m_remote_status_updater.get()});
+
+  init_instance_watcher();
+}
+
+// Init step 4 (final): instance watcher; on success the stored
+// m_on_finish is completed with success.
+template <typename I>
+void NamespaceReplayer<I>::init_instance_watcher() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  ceph_assert(!m_instance_watcher);
+
+  m_instance_watcher.reset(InstanceWatcher<I>::create(
+    m_local_io_ctx, *m_threads->asio_engine, m_instance_replayer.get(),
+    m_image_sync_throttler));
+  auto ctx = create_context_callback<NamespaceReplayer<I>,
+      &NamespaceReplayer<I>::handle_init_instance_watcher>(this);
+
+  m_instance_watcher->init(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_instance_watcher(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (r < 0) {
+    derr << "error initializing instance watcher: " << cpp_strerror(r)
+         << dendl;
+
+    // unwind steps 1-3
+    m_instance_watcher.reset();
+    m_ret_val = r;
+    shut_down_instance_replayer();
+    return;
+  }
+
+  ceph_assert(m_on_finish != nullptr);
+  m_threads->work_queue->queue(m_on_finish);
+  m_on_finish = nullptr;
+}
+
+// Shutdown step 1: stop active replay before dismantling components.
+template <typename I>
+void NamespaceReplayer<I>::stop_instance_replayer() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+      m_threads->work_queue, create_context_callback<NamespaceReplayer<I>,
+      &NamespaceReplayer<I>::handle_stop_instance_replayer>(this));
+
+  m_instance_replayer->stop(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_stop_instance_replayer(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  // errors are logged; shutdown continues regardless
+  if (r < 0) {
+    derr << "error stopping instance replayer: " << cpp_strerror(r) << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+
+  shut_down_instance_watcher();
+}
+
+// Shutdown step 2: instance watcher.
+template <typename I>
+void NamespaceReplayer<I>::shut_down_instance_watcher() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  ceph_assert(m_instance_watcher);
+
+  Context *ctx = create_async_context_callback(
+      m_threads->work_queue, create_context_callback<NamespaceReplayer<I>,
+      &NamespaceReplayer<I>::handle_shut_down_instance_watcher>(this));
+
+  m_instance_watcher->shut_down(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_instance_watcher(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "error shutting instance watcher down: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+
+  m_instance_watcher.reset();
+
+  shut_down_instance_replayer();
+}
+
+// Shutdown step 3: instance replayer.
+template <typename I>
+void NamespaceReplayer<I>::shut_down_instance_replayer() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  ceph_assert(m_instance_replayer);
+
+  Context *ctx = create_async_context_callback(
+      m_threads->work_queue, create_context_callback<NamespaceReplayer<I>,
+      &NamespaceReplayer<I>::handle_shut_down_instance_replayer>(this));
+
+  m_instance_replayer->shut_down(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_instance_replayer(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "error shutting instance replayer down: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+
+  m_instance_replayer.reset();
+
+  shut_down_remote_status_updater();
+}
+
+// Shutdown step 4: remote status updater.
+template <typename I>
+void NamespaceReplayer<I>::shut_down_remote_status_updater() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  ceph_assert(m_remote_status_updater);
+
+  auto ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      NamespaceReplayer<I>,
+      &NamespaceReplayer<I>::handle_shut_down_remote_status_updater>(this));
+  m_remote_status_updater->shut_down(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_remote_status_updater(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "error shutting remote mirror status updater down: "
+         << cpp_strerror(r) << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+  m_remote_status_updater.reset();
+
+  shut_down_local_status_updater();
+}
+
+// Shut down the local MirrorStatusUpdater; this is the last component
+// torn down before the saved shut-down context fires.
+template <typename I>
+void NamespaceReplayer<I>::shut_down_local_status_updater() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  ceph_assert(m_local_status_updater);
+
+  auto ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      NamespaceReplayer<I>,
+      &NamespaceReplayer<I>::handle_shut_down_local_status_updater>(this));
+
+  m_local_status_updater->shut_down(ctx);
+}
+
+// Final step of the shut-down sequence: release the local status
+// updater, verify every other component has already been destroyed, and
+// complete the stored context with the stored return value.
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_local_status_updater(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "error shutting local mirror status updater down: "
+         << cpp_strerror(r) << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+
+  m_local_status_updater.reset();
+
+  // every leader-only and follower component must be gone by now
+  ceph_assert(!m_image_map);
+  ceph_assert(!m_image_deleter);
+  ceph_assert(!m_local_pool_watcher);
+  ceph_assert(!m_remote_pool_watcher);
+  ceph_assert(!m_instance_watcher);
+  ceph_assert(!m_instance_replayer);
+
+  ceph_assert(m_on_finish != nullptr);
+  // fire the saved completion asynchronously and reset state for re-use
+  m_threads->work_queue->queue(m_on_finish, m_ret_val);
+  m_on_finish = nullptr;
+  m_ret_val = 0;
+}
+
+// Leader-only: create and initialize the ImageMap, which assigns global
+// images to mirror daemon instances via m_image_map_listener callbacks.
+template <typename I>
+void NamespaceReplayer<I>::init_image_map(Context *on_finish) {
+  dout(10) << dendl;
+
+  // ownership is only transferred to m_image_map on successful init
+  auto image_map = ImageMap<I>::create(m_local_io_ctx, m_threads,
+                                       m_instance_watcher->get_instance_id(),
+                                       m_image_map_listener);
+
+  auto ctx = new LambdaContext(
+      [this, image_map, on_finish](int r) {
+        handle_init_image_map(r, image_map, on_finish);
+      });
+  image_map->init(create_async_context_callback(
+    m_threads->work_queue, ctx));
+}
+
+// Completion for init_image_map(): on error the partially-initialized
+// map is shut down and deleted before the original error is propagated;
+// on success ownership is taken and the local pool watcher is started.
+template <typename I>
+void NamespaceReplayer<I>::handle_init_image_map(int r, ImageMap<I> *image_map,
+                                                 Context *on_finish) {
+  dout(10) << "r=" << r << dendl;
+  if (r < 0) {
+    derr << "failed to init image map: " << cpp_strerror(r) << dendl;
+    // shut the map down first, then free it and report the init error
+    // (the shut-down result itself is ignored)
+    on_finish = new LambdaContext([image_map, on_finish, r](int) {
+        delete image_map;
+        on_finish->complete(r);
+      });
+    image_map->shut_down(on_finish);
+    return;
+  }
+
+  ceph_assert(!m_image_map);
+  m_image_map.reset(image_map);
+
+  init_local_pool_watcher(on_finish);
+}
+
+// Leader-only: create the local PoolWatcher and refresh the initial set
+// of mirror-enabled images in the local pool.
+template <typename I>
+void NamespaceReplayer<I>::init_local_pool_watcher(Context *on_finish) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(!m_local_pool_watcher);
+  m_local_pool_watcher.reset(PoolWatcher<I>::create(
+    m_threads, m_local_io_ctx, m_local_mirror_uuid,
+    m_local_pool_watcher_listener));
+
+  // ensure the initial set of local images is up-to-date
+  // after acquiring the leader role
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+      handle_init_local_pool_watcher(r, on_finish);
+    });
+  m_local_pool_watcher->init(create_async_context_callback(
+    m_threads->work_queue, ctx));
+}
+
+// Completion for init_local_pool_watcher(): on failure tear the pool
+// watchers back down and propagate the original error.
+template <typename I>
+void NamespaceReplayer<I>::handle_init_local_pool_watcher(
+    int r, Context *on_finish) {
+  dout(10) << "r=" << r << dendl;
+  if (r < 0) {
+    derr << "failed to retrieve local images: " << cpp_strerror(r) << dendl;
+    // preserve the init error even if the shut down itself fails
+    on_finish = new LambdaContext([on_finish, r](int) {
+        on_finish->complete(r);
+      });
+    shut_down_pool_watchers(on_finish);
+    return;
+  }
+
+  init_remote_pool_watcher(on_finish);
+}
+
+// Leader-only: create the remote PoolWatcher to track mirror-enabled
+// images in the peer cluster's pool.
+template <typename I>
+void NamespaceReplayer<I>::init_remote_pool_watcher(Context *on_finish) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(!m_remote_pool_watcher);
+  m_remote_pool_watcher.reset(PoolWatcher<I>::create(
+    m_threads, m_remote_io_ctx, m_remote_pool_meta.mirror_uuid,
+    m_remote_pool_watcher_listener));
+
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+      handle_init_remote_pool_watcher(r, on_finish);
+    });
+  m_remote_pool_watcher->init(create_async_context_callback(
+    m_threads->work_queue, ctx));
+}
+
+// Completion for init_remote_pool_watcher(): -ENOENT (peer has no
+// mirroring configured yet) is tolerated and initialization continues;
+// any other error tears the pool watchers down and propagates.
+template <typename I>
+void NamespaceReplayer<I>::handle_init_remote_pool_watcher(
+    int r, Context *on_finish) {
+  dout(10) << "r=" << r << dendl;
+  if (r == -ENOENT) {
+    // Technically nothing to do since the other side doesn't
+    // have mirroring enabled. Eventually the remote pool watcher will
+    // detect images (if mirroring is enabled), so no point propagating
+    // an error which would just busy-spin the state machines.
+    dout(0) << "remote peer does not have mirroring configured" << dendl;
+  } else if (r < 0) {
+    derr << "failed to retrieve remote images: " << cpp_strerror(r) << dendl;
+    // preserve the init error even if the shut down itself fails
+    on_finish = new LambdaContext([on_finish, r](int) {
+        on_finish->complete(r);
+      });
+    shut_down_pool_watchers(on_finish);
+    return;
+  }
+
+  init_image_deleter(on_finish);
+}
+
+// Leader-only: create and initialize the ImageDeleter, which removes
+// images scheduled for deletion (throttled by the deletion throttler).
+template <typename I>
+void NamespaceReplayer<I>::init_image_deleter(Context *on_finish) {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(!m_image_deleter);
+
+  on_finish = new LambdaContext([this, on_finish](int r) {
+      handle_init_image_deleter(r, on_finish);
+    });
+  m_image_deleter.reset(ImageDeleter<I>::create(m_local_io_ctx, m_threads,
+                                                m_image_deletion_throttler,
+                                                m_service_daemon));
+  m_image_deleter->init(create_async_context_callback(
+    m_threads->work_queue, on_finish));
+}
+
+// Completion for init_image_deleter(): on failure shut the deleter back
+// down and propagate the original error; on success the leader
+// acquisition sequence is complete.
+template <typename I>
+void NamespaceReplayer<I>::handle_init_image_deleter(
+    int r, Context *on_finish) {
+  dout(10) << "r=" << r << dendl;
+  if (r < 0) {
+    derr << "failed to init image deleter: " << cpp_strerror(r) << dendl;
+    // preserve the init error even if the shut down itself fails
+    on_finish = new LambdaContext([on_finish, r](int) {
+        on_finish->complete(r);
+      });
+    shut_down_image_deleter(on_finish);
+    return;
+  }
+
+  on_finish->complete(0);
+}
+
+// Begin tearing down the leader-only components: shut down the image
+// deleter if one exists, otherwise fall through to the pool watchers.
+template <typename I>
+void NamespaceReplayer<I>::shut_down_image_deleter(Context* on_finish) {
+  dout(10) << dendl;
+  {
+    std::lock_guard locker{m_lock};
+    if (m_image_deleter) {
+      Context *ctx = new LambdaContext([this, on_finish](int r) {
+          handle_shut_down_image_deleter(r, on_finish);
+        });
+      ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+      m_image_deleter->shut_down(ctx);
+      return;
+    }
+  }
+  // no deleter was ever instantiated (e.g. init failed earlier)
+  shut_down_pool_watchers(on_finish);
+}
+
+// Completion for shut_down_image_deleter(): release the deleter and
+// continue with the pool watchers.
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_image_deleter(
+    int r, Context* on_finish) {
+  dout(10) << "r=" << r << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_image_deleter);
+    m_image_deleter.reset();
+  }
+
+  shut_down_pool_watchers(on_finish);
+}
+
+// Shut down the local and (if present) remote pool watchers in parallel
+// via a C_Gather, then continue with the image map; if no local watcher
+// exists, completes immediately with success.
+template <typename I>
+void NamespaceReplayer<I>::shut_down_pool_watchers(Context *on_finish) {
+  dout(10) << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    if (m_local_pool_watcher) {
+      Context *ctx = new LambdaContext([this, on_finish](int r) {
+          handle_shut_down_pool_watchers(r, on_finish);
+        });
+      ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+      // the remote watcher only exists if the local one was created first
+      auto gather_ctx = new C_Gather(g_ceph_context, ctx);
+      m_local_pool_watcher->shut_down(gather_ctx->new_sub());
+      if (m_remote_pool_watcher) {
+        m_remote_pool_watcher->shut_down(gather_ctx->new_sub());
+      }
+      gather_ctx->activate();
+      return;
+    }
+  }
+
+  on_finish->complete(0);
+}
+
+// Completion for shut_down_pool_watchers(): release both watchers and
+// continue with the image map shut down.
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_pool_watchers(
+    int r, Context *on_finish) {
+  dout(10) << "r=" << r << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_local_pool_watcher);
+    m_local_pool_watcher.reset();
+
+    if (m_remote_pool_watcher) {
+      m_remote_pool_watcher.reset();
+    }
+  }
+  shut_down_image_map(on_finish);
+}
+
+// Shut down the ImageMap if the leader role ever instantiated one;
+// otherwise just queue the completion asynchronously.
+template <typename I>
+void NamespaceReplayer<I>::shut_down_image_map(Context *on_finish) {
+  dout(5) << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (m_image_map) {
+    on_finish = new LambdaContext(
+        [this, on_finish](int r) {
+          handle_shut_down_image_map(r, on_finish);
+        });
+    m_image_map->shut_down(create_async_context_callback(
+      m_threads->work_queue, on_finish));
+    return;
+  }
+
+  m_threads->work_queue->queue(on_finish);
+}
+
+// Completion for shut_down_image_map(): release the map (blocklisting
+// is an expected shut-down cause and not logged as an error) and ask the
+// instance replayer to release every image it still owns.
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_image_map(int r, Context *on_finish) {
+  dout(5) << "r=" << r << dendl;
+  if (r < 0 && r != -EBLOCKLISTED) {
+    derr << "failed to shut down image map: " << cpp_strerror(r) << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+  ceph_assert(m_image_map);
+  m_image_map.reset();
+
+  m_instance_replayer->release_all(create_async_context_callback(
+    m_threads->work_queue, on_finish));
+}
+
+// ImageMap listener callback: ask the given daemon instance (via the
+// instance watcher) to acquire the specified global image.
+template <typename I>
+void NamespaceReplayer<I>::handle_acquire_image(const std::string &global_image_id,
+                                                const std::string &instance_id,
+                                                Context* on_finish) {
+  dout(5) << "global_image_id=" << global_image_id << ", "
+          << "instance_id=" << instance_id << dendl;
+
+  m_instance_watcher->notify_image_acquire(instance_id, global_image_id,
+                                           on_finish);
+}
+
+// ImageMap listener callback: ask the given daemon instance to release
+// the specified global image.
+template <typename I>
+void NamespaceReplayer<I>::handle_release_image(const std::string &global_image_id,
+                                                const std::string &instance_id,
+                                                Context* on_finish) {
+  dout(5) << "global_image_id=" << global_image_id << ", "
+          << "instance_id=" << instance_id << dendl;
+
+  m_instance_watcher->notify_image_release(instance_id, global_image_id,
+                                           on_finish);
+}
+
+// ImageMap listener callback: notify the given daemon instance that the
+// peer's image was removed; a peer mirror uuid is mandatory here.
+template <typename I>
+void NamespaceReplayer<I>::handle_remove_image(const std::string &mirror_uuid,
+                                               const std::string &global_image_id,
+                                               const std::string &instance_id,
+                                               Context* on_finish) {
+  ceph_assert(!mirror_uuid.empty());
+  dout(5) << "mirror_uuid=" << mirror_uuid << ", "
+          << "global_image_id=" << global_image_id << ", "
+          << "instance_id=" << instance_id << dendl;
+
+  m_instance_watcher->notify_peer_image_removed(instance_id, global_image_id,
+                                                mirror_uuid, on_finish);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::NamespaceReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/NamespaceReplayer.h b/src/tools/rbd_mirror/NamespaceReplayer.h
new file mode 100644
index 000000000..e304b8253
--- /dev/null
+++ b/src/tools/rbd_mirror/NamespaceReplayer.h
@@ -0,0 +1,308 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H
+#define CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H
+
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+
+#include "tools/rbd_mirror/ImageDeleter.h"
+#include "tools/rbd_mirror/ImageMap.h"
+#include "tools/rbd_mirror/InstanceReplayer.h"
+#include "tools/rbd_mirror/InstanceWatcher.h"
+#include "tools/rbd_mirror/MirrorStatusUpdater.h"
+#include "tools/rbd_mirror/PoolWatcher.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_map/Types.h"
+#include "tools/rbd_mirror/pool_watcher/Types.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+class AdminSocketHook;
+
+namespace journal { struct CacheManagerHandler; }
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+struct PoolMetaCache;
+template <typename> class ServiceDaemon;
+template <typename> class Throttler;
+template <typename> struct Threads;
+
+/**
+ * Controls mirroring for a single remote cluster.
+ */
+/**
+ * Replays mirroring for a single namespace of a pool: always runs the
+ * per-namespace components (mirror status updaters, instance
+ * replayer/watcher) and, while this daemon holds the leader role,
+ * additionally runs the image map, pool watchers and image deleter.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class NamespaceReplayer {
+public:
+  // factory helper; caller owns the returned pointer
+  static NamespaceReplayer *create(
+      const std::string &name,
+      librados::IoCtx &local_ioctx,
+      librados::IoCtx &remote_ioctx,
+      const std::string &local_mirror_uuid,
+      const std::string &local_mirror_peer_uuid,
+      const RemotePoolMeta& remote_pool_meta,
+      Threads<ImageCtxT> *threads,
+      Throttler<ImageCtxT> *image_sync_throttler,
+      Throttler<ImageCtxT> *image_deletion_throttler,
+      ServiceDaemon<ImageCtxT> *service_daemon,
+      journal::CacheManagerHandler *cache_manager_handler,
+      PoolMetaCache* pool_meta_cache) {
+    return new NamespaceReplayer(name, local_ioctx, remote_ioctx,
+                                 local_mirror_uuid, local_mirror_peer_uuid,
+                                 remote_pool_meta, threads,
+                                 image_sync_throttler, image_deletion_throttler,
+                                 pool_meta_cache);
+  }
+
+  NamespaceReplayer(const std::string &name,
+                    librados::IoCtx &local_ioctx,
+                    librados::IoCtx &remote_ioctx,
+                    const std::string &local_mirror_uuid,
+                    const std::string& local_mirror_peer_uuid,
+                    const RemotePoolMeta& remote_pool_meta,
+                    Threads<ImageCtxT> *threads,
+                    Throttler<ImageCtxT> *image_sync_throttler,
+                    Throttler<ImageCtxT> *image_deletion_throttler,
+                    ServiceDaemon<ImageCtxT> *service_daemon,
+                    journal::CacheManagerHandler *cache_manager_handler,
+                    PoolMetaCache* pool_meta_cache);
+  NamespaceReplayer(const NamespaceReplayer&) = delete;
+  NamespaceReplayer& operator=(const NamespaceReplayer&) = delete;
+
+  bool is_blocklisted() const;
+
+  void init(Context *on_finish);
+  void shut_down(Context *on_finish);
+
+  // leader lifecycle notifications forwarded from the owning pool replayer
+  void handle_acquire_leader(Context *on_finish);
+  void handle_release_leader(Context *on_finish);
+  void handle_update_leader(const std::string &leader_instance_id);
+  void handle_instances_added(const std::vector<std::string> &instance_ids);
+  void handle_instances_removed(const std::vector<std::string> &instance_ids);
+
+  void print_status(Formatter *f);
+  void start();
+  void stop();
+  void restart();
+  void flush();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <uninitialized> <------------------------------------\
+   *    | (init)                ^ (error)                 |
+   *    v                       *                         |
+   * INIT_LOCAL_STATUS_UPDATER * * * * * * * * > SHUT_DOWN_LOCAL_STATUS_UPDATER
+   *    |                       * (error)                 ^
+   *    v                       *                         |
+   * INIT_REMOTE_STATUS_UPDATER * * * * * * * > SHUT_DOWN_REMOTE_STATUS_UPDATER
+   *    |                       * (error)                 ^
+   *    v                       *                         |
+   * INIT_INSTANCE_REPLAYER * * * * * * * * * > SHUT_DOWN_INSTANCE_REPLAYER
+   *    |                       *                         ^
+   *    v                       *                         |
+   * INIT_INSTANCE_WATCHER  * * * * * * * * * * SHUT_DOWN_INSTANCE_WATCHER
+   *    | (error)                                         ^
+   *    |                                                 |
+   *    v                                     STOP_INSTANCE_REPLAYER
+   *    |                                                 ^
+   *    | (shut down)                                     |
+   *    | /----------------------------------------------/
+   *    v |
+   * <follower> <---------------------------\
+   *    .                                   |
+   *    .                                   |
+   *    v (leader acquired)                 |
+   * INIT_IMAGE_MAP                         |
+   *    |                                   |
+   *    v                                   |
+   * INIT_LOCAL_POOL_WATCHER      SHUT_DOWN_IMAGE_MAP
+   *    |                                   ^
+   *    v                                   |
+   * INIT_REMOTE_POOL_WATCHER     SHUT_DOWN_POOL_WATCHERS
+   *    |                                   ^
+   *    v                                   |
+   * INIT_IMAGE_DELETER           SHUT_DOWN_IMAGE_DELETER
+   *    |                                   ^
+   *    v                                   .
+   * <leader> <-----------\                 .
+   *    .                 |                 .
+   *    . (image update)  |                 .
+   *    . . > NOTIFY_INSTANCE_WATCHER       .
+   *    .                                   .
+   *    . (leader lost / shut down)         .
+   *    . . . . . . . . . . . . . . . . . . .
+   *
+   * @endverbatim
+   */
+
+  // routes PoolWatcher updates back into the replayer; local updates are
+  // tagged with an empty mirror uuid
+  struct PoolWatcherListener : public pool_watcher::Listener {
+    NamespaceReplayer *namespace_replayer;
+    bool local;
+
+    PoolWatcherListener(NamespaceReplayer *namespace_replayer, bool local)
+      : namespace_replayer(namespace_replayer), local(local) {
+    }
+
+    void handle_update(const std::string &mirror_uuid,
+                       ImageIds &&added_image_ids,
+                       ImageIds &&removed_image_ids) override {
+      namespace_replayer->handle_update((local ? "" : mirror_uuid),
+                                        std::move(added_image_ids),
+                                        std::move(removed_image_ids));
+    }
+  };
+
+  // routes ImageMap acquire/release/remove notifications back into the
+  // replayer
+  struct ImageMapListener : public image_map::Listener {
+    NamespaceReplayer *namespace_replayer;
+
+    ImageMapListener(NamespaceReplayer *namespace_replayer)
+      : namespace_replayer(namespace_replayer) {
+    }
+
+    void acquire_image(const std::string &global_image_id,
+                       const std::string &instance_id,
+                       Context* on_finish) override {
+      namespace_replayer->handle_acquire_image(global_image_id, instance_id,
+                                               on_finish);
+    }
+
+    void release_image(const std::string &global_image_id,
+                       const std::string &instance_id,
+                       Context* on_finish) override {
+      namespace_replayer->handle_release_image(global_image_id, instance_id,
+                                               on_finish);
+    }
+
+    void remove_image(const std::string &mirror_uuid,
+                      const std::string &global_image_id,
+                      const std::string &instance_id,
+                      Context* on_finish) override {
+      namespace_replayer->handle_remove_image(mirror_uuid, global_image_id,
+                                              instance_id, on_finish);
+    }
+  };
+
+  void handle_update(const std::string &mirror_uuid,
+                     ImageIds &&added_image_ids,
+                     ImageIds &&removed_image_ids);
+
+  int init_rados(const std::string &cluster_name,
+                 const std::string &client_name,
+                 const std::string &mon_host,
+                 const std::string &key,
+                 const std::string &description, RadosRef *rados_ref,
+                 bool strip_cluster_overrides);
+
+  void init_local_status_updater();
+  void handle_init_local_status_updater(int r);
+
+  void init_remote_status_updater();
+  void handle_init_remote_status_updater(int r);
+
+  void init_instance_replayer();
+  void handle_init_instance_replayer(int r);
+
+  void init_instance_watcher();
+  void handle_init_instance_watcher(int r);
+
+  void stop_instance_replayer();
+  void handle_stop_instance_replayer(int r);
+
+  void shut_down_instance_watcher();
+  void handle_shut_down_instance_watcher(int r);
+
+  void shut_down_instance_replayer();
+  void handle_shut_down_instance_replayer(int r);
+
+  void shut_down_remote_status_updater();
+  void handle_shut_down_remote_status_updater(int r);
+
+  void shut_down_local_status_updater();
+  void handle_shut_down_local_status_updater(int r);
+
+  void init_image_map(Context *on_finish);
+  void handle_init_image_map(int r, ImageMap<ImageCtxT> *image_map,
+                             Context *on_finish);
+
+  void init_local_pool_watcher(Context *on_finish);
+  void handle_init_local_pool_watcher(int r, Context *on_finish);
+
+  void init_remote_pool_watcher(Context *on_finish);
+  void handle_init_remote_pool_watcher(int r, Context *on_finish);
+
+  void init_image_deleter(Context* on_finish);
+  void handle_init_image_deleter(int r, Context* on_finish);
+
+  void shut_down_image_deleter(Context* on_finish);
+  void handle_shut_down_image_deleter(int r, Context* on_finish);
+
+  void shut_down_pool_watchers(Context *on_finish);
+  void handle_shut_down_pool_watchers(int r, Context *on_finish);
+
+  void shut_down_image_map(Context *on_finish);
+  void handle_shut_down_image_map(int r, Context *on_finish);
+
+  void handle_acquire_image(const std::string &global_image_id,
+                            const std::string &instance_id,
+                            Context* on_finish);
+  void handle_release_image(const std::string &global_image_id,
+                            const std::string &instance_id,
+                            Context* on_finish);
+  void handle_remove_image(const std::string &mirror_uuid,
+                           const std::string &global_image_id,
+                           const std::string &instance_id,
+                           Context* on_finish);
+
+  std::string m_namespace_name;
+  librados::IoCtx m_local_io_ctx;
+  librados::IoCtx m_remote_io_ctx;
+  std::string m_local_mirror_uuid;
+  std::string m_local_mirror_peer_uuid;
+  RemotePoolMeta m_remote_pool_meta;
+  Threads<ImageCtxT> *m_threads;
+  Throttler<ImageCtxT> *m_image_sync_throttler;
+  Throttler<ImageCtxT> *m_image_deletion_throttler;
+  ServiceDaemon<ImageCtxT> *m_service_daemon;
+  journal::CacheManagerHandler *m_cache_manager_handler;
+  PoolMetaCache* m_pool_meta_cache;
+
+  // serializes access to the component pointers and init/shut-down state
+  mutable ceph::mutex m_lock;
+
+  int m_ret_val = 0;
+  // saved init()/shut_down() completion, fired at the end of the sequence
+  Context *m_on_finish = nullptr;
+
+  std::unique_ptr<MirrorStatusUpdater<ImageCtxT>> m_local_status_updater;
+  std::unique_ptr<MirrorStatusUpdater<ImageCtxT>> m_remote_status_updater;
+
+  PoolWatcherListener m_local_pool_watcher_listener;
+  std::unique_ptr<PoolWatcher<ImageCtxT>> m_local_pool_watcher;
+
+  PoolWatcherListener m_remote_pool_watcher_listener;
+  std::unique_ptr<PoolWatcher<ImageCtxT>> m_remote_pool_watcher;
+
+  std::unique_ptr<InstanceReplayer<ImageCtxT>> m_instance_replayer;
+  std::unique_ptr<ImageDeleter<ImageCtxT>> m_image_deleter;
+
+  ImageMapListener m_image_map_listener;
+  std::unique_ptr<ImageMap<ImageCtxT>> m_image_map;
+
+  std::unique_ptr<InstanceWatcher<ImageCtxT>> m_instance_watcher;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::NamespaceReplayer<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/PoolMetaCache.cc b/src/tools/rbd_mirror/PoolMetaCache.cc
new file mode 100644
index 000000000..261802a55
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolMetaCache.cc
@@ -0,0 +1,83 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/dout.h"
+#include "tools/rbd_mirror/PoolMetaCache.h"
+#include <shared_mutex>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PoolMetaCache: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+// Look up the cached local pool metadata for the given pool id.
+// Returns 0 and fills *local_pool_meta, or -ENOENT if not cached.
+int PoolMetaCache::get_local_pool_meta(
+    int64_t pool_id,
+    LocalPoolMeta* local_pool_meta) const {
+  dout(15) << "pool_id=" << pool_id << dendl;
+
+  // readers share the lock
+  std::shared_lock locker{m_lock};
+  auto it = m_local_pool_metas.find(pool_id);
+  if (it == m_local_pool_metas.end()) {
+    return -ENOENT;
+  }
+
+  *local_pool_meta = it->second;
+  return 0;
+}
+
+// Insert or overwrite the cached local pool metadata for the given pool.
+void PoolMetaCache::set_local_pool_meta(
+    int64_t pool_id,
+    const LocalPoolMeta& local_pool_meta) {
+  dout(15) << "pool_id=" << pool_id << ", "
+           << "local_pool_meta=" << local_pool_meta << dendl;
+
+  std::unique_lock locker(m_lock);
+  m_local_pool_metas[pool_id] = local_pool_meta;
+}
+
+// Drop the cached local pool metadata for the given pool (no-op if absent).
+void PoolMetaCache::remove_local_pool_meta(int64_t pool_id) {
+  dout(15) << "pool_id=" << pool_id << dendl;
+
+  std::unique_lock locker(m_lock);
+  m_local_pool_metas.erase(pool_id);
+}
+
+// Look up the cached remote pool metadata for the given pool id.
+// Returns 0 and fills *remote_pool_meta, or -ENOENT if not cached.
+int PoolMetaCache::get_remote_pool_meta(
+    int64_t pool_id,
+    RemotePoolMeta* remote_pool_meta) const {
+  dout(15) << "pool_id=" << pool_id << dendl;
+
+  // readers share the lock
+  std::shared_lock locker{m_lock};
+  auto it = m_remote_pool_metas.find(pool_id);
+  if (it == m_remote_pool_metas.end()) {
+    return -ENOENT;
+  }
+
+  *remote_pool_meta = it->second;
+  return 0;
+}
+
+// Insert or overwrite the cached remote pool metadata for the given pool.
+void PoolMetaCache::set_remote_pool_meta(
+    int64_t pool_id,
+    const RemotePoolMeta& remote_pool_meta) {
+  dout(15) << "pool_id=" << pool_id << ", "
+           << "remote_pool_meta=" << remote_pool_meta << dendl;
+
+  std::unique_lock locker(m_lock);
+  m_remote_pool_metas[pool_id] = remote_pool_meta;
+}
+
+// Drop the cached remote pool metadata for the given pool (no-op if absent).
+void PoolMetaCache::remove_remote_pool_meta(int64_t pool_id) {
+  dout(15) << "pool_id=" << pool_id << dendl;
+
+  std::unique_lock locker(m_lock);
+  m_remote_pool_metas.erase(pool_id);
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/PoolMetaCache.h b/src/tools/rbd_mirror/PoolMetaCache.h
new file mode 100644
index 000000000..f0440120f
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolMetaCache.h
@@ -0,0 +1,47 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_META_CACHE_H
+#define CEPH_RBD_MIRROR_POOL_META_CACHE_H
+
+#include "include/int_types.h"
+#include "common/ceph_mutex.h"
+#include "tools/rbd_mirror/Types.h"
+#include <map>
+
+namespace rbd {
+namespace mirror {
+
+// Thread-safe cache mapping pool ids to their local and remote mirroring
+// metadata; readers take a shared lock, writers an exclusive lock.
+class PoolMetaCache {
+public:
+  // NOTE(review): single-argument ctor is implicit; consider marking it
+  // `explicit` (needs a check of existing call sites first).
+  PoolMetaCache(CephContext* cct)
+    : m_cct(cct) {
+  }
+  PoolMetaCache(const PoolMetaCache&) = delete;
+  PoolMetaCache& operator=(const PoolMetaCache&) = delete;
+
+  // returns 0 and fills *local_pool_meta, or -ENOENT if not cached
+  int get_local_pool_meta(int64_t pool_id,
+                          LocalPoolMeta* local_pool_meta) const;
+  void set_local_pool_meta(int64_t pool_id,
+                           const LocalPoolMeta& local_pool_meta);
+  void remove_local_pool_meta(int64_t pool_id);
+
+  // returns 0 and fills *remote_pool_meta, or -ENOENT if not cached
+  int get_remote_pool_meta(int64_t pool_id,
+                           RemotePoolMeta* remote_pool_meta) const;
+  void set_remote_pool_meta(int64_t pool_id,
+                            const RemotePoolMeta& remote_pool_meta);
+  void remove_remote_pool_meta(int64_t pool_id);
+
+private:
+  CephContext* m_cct;
+
+  // guards both maps below
+  mutable ceph::shared_mutex m_lock =
+    ceph::make_shared_mutex("rbd::mirror::PoolMetaCache::m_lock");
+  std::map<int64_t, LocalPoolMeta> m_local_pool_metas;
+  std::map<int64_t, RemotePoolMeta> m_remote_pool_metas;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_POOL_META_CACHE_H
diff --git a/src/tools/rbd_mirror/PoolReplayer.cc b/src/tools/rbd_mirror/PoolReplayer.cc
new file mode 100644
index 000000000..de0d60241
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolReplayer.cc
@@ -0,0 +1,1109 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "PoolReplayer.h"
+#include "common/Cond.h"
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/ceph_argparse.h"
+#include "common/code_environment.h"
+#include "common/common_init.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "global/global_context.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Namespace.h"
+#include "PoolMetaCache.h"
+#include "RemotePoolPoller.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PoolReplayer: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using ::operator<<;
+
+namespace {
+
+// service daemon attribute keys reported for each pool replayer
+const std::string SERVICE_DAEMON_INSTANCE_ID_KEY("instance_id");
+const std::string SERVICE_DAEMON_LEADER_KEY("leader");
+
+// NOTE(review): per the name, these config keys are treated as unique to
+// a peer connection; confirm at the use sites (not visible in this chunk)
+const std::vector<std::string> UNIQUE_PEER_CONFIG_KEYS {
+  {"monmap", "mon_host", "mon_dns_srv_name", "key", "keyfile", "keyring"}};
+
+// Abstract base for admin-socket commands that target one PoolReplayer;
+// subclasses implement call() to invoke a single replayer operation.
+template <typename I>
+class PoolReplayerAdminSocketCommand {
+public:
+  PoolReplayerAdminSocketCommand(PoolReplayer<I> *pool_replayer)
+    : pool_replayer(pool_replayer) {
+  }
+  virtual ~PoolReplayerAdminSocketCommand() {}
+  virtual int call(Formatter *f) = 0;
+protected:
+  PoolReplayer<I> *pool_replayer;  // not owned
+};
+
+// asok "rbd mirror status": dump the replayer status via the formatter.
+template <typename I>
+class StatusCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+  explicit StatusCommand(PoolReplayer<I> *pool_replayer)
+    : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->pool_replayer->print_status(f);
+    return 0;
+  }
+};
+
+// asok "rbd mirror start": start (resume) the replayer.
+template <typename I>
+class StartCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+  explicit StartCommand(PoolReplayer<I> *pool_replayer)
+    : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->pool_replayer->start();
+    return 0;
+  }
+};
+
+// asok "rbd mirror stop": stop the replayer (the `true` argument marks
+// this as a manually requested stop).
+template <typename I>
+class StopCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+  explicit StopCommand(PoolReplayer<I> *pool_replayer)
+    : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->pool_replayer->stop(true);
+    return 0;
+  }
+};
+
+// asok "rbd mirror restart": restart the replayer.
+template <typename I>
+class RestartCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+  explicit RestartCommand(PoolReplayer<I> *pool_replayer)
+    : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->pool_replayer->restart();
+    return 0;
+  }
+};
+
+// asok "rbd mirror flush": flush the replayer.
+template <typename I>
+class FlushCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+  explicit FlushCommand(PoolReplayer<I> *pool_replayer)
+    : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->pool_replayer->flush();
+    return 0;
+  }
+};
+
+// asok "rbd mirror leader release": voluntarily give up the leader role.
+template <typename I>
+class LeaderReleaseCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+  explicit LeaderReleaseCommand(PoolReplayer<I> *pool_replayer)
+    : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+  }
+
+  int call(Formatter *f) override {
+    this->pool_replayer->release_leader();
+    return 0;
+  }
+};
+
+// Registers the per-pool "rbd mirror ..." admin socket commands and
+// dispatches invocations to the owning PoolReplayer.
+template <typename I>
+class PoolReplayerAdminSocketHook : public AdminSocketHook {
+public:
+  PoolReplayerAdminSocketHook(CephContext *cct, const std::string &name,
+                              PoolReplayer<I> *pool_replayer)
+    : admin_socket(cct->get_admin_socket()) {
+    std::string command;
+    int r;
+
+    // each command object is only tracked (and later freed) if its
+    // registration succeeded
+    command = "rbd mirror status " + name;
+    r = admin_socket->register_command(command, this,
+                                       "get status for rbd mirror " + name);
+    if (r == 0) {
+      commands[command] = new StatusCommand<I>(pool_replayer);
+    }
+
+    command = "rbd mirror start " + name;
+    r = admin_socket->register_command(command, this,
+                                       "start rbd mirror " + name);
+    if (r == 0) {
+      commands[command] = new StartCommand<I>(pool_replayer);
+    }
+
+    command = "rbd mirror stop " + name;
+    r = admin_socket->register_command(command, this,
+                                       "stop rbd mirror " + name);
+    if (r == 0) {
+      commands[command] = new StopCommand<I>(pool_replayer);
+    }
+
+    command = "rbd mirror restart " + name;
+    r = admin_socket->register_command(command, this,
+                                       "restart rbd mirror " + name);
+    if (r == 0) {
+      commands[command] = new RestartCommand<I>(pool_replayer);
+    }
+
+    command = "rbd mirror flush " + name;
+    r = admin_socket->register_command(command, this,
+                                       "flush rbd mirror " + name);
+    if (r == 0) {
+      commands[command] = new FlushCommand<I>(pool_replayer);
+    }
+
+    command = "rbd mirror leader release " + name;
+    r = admin_socket->register_command(command, this,
+                                       "release rbd mirror leader " + name);
+    if (r == 0) {
+      commands[command] = new LeaderReleaseCommand<I>(pool_replayer);
+    }
+  }
+
+  ~PoolReplayerAdminSocketHook() override {
+    // best-effort unregistration; owned command objects are freed here
+    (void)admin_socket->unregister_commands(this);
+    for (auto i = commands.begin(); i != commands.end(); ++i) {
+      delete i->second;
+    }
+  }
+
+  int call(std::string_view command, const cmdmap_t& cmdmap,
+           Formatter *f,
+           std::ostream& ss,
+           bufferlist& out) override {
+    // only commands registered above can reach this hook
+    auto i = commands.find(command);
+    ceph_assert(i != commands.end());
+    return i->second->call(f);
+  }
+
+private:
+  // transparent comparator allows lookup by string_view without copying
+  typedef std::map<std::string, PoolReplayerAdminSocketCommand<I>*,
+                   std::less<>> Commands;
+
+  AdminSocket *admin_socket;  // not owned
+  Commands commands;          // owns the command objects
+};
+
+} // anonymous namespace
+
+// Forwards remote pool metadata updates from the RemotePoolPoller back
+// to the owning PoolReplayer.
+template <typename I>
+struct PoolReplayer<I>::RemotePoolPollerListener
+  : public remote_pool_poller::Listener {
+
+  PoolReplayer<I>* m_pool_replayer;  // not owned
+
+  RemotePoolPollerListener(PoolReplayer<I>* pool_replayer)
+    : m_pool_replayer(pool_replayer) {
+  }
+
+  void handle_updated(const RemotePoolMeta& remote_pool_meta) override {
+    m_pool_replayer->handle_remote_pool_meta_updated(remote_pool_meta);
+  }
+};
+
+// Construct a PoolReplayer that mirrors a single local pool from the
+// given peer; heavyweight initialization is deferred to init().
+template <typename I>
+PoolReplayer<I>::PoolReplayer(
+  Threads<I> *threads, ServiceDaemon<I> *service_daemon,
+  journal::CacheManagerHandler *cache_manager_handler,
+  PoolMetaCache* pool_meta_cache, int64_t local_pool_id,
+  const PeerSpec &peer, const std::vector<const char*> &args) :
+  m_threads(threads),
+  m_service_daemon(service_daemon),
+  m_cache_manager_handler(cache_manager_handler),
+  m_pool_meta_cache(pool_meta_cache),
+  m_local_pool_id(local_pool_id),
+  m_peer(peer),
+  m_args(args),
+  // lock name embeds the peer spec to disambiguate multiple replayers
+  m_lock(ceph::make_mutex("rbd::mirror::PoolReplayer " + stringify(peer))),
+  m_pool_replayer_thread(this),
+  m_leader_listener(this) {
+}
+
+// Destructor: synchronously shuts the replayer down; the admin socket
+// hook must already have been destroyed by then.
+template <typename I>
+PoolReplayer<I>::~PoolReplayer()
+{
+  shut_down();
+
+  ceph_assert(m_asok_hook == nullptr);
+}
+
+// Returns true if this replayer's cluster connection was blocklisted.
+template <typename I>
+bool PoolReplayer<I>::is_blocklisted() const {
+  std::lock_guard locker{m_lock};
+  return m_blocklisted;
+}
+
+// Returns true if a leader watcher exists and this daemon currently
+// holds the leader role.
+template <typename I>
+bool PoolReplayer<I>::is_leader() const {
+  std::lock_guard locker{m_lock};
+  return m_leader_watcher && m_leader_watcher->is_leader();
+}
+
+// Returns true while the replayer thread is active and no stop has been
+// requested.  NOTE(review): m_stopping is read without m_lock here --
+// presumably it is atomic; verify its declaration in the header.
+template <typename I>
+bool PoolReplayer<I>::is_running() const {
+  return m_pool_replayer_thread.is_started() && !m_stopping;
+}
+
+template <typename I>
+void PoolReplayer<I>::init(const std::string& site_name) {
+  // Bring up the full pool-replayer stack: cluster connections, pool
+  // io contexts, throttlers, remote pool poller, the default namespace
+  // replayer, and the leader watcher.  On any failure a service-daemon
+  // callout is raised and we return early without starting the run
+  // thread, leaving is_running() false.
+  std::lock_guard locker{m_lock};
+
+  ceph_assert(!m_pool_replayer_thread.is_started());
+
+  // reset state
+  m_stopping = false;
+  m_blocklisted = false;
+  m_site_name = site_name;
+
+  dout(10) << "replaying for " << m_peer << dendl;
+  // connect to the local cluster using the daemon's own identity
+  int r = init_rados(g_ceph_context->_conf->cluster,
+                     g_ceph_context->_conf->name.to_str(),
+                     "", "", "local cluster", &m_local_rados, false);
+  if (r < 0) {
+    m_callout_id = m_service_daemon->add_or_update_callout(
+      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+      "unable to connect to local cluster");
+    return;
+  }
+
+  // connect to the remote peer; strip cluster-specific config overrides
+  // so local-only settings don't leak into the peer connection
+  r = init_rados(m_peer.cluster_name, m_peer.client_name,
+                 m_peer.mon_host, m_peer.key,
+                 std::string("remote peer ") + stringify(m_peer),
+                 &m_remote_rados, true);
+  if (r < 0) {
+    m_callout_id = m_service_daemon->add_or_update_callout(
+      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+      "unable to connect to remote cluster");
+    return;
+  }
+
+  r = m_local_rados->ioctx_create2(m_local_pool_id, m_local_io_ctx);
+  if (r < 0) {
+    derr << "error accessing local pool " << m_local_pool_id << ": "
+         << cpp_strerror(r) << dendl;
+    return;
+  }
+
+  // apply any pool-level config overrides to this connection's context
+  auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+  librbd::api::Config<I>::apply_pool_overrides(m_local_io_ctx, &cct->_conf);
+
+  r = librbd::cls_client::mirror_uuid_get(&m_local_io_ctx,
+                                          &m_local_mirror_uuid);
+  if (r < 0) {
+    derr << "failed to retrieve local mirror uuid from pool "
+         << m_local_io_ctx.get_pool_name() << ": " << cpp_strerror(r) << dendl;
+    m_callout_id = m_service_daemon->add_or_update_callout(
+      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+      "unable to query local mirror uuid");
+    return;
+  }
+
+  // the remote pool is looked up by name: pool ids may differ between
+  // clusters, but mirrored pools share the same name
+  r = m_remote_rados->ioctx_create(m_local_io_ctx.get_pool_name().c_str(),
+                                   m_remote_io_ctx);
+  if (r < 0) {
+    derr << "error accessing remote pool " << m_local_io_ctx.get_pool_name()
+         << ": " << cpp_strerror(r) << dendl;
+    m_callout_id = m_service_daemon->add_or_update_callout(
+      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_WARNING,
+      "unable to access remote pool");
+    return;
+  }
+
+  dout(10) << "connected to " << m_peer << dendl;
+
+  // throttlers shared by all namespace replayers of this pool
+  m_image_sync_throttler.reset(
+    Throttler<I>::create(cct, "rbd_mirror_concurrent_image_syncs"));
+
+  m_image_deletion_throttler.reset(
+    Throttler<I>::create(cct, "rbd_mirror_concurrent_image_deletions"));
+
+  // poll the remote pool for its mirror uuid / site name metadata
+  m_remote_pool_poller_listener.reset(new RemotePoolPollerListener(this));
+  m_remote_pool_poller.reset(RemotePoolPoller<I>::create(
+    m_threads, m_remote_io_ctx, m_site_name, m_local_mirror_uuid,
+    *m_remote_pool_poller_listener));
+
+  C_SaferCond on_pool_poller_init;
+  m_remote_pool_poller->init(&on_pool_poller_init);
+  r = on_pool_poller_init.wait();
+  if (r < 0) {
+    derr << "failed to initialize remote pool poller: " << cpp_strerror(r)
+         << dendl;
+    m_callout_id = m_service_daemon->add_or_update_callout(
+      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+      "unable to initialize remote pool poller");
+    m_remote_pool_poller.reset();
+    return;
+  }
+  // the initial poll must have populated the remote pool metadata
+  ceph_assert(!m_remote_pool_meta.mirror_uuid.empty());
+  m_pool_meta_cache->set_remote_pool_meta(
+    m_remote_io_ctx.get_id(), m_remote_pool_meta);
+  m_pool_meta_cache->set_local_pool_meta(
+    m_local_io_ctx.get_id(), {m_local_mirror_uuid});
+
+  // the default (unnamed) namespace always gets a replayer; additional
+  // namespaces are managed dynamically by update_namespace_replayers()
+  m_default_namespace_replayer.reset(NamespaceReplayer<I>::create(
+    "", m_local_io_ctx, m_remote_io_ctx, m_local_mirror_uuid, m_peer.uuid,
+    m_remote_pool_meta, m_threads, m_image_sync_throttler.get(),
+    m_image_deletion_throttler.get(), m_service_daemon,
+    m_cache_manager_handler, m_pool_meta_cache));
+
+  C_SaferCond on_init;
+  m_default_namespace_replayer->init(&on_init);
+  r = on_init.wait();
+  if (r < 0) {
+    derr << "error initializing default namespace replayer: " << cpp_strerror(r)
+         << dendl;
+    m_callout_id = m_service_daemon->add_or_update_callout(
+      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+      "unable to initialize default namespace replayer");
+    m_default_namespace_replayer.reset();
+    return;
+  }
+
+  // participate in leader election for this pool
+  m_leader_watcher.reset(LeaderWatcher<I>::create(m_threads, m_local_io_ctx,
+                                                  &m_leader_listener));
+  r = m_leader_watcher->init();
+  if (r < 0) {
+    derr << "error initializing leader watcher: " << cpp_strerror(r) << dendl;
+    m_callout_id = m_service_daemon->add_or_update_callout(
+      m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+      "unable to initialize leader messenger object");
+    m_leader_watcher.reset();
+    return;
+  }
+
+  // clear any callout raised by an earlier failed init attempt
+  if (m_callout_id != service_daemon::CALLOUT_ID_NONE) {
+    m_service_daemon->remove_callout(m_local_pool_id, m_callout_id);
+    m_callout_id = service_daemon::CALLOUT_ID_NONE;
+  }
+
+  m_service_daemon->add_or_update_attribute(
+    m_local_io_ctx.get_id(), SERVICE_DAEMON_INSTANCE_ID_KEY,
+    stringify(m_local_io_ctx.get_instance_id()));
+
+  // everything initialized: start the run() loop
+  m_pool_replayer_thread.create("pool replayer");
+}
+
+template <typename I>
+void PoolReplayer<I>::shut_down() {
+  // Tear down everything created by init(), in roughly reverse order.
+  // Safe to call even if init() failed part-way (every step is guarded).
+  {
+    std::lock_guard l{m_lock};
+    m_stopping = true;
+    m_cond.notify_all();   // wake the run() loop so it can exit
+  }
+  if (m_pool_replayer_thread.is_started()) {
+    m_pool_replayer_thread.join();
+  }
+
+  if (m_leader_watcher) {
+    m_leader_watcher->shut_down();
+  }
+  m_leader_watcher.reset();
+
+  if (m_default_namespace_replayer) {
+    C_SaferCond on_shut_down;
+    m_default_namespace_replayer->shut_down(&on_shut_down);
+    on_shut_down.wait();
+  }
+  m_default_namespace_replayer.reset();
+
+  if (m_remote_pool_poller) {
+    C_SaferCond ctx;
+    m_remote_pool_poller->shut_down(&ctx);
+    ctx.wait();
+
+    // pool meta was only cached after a successful poller init
+    m_pool_meta_cache->remove_remote_pool_meta(m_remote_io_ctx.get_id());
+    m_pool_meta_cache->remove_local_pool_meta(m_local_io_ctx.get_id());
+  }
+  m_remote_pool_poller.reset();
+  m_remote_pool_poller_listener.reset();
+
+  m_image_sync_throttler.reset();
+  m_image_deletion_throttler.reset();
+
+  // drop cluster connections last
+  m_local_rados.reset();
+  m_remote_rados.reset();
+}
+
+template <typename I>
+int PoolReplayer<I>::init_rados(const std::string &cluster_name,
+                                const std::string &client_name,
+                                const std::string &mon_host,
+                                const std::string &key,
+                                const std::string &description,
+                                RadosRef *rados_ref,
+                                bool strip_cluster_overrides) {
+  // Build a librados connection for either the local cluster or a remote
+  // peer.  Configuration is layered: config file -> environment -> daemon
+  // command-line args -> explicit mon_host/key overrides.  When
+  // strip_cluster_overrides is set (remote peers), cluster-specific
+  // settings changed by the env/cli layers are reverted afterwards.
+  // Returns 0 on success or a negative errno; *rados_ref is only usable
+  // on success.
+  //
+  // NOTE: manually bootstrap a CephContext here instead of via
+  // the librados API to avoid mixing global singletons between
+  // the librados shared library and the daemon
+  // TODO: eliminate intermingling of global singletons within Ceph APIs
+  CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT);
+  if (client_name.empty() || !iparams.name.from_str(client_name)) {
+    derr << "error initializing cluster handle for " << description << dendl;
+    return -EINVAL;
+  }
+
+  CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY,
+                                    CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+  cct->_conf->cluster = cluster_name;
+
+  // librados::Rados::conf_read_file
+  int r = cct->_conf.parse_config_files(nullptr, nullptr, 0);
+  if (r < 0 && r != -ENOENT) {
+    // do not treat this as fatal, it might still be able to connect
+    derr << "could not read ceph conf for " << description << ": "
+         << cpp_strerror(r) << dendl;
+  }
+
+  // preserve cluster-specific config settings before applying environment/cli
+  // overrides
+  std::map<std::string, std::string> config_values;
+  if (strip_cluster_overrides) {
+    // remote peer connections shouldn't apply cluster-specific
+    // configuration settings
+    for (auto& key : UNIQUE_PEER_CONFIG_KEYS) {
+      config_values[key] = cct->_conf.get_val<std::string>(key);
+    }
+  }
+
+  cct->_conf.parse_env(cct->get_module_type());
+
+  // librados::Rados::conf_parse_env
+  std::vector<const char*> args;
+  r = cct->_conf.parse_argv(args);
+  if (r < 0) {
+    derr << "could not parse environment for " << description << ":"
+         << cpp_strerror(r) << dendl;
+    cct->put();
+    return r;
+  }
+  cct->_conf.parse_env(cct->get_module_type());
+
+  if (!m_args.empty()) {
+    // librados::Rados::conf_parse_argv -- apply the daemon's own
+    // command-line overrides to this connection as well
+    args = m_args;
+    r = cct->_conf.parse_argv(args);
+    if (r < 0) {
+      derr << "could not parse command line args for " << description << ": "
+           << cpp_strerror(r) << dendl;
+      cct->put();
+      return r;
+    }
+  }
+
+  if (strip_cluster_overrides) {
+    // remote peer connections shouldn't apply cluster-specific
+    // configuration settings
+    for (auto& pair : config_values) {
+      auto value = cct->_conf.get_val<std::string>(pair.first);
+      if (pair.second != value) {
+        dout(0) << "reverting global config option override: "
+                << pair.first << ": " << value << " -> " << pair.second
+                << dendl;
+        cct->_conf.set_val_or_die(pair.first, pair.second);
+      }
+    }
+  }
+
+  // give each connection its own admin socket if the daemon has one enabled
+  if (!g_ceph_context->_conf->admin_socket.empty()) {
+    cct->_conf.set_val_or_die("admin_socket",
+                              "$run_dir/$name.$pid.$cluster.$cctid.asok");
+  }
+
+  // peer-supplied bootstrap overrides take highest precedence
+  if (!mon_host.empty()) {
+    r = cct->_conf.set_val("mon_host", mon_host);
+    if (r < 0) {
+      derr << "failed to set mon_host config for " << description << ": "
+           << cpp_strerror(r) << dendl;
+      cct->put();
+      return r;
+    }
+  }
+
+  if (!key.empty()) {
+    r = cct->_conf.set_val("key", key);
+    if (r < 0) {
+      derr << "failed to set key config for " << description << ": "
+           << cpp_strerror(r) << dendl;
+      cct->put();
+      return r;
+    }
+  }
+
+  // disable unnecessary librbd cache
+  cct->_conf.set_val_or_die("rbd_cache", "false");
+  cct->_conf.apply_changes(nullptr);
+  cct->_conf.complain_about_parse_error(cct);
+
+  rados_ref->reset(new librados::Rados());
+
+  // the Rados object takes its own reference on cct; drop ours
+  r = (*rados_ref)->init_with_context(cct);
+  ceph_assert(r == 0);
+  cct->put();
+
+  r = (*rados_ref)->connect();
+  if (r < 0) {
+    derr << "error connecting to " << description << ": "
+         << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+template <typename I>
+void PoolReplayer<I>::run() {
+  // Main loop of the pool replayer thread: periodically reconcile the set
+  // of namespace replayers with the pool's mirroring-enabled namespaces,
+  // and exit once stopped or blocklisted.
+  dout(20) << dendl;
+
+  while (true) {
+    // (re)register the admin socket hook if the pool/peer name changed
+    std::string asok_hook_name = m_local_io_ctx.get_pool_name() + " " +
+                                 m_peer.cluster_name;
+    if (m_asok_hook_name != asok_hook_name || m_asok_hook == nullptr) {
+      m_asok_hook_name = asok_hook_name;
+      delete m_asok_hook;
+
+      m_asok_hook = new PoolReplayerAdminSocketHook<I>(g_ceph_context,
+                                                       m_asok_hook_name, this);
+    }
+
+    with_namespace_replayers([this]() { update_namespace_replayers(); });
+
+    std::unique_lock locker{m_lock};
+
+    // a blocklisted watcher/replayer forces a full stop; the daemon is
+    // expected to observe is_blocklisted() and restart the pool replayer
+    if (m_leader_watcher->is_blocklisted() ||
+        m_default_namespace_replayer->is_blocklisted()) {
+      m_blocklisted = true;
+      m_stopping = true;
+    }
+
+    for (auto &it : m_namespace_replayers) {
+      if (it.second->is_blocklisted()) {
+        m_blocklisted = true;
+        m_stopping = true;
+        break;
+      }
+    }
+
+    if (m_stopping) {
+      break;
+    }
+
+    // sleep until the next refresh interval or until woken by stop()
+    auto seconds = g_ceph_context->_conf.get_val<uint64_t>(
+        "rbd_mirror_pool_replayers_refresh_interval");
+    m_cond.wait_for(locker, ceph::make_timespan(seconds));
+  }
+
+  // shut down namespace replayers (m_stopping makes the update remove all)
+  with_namespace_replayers([this]() { update_namespace_replayers(); });
+
+  delete m_asok_hook;
+  m_asok_hook = nullptr;
+}
+
+template <typename I>
+void PoolReplayer<I>::update_namespace_replayers() {
+  // Reconcile m_namespace_replayers with the namespaces that currently
+  // have mirroring enabled: shut down replayers for removed namespaces,
+  // create+init replayers for new ones, and propagate the current leader
+  // state to the newly created replayers.
+  //
+  // Called with m_lock held (via with_namespace_replayers); the lock is
+  // temporarily dropped while waiting for async init/shut-down to finish.
+  dout(20) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  std::set<std::string> mirroring_namespaces;
+  if (!m_stopping) {
+    // when stopping, leave the set empty so every replayer is torn down
+    int r = list_mirroring_namespaces(&mirroring_namespaces);
+    if (r < 0) {
+      return;
+    }
+  }
+
+  auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+  C_SaferCond cond;
+  auto gather_ctx = new C_Gather(cct, &cond);
+  for (auto it = m_namespace_replayers.begin();
+       it != m_namespace_replayers.end(); ) {
+    auto iter = mirroring_namespaces.find(it->first);
+    if (iter == mirroring_namespaces.end()) {
+      // namespace no longer mirrored: shut its replayer down asynchronously
+      auto namespace_replayer = it->second;
+      auto on_shut_down = new LambdaContext(
+        [namespace_replayer, ctx=gather_ctx->new_sub()](int r) {
+          delete namespace_replayer;
+          ctx->complete(r);
+        });
+      m_service_daemon->remove_namespace(m_local_pool_id, it->first);
+      namespace_replayer->shut_down(on_shut_down);
+      it = m_namespace_replayers.erase(it);
+    } else {
+      // already have a replayer; drop from the "to create" set
+      mirroring_namespaces.erase(iter);
+      it++;
+    }
+  }
+
+  // mirroring_namespaces now contains only namespaces needing a new replayer
+  for (auto &name : mirroring_namespaces) {
+    auto namespace_replayer = NamespaceReplayer<I>::create(
+        name, m_local_io_ctx, m_remote_io_ctx, m_local_mirror_uuid, m_peer.uuid,
+        m_remote_pool_meta, m_threads, m_image_sync_throttler.get(),
+        m_image_deletion_throttler.get(), m_service_daemon,
+        m_cache_manager_handler, m_pool_meta_cache);
+    auto on_init = new LambdaContext(
+        // capturing mirroring_namespaces by reference is safe: we wait on
+        // the gather below before it goes out of scope
+        [this, namespace_replayer, name, &mirroring_namespaces,
+         ctx=gather_ctx->new_sub()](int r) {
+          std::lock_guard locker{m_lock};
+          if (r < 0) {
+            derr << "failed to initialize namespace replayer for namespace "
+                 << name << ": " << cpp_strerror(r) << dendl;
+            delete namespace_replayer;
+            mirroring_namespaces.erase(name);
+          } else {
+            m_namespace_replayers[name] = namespace_replayer;
+            m_service_daemon->add_namespace(m_local_pool_id, name);
+          }
+          ctx->complete(r);
+        });
+    namespace_replayer->init(on_init);
+  }
+
+  gather_ctx->activate();
+
+  // drop the lock while the completions run (they re-take it)
+  m_lock.unlock();
+  cond.wait();
+  m_lock.lock();
+
+  if (m_leader) {
+    // we are the leader: new replayers must acquire leadership and learn
+    // about the current instance set
+    C_SaferCond acquire_cond;
+    auto acquire_gather_ctx = new C_Gather(cct, &acquire_cond);
+
+    for (auto &name : mirroring_namespaces) {
+      namespace_replayer_acquire_leader(name, acquire_gather_ctx->new_sub());
+    }
+    acquire_gather_ctx->activate();
+
+    m_lock.unlock();
+    acquire_cond.wait();
+    m_lock.lock();
+
+    std::vector<std::string> instance_ids;
+    m_leader_watcher->list_instances(&instance_ids);
+
+    for (auto &name : mirroring_namespaces) {
+      auto it = m_namespace_replayers.find(name);
+      if (it == m_namespace_replayers.end()) {
+        // acquiring leader for this namespace replayer failed
+        continue;
+      }
+      it->second->handle_instances_added(instance_ids);
+    }
+  } else {
+    // follower: just tell new replayers who the current leader is
+    std::string leader_instance_id;
+    if (m_leader_watcher->get_leader_instance_id(&leader_instance_id)) {
+      for (auto &name : mirroring_namespaces) {
+        m_namespace_replayers[name]->handle_update_leader(leader_instance_id);
+      }
+    }
+  }
+}
+
+template <typename I>
+int PoolReplayer<I>::list_mirroring_namespaces(
+    std::set<std::string> *namespaces) {
+  // Collect the names of the local pool's namespaces that currently have
+  // mirroring enabled.  Called with m_lock held.  Returns 0 on success
+  // (individual namespace query failures are handled per-namespace) or a
+  // negative errno if the namespace listing itself fails.
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  std::vector<std::string> names;
+
+  int r = librbd::api::Namespace<I>::list(m_local_io_ctx, &names);
+  if (r < 0) {
+    derr << "failed to list namespaces: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  for (auto &name : names) {
+    // the mirror mode must be queried within the namespace itself --
+    // querying m_local_io_ctx directly would return the default
+    // namespace's mode for every listed namespace
+    librados::IoCtx ns_io_ctx;
+    ns_io_ctx.dup(m_local_io_ctx);
+    ns_io_ctx.set_namespace(name);
+
+    cls::rbd::MirrorMode mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+    int r = librbd::cls_client::mirror_mode_get(&ns_io_ctx, &mirror_mode);
+    if (r < 0 && r != -ENOENT) {
+      derr << "failed to get namespace mirror mode: " << cpp_strerror(r)
+           << dendl;
+      // on a transient error, keep namespaces we are already replaying so
+      // they are not torn down; skip ones we haven't started yet
+      if (m_namespace_replayers.count(name) == 0) {
+        continue;
+      }
+    } else if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+      dout(10) << "mirroring is disabled for namespace " << name << dendl;
+      continue;
+    }
+
+    namespaces->insert(name);
+  }
+
+  return 0;
+}
+
+template <typename I>
+void PoolReplayer<I>::reopen_logs()
+{
+  // Re-open the log files on both cluster connections (e.g. after log
+  // rotation); connections that were never established are skipped.
+  std::lock_guard locker{m_lock};
+
+  auto reopen = [](const RadosRef& rados) {
+    if (rados) {
+      reinterpret_cast<CephContext *>(rados->cct())->reopen_logs();
+    }
+  };
+  reopen(m_local_rados);
+  reopen(m_remote_rados);
+}
+
+template <typename I>
+void PoolReplayer<I>::namespace_replayer_acquire_leader(const std::string &name,
+                                                        Context *on_finish) {
+  // Ask the named namespace replayer to take over the leader role.  If
+  // acquisition fails the replayer is shut down and removed; the next
+  // update_namespace_replayers() pass will recreate it and retry.
+  // Called with m_lock held.
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  auto it = m_namespace_replayers.find(name);
+  ceph_assert(it != m_namespace_replayers.end());
+
+  // wrap on_finish so the failure path cleans up before completing
+  on_finish = new LambdaContext(
+      [this, name, on_finish](int r) {
+        if (r < 0) {
+          derr << "failed to handle acquire leader for namespace: "
+               << name << ": " << cpp_strerror(r) << dendl;
+
+          // remove the namespace replayer -- update_namespace_replayers will
+          // retry to create it and acquire leader.
+
+          std::lock_guard locker{m_lock};
+
+          auto namespace_replayer = m_namespace_replayers[name];
+          m_namespace_replayers.erase(name);
+          auto on_shut_down = new LambdaContext(
+              [namespace_replayer, on_finish](int r) {
+                delete namespace_replayer;
+                on_finish->complete(r);
+              });
+          m_service_daemon->remove_namespace(m_local_pool_id, name);
+          namespace_replayer->shut_down(on_shut_down);
+          return;
+        }
+        on_finish->complete(0);
+      });
+
+  it->second->handle_acquire_leader(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::print_status(Formatter *f) {
+  // Dump the replayer's status (peer, state, leader info, throttlers and
+  // per-namespace status) to the supplied formatter; used by the admin
+  // socket "status" command.
+  dout(20) << dendl;
+
+  // use the project assert so failures are logged/aborted consistently
+  // (plain assert() compiles out under NDEBUG)
+  ceph_assert(f);
+
+  std::lock_guard l{m_lock};
+
+  f->open_object_section("pool_replayer_status");
+  f->dump_stream("peer") << m_peer;
+  if (m_local_io_ctx.is_valid()) {
+    f->dump_string("pool", m_local_io_ctx.get_pool_name());
+    f->dump_stream("instance_id") << m_local_io_ctx.get_instance_id();
+  }
+
+  std::string state("running");
+  if (m_manual_stop) {
+    state = "stopped (manual)";
+  } else if (m_stopping) {
+    state = "stopped";
+  } else if (!is_running()) {
+    state = "error";
+  }
+  f->dump_string("state", state);
+
+  if (m_leader_watcher) {
+    std::string leader_instance_id;
+    m_leader_watcher->get_leader_instance_id(&leader_instance_id);
+    f->dump_string("leader_instance_id", leader_instance_id);
+
+    bool leader = m_leader_watcher->is_leader();
+    f->dump_bool("leader", leader);
+    if (leader) {
+      std::vector<std::string> instance_ids;
+      m_leader_watcher->list_instances(&instance_ids);
+      f->open_array_section("instances");
+      // iterate by const reference to avoid copying each id string
+      for (const auto& instance_id : instance_ids) {
+        f->dump_string("instance_id", instance_id);
+      }
+      f->close_section(); // instances
+    }
+  }
+
+  if (m_local_rados) {
+    auto cct = reinterpret_cast<CephContext *>(m_local_rados->cct());
+    f->dump_string("local_cluster_admin_socket",
+                   cct->_conf.get_val<std::string>("admin_socket"));
+  }
+  if (m_remote_rados) {
+    auto cct = reinterpret_cast<CephContext *>(m_remote_rados->cct());
+    f->dump_string("remote_cluster_admin_socket",
+                   cct->_conf.get_val<std::string>("admin_socket"));
+  }
+
+  if (m_image_sync_throttler) {
+    f->open_object_section("sync_throttler");
+    m_image_sync_throttler->print_status(f);
+    f->close_section(); // sync_throttler
+  }
+
+  if (m_image_deletion_throttler) {
+    f->open_object_section("deletion_throttler");
+    m_image_deletion_throttler->print_status(f);
+    f->close_section(); // deletion_throttler
+  }
+
+  if (m_default_namespace_replayer) {
+    m_default_namespace_replayer->print_status(f);
+  }
+
+  f->open_array_section("namespaces");
+  for (auto &it : m_namespace_replayers) {
+    f->open_object_section("namespace");
+    f->dump_string("name", it.first);
+    it.second->print_status(f);
+    f->close_section(); // namespace
+  }
+  f->close_section(); // namespaces
+
+  f->close_section(); // pool_replayer_status
+}
+
+template <typename I>
+void PoolReplayer<I>::start() {
+  // Resume replaying after a manual stop; ignored while shutting down.
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (m_stopping) {
+    return;
+  }
+
+  m_manual_stop = false;
+
+  if (m_default_namespace_replayer) {
+    m_default_namespace_replayer->start();
+  }
+  for (auto& [name, replayer] : m_namespace_replayers) {
+    replayer->start();
+  }
+}
+
+template <typename I>
+void PoolReplayer<I>::stop(bool manual) {
+  // manual=false requests a full shut down of the run loop; manual=true
+  // pauses the namespace replayers until start() is called again.
+  dout(20) << "enter: manual=" << manual << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (!manual) {
+    // full stop: flag the run() loop and wake it up
+    m_stopping = true;
+    m_cond.notify_all();
+    return;
+  }
+  if (m_stopping) {
+    // already shutting down; a manual stop is meaningless now
+    return;
+  }
+
+  m_manual_stop = true;
+
+  if (m_default_namespace_replayer) {
+    m_default_namespace_replayer->stop();
+  }
+  for (auto& [name, replayer] : m_namespace_replayers) {
+    replayer->stop();
+  }
+}
+
+template <typename I>
+void PoolReplayer<I>::restart() {
+  // Restart all namespace replayers; ignored while shutting down.
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (m_stopping) {
+    return;
+  }
+
+  if (m_default_namespace_replayer) {
+    m_default_namespace_replayer->restart();
+  }
+  for (auto& [name, replayer] : m_namespace_replayers) {
+    replayer->restart();
+  }
+}
+
+template <typename I>
+void PoolReplayer<I>::flush() {
+  // Flush all namespace replayers; skipped while stopped or stopping.
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (m_stopping || m_manual_stop) {
+    return;
+  }
+
+  if (m_default_namespace_replayer) {
+    m_default_namespace_replayer->flush();
+  }
+  for (auto& [name, replayer] : m_namespace_replayers) {
+    replayer->flush();
+  }
+}
+
+template <typename I>
+void PoolReplayer<I>::release_leader() {
+  // Voluntarily give up the leader role (admin socket command); a no-op
+  // while shutting down or before the leader watcher exists.
+  dout(20) << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (m_stopping || !m_leader_watcher) {
+    return;
+  }
+
+  m_leader_watcher->release_leader();
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_post_acquire_leader(Context *on_finish) {
+  // LeaderWatcher callback: this instance just became the pool leader.
+  // Propagate leadership to the default and all named namespace replayers
+  // under the namespace-replayers cooperative lock.
+  dout(20) << dendl;
+
+  with_namespace_replayers(
+      [this](Context *on_finish) {
+        dout(10) << "handle_post_acquire_leader" << dendl;
+
+        ceph_assert(ceph_mutex_is_locked(m_lock));
+
+        m_service_daemon->add_or_update_attribute(m_local_pool_id,
+                                                  SERVICE_DAEMON_LEADER_KEY,
+                                                  true);
+        // only record leadership once every replayer has acquired it
+        auto ctx = new LambdaContext(
+            [this, on_finish](int r) {
+              if (r == 0) {
+                std::lock_guard locker{m_lock};
+                m_leader = true;
+              }
+              on_finish->complete(r);
+            });
+
+        auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+        auto gather_ctx = new C_Gather(cct, ctx);
+
+        m_default_namespace_replayer->handle_acquire_leader(
+            gather_ctx->new_sub());
+
+        for (auto &it : m_namespace_replayers) {
+          namespace_replayer_acquire_leader(it.first, gather_ctx->new_sub());
+        }
+
+        gather_ctx->activate();
+      }, on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_pre_release_leader(Context *on_finish) {
+  // LeaderWatcher callback: this instance is about to give up the pool
+  // leader role.  Notify the default and all named namespace replayers
+  // before the lock/lease is actually released.
+  dout(20) << dendl;
+
+  with_namespace_replayers(
+      [this](Context *on_finish) {
+        dout(10) << "handle_pre_release_leader" << dendl;
+
+        ceph_assert(ceph_mutex_is_locked(m_lock));
+
+        m_leader = false;
+        m_service_daemon->remove_attribute(m_local_pool_id,
+                                           SERVICE_DAEMON_LEADER_KEY);
+
+        auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+        auto gather_ctx = new C_Gather(cct, on_finish);
+
+        m_default_namespace_replayer->handle_release_leader(
+            gather_ctx->new_sub());
+
+        for (auto &it : m_namespace_replayers) {
+          it.second->handle_release_leader(gather_ctx->new_sub());
+        }
+
+        gather_ctx->activate();
+      }, on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_update_leader(
+    const std::string &leader_instance_id) {
+  // LeaderWatcher callback: a (possibly new) leader was observed; fan the
+  // notification out to every namespace replayer.
+  dout(10) << "leader_instance_id=" << leader_instance_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_default_namespace_replayer->handle_update_leader(leader_instance_id);
+  for (auto& [name, replayer] : m_namespace_replayers) {
+    replayer->handle_update_leader(leader_instance_id);
+  }
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_instances_added(
+    const std::vector<std::string> &instance_ids) {
+  // LeaderWatcher callback: new rbd-mirror instances joined the pool.
+  // Only meaningful while we hold the leader role.
+  dout(5) << "instance_ids=" << instance_ids << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (!m_leader_watcher->is_leader()) {
+    return;
+  }
+
+  m_default_namespace_replayer->handle_instances_added(instance_ids);
+  for (auto& [name, replayer] : m_namespace_replayers) {
+    replayer->handle_instances_added(instance_ids);
+  }
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_instances_removed(
+    const std::vector<std::string> &instance_ids) {
+  // LeaderWatcher callback: rbd-mirror instances left the pool.
+  // Only meaningful while we hold the leader role.
+  dout(5) << "instance_ids=" << instance_ids << dendl;
+
+  std::lock_guard locker{m_lock};
+  if (!m_leader_watcher->is_leader()) {
+    return;
+  }
+
+  m_default_namespace_replayer->handle_instances_removed(instance_ids);
+  for (auto& [name, replayer] : m_namespace_replayers) {
+    replayer->handle_instances_removed(instance_ids);
+  }
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_remote_pool_meta_updated(
+    const RemotePoolMeta& remote_pool_meta) {
+  // RemotePoolPoller callback with refreshed remote pool metadata.
+  dout(5) << "remote_pool_meta=" << remote_pool_meta << dendl;
+
+  if (m_default_namespace_replayer) {
+    // metadata changed after start-up: the cached state is stale, so force
+    // the run() loop to stop (the daemon will restart the pool replayer)
+    derr << "remote pool metadata updated unexpectedly" << dendl;
+    std::unique_lock locker{m_lock};
+    m_stopping = true;
+    m_cond.notify_all();
+    return;
+  }
+
+  // still initializing: simply record the latest metadata
+  m_remote_pool_meta = remote_pool_meta;
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::PoolReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/PoolReplayer.h b/src/tools/rbd_mirror/PoolReplayer.h
new file mode 100644
index 000000000..e0fd75377
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolReplayer.h
@@ -0,0 +1,288 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_REPLAYER_H
+#define CEPH_RBD_MIRROR_POOL_REPLAYER_H
+
+#include "common/Cond.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+
+#include "tools/rbd_mirror/LeaderWatcher.h"
+#include "tools/rbd_mirror/NamespaceReplayer.h"
+#include "tools/rbd_mirror/Throttler.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/leader_watcher/Types.h"
+#include "tools/rbd_mirror/service_daemon/Types.h"
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+class AdminSocketHook;
+
+namespace journal { struct CacheManagerHandler; }
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class RemotePoolPoller;
+namespace remote_pool_poller { struct Listener; }
+
+struct PoolMetaCache;
+template <typename> class ServiceDaemon;
+template <typename> struct Threads;
+
+
+/**
+ * Controls mirroring for a single remote cluster.
+ *
+ * Owns the local/remote cluster connections, a NamespaceReplayer per
+ * mirroring-enabled namespace (plus one for the default namespace), and
+ * the per-pool leader election.  Runs its own thread (see run()).
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class PoolReplayer {
+public:
+  PoolReplayer(Threads<ImageCtxT> *threads,
+               ServiceDaemon<ImageCtxT> *service_daemon,
+               journal::CacheManagerHandler *cache_manager_handler,
+               PoolMetaCache* pool_meta_cache,
+               int64_t local_pool_id, const PeerSpec &peer,
+               const std::vector<const char*> &args);
+  ~PoolReplayer();
+  PoolReplayer(const PoolReplayer&) = delete;
+  PoolReplayer& operator=(const PoolReplayer&) = delete;
+
+  bool is_blocklisted() const;
+  bool is_leader() const;
+  bool is_running() const;
+
+  void init(const std::string& site_name);
+  void shut_down();
+
+  void run();
+
+  void print_status(Formatter *f);
+  void start();
+  void stop(bool manual);
+  void restart();
+  void flush();
+  void release_leader();
+  void reopen_logs();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   *  INIT
+   *    |
+   *    v
+   * <follower> <---------------------\
+   *    .                             |
+   *    . (leader acquired)           |
+   *    v                             |
+   *  NOTIFY_NAMESPACE_WATCHERS  NOTIFY_NAMESPACE_WATCHERS
+   *    |                             ^
+   *    v                             .
+   * <leader>                         .
+   *    .                             .
+   *    . (leader lost / shut down)   .
+   *    . . . . . . . . . . . . . . . .
+   *
+   * @endverbatim
+   */
+
+  struct RemotePoolPollerListener;
+
+  // establish a librados connection for the local cluster or remote peer
+  int init_rados(const std::string &cluster_name,
+                 const std::string &client_name,
+                 const std::string &mon_host,
+                 const std::string &key,
+                 const std::string &description, RadosRef *rados_ref,
+                 bool strip_cluster_overrides);
+
+  // reconcile namespace replayers with the pool's mirroring namespaces
+  void update_namespace_replayers();
+  int list_mirroring_namespaces(std::set<std::string> *namespaces);
+
+  void namespace_replayer_acquire_leader(const std::string &name,
+                                         Context *on_finish);
+
+  // leader_watcher::Listener callbacks (via LeaderListener below)
+  void handle_post_acquire_leader(Context *on_finish);
+  void handle_pre_release_leader(Context *on_finish);
+
+  void handle_update_leader(const std::string &leader_instance_id);
+
+  void handle_instances_added(const std::vector<std::string> &instance_ids);
+  void handle_instances_removed(const std::vector<std::string> &instance_ids);
+
+  // Runs `callback` while holding the namespace-replayers cooperative lock
+  // (m_namespace_replayers_locked), which serializes bulk operations on the
+  // namespace replayer set across threads.
+  // sync version, executed in the caller thread
+  template <typename L>
+  void with_namespace_replayers(L &&callback) {
+    std::lock_guard locker{m_lock};
+
+    if (m_namespace_replayers_locked) {
+      // another thread holds the cooperative lock -- wait for hand-off
+      ceph_assert(m_on_namespace_replayers_unlocked == nullptr);
+      C_SaferCond cond;
+      m_on_namespace_replayers_unlocked = &cond;
+      m_lock.unlock();
+      cond.wait();
+      m_lock.lock();
+    } else {
+      m_namespace_replayers_locked = true;
+    }
+
+    ceph_assert(m_namespace_replayers_locked);
+    callback(); // may temporarily release the lock
+    ceph_assert(m_namespace_replayers_locked);
+
+    if (m_on_namespace_replayers_unlocked == nullptr) {
+      m_namespace_replayers_locked = false;
+      return;
+    }
+
+    // hand the cooperative lock directly to the next waiter
+    m_threads->work_queue->queue(m_on_namespace_replayers_unlocked);
+    m_on_namespace_replayers_unlocked = nullptr;
+  }
+
+  // async version
+  template <typename L>
+  void with_namespace_replayers(L &&callback, Context *on_finish) {
+    std::lock_guard locker{m_lock};
+
+    // wrap on_finish to release (or hand off) the cooperative lock once
+    // the async callback chain completes
+    on_finish = librbd::util::create_async_context_callback(
+        m_threads->work_queue, new LambdaContext(
+            [this, on_finish](int r) {
+              {
+                std::lock_guard locker{m_lock};
+                ceph_assert(m_namespace_replayers_locked);
+
+                m_namespace_replayers_locked = false;
+
+                if (m_on_namespace_replayers_unlocked != nullptr) {
+                  m_namespace_replayers_locked = true;
+                  m_threads->work_queue->queue(m_on_namespace_replayers_unlocked);
+                  m_on_namespace_replayers_unlocked = nullptr;
+                }
+              }
+              on_finish->complete(r);
+            }));
+
+    auto on_lock = new LambdaContext(
+        [this, callback, on_finish](int) {
+          std::lock_guard locker{m_lock};
+          ceph_assert(m_namespace_replayers_locked);
+
+          callback(on_finish);
+        });
+
+    if (m_namespace_replayers_locked) {
+      // queue behind the current holder; it will hand the lock to us
+      ceph_assert(m_on_namespace_replayers_unlocked == nullptr);
+      m_on_namespace_replayers_unlocked = on_lock;
+      return;
+    }
+
+    m_namespace_replayers_locked = true;
+    m_threads->work_queue->queue(on_lock);
+  }
+
+  void handle_remote_pool_meta_updated(const RemotePoolMeta& remote_pool_meta);
+
+  Threads<ImageCtxT> *m_threads;
+  ServiceDaemon<ImageCtxT> *m_service_daemon;
+  journal::CacheManagerHandler *m_cache_manager_handler;
+  PoolMetaCache* m_pool_meta_cache;
+  int64_t m_local_pool_id = -1;
+  PeerSpec m_peer;
+  std::vector<const char*> m_args;
+
+  mutable ceph::mutex m_lock;
+  ceph::condition_variable m_cond;
+  std::string m_site_name;
+  bool m_stopping = false;       // run() loop should exit
+  bool m_manual_stop = false;    // paused via stop(manual=true)
+  bool m_blocklisted = false;    // a watcher/replayer was blocklisted
+
+  RadosRef m_local_rados;
+  RadosRef m_remote_rados;
+
+  librados::IoCtx m_local_io_ctx;
+  librados::IoCtx m_remote_io_ctx;
+
+  std::string m_local_mirror_uuid;
+
+  RemotePoolMeta m_remote_pool_meta;
+  std::unique_ptr<remote_pool_poller::Listener> m_remote_pool_poller_listener;
+  std::unique_ptr<RemotePoolPoller<ImageCtxT>> m_remote_pool_poller;
+
+  // replayer for the default (unnamed) namespace, plus one per named
+  // mirroring-enabled namespace (raw-owned; deleted on shut down)
+  std::unique_ptr<NamespaceReplayer<ImageCtxT>> m_default_namespace_replayer;
+  std::map<std::string, NamespaceReplayer<ImageCtxT> *> m_namespace_replayers;
+
+  std::string m_asok_hook_name;
+  AdminSocketHook *m_asok_hook = nullptr;
+
+  service_daemon::CalloutId m_callout_id = service_daemon::CALLOUT_ID_NONE;
+
+  bool m_leader = false;
+  // cooperative lock state used by with_namespace_replayers()
+  bool m_namespace_replayers_locked = false;
+  Context *m_on_namespace_replayers_unlocked = nullptr;
+
+  class PoolReplayerThread : public Thread {
+    PoolReplayer *m_pool_replayer;
+  public:
+    PoolReplayerThread(PoolReplayer *pool_replayer)
+      : m_pool_replayer(pool_replayer) {
+    }
+    void *entry() override {
+      m_pool_replayer->run();
+      return 0;
+    }
+  } m_pool_replayer_thread;
+
+  class LeaderListener : public leader_watcher::Listener {
+  public:
+    LeaderListener(PoolReplayer *pool_replayer)
+      : m_pool_replayer(pool_replayer) {
+    }
+
+  protected:
+    void post_acquire_handler(Context *on_finish) override {
+      m_pool_replayer->handle_post_acquire_leader(on_finish);
+    }
+
+    void pre_release_handler(Context *on_finish) override {
+      m_pool_replayer->handle_pre_release_leader(on_finish);
+    }
+
+    void update_leader_handler(
+        const std::string &leader_instance_id) override {
+      m_pool_replayer->handle_update_leader(leader_instance_id);
+    }
+
+    void handle_instances_added(const InstanceIds& instance_ids) override {
+      m_pool_replayer->handle_instances_added(instance_ids);
+    }
+
+    void handle_instances_removed(const InstanceIds& instance_ids) override {
+      m_pool_replayer->handle_instances_removed(instance_ids);
+    }
+
+  private:
+    PoolReplayer *m_pool_replayer;
+  } m_leader_listener;
+
+  std::unique_ptr<LeaderWatcher<ImageCtxT>> m_leader_watcher;
+  std::unique_ptr<Throttler<ImageCtxT>> m_image_sync_throttler;
+  std::unique_ptr<Throttler<ImageCtxT>> m_image_deletion_throttler;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::PoolReplayer<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_REPLAYER_H
diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc
new file mode 100644
index 000000000..bec931cf3
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolWatcher.cc
@@ -0,0 +1,473 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/PoolWatcher.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/MirroringWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Image.h"
+#include "librbd/api/Mirror.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PoolWatcher: " << this << " " \
+ << __func__ << ": "
+
+using std::list;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+namespace rbd {
+namespace mirror {
+
+// Watches the pool's mirroring object and forwards notifications
+// (mode changes, per-image enable/disable, rewatch completion) to the
+// owning PoolWatcher.
+template <typename I>
+class PoolWatcher<I>::MirroringWatcher : public librbd::MirroringWatcher<I> {
+public:
+  // derive the work-queue type from the pointee of Threads<I>::work_queue
+  using ContextWQ = typename std::decay<
+    typename std::remove_pointer<
+      decltype(Threads<I>::work_queue)>::type>::type;
+
+  MirroringWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue,
+                   PoolWatcher *pool_watcher)
+    : librbd::MirroringWatcher<I>(io_ctx, work_queue),
+      m_pool_watcher(pool_watcher) {
+  }
+
+  void handle_rewatch_complete(int r) override {
+    m_pool_watcher->handle_rewatch_complete(r);
+  }
+
+  void handle_mode_updated(cls::rbd::MirrorMode mirror_mode) override {
+    // invalidate all image state and refresh the pool contents
+    m_pool_watcher->schedule_refresh_images(5);
+  }
+
+  void handle_image_updated(cls::rbd::MirrorImageState state,
+                            const std::string &image_id,
+                            const std::string &global_image_id) override {
+    bool enabled = (state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED);
+    m_pool_watcher->handle_image_updated(image_id, global_image_id,
+                                         enabled);
+  }
+
+private:
+  PoolWatcher *m_pool_watcher;  // non-owning back-pointer
+};
+
+// Construct the pool watcher. The MirroringWatcher helper is created
+// eagerly but its watch is not registered until init() runs.
+template <typename I>
+PoolWatcher<I>::PoolWatcher(Threads<I> *threads,
+                            librados::IoCtx &io_ctx,
+                            const std::string& mirror_uuid,
+                            pool_watcher::Listener &listener)
+  : m_threads(threads),
+    m_io_ctx(io_ctx),
+    m_mirror_uuid(mirror_uuid),
+    m_listener(listener),
+    m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+      "rbd::mirror::PoolWatcher", this))) {
+  m_mirroring_watcher = new MirroringWatcher(m_io_ctx,
+                                             m_threads->work_queue, this);
+}
+
+template <typename I>
+PoolWatcher<I>::~PoolWatcher() {
+  // the watch must already have been unregistered via shut_down()
+  delete m_mirroring_watcher;
+}
+
+// Returns true once the rados client has been blocklisted by the
+// cluster; the watcher cannot make further progress after that.
+template <typename I>
+bool PoolWatcher<I>::is_blocklisted() const {
+  bool blocklisted = false;
+  {
+    std::lock_guard locker{m_lock};
+    blocklisted = m_blocklisted;
+  }
+  return blocklisted;
+}
+
+// Begin watching the pool. Registers the mirroring watch and kicks
+// off the initial image-list refresh; the optional on_finish context
+// completes once the first refresh result is available.
+template <typename I>
+void PoolWatcher<I>::init(Context *on_finish) {
+  dout(5) << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    m_on_init_finish = on_finish;
+
+    // only one refresh may be in flight at a time
+    ceph_assert(!m_refresh_in_progress);
+    m_refresh_in_progress = true;
+  }
+
+  // start async updates for mirror image directory
+  register_watcher();
+}
+
+// Stop watching the pool: cancel any pending refresh timer, unregister
+// the watch, and complete on_finish once all in-flight async ops drain.
+template <typename I>
+void PoolWatcher<I>::shut_down(Context *on_finish) {
+  dout(5) << dendl;
+
+  {
+    // timer lock must be taken before m_lock (established lock order)
+    std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+    ceph_assert(!m_shutting_down);
+    m_shutting_down = true;
+    if (m_timer_ctx != nullptr) {
+      m_threads->timer->cancel_event(m_timer_ctx);
+      m_timer_ctx = nullptr;
+    }
+  }
+
+  // in-progress unregister tracked as async op
+  unregister_watcher();
+
+  m_async_op_tracker.wait_for_ops(on_finish);
+}
+
+// (Re-)register the mirroring directory watch; skips straight to the
+// image refresh when the watch is already (being) registered.
+template <typename I>
+void PoolWatcher<I>::register_watcher() {
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_image_ids_invalid);
+    ceph_assert(m_refresh_in_progress);
+  }
+
+  // if the watch registration is in-flight, let the watcher
+  // handle the transition -- only (re-)register if it's not registered
+  if (!m_mirroring_watcher->is_unregistered()) {
+    refresh_images();
+    return;
+  }
+
+  // first time registering or the watch failed
+  dout(5) << dendl;
+  m_async_op_tracker.start_op();
+
+  Context *ctx = create_context_callback<
+    PoolWatcher, &PoolWatcher<I>::handle_register_watcher>(this);
+  m_mirroring_watcher->register_watch(ctx);
+}
+
+// Completion of the watch registration. On success proceeds to refresh
+// the image list; on -EBLOCKLISTED marks the watcher dead; on -ENOENT
+// (mirroring directory missing) or other errors schedules a delayed
+// retry. Any pending init callback is completed with r on terminal or
+// deferred paths.
+template <typename I>
+void PoolWatcher<I>::handle_register_watcher(int r) {
+  dout(5) << "r=" << r << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_image_ids_invalid);
+    ceph_assert(m_refresh_in_progress);
+    if (r < 0) {
+      // the refresh will be re-attempted via the retry timer below
+      m_refresh_in_progress = false;
+    }
+  }
+
+  Context *on_init_finish = nullptr;
+  if (r >= 0) {
+    refresh_images();
+  } else if (r == -EBLOCKLISTED) {
+    dout(0) << "detected client is blocklisted" << dendl;
+
+    std::lock_guard locker{m_lock};
+    m_blocklisted = true;
+    std::swap(on_init_finish, m_on_init_finish);
+  } else if (r == -ENOENT) {
+    dout(5) << "mirroring directory does not exist" << dendl;
+    {
+      std::lock_guard locker{m_lock};
+      std::swap(on_init_finish, m_on_init_finish);
+    }
+
+    // poll until the mirroring directory appears
+    schedule_refresh_images(30);
+  } else {
+    derr << "unexpected error registering mirroring directory watch: "
+         << cpp_strerror(r) << dendl;
+    schedule_refresh_images(10);
+  }
+
+  m_async_op_tracker.finish_op();
+  if (on_init_finish != nullptr) {
+    on_init_finish->complete(r);
+  }
+}
+
+// Asynchronously unregister the mirroring watch; errors are logged and
+// otherwise ignored (shutdown proceeds regardless).
+template <typename I>
+void PoolWatcher<I>::unregister_watcher() {
+  dout(5) << dendl;
+
+  m_async_op_tracker.start_op();
+  Context *ctx = new LambdaContext([this](int r) {
+      dout(5) << "unregister_watcher: r=" << r << dendl;
+      if (r < 0) {
+        derr << "error unregistering watcher for "
+             << m_mirroring_watcher->get_oid() << " object: " << cpp_strerror(r)
+             << dendl;
+      }
+      m_async_op_tracker.finish_op();
+    });
+
+  m_mirroring_watcher->unregister_watch(ctx);
+}
+
+// Issue a full refresh of the pool's mirror-enabled image list into
+// m_refresh_image_ids; completion is handled by handle_refresh_images.
+template <typename I>
+void PoolWatcher<I>::refresh_images() {
+  dout(5) << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_image_ids_invalid);
+    ceph_assert(m_refresh_in_progress);
+
+    // clear all pending notification events since we need to perform
+    // a full image list refresh
+    m_pending_added_image_ids.clear();
+    m_pending_removed_image_ids.clear();
+  }
+
+  m_async_op_tracker.start_op();
+  m_refresh_image_ids.clear();
+  Context *ctx = create_context_callback<
+    PoolWatcher, &PoolWatcher<I>::handle_refresh_images>(this);
+  auto req = pool_watcher::RefreshImagesRequest<I>::create(m_io_ctx,
+                                                           &m_refresh_image_ids,
+                                                           ctx);
+  req->send();
+}
+
+// Completion of the full image-list refresh. A missing mirroring
+// directory (-ENOENT) is treated as an empty pool; a refresh that was
+// superseded (m_deferred_refresh) is re-run immediately; other errors
+// schedule a delayed retry. On success the result becomes the new
+// pending image set and the listener is notified.
+template <typename I>
+void PoolWatcher<I>::handle_refresh_images(int r) {
+  dout(5) << "r=" << r << dendl;
+
+  bool deferred_refresh = false;
+  bool retry_refresh = false;
+  Context *on_init_finish = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_image_ids_invalid);
+    ceph_assert(m_refresh_in_progress);
+    m_refresh_in_progress = false;
+
+    if (r == -ENOENT) {
+      // no mirroring directory == no mirrored images
+      dout(5) << "mirroring directory not found" << dendl;
+      r = 0;
+      m_refresh_image_ids.clear();
+    }
+
+    if (m_deferred_refresh) {
+      // need to refresh -- skip the notification
+      deferred_refresh = true;
+    } else if (r >= 0) {
+      m_pending_image_ids = std::move(m_refresh_image_ids);
+      m_image_ids_invalid = false;
+      std::swap(on_init_finish, m_on_init_finish);
+
+      schedule_listener();
+    } else if (r == -EBLOCKLISTED) {
+      dout(0) << "detected client is blocklisted during image refresh" << dendl;
+
+      m_blocklisted = true;
+      std::swap(on_init_finish, m_on_init_finish);
+    } else {
+      retry_refresh = true;
+    }
+  }
+
+  if (deferred_refresh) {
+    dout(5) << "scheduling deferred refresh" << dendl;
+    schedule_refresh_images(0);
+  } else if (retry_refresh) {
+    derr << "failed to retrieve mirroring directory: " << cpp_strerror(r)
+         << dendl;
+    schedule_refresh_images(10);
+  }
+
+  m_async_op_tracker.finish_op();
+  if (on_init_finish != nullptr) {
+    on_init_finish->complete(r);
+  }
+}
+
+// Schedule a full image-list refresh after 'interval' seconds. If a
+// refresh is already running, only mark it deferred so it re-runs on
+// completion; no-op while shutting down or if a timer is already armed.
+template <typename I>
+void PoolWatcher<I>::schedule_refresh_images(double interval) {
+  std::scoped_lock locker{m_threads->timer_lock, m_lock};
+  if (m_shutting_down || m_refresh_in_progress || m_timer_ctx != nullptr) {
+    if (m_refresh_in_progress && !m_deferred_refresh) {
+      dout(5) << "deferring refresh until in-flight refresh completes" << dendl;
+      m_deferred_refresh = true;
+    }
+    return;
+  }
+
+  // invalidate the current view until the refresh lands
+  m_image_ids_invalid = true;
+  m_timer_ctx = m_threads->timer->add_event_after(
+    interval,
+    new LambdaContext([this](int r) {
+	process_refresh_images();
+      }));
+}
+
+// Invoked by the MirroringWatcher after an automatic re-watch attempt.
+// A blocklisted client is terminal; any other outcome triggers a full
+// image-list refresh since notifications may have been missed.
+template <typename I>
+void PoolWatcher<I>::handle_rewatch_complete(int r) {
+  dout(5) << "r=" << r << dendl;
+
+  if (r == -EBLOCKLISTED) {
+    dout(0) << "detected client is blocklisted" << dendl;
+
+    std::lock_guard locker{m_lock};
+    m_blocklisted = true;
+    return;
+  }
+
+  if (r == -ENOENT) {
+    dout(5) << "mirroring directory deleted" << dendl;
+  } else if (r < 0) {
+    derr << "unexpected error re-registering mirroring directory watch: "
+         << cpp_strerror(r) << dendl;
+  }
+
+  schedule_refresh_images(5);
+}
+
+// Per-image mirroring enable/disable notification. The image is moved
+// into the pending-added or pending-removed set (superseding any prior
+// queued state) and the listener notification is scheduled.
+template <typename I>
+void PoolWatcher<I>::handle_image_updated(const std::string &id,
+                                          const std::string &global_image_id,
+                                          bool enabled) {
+  dout(10) << "image_id=" << id << ", "
+           << "global_image_id=" << global_image_id << ", "
+           << "enabled=" << enabled << dendl;
+
+  std::lock_guard locker{m_lock};
+  ImageId image_id(global_image_id, id);
+
+  // the latest notification supersedes any queued add/remove
+  m_pending_added_image_ids.erase(image_id);
+  m_pending_removed_image_ids.erase(image_id);
+
+  auto& pending_set = enabled ? m_pending_added_image_ids
+                              : m_pending_removed_image_ids;
+  pending_set.insert(image_id);
+  schedule_listener();
+}
+
+// Timer callback for a scheduled refresh: flags the refresh as
+// in-progress and queues the watch (re-)registration on the work
+// queue so no rados calls run under the timer lock.
+template <typename I>
+void PoolWatcher<I>::process_refresh_images() {
+  ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+  ceph_assert(m_timer_ctx != nullptr);
+  m_timer_ctx = nullptr;
+
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(!m_refresh_in_progress);
+    m_refresh_in_progress = true;
+    m_deferred_refresh = false;
+  }
+
+  // execute outside of the timer's lock
+  m_async_op_tracker.start_op();
+  Context *ctx = new LambdaContext([this](int r) {
+      register_watcher();
+      m_async_op_tracker.finish_op();
+    });
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Queue a listener notification on the work queue. Coalesces requests:
+// while a notification is in flight (or the image list is invalid /
+// shutting down) only the m_pending_updates flag is recorded and the
+// in-flight notification re-schedules on completion.
+template <typename I>
+void PoolWatcher<I>::schedule_listener() {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+  m_pending_updates = true;
+  if (m_shutting_down || m_image_ids_invalid || m_notify_listener_in_progress) {
+    return;
+  }
+
+  dout(20) << dendl;
+
+  m_async_op_tracker.start_op();
+  Context *ctx = new LambdaContext([this](int r) {
+      notify_listener();
+      m_async_op_tracker.finish_op();
+    });
+
+  m_notify_listener_in_progress = true;
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Merge the pending add/remove sets into the committed image-id set,
+// compute the added/removed deltas versus the previous view, and
+// deliver them to the listener outside the lock.
+template <typename I>
+void PoolWatcher<I>::notify_listener() {
+  dout(10) << dendl;
+
+  std::string mirror_uuid;
+  ImageIds added_image_ids;
+  ImageIds removed_image_ids;
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_notify_listener_in_progress);
+  }
+
+  // NOTE(review): removed_image_ids is always empty at this point, so
+  // this branch is dead code -- looks like a leftover from an earlier
+  // two-phase notification; confirm before removing.
+  if (!removed_image_ids.empty()) {
+    m_listener.handle_update(mirror_uuid, {}, std::move(removed_image_ids));
+    removed_image_ids.clear();
+  }
+
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_notify_listener_in_progress);
+
+    // if the watch failed while we didn't own the lock, we are going
+    // to need to perform a full refresh
+    if (m_image_ids_invalid) {
+      m_notify_listener_in_progress = false;
+      return;
+    }
+
+    // merge add/remove notifications into pending set (a given image
+    // can only be in one set or another)
+    for (auto &image_id : m_pending_removed_image_ids) {
+      dout(20) << "image_id=" << image_id << dendl;
+      m_pending_image_ids.erase(image_id);
+    }
+
+    // NOTE(review): m_pending_removed_image_ids is not cleared here
+    // (unlike the added set below); entries are only dropped by
+    // handle_image_updated or a full refresh -- confirm this is intended.
+    for (auto &image_id : m_pending_added_image_ids) {
+      dout(20) << "image_id=" << image_id << dendl;
+      // erase+insert so a changed local image id replaces the old entry
+      m_pending_image_ids.erase(image_id);
+      m_pending_image_ids.insert(image_id);
+    }
+    m_pending_added_image_ids.clear();
+
+    // compute added/removed images
+    for (auto &image_id : m_image_ids) {
+      auto it = m_pending_image_ids.find(image_id);
+      if (it == m_pending_image_ids.end() || it->id != image_id.id) {
+        removed_image_ids.insert(image_id);
+      }
+    }
+    for (auto &image_id : m_pending_image_ids) {
+      auto it = m_image_ids.find(image_id);
+      if (it == m_image_ids.end() || it->id != image_id.id) {
+        added_image_ids.insert(image_id);
+      }
+    }
+
+    m_pending_updates = false;
+    m_image_ids = m_pending_image_ids;
+  }
+
+  // deliver the deltas without holding the lock
+  m_listener.handle_update(m_mirror_uuid, std::move(added_image_ids),
+                           std::move(removed_image_ids));
+
+  {
+    std::lock_guard locker{m_lock};
+    m_notify_listener_in_progress = false;
+    if (m_pending_updates) {
+      // more updates arrived while we were notifying
+      schedule_listener();
+    }
+  }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::PoolWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h
new file mode 100644
index 000000000..2905de15f
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolWatcher.h
@@ -0,0 +1,161 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_context.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include <boost/functional/hash.hpp>
+#include <boost/optional.hpp>
+#include "include/ceph_assert.h"
+#include "tools/rbd_mirror/pool_watcher/Types.h"
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+/**
+ * Keeps track of images that have mirroring enabled within all
+ * pools.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class PoolWatcher {
+public:
+  // factory helper; caller owns the returned pointer
+  static PoolWatcher* create(Threads<ImageCtxT> *threads,
+                             librados::IoCtx &io_ctx,
+                             const std::string& mirror_uuid,
+                             pool_watcher::Listener &listener) {
+    return new PoolWatcher(threads, io_ctx, mirror_uuid, listener);
+  }
+
+  PoolWatcher(Threads<ImageCtxT> *threads,
+              librados::IoCtx &io_ctx,
+              const std::string& mirror_uuid,
+              pool_watcher::Listener &listener);
+  ~PoolWatcher();
+  PoolWatcher(const PoolWatcher&) = delete;
+  PoolWatcher& operator=(const PoolWatcher&) = delete;
+
+  // true once the rados client has been blocklisted
+  bool is_blocklisted() const;
+
+  // start watching; on_finish fires after the initial refresh
+  void init(Context *on_finish = nullptr);
+  // stop watching; on_finish fires after all async ops drain
+  void shut_down(Context *on_finish);
+
+  // number of mirror-enabled images in the last committed view
+  inline uint64_t get_image_count() const {
+    std::lock_guard locker{m_lock};
+    return m_image_ids.size();
+  }
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   *  INIT
+   *    |
+   *    v
+   * REGISTER_WATCHER
+   *    |
+   *    |/--------------------------------\
+   *    |                                 |
+   *    v                                 |
+   * REFRESH_IMAGES                       |
+   *    |                                 |
+   *    |/----------------------------\   |
+   *    |                             |   |
+   *    v                             |   |
+   * NOTIFY_LISTENER                  |   |
+   *    |                             |   |
+   *    v                             |   |
+   *  IDLE ---\                       |   |
+   *    |     |                       |   |
+   *    |     |\---> IMAGE_UPDATED    |   |
+   *    |     |         |             |   |
+   *    |     |         v             |   |
+   *    |     |     GET_IMAGE_NAME --/    |
+   *    |     |                           |
+   *    |     \----> WATCH_ERROR ---------/
+   *    v
+   * SHUT_DOWN
+   *    |
+   *    v
+   * UNREGISTER_WATCHER
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+  class MirroringWatcher;
+
+  Threads<ImageCtxT> *m_threads;
+  librados::IoCtx m_io_ctx;
+  std::string m_mirror_uuid;
+  pool_watcher::Listener &m_listener;
+
+  // scratch buffer for the in-flight full refresh result
+  ImageIds m_refresh_image_ids;
+  bufferlist m_out_bl;
+
+  // protects all mutable state below
+  mutable ceph::mutex m_lock;
+
+  Context *m_on_init_finish = nullptr;
+
+  // last image set delivered to the listener
+  ImageIds m_image_ids;
+
+  bool m_pending_updates = false;
+  bool m_notify_listener_in_progress = false;
+  // staged next view plus queued add/remove notifications
+  ImageIds m_pending_image_ids;
+  ImageIds m_pending_added_image_ids;
+  ImageIds m_pending_removed_image_ids;
+
+  MirroringWatcher *m_mirroring_watcher;
+
+  // pending scheduled refresh (guarded by the timer lock)
+  Context *m_timer_ctx = nullptr;
+
+  AsyncOpTracker m_async_op_tracker;
+  bool m_blocklisted = false;
+  bool m_shutting_down = false;
+  bool m_image_ids_invalid = true;
+  bool m_refresh_in_progress = false;
+  bool m_deferred_refresh = false;
+
+  void register_watcher();
+  void handle_register_watcher(int r);
+  void unregister_watcher();
+
+  void refresh_images();
+  void handle_refresh_images(int r);
+
+  void schedule_refresh_images(double interval);
+  void process_refresh_images();
+
+  void handle_rewatch_complete(int r);
+  void handle_image_updated(const std::string &image_id,
+                            const std::string &global_image_id,
+                            bool enabled);
+
+  void schedule_listener();
+  void notify_listener();
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::PoolWatcher<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_H
diff --git a/src/tools/rbd_mirror/ProgressContext.h b/src/tools/rbd_mirror/ProgressContext.h
new file mode 100644
index 000000000..e4430ee6a
--- /dev/null
+++ b/src/tools/rbd_mirror/ProgressContext.h
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_PROGRESS_CONTEXT_H
+#define RBD_MIRROR_PROGRESS_CONTEXT_H
+
+namespace rbd {
+namespace mirror {
+
+// Interface for reporting human-readable progress during long-running
+// mirror operations (e.g. image sync).
+class ProgressContext
+{
+public:
+  virtual ~ProgressContext() {}
+  // report a new progress description; flush controls whether the
+  // update is pushed out immediately
+  virtual void update_progress(const std::string &description,
+                               bool flush = true) = 0;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_PROGRESS_CONTEXT_H
diff --git a/src/tools/rbd_mirror/RemotePoolPoller.cc b/src/tools/rbd_mirror/RemotePoolPoller.cc
new file mode 100644
index 000000000..8bfb35d4a
--- /dev/null
+++ b/src/tools/rbd_mirror/RemotePoolPoller.cc
@@ -0,0 +1,267 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "RemotePoolPoller.h"
+#include "include/ceph_assert.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::RemotePoolPoller: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+static const double POLL_INTERVAL_SECONDS = 30;
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+RemotePoolPoller<I>::~RemotePoolPoller() {
+  // shut_down() must have cancelled any pending poll timer
+  ceph_assert(m_timer_task == nullptr);
+}
+
+// Start the poller: performs the first remote pool query; on_finish
+// completes once the initial poll cycle has run.
+template <typename I>
+void RemotePoolPoller<I>::init(Context* on_finish) {
+  dout(10) << dendl;
+
+  ceph_assert(m_state == STATE_INITIALIZING);
+  ceph_assert(m_on_finish == nullptr);
+  m_on_finish = on_finish;
+
+  get_mirror_uuid();
+}
+
+// Stop polling. If a poll is currently executing the callback is
+// stashed and fired by schedule_task() when the poll completes;
+// otherwise the pending timer is cancelled and on_finish is queued.
+template <typename I>
+void RemotePoolPoller<I>::shut_down(Context* on_finish) {
+  dout(10) << dendl;
+
+  std::unique_lock locker(m_threads->timer_lock);
+  ceph_assert(m_state == STATE_POLLING);
+  m_state = STATE_SHUTTING_DOWN;
+
+  if (m_timer_task == nullptr) {
+    // currently executing a poll
+    ceph_assert(m_on_finish == nullptr);
+    m_on_finish = on_finish;
+    return;
+  }
+
+  m_threads->timer->cancel_event(m_timer_task);
+  m_timer_task = nullptr;
+  m_threads->work_queue->queue(on_finish, 0);
+}
+
+// First step of a poll cycle: read the remote pool's mirror uuid from
+// its RBD_MIRRORING object.
+template <typename I>
+void RemotePoolPoller<I>::get_mirror_uuid() {
+  dout(10) << dendl;
+
+  librados::ObjectReadOperation op;
+  librbd::cls_client::mirror_uuid_get_start(&op);
+
+  auto aio_comp = create_rados_callback<
+    RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_get_mirror_uuid>(this);
+  m_out_bl.clear();
+  int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// Completion of the mirror uuid read. An empty uuid is treated as
+// -ENOENT. During initialization a failure aborts the cycle (retry via
+// timer); once polling, the cached uuid is cleared on error and the
+// cycle continues so the listener can observe the change.
+template <typename I>
+void RemotePoolPoller<I>::handle_get_mirror_uuid(int r) {
+  dout(10) << "r=" << r << dendl;
+  std::string remote_mirror_uuid;
+  if (r >= 0) {
+    auto it = m_out_bl.cbegin();
+    r = librbd::cls_client::mirror_uuid_get_finish(&it, &remote_mirror_uuid);
+    if (r >= 0 && remote_mirror_uuid.empty()) {
+      r = -ENOENT;
+    }
+  }
+
+  if (r < 0) {
+    if (r == -ENOENT) {
+      dout(5) << "remote mirror uuid missing" << dendl;
+    } else {
+      derr << "failed to retrieve remote mirror uuid: " << cpp_strerror(r)
+           << dendl;
+    }
+
+    m_remote_pool_meta.mirror_uuid = "";
+  }
+
+  // if we have the mirror uuid, we will poll until shut down
+  if (m_state == STATE_INITIALIZING) {
+    if (r < 0) {
+      // completes the init callback with the error and re-arms the timer
+      schedule_task(r);
+      return;
+    }
+
+    m_state = STATE_POLLING;
+  }
+
+  dout(10) << "remote_mirror_uuid=" << remote_mirror_uuid << dendl;
+  if (m_remote_pool_meta.mirror_uuid != remote_mirror_uuid) {
+    m_remote_pool_meta.mirror_uuid = remote_mirror_uuid;
+    m_updated = true;
+  }
+
+  mirror_peer_ping();
+}
+
+// Ping the remote pool's mirror peer record with our site name and
+// mirror uuid so the remote side can track us as an active peer.
+template <typename I>
+void RemotePoolPoller<I>::mirror_peer_ping() {
+  dout(10) << dendl;
+
+  librados::ObjectWriteOperation op;
+  librbd::cls_client::mirror_peer_ping(&op, m_site_name, m_local_mirror_uuid);
+
+  auto aio_comp = create_rados_callback<
+    RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_mirror_peer_ping>(this);
+  int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// Completion of the peer ping. -EOPNOTSUPP means the remote OSDs are
+// too old for snapshot-based mirroring, so the peer-list lookup is
+// skipped entirely; any other error is logged and the peer list is
+// still attempted.
+template <typename I>
+void RemotePoolPoller<I>::handle_mirror_peer_ping(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r == -EOPNOTSUPP) {
+    // older OSD that doesn't support snapshot-based mirroring, so no need
+    // to query remote peers
+    dout(10) << "remote peer does not support snapshot-based mirroring"
+             << dendl;
+    notify_listener();
+    return;
+  } else if (r < 0) {
+    // we can still see if we can perform a peer list and find ourselves
+    derr << "failed to ping remote mirror peer: " << cpp_strerror(r) << dendl;
+  }
+
+  mirror_peer_list();
+}
+
+// List the remote pool's configured mirror peers so we can locate our
+// own peer uuid (needed for snapshot-based mirroring).
+template <typename I>
+void RemotePoolPoller<I>::mirror_peer_list() {
+  dout(10) << dendl;
+
+  librados::ObjectReadOperation op;
+  librbd::cls_client::mirror_peer_list_start(&op);
+
+  auto aio_comp = create_rados_callback<
+    RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_mirror_peer_list>(this);
+  m_out_bl.clear();
+  int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// Completion of the peer list. Searches the remote peers (ignoring
+// receive-only ones) for the entry that points back at us: an exact
+// mirror-uuid match wins; a site-name match is kept as a fallback.
+// The resulting peer uuid (possibly empty) is cached and the listener
+// is notified if anything changed.
+template <typename I>
+void RemotePoolPoller<I>::handle_mirror_peer_list(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::vector<cls::rbd::MirrorPeer> peers;
+  if (r == 0) {
+    auto iter = m_out_bl.cbegin();
+    r = librbd::cls_client::mirror_peer_list_finish(&iter, &peers);
+  }
+
+  if (r < 0) {
+    derr << "failed to retrieve mirror peers: " << cpp_strerror(r) << dendl;
+  }
+
+  cls::rbd::MirrorPeer* matched_peer = nullptr;
+  for (auto& peer : peers) {
+    if (peer.mirror_peer_direction == cls::rbd::MIRROR_PEER_DIRECTION_RX) {
+      continue;
+    }
+
+    if (peer.mirror_uuid == m_local_mirror_uuid) {
+      matched_peer = &peer;
+      break;
+    } else if (peer.site_name == m_site_name) {
+      // keep searching in case a later peer matches our mirror uuid exactly
+      matched_peer = &peer;
+    }
+  }
+
+  // older OSDs don't support peer ping so we might fail to find a match,
+  // which will prevent snapshot mirroring from functioning
+  std::string remote_mirror_peer_uuid;
+  if (matched_peer != nullptr) {
+    remote_mirror_peer_uuid = matched_peer->uuid;
+  }
+
+  dout(10) << "remote_mirror_peer_uuid=" << remote_mirror_peer_uuid << dendl;
+  if (m_remote_pool_meta.mirror_peer_uuid != remote_mirror_peer_uuid) {
+    m_remote_pool_meta.mirror_peer_uuid = remote_mirror_peer_uuid;
+    m_updated = true;
+  }
+
+  notify_listener();
+}
+
+// End of a poll cycle: fire the listener if the remote pool metadata
+// changed since the previous cycle, then arm the next poll.
+template <typename I>
+void RemotePoolPoller<I>::notify_listener() {
+  const bool updated = m_updated;
+  m_updated = false;
+
+  if (updated) {
+    dout(10) << dendl;
+    m_listener.handle_updated(m_remote_pool_meta);
+  }
+
+  schedule_task(0);
+}
+
+// Arm the next poll timer (only while in STATE_POLLING) and complete
+// any stashed init/shut-down callback. A shut-down callback always
+// completes with 0; an init callback completes with r.
+template <typename I>
+void RemotePoolPoller<I>::schedule_task(int r) {
+  std::unique_lock locker{m_threads->timer_lock};
+
+  if (m_state == STATE_POLLING) {
+    dout(10) << dendl;
+
+    ceph_assert(m_timer_task == nullptr);
+    m_timer_task = new LambdaContext([this](int) {
+      handle_task();
+    });
+
+    m_threads->timer->add_event_after(POLL_INTERVAL_SECONDS, m_timer_task);
+  }
+
+  // finish init or shut down callback
+  if (m_on_finish != nullptr) {
+    // complete outside the timer lock to avoid re-entrancy deadlocks
+    locker.unlock();
+    Context* on_finish = nullptr;
+    std::swap(on_finish, m_on_finish);
+    on_finish->complete(m_state == STATE_SHUTTING_DOWN ? 0 : r);
+  }
+}
+
+// Timer callback: clear the pending task marker and start the next
+// poll cycle on the work queue (never issue rados calls from the
+// timer thread).
+template <typename I>
+void RemotePoolPoller<I>::handle_task() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+  m_timer_task = nullptr;
+
+  auto ctx = new LambdaContext([this](int) {
+    get_mirror_uuid();
+  });
+  m_threads->work_queue->queue(ctx);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::RemotePoolPoller<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/RemotePoolPoller.h b/src/tools/rbd_mirror/RemotePoolPoller.h
new file mode 100644
index 000000000..19d803ca1
--- /dev/null
+++ b/src/tools/rbd_mirror/RemotePoolPoller.h
@@ -0,0 +1,133 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H
+#define CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H
+
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace remote_pool_poller {
+
+// Callback interface notified whenever the polled remote pool
+// metadata (mirror uuid / peer uuid) changes.
+struct Listener {
+  virtual ~Listener() {}
+
+  virtual void handle_updated(const RemotePoolMeta& remote_pool_meta) = 0;
+};
+
+}; // namespace remote_pool_poller
+
+// Periodically polls a remote pool for its mirror uuid and the mirror
+// peer entry that references this local cluster, notifying the
+// listener on change.
+template <typename ImageCtxT>
+class RemotePoolPoller {
+public:
+  // factory helper; caller owns the returned pointer
+  static RemotePoolPoller* create(
+      Threads<ImageCtxT>* threads,
+      librados::IoCtx& remote_io_ctx,
+      const std::string& site_name,
+      const std::string& local_mirror_uuid,
+      remote_pool_poller::Listener& listener) {
+    return new RemotePoolPoller(threads, remote_io_ctx, site_name,
+                                local_mirror_uuid, listener);
+  }
+
+  RemotePoolPoller(
+      Threads<ImageCtxT>* threads,
+      librados::IoCtx& remote_io_ctx,
+      const std::string& site_name,
+      const std::string& local_mirror_uuid,
+      remote_pool_poller::Listener& listener)
+    : m_threads(threads),
+      m_remote_io_ctx(remote_io_ctx),
+      m_site_name(site_name),
+      m_local_mirror_uuid(local_mirror_uuid),
+      m_listener(listener) {
+  }
+  ~RemotePoolPoller();
+
+  void init(Context* on_finish);
+  void shut_down(Context* on_finish);
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    |/----------------------------\
+   *    |                             |
+   *    v                             |
+   * MIRROR_UUID_GET                  |
+   *    |                             |
+   *    v                             |
+   * MIRROR_PEER_PING                 |
+   *    |                             |
+   *    v                             |
+   * MIRROR_PEER_LIST                 |
+   *    |                             |
+   *    v                             |
+   * MIRROR_UUID_GET                  |
+   *    |                             |
+   *    v (skip if no changes)        |
+   * NOTIFY_LISTENER                  |
+   *    |                             |
+   *    |   (repeat periodically)     |
+   *    |\----------------------------/
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  enum State {
+    STATE_INITIALIZING,
+    STATE_POLLING,
+    STATE_SHUTTING_DOWN
+  };
+
+  Threads<ImageCtxT>* m_threads;
+  librados::IoCtx& m_remote_io_ctx;
+  std::string m_site_name;
+  std::string m_local_mirror_uuid;
+  remote_pool_poller::Listener& m_listener;
+
+  bufferlist m_out_bl;
+
+  // last observed remote pool metadata; m_updated marks a pending change
+  RemotePoolMeta m_remote_pool_meta;
+  bool m_updated = false;
+
+  State m_state = STATE_INITIALIZING;
+  // pending poll timer event (guarded by the timer lock)
+  Context* m_timer_task = nullptr;
+  // stashed init/shut-down completion
+  Context* m_on_finish = nullptr;
+
+  void get_mirror_uuid();
+  void handle_get_mirror_uuid(int r);
+
+  void mirror_peer_ping();
+  void handle_mirror_peer_ping(int r);
+
+  void mirror_peer_list();
+  void handle_mirror_peer_list(int r);
+
+  void notify_listener();
+
+  void schedule_task(int r);
+  void handle_task();
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::RemotePoolPoller<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H
diff --git a/src/tools/rbd_mirror/ServiceDaemon.cc b/src/tools/rbd_mirror/ServiceDaemon.cc
new file mode 100644
index 000000000..f3cabcc87
--- /dev/null
+++ b/src/tools/rbd_mirror/ServiceDaemon.cc
@@ -0,0 +1,327 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/ServiceDaemon.h"
+#include "include/Context.h"
+#include "include/stringify.h"
+#include "common/ceph_context.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/Timer.h"
+#include "tools/rbd_mirror/Threads.h"
+#include <sstream>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ServiceDaemon: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+const std::string RBD_MIRROR_AUTH_ID_PREFIX("rbd-mirror.");
+
+// boost::variant visitor that dumps an AttributeValue to the formatter
+// under the given field name, dispatching on the held type.
+struct AttributeDumpVisitor : public boost::static_visitor<void> {
+  ceph::Formatter *f;
+  const std::string& name;
+
+  AttributeDumpVisitor(ceph::Formatter *f, const std::string& name)
+    : f(f), name(name) {
+  }
+
+  void operator()(bool val) const {
+    f->dump_bool(name.c_str(), val);
+  }
+  void operator()(uint64_t val) const {
+    f->dump_unsigned(name.c_str(), val);
+  }
+  void operator()(const std::string& val) const {
+    f->dump_string(name.c_str(), val);
+  }
+};
+
+} // anonymous namespace
+
+using namespace service_daemon;
+
+template <typename I>
+ServiceDaemon<I>::ServiceDaemon(CephContext *cct, RadosRef rados,
+                                Threads<I>* threads)
+  : m_cct(cct), m_rados(rados), m_threads(threads) {
+  dout(20) << dendl;
+}
+
+template <typename I>
+ServiceDaemon<I>::~ServiceDaemon() {
+  dout(20) << dendl;
+  std::lock_guard timer_locker{m_threads->timer_lock};
+  if (m_timer_ctx != nullptr) {
+    // a status update was pending: cancel the timer and flush it
+    // synchronously so the final state is reported
+    m_threads->timer->cancel_event(m_timer_ctx);
+    update_status();
+  }
+}
+
+// Register this daemon with the cluster's service map. The auth id is
+// reported with the "rbd-mirror." prefix stripped. Returns 0 on
+// success or a negative errno from the registration call.
+template <typename I>
+int ServiceDaemon<I>::init() {
+  dout(20) << dendl;
+
+  std::string id = m_cct->_conf->name.get_id();
+  if (id.find(RBD_MIRROR_AUTH_ID_PREFIX) == 0) {
+    id = id.substr(RBD_MIRROR_AUTH_ID_PREFIX.size());
+  }
+
+  std::string instance_id = stringify(m_rados->get_instance_id());
+  std::map<std::string, std::string> service_metadata = {
+    {"id", id}, {"instance_id", instance_id}};
+  int r = m_rados->service_daemon_register("rbd-mirror", instance_id,
+                                           service_metadata);
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+// Start tracking a pool for status reporting; inserting an
+// already-known pool id is a no-op (std::map::insert semantics).
+template <typename I>
+void ServiceDaemon<I>::add_pool(int64_t pool_id, const std::string& pool_name) {
+  dout(20) << "pool_id=" << pool_id << ", pool_name=" << pool_name << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    m_pools.insert({pool_id, {pool_name}});
+  }
+  schedule_update_status();
+}
+
+// Stop tracking a pool; removing an unknown pool id is a no-op but
+// still triggers a (cheap) status update.
+template <typename I>
+void ServiceDaemon<I>::remove_pool(int64_t pool_id) {
+  dout(20) << "pool_id=" << pool_id << dendl;
+  {
+    std::lock_guard locker{m_lock};
+    auto pool_it = m_pools.find(pool_id);
+    if (pool_it != m_pools.end()) {
+      m_pools.erase(pool_it);
+    }
+  }
+  schedule_update_status();
+}
+
+// Start tracking a namespace within an already-registered pool;
+// ignored if the pool is unknown.
+template <typename I>
+void ServiceDaemon<I>::add_namespace(int64_t pool_id,
+                                     const std::string& namespace_name) {
+  dout(20) << "pool_id=" << pool_id << ", namespace=" << namespace_name
+           << dendl;
+
+  std::lock_guard locker{m_lock};
+  auto pool_it = m_pools.find(pool_id);
+  if (pool_it == m_pools.end()) {
+    return;
+  }
+  // default-construct the namespace's attribute map if absent
+  pool_it->second.ns_attributes[namespace_name];
+
+  // don't schedule update status as the namespace attributes are empty yet
+}
+
+// Stop tracking a namespace within a pool; ignored if the pool is
+// unknown.
+template <typename I>
+void ServiceDaemon<I>::remove_namespace(int64_t pool_id,
+                                        const std::string& namespace_name) {
+  dout(20) << "pool_id=" << pool_id << ", namespace=" << namespace_name
+           << dendl;
+  {
+    std::lock_guard locker{m_lock};
+    auto pool_it = m_pools.find(pool_id);
+    if (pool_it == m_pools.end()) {
+      return;
+    }
+    pool_it->second.ns_attributes.erase(namespace_name);
+  }
+  schedule_update_status();
+}
+
+// Record (or update) a health callout for a pool. Passing
+// CALLOUT_ID_NONE allocates a fresh id; the assigned id is returned so
+// the caller can update/remove it later. Returns CALLOUT_ID_NONE if
+// the pool is not registered.
+template <typename I>
+uint64_t ServiceDaemon<I>::add_or_update_callout(int64_t pool_id,
+                                                 uint64_t callout_id,
+                                                 CalloutLevel callout_level,
+                                                 const std::string& text) {
+  dout(20) << "pool_id=" << pool_id << ", "
+           << "callout_id=" << callout_id << ", "
+           << "callout_level=" << callout_level << ", "
+           << "text=" << text << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    auto pool_it = m_pools.find(pool_id);
+    if (pool_it == m_pools.end()) {
+      return CALLOUT_ID_NONE;
+    }
+
+    if (callout_id == CALLOUT_ID_NONE) {
+      // allocate the next callout id under the lock
+      callout_id = ++m_callout_id;
+    }
+    pool_it->second.callouts[callout_id] = {callout_level, text};
+  }
+
+  schedule_update_status();
+  return callout_id;
+}
+
+// Remove a previously-added callout from a pool; a no-op (including no
+// status update) when the pool is not registered.
+template <typename I>
+void ServiceDaemon<I>::remove_callout(int64_t pool_id, uint64_t callout_id) {
+  dout(20) << "pool_id=" << pool_id << ", "
+           << "callout_id=" << callout_id << dendl;
+
+  bool pool_found = false;
+  {
+    std::lock_guard locker{m_lock};
+    auto pool_it = m_pools.find(pool_id);
+    if (pool_it != m_pools.end()) {
+      pool_it->second.callouts.erase(callout_id);
+      pool_found = true;
+    }
+  }
+  if (!pool_found) {
+    return;
+  }
+
+  schedule_update_status();
+}
+
template <typename I>
void ServiceDaemon<I>::add_or_update_attribute(int64_t pool_id,
                                               const std::string& key,
                                               const AttributeValue& value) {
  // Set (or overwrite) a pool-level status attribute; ignored when the pool
  // is not registered.
  dout(20) << "pool_id=" << pool_id << ", "
           << "key=" << key << ", "
           << "value=" << value << dendl;

  {
    std::lock_guard locker{m_lock};
    auto pool_it = m_pools.find(pool_id);
    if (pool_it == m_pools.end()) {
      return;
    }
    pool_it->second.attributes[key] = value;
  }

  schedule_update_status();
}
+
template <typename I>
void ServiceDaemon<I>::add_or_update_namespace_attribute(
    int64_t pool_id, const std::string& namespace_name, const std::string& key,
    const AttributeValue& value) {
  // Namespace-scoped variant of add_or_update_attribute; an empty namespace
  // name degrades to the pool-level attribute.
  if (namespace_name.empty()) {
    add_or_update_attribute(pool_id, key, value);
    return;
  }

  dout(20) << "pool_id=" << pool_id << ", "
           << "namespace=" << namespace_name << ", "
           << "key=" << key << ", "
           << "value=" << value << dendl;

  {
    std::lock_guard locker{m_lock};
    auto pool_it = m_pools.find(pool_id);
    if (pool_it == m_pools.end()) {
      return;
    }

    // the namespace must have been registered via add_namespace() first
    auto ns_it = pool_it->second.ns_attributes.find(namespace_name);
    if (ns_it == pool_it->second.ns_attributes.end()) {
      return;
    }

    ns_it->second[key] = value;
  }

  schedule_update_status();
}
+
+template <typename I>
+void ServiceDaemon<I>::remove_attribute(int64_t pool_id,
+ const std::string& key) {
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "key=" << key << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+ pool_it->second.attributes.erase(key);
+ }
+
+ schedule_update_status();
+}
+
template <typename I>
void ServiceDaemon<I>::schedule_update_status() {
  // Debounce: coalesce every state change inside a 1-second window into a
  // single update_status() call.
  std::lock_guard timer_locker{m_threads->timer_lock};
  if (m_timer_ctx != nullptr) {
    // an update is already pending
    return;
  }

  // SafeTimer fires the callback with timer_lock held (see Threads ctor),
  // so clearing m_timer_ctx inside the lambda is serialized with this check
  m_timer_ctx = new LambdaContext([this](int) {
    m_timer_ctx = nullptr;
    update_status();
  });
  m_threads->timer->add_event_after(1, m_timer_ctx);
}
+
template <typename I>
void ServiceDaemon<I>::update_status() {
  // Serialize the current pool/callout/attribute state to JSON and push it
  // to the cluster's service daemon registry. Invoked from the SafeTimer
  // callback, so the timer lock is already held.
  dout(20) << dendl;
  ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));

  ceph::JSONFormatter f;
  {
    std::lock_guard locker{m_lock};
    f.open_object_section("pools");
    for (auto& pool_pair : m_pools) {
      // one object per pool, keyed by pool id
      f.open_object_section(stringify(pool_pair.first).c_str());
      f.dump_string("name", pool_pair.second.name);
      f.open_object_section("callouts");
      for (auto& callout : pool_pair.second.callouts) {
        f.open_object_section(stringify(callout.first).c_str());
        f.dump_string("level", stringify(callout.second.level).c_str());
        f.dump_string("text", callout.second.text.c_str());
        f.close_section();
      }
      f.close_section(); // callouts

      // pool-level attributes are flattened into the pool object itself
      for (auto& attribute : pool_pair.second.attributes) {
        AttributeDumpVisitor attribute_dump_visitor(&f, attribute.first);
        boost::apply_visitor(attribute_dump_visitor, attribute.second);
      }

      if (!pool_pair.second.ns_attributes.empty()) {
        f.open_object_section("namespaces");
        for (auto& [ns, attributes] : pool_pair.second.ns_attributes) {
          f.open_object_section(ns.c_str());
          for (auto& [key, value] : attributes) {
            AttributeDumpVisitor attribute_dump_visitor(&f, key);
            boost::apply_visitor(attribute_dump_visitor, value);
          }
          f.close_section(); // namespace
        }
        f.close_section(); // namespaces
      }
      f.close_section(); // pool
    }
    f.close_section(); // pools
  }

  std::stringstream ss;
  f.flush(ss);

  // best-effort: a failed update is only logged, never retried here
  int r = m_rados->service_daemon_update_status({{"json", ss.str()}});
  if (r < 0) {
    derr << "failed to update service daemon status: " << cpp_strerror(r)
         << dendl;
  }
}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ServiceDaemon.h b/src/tools/rbd_mirror/ServiceDaemon.h
new file mode 100644
index 000000000..8b1e0f584
--- /dev/null
+++ b/src/tools/rbd_mirror/ServiceDaemon.h
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_H
+#define CEPH_RBD_MIRROR_SERVICE_DAEMON_H
+
+#include "common/ceph_mutex.h"
+#include "include/common_fwd.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <map>
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
/// Aggregates per-pool state (name, callouts, attributes, per-namespace
/// attributes) for the rbd-mirror daemon and publishes it as JSON through
/// librados' service daemon status API (see ServiceDaemon.cc).
/// All mutators are thread-safe; status publication is debounced via the
/// shared Threads timer.
template <typename ImageCtxT = librbd::ImageCtx>
class ServiceDaemon {
public:
  ServiceDaemon(CephContext *cct, RadosRef rados, Threads<ImageCtxT>* threads);
  ~ServiceDaemon();

  int init();

  void add_pool(int64_t pool_id, const std::string& pool_name);
  void remove_pool(int64_t pool_id);

  void add_namespace(int64_t pool_id, const std::string& namespace_name);
  void remove_namespace(int64_t pool_id, const std::string& namespace_name);

  // returns the callout id (newly allocated when CALLOUT_ID_NONE is passed)
  uint64_t add_or_update_callout(int64_t pool_id, uint64_t callout_id,
                                 service_daemon::CalloutLevel callout_level,
                                 const std::string& text);
  void remove_callout(int64_t pool_id, uint64_t callout_id);

  void add_or_update_attribute(int64_t pool_id, const std::string& key,
                               const service_daemon::AttributeValue& value);
  void add_or_update_namespace_attribute(
      int64_t pool_id, const std::string& namespace_name,
      const std::string& key, const service_daemon::AttributeValue& value);
  void remove_attribute(int64_t pool_id, const std::string& key);

private:
  struct Callout {
    service_daemon::CalloutLevel level;
    std::string text;

    Callout() : level(service_daemon::CALLOUT_LEVEL_INFO) {
    }
    Callout(service_daemon::CalloutLevel level, const std::string& text)
      : level(level), text(text) {
    }
  };
  typedef std::map<uint64_t, Callout> Callouts;
  typedef std::map<std::string, service_daemon::AttributeValue> Attributes;
  typedef std::map<std::string, Attributes> NamespaceAttributes;

  struct Pool {
    std::string name;
    Callouts callouts;
    Attributes attributes;
    NamespaceAttributes ns_attributes;  // keyed by namespace name

    Pool(const std::string& name) : name(name) {
    }
  };

  typedef std::map<int64_t, Pool> Pools;

  CephContext *m_cct;
  RadosRef m_rados;
  Threads<ImageCtxT>* m_threads;

  // protects m_pools and m_callout_id
  ceph::mutex m_lock = ceph::make_mutex("rbd::mirror::ServiceDaemon");
  Pools m_pools;
  uint64_t m_callout_id = service_daemon::CALLOUT_ID_NONE;

  // pending debounce timer event; guarded by m_threads->timer_lock
  Context* m_timer_ctx = nullptr;

  void schedule_update_status();
  void update_status();
};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_H
diff --git a/src/tools/rbd_mirror/Threads.cc b/src/tools/rbd_mirror/Threads.cc
new file mode 100644
index 000000000..b0c762641
--- /dev/null
+++ b/src/tools/rbd_mirror/Threads.cc
@@ -0,0 +1,38 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/Threads.h"
+#include "common/Timer.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/asio/ContextWQ.h"
+
+namespace rbd {
+namespace mirror {
+
template <typename I>
Threads<I>::Threads(std::shared_ptr<librados::Rados>& rados) {
  auto cct = static_cast<CephContext*>(rados->cct());
  // the asio engine owns the thread pool; the work queue is a view onto it
  asio_engine = new librbd::AsioEngine(rados);
  work_queue = asio_engine->get_work_queue();

  // safe_callbacks=true: timer events are invoked with timer_lock held
  timer = new SafeTimer(cct, timer_lock, true);
  timer->init();
}
+
template <typename I>
Threads<I>::~Threads() {
  // SafeTimer::shutdown requires the timer lock to be held
  {
    std::lock_guard timer_locker{timer_lock};
    timer->shutdown();
  }
  delete timer;

  // flush all queued work items before tearing down the asio engine
  work_queue->drain();
  delete asio_engine;
}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::Threads<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/Threads.h b/src/tools/rbd_mirror/Threads.h
new file mode 100644
index 000000000..35c0b0f1c
--- /dev/null
+++ b/src/tools/rbd_mirror/Threads.h
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_THREADS_H
+#define CEPH_RBD_MIRROR_THREADS_H
+
+#include "include/common_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include <memory>
+
+class ThreadPool;
+
+namespace librbd {
+struct AsioEngine;
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
/// Shared execution resources (asio engine, work queue, timer) used across
/// the rbd-mirror daemon. Non-copyable; owns everything it points to.
template <typename ImageCtxT = librbd::ImageCtx>
class Threads {
public:
  librbd::AsioEngine* asio_engine = nullptr;
  librbd::asio::ContextWQ* work_queue = nullptr;  // owned by asio_engine

  // timer created with safe_callbacks=true (see Threads.cc), so callbacks
  // run with timer_lock held
  SafeTimer *timer = nullptr;
  ceph::mutex timer_lock = ceph::make_mutex("Threads::timer_lock");

  explicit Threads(std::shared_ptr<librados::Rados>& rados);
  Threads(const Threads&) = delete;
  Threads& operator=(const Threads&) = delete;

  ~Threads();
};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::Threads<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_THREADS_H
diff --git a/src/tools/rbd_mirror/Throttler.cc b/src/tools/rbd_mirror/Throttler.cc
new file mode 100644
index 000000000..b20298963
--- /dev/null
+++ b/src/tools/rbd_mirror/Throttler.cc
@@ -0,0 +1,240 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "Throttler.h"
+#include "common/Formatter.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::Throttler:: " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
template <typename I>
Throttler<I>::Throttler(CephContext *cct, const std::string &config_key)
  : m_cct(cct), m_config_key(config_key),
    // null-terminated key list handed to the config observer machinery;
    // points into m_config_key, which is const and outlives it
    m_config_keys{m_config_key.c_str(), nullptr},
    m_lock(ceph::make_mutex(
      librbd::util::unique_lock_name("rbd::mirror::Throttler", this))),
    m_max_concurrent_ops(cct->_conf.get_val<uint64_t>(m_config_key)) {
  dout(20) << m_config_key << "=" << m_max_concurrent_ops << dendl;
  // watch for runtime changes to the configured limit
  m_cct->_conf.add_observer(this);
}
+
template <typename I>
Throttler<I>::~Throttler() {
  m_cct->_conf.remove_observer(this);

  // all ops must have been finished or drained before destruction
  std::lock_guard locker{m_lock};
  ceph_assert(m_inflight_ops.empty());
  ceph_assert(m_queue.empty());
}
+
template <typename I>
void Throttler<I>::start_op(const std::string &ns,
                            const std::string &id_,
                            Context *on_start) {
  // Request a slot for op (ns, id). on_start is completed with:
  //   0          - slot granted immediately (or op already in flight)
  //   -ENOENT    - a previous waiter for the same op was superseded
  //   (deferred) - queued; completed later by finish_op/cancel_op/drain
  Id id{ns, id_};

  dout(20) << "id=" << id << dendl;

  int r = 0;
  {
    std::lock_guard locker{m_lock};

    if (m_inflight_ops.count(id) > 0) {
      dout(20) << "duplicate for already started op " << id << dendl;
    } else if (m_queued_ops.count(id) > 0) {
      dout(20) << "duplicate for already queued op " << id << dendl;
      // replace the stored waiter with the new context; the old waiter
      // (now in on_start) is completed below with -ENOENT
      std::swap(m_queued_ops[id], on_start);
      r = -ENOENT;
    } else if (m_max_concurrent_ops == 0 ||
               m_inflight_ops.size() < m_max_concurrent_ops) {
      // 0 means unlimited, so the queue must be empty in that case
      ceph_assert(m_queue.empty());
      m_inflight_ops.insert(id);
      dout(20) << "ready to start op for " << id << " ["
               << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]"
               << dendl;
    } else {
      m_queue.push_back(id);
      // swap leaves on_start == nullptr, so no completion fires below
      std::swap(m_queued_ops[id], on_start);
      dout(20) << "op for " << id << " has been queued" << dendl;
    }
  }

  // complete outside the lock to avoid re-entrant locking
  if (on_start != nullptr) {
    on_start->complete(r);
  }
}
+
+template <typename I>
+bool Throttler<I>::cancel_op(const std::string &ns,
+ const std::string &id_) {
+ Id id{ns, id_};
+
+ dout(20) << "id=" << id << dendl;
+
+ Context *on_start = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ auto it = m_queued_ops.find(id);
+ if (it != m_queued_ops.end()) {
+ dout(20) << "canceled queued op for " << id << dendl;
+ m_queue.remove(id);
+ on_start = it->second;
+ m_queued_ops.erase(it);
+ }
+ }
+
+ if (on_start == nullptr) {
+ return false;
+ }
+
+ on_start->complete(-ECANCELED);
+ return true;
+}
+
template <typename I>
void Throttler<I>::finish_op(const std::string &ns,
                             const std::string &id_) {
  // Release the slot held by (ns, id) and, if capacity allows, promote the
  // oldest queued op to in-flight.
  Id id{ns, id_};

  dout(20) << "id=" << id << dendl;

  // finishing an op that is still queued is treated as a cancellation
  if (cancel_op(ns, id_)) {
    return;
  }

  Context *on_start = nullptr;
  {
    std::lock_guard locker{m_lock};

    m_inflight_ops.erase(id);

    if (m_inflight_ops.size() < m_max_concurrent_ops && !m_queue.empty()) {
      // NOTE: this inner 'id' shadows the finished op's id above
      auto id = m_queue.front();
      auto it = m_queued_ops.find(id);
      ceph_assert(it != m_queued_ops.end());
      m_inflight_ops.insert(id);
      dout(20) << "ready to start op for " << id << " ["
               << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]"
               << dendl;
      on_start = it->second;
      m_queued_ops.erase(it);
      m_queue.pop_front();
    }
  }

  // complete outside the lock to avoid re-entrant locking
  if (on_start != nullptr) {
    on_start->complete(0);
  }
}
+
template <typename I>
void Throttler<I>::drain(const std::string &ns, int r) {
  // Remove every op belonging to namespace 'ns': queued waiters are
  // completed with 'r'; in-flight ops are simply forgotten (their owners
  // are expected to be shutting down and will not call finish_op).
  dout(20) << "ns=" << ns << dendl;

  std::map<Id, Context *> queued_ops;
  {
    std::lock_guard locker{m_lock};
    for (auto it = m_queued_ops.begin(); it != m_queued_ops.end(); ) {
      if (it->first.first == ns) {
        queued_ops[it->first] = it->second;
        m_queue.remove(it->first);
        it = m_queued_ops.erase(it);
      } else {
        it++;
      }
    }
    for (auto it = m_inflight_ops.begin(); it != m_inflight_ops.end(); ) {
      if (it->first == ns) {
        dout(20) << "inflight_op " << *it << dendl;
        it = m_inflight_ops.erase(it);
      } else {
        it++;
      }
    }
  }

  // complete outside the lock to avoid re-entrant locking
  for (auto &it : queued_ops) {
    dout(20) << "queued_op " << it.first << dendl;
    it.second->complete(r);
  }
}
+
template <typename I>
void Throttler<I>::set_max_concurrent_ops(uint32_t max) {
  // Apply a new concurrency limit (0 == unlimited) and immediately promote
  // as many queued ops as the new limit allows.
  dout(20) << "max=" << max << dendl;

  std::list<Context *> ops;
  {
    std::lock_guard locker{m_lock};
    m_max_concurrent_ops = max;

    // Start waiting ops in the case of available free slots
    while ((m_max_concurrent_ops == 0 ||
            m_inflight_ops.size() < m_max_concurrent_ops) &&
           !m_queue.empty()) {
      auto id = m_queue.front();
      m_inflight_ops.insert(id);
      dout(20) << "ready to start op for " << id << " ["
               << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]"
               << dendl;
      auto it = m_queued_ops.find(id);
      ceph_assert(it != m_queued_ops.end());
      ops.push_back(it->second);
      m_queued_ops.erase(it);
      m_queue.pop_front();
    }
  }

  // complete outside the lock to avoid re-entrant locking
  for (const auto& ctx : ops) {
    ctx->complete(0);
  }
}
+
template <typename I>
void Throttler<I>::print_status(ceph::Formatter *f) {
  // Dump a point-in-time snapshot of the throttle for admin-socket output.
  dout(20) << dendl;

  std::lock_guard locker{m_lock};

  f->dump_int("max_parallel_requests", m_max_concurrent_ops);
  f->dump_int("running_requests", m_inflight_ops.size());
  f->dump_int("waiting_requests", m_queue.size());
}
+
template <typename I>
const char** Throttler<I>::get_tracked_conf_keys() const {
  // null-terminated list containing only the throttle's config key
  return m_config_keys;
}
+
+template <typename I>
+void Throttler<I>::handle_conf_change(const ConfigProxy& conf,
+ const set<string> &changed) {
+ if (changed.count(m_config_key)) {
+ set_max_concurrent_ops(conf.get_val<uint64_t>(m_config_key));
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::Throttler<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/Throttler.h b/src/tools/rbd_mirror/Throttler.h
new file mode 100644
index 000000000..32080238a
--- /dev/null
+++ b/src/tools/rbd_mirror/Throttler.h
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_THROTTLER_H
+#define RBD_MIRROR_THROTTLER_H
+
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "common/ceph_mutex.h"
+#include "common/config_obs.h"
+#include "include/common_fwd.h"
+
+class Context;
+
+namespace ceph { class Formatter; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
/// Bounds the number of concurrently running operations (e.g. image syncs),
/// keyed by (namespace, id). The limit comes from the config option named by
/// `config_key` and is updated live via the config-observer interface.
template <typename ImageCtxT = librbd::ImageCtx>
class Throttler : public md_config_obs_t {
public:
  static Throttler *create(
      CephContext *cct,
      const std::string &config_key) {
    return new Throttler(cct, config_key);
  }
  void destroy() {
    delete this;
  }

  Throttler(CephContext *cct,
            const std::string &config_key);
  ~Throttler() override;

  void set_max_concurrent_ops(uint32_t max);
  // on_start completes with 0 when the slot is granted (possibly deferred),
  // -ENOENT when superseding an earlier waiter, -ECANCELED on cancel/drain
  void start_op(const std::string &ns, const std::string &id,
                Context *on_start);
  bool cancel_op(const std::string &ns, const std::string &id);
  void finish_op(const std::string &ns, const std::string &id);
  // fail all queued ops of a namespace with `r` and forget its in-flight ops
  void drain(const std::string &ns, int r);

  void print_status(ceph::Formatter *f);

private:
  typedef std::pair<std::string, std::string> Id;  // (namespace, op id)

  CephContext *m_cct;
  const std::string m_config_key;
  mutable const char* m_config_keys[2];  // {m_config_key.c_str(), nullptr}

  // protects all containers and m_max_concurrent_ops below
  ceph::mutex m_lock;
  uint32_t m_max_concurrent_ops;  // 0 == unlimited
  std::list<Id> m_queue;                  // FIFO order of waiting ops
  std::map<Id, Context *> m_queued_ops;   // waiter contexts for queued ops
  std::set<Id> m_inflight_ops;

  const char **get_tracked_conf_keys() const override;
  void handle_conf_change(const ConfigProxy& conf,
                          const std::set<std::string> &changed) override;
};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::Throttler<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_THROTTLER_H
diff --git a/src/tools/rbd_mirror/Types.cc b/src/tools/rbd_mirror/Types.cc
new file mode 100644
index 000000000..cd71c73b1
--- /dev/null
+++ b/src/tools/rbd_mirror/Types.cc
@@ -0,0 +1,32 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/Types.h"
+
+namespace rbd {
+namespace mirror {
+
+std::ostream &operator<<(std::ostream &os, const ImageId &image_id) {
+ return os << "global id=" << image_id.global_id << ", "
+ << "id=" << image_id.id;
+}
+
+std::ostream& operator<<(std::ostream& lhs,
+ const LocalPoolMeta& rhs) {
+ return lhs << "mirror_uuid=" << rhs.mirror_uuid;
+}
+
+std::ostream& operator<<(std::ostream& lhs,
+ const RemotePoolMeta& rhs) {
+ return lhs << "mirror_uuid=" << rhs.mirror_uuid << ", "
+ "mirror_peer_uuid=" << rhs.mirror_peer_uuid;
+}
+
+std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer) {
+ return lhs << "uuid: " << peer.uuid
+ << " cluster: " << peer.cluster_name
+ << " client: " << peer.client_name;
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h
new file mode 100644
index 000000000..7b2a3b5ce
--- /dev/null
+++ b/src/tools/rbd_mirror/Types.h
@@ -0,0 +1,166 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_TYPES_H
+#define CEPH_RBD_MIRROR_TYPES_H
+
+#include <iostream>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct MirrorStatusUpdater;
+
+// Performance counters
+enum {
+ l_rbd_mirror_journal_first = 27000,
+ l_rbd_mirror_replay,
+ l_rbd_mirror_replay_bytes,
+ l_rbd_mirror_replay_latency,
+ l_rbd_mirror_journal_last,
+ l_rbd_mirror_snapshot_first,
+ l_rbd_mirror_snapshot_replay_snapshots,
+ l_rbd_mirror_snapshot_replay_snapshots_time,
+ l_rbd_mirror_snapshot_replay_bytes,
+ l_rbd_mirror_snapshot_last,
+};
+
+typedef std::shared_ptr<librados::Rados> RadosRef;
+typedef std::shared_ptr<librados::IoCtx> IoCtxRef;
+typedef std::shared_ptr<librbd::Image> ImageRef;
+
struct ImageId {
  std::string global_id;  ///< mirroring global image id
  std::string id;         ///< pool-local image id

  explicit ImageId(const std::string &global_id) : global_id(global_id) {
  }
  ImageId(const std::string &global_id, const std::string &id)
    : global_id(global_id), id(id) {
  }

  inline bool operator==(const ImageId &rhs) const {
    return (global_id == rhs.global_id && id == rhs.id);
  }
  // NOTE: ordering intentionally ignores 'id', so ImageIds sets are keyed
  // by global_id alone even though operator== compares both fields
  inline bool operator<(const ImageId &rhs) const {
    return global_id < rhs.global_id;
  }
};
+
+std::ostream &operator<<(std::ostream &, const ImageId &image_id);
+
+typedef std::set<ImageId> ImageIds;
+
/// mirroring metadata for the local pool
struct LocalPoolMeta {
  LocalPoolMeta() {}
  LocalPoolMeta(const std::string& mirror_uuid)
    : mirror_uuid(mirror_uuid) {
  }

  std::string mirror_uuid;
};
+
+std::ostream& operator<<(std::ostream& lhs,
+ const LocalPoolMeta& local_pool_meta);
+
/// mirroring metadata for a remote (peer) pool
struct RemotePoolMeta {
  RemotePoolMeta() {}
  RemotePoolMeta(const std::string& mirror_uuid,
                 const std::string& mirror_peer_uuid)
    : mirror_uuid(mirror_uuid),
      mirror_peer_uuid(mirror_peer_uuid) {
  }

  std::string mirror_uuid;
  std::string mirror_peer_uuid;
};
+
+std::ostream& operator<<(std::ostream& lhs,
+ const RemotePoolMeta& remote_pool_meta);
+
+template <typename I>
+struct Peer {
+ std::string uuid;
+ mutable librados::IoCtx io_ctx;
+ RemotePoolMeta remote_pool_meta;
+ MirrorStatusUpdater<I>* mirror_status_updater = nullptr;
+
+ Peer() {
+ }
+ Peer(const std::string& uuid,
+ librados::IoCtx& io_ctx,
+ const RemotePoolMeta& remote_pool_meta,
+ MirrorStatusUpdater<I>* mirror_status_updater)
+ : io_ctx(io_ctx),
+ remote_pool_meta(remote_pool_meta),
+ mirror_status_updater(mirror_status_updater) {
+ }
+
+ inline bool operator<(const Peer &rhs) const {
+ return uuid < rhs.uuid;
+ }
+};
+
// debug-log formatting; only the remote pool metadata is printed
template <typename I>
std::ostream& operator<<(std::ostream& lhs, const Peer<I>& peer) {
  return lhs << peer.remote_pool_meta;
}
+
+struct PeerSpec {
+ PeerSpec() = default;
+ PeerSpec(const std::string &uuid, const std::string &cluster_name,
+ const std::string &client_name)
+ : uuid(uuid), cluster_name(cluster_name), client_name(client_name)
+ {
+ }
+ PeerSpec(const librbd::mirror_peer_site_t &peer) :
+ uuid(peer.uuid),
+ cluster_name(peer.site_name),
+ client_name(peer.client_name)
+ {
+ }
+
+ std::string uuid;
+ std::string cluster_name;
+ std::string client_name;
+
+ /// optional config properties
+ std::string mon_host;
+ std::string key;
+
+ bool operator==(const PeerSpec& rhs) const {
+ return (uuid == rhs.uuid &&
+ cluster_name == rhs.cluster_name &&
+ client_name == rhs.client_name &&
+ mon_host == rhs.mon_host &&
+ key == rhs.key);
+ }
+ bool operator<(const PeerSpec& rhs) const {
+ if (uuid != rhs.uuid) {
+ return uuid < rhs.uuid;
+ } else if (cluster_name != rhs.cluster_name) {
+ return cluster_name < rhs.cluster_name;
+ } else if (client_name != rhs.client_name) {
+ return client_name < rhs.client_name;
+ } else if (mon_host < rhs.mon_host) {
+ return mon_host < rhs.mon_host;
+ } else {
+ return key < rhs.key;
+ }
+ }
+};
+
+std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer);
+
+} // namespace mirror
+} // namespace rbd
+
+
+#endif // CEPH_RBD_MIRROR_TYPES_H
diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc
new file mode 100644
index 000000000..19a98804c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc
@@ -0,0 +1,299 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Policy.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::SnapshotPurgeRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+using librbd::util::create_context_callback;
+
template <typename I>
void SnapshotPurgeRequest<I>::send() {
  // entry point: open image -> acquire lock -> unprotect/remove each
  // snapshot -> close image -> finish (see state diagram in the header)
  open_image();
}
+
template <typename I>
void SnapshotPurgeRequest<I>::open_image() {
  dout(10) << dendl;
  m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false);

  // ensure non-primary images can be modified
  m_image_ctx->read_only_mask &= ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY;

  {
    // install a no-op journal policy before opening so snapshot removal
    // isn't journaled through the normal replay path
    std::unique_lock image_locker{m_image_ctx->image_lock};
    m_image_ctx->set_journal_policy(new JournalPolicy());
  }

  Context *ctx = create_context_callback<
    SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_open_image>(
      this);
  m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx);
}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_open_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to open image '" << m_image_id << "': " << cpp_strerror(r)
+ << dendl;
+ m_image_ctx = nullptr;
+
+ finish(r);
+ return;
+ }
+
+ acquire_lock();
+}
+
template <typename I>
void SnapshotPurgeRequest<I>::acquire_lock() {
  dout(10) << dendl;

  // manual shared-lock handling: owner_lock must be held while inspecting
  // and using exclusive_lock, but released before the callback can fire
  m_image_ctx->owner_lock.lock_shared();
  if (m_image_ctx->exclusive_lock == nullptr) {
    m_image_ctx->owner_lock.unlock_shared();

    // exclusive-lock feature disabled: snapshots can be modified directly
    start_snap_unprotect();
    return;
  }

  m_image_ctx->exclusive_lock->acquire_lock(create_context_callback<
    SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_acquire_lock>(
      this));
  m_image_ctx->owner_lock.unlock_shared();
}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_acquire_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ start_snap_unprotect();
+}
+
template <typename I>
void SnapshotPurgeRequest<I>::start_snap_unprotect() {
  dout(10) << dendl;

  {
    // snapshot the current snap id list; it is consumed back-to-front by
    // the snap_unprotect/snap_remove loop
    std::shared_lock image_locker{m_image_ctx->image_lock};
    m_snaps = m_image_ctx->snaps;
  }
  snap_unprotect();
}
+
template <typename I>
void SnapshotPurgeRequest<I>::snap_unprotect() {
  // Loop head: process the newest remaining snapshot (back of m_snaps).
  // An empty list means every snapshot has been purged.
  if (m_snaps.empty()) {
    close_image();
    return;
  }

  librados::snap_t snap_id = m_snaps.back();
  // manual shared-lock handling with early-unlock error branches below
  m_image_ctx->image_lock.lock_shared();
  int r = m_image_ctx->get_snap_namespace(snap_id, &m_snap_namespace);
  if (r < 0) {
    m_image_ctx->image_lock.unlock_shared();

    derr << "failed to get snap namespace: " << cpp_strerror(r) << dendl;
    m_ret_val = r;
    close_image();
    return;
  }

  r = m_image_ctx->get_snap_name(snap_id, &m_snap_name);
  if (r < 0) {
    m_image_ctx->image_lock.unlock_shared();

    derr << "failed to get snap name: " << cpp_strerror(r) << dendl;
    m_ret_val = r;
    close_image();
    return;
  }

  bool is_protected;
  r = m_image_ctx->is_snap_protected(snap_id, &is_protected);
  if (r < 0) {
    m_image_ctx->image_lock.unlock_shared();

    derr << "failed to get snap protection status: " << cpp_strerror(r)
         << dendl;
    m_ret_val = r;
    close_image();
    return;
  }
  m_image_ctx->image_lock.unlock_shared();

  // unprotected snapshots skip straight to removal
  if (!is_protected) {
    snap_remove();
    return;
  }

  dout(10) << "snap_id=" << snap_id << ", "
           << "snap_namespace=" << m_snap_namespace << ", "
           << "snap_name=" << m_snap_name << dendl;

  // guard against losing the exclusive lock mid-operation; r receives the
  // failure reason when nullptr is returned
  auto finish_op_ctx = start_lock_op(&r);
  if (finish_op_ctx == nullptr) {
    derr << "lost exclusive lock" << dendl;
    m_ret_val = r;
    close_image();
    return;
  }

  // finish_op_ctx releases the lock-op tracking once the callback completes
  auto ctx = new LambdaContext([this, finish_op_ctx](int r) {
      handle_snap_unprotect(r);
      finish_op_ctx->complete(0);
    });
  std::shared_lock owner_locker{m_image_ctx->owner_lock};
  m_image_ctx->operations->execute_snap_unprotect(
    m_snap_namespace, m_snap_name.c_str(), ctx);
}
+
template <typename I>
void SnapshotPurgeRequest<I>::handle_snap_unprotect(int r) {
  dout(10) << "r=" << r << dendl;

  if (r == -EBUSY) {
    // a clone still references the snapshot; abort the purge
    dout(10) << "snapshot in-use" << dendl;
    m_ret_val = r;
    close_image();
    return;
  } else if (r < 0) {
    derr << "failed to unprotect snapshot: " << cpp_strerror(r) << dendl;
    m_ret_val = r;
    close_image();
    return;
  }

  {
    // avoid the need to refresh to delete the newly unprotected snapshot
    // NOTE(review): mutates snap_info under a *shared* image_lock -- looks
    // like it relies on no concurrent reader caring about this field; confirm
    std::shared_lock image_locker{m_image_ctx->image_lock};
    librados::snap_t snap_id = m_snaps.back();
    auto snap_info_it = m_image_ctx->snap_info.find(snap_id);
    if (snap_info_it != m_image_ctx->snap_info.end()) {
      snap_info_it->second.protection_status =
        RBD_PROTECTION_STATUS_UNPROTECTED;
    }
  }

  snap_remove();
}
+
template <typename I>
void SnapshotPurgeRequest<I>::snap_remove() {
  librados::snap_t snap_id = m_snaps.back();
  dout(10) << "snap_id=" << snap_id << ", "
           << "snap_namespace=" << m_snap_namespace << ", "
           << "snap_name=" << m_snap_name << dendl;

  // r is only set by start_lock_op when it returns nullptr (lock lost)
  int r;
  auto finish_op_ctx = start_lock_op(&r);
  if (finish_op_ctx == nullptr) {
    derr << "lost exclusive lock" << dendl;
    m_ret_val = r;
    close_image();
    return;
  }

  // finish_op_ctx releases the lock-op tracking once the callback completes
  auto ctx = new LambdaContext([this, finish_op_ctx](int r) {
      handle_snap_remove(r);
      finish_op_ctx->complete(0);
    });
  std::shared_lock owner_locker{m_image_ctx->owner_lock};
  m_image_ctx->operations->execute_snap_remove(
    m_snap_namespace, m_snap_name.c_str(), ctx);
}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_snap_remove(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBUSY) {
+ dout(10) << "snapshot in-use" << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ } else if (r < 0) {
+ derr << "failed to remove snapshot: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ m_snaps.pop_back();
+ snap_unprotect();
+}
+
template <typename I>
void SnapshotPurgeRequest<I>::close_image() {
  // common exit path for both success and error (m_ret_val holds any error)
  dout(10) << dendl;

  m_image_ctx->state->close(create_context_callback<
    SnapshotPurgeRequest<I>,
    &SnapshotPurgeRequest<I>::handle_close_image>(this));
}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_close_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_image_ctx = nullptr;
+
+ if (r < 0) {
+ derr << "failed to close: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+ finish(0);
+}
+
template <typename I>
void SnapshotPurgeRequest<I>::finish(int r) {
  // the first recorded error wins over the final callback's result
  if (m_ret_val < 0) {
    r = m_ret_val;
  }

  // self-deleting request object
  m_on_finish->complete(r);
  delete this;
}
+
template <typename I>
Context *SnapshotPurgeRequest<I>::start_lock_op(int* r) {
  // Returns a context that must be completed when the guarded operation
  // finishes, or nullptr (with *r set by exclusive_lock->start_op) when the
  // exclusive lock was lost. With the lock feature disabled a no-op context
  // is returned and *r is left untouched.
  std::shared_lock owner_locker{m_image_ctx->owner_lock};
  if (m_image_ctx->exclusive_lock == nullptr) {
    return new LambdaContext([](int r) {});
  }
  return m_image_ctx->exclusive_lock->start_op(r);
}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h
new file mode 100644
index 000000000..70cae8518
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h
@@ -0,0 +1,105 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H
+
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include <string>
+#include <vector>
+
+class Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SnapshotPurgeRequest {
+public:
+ static SnapshotPurgeRequest* create(librados::IoCtx &io_ctx,
+ const std::string &image_id,
+ Context *on_finish) {
+ return new SnapshotPurgeRequest(io_ctx, image_id, on_finish);
+ }
+
+ SnapshotPurgeRequest(librados::IoCtx &io_ctx, const std::string &image_id,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_image_id(image_id), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /*
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * OPEN_IMAGE
+ * |
+ * v
+ * ACQUIRE_LOCK
+ * |
+ * | (repeat for each snapshot)
+ * |/------------------------\
+ * | |
+ * v (skip if not needed) |
+ * SNAP_UNPROTECT |
+ * | |
+ * v (skip if not needed) |
+ * SNAP_REMOVE -----------------/
+ * |
+ * v
+ * CLOSE_IMAGE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_image_id;
+ Context *m_on_finish;
+
+ ImageCtxT *m_image_ctx = nullptr;
+ int m_ret_val = 0;
+
+ std::vector<librados::snap_t> m_snaps;
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+ std::string m_snap_name;
+
+ void open_image();
+ void handle_open_image(int r);
+
+ void acquire_lock();
+ void handle_acquire_lock(int r);
+
+ void start_snap_unprotect();
+ void snap_unprotect();
+ void handle_snap_unprotect(int r);
+
+ void snap_remove();
+ void handle_snap_remove(int r);
+
+ void close_image();
+ void handle_close_image(int r);
+
+ void finish(int r);
+
+ Context *start_lock_op(int* r);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H
+
diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc
new file mode 100644
index 000000000..e53923ef3
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc
@@ -0,0 +1,419 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/TrashWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/journal/ResetRequest.h"
+#include "librbd/mirror/ImageRemoveRequest.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include "librbd/trash/MoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashMoveRequest: " \
+ << this << " " << __func__ << ": "
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void TrashMoveRequest<I>::send() {
+ get_mirror_image_id();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::get_mirror_image_id() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashMoveRequest<I>,
+ &TrashMoveRequest<I>::handle_get_mirror_image_id>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_get_mirror_image_id(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_image_id_finish(&bl_it,
+ &m_image_id);
+ }
+ if (r == -ENOENT) {
+ dout(10) << "image " << m_global_image_id << " is not mirrored" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "error retrieving local id for image " << m_global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ get_mirror_info();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::get_mirror_info() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_get_mirror_info>(this);
+ auto req = librbd::mirror::GetInfoRequest<I>::create(
+ m_io_ctx, m_op_work_queue, m_image_id, &m_mirror_image, &m_promotion_state,
+ &m_primary_mirror_uuid, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_get_mirror_info(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(5) << "image " << m_global_image_id << " is not mirrored" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "error retrieving image primary info for image "
+ << m_global_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (m_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) {
+ dout(10) << "image " << m_global_image_id << " is local primary" << dendl;
+ finish(-EPERM);
+ return;
+ } else if (m_promotion_state == librbd::mirror::PROMOTION_STATE_ORPHAN &&
+ !m_resync) {
+ dout(10) << "image " << m_global_image_id << " is orphaned" << dendl;
+ finish(-EPERM);
+ return;
+ }
+
+ disable_mirror_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::disable_mirror_image() {
+ dout(10) << dendl;
+
+ m_mirror_image.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_set(&op, m_image_id, m_mirror_image);
+
+ auto aio_comp = create_rados_callback<
+ TrashMoveRequest<I>,
+ &TrashMoveRequest<I>::handle_disable_mirror_image>(this);
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_disable_mirror_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "local image is not mirrored, aborting deletion." << dendl;
+ finish(r);
+ return;
+ } else if (r == -EEXIST || r == -EINVAL) {
+ derr << "cannot disable mirroring for image " << m_global_image_id
+ << ": global_image_id has changed/reused: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "cannot disable mirroring for image " << m_global_image_id
+ << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ open_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::open_image() {
+ dout(10) << dendl;
+
+ m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false);
+
+ // ensure non-primary images can be modified
+ m_image_ctx->read_only_mask &= ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+
+ {
+ // don't attempt to open the journal
+ std::unique_lock image_locker{m_image_ctx->image_lock};
+ m_image_ctx->set_journal_policy(new JournalPolicy());
+ }
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_open_image>(this);
+ m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_open_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(5) << "mirror image does not exist, removing orphaned metadata" << dendl;
+ m_image_ctx = nullptr;
+ remove_mirror_image();
+ return;
+ }
+
+ if (r < 0) {
+ derr << "failed to open image: " << cpp_strerror(r) << dendl;
+ m_image_ctx = nullptr;
+ finish(r);
+ return;
+ }
+
+ if (m_image_ctx->old_format) {
+ derr << "cannot move v1 image to trash" << dendl;
+ m_ret_val = -EINVAL;
+ close_image();
+ return;
+ }
+
+ reset_journal();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::reset_journal() {
+ if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+ // snapshot-based mirroring doesn't require journal feature
+ acquire_lock();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ // TODO use Journal thread pool for journal ops until converted to ASIO
+ ContextWQ* context_wq;
+ librbd::Journal<>::get_work_queue(
+ reinterpret_cast<CephContext*>(m_io_ctx.cct()), &context_wq);
+
+ // ensure that if the image is recovered any peers will split-brain
+ auto ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_reset_journal>(this);
+ auto req = librbd::journal::ResetRequest<I>::create(
+ m_io_ctx, m_image_id, librbd::Journal<>::IMAGE_CLIENT_ID,
+ librbd::Journal<>::LOCAL_MIRROR_UUID, context_wq, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_reset_journal(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to reset journal: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ acquire_lock();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::acquire_lock() {
+ m_image_ctx->owner_lock.lock_shared();
+ if (m_image_ctx->exclusive_lock == nullptr) {
+ m_image_ctx->owner_lock.unlock_shared();
+
+ if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+ // snapshot-based mirroring doesn't require exclusive-lock
+ trash_move();
+ } else {
+ derr << "exclusive lock feature not enabled" << dendl;
+ m_ret_val = -EINVAL;
+ close_image();
+ }
+ return;
+ }
+
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_acquire_lock>(this);
+ m_image_ctx->exclusive_lock->block_requests(0);
+ m_image_ctx->exclusive_lock->acquire_lock(ctx);
+ m_image_ctx->owner_lock.unlock_shared();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_acquire_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ trash_move();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::trash_move() {
+ dout(10) << dendl;
+
+ utime_t delete_time{ceph_clock_now()};
+ utime_t deferment_end_time{delete_time};
+ deferment_end_time +=
+ m_image_ctx->config.template get_val<uint64_t>("rbd_mirroring_delete_delay");
+
+ m_trash_image_spec = {
+ cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING, m_image_ctx->name, delete_time,
+ deferment_end_time};
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_trash_move>(this);
+ auto req = librbd::trash::MoveRequest<I>::create(
+ m_io_ctx, m_image_id, m_trash_image_spec, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_trash_move(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to move image to trash: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ m_moved_to_trash = true;
+ remove_mirror_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::remove_mirror_image() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ TrashMoveRequest<I>,
+ &TrashMoveRequest<I>::handle_remove_mirror_image>(this);
+ auto req = librbd::mirror::ImageRemoveRequest<I>::create(
+ m_io_ctx, m_global_image_id, m_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_remove_mirror_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "local image is not mirrored" << dendl;
+ } else if (r < 0) {
+ derr << "failed to remove mirror image state for " << m_global_image_id
+ << ": " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ }
+
+ close_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::close_image() {
+ dout(10) << dendl;
+
+ if (m_image_ctx == nullptr) {
+ handle_close_image(0);
+ return;
+ }
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_close_image>(this);
+ m_image_ctx->state->close(ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_close_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_image_ctx = nullptr;
+
+ if (r < 0) {
+ derr << "failed to close image: " << cpp_strerror(r) << dendl;
+ }
+
+ // don't send notification if we failed
+ if (!m_moved_to_trash) {
+ finish(0);
+ return;
+ }
+
+ notify_trash_add();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::notify_trash_add() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_notify_trash_add>(this);
+ librbd::TrashWatcher<I>::notify_image_added(m_io_ctx, m_image_id,
+ m_trash_image_spec, ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_notify_trash_add(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::finish(int r) {
+ if (m_ret_val < 0) {
+ r = m_ret_val;
+ }
+
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>;
+
diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h
new file mode 100644
index 000000000..5b3f02519
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h
@@ -0,0 +1,142 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/Types.h"
+#include <string>
+
+struct Context;
+namespace librbd {
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class TrashMoveRequest {
+public:
+ static TrashMoveRequest* create(librados::IoCtx& io_ctx,
+ const std::string& global_image_id,
+ bool resync,
+ librbd::asio::ContextWQ* op_work_queue,
+ Context* on_finish) {
+ return new TrashMoveRequest(io_ctx, global_image_id, resync, op_work_queue,
+ on_finish);
+ }
+
+ TrashMoveRequest(librados::IoCtx& io_ctx, const std::string& global_image_id,
+ bool resync, librbd::asio::ContextWQ* op_work_queue,
+ Context* on_finish)
+ : m_io_ctx(io_ctx), m_global_image_id(global_image_id), m_resync(resync),
+ m_op_work_queue(op_work_queue), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /*
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_MIRROR_IMAGE_ID
+ * |
+ * v
+ * GET_MIRROR_INFO
+ * |
+ * v
+ * DISABLE_MIRROR_IMAGE
+ * |
+ * v
+ * OPEN_IMAGE
+ * |
+ * v (skip if not needed)
+ * RESET_JOURNAL
+ * |
+ * v (skip if not needed)
+ * ACQUIRE_LOCK
+ * |
+ * v
+ * TRASH_MOVE
+ * |
+ * v
+ * REMOVE_MIRROR_IMAGE
+ * |
+ * v
+ * CLOSE_IMAGE
+ * |
+ * v
+ * NOTIFY_TRASH_ADD
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_global_image_id;
+ bool m_resync;
+ librbd::asio::ContextWQ *m_op_work_queue;
+ Context *m_on_finish;
+
+ ceph::bufferlist m_out_bl;
+ std::string m_image_id;
+ cls::rbd::MirrorImage m_mirror_image;
+ librbd::mirror::PromotionState m_promotion_state;
+ std::string m_primary_mirror_uuid;
+ cls::rbd::TrashImageSpec m_trash_image_spec;
+ ImageCtxT *m_image_ctx = nullptr;
+ int m_ret_val = 0;
+ bool m_moved_to_trash = false;
+
+ void get_mirror_image_id();
+ void handle_get_mirror_image_id(int r);
+
+ void get_mirror_info();
+ void handle_get_mirror_info(int r);
+
+ void disable_mirror_image();
+ void handle_disable_mirror_image(int r);
+
+ void open_image();
+ void handle_open_image(int r);
+
+ void reset_journal();
+ void handle_reset_journal(int r);
+
+ void acquire_lock();
+ void handle_acquire_lock(int r);
+
+ void trash_move();
+ void handle_trash_move(int r);
+
+ void remove_mirror_image();
+ void handle_remove_mirror_image(int r);
+
+ void close_image();
+ void handle_close_image(int r);
+
+ void notify_trash_add();
+ void handle_notify_trash_add(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc
new file mode 100644
index 000000000..4d7c1c9df
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc
@@ -0,0 +1,265 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h"
+#include "include/ceph_assert.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/TrashWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/trash/RemoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashRemoveRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void TrashRemoveRequest<I>::send() {
+ *m_error_result = ERROR_RESULT_RETRY;
+
+ get_trash_image_spec();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::get_trash_image_spec() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::trash_get_start(&op, m_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_get_trash_image_spec>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_get_trash_image_spec(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::trash_get_finish(&bl_it, &m_trash_image_spec);
+ }
+
+ if (r == -ENOENT || (r >= 0 && m_trash_image_spec.source !=
+ cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING)) {
+ dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl;
+ finish(0);
+ return;
+ } else if (r < 0) {
+ derr << "error getting image id " << m_image_id << " info from trash: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL &&
+ m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) {
+ dout(10) << "image " << m_image_id << " is not in an expected trash state: "
+ << m_trash_image_spec.state << dendl;
+ *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY;
+ finish(-EBUSY);
+ return;
+ }
+
+ set_trash_state();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::set_trash_state() {
+ if (m_trash_image_spec.state == cls::rbd::TRASH_IMAGE_STATE_REMOVING) {
+ get_snap_context();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::trash_state_set(&op, m_image_id,
+ cls::rbd::TRASH_IMAGE_STATE_REMOVING,
+ cls::rbd::TRASH_IMAGE_STATE_NORMAL);
+
+ auto aio_comp = create_rados_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_set_trash_state>(this);
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_set_trash_state(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl;
+ finish(0);
+ return;
+ } else if (r < 0 && r != -EOPNOTSUPP) {
+ derr << "error setting trash image state for image id " << m_image_id
+ << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ get_snap_context();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::get_snap_context() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::get_snapcontext_start(&op);
+
+ std::string header_oid = librbd::util::header_name(m_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_get_snap_context>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(header_oid, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_get_snap_context(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ::SnapContext snapc;
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::get_snapcontext_finish(&bl_it, &snapc);
+ }
+ if (r < 0 && r != -ENOENT) {
+ derr << "error retrieving snapshot context for image "
+ << m_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_has_snapshots = (!snapc.empty());
+ purge_snapshots();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::purge_snapshots() {
+ if (!m_has_snapshots) {
+ remove_image();
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = create_context_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_purge_snapshots>(this);
+ auto req = SnapshotPurgeRequest<I>::create(m_io_ctx, m_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_purge_snapshots(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBUSY) {
+ dout(10) << "snapshots still in-use" << dendl;
+ *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to purge image snapshots: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ remove_image();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::remove_image() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_remove_image>(this);
+ auto req = librbd::trash::RemoveRequest<I>::create(
+ m_io_ctx, m_image_id, m_op_work_queue, true, m_progress_ctx,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_remove_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r == -ENOTEMPTY) {
+ // image must have clone v2 snapshot still associated to child
+ dout(10) << "snapshots still in-use" << dendl;
+ *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY;
+ finish(-EBUSY);
+ return;
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "error removing image " << m_image_id << " "
+ << "(" << m_image_id << ") from local pool: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ notify_trash_removed();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::notify_trash_removed() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_notify_trash_removed>(this);
+ librbd::TrashWatcher<I>::notify_image_removed(m_io_ctx, m_image_id, ctx);
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_notify_trash_removed(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h
new file mode 100644
index 000000000..b99736b33
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h
@@ -0,0 +1,117 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
+
+#include "include/rados/librados.hpp"
+#include "include/buffer.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/internal.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+#include <string>
+#include <vector>
+
+class Context;
+class ContextWQ;
+namespace librbd {
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class TrashRemoveRequest {
+public:
+ static TrashRemoveRequest* create(librados::IoCtx &io_ctx,
+ const std::string &image_id,
+ ErrorResult *error_result,
+ librbd::asio::ContextWQ *op_work_queue,
+ Context *on_finish) {
+ return new TrashRemoveRequest(io_ctx, image_id, error_result, op_work_queue,
+ on_finish);
+ }
+
+ TrashRemoveRequest(librados::IoCtx &io_ctx, const std::string &image_id,
+ ErrorResult *error_result,
+ librbd::asio::ContextWQ *op_work_queue,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_image_id(image_id), m_error_result(error_result),
+ m_op_work_queue(op_work_queue), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /*
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_TRASH_IMAGE_SPEC
+ * |
+ * v
+ * SET_TRASH_STATE
+ * |
+ * v
+ * GET_SNAP_CONTEXT
+ * |
+ * v
+ * PURGE_SNAPSHOTS
+ * |
+ * v
+ * TRASH_REMOVE
+ * |
+ * v
+ * NOTIFY_TRASH_REMOVE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_image_id;
+ ErrorResult *m_error_result;
+ librbd::asio::ContextWQ *m_op_work_queue;
+ Context *m_on_finish;
+
+ ceph::bufferlist m_out_bl;
+ cls::rbd::TrashImageSpec m_trash_image_spec;
+ bool m_has_snapshots = false;
+ librbd::NoOpProgressContext m_progress_ctx;
+
+ void get_trash_image_spec();
+ void handle_get_trash_image_spec(int r);
+
+ void set_trash_state();
+ void handle_set_trash_state(int r);
+
+ void get_snap_context();
+ void handle_get_snap_context(int r);
+
+ void purge_snapshots();
+ void handle_purge_snapshots(int r);
+
+ void remove_image();
+ void handle_remove_image(int r);
+
+ void notify_trash_removed();
+ void handle_notify_trash_removed(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc
new file mode 100644
index 000000000..552d77e0e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc
@@ -0,0 +1,384 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/TrashWatcher.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashWatcher: " \
+ << this << " " << __func__ << ": "
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+namespace {
+
+const size_t MAX_RETURN = 1024;
+
+} // anonymous namespace
+
+template <typename I>
+TrashWatcher<I>::TrashWatcher(librados::IoCtx &io_ctx, Threads<I> *threads,
+ TrashListener& trash_listener)
+ : librbd::TrashWatcher<I>(io_ctx, threads->work_queue),
+ m_io_ctx(io_ctx), m_threads(threads), m_trash_listener(trash_listener),
+ m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+ "rbd::mirror::image_deleter::TrashWatcher", this))) {
+}
+
+template <typename I>
+void TrashWatcher<I>::init(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ m_on_init_finish = on_finish;
+
+ ceph_assert(!m_trash_list_in_progress);
+ m_trash_list_in_progress = true;
+ }
+
+ create_trash();
+}
+
+template <typename I>
+void TrashWatcher<I>::shut_down(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ ceph_assert(!m_shutting_down);
+ m_shutting_down = true;
+ if (m_timer_ctx != nullptr) {
+ m_threads->timer->cancel_event(m_timer_ctx);
+ m_timer_ctx = nullptr;
+ }
+ }
+
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ unregister_watcher(on_finish);
+ });
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_image_added(const std::string &image_id,
+ const cls::rbd::TrashImageSpec& spec) {
+ dout(10) << "image_id=" << image_id << dendl;
+
+ std::lock_guard locker{m_lock};
+ add_image(image_id, spec);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_image_removed(const std::string &image_id) {
+ // ignore removals -- the image deleter will ignore -ENOENTs
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_rewatch_complete(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted" << dendl;
+ return;
+ } else if (r == -ENOENT) {
+ dout(5) << "trash directory deleted" << dendl;
+ } else if (r < 0) {
+ derr << "unexpected error re-registering trash directory watch: "
+ << cpp_strerror(r) << dendl;
+ }
+ schedule_trash_list(30);
+}
+
+template <typename I>
+void TrashWatcher<I>::create_trash() {
+ dout(20) << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ librados::ObjectWriteOperation op;
+ op.create(false);
+
+ m_async_op_tracker.start_op();
+ auto aio_comp = create_rados_callback<
+ TrashWatcher<I>, &TrashWatcher<I>::handle_create_trash>(this);
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_create_trash(int r) {
+ dout(20) << "r=" << r << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ Context* on_init_finish = nullptr;
+ if (r == -EBLOCKLISTED || r == -ENOENT) {
+ if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted" << dendl;
+ } else {
+ dout(0) << "detected pool no longer exists" << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+ std::swap(on_init_finish, m_on_init_finish);
+ m_trash_list_in_progress = false;
+ } else if (r < 0 && r != -EEXIST) {
+ derr << "failed to create trash object: " << cpp_strerror(r) << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ m_trash_list_in_progress = false;
+ }
+
+ schedule_trash_list(30);
+ } else {
+ register_watcher();
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void TrashWatcher<I>::register_watcher() {
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ // if the watch registration is in-flight, let the watcher
+ // handle the transition -- only (re-)register if it's not registered
+ if (!this->is_unregistered()) {
+ trash_list(true);
+ return;
+ }
+
+ // first time registering or the watch failed
+ dout(5) << dendl;
+ m_async_op_tracker.start_op();
+
+ Context *ctx = create_context_callback<
+ TrashWatcher, &TrashWatcher<I>::handle_register_watcher>(this);
+ this->register_watch(ctx);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_register_watcher(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ if (r < 0) {
+ m_trash_list_in_progress = false;
+ }
+ }
+
+ Context *on_init_finish = nullptr;
+ if (r >= 0) {
+ trash_list(true);
+ } else if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted" << dendl;
+
+ std::lock_guard locker{m_lock};
+ std::swap(on_init_finish, m_on_init_finish);
+ } else {
+ derr << "unexpected error registering trash directory watch: "
+ << cpp_strerror(r) << dendl;
+ schedule_trash_list(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void TrashWatcher<I>::unregister_watcher(Context* on_finish) {
+ dout(5) << dendl;
+
+ m_async_op_tracker.start_op();
+ Context *ctx = new LambdaContext([this, on_finish](int r) {
+ handle_unregister_watcher(r, on_finish);
+ });
+ this->unregister_watch(ctx);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_unregister_watcher(int r, Context* on_finish) {
+ dout(5) << "unregister_watcher: r=" << r << dendl;
+ if (r < 0) {
+ derr << "error unregistering watcher for trash directory: "
+ << cpp_strerror(r) << dendl;
+ }
+ m_async_op_tracker.finish_op();
+ on_finish->complete(0);
+}
+
+template <typename I>
+void TrashWatcher<I>::trash_list(bool initial_request) {
+ if (initial_request) {
+ m_async_op_tracker.start_op();
+ m_last_image_id = "";
+ }
+
+ dout(5) << "last_image_id=" << m_last_image_id << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::trash_list_start(&op, m_last_image_id, MAX_RETURN);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ TrashWatcher<I>, &TrashWatcher<I>::handle_trash_list>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_trash_list(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ std::map<std::string, cls::rbd::TrashImageSpec> images;
+ if (r >= 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::trash_list_finish(&bl_it, &images);
+ }
+
+ Context *on_init_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ if (r >= 0) {
+ for (auto& image : images) {
+ add_image(image.first, image.second);
+ }
+ } else if (r == -ENOENT) {
+ r = 0;
+ }
+
+ if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted during trash refresh" << dendl;
+ m_trash_list_in_progress = false;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r >= 0 && images.size() < MAX_RETURN) {
+ m_trash_list_in_progress = false;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r < 0) {
+ m_trash_list_in_progress = false;
+ }
+ }
+
+ if (r >= 0 && images.size() == MAX_RETURN) {
+ m_last_image_id = images.rbegin()->first;
+ trash_list(false);
+ return;
+ } else if (r < 0 && r != -EBLOCKLISTED) {
+ derr << "failed to retrieve trash directory: " << cpp_strerror(r) << dendl;
+ schedule_trash_list(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void TrashWatcher<I>::schedule_trash_list(double interval) {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (m_shutting_down || m_trash_list_in_progress || m_timer_ctx != nullptr) {
+ if (m_trash_list_in_progress && !m_deferred_trash_list) {
+ dout(5) << "deferring refresh until in-flight refresh completes" << dendl;
+ m_deferred_trash_list = true;
+ }
+ return;
+ }
+
+ dout(5) << dendl;
+ m_timer_ctx = m_threads->timer->add_event_after(
+ interval,
+ new LambdaContext([this](int r) {
+ process_trash_list();
+ }));
+}
+
+template <typename I>
+void TrashWatcher<I>::process_trash_list() {
+ dout(5) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(m_timer_ctx != nullptr);
+ m_timer_ctx = nullptr;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(!m_trash_list_in_progress);
+ m_trash_list_in_progress = true;
+ }
+
+ // execute outside of the timer's lock
+ m_async_op_tracker.start_op();
+ Context *ctx = new LambdaContext([this](int r) {
+ create_trash();
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void TrashWatcher<I>::add_image(const std::string& image_id,
+ const cls::rbd::TrashImageSpec& spec) {
+ if (spec.source != cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING) {
+ return;
+ }
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ auto& deferment_end_time = spec.deferment_end_time;
+ dout(10) << "image_id=" << image_id << ", "
+ << "deferment_end_time=" << deferment_end_time << dendl;
+
+ m_async_op_tracker.start_op();
+ auto ctx = new LambdaContext([this, image_id, deferment_end_time](int r) {
+ m_trash_listener.handle_trash_image(image_id,
+ deferment_end_time.to_real_time());
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.h b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h
new file mode 100644
index 000000000..e818a102c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h
@@ -0,0 +1,139 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_WATCHER_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_WATCHER_H
+
+#include "include/rados/librados.hpp"
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "librbd/TrashWatcher.h"
+#include <set>
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace image_deleter {
+
+struct TrashListener;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class TrashWatcher : public librbd::TrashWatcher<ImageCtxT> {
+public:
+ static TrashWatcher* create(librados::IoCtx &io_ctx,
+ Threads<ImageCtxT> *threads,
+ TrashListener& trash_listener) {
+ return new TrashWatcher(io_ctx, threads, trash_listener);
+ }
+
+ TrashWatcher(librados::IoCtx &io_ctx, Threads<ImageCtxT> *threads,
+ TrashListener& trash_listener);
+ TrashWatcher(const TrashWatcher&) = delete;
+ TrashWatcher& operator=(const TrashWatcher&) = delete;
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+protected:
+ void handle_image_added(const std::string &image_id,
+ const cls::rbd::TrashImageSpec& spec) override;
+
+ void handle_image_removed(const std::string &image_id) override;
+
+ void handle_rewatch_complete(int r) override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * INIT
+ * |
+ * v
+ * CREATE_TRASH
+ * |
+ * v
+ * REGISTER_WATCHER
+ * |
+ * |/--------------------------------\
+ * | |
+ * |/---------\ |
+ * | | |
+ * v | (more images) |
+ * TRASH_LIST ---/ |
+ * | |
+ * |/----------------------------\ |
+ * | | |
+ * v | |
+ * <idle> --\ | |
+ * | | | |
+ * | |\---> IMAGE_ADDED -----/ |
+ * | | |
+ * | \----> WATCH_ERROR ---------/
+ * v
+ * SHUT_DOWN
+ * |
+ * v
+ * UNREGISTER_WATCHER
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx m_io_ctx;
+ Threads<ImageCtxT> *m_threads;
+ TrashListener& m_trash_listener;
+
+ std::string m_last_image_id;
+ bufferlist m_out_bl;
+
+ mutable ceph::mutex m_lock;
+
+ Context *m_on_init_finish = nullptr;
+ Context *m_timer_ctx = nullptr;
+
+ AsyncOpTracker m_async_op_tracker;
+ bool m_trash_list_in_progress = false;
+ bool m_deferred_trash_list = false;
+ bool m_shutting_down = false;
+
+ void register_watcher();
+ void handle_register_watcher(int r);
+
+ void create_trash();
+ void handle_create_trash(int r);
+
+ void unregister_watcher(Context* on_finish);
+ void handle_unregister_watcher(int r, Context* on_finish);
+
+ void trash_list(bool initial_request);
+ void handle_trash_list(int r);
+
+ void schedule_trash_list(double interval);
+ void process_trash_list();
+
+ void get_mirror_uuid();
+ void handle_get_mirror_uuid(int r);
+
+ void add_image(const std::string& image_id,
+ const cls::rbd::TrashImageSpec& spec);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_WATCHER_H
diff --git a/src/tools/rbd_mirror/image_deleter/Types.h b/src/tools/rbd_mirror/image_deleter/Types.h
new file mode 100644
index 000000000..1c70b7e14
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/Types.h
@@ -0,0 +1,54 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
+
+#include "include/Context.h"
+#include "librbd/journal/Policy.h"
+#include <string>
+
+struct utime_t;
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+enum ErrorResult {
+ ERROR_RESULT_COMPLETE,
+ ERROR_RESULT_RETRY,
+ ERROR_RESULT_RETRY_IMMEDIATELY
+};
+
+struct TrashListener {
+ TrashListener() {
+ }
+ TrashListener(const TrashListener&) = delete;
+ TrashListener& operator=(const TrashListener&) = delete;
+
+ virtual ~TrashListener() {
+ }
+
+ virtual void handle_trash_image(const std::string& image_id,
+ const ceph::real_clock::time_point& deferment_end_time) = 0;
+
+};
+
+struct JournalPolicy : public librbd::journal::Policy {
+ bool append_disabled() const override {
+ return true;
+ }
+ bool journal_disabled() const override {
+ return true;
+ }
+
+ void allocate_tag_on_lock(Context *on_finish) override {
+ on_finish->complete(0);
+ }
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.cc b/src/tools/rbd_mirror/image_map/LoadRequest.cc
new file mode 100644
index 000000000..46564a160
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/LoadRequest.cc
@@ -0,0 +1,174 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+
+#include "UpdateRequest.h"
+#include "LoadRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::LoadRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+static const uint32_t MAX_RETURN = 1024;
+
+using librbd::util::create_rados_callback;
+using librbd::util::create_context_callback;
+
+template<typename I>
+LoadRequest<I>::LoadRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
+ Context *on_finish)
+ : m_ioctx(ioctx),
+ m_image_mapping(image_mapping),
+ m_on_finish(on_finish) {
+}
+
+template<typename I>
+void LoadRequest<I>::send() {
+ dout(20) << dendl;
+
+ image_map_list();
+}
+
+template<typename I>
+void LoadRequest<I>::image_map_list() {
+ dout(20) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_map_list_start(&op, m_start_after, MAX_RETURN);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ LoadRequest, &LoadRequest::handle_image_map_list>(this);
+
+ m_out_bl.clear();
+ int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template<typename I>
+void LoadRequest<I>::handle_image_map_list(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ std::map<std::string, cls::rbd::MirrorImageMap> image_mapping;
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_map_list_finish(&it, &image_mapping);
+ }
+
+ if (r < 0) {
+ derr << ": failed to get image map: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_image_mapping->insert(image_mapping.begin(), image_mapping.end());
+
+ if (image_mapping.size() == MAX_RETURN) {
+ m_start_after = image_mapping.rbegin()->first;
+ image_map_list();
+ return;
+ }
+
+ mirror_image_list();
+}
+
+template<typename I>
+void LoadRequest<I>::mirror_image_list() {
+ dout(20) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN);
+
+ m_out_bl.clear();
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ LoadRequest<I>,
+ &LoadRequest<I>::handle_mirror_image_list>(this);
+ int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template<typename I>
+void LoadRequest<I>::handle_mirror_image_list(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ std::map<std::string, std::string> ids;
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_list_finish(&it, &ids);
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ for (auto &id : ids) {
+ m_global_image_ids.emplace(id.second);
+ }
+
+ if (ids.size() == MAX_RETURN) {
+ m_start_after = ids.rbegin()->first;
+ mirror_image_list();
+ return;
+ }
+
+ cleanup_image_map();
+}
+
+template<typename I>
+void LoadRequest<I>::cleanup_image_map() {
+ dout(20) << dendl;
+
+ std::set<std::string> map_removals;
+
+ auto it = m_image_mapping->begin();
+ while (it != m_image_mapping->end()) {
+ if (m_global_image_ids.count(it->first) > 0) {
+ ++it;
+ continue;
+ }
+ map_removals.emplace(it->first);
+ it = m_image_mapping->erase(it);
+ }
+
+ if (map_removals.size() == 0) {
+ finish(0);
+ return;
+ }
+
+ auto ctx = create_context_callback<
+ LoadRequest<I>,
+ &LoadRequest<I>::finish>(this);
+ image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create(
+ m_ioctx, {}, std::move(map_removals), ctx);
+ req->send();
+}
+
+template<typename I>
+void LoadRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_map::LoadRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.h b/src/tools/rbd_mirror/image_map/LoadRequest.h
new file mode 100644
index 000000000..9b1be9685
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/LoadRequest.h
@@ -0,0 +1,77 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+
+class Context;
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+template<typename ImageCtxT = librbd::ImageCtx>
+class LoadRequest {
+public:
+ static LoadRequest *create(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
+ Context *on_finish) {
+ return new LoadRequest(ioctx, image_mapping, on_finish);
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * | . . . . . . . .
+ * v v . MAX_RETURN
+ * IMAGE_MAP_LIST. . . . . . .
+ * |
+ * v
+ * MIRROR_IMAGE_LIST
+ * |
+ * v
+ * CLEANUP_IMAGE_MAP
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ LoadRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
+ Context *on_finish);
+
+ librados::IoCtx &m_ioctx;
+ std::map<std::string, cls::rbd::MirrorImageMap> *m_image_mapping;
+ Context *m_on_finish;
+
+ std::set<std::string> m_global_image_ids;
+
+ bufferlist m_out_bl;
+ std::string m_start_after;
+
+ void image_map_list();
+ void handle_image_map_list(int r);
+
+ void mirror_image_list();
+ void handle_mirror_image_list(int r);
+
+ void cleanup_image_map();
+
+ void finish(int r);
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc
new file mode 100644
index 000000000..62fbd12dc
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Policy.cc
@@ -0,0 +1,407 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "Policy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+namespace {
+
+bool is_instance_action(ActionType action_type) {
+ switch (action_type) {
+ case ACTION_TYPE_ACQUIRE:
+ case ACTION_TYPE_RELEASE:
+ return true;
+ case ACTION_TYPE_NONE:
+ case ACTION_TYPE_MAP_UPDATE:
+ case ACTION_TYPE_MAP_REMOVE:
+ break;
+ }
+ return false;
+}
+
+} // anonymous namespace
+
+using ::operator<<;
+using librbd::util::unique_lock_name;
+
+Policy::Policy(librados::IoCtx &ioctx)
+ : m_ioctx(ioctx),
+ m_map_lock(ceph::make_shared_mutex(
+ unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", this))) {
+
+ // map should at least have one instance
+ std::string instance_id = stringify(ioctx.get_instance_id());
+ m_map.emplace(instance_id, std::set<std::string>{});
+}
+
+void Policy::init(
+ const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) {
+ dout(20) << dendl;
+
+ std::unique_lock map_lock{m_map_lock};
+ for (auto& it : image_mapping) {
+ ceph_assert(!it.second.instance_id.empty());
+ auto map_result = m_map[it.second.instance_id].emplace(it.first);
+ ceph_assert(map_result.second);
+
+ auto image_state_result = m_image_states.emplace(
+ it.first, ImageState{it.second.instance_id, it.second.mapped_time});
+ ceph_assert(image_state_result.second);
+
+ // ensure we (re)send image acquire actions to the instance
+ auto& image_state = image_state_result.first->second;
+ auto start_action = set_state(&image_state,
+ StateTransition::STATE_INITIALIZING, false);
+ ceph_assert(start_action);
+ }
+}
+
+LookupInfo Policy::lookup(const std::string &global_image_id) {
+ dout(20) << "global_image_id=" << global_image_id << dendl;
+
+ std::shared_lock map_lock{m_map_lock};
+ LookupInfo info;
+
+ auto it = m_image_states.find(global_image_id);
+ if (it != m_image_states.end()) {
+ info.instance_id = it->second.instance_id;
+ info.mapped_time = it->second.mapped_time;
+ }
+ return info;
+}
+
+bool Policy::add_image(const std::string &global_image_id) {
+ dout(5) << "global_image_id=" << global_image_id << dendl;
+
+ std::unique_lock map_lock{m_map_lock};
+ auto image_state_result = m_image_states.emplace(global_image_id,
+ ImageState{});
+ auto& image_state = image_state_result.first->second;
+ if (image_state.state == StateTransition::STATE_INITIALIZING) {
+ // avoid duplicate acquire notifications upon leader startup
+ return false;
+ }
+
+ return set_state(&image_state, StateTransition::STATE_ASSOCIATING, false);
+}
+
+bool Policy::remove_image(const std::string &global_image_id) {
+ dout(5) << "global_image_id=" << global_image_id << dendl;
+
+ std::unique_lock map_lock{m_map_lock};
+ auto it = m_image_states.find(global_image_id);
+ if (it == m_image_states.end()) {
+ return false;
+ }
+
+ auto& image_state = it->second;
+ return set_state(&image_state, StateTransition::STATE_DISSOCIATING, false);
+}
+
+void Policy::add_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ std::unique_lock map_lock{m_map_lock};
+ for (auto& instance : instance_ids) {
+ ceph_assert(!instance.empty());
+ m_map.emplace(instance, std::set<std::string>{});
+ }
+
+ // post-failover, remove any dead instances and re-shuffle their images
+ if (m_initial_update) {
+ dout(5) << "initial instance update" << dendl;
+ m_initial_update = false;
+
+ std::set<std::string> alive_instances(instance_ids.begin(),
+ instance_ids.end());
+ InstanceIds dead_instances;
+ for (auto& map_pair : m_map) {
+ if (alive_instances.find(map_pair.first) == alive_instances.end()) {
+ dead_instances.push_back(map_pair.first);
+ }
+ }
+
+ if (!dead_instances.empty()) {
+ remove_instances(m_map_lock, dead_instances, global_image_ids);
+ }
+ }
+
+ GlobalImageIds shuffle_global_image_ids;
+ do_shuffle_add_instances(m_map, m_image_states.size(), &shuffle_global_image_ids);
+ dout(5) << "shuffling global_image_ids=[" << shuffle_global_image_ids
+ << "]" << dendl;
+ for (auto& global_image_id : shuffle_global_image_ids) {
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ if (set_state(&image_state, StateTransition::STATE_SHUFFLING, false)) {
+ global_image_ids->emplace(global_image_id);
+ }
+ }
+}
+
+void Policy::remove_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids) {
+ std::unique_lock map_lock{m_map_lock};
+ remove_instances(m_map_lock, instance_ids, global_image_ids);
+}
+
+void Policy::remove_instances(const ceph::shared_mutex& lock,
+ const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids) {
+ ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ for (auto& instance_id : instance_ids) {
+ auto map_it = m_map.find(instance_id);
+ if (map_it == m_map.end()) {
+ continue;
+ }
+
+ auto& instance_global_image_ids = map_it->second;
+ if (instance_global_image_ids.empty()) {
+ m_map.erase(map_it);
+ continue;
+ }
+
+ m_dead_instances.insert(instance_id);
+ dout(5) << "force shuffling: instance_id=" << instance_id << ", "
+ << "global_image_ids=[" << instance_global_image_ids << "]"<< dendl;
+ for (auto& global_image_id : instance_global_image_ids) {
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ if (is_state_scheduled(image_state,
+ StateTransition::STATE_DISSOCIATING)) {
+ // don't shuffle images that no longer exist
+ continue;
+ }
+
+ if (set_state(&image_state, StateTransition::STATE_SHUFFLING, true)) {
+ global_image_ids->emplace(global_image_id);
+ }
+ }
+ }
+}
+
+ActionType Policy::start_action(const std::string &global_image_id) {
+ std::unique_lock map_lock{m_map_lock};
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ auto& transition = image_state.transition;
+ ceph_assert(transition.action_type != ACTION_TYPE_NONE);
+
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "state=" << image_state.state << ", "
+ << "action_type=" << transition.action_type << dendl;
+ if (transition.start_policy_action) {
+ execute_policy_action(global_image_id, &image_state,
+ *transition.start_policy_action);
+ transition.start_policy_action = boost::none;
+ }
+ return transition.action_type;
+}
+
+bool Policy::finish_action(const std::string &global_image_id, int r) {
+ std::unique_lock map_lock{m_map_lock};
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ auto& transition = image_state.transition;
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "state=" << image_state.state << ", "
+ << "action_type=" << transition.action_type << ", "
+ << "r=" << r << dendl;
+
+ // retry on failure unless it's an RPC message to an instance that is dead
+ if (r < 0 &&
+ (!is_instance_action(image_state.transition.action_type) ||
+ image_state.instance_id == UNMAPPED_INSTANCE_ID ||
+ m_dead_instances.find(image_state.instance_id) ==
+ m_dead_instances.end())) {
+ return true;
+ }
+
+ auto finish_policy_action = transition.finish_policy_action;
+ StateTransition::transit(image_state.state, &image_state.transition);
+ if (transition.finish_state) {
+ // in-progress state machine complete
+ ceph_assert(StateTransition::is_idle(*transition.finish_state));
+ image_state.state = *transition.finish_state;
+ image_state.transition = {};
+ }
+
+ if (StateTransition::is_idle(image_state.state) && image_state.next_state) {
+ // advance to pending state machine
+ bool start_action = set_state(&image_state, *image_state.next_state, false);
+ ceph_assert(start_action);
+ }
+
+ // image state may get purged in execute_policy_action()
+ bool pending_action = image_state.transition.action_type != ACTION_TYPE_NONE;
+ if (finish_policy_action) {
+ execute_policy_action(global_image_id, &image_state, *finish_policy_action);
+ }
+
+ return pending_action;
+}
+
+void Policy::execute_policy_action(
+ const std::string& global_image_id, ImageState* image_state,
+ StateTransition::PolicyAction policy_action) {
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "policy_action=" << policy_action << dendl;
+
+ switch (policy_action) {
+ case StateTransition::POLICY_ACTION_MAP:
+ map(global_image_id, image_state);
+ break;
+ case StateTransition::POLICY_ACTION_UNMAP:
+ unmap(global_image_id, image_state);
+ break;
+ case StateTransition::POLICY_ACTION_REMOVE:
+ if (image_state->state == StateTransition::STATE_UNASSOCIATED) {
+ ceph_assert(image_state->instance_id == UNMAPPED_INSTANCE_ID);
+ ceph_assert(!image_state->next_state);
+ m_image_states.erase(global_image_id);
+ }
+ break;
+ }
+}
+
+void Policy::map(const std::string& global_image_id, ImageState* image_state) {
+ ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
+
+ std::string instance_id = image_state->instance_id;
+ if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) {
+ return;
+ }
+ if (is_dead_instance(instance_id)) {
+ unmap(global_image_id, image_state);
+ }
+
+ instance_id = do_map(m_map, global_image_id);
+ ceph_assert(!instance_id.empty());
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ image_state->instance_id = instance_id;
+ image_state->mapped_time = ceph_clock_now();
+
+ auto ins = m_map[instance_id].emplace(global_image_id);
+ ceph_assert(ins.second);
+}
+
+void Policy::unmap(const std::string &global_image_id,
+ ImageState* image_state) {
+ ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
+
+ std::string instance_id = image_state->instance_id;
+ if (instance_id == UNMAPPED_INSTANCE_ID) {
+ return;
+ }
+
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ ceph_assert(!instance_id.empty());
+ m_map[instance_id].erase(global_image_id);
+ image_state->instance_id = UNMAPPED_INSTANCE_ID;
+ image_state->mapped_time = {};
+
+ if (is_dead_instance(instance_id) && m_map[instance_id].empty()) {
+ dout(5) << "removing dead instance_id=" << instance_id << dendl;
+ m_map.erase(instance_id);
+ m_dead_instances.erase(instance_id);
+ }
+}
+
+bool Policy::is_image_shuffling(const std::string &global_image_id) {
+ ceph_assert(ceph_mutex_is_locked(m_map_lock));
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+ auto& image_state = it->second;
+
+ // avoid attempting to re-shuffle a pending shuffle
+ auto result = is_state_scheduled(image_state,
+ StateTransition::STATE_SHUFFLING);
+ dout(20) << "global_image_id=" << global_image_id << ", "
+ << "result=" << result << dendl;
+ return result;
+}
+
+bool Policy::can_shuffle_image(const std::string &global_image_id) {
+ ceph_assert(ceph_mutex_is_locked(m_map_lock));
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ int migration_throttle = cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_image_policy_migration_throttle");
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+ auto& image_state = it->second;
+
+ utime_t last_shuffled_time = image_state.mapped_time;
+
+ // idle images that haven't been recently remapped can shuffle
+ utime_t now = ceph_clock_now();
+ auto result = (StateTransition::is_idle(image_state.state) &&
+ ((migration_throttle <= 0) ||
+ (now - last_shuffled_time >= migration_throttle)));
+ dout(10) << "global_image_id=" << global_image_id << ", "
+ << "migration_throttle=" << migration_throttle << ", "
+ << "last_shuffled_time=" << last_shuffled_time << ", "
+ << "result=" << result << dendl;
+ return result;
+}
+
+bool Policy::set_state(ImageState* image_state, StateTransition::State state,
+ bool ignore_current_state) {
+ if (!ignore_current_state && image_state->state == state) {
+ image_state->next_state = boost::none;
+ return false;
+ } else if (StateTransition::is_idle(image_state->state)) {
+ image_state->state = state;
+ image_state->next_state = boost::none;
+
+ StateTransition::transit(image_state->state, &image_state->transition);
+ ceph_assert(image_state->transition.action_type != ACTION_TYPE_NONE);
+ ceph_assert(!image_state->transition.finish_state);
+ return true;
+ }
+
+ image_state->next_state = state;
+ return false;
+}
+
+bool Policy::is_state_scheduled(const ImageState& image_state,
+ StateTransition::State state) const {
+ return (image_state.state == state ||
+ (image_state.next_state && *image_state.next_state == state));
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/Policy.h b/src/tools/rbd_mirror/image_map/Policy.h
new file mode 100644
index 000000000..0617bb9ee
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Policy.h
@@ -0,0 +1,123 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+
+#include <map>
+#include <tuple>
+#include <boost/optional.hpp>
+
+#include "common/RWLock.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/image_map/StateTransition.h"
+#include "tools/rbd_mirror/image_map/Types.h"
+
+class Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+// Base class for image -> instance mapping policies used by ImageMap.
+// Tracks a per-image state machine (see StateTransition) plus the
+// instance membership map; concrete policies (e.g. SimplePolicy) decide
+// which instance an image maps to and which images get shuffled.
+class Policy {
+public:
+  Policy(librados::IoCtx &ioctx);
+
+  virtual ~Policy() {
+  }
+
+  // init -- called during initialization with the persisted image mapping
+  void init(
+    const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping);
+
+  // lookup an image from the map
+  LookupInfo lookup(const std::string &global_image_id);
+
+  // add/remove an image from the policy; return value presumably signals
+  // whether an action was scheduled -- confirm against Policy.cc
+  bool add_image(const std::string &global_image_id);
+  bool remove_image(const std::string &global_image_id);
+
+  // shuffle images when instances are added/removed; images that need
+  // remapping are returned via *global_image_ids
+  void add_instances(const InstanceIds &instance_ids,
+                     GlobalImageIds* global_image_ids);
+  void remove_instances(const InstanceIds &instance_ids,
+                        GlobalImageIds* global_image_ids);
+
+  // start/finish the next action of an image's in-progress transition
+  ActionType start_action(const std::string &global_image_id);
+  bool finish_action(const std::string &global_image_id, int r);
+
+protected:
+  typedef std::map<std::string, std::set<std::string> > InstanceToImageMap;
+
+  // true if the instance has been marked dead (m_map_lock must be held)
+  bool is_dead_instance(const std::string instance_id) {
+    ceph_assert(ceph_mutex_is_locked(m_map_lock));
+    return m_dead_instances.find(instance_id) != m_dead_instances.end();
+  }
+
+  bool is_image_shuffling(const std::string &global_image_id);
+  bool can_shuffle_image(const std::string &global_image_id);
+
+  // map an image (global image id) to an instance
+  virtual std::string do_map(const InstanceToImageMap& map,
+                             const std::string &global_image_id) = 0;
+
+  // shuffle images when instances are added/removed
+  virtual void do_shuffle_add_instances(
+      const InstanceToImageMap& map, size_t image_count,
+      std::set<std::string> *remap_global_image_ids) = 0;
+
+private:
+  // per-image bookkeeping: current owner, time of mapping and the
+  // active/queued state machine state
+  struct ImageState {
+    std::string instance_id = UNMAPPED_INSTANCE_ID;
+    utime_t mapped_time;
+
+    ImageState() {}
+    ImageState(const std::string& instance_id, const utime_t& mapped_time)
+      : instance_id(instance_id), mapped_time(mapped_time) {
+    }
+
+    // active state and action
+    StateTransition::State state = StateTransition::STATE_UNASSOCIATED;
+    StateTransition::Transition transition;
+
+    // next scheduled state (queued while a transition is in progress)
+    boost::optional<StateTransition::State> next_state = boost::none;
+  };
+
+  typedef std::map<std::string, ImageState> ImageStates;
+
+  librados::IoCtx &m_ioctx;
+
+  ceph::shared_mutex m_map_lock;   // protects m_map
+  InstanceToImageMap m_map; // instance_id -> global_id map
+
+  ImageStates m_image_states;
+  std::set<std::string> m_dead_instances;
+
+  // true until the first mapping update has been processed
+  bool m_initial_update = true;
+
+  void remove_instances(const ceph::shared_mutex& lock,
+                        const InstanceIds &instance_ids,
+                        GlobalImageIds* global_image_ids);
+
+  bool set_state(ImageState* image_state, StateTransition::State state,
+                 bool ignore_current_state);
+
+  void execute_policy_action(const std::string& global_image_id,
+                             ImageState* image_state,
+                             StateTransition::PolicyAction policy_action);
+
+  void map(const std::string& global_image_id, ImageState* image_state);
+  void unmap(const std::string &global_image_id, ImageState* image_state);
+
+  bool is_state_scheduled(const ImageState& image_state,
+                          StateTransition::State state) const;
+
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.cc b/src/tools/rbd_mirror/image_map/SimplePolicy.cc
new file mode 100644
index 000000000..f26805819
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/SimplePolicy.cc
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "SimplePolicy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::SimplePolicy: " << this \
+ << " " << __func__ << ": "
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+SimplePolicy::SimplePolicy(librados::IoCtx &ioctx)
+  : Policy(ioctx) {
+}
+
+// Compute the target number of images per live instance for balancing.
+// Dead instances are excluded from the divisor; the result is at least 1.
+// Aborts if no live instance exists.
+size_t SimplePolicy::calc_images_per_instance(const InstanceToImageMap& map,
+                                              size_t image_count) {
+  size_t nr_instances = 0;
+  for (auto const &it : map) {
+    if (!Policy::is_dead_instance(it.first)) {
+      ++nr_instances;
+    }
+  }
+  ceph_assert(nr_instances > 0);
+
+  size_t images_per_instance = image_count / nr_instances;
+  if (images_per_instance == 0) {
+    ++images_per_instance;
+  }
+
+  return images_per_instance;
+}
+
+// Select images to remap off overloaded instances so that each instance
+// ends up with at most images_per_instance images.
+void SimplePolicy::do_shuffle_add_instances(
+    const InstanceToImageMap& map, size_t image_count,
+    std::set<std::string> *remap_global_image_ids) {
+  uint64_t images_per_instance = calc_images_per_instance(map, image_count);
+  dout(5) << "images per instance=" << images_per_instance << dendl;
+
+  for (auto const &instance : map) {
+    if (instance.second.size() <= images_per_instance) {
+      continue;
+    }
+
+    auto it = instance.second.begin();
+    uint64_t cut_off = instance.second.size() - images_per_instance;
+
+    while (it != instance.second.end() && cut_off > 0) {
+      if (Policy::is_image_shuffling(*it)) {
+        // already being shuffled -- counts against the excess but must not
+        // be remapped a second time
+        --cut_off;
+      } else if (Policy::can_shuffle_image(*it)) {
+        --cut_off;
+        remap_global_image_ids->emplace(*it);
+      }
+
+      ++it;
+    }
+  }
+}
+
+// Map the image to the live instance that currently owns the fewest
+// images. Aborts if the image is already mapped or no live instance
+// exists.
+std::string SimplePolicy::do_map(const InstanceToImageMap& map,
+                                 const std::string &global_image_id) {
+  auto min_it = map.end();
+  for (auto it = map.begin(); it != map.end(); ++it) {
+    // the image must not already be mapped to any instance
+    ceph_assert(it->second.find(global_image_id) == it->second.end());
+    if (Policy::is_dead_instance(it->first)) {
+      continue;
+    } else if (min_it == map.end()) {
+      min_it = it;
+    } else if (it->second.size() < min_it->second.size()) {
+      min_it = it;
+    }
+  }
+
+  ceph_assert(min_it != map.end());
+  dout(20) << "global_image_id=" << global_image_id << " maps to instance_id="
+           << min_it->first << dendl;
+  return min_it->first;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.h b/src/tools/rbd_mirror/image_map/SimplePolicy.h
new file mode 100644
index 000000000..ad2071b2c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/SimplePolicy.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+
+#include "Policy.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+// Load-balancing mapping policy: assigns each image to the live instance
+// with the fewest mapped images and shuffles images off overloaded
+// instances when membership changes.
+class SimplePolicy : public Policy {
+public:
+  static SimplePolicy *create(librados::IoCtx &ioctx) {
+    return new SimplePolicy(ioctx);
+  }
+
+protected:
+  SimplePolicy(librados::IoCtx &ioctx);
+
+  std::string do_map(const InstanceToImageMap& map,
+                     const std::string &global_image_id) override;
+
+  void do_shuffle_add_instances(
+      const InstanceToImageMap& map, size_t image_count,
+      std::set<std::string> *remap_global_image_ids) override;
+
+private:
+  // target image count per live instance (always >= 1)
+  size_t calc_images_per_instance(const InstanceToImageMap& map,
+                                  size_t image_count);
+
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
diff --git a/src/tools/rbd_mirror/image_map/StateTransition.cc b/src/tools/rbd_mirror/image_map/StateTransition.cc
new file mode 100644
index 000000000..ec5f07ff9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/StateTransition.cc
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <ostream>
+#include "include/ceph_assert.h"
+#include "StateTransition.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+// Pretty-print a state machine state for debug logs.
+std::ostream &operator<<(std::ostream &os,
+                         const StateTransition::State &state) {
+  switch(state) {
+  case StateTransition::STATE_INITIALIZING:
+    os << "INITIALIZING";
+    break;
+  case StateTransition::STATE_ASSOCIATING:
+    os << "ASSOCIATING";
+    break;
+  case StateTransition::STATE_ASSOCIATED:
+    os << "ASSOCIATED";
+    break;
+  case StateTransition::STATE_SHUFFLING:
+    os << "SHUFFLING";
+    break;
+  case StateTransition::STATE_DISSOCIATING:
+    os << "DISSOCIATING";
+    break;
+  case StateTransition::STATE_UNASSOCIATED:
+    os << "UNASSOCIATED";
+    break;
+  }
+  return os;
+}
+
+// Pretty-print a policy-internal action for debug logs.
+std::ostream &operator<<(std::ostream &os,
+                         const StateTransition::PolicyAction &policy_action) {
+  switch(policy_action) {
+  case StateTransition::POLICY_ACTION_MAP:
+    os << "MAP";
+    break;
+  case StateTransition::POLICY_ACTION_UNMAP:
+    os << "UNMAP";
+    break;
+  case StateTransition::POLICY_ACTION_REMOVE:
+    os << "REMOVE";
+    break;
+  }
+  return os;
+}
+
+// (state, last completed action) -> next Transition step. Each lookup
+// advances the image state machine by one action; a row whose Transition
+// carries a finish_state terminates the machine in that state.
+const StateTransition::TransitionTable StateTransition::s_transition_table {
+  // state current_action Transition
+  // ---------------------------------------------------------------------------
+  {{STATE_INITIALIZING, ACTION_TYPE_NONE}, {ACTION_TYPE_ACQUIRE, {}, {},
+                                            {}}},
+  {{STATE_INITIALIZING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {},
+                                               {STATE_ASSOCIATED}}},
+
+  {{STATE_ASSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_MAP_UPDATE,
+                                           {POLICY_ACTION_MAP}, {}, {}}},
+  {{STATE_ASSOCIATING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {},
+                                                 {}}},
+  {{STATE_ASSOCIATING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {},
+                                              {STATE_ASSOCIATED}}},
+
+  {{STATE_DISSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {},
+                                            {POLICY_ACTION_UNMAP}, {}}},
+  {{STATE_DISSOCIATING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_REMOVE, {},
+                                               {POLICY_ACTION_REMOVE}, {}}},
+  {{STATE_DISSOCIATING, ACTION_TYPE_MAP_REMOVE}, {ACTION_TYPE_NONE, {},
+                                                  {}, {STATE_UNASSOCIATED}}},
+
+  {{STATE_SHUFFLING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {},
+                                         {POLICY_ACTION_UNMAP}, {}}},
+  {{STATE_SHUFFLING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_UPDATE,
+                                            {POLICY_ACTION_MAP}, {}, {}}},
+  {{STATE_SHUFFLING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {},
+                                               {}}},
+  {{STATE_SHUFFLING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {},
+                                            {STATE_ASSOCIATED}}}
+};
+
+// Replace *transition with the table entry keyed by (state, the action
+// just completed). Aborts if no such entry exists -- callers may only
+// request transitions reachable from the table.
+void StateTransition::transit(State state, Transition* transition) {
+  auto it = s_transition_table.find({state, transition->action_type});
+  ceph_assert(it != s_transition_table.end());
+
+  *transition = it->second;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/StateTransition.h b/src/tools/rbd_mirror/image_map/StateTransition.h
new file mode 100644
index 000000000..02a5ce4e9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/StateTransition.h
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+
+#include "tools/rbd_mirror/image_map/Types.h"
+#include <boost/optional.hpp>
+#include <map>
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+// Static description of the image-map state machine: the set of states,
+// policy-internal actions, and the table-driven transitions between them.
+class StateTransition {
+public:
+  enum State {
+    STATE_UNASSOCIATED,
+    STATE_INITIALIZING,
+    STATE_ASSOCIATING,
+    STATE_ASSOCIATED,
+    STATE_SHUFFLING,
+    STATE_DISSOCIATING
+  };
+
+  // actions executed inside the policy (map/unmap/remove bookkeeping)
+  enum PolicyAction {
+    POLICY_ACTION_MAP,
+    POLICY_ACTION_UNMAP,
+    POLICY_ACTION_REMOVE
+  };
+
+  // one step of the state machine: the image-map action to run plus
+  // optional policy actions at start/finish and an optional final state
+  struct Transition {
+    // image map action
+    ActionType action_type = ACTION_TYPE_NONE;
+
+    // policy internal action
+    boost::optional<PolicyAction> start_policy_action;
+    boost::optional<PolicyAction> finish_policy_action;
+
+    // state machine complete
+    boost::optional<State> finish_state;
+
+    Transition() {
+    }
+    Transition(ActionType action_type,
+               const boost::optional<PolicyAction>& start_policy_action,
+               const boost::optional<PolicyAction>& finish_policy_action,
+               const boost::optional<State>& finish_state)
+      : action_type(action_type), start_policy_action(start_policy_action),
+        finish_policy_action(finish_policy_action), finish_state(finish_state) {
+    }
+  };
+
+  // idle == no transition in progress (terminal states only)
+  static bool is_idle(State state) {
+    return (state == STATE_UNASSOCIATED || state == STATE_ASSOCIATED);
+  }
+
+  static void transit(State state, Transition* transition);
+
+private:
+  typedef std::pair<State, ActionType> TransitionKey;
+  typedef std::map<TransitionKey, Transition> TransitionTable;
+
+  // image transition table
+  static const TransitionTable s_transition_table;
+};
+
+std::ostream &operator<<(std::ostream &os, const StateTransition::State &state);
+std::ostream &operator<<(std::ostream &os,
+                         const StateTransition::PolicyAction &policy_action);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
diff --git a/src/tools/rbd_mirror/image_map/Types.cc b/src/tools/rbd_mirror/image_map/Types.cc
new file mode 100644
index 000000000..47de9c3cf
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Types.cc
@@ -0,0 +1,138 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+#include <iostream>
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+const std::string UNMAPPED_INSTANCE_ID("");
+
+namespace {
+
+// Visitor returning the static TYPE constant of the active variant
+// alternative.
+template <typename E>
+class GetTypeVisitor : public boost::static_visitor<E> {
+public:
+  template <typename T>
+  inline E operator()(const T&) const {
+    return T::TYPE;
+  }
+};
+
+// Visitor that encodes the alternative's TYPE tag followed by its payload
+// into the supplied bufferlist.
+class EncodeVisitor : public boost::static_visitor<void> {
+public:
+  explicit EncodeVisitor(bufferlist &bl) : m_bl(bl) {
+  }
+
+  template <typename T>
+  inline void operator()(const T& t) const {
+    using ceph::encode;
+    encode(static_cast<uint32_t>(T::TYPE), m_bl);
+    t.encode(m_bl);
+  }
+private:
+  bufferlist &m_bl;
+};
+
+// Visitor that decodes the alternative's payload (the caller has already
+// consumed the type tag and selected the alternative).
+class DecodeVisitor : public boost::static_visitor<void> {
+public:
+  DecodeVisitor(__u8 version, bufferlist::const_iterator &iter)
+    : m_version(version), m_iter(iter) {
+  }
+
+  template <typename T>
+  inline void operator()(T& t) const {
+    t.decode(m_version, m_iter);
+  }
+private:
+  __u8 m_version;
+  bufferlist::const_iterator &m_iter;
+};
+
+// Visitor that dumps the alternative's TYPE under the given key and then
+// its fields into the Formatter.
+class DumpVisitor : public boost::static_visitor<void> {
+public:
+  explicit DumpVisitor(Formatter *formatter, const std::string &key)
+    : m_formatter(formatter), m_key(key) {}
+
+  template <typename T>
+  inline void operator()(const T& t) const {
+    auto type = T::TYPE;
+    m_formatter->dump_string(m_key.c_str(), stringify(type));
+    t.dump(m_formatter);
+  }
+private:
+  ceph::Formatter *m_formatter;
+  std::string m_key;
+};
+
+} // anonymous namespace
+
+// Return the TYPE tag of the currently held policy metadata alternative.
+PolicyMetaType PolicyData::get_policy_meta_type() const {
+  return boost::apply_visitor(GetTypeVisitor<PolicyMetaType>(), policy_meta);
+}
+
+// Encode as: versioned envelope (v1), type tag, then the alternative's
+// payload.
+void PolicyData::encode(bufferlist& bl) const {
+  ENCODE_START(1, 1, bl);
+  boost::apply_visitor(EncodeVisitor(bl), policy_meta);
+  ENCODE_FINISH(bl);
+}
+
+// Decode the type tag, select the matching alternative (unknown tags map
+// to PolicyMetaUnknown for forward compatibility), then decode the
+// payload.
+void PolicyData::decode(bufferlist::const_iterator& it) {
+  DECODE_START(1, it);
+
+  uint32_t policy_meta_type;
+  decode(policy_meta_type, it);
+
+  switch (policy_meta_type) {
+  case POLICY_META_TYPE_NONE:
+    policy_meta = PolicyMetaNone();
+    break;
+  default:
+    policy_meta = PolicyMetaUnknown();
+    break;
+  }
+
+  boost::apply_visitor(DecodeVisitor(struct_v, it), policy_meta);
+  DECODE_FINISH(it);
+}
+
+void PolicyData::dump(Formatter *f) const {
+  boost::apply_visitor(DumpVisitor(f, "policy_meta_type"), policy_meta);
+}
+
+void PolicyData::generate_test_instances(std::list<PolicyData *> &o) {
+  o.push_back(new PolicyData(PolicyMetaNone()));
+}
+
+// Pretty-print an image-map action type for debug logs.
+std::ostream &operator<<(std::ostream &os, const ActionType& action_type) {
+  switch (action_type) {
+  case ACTION_TYPE_NONE:
+    os << "NONE";
+    break;
+  case ACTION_TYPE_MAP_UPDATE:
+    os << "MAP_UPDATE";
+    break;
+  case ACTION_TYPE_MAP_REMOVE:
+    os << "MAP_REMOVE";
+    break;
+  case ACTION_TYPE_ACQUIRE:
+    os << "ACQUIRE";
+    break;
+  case ACTION_TYPE_RELEASE:
+    os << "RELEASE";
+    break;
+  default:
+    os << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")";
+    break;
+  }
+  return os;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/Types.h b/src/tools/rbd_mirror/image_map/Types.h
new file mode 100644
index 000000000..5a97430f3
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Types.h
@@ -0,0 +1,130 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H
+
+#include <iosfwd>
+#include <map>
+#include <set>
+#include <string>
+#include <boost/variant.hpp>
+
+#include "include/buffer.h"
+#include "include/encoding.h"
+#include "include/utime.h"
+#include "tools/rbd_mirror/Types.h"
+
+struct Context;
+
+namespace ceph {
+class Formatter;
+}
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+extern const std::string UNMAPPED_INSTANCE_ID;
+
+// Callback interface implemented by the ImageMap consumer; invoked when
+// the policy decides an image should be acquired, released or removed on
+// a particular instance.
+struct Listener {
+  virtual ~Listener() {
+  }
+
+  virtual void acquire_image(const std::string &global_image_id,
+                             const std::string &instance_id,
+                             Context* on_finish) = 0;
+  virtual void release_image(const std::string &global_image_id,
+                             const std::string &instance_id,
+                             Context* on_finish) = 0;
+  virtual void remove_image(const std::string &mirror_uuid,
+                            const std::string &global_image_id,
+                            const std::string &instance_id,
+                            Context* on_finish) = 0;
+};
+
+// Result of Policy::lookup(): owning instance (or UNMAPPED_INSTANCE_ID)
+// and the time the mapping was established.
+struct LookupInfo {
+  std::string instance_id = UNMAPPED_INSTANCE_ID;
+  utime_t mapped_time;
+};
+
+// Image-map actions executed against the on-disk map / peer instances.
+enum ActionType {
+  ACTION_TYPE_NONE,
+  ACTION_TYPE_MAP_UPDATE,
+  ACTION_TYPE_MAP_REMOVE,
+  ACTION_TYPE_ACQUIRE,
+  ACTION_TYPE_RELEASE
+};
+
+typedef std::vector<std::string> InstanceIds;
+typedef std::set<std::string> GlobalImageIds;
+typedef std::map<std::string, ActionType> ImageActionTypes;
+
+enum PolicyMetaType {
+  POLICY_META_TYPE_NONE = 0,
+};
+
+// Policy metadata variant with no payload.
+struct PolicyMetaNone {
+  static const PolicyMetaType TYPE = POLICY_META_TYPE_NONE;
+
+  PolicyMetaNone() {
+  }
+
+  void encode(bufferlist& bl) const {
+  }
+
+  void decode(__u8 version, bufferlist::const_iterator& it) {
+  }
+
+  void dump(Formatter *f) const {
+  }
+};
+
+// Placeholder for a policy metadata type this build does not understand;
+// decodable (payload ignored) but never encodable.
+struct PolicyMetaUnknown {
+  static const PolicyMetaType TYPE = static_cast<PolicyMetaType>(-1);
+
+  PolicyMetaUnknown() {
+  }
+
+  void encode(bufferlist& bl) const {
+    // unknown metadata must never be re-encoded
+    ceph_abort();
+  }
+
+  void decode(__u8 version, bufferlist::const_iterator& it) {
+  }
+
+  void dump(Formatter *f) const {
+  }
+};
+
+typedef boost::variant<PolicyMetaNone,
+                       PolicyMetaUnknown> PolicyMeta;
+
+// Versioned, type-tagged wrapper around the policy metadata variant.
+struct PolicyData {
+  PolicyData()
+    : policy_meta(PolicyMetaUnknown()) {
+  }
+  PolicyData(const PolicyMeta &policy_meta)
+    : policy_meta(policy_meta) {
+  }
+
+  PolicyMeta policy_meta;
+
+  PolicyMetaType get_policy_meta_type() const;
+
+  void encode(bufferlist& bl) const;
+  void decode(bufferlist::const_iterator& it);
+  void dump(Formatter *f) const;
+
+  static void generate_test_instances(std::list<PolicyData *> &o);
+};
+
+WRITE_CLASS_ENCODER(PolicyData);
+
+std::ostream &operator<<(std::ostream &os, const ActionType &action_type);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H
diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.cc b/src/tools/rbd_mirror/image_map/UpdateRequest.cc
new file mode 100644
index 000000000..799c5670f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/UpdateRequest.cc
@@ -0,0 +1,100 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+
+#include "UpdateRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::UpdateRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+using librbd::util::create_rados_callback;
+
+static const uint32_t MAX_UPDATE = 256;
+
+// Takes ownership of the pending map updates/removals; they are drained
+// in batches by update_image_map().
+template <typename I>
+UpdateRequest<I>::UpdateRequest(librados::IoCtx &ioctx,
+    std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
+    std::set<std::string> &&remove_global_image_ids, Context *on_finish)
+  : m_ioctx(ioctx),
+    m_update_mapping(update_mapping),
+    m_remove_global_image_ids(remove_global_image_ids),
+    m_on_finish(on_finish) {
+}
+
+template <typename I>
+void UpdateRequest<I>::send() {
+  dout(20) << dendl;
+
+  update_image_map();
+}
+
+// Issue one batched write against the RBD_MIRROR_LEADER object containing
+// up to MAX_UPDATE map updates/removals; loops (via the completion
+// handler) until both pending collections are drained.
+template <typename I>
+void UpdateRequest<I>::update_image_map() {
+  dout(20) << dendl;
+
+  if (m_update_mapping.empty() && m_remove_global_image_ids.empty()) {
+    // nothing left to flush
+    finish(0);
+    return;
+  }
+
+  uint32_t nr_updates = 0;
+  librados::ObjectWriteOperation op;
+
+  // batch map updates first, then removals, consuming entries as they are
+  // added to the compound op
+  auto it1 = m_update_mapping.begin();
+  while (it1 != m_update_mapping.end() && nr_updates++ < MAX_UPDATE) {
+    librbd::cls_client::mirror_image_map_update(&op, it1->first, it1->second);
+    it1 = m_update_mapping.erase(it1);
+  }
+
+  auto it2 = m_remove_global_image_ids.begin();
+  while (it2 != m_remove_global_image_ids.end() && nr_updates++ < MAX_UPDATE) {
+    librbd::cls_client::mirror_image_map_remove(&op, *it2);
+    it2 = m_remove_global_image_ids.erase(it2);
+  }
+
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    UpdateRequest, &UpdateRequest::handle_update_image_map>(this);
+  int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// Completion of one batch: abort on error, otherwise continue with the
+// next batch (update_image_map() finishes when nothing remains).
+template <typename I>
+void UpdateRequest<I>::handle_update_image_map(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to update image map: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  update_image_map();
+}
+
+// Complete the caller's context and self-destruct.
+template <typename I>
+void UpdateRequest<I>::finish(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_map::UpdateRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.h b/src/tools/rbd_mirror/image_map/UpdateRequest.h
new file mode 100644
index 000000000..841cc6f9b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/UpdateRequest.h
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+
+class Context;
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+// One-shot, self-deleting request that persists image-map updates and
+// removals to the RBD_MIRROR_LEADER object in batches of MAX_UPDATE.
+template<typename ImageCtxT = librbd::ImageCtx>
+class UpdateRequest {
+public:
+  // accepts an image map to update and a collection of
+  // global image ids to purge.
+  static UpdateRequest *create(librados::IoCtx &ioctx,
+      std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
+      std::set<std::string> &&remove_global_image_ids, Context *on_finish) {
+    return new UpdateRequest(ioctx, std::move(update_mapping), std::move(remove_global_image_ids),
+                             on_finish);
+  }
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |     . . . . . . . .
+   *    v     v             . MAX_UPDATE
+   * UPDATE_IMAGE_MAP. . . . . . .
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+  UpdateRequest(librados::IoCtx &ioctx,
+      std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
+      std::set<std::string> &&remove_global_image_ids, Context *on_finish);
+
+  librados::IoCtx &m_ioctx;
+  std::map<std::string, cls::rbd::MirrorImageMap> m_update_mapping;
+  std::set<std::string> m_remove_global_image_ids;
+  Context *m_on_finish;
+
+  void update_image_map();
+  void handle_update_image_map(int r);
+
+  void finish(int r);
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
new file mode 100644
index 000000000..bda5b5f9b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
@@ -0,0 +1,485 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "BootstrapRequest.h"
+#include "CreateImageRequest.h"
+#include "OpenImageRequest.h"
+#include "OpenLocalImageRequest.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "journal/Journaler.h"
+#include "journal/Settings.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/journal/Types.h"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include "tools/rbd_mirror/ImageSync.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "BootstrapRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+using librbd::util::unique_lock_name;
+
+// Bootstrap a mirrored image for replay: prepare local/remote state,
+// open both images and (elsewhere in this request) create/sync the local
+// image as needed. Results are returned via *state_builder / *do_resync.
+template <typename I>
+BootstrapRequest<I>::BootstrapRequest(
+    Threads<I>* threads,
+    librados::IoCtx& local_io_ctx,
+    librados::IoCtx& remote_io_ctx,
+    InstanceWatcher<I>* instance_watcher,
+    const std::string& global_image_id,
+    const std::string& local_mirror_uuid,
+    const RemotePoolMeta& remote_pool_meta,
+    ::journal::CacheManagerHandler* cache_manager_handler,
+    PoolMetaCache* pool_meta_cache,
+    ProgressContext* progress_ctx,
+    StateBuilder<I>** state_builder,
+    bool* do_resync,
+    Context* on_finish)
+  : CancelableRequest("rbd::mirror::image_replayer::BootstrapRequest",
+                      reinterpret_cast<CephContext*>(local_io_ctx.cct()),
+                      on_finish),
+    m_threads(threads),
+    m_local_io_ctx(local_io_ctx),
+    m_remote_io_ctx(remote_io_ctx),
+    m_instance_watcher(instance_watcher),
+    m_global_image_id(global_image_id),
+    m_local_mirror_uuid(local_mirror_uuid),
+    m_remote_pool_meta(remote_pool_meta),
+    m_cache_manager_handler(cache_manager_handler),
+    m_pool_meta_cache(pool_meta_cache),
+    m_progress_ctx(progress_ctx),
+    m_state_builder(state_builder),
+    m_do_resync(do_resync),
+    m_lock(ceph::make_mutex(unique_lock_name("BootstrapRequest::m_lock",
+                                             this))) {
+  dout(10) << dendl;
+}
+
+// True while an image sync is in flight.
+template <typename I>
+bool BootstrapRequest<I>::is_syncing() const {
+  std::lock_guard locker{m_lock};
+  return (m_image_sync != nullptr);
+}
+
+template <typename I>
+void BootstrapRequest<I>::send() {
+  *m_do_resync = false;
+
+  prepare_local_image();
+}
+
+// Flag cancellation and propagate it to any in-flight image sync; the
+// state machine observes m_canceled at its checkpoints.
+template <typename I>
+void BootstrapRequest<I>::cancel() {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+  m_canceled = true;
+
+  if (m_image_sync != nullptr) {
+    m_image_sync->cancel();
+  }
+}
+
+// Latest known local image name (updated by prepare_local_image).
+template <typename I>
+std::string BootstrapRequest<I>::get_local_image_name() const {
+  std::unique_lock locker{m_lock};
+  return m_local_image_name;
+}
+
+// Resolve the local image (if any) for the global image id and build the
+// initial replay state builder.
+template <typename I>
+void BootstrapRequest<I>::prepare_local_image() {
+  dout(10) << dendl;
+  update_progress("PREPARE_LOCAL_IMAGE");
+
+  {
+    // default the reported name to the global image id until the real
+    // local name is known
+    std::unique_lock locker{m_lock};
+    m_local_image_name = m_global_image_id;
+  }
+
+  ceph_assert(*m_state_builder == nullptr);
+  auto ctx = create_context_callback<
+    BootstrapRequest, &BootstrapRequest<I>::handle_prepare_local_image>(this);
+  auto req = image_replayer::PrepareLocalImageRequest<I>::create(
+    m_local_io_ctx, m_global_image_id, &m_prepare_local_image_name,
+    m_state_builder, m_threads->work_queue, ctx);
+  req->send();
+}
+
+// -ENOENT (no local image) is not fatal; any other error aborts the
+// bootstrap.
+template <typename I>
+void BootstrapRequest<I>::handle_prepare_local_image(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  ceph_assert(r < 0 || *m_state_builder != nullptr);
+  if (r == -ENOENT) {
+    dout(10) << "local image does not exist" << dendl;
+  } else if (r < 0) {
+    derr << "error preparing local image for replay: " << cpp_strerror(r)
+         << dendl;
+    finish(r);
+    return;
+  }
+
+  // image replayer will detect the name change (if any) at next
+  // status update
+  if (r >= 0 && !m_prepare_local_image_name.empty()) {
+    std::unique_lock locker{m_lock};
+    m_local_image_name = m_prepare_local_image_name;
+  }
+
+  prepare_remote_image();
+}
+
+// Resolve the remote image and augment the state builder with remote
+// pool/image details.
+template <typename I>
+void BootstrapRequest<I>::prepare_remote_image() {
+  dout(10) << dendl;
+  update_progress("PREPARE_REMOTE_IMAGE");
+
+  Context *ctx = create_context_callback<
+    BootstrapRequest, &BootstrapRequest<I>::handle_prepare_remote_image>(this);
+  auto req = image_replayer::PrepareRemoteImageRequest<I>::create(
+    m_threads, m_local_io_ctx, m_remote_io_ctx, m_global_image_id,
+    m_local_mirror_uuid, m_remote_pool_meta, m_cache_manager_handler,
+    m_state_builder, ctx);
+  req->send();
+}
+
+// Decide whether replay can proceed based on which side(s) exist and
+// which side is primary; error codes signal the distinct outcomes to the
+// caller (-ENOMSG: local primary, -ENOLINK: remote gone and safe to
+// delete, -ENOENT: remote missing, -EREMOTEIO: split-brain-ish cases).
+template <typename I>
+void BootstrapRequest<I>::handle_prepare_remote_image(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  auto state_builder = *m_state_builder;
+  ceph_assert(state_builder == nullptr ||
+              !state_builder->remote_mirror_uuid.empty());
+
+  if (state_builder != nullptr && state_builder->is_local_primary()) {
+    dout(5) << "local image is primary" << dendl;
+    finish(-ENOMSG);
+    return;
+  } else if (r == -ENOENT || state_builder == nullptr) {
+    dout(10) << "remote image does not exist";
+    if (state_builder != nullptr) {
+      *_dout << ": "
+             << "local_image_id=" << state_builder->local_image_id << ", "
+             << "remote_image_id=" << state_builder->remote_image_id << ", "
+             << "is_linked=" << state_builder->is_linked();
+    }
+    *_dout << dendl;
+
+    // TODO need to support multiple remote images
+    if (state_builder != nullptr &&
+        state_builder->remote_image_id.empty() &&
+        (state_builder->local_image_id.empty() ||
+         state_builder->is_linked())) {
+      // neither image exists, or the local image exists, is non-primary,
+      // and is linked to the missing remote image
+      finish(-ENOLINK);
+    } else {
+      finish(-ENOENT);
+    }
+    return;
+  } else if (r < 0) {
+    derr << "error preparing remote image for replay: " << cpp_strerror(r)
+         << dendl;
+    finish(r);
+    return;
+  }
+
+  if (!state_builder->is_remote_primary()) {
+    ceph_assert(!state_builder->remote_image_id.empty());
+    if (state_builder->local_image_id.empty()) {
+      dout(10) << "local image does not exist and remote image is not primary"
+               << dendl;
+      finish(-EREMOTEIO);
+      return;
+    } else if (!state_builder->is_linked()) {
+      dout(10) << "local image is unlinked and remote image is not primary"
+               << dendl;
+      finish(-EREMOTEIO);
+      return;
+    }
+    // if the local image is linked to the remote image, we ignore that
+    // the remote image is not primary so that we can replay demotion
+  }
+
+  open_remote_image();
+}
+
+// Open the remote image whose id was resolved into the state builder by the
+// earlier prepare-remote-image step; the ImageCtx is stored back into the
+// state builder's remote_image_ctx.
+template <typename I>
+void BootstrapRequest<I>::open_remote_image() {
+  ceph_assert(*m_state_builder != nullptr);
+  auto remote_image_id = (*m_state_builder)->remote_image_id;
+  dout(15) << "remote_image_id=" << remote_image_id << dendl;
+
+  update_progress("OPEN_REMOTE_IMAGE");
+
+  auto ctx = create_context_callback<
+    BootstrapRequest<I>,
+    &BootstrapRequest<I>::handle_open_remote_image>(this);
+  ceph_assert(*m_state_builder != nullptr);
+  // 'false' selects a non-read-only open here (contrast with the parent-image
+  // open in CreateImageRequest which passes 'true') -- confirm against
+  // OpenImageRequest's parameter list.
+  OpenImageRequest<I> *request = OpenImageRequest<I>::create(
+    m_remote_io_ctx, &(*m_state_builder)->remote_image_ctx, remote_image_id,
+    false, ctx);
+  request->send();
+}
+
+// On success, branch on whether a local image id is already known: create a
+// fresh local image if not, otherwise open the existing one.
+template <typename I>
+void BootstrapRequest<I>::handle_open_remote_image(int r) {
+  dout(15) << "r=" << r << dendl;
+
+  ceph_assert(*m_state_builder != nullptr);
+  if (r < 0) {
+    derr << "failed to open remote image: " << cpp_strerror(r) << dendl;
+    // OpenImageRequest is expected to leave the ctx pointer null on failure
+    ceph_assert((*m_state_builder)->remote_image_ctx == nullptr);
+    finish(r);
+    return;
+  }
+
+  if ((*m_state_builder)->local_image_id.empty()) {
+    create_local_image();
+    return;
+  }
+
+  open_local_image();
+}
+
+// Open the already-existing local copy of the mirrored image; the ImageCtx is
+// stored into the state builder's local_image_ctx.
+template <typename I>
+void BootstrapRequest<I>::open_local_image() {
+  ceph_assert(*m_state_builder != nullptr);
+  auto local_image_id = (*m_state_builder)->local_image_id;
+
+  dout(15) << "local_image_id=" << local_image_id << dendl;
+
+  update_progress("OPEN_LOCAL_IMAGE");
+
+  Context *ctx = create_context_callback<
+    BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_local_image>(
+      this);
+  OpenLocalImageRequest<I> *request = OpenLocalImageRequest<I>::create(
+    m_local_io_ctx, &(*m_state_builder)->local_image_ctx, local_image_id,
+    m_threads->work_queue, ctx);
+  request->send();
+}
+
+// Dispatch on the open result:
+//   -ENOENT    -> the local image vanished; fall back to creating it
+//   -EREMOTEIO -> local image is primary, replay must not proceed
+//   other < 0  -> hard failure; unwind via close_remote_image()
+//   success    -> continue to prepare_replay()
+template <typename I>
+void BootstrapRequest<I>::handle_open_local_image(int r) {
+  dout(15) << "r=" << r << dendl;
+
+  ceph_assert(*m_state_builder != nullptr);
+  auto local_image_ctx = (*m_state_builder)->local_image_ctx;
+  // invariant: ctx pointer is set iff the open succeeded
+  ceph_assert((r >= 0 && local_image_ctx != nullptr) ||
+              (r < 0 && local_image_ctx == nullptr));
+
+  if (r == -ENOENT) {
+    dout(10) << "local image missing" << dendl;
+    create_local_image();
+    return;
+  } else if (r == -EREMOTEIO) {
+    dout(10) << "local image is primary -- skipping image replay" << dendl;
+    m_ret_val = r;
+    close_remote_image();
+    return;
+  } else if (r < 0) {
+    derr << "failed to open local image: " << cpp_strerror(r) << dendl;
+    m_ret_val = r;
+    close_remote_image();
+    return;
+  }
+
+  prepare_replay();
+}
+
+// Delegate replay preparation to the (journal- or snapshot-specific) state
+// builder; the request reports back whether a resync was requested and
+// whether an image sync is still in progress via m_do_resync / m_syncing.
+template <typename I>
+void BootstrapRequest<I>::prepare_replay() {
+  dout(10) << dendl;
+  update_progress("PREPARE_REPLAY");
+
+  ceph_assert(*m_state_builder != nullptr);
+  auto ctx = create_context_callback<
+    BootstrapRequest<I>, &BootstrapRequest<I>::handle_prepare_replay>(this);
+  auto request = (*m_state_builder)->create_prepare_replay_request(
+    m_local_mirror_uuid, m_progress_ctx, m_do_resync, &m_syncing, ctx);
+  request->send();
+}
+
+// Route based on the prepare-replay outcome; every path below ends in
+// close_remote_image() or image_sync() -- the remote image is never left
+// open without a follow-up step.
+template <typename I>
+void BootstrapRequest<I>::handle_prepare_replay(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to prepare local replay: " << cpp_strerror(r) << dendl;
+    m_ret_val = r;
+    close_remote_image();
+    return;
+  } else if (*m_do_resync) {
+    // resync is handled by the caller; bootstrap finishes with success (0)
+    dout(10) << "local image resync requested" << dendl;
+    close_remote_image();
+    return;
+  } else if ((*m_state_builder)->is_disconnected()) {
+    dout(10) << "client flagged disconnected -- skipping bootstrap" << dendl;
+    // The caller is expected to detect disconnect initializing remote journal.
+    m_ret_val = 0;
+    close_remote_image();
+    return;
+  } else if (m_syncing) {
+    dout(10) << "local image still syncing to remote image" << dendl;
+    image_sync();
+    return;
+  }
+
+  close_remote_image();
+}
+
+// Create the local copy of the mirrored image via the state builder's
+// mode-specific create request (journal vs snapshot based mirroring).
+template <typename I>
+void BootstrapRequest<I>::create_local_image() {
+  dout(10) << dendl;
+  update_progress("CREATE_LOCAL_IMAGE");
+
+  ceph_assert(*m_state_builder != nullptr);
+  auto ctx = create_context_callback<
+    BootstrapRequest<I>,
+    &BootstrapRequest<I>::handle_create_local_image>(this);
+  auto request = (*m_state_builder)->create_local_image_request(
+    m_threads, m_local_io_ctx, m_global_image_id, m_pool_meta_cache,
+    m_progress_ctx, ctx);
+  request->send();
+}
+
+// On success loop back to open_local_image(); on failure record the error
+// and unwind through close_remote_image().
+template <typename I>
+void BootstrapRequest<I>::handle_create_local_image(int r) {
+  dout(15) << "r=" << r << dendl;
+
+  if (r < 0) {
+    if (r == -ENOENT) {
+      // clone case: the parent image is not (yet) available locally
+      dout(10) << "parent image does not exist" << dendl;
+    } else {
+      derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+    }
+    m_ret_val = r;
+    close_remote_image();
+    return;
+  }
+
+  open_local_image();
+}
+
+// Run a full image sync from the remote to the local image.  The cancel
+// check and the creation/ref of m_image_sync happen under m_lock so a
+// concurrent cancel() can observe a consistent state -- presumably cancel()
+// forwards to m_image_sync when set (cancel() body not visible here).
+template <typename I>
+void BootstrapRequest<I>::image_sync() {
+  std::unique_lock locker{m_lock};
+  if (m_canceled) {
+    locker.unlock();
+
+    m_ret_val = -ECANCELED;
+    dout(10) << "request canceled" << dendl;
+    close_remote_image();
+    return;
+  }
+
+  dout(15) << dendl;
+  ceph_assert(m_image_sync == nullptr);
+
+  auto state_builder = *m_state_builder;
+  auto sync_point_handler = state_builder->create_sync_point_handler();
+
+  Context *ctx = create_context_callback<
+    BootstrapRequest<I>, &BootstrapRequest<I>::handle_image_sync>(this);
+  m_image_sync = ImageSync<I>::create(
+    m_threads, state_builder->local_image_ctx, state_builder->remote_image_ctx,
+    m_local_mirror_uuid, sync_point_handler, m_instance_watcher,
+    m_progress_ctx, ctx);
+  // extra ref held while the sync runs; released in handle_image_sync()
+  m_image_sync->get();
+  locker.unlock();
+
+  update_progress("IMAGE_SYNC");
+  m_image_sync->send();
+}
+
+// Drop the sync ref and its sync-point handler under the lock, then record
+// any error and unwind through close_remote_image().
+template <typename I>
+void BootstrapRequest<I>::handle_image_sync(int r) {
+  dout(15) << "r=" << r << dendl;
+
+  {
+    std::lock_guard locker{m_lock};
+    m_image_sync->put();
+    m_image_sync = nullptr;
+
+    (*m_state_builder)->destroy_sync_point_handler();
+  }
+
+  if (r < 0) {
+    if (r == -ECANCELED) {
+      dout(10) << "request canceled" << dendl;
+    } else {
+      derr << "failed to sync remote image: " << cpp_strerror(r) << dendl;
+    }
+    m_ret_val = r;
+  }
+
+  close_remote_image();
+}
+
+// Final unwind step for every terminal path of the state machine.  If the
+// replay mode still needs the remote image open (per the state builder),
+// skip the close and finish immediately with the accumulated m_ret_val.
+template <typename I>
+void BootstrapRequest<I>::close_remote_image() {
+  if ((*m_state_builder)->replay_requires_remote_image()) {
+    finish(m_ret_val);
+    return;
+  }
+
+  dout(15) << dendl;
+
+  update_progress("CLOSE_REMOTE_IMAGE");
+
+  auto ctx = create_context_callback<
+    BootstrapRequest<I>,
+    &BootstrapRequest<I>::handle_close_remote_image>(this);
+  ceph_assert(*m_state_builder != nullptr);
+  (*m_state_builder)->close_remote_image(ctx);
+}
+
+// Close errors are logged but do not override the bootstrap result; the
+// request always completes with m_ret_val.
+template <typename I>
+void BootstrapRequest<I>::handle_close_remote_image(int r) {
+  dout(15) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "error encountered closing remote image: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  finish(m_ret_val);
+}
+
+// Report the named state-machine step to the optional progress context
+// (used by callers to surface bootstrap progress); no-op when unset.
+template <typename I>
+void BootstrapRequest<I>::update_progress(const std::string &description) {
+  dout(15) << description << dendl;
+
+  if (m_progress_ctx) {
+    m_progress_ctx->update_progress(description);
+  }
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
new file mode 100644
index 000000000..f5bb8dd8a
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
@@ -0,0 +1,181 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/Types.h"
+#include "tools/rbd_mirror/CancelableRequest.h"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+class Context;
+
+namespace journal { class CacheManagerHandler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+
+template <typename> class ImageSync;
+template <typename> class InstanceWatcher;
+struct PoolMetaCache;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+template <typename> class StateBuilder;
+
+// Cancelable async state machine that bootstraps replay of a single mirrored
+// image: it resolves/creates the local image, opens the remote image,
+// prepares replay and (when needed) performs a full image sync.  Results are
+// handed back through the state-builder and do_resync out-parameters.
+template <typename ImageCtxT = librbd::ImageCtx>
+class BootstrapRequest : public CancelableRequest {
+public:
+  typedef rbd::mirror::ProgressContext ProgressContext;
+
+  static BootstrapRequest* create(
+      Threads<ImageCtxT>* threads,
+      librados::IoCtx& local_io_ctx,
+      librados::IoCtx& remote_io_ctx,
+      InstanceWatcher<ImageCtxT>* instance_watcher,
+      const std::string& global_image_id,
+      const std::string& local_mirror_uuid,
+      const RemotePoolMeta& remote_pool_meta,
+      ::journal::CacheManagerHandler* cache_manager_handler,
+      PoolMetaCache* pool_meta_cache,
+      ProgressContext* progress_ctx,
+      StateBuilder<ImageCtxT>** state_builder,
+      bool* do_resync,
+      Context* on_finish) {
+    return new BootstrapRequest(
+      threads, local_io_ctx, remote_io_ctx, instance_watcher, global_image_id,
+      local_mirror_uuid, remote_pool_meta, cache_manager_handler,
+      pool_meta_cache, progress_ctx, state_builder, do_resync, on_finish);
+  }
+
+  BootstrapRequest(
+      Threads<ImageCtxT>* threads,
+      librados::IoCtx& local_io_ctx,
+      librados::IoCtx& remote_io_ctx,
+      InstanceWatcher<ImageCtxT>* instance_watcher,
+      const std::string& global_image_id,
+      const std::string& local_mirror_uuid,
+      const RemotePoolMeta& remote_pool_meta,
+      ::journal::CacheManagerHandler* cache_manager_handler,
+      PoolMetaCache* pool_meta_cache,
+      ProgressContext* progress_ctx,
+      StateBuilder<ImageCtxT>** state_builder,
+      bool* do_resync,
+      Context* on_finish);
+
+  bool is_syncing() const;
+
+  void send() override;
+  void cancel() override;
+
+  std::string get_local_image_name() const;
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v                                           (error)
+   * PREPARE_LOCAL_IMAGE  * * * * * * * * * * * * * * * * * *
+   *    |                                                   *
+   *    v                                           (error) *
+   * PREPARE_REMOTE_IMAGE * * * * * * * * * * * * * * * * * *
+   *    |                                                   *
+   *    v                                           (error) *
+   * OPEN_REMOTE_IMAGE  * * * * * * * * * * * * * * * * * * *
+   *    |                                                   *
+   *    |                                                   *
+   *    \----> CREATE_LOCAL_IMAGE * * * * * * * * * * * *   *
+   *    |            |   ^                          *    *  *
+   *    |            |   . (image DNE)              *    *  *
+   *    |            v   .                          *    *  *
+   *    \----> OPEN_LOCAL_IMAGE * * * * * * * * * * *    *  *
+   *                 |                              *    *  *
+   *                 |                              *    *  *
+   *                 v                              *    *  *
+   *           PREPARE_REPLAY * * * * * * * * * * * *    *  *
+   *                 |                              *    *  *
+   *                 |                              *    *  *
+   *                 v (skip if not needed)         *    *  *
+   *           IMAGE_SYNC * * * * * * * * * * * * * *    *  *
+   *                 |                              *    *  *
+   *                 |                              *    *  *
+   *    /------------/                              *    *  *
+   *    |                                           *    *  *
+   *    v                                           *    *  *
+   * CLOSE_REMOTE_IMAGE < * * * * * * * * * * * * * *    *  *
+   *    |                                                *  *
+   *    v                                                *  *
+   * <finish> < * * * * * * * * * * * * * * * * * * * *  *  *
+   *
+   * @endverbatim
+   */
+  Threads<ImageCtxT>* m_threads;
+  librados::IoCtx &m_local_io_ctx;
+  librados::IoCtx &m_remote_io_ctx;
+  InstanceWatcher<ImageCtxT> *m_instance_watcher;
+  std::string m_global_image_id;
+  std::string m_local_mirror_uuid;
+  RemotePoolMeta m_remote_pool_meta;
+  ::journal::CacheManagerHandler *m_cache_manager_handler;
+  PoolMetaCache* m_pool_meta_cache;
+  ProgressContext *m_progress_ctx;
+  // out-param: mode-specific builder, owned by the caller once populated
+  StateBuilder<ImageCtxT>** m_state_builder;
+  // out-param: set when the local image requested a resync
+  bool *m_do_resync;
+
+  // guards m_canceled and the lifetime of m_image_sync
+  mutable ceph::mutex m_lock;
+  bool m_canceled = false;
+
+  // first error recorded by the state machine; reported on finish
+  int m_ret_val = 0;
+
+  std::string m_local_image_name;
+  std::string m_prepare_local_image_name;
+
+  bool m_syncing = false;
+  ImageSync<ImageCtxT> *m_image_sync = nullptr;
+
+  void prepare_local_image();
+  void handle_prepare_local_image(int r);
+
+  void prepare_remote_image();
+  void handle_prepare_remote_image(int r);
+
+  void open_remote_image();
+  void handle_open_remote_image(int r);
+
+  void open_local_image();
+  void handle_open_local_image(int r);
+
+  void create_local_image();
+  void handle_create_local_image(int r);
+
+  void prepare_replay();
+  void handle_prepare_replay(int r);
+
+  void image_sync();
+  void handle_image_sync(int r);
+
+  void close_remote_image();
+  void handle_close_remote_image(int r);
+
+  void update_progress(const std::string &description);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc
new file mode 100644
index 000000000..872c8baa9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CloseImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::CloseImageRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+// Tiny self-deleting helper: asynchronously closes *image_ctx and nulls the
+// caller's pointer when done.
+template <typename I>
+CloseImageRequest<I>::CloseImageRequest(I **image_ctx, Context *on_finish)
+  : m_image_ctx(image_ctx), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void CloseImageRequest<I>::send() {
+  close_image();
+}
+
+template <typename I>
+void CloseImageRequest<I>::close_image() {
+  dout(20) << dendl;
+
+  Context *ctx = create_context_callback<
+    CloseImageRequest<I>, &CloseImageRequest<I>::handle_close_image>(this);
+  (*m_image_ctx)->state->close(ctx);
+}
+
+// Close errors are logged only; the request always completes with 0 and the
+// caller's ImageCtx pointer is cleared regardless of the result.
+template <typename I>
+void CloseImageRequest<I>::handle_close_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": error encountered while closing image: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  *m_image_ctx = nullptr;
+
+  m_on_finish->complete(0);
+  delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;
+
diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h
new file mode 100644
index 000000000..02481369d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+// Single-step async request that closes an ImageCtx and resets the owner's
+// pointer; self-deletes after completing on_finish.
+template <typename ImageCtxT = librbd::ImageCtx>
+class CloseImageRequest {
+public:
+  static CloseImageRequest* create(ImageCtxT **image_ctx, Context *on_finish) {
+    return new CloseImageRequest(image_ctx, on_finish);
+  }
+
+  CloseImageRequest(ImageCtxT **image_ctx, Context *on_finish);
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * CLOSE_IMAGE
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+  // non-owning: points at the caller's ImageCtx pointer, nulled on completion
+  ImageCtxT **m_image_ctx;
+  Context *m_on_finish;
+
+  void close_image();
+  void handle_close_image(int r);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc
new file mode 100644
index 000000000..641bb03e8
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc
@@ -0,0 +1,451 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CreateImageRequest.h"
+#include "CloseImageRequest.h"
+#include "OpenImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/image/CreateRequest.h"
+#include "librbd/image/CloneRequest.h"
+#include "tools/rbd_mirror/PoolMetaCache.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_sync/Utils.h"
+#include <boost/algorithm/string/predicate.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::CreateImageRequest: " \
+ << this << " " << __func__ << ": "
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+// Create (or clone) the local copy of a remote mirrored image.  All inputs
+// are borrowed from the caller; the request self-deletes via finish().
+template <typename I>
+CreateImageRequest<I>::CreateImageRequest(
+    Threads<I>* threads,
+    librados::IoCtx &local_io_ctx,
+    const std::string &global_image_id,
+    const std::string &remote_mirror_uuid,
+    const std::string &local_image_name,
+    const std::string &local_image_id,
+    I *remote_image_ctx,
+    PoolMetaCache* pool_meta_cache,
+    cls::rbd::MirrorImageMode mirror_image_mode,
+    Context *on_finish)
+  : m_threads(threads), m_local_io_ctx(local_io_ctx),
+    m_global_image_id(global_image_id),
+    m_remote_mirror_uuid(remote_mirror_uuid),
+    m_local_image_name(local_image_name), m_local_image_id(local_image_id),
+    m_remote_image_ctx(remote_image_ctx),
+    m_pool_meta_cache(pool_meta_cache),
+    m_mirror_image_mode(mirror_image_mode), m_on_finish(on_finish) {
+}
+
+// Entry point: synchronously validate/resolve the remote parent spec, then
+// either create a flat image (no parent) or start the clone path.
+template <typename I>
+void CreateImageRequest<I>::send() {
+  int r = validate_parent();
+  if (r < 0) {
+    // defer completion to the work queue to preserve async semantics
+    error(r);
+    return;
+  }
+
+  if (m_remote_parent_spec.pool_id == -1) {
+    create_image();
+  } else {
+    get_parent_global_image_id();
+  }
+}
+
+// Non-clone path: create a flat local image mirroring the remote image's
+// size/features (copied via populate_image_options()).
+template <typename I>
+void CreateImageRequest<I>::create_image() {
+  dout(10) << dendl;
+
+  using klass = CreateImageRequest<I>;
+  Context *ctx = create_context_callback<
+    klass, &klass::handle_create_image>(this);
+
+  // hold the remote image lock while reading size/feature state
+  std::shared_lock image_locker{m_remote_image_ctx->image_lock};
+
+  auto& config{
+    reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf};
+
+  librbd::ImageOptions image_options;
+  populate_image_options(&image_options);
+
+  auto req = librbd::image::CreateRequest<I>::create(
+    config, m_local_io_ctx, m_local_image_name, m_local_image_id,
+    m_remote_image_ctx->size, image_options, 0U, m_mirror_image_mode,
+    m_global_image_id, m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue,
+    ctx);
+  req->send();
+}
+
+// -EBADF (image id already in-use) is reported as-is so the caller can
+// re-resolve the id; other errors are logged and propagated.
+template <typename I>
+void CreateImageRequest<I>::handle_create_image(int r) {
+  dout(10) << "r=" << r << dendl;
+  if (r == -EBADF) {
+    dout(5) << "image id " << m_local_image_id << " already in-use" << dendl;
+    finish(r);
+    return;
+  } else if (r < 0) {
+    derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  finish(0);
+}
+
+// Clone path step 1: look up the remote parent's global image id in the
+// remote pool's RBD_MIRRORING object.
+template <typename I>
+void CreateImageRequest<I>::get_parent_global_image_id() {
+  dout(10) << dendl;
+
+  librados::ObjectReadOperation op;
+  librbd::cls_client::mirror_image_get_start(&op,
+                                             m_remote_parent_spec.image_id);
+
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    CreateImageRequest<I>,
+    &CreateImageRequest<I>::handle_get_parent_global_image_id>(this);
+  m_out_bl.clear();
+  int r = m_remote_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
+                                             &m_out_bl);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// -ENOENT means the parent image is not mirrored, which makes the clone
+// impossible; any error aborts without opening anything (nothing to unwind).
+template <typename I>
+void CreateImageRequest<I>::handle_get_parent_global_image_id(int r) {
+  dout(10) << "r=" << r << dendl;
+  if (r == 0) {
+    cls::rbd::MirrorImage mirror_image;
+    auto iter = m_out_bl.cbegin();
+    r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image);
+    if (r == 0) {
+      m_parent_global_image_id = mirror_image.global_image_id;
+      dout(15) << "parent_global_image_id=" << m_parent_global_image_id
+               << dendl;
+    }
+  }
+
+  if (r == -ENOENT) {
+    dout(10) << "parent image " << m_remote_parent_spec.image_id
+             << " not mirrored" << dendl;
+    finish(r);
+    return;
+  } else if (r < 0) {
+    derr << "failed to retrieve global image id for parent image "
+         << m_remote_parent_spec.image_id << ": " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  get_local_parent_image_id();
+}
+
+// Clone path step 2: map the parent's global image id to its local image id
+// via the local pool's RBD_MIRRORING object.
+template <typename I>
+void CreateImageRequest<I>::get_local_parent_image_id() {
+  dout(10) << dendl;
+
+  librados::ObjectReadOperation op;
+  librbd::cls_client::mirror_image_get_image_id_start(
+    &op, m_parent_global_image_id);
+
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    CreateImageRequest<I>,
+    &CreateImageRequest<I>::handle_get_local_parent_image_id>(this);
+  m_out_bl.clear();
+  int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
+                                            &m_out_bl);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// -ENOENT means the parent has not been mirrored to the local cluster yet;
+// the caller is expected to retry once it is.
+template <typename I>
+void CreateImageRequest<I>::handle_get_local_parent_image_id(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r == 0) {
+    auto iter = m_out_bl.cbegin();
+    r = librbd::cls_client::mirror_image_get_image_id_finish(
+      &iter, &m_local_parent_spec.image_id);
+  }
+
+  if (r == -ENOENT) {
+    dout(10) << "parent image " << m_parent_global_image_id << " not "
+             << "registered locally" << dendl;
+    finish(r);
+    return;
+  } else if (r < 0) {
+    derr << "failed to retrieve local image id for parent image "
+         << m_parent_global_image_id << ": " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  open_remote_parent_image();
+}
+
+// Clone path step 3: open the remote parent image (read-only -- the 'true'
+// argument) so its snapshots can be inspected before cloning.
+template <typename I>
+void CreateImageRequest<I>::open_remote_parent_image() {
+  dout(10) << dendl;
+
+  Context *ctx = create_context_callback<
+    CreateImageRequest<I>,
+    &CreateImageRequest<I>::handle_open_remote_parent_image>(this);
+  OpenImageRequest<I> *request = OpenImageRequest<I>::create(
+    m_remote_parent_io_ctx, &m_remote_parent_image_ctx,
+    m_remote_parent_spec.image_id, true, ctx);
+  request->send();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_open_remote_parent_image(int r) {
+  dout(10) << "r=" << r << dendl;
+  if (r < 0) {
+    derr << "failed to open remote parent image " << m_parent_pool_name << "/"
+         << m_remote_parent_spec.image_id << dendl;
+    finish(r);
+    return;
+  }
+
+  clone_image();
+}
+
+// Clone path step 4: verify the parent snapshot exists remotely and has been
+// fully synced to the local cluster, then clone the local child from the
+// local parent.  Any failure unwinds through close_remote_parent_image().
+template <typename I>
+void CreateImageRequest<I>::clone_image() {
+  dout(10) << dendl;
+
+  LocalPoolMeta local_parent_pool_meta;
+  int r = m_pool_meta_cache->get_local_pool_meta(
+    m_local_parent_io_ctx.get_id(), &local_parent_pool_meta);
+  if (r < 0) {
+    derr << "failed to retrieve local parent mirror uuid for pool "
+         << m_local_parent_io_ctx.get_id() << dendl;
+    m_ret_val = r;
+    close_remote_parent_image();
+    return;
+  }
+
+  // ensure no image sync snapshots for the local cluster exist in the
+  // remote image
+  bool found_parent_snap = false;
+  bool found_image_sync_snap = false;
+  std::string snap_name;
+  cls::rbd::SnapshotNamespace snap_namespace;
+  {
+    auto snap_prefix = image_sync::util::get_snapshot_name_prefix(
+      local_parent_pool_meta.mirror_uuid);
+
+    std::shared_lock remote_image_locker(m_remote_parent_image_ctx->image_lock);
+    // single pass: locate the parent snap and detect any in-progress
+    // image-sync snapshot (identified by the local mirror uuid prefix)
+    for (auto snap_info : m_remote_parent_image_ctx->snap_info) {
+      if (snap_info.first == m_remote_parent_spec.snap_id) {
+        found_parent_snap = true;
+        snap_name = snap_info.second.name;
+        snap_namespace = snap_info.second.snap_namespace;
+      } else if (boost::starts_with(snap_info.second.name, snap_prefix)) {
+        found_image_sync_snap = true;
+      }
+    }
+  }
+
+  if (!found_parent_snap) {
+    dout(15) << "remote parent image snapshot not found" << dendl;
+    m_ret_val = -ENOENT;
+    close_remote_parent_image();
+    return;
+  } else if (found_image_sync_snap) {
+    // a sync snapshot implies the parent is still being copied locally
+    dout(15) << "parent image not synced to local cluster" << dendl;
+    m_ret_val = -ENOENT;
+    close_remote_parent_image();
+    return;
+  }
+
+  librbd::ImageOptions opts;
+  populate_image_options(&opts);
+
+  auto& config{
+    reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf};
+
+  using klass = CreateImageRequest<I>;
+  Context *ctx = create_context_callback<
+    klass, &klass::handle_clone_image>(this);
+
+  librbd::image::CloneRequest<I> *req = librbd::image::CloneRequest<I>::create(
+    config, m_local_parent_io_ctx, m_local_parent_spec.image_id, snap_name,
+    snap_namespace, CEPH_NOSNAP, m_local_io_ctx, m_local_image_name,
+    m_local_image_id, opts, m_mirror_image_mode, m_global_image_id,
+    m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue, ctx);
+  req->send();
+}
+
+// Record the clone result (success or failure) and always unwind by closing
+// the remote parent image; the final result is reported from there.
+template <typename I>
+void CreateImageRequest<I>::handle_clone_image(int r) {
+  dout(10) << "r=" << r << dendl;
+  if (r == -EBADF) {
+    dout(5) << "image id " << m_local_image_id << " already in-use" << dendl;
+    m_ret_val = r;
+  } else if (r < 0) {
+    derr << "failed to clone image " << m_parent_pool_name << "/"
+         << m_remote_parent_spec.image_id << " to "
+         << m_local_image_name << dendl;
+    m_ret_val = r;
+  }
+
+  close_remote_parent_image();
+}
+
+// Unwind step shared by all clone-path exits: close the remote parent image
+// and complete with the previously recorded m_ret_val.
+template <typename I>
+void CreateImageRequest<I>::close_remote_parent_image() {
+  dout(10) << dendl;
+  Context *ctx = create_context_callback<
+    CreateImageRequest<I>,
+    &CreateImageRequest<I>::handle_close_remote_parent_image>(this);
+  CloseImageRequest<I> *request = CloseImageRequest<I>::create(
+    &m_remote_parent_image_ctx, ctx);
+  request->send();
+}
+
+// Close errors are logged but never override m_ret_val.
+template <typename I>
+void CreateImageRequest<I>::handle_close_remote_parent_image(int r) {
+  dout(10) << "r=" << r << dendl;
+  if (r < 0) {
+    derr << "error encountered closing remote parent image: "
+         << cpp_strerror(r) << dendl;
+  }
+
+  finish(m_ret_val);
+}
+
+// Fail asynchronously: queue the completion on the work queue so send()
+// never completes the caller's context inline.
+template <typename I>
+void CreateImageRequest<I>::error(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  m_threads->work_queue->queue(create_context_callback<
+    CreateImageRequest<I>, &CreateImageRequest<I>::finish>(this), r);
+}
+
+// Complete the caller's context and self-delete.
+template <typename I>
+void CreateImageRequest<I>::finish(int r) {
+  dout(10) << "r=" << r << dendl;
+  m_on_finish->complete(r);
+  delete this;
+}
+
+// Synchronous pre-flight: derive a single consistent remote parent spec from
+// the image head and all of its snapshots, then resolve matching remote and
+// local parent-pool IoCtxs.  Returns 0 with pool_id == -1 when the image has
+// no parent (flat create path).
+template <typename I>
+int CreateImageRequest<I>::validate_parent() {
+  std::shared_lock owner_locker{m_remote_image_ctx->owner_lock};
+  std::shared_lock image_locker{m_remote_image_ctx->image_lock};
+
+  m_remote_parent_spec = m_remote_image_ctx->parent_md.spec;
+
+  // scan all remote snapshots for a linked parent
+  for (auto &snap_info_pair : m_remote_image_ctx->snap_info) {
+    auto &parent_spec = snap_info_pair.second.parent.spec;
+    if (parent_spec.pool_id == -1) {
+      continue;
+    } else if (m_remote_parent_spec.pool_id == -1) {
+      m_remote_parent_spec = parent_spec;
+      continue;
+    }
+
+    // more than one distinct parent across snapshots is unsupported
+    if (m_remote_parent_spec != parent_spec) {
+      derr << "remote image parent spec mismatch" << dendl;
+      return -EINVAL;
+    }
+  }
+
+  if (m_remote_parent_spec.pool_id == -1) {
+    return 0;
+  }
+
+  // map remote parent pool to local parent pool
+  int r = librbd::util::create_ioctx(
+    m_remote_image_ctx->md_ctx, "remote parent pool",
+    m_remote_parent_spec.pool_id, m_remote_parent_spec.pool_namespace,
+    &m_remote_parent_io_ctx);
+  if (r < 0) {
+    derr << "failed to open remote parent pool " << m_remote_parent_spec.pool_id
+         << ": " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  m_parent_pool_name = m_remote_parent_io_ctx.get_pool_name();
+
+  // the local pool is matched by name (and namespace) to the remote one
+  librados::Rados local_rados(m_local_io_ctx);
+  r = local_rados.ioctx_create(m_parent_pool_name.c_str(),
+                               m_local_parent_io_ctx);
+  if (r < 0) {
+    derr << "failed to open local parent pool " << m_parent_pool_name << ": "
+         << cpp_strerror(r) << dendl;
+    return r;
+  }
+  m_local_parent_io_ctx.set_namespace(m_remote_parent_io_ctx.get_namespace());
+
+  return 0;
+}
+
+// Build ImageOptions for the local create/clone by copying the remote
+// image's layout (features, order, striping), selecting a data pool, and --
+// for clones -- picking the clone format from the remote op features.
+// NOTE(review): assumes the caller holds m_remote_image_ctx->image_lock
+// while the remote fields are read -- confirm at both call sites.
+template <typename I>
+void CreateImageRequest<I>::populate_image_options(
+    librbd::ImageOptions* image_options) {
+  image_options->set(RBD_IMAGE_OPTION_FEATURES,
+                     m_remote_image_ctx->features);
+  image_options->set(RBD_IMAGE_OPTION_ORDER, m_remote_image_ctx->order);
+  image_options->set(RBD_IMAGE_OPTION_STRIPE_UNIT,
+                     m_remote_image_ctx->stripe_unit);
+  image_options->set(RBD_IMAGE_OPTION_STRIPE_COUNT,
+                     m_remote_image_ctx->stripe_count);
+
+  // Determine the data pool for the local image as follows:
+  // 1. If the local pool has a default data pool, use it.
+  // 2. If the remote image has a data pool different from its metadata pool and
+  //    a pool with the same name exists locally, use it.
+  // 3. Don't set the data pool explicitly.
+  std::string data_pool;
+  librados::Rados local_rados(m_local_io_ctx);
+  auto default_data_pool = g_ceph_context->_conf.get_val<std::string>("rbd_default_data_pool");
+  auto remote_md_pool = m_remote_image_ctx->md_ctx.get_pool_name();
+  auto remote_data_pool = m_remote_image_ctx->data_ctx.get_pool_name();
+
+  if (default_data_pool != "") {
+    data_pool = default_data_pool;
+  } else if (remote_data_pool != remote_md_pool) {
+    if (local_rados.pool_lookup(remote_data_pool.c_str()) >= 0) {
+      data_pool = remote_data_pool;
+    }
+  }
+
+  if (data_pool != "") {
+    image_options->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool);
+  }
+
+  if (m_remote_parent_spec.pool_id != -1) {
+    // clone v2 only when the remote child advertises the clone-child op
+    // feature; otherwise fall back to format 1
+    uint64_t clone_format = 1;
+    if (m_remote_image_ctx->test_op_features(
+          RBD_OPERATION_FEATURE_CLONE_CHILD)) {
+      clone_format = 2;
+    }
+    image_options->set(RBD_IMAGE_OPTION_CLONE_FORMAT, clone_format);
+  }
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h
new file mode 100644
index 000000000..2ff7794e8
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h
@@ -0,0 +1,144 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/types.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/Types.h"
+#include <string>
+
+class Context;
+namespace librbd { class ImageCtx; }
+namespace librbd { class ImageOptions; }
+
+namespace rbd {
+namespace mirror {
+
+class PoolMetaCache;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+// Async request that creates the local side of a mirrored image: a flat
+// create when the remote image has no parent, otherwise a clone from the
+// locally-mirrored parent image.  Self-deletes after completing on_finish.
+template <typename ImageCtxT = librbd::ImageCtx>
+class CreateImageRequest {
+public:
+  static CreateImageRequest *create(
+      Threads<ImageCtxT> *threads,
+      librados::IoCtx &local_io_ctx,
+      const std::string &global_image_id,
+      const std::string &remote_mirror_uuid,
+      const std::string &local_image_name,
+      const std::string &local_image_id,
+      ImageCtxT *remote_image_ctx,
+      PoolMetaCache* pool_meta_cache,
+      cls::rbd::MirrorImageMode mirror_image_mode,
+      Context *on_finish) {
+    return new CreateImageRequest(threads, local_io_ctx, global_image_id,
+                                  remote_mirror_uuid, local_image_name,
+                                  local_image_id, remote_image_ctx,
+                                  pool_meta_cache, mirror_image_mode,
+                                  on_finish);
+  }
+
+  CreateImageRequest(
+      Threads<ImageCtxT> *threads, librados::IoCtx &local_io_ctx,
+      const std::string &global_image_id,
+      const std::string &remote_mirror_uuid,
+      const std::string &local_image_name,
+      const std::string &local_image_id,
+      ImageCtxT *remote_image_ctx,
+      PoolMetaCache* pool_meta_cache,
+      cls::rbd::MirrorImageMode mirror_image_mode,
+      Context *on_finish);
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start> * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+   *    |                                                           *
+   *    | (non-clone)                                               *
+   *    |\------------> CREATE_IMAGE ---------------------\         * (error)
+   *    |                                                 |         *
+   *    | (clone)                                         |         *
+   *    \-------------> GET_PARENT_GLOBAL_IMAGE_ID * * *  |         * * * *
+   *                        |                           * |         *     *
+   *                        v                           * |         *     *
+   *                    GET_LOCAL_PARENT_IMAGE_ID * * * * |         * * * *
+   *                        |                           * |         *     *
+   *                        v                           * |         *     *
+   *                    OPEN_REMOTE_PARENT  * * * * * * * |         * * * *
+   *                        |                           * |         *     *
+   *                        v                           * |         *     *
+   *                    CLONE_IMAGE                     * |         *     *
+   *                        |                           * |         *     *
+   *                        v                           * |         *     *
+   *                    CLOSE_REMOTE_PARENT             * |         *     *
+   *                        |                           v v         *     *
+   *                        \------------------------> <finish> < * *     *
+   * @endverbatim
+   */
+
+  Threads<ImageCtxT> *m_threads;
+  librados::IoCtx &m_local_io_ctx;
+  std::string m_global_image_id;
+  std::string m_remote_mirror_uuid;
+  std::string m_local_image_name;
+  std::string m_local_image_id;
+  ImageCtxT *m_remote_image_ctx;
+  PoolMetaCache* m_pool_meta_cache;
+  cls::rbd::MirrorImageMode m_mirror_image_mode;
+  Context *m_on_finish;
+
+  // clone-path state: remote parent pool/image and its local counterparts
+  librados::IoCtx m_remote_parent_io_ctx;
+  ImageCtxT *m_remote_parent_image_ctx = nullptr;
+  cls::rbd::ParentImageSpec m_remote_parent_spec;
+
+  librados::IoCtx m_local_parent_io_ctx;
+  cls::rbd::ParentImageSpec m_local_parent_spec;
+
+  // scratch buffer for rados read ops
+  bufferlist m_out_bl;
+  std::string m_parent_global_image_id;
+  std::string m_parent_pool_name;
+  // first recorded error; reported when the unwind completes
+  int m_ret_val = 0;
+
+  void create_image();
+  void handle_create_image(int r);
+
+  void get_parent_global_image_id();
+  void handle_get_parent_global_image_id(int r);
+
+  void get_local_parent_image_id();
+  void handle_get_local_parent_image_id(int r);
+
+  void open_remote_parent_image();
+  void handle_open_remote_parent_image(int r);
+
+  void clone_image();
+  void handle_clone_image(int r);
+
+  void close_remote_parent_image();
+  void handle_close_remote_parent_image(int r);
+
+  void error(int r);
+  void finish(int r);
+
+  int validate_parent();
+
+  void populate_image_options(librbd::ImageOptions* image_options);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc
new file mode 100644
index 000000000..74e975373
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc
@@ -0,0 +1,85 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "GetMirrorImageIdRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_rados_callback;
+
+// Entry point: start resolving the local image id registered for
+// m_global_image_id.  The request is single-shot and frees itself on finish.
+template <typename I>
+void GetMirrorImageIdRequest<I>::send() {
+  dout(20) << dendl;
+  get_image_id();
+}
+
+// Issue an async read against the RBD_MIRRORING object to map the global
+// image id to this pool's local image id.
+template <typename I>
+void GetMirrorImageIdRequest<I>::get_image_id() {
+  dout(20) << dendl;
+
+  // attempt to cross-reference a image id by the global image id
+  librados::ObjectReadOperation op;
+  librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id);
+
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    GetMirrorImageIdRequest<I>,
+    &GetMirrorImageIdRequest<I>::handle_get_image_id>(
+      this);
+  int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+  ceph_assert(r == 0);
+  // drop our ref; librados keeps its own until the callback fires
+  aio_comp->release();
+}
+
+// Decode the lookup result into *m_image_id.  -ENOENT means the global
+// image id is not registered in this pool (expected for new images).
+template <typename I>
+void GetMirrorImageIdRequest<I>::handle_get_image_id(int r) {
+  if (r == 0) {
+    auto iter = m_out_bl.cbegin();
+    r = librbd::cls_client::mirror_image_get_image_id_finish(
+      &iter, m_image_id);
+  }
+
+  // NOTE(review): on the error path *m_image_id is whatever the caller
+  // passed in (possibly empty/stale) -- logged for debugging only
+  dout(20) << "r=" << r << ", "
+           << "image_id=" << *m_image_id << dendl;
+
+  if (r < 0) {
+    if (r == -ENOENT) {
+      dout(10) << "global image " << m_global_image_id << " not registered"
+               << dendl;
+    } else {
+      derr << "failed to retrieve image id: " << cpp_strerror(r) << dendl;
+    }
+    finish(r);
+    return;
+  }
+
+  finish(0);
+}
+
+// Complete the user callback and destroy the request.  Nothing may touch
+// this object after finish() is called.
+template <typename I>
+void GetMirrorImageIdRequest<I>::finish(int r) {
+  dout(20) << "r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h
new file mode 100644
index 000000000..b26645138
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados_fwd.hpp"
+#include <string>
+
+namespace librbd { struct ImageCtx; }
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+// One-shot async request: resolves the image id registered under a global
+// image id in the pool's RBD_MIRRORING object.  *image_id is an out
+// parameter owned by the caller; the request deletes itself after
+// completing on_finish.
+template <typename ImageCtxT = librbd::ImageCtx>
+class GetMirrorImageIdRequest {
+public:
+  static GetMirrorImageIdRequest *create(librados::IoCtx &io_ctx,
+                                         const std::string &global_image_id,
+                                         std::string *image_id,
+                                         Context *on_finish) {
+    return new GetMirrorImageIdRequest(io_ctx, global_image_id, image_id,
+                                       on_finish);
+  }
+
+  GetMirrorImageIdRequest(librados::IoCtx &io_ctx,
+                          const std::string &global_image_id,
+                          std::string *image_id,
+                          Context *on_finish)
+    : m_io_ctx(io_ctx), m_global_image_id(global_image_id),
+      m_image_id(image_id), m_on_finish(on_finish) {
+  }
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * GET_IMAGE_ID
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  librados::IoCtx &m_io_ctx;
+  std::string m_global_image_id;
+  std::string *m_image_id;
+  Context *m_on_finish;
+
+  bufferlist m_out_bl;
+
+  void get_image_id();
+  void handle_get_image_id(int r);
+
+  void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc
new file mode 100644
index 000000000..e6ab382be
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc
@@ -0,0 +1,79 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "OpenImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenImageRequest: " \
+ << this << " " << __func__ << " "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+// All state is captured by reference/value; *image_ctx is an out parameter
+// that receives the opened image context (or nullptr on failure).
+template <typename I>
+OpenImageRequest<I>::OpenImageRequest(librados::IoCtx &io_ctx, I **image_ctx,
+                                      const std::string &image_id,
+                                      bool read_only, Context *on_finish)
+  : m_io_ctx(io_ctx), m_image_ctx(image_ctx), m_image_id(image_id),
+    m_read_only(read_only), m_on_finish(on_finish) {
+}
+
+// Entry point for the single-step open state machine.
+template <typename I>
+void OpenImageRequest<I>::send() {
+  send_open_image();
+}
+
+// Create a fresh image context by id (no name) and open it asynchronously.
+template <typename I>
+void OpenImageRequest<I>::send_open_image() {
+  dout(20) << dendl;
+
+  *m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, m_read_only);
+
+  if (!m_read_only) {
+    // ensure non-primary images can be modified
+    (*m_image_ctx)->read_only_mask = ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+  }
+
+  Context *ctx = create_context_callback<
+    OpenImageRequest<I>, &OpenImageRequest<I>::handle_open_image>(
+      this);
+  (*m_image_ctx)->state->open(0, ctx);
+}
+
+// On failure the ImageState open path has already cleaned up the image
+// context, so only the caller's pointer needs to be reset here.
+template <typename I>
+void OpenImageRequest<I>::handle_open_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to open image '" << m_image_id << "': "
+         << cpp_strerror(r) << dendl;
+    *m_image_ctx = nullptr;
+  }
+
+  finish(r);
+}
+
+// Complete the user callback and destroy the request (self-deleting).
+template <typename I>
+void OpenImageRequest<I>::finish(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h
new file mode 100644
index 000000000..01ab31171
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h
@@ -0,0 +1,71 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+// One-shot async request that creates and opens a librbd image context for
+// the given image id.  On failure *image_ctx is reset to nullptr.  The
+// request deletes itself after completing on_finish.
+template <typename ImageCtxT = librbd::ImageCtx>
+class OpenImageRequest {
+public:
+  static OpenImageRequest* create(librados::IoCtx &io_ctx,
+                                  ImageCtxT **image_ctx,
+                                  const std::string &image_id,
+                                  bool read_only, Context *on_finish) {
+    return new OpenImageRequest(io_ctx, image_ctx, image_id, read_only,
+                                on_finish);
+  }
+
+  OpenImageRequest(librados::IoCtx &io_ctx, ImageCtxT **image_ctx,
+                   const std::string &image_id, bool read_only,
+                   Context *on_finish);
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * OPEN_IMAGE
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+  librados::IoCtx &m_io_ctx;
+  ImageCtxT **m_image_ctx;
+  std::string m_image_id;
+  bool m_read_only;
+  Context *m_on_finish;
+
+  void send_open_image();
+  void handle_open_image(int r);
+
+  // NOTE(review): removed the undefined send_close_image(int) /
+  // handle_close_image(int) declarations -- OpenImageRequest.cc provides no
+  // definitions, so any call would have failed at link time.
+  void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc
new file mode 100644
index 000000000..7f8d9608e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc
@@ -0,0 +1,292 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "CloseImageRequest.h"
+#include "OpenLocalImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/exclusive_lock/Policy.h"
+#include "librbd/journal/Policy.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenLocalImageRequest: " \
+ << this << " " << __func__ << " "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+namespace {
+
+// Exclusive-lock policy for local mirror images: never auto-request the
+// lock, and only surrender it to a peer when the local journal is closed
+// or this image has become the tag owner (i.e. was promoted).
+template <typename I>
+struct MirrorExclusiveLockPolicy : public librbd::exclusive_lock::Policy {
+  I *image_ctx;
+
+  MirrorExclusiveLockPolicy(I *image_ctx) : image_ctx(image_ctx) {
+  }
+
+  bool may_auto_request_lock() override {
+    return false;
+  }
+
+  int lock_requested(bool force) override {
+    int r = -EROFS;
+    {
+      // owner_lock must be taken before image_lock (lock ordering)
+      std::shared_lock owner_locker{image_ctx->owner_lock};
+      std::shared_lock image_locker{image_ctx->image_lock};
+      if (image_ctx->journal == nullptr || image_ctx->journal->is_tag_owner()) {
+        r = 0;
+      }
+    }
+
+    if (r == 0) {
+      // if the local image journal has been closed or if it was (force)
+      // promoted allow the lock to be released to another client
+      image_ctx->exclusive_lock->release_lock(nullptr);
+    }
+    return r;
+  }
+
+  bool accept_blocked_request(
+      librbd::exclusive_lock::OperationRequestType request_type) override {
+    // only trash-snap removal and forced promotion may bypass the
+    // blanket -EROFS request block installed by send_lock_image()
+    switch (request_type) {
+    case librbd::exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE:
+    case librbd::exclusive_lock::OPERATION_REQUEST_TYPE_FORCE_PROMOTION:
+      return true;
+    default:
+      return false;
+    }
+  }
+};
+
+// Journal policy for local mirror images: keep the journal open but never
+// append to it locally -- tags are copied from the peer by rbd-mirror.
+struct MirrorJournalPolicy : public librbd::journal::Policy {
+  librbd::asio::ContextWQ *work_queue;
+
+  MirrorJournalPolicy(librbd::asio::ContextWQ *work_queue)
+    : work_queue(work_queue) {
+  }
+
+  bool append_disabled() const override {
+    // avoid recording any events to the local journal
+    return true;
+  }
+  bool journal_disabled() const override {
+    return false;
+  }
+
+  void allocate_tag_on_lock(Context *on_finish) override {
+    // rbd-mirror will manually create tags by copying them from the peer
+    work_queue->queue(on_finish, 0);
+  }
+};
+
+} // anonymous namespace
+
+// *local_image_ctx is an out parameter receiving the opened, locked image
+// context (or nullptr on failure).
+template <typename I>
+OpenLocalImageRequest<I>::OpenLocalImageRequest(
+    librados::IoCtx &local_io_ctx,
+    I **local_image_ctx,
+    const std::string &local_image_id,
+    librbd::asio::ContextWQ *work_queue,
+    Context *on_finish)
+  : m_local_io_ctx(local_io_ctx), m_local_image_ctx(local_image_ctx),
+    m_local_image_id(local_image_id), m_work_queue(work_queue),
+    m_on_finish(on_finish) {
+}
+
+// Entry point: open -> query mirror info -> acquire exclusive lock.
+template <typename I>
+void OpenLocalImageRequest<I>::send() {
+  send_open_image();
+}
+
+// Create the local image context, install the mirror-specific exclusive
+// lock and journal policies, then open it asynchronously.
+template <typename I>
+void OpenLocalImageRequest<I>::send_open_image() {
+  dout(20) << dendl;
+
+  *m_local_image_ctx = I::create("", m_local_image_id, nullptr,
+                                 m_local_io_ctx, false);
+
+  // ensure non-primary images can be modified
+  (*m_local_image_ctx)->read_only_mask =
+    ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+
+  {
+    // policies must be swapped in before open() can race with lock requests
+    std::scoped_lock locker{(*m_local_image_ctx)->owner_lock,
+                            (*m_local_image_ctx)->image_lock};
+    (*m_local_image_ctx)->set_exclusive_lock_policy(
+      new MirrorExclusiveLockPolicy<I>(*m_local_image_ctx));
+    (*m_local_image_ctx)->set_journal_policy(
+      new MirrorJournalPolicy(m_work_queue));
+  }
+
+  Context *ctx = create_context_callback<
+    OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_open_image>(
+      this);
+  (*m_local_image_ctx)->state->open(0, ctx);
+}
+
+// -ENOENT (image deleted out from under us) is expected and logged quietly;
+// the open path already tore the context down on failure.
+template <typename I>
+void OpenLocalImageRequest<I>::handle_open_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    if (r == -ENOENT) {
+      dout(10) << ": local image does not exist" << dendl;
+    } else {
+      derr << ": failed to open image '" << m_local_image_id << "': "
+           << cpp_strerror(r) << dendl;
+    }
+    *m_local_image_ctx = nullptr;
+    finish(r);
+    return;
+  }
+
+  send_get_mirror_info();
+}
+
+// Query the local image's mirror record, promotion state and primary
+// mirror uuid before deciding whether to grab the exclusive lock.
+template <typename I>
+void OpenLocalImageRequest<I>::send_get_mirror_info() {
+  dout(20) << dendl;
+
+  Context *ctx = create_context_callback<
+    OpenLocalImageRequest<I>,
+    &OpenLocalImageRequest<I>::handle_get_mirror_info>(
+      this);
+  auto request = librbd::mirror::GetInfoRequest<I>::create(
+    **m_local_image_ctx, &m_mirror_image, &m_promotion_state,
+    &m_primary_mirror_uuid, ctx);
+  request->send();
+}
+
+// Gatekeeper: bail out (closing the image) if mirroring is absent or being
+// disabled, or if the local image is primary (-EREMOTEIO tells the caller
+// to replay in the other direction).
+template <typename I>
+void OpenLocalImageRequest<I>::handle_get_mirror_info(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r == -ENOENT) {
+    dout(5) << ": local image is not mirrored" << dendl;
+    send_close_image(r);
+    return;
+  } else if (r < 0) {
+    derr << ": error querying local image primary status: " << cpp_strerror(r)
+         << dendl;
+    send_close_image(r);
+    return;
+  }
+
+  if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) {
+    dout(5) << ": local image mirroring is being disabled" << dendl;
+    send_close_image(-ENOENT);
+    return;
+  }
+
+  // if the local image owns the tag -- don't steal the lock since
+  // we aren't going to mirror peer data into this image anyway
+  if (m_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) {
+    dout(10) << ": local image is primary -- skipping image replay" << dendl;
+    send_close_image(-EREMOTEIO);
+    return;
+  }
+
+  send_lock_image();
+}
+
+// Acquire the exclusive lock.  Snapshot-mirrored images may legitimately
+// run without an exclusive lock; journal-based images require it.
+template <typename I>
+void OpenLocalImageRequest<I>::send_lock_image() {
+  std::shared_lock owner_locker{(*m_local_image_ctx)->owner_lock};
+  if ((*m_local_image_ctx)->exclusive_lock == nullptr) {
+    // must drop owner_lock before completing/closing below
+    owner_locker.unlock();
+    if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+      finish(0);
+    } else {
+      derr << ": image does not support exclusive lock" << dendl;
+      send_close_image(-EINVAL);
+    }
+    return;
+  }
+
+  dout(20) << dendl;
+
+  // disallow any proxied maintenance operations before grabbing lock
+  (*m_local_image_ctx)->exclusive_lock->block_requests(-EROFS);
+
+  Context *ctx = create_context_callback<
+    OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_lock_image>(
+      this);
+
+  (*m_local_image_ctx)->exclusive_lock->acquire_lock(ctx);
+}
+
+// Verify the lock was actually obtained -- it may have been shut down or
+// lost between the acquire call and this callback.
+template <typename I>
+void OpenLocalImageRequest<I>::handle_lock_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to lock image '" << m_local_image_id << "': "
+         << cpp_strerror(r) << dendl;
+    send_close_image(r);
+    return;
+  }
+
+  {
+    std::shared_lock owner_locker{(*m_local_image_ctx)->owner_lock};
+    if ((*m_local_image_ctx)->exclusive_lock == nullptr ||
+        !(*m_local_image_ctx)->exclusive_lock->is_lock_owner()) {
+      derr << ": image is not locked" << dendl;
+      send_close_image(-EBUSY);
+      return;
+    }
+  }
+
+  finish(0);
+}
+
+// Error path: close the partially-opened image, preserving the first
+// failure code in m_ret_val for the final finish().
+template <typename I>
+void OpenLocalImageRequest<I>::send_close_image(int r) {
+  dout(20) << dendl;
+
+  if (m_ret_val == 0 && r < 0) {
+    m_ret_val = r;
+  }
+
+  Context *ctx = create_context_callback<
+    OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_close_image>(
+      this);
+  CloseImageRequest<I> *request = CloseImageRequest<I>::create(
+    m_local_image_ctx, ctx);
+  request->send();
+}
+
+// CloseImageRequest never fails; propagate the original error code.
+template <typename I>
+void OpenLocalImageRequest<I>::handle_close_image(int r) {
+  dout(20) << dendl;
+
+  ceph_assert(r == 0);
+  finish(m_ret_val);
+}
+
+// Complete the user callback and destroy the request (self-deleting).
+template <typename I>
+void OpenLocalImageRequest<I>::finish(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h
new file mode 100644
index 000000000..9a642bc39
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h
@@ -0,0 +1,97 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/mirror/Types.h"
+#include <string>
+
+class Context;
+namespace librbd {
+class ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+// Async request that opens the local (non-primary) mirror image, verifies
+// it is mirror-enabled and non-primary, and acquires its exclusive lock.
+// Self-deleting; on failure *local_image_ctx is nullptr.
+template <typename ImageCtxT = librbd::ImageCtx>
+class OpenLocalImageRequest {
+public:
+  static OpenLocalImageRequest* create(librados::IoCtx &local_io_ctx,
+                                       ImageCtxT **local_image_ctx,
+                                       const std::string &local_image_id,
+                                       librbd::asio::ContextWQ *work_queue,
+                                       Context *on_finish) {
+    return new OpenLocalImageRequest(local_io_ctx, local_image_ctx,
+                                     local_image_id, work_queue, on_finish);
+  }
+
+  OpenLocalImageRequest(librados::IoCtx &local_io_ctx,
+                        ImageCtxT **local_image_ctx,
+                        const std::string &local_image_id,
+                        librbd::asio::ContextWQ *work_queue,
+                        Context *on_finish);
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * OPEN_IMAGE * * * * * * * *
+   *    |                     *
+   *    v                     *
+   * GET_MIRROR_INFO  * * * * *
+   *    |                     *
+   *    v (skip if primary)   v
+   * LOCK_IMAGE * * * > CLOSE_IMAGE
+   *    |                     |
+   *    v                     |
+   * <finish> <---------------/
+   *
+   * @endverbatim
+   */
+  librados::IoCtx &m_local_io_ctx;
+  ImageCtxT **m_local_image_ctx;
+  std::string m_local_image_id;
+  librbd::asio::ContextWQ *m_work_queue;
+  Context *m_on_finish;
+
+  cls::rbd::MirrorImage m_mirror_image;
+  librbd::mirror::PromotionState m_promotion_state =
+    librbd::mirror::PROMOTION_STATE_NON_PRIMARY;
+  std::string m_primary_mirror_uuid;
+  // first error seen; reported after CLOSE_IMAGE completes
+  int m_ret_val = 0;
+
+  void send_open_image();
+  void handle_open_image(int r);
+
+  void send_get_mirror_info();
+  void handle_get_mirror_info(int r);
+
+  void send_lock_image();
+  void handle_lock_image(int r);
+
+  void send_close_image(int r);
+  void handle_close_image(int r);
+
+  void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc
new file mode 100644
index 000000000..b1fef7254
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc
@@ -0,0 +1,197 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include "tools/rbd_mirror/ImageDeleter.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "PrepareLocalImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+// Entry point: id lookup -> name lookup -> mirror info -> state builder.
+template <typename I>
+void PrepareLocalImageRequest<I>::send() {
+  dout(10) << dendl;
+  get_local_image_id();
+}
+
+// Resolve the local image id for the global image id via the mirroring
+// directory (delegated to GetMirrorImageIdRequest).
+template <typename I>
+void PrepareLocalImageRequest<I>::get_local_image_id() {
+  dout(10) << dendl;
+
+  Context *ctx = create_context_callback<
+    PrepareLocalImageRequest<I>,
+    &PrepareLocalImageRequest<I>::handle_get_local_image_id>(this);
+  auto req = GetMirrorImageIdRequest<I>::create(m_io_ctx, m_global_image_id,
+                                                &m_local_image_id, ctx);
+  req->send();
+}
+
+// Any error (including -ENOENT: no local image yet) is propagated to the
+// caller, which treats -ENOENT as "create the local image".
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_local_image_id(int r) {
+  dout(10) << "r=" << r << ", "
+           << "local_image_id=" << m_local_image_id << dendl;
+
+  if (r < 0) {
+    finish(r);
+    return;
+  }
+
+  get_local_image_name();
+}
+
+// Look up the image's name in the pool's RBD_DIRECTORY object.
+template <typename I>
+void PrepareLocalImageRequest<I>::get_local_image_name() {
+  dout(10) << dendl;
+
+  librados::ObjectReadOperation op;
+  librbd::cls_client::dir_get_name_start(&op, m_local_image_id);
+
+  // m_out_bl was used by the id lookup; reset before reuse
+  m_out_bl.clear();
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    PrepareLocalImageRequest<I>,
+    &PrepareLocalImageRequest<I>::handle_get_local_image_name>(this);
+  int r = m_io_ctx.aio_operate(RBD_DIRECTORY, aio_comp, &op, &m_out_bl);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// A missing directory entry is tolerated (image mid-removal/trash): the
+// name is cleared and preparation continues off the mirror record alone.
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_local_image_name(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r == 0) {
+    auto it = m_out_bl.cbegin();
+    r = librbd::cls_client::dir_get_name_finish(&it, m_local_image_name);
+  }
+
+  if (r == -ENOENT) {
+    // proceed we should have a mirror image record if we got this far
+    dout(10) << "image does not exist for local image id " << m_local_image_id
+             << dendl;
+    *m_local_image_name = "";
+  } else if (r < 0) {
+    derr << "failed to retrieve image name: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  get_mirror_info();
+}
+
+// Fetch the mirror record, promotion state and primary mirror uuid for
+// the local image.
+template <typename I>
+void PrepareLocalImageRequest<I>::get_mirror_info() {
+  dout(10) << dendl;
+
+  auto ctx = create_context_callback<
+    PrepareLocalImageRequest<I>,
+    &PrepareLocalImageRequest<I>::handle_get_mirror_info>(this);
+  auto req = librbd::mirror::GetInfoRequest<I>::create(
+    m_io_ctx, m_work_queue, m_local_image_id, &m_mirror_image,
+    &m_promotion_state, &m_primary_mirror_uuid, ctx);
+  req->send();
+}
+
+// Examine the local image's mirror metadata and instantiate the matching
+// (journal- or snapshot-based) state builder.  CREATING images are moved
+// to the trash; DISABLING images defer with -ERESTART; unsupported modes
+// fail with -EOPNOTSUPP.
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_mirror_info(int r) {
+  dout(10) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to retrieve local mirror image info: " << cpp_strerror(r)
+         << dendl;
+    finish(r);
+    return;
+  }
+
+  if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_CREATING) {
+    dout(5) << "local image is still in creating state, issuing a removal"
+            << dendl;
+    move_to_trash();
+    return;
+  } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) {
+    dout(5) << "local image mirroring is in disabling state" << dendl;
+    finish(-ERESTART);
+    return;
+  }
+
+  switch (m_mirror_image.mode) {
+  case cls::rbd::MIRROR_IMAGE_MODE_JOURNAL:
+    // journal-based local image exists
+    {
+      auto state_builder = journal::StateBuilder<I>::create(m_global_image_id);
+      state_builder->local_primary_mirror_uuid = m_primary_mirror_uuid;
+      *m_state_builder = state_builder;
+    }
+    break;
+  case cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT:
+    // snapshot-based local image exists
+    *m_state_builder = snapshot::StateBuilder<I>::create(m_global_image_id);
+    break;
+  default:
+    derr << "unsupported mirror image mode " << m_mirror_image.mode << " "
+         << "for image " << m_global_image_id << dendl;
+    finish(-EOPNOTSUPP);
+    // BUG FIX: was 'break', which fell through to the code below and
+    // dereferenced the null *m_state_builder on an object finish() had
+    // already deleted (use-after-free + null dereference)
+    return;
+  }
+
+  dout(10) << "local_image_id=" << m_local_image_id << ", "
+           << "local_promotion_state=" << m_promotion_state << ", "
+           << "local_primary_mirror_uuid=" << m_primary_mirror_uuid << dendl;
+  (*m_state_builder)->local_image_id = m_local_image_id;
+  (*m_state_builder)->local_promotion_state = m_promotion_state;
+  finish(0);
+}
+
+// Move the half-created local image to the trash so it can be re-created
+// cleanly from the remote.
+template <typename I>
+void PrepareLocalImageRequest<I>::move_to_trash() {
+  dout(10) << dendl;
+
+  Context *ctx = create_context_callback<
+    PrepareLocalImageRequest<I>,
+    &PrepareLocalImageRequest<I>::handle_move_to_trash>(this);
+  ImageDeleter<I>::trash_move(m_io_ctx, m_global_image_id,
+                              false, m_work_queue, ctx);
+}
+
+// The trash-move result is deliberately ignored: the caller only needs to
+// know the local image is gone, so always report -ENOENT.
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_move_to_trash(int r) {
+  dout(10) << ": r=" << r << dendl;
+
+  finish(-ENOENT);
+}
+
+// Complete the user callback and destroy the request (self-deleting).
+template <typename I>
+void PrepareLocalImageRequest<I>::finish(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h
new file mode 100644
index 000000000..6372169ff
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h
@@ -0,0 +1,115 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados_fwd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/Types.h"
+#include <string>
+
+struct Context;
+
+namespace librbd {
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename> class StateBuilder;
+
+// Async request that discovers the local half of a mirrored image (id,
+// name, mirror record) and allocates the matching journal/snapshot state
+// builder into *state_builder.  Self-deleting after on_finish completes.
+template <typename ImageCtxT = librbd::ImageCtx>
+class PrepareLocalImageRequest {
+public:
+  static PrepareLocalImageRequest *create(
+      librados::IoCtx &io_ctx,
+      const std::string &global_image_id,
+      std::string *local_image_name,
+      StateBuilder<ImageCtxT>** state_builder,
+      librbd::asio::ContextWQ *work_queue,
+      Context *on_finish) {
+    return new PrepareLocalImageRequest(io_ctx, global_image_id,
+                                        local_image_name, state_builder,
+                                        work_queue, on_finish);
+  }
+
+  PrepareLocalImageRequest(
+      librados::IoCtx &io_ctx,
+      const std::string &global_image_id,
+      std::string *local_image_name,
+      StateBuilder<ImageCtxT>** state_builder,
+      librbd::asio::ContextWQ *work_queue,
+      Context *on_finish)
+    : m_io_ctx(io_ctx), m_global_image_id(global_image_id),
+      m_local_image_name(local_image_name), m_state_builder(state_builder),
+      m_work_queue(work_queue), m_on_finish(on_finish) {
+  }
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * GET_LOCAL_IMAGE_ID
+   *    |
+   *    v
+   * GET_LOCAL_IMAGE_NAME
+   *    |
+   *    v
+   * GET_MIRROR_INFO
+   *    |
+   *    | (if the image mirror state is CREATING)
+   *    v
+   * TRASH_MOVE
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  librados::IoCtx &m_io_ctx;
+  std::string m_global_image_id;
+  std::string *m_local_image_name;
+  StateBuilder<ImageCtxT>** m_state_builder;
+  librbd::asio::ContextWQ *m_work_queue;
+  Context *m_on_finish;
+
+  bufferlist m_out_bl;
+  std::string m_local_image_id;
+  cls::rbd::MirrorImage m_mirror_image;
+  // FIX: previously uninitialized; default to UNKNOWN (matches the in-class
+  // initialization style used by OpenLocalImageRequest.h) so the value is
+  // deterministic until GetInfoRequest assigns it
+  librbd::mirror::PromotionState m_promotion_state =
+    librbd::mirror::PROMOTION_STATE_UNKNOWN;
+  std::string m_primary_mirror_uuid;
+
+  void get_local_image_id();
+  void handle_get_local_image_id(int r);
+
+  void get_local_image_name();
+  void handle_get_local_image_name(int r);
+
+  void get_mirror_info();
+  void handle_get_mirror_info(int r);
+
+  void move_to_trash();
+  void handle_move_to_trash(int r);
+
+  void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc
new file mode 100644
index 000000000..45a44a300
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc
@@ -0,0 +1,283 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "journal/Settings.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "PrepareRemoteImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::send() {
+ if (*m_state_builder != nullptr) {
+ (*m_state_builder)->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid;
+ auto state_builder = dynamic_cast<snapshot::StateBuilder<I>*>(*m_state_builder);
+ if (state_builder) {
+ state_builder->remote_mirror_peer_uuid = m_remote_pool_meta.mirror_peer_uuid;
+ }
+ }
+
+ get_remote_image_id();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_remote_image_id() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_get_remote_image_id>(this);
+ auto req = GetMirrorImageIdRequest<I>::create(m_remote_io_ctx,
+ m_global_image_id,
+ &m_remote_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_remote_image_id(int r) {
+ dout(10) << "r=" << r << ", "
+ << "remote_image_id=" << m_remote_image_id << dendl;
+
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ get_mirror_info();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_mirror_info() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_get_mirror_info>(this);
+ auto req = librbd::mirror::GetInfoRequest<I>::create(
+ m_remote_io_ctx, m_threads->work_queue, m_remote_image_id,
+ &m_mirror_image, &m_promotion_state, &m_primary_mirror_uuid,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_mirror_info(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "image " << m_global_image_id << " not mirrored" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to retrieve mirror image details for image "
+ << m_global_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ auto state_builder = *m_state_builder;
+ if (state_builder != nullptr &&
+ state_builder->get_mirror_image_mode() != m_mirror_image.mode) {
+ derr << "local and remote mirror image using different mirroring modes "
+ << "for image " << m_global_image_id << ": split-brain" << dendl;
+ finish(-EEXIST);
+ return;
+ } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) {
+ dout(5) << "remote image mirroring is being disabled" << dendl;
+ finish(-ENOENT);
+ return;
+ }
+
+ switch (m_mirror_image.mode) {
+ case cls::rbd::MIRROR_IMAGE_MODE_JOURNAL:
+ get_client();
+ break;
+ case cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT:
+ finalize_snapshot_state_builder();
+ finish(0);
+ break;
+ default:
+ derr << "unsupported mirror image mode " << m_mirror_image.mode << " "
+ << "for image " << m_global_image_id << dendl;
+ finish(-EOPNOTSUPP);
+ break;
+ }
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_client() {
+ dout(10) << dendl;
+
+ auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
+ ::journal::Settings journal_settings;
+ journal_settings.commit_interval = cct->_conf.get_val<double>(
+ "rbd_mirror_journal_commit_age");
+
+ // TODO use Journal thread pool for journal ops until converted to ASIO
+ ContextWQ* context_wq;
+ librbd::Journal<>::get_work_queue(cct, &context_wq);
+
+ ceph_assert(m_remote_journaler == nullptr);
+ m_remote_journaler = new Journaler(context_wq, m_threads->timer,
+ &m_threads->timer_lock, m_remote_io_ctx,
+ m_remote_image_id, m_local_mirror_uuid,
+ journal_settings, m_cache_manager_handler);
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_get_client>(this));
+ m_remote_journaler->get_client(m_local_mirror_uuid, &m_client, ctx);
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_client(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ MirrorPeerClientMeta client_meta;
+ if (r == -ENOENT) {
+ dout(10) << "client not registered" << dendl;
+ register_client();
+ } else if (r < 0) {
+ derr << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+ finish(r);
+ } else if (!util::decode_client_meta(m_client, &client_meta)) {
+ // require operator intervention since the data is corrupt
+ finish(-EBADMSG);
+ } else {
+ // skip registration if it already exists
+ finalize_journal_state_builder(m_client.state, client_meta);
+ finish(0);
+ }
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::register_client() {
+ dout(10) << dendl;
+
+ auto state_builder = *m_state_builder;
+ librbd::journal::MirrorPeerClientMeta client_meta{
+ (state_builder == nullptr ? "" : state_builder->local_image_id)};
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+
+ librbd::journal::ClientData client_data{client_meta};
+ bufferlist client_data_bl;
+ encode(client_data, client_data_bl);
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_register_client>(this));
+ m_remote_journaler->register_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_register_client(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to register with remote journal: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ auto state_builder = *m_state_builder;
+ librbd::journal::MirrorPeerClientMeta client_meta{
+ (state_builder == nullptr ? "" : state_builder->local_image_id)};
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+ finalize_journal_state_builder(cls::journal::CLIENT_STATE_CONNECTED,
+ client_meta);
+ finish(0);
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::finalize_journal_state_builder(
+ cls::journal::ClientState client_state,
+ const MirrorPeerClientMeta& client_meta) {
+ journal::StateBuilder<I>* state_builder = nullptr;
+ if (*m_state_builder != nullptr) {
+ // already verified that it's a matching builder in
+ // 'handle_get_mirror_info'
+ state_builder = dynamic_cast<journal::StateBuilder<I>*>(*m_state_builder);
+ ceph_assert(state_builder != nullptr);
+ } else {
+ state_builder = journal::StateBuilder<I>::create(m_global_image_id);
+ *m_state_builder = state_builder;
+ }
+
+ state_builder->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid;
+ state_builder->remote_image_id = m_remote_image_id;
+ state_builder->remote_promotion_state = m_promotion_state;
+ state_builder->remote_journaler = m_remote_journaler;
+ state_builder->remote_client_state = client_state;
+ state_builder->remote_client_meta = client_meta;
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::finalize_snapshot_state_builder() {
+ snapshot::StateBuilder<I>* state_builder = nullptr;
+ if (*m_state_builder != nullptr) {
+ state_builder = dynamic_cast<snapshot::StateBuilder<I>*>(*m_state_builder);
+ ceph_assert(state_builder != nullptr);
+ } else {
+ state_builder = snapshot::StateBuilder<I>::create(m_global_image_id);
+ *m_state_builder = state_builder;
+ }
+
+ dout(10) << "remote_mirror_uuid=" << m_remote_pool_meta.mirror_uuid << ", "
+ << "remote_mirror_peer_uuid="
+ << m_remote_pool_meta.mirror_peer_uuid << ", "
+ << "remote_image_id=" << m_remote_image_id << ", "
+ << "remote_promotion_state=" << m_promotion_state << dendl;
+ state_builder->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid;
+ state_builder->remote_mirror_peer_uuid = m_remote_pool_meta.mirror_peer_uuid;
+ state_builder->remote_image_id = m_remote_image_id;
+ state_builder->remote_promotion_state = m_promotion_state;
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ delete m_remote_journaler;
+ m_remote_journaler = nullptr;
+ }
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h
new file mode 100644
index 000000000..483cfc001
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h
@@ -0,0 +1,153 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
+
+#include "include/buffer_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+#include "cls/journal/cls_journal_types.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include "librbd/mirror/Types.h"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+namespace journal { class Journaler; }
+namespace journal { struct CacheManagerHandler; }
+namespace librbd { struct ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class PrepareRemoteImageRequest {
+public:
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+ typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta;
+
+ static PrepareRemoteImageRequest *create(
+ Threads<ImageCtxT> *threads,
+ librados::IoCtx &local_io_ctx,
+ librados::IoCtx &remote_io_ctx,
+ const std::string &global_image_id,
+ const std::string &local_mirror_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ ::journal::CacheManagerHandler *cache_manager_handler,
+ StateBuilder<ImageCtxT>** state_builder,
+ Context *on_finish) {
+ return new PrepareRemoteImageRequest(threads, local_io_ctx, remote_io_ctx,
+ global_image_id, local_mirror_uuid,
+ remote_pool_meta,
+ cache_manager_handler, state_builder,
+ on_finish);
+ }
+
+ PrepareRemoteImageRequest(
+ Threads<ImageCtxT> *threads,
+ librados::IoCtx &local_io_ctx,
+ librados::IoCtx &remote_io_ctx,
+ const std::string &global_image_id,
+ const std::string &local_mirror_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ ::journal::CacheManagerHandler *cache_manager_handler,
+ StateBuilder<ImageCtxT>** state_builder,
+ Context *on_finish)
+ : m_threads(threads),
+ m_local_io_ctx(local_io_ctx),
+ m_remote_io_ctx(remote_io_ctx),
+ m_global_image_id(global_image_id),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_remote_pool_meta(remote_pool_meta),
+ m_cache_manager_handler(cache_manager_handler),
+ m_state_builder(state_builder),
+ m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_REMOTE_IMAGE_ID
+ * |
+ * v
+ * GET_REMOTE_MIRROR_INFO
+ * |
+ * | (journal)
+ * \-----------> GET_CLIENT
+ * | |
+ * | v (skip if not needed)
+ * | REGISTER_CLIENT
+ * | |
+ * | |
+ * |/----------------/
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx &m_local_io_ctx;
+ librados::IoCtx &m_remote_io_ctx;
+ std::string m_global_image_id;
+ std::string m_local_mirror_uuid;
+ RemotePoolMeta m_remote_pool_meta;
+ ::journal::CacheManagerHandler *m_cache_manager_handler;
+ StateBuilder<ImageCtxT>** m_state_builder;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+ std::string m_remote_image_id;
+ cls::rbd::MirrorImage m_mirror_image;
+ librbd::mirror::PromotionState m_promotion_state =
+ librbd::mirror::PROMOTION_STATE_UNKNOWN;
+ std::string m_primary_mirror_uuid;
+
+ // journal-based mirroring
+ Journaler *m_remote_journaler = nullptr;
+ cls::journal::Client m_client;
+
+ void get_remote_image_id();
+ void handle_get_remote_image_id(int r);
+
+ void get_mirror_info();
+ void handle_get_mirror_info(int r);
+
+ void get_client();
+ void handle_get_client(int r);
+
+ void register_client();
+ void handle_register_client(int r);
+
+ void finalize_journal_state_builder(cls::journal::ClientState client_state,
+ const MirrorPeerClientMeta& client_meta);
+ void finalize_snapshot_state_builder();
+
+ void finish(int r);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/Replayer.h b/src/tools/rbd_mirror/image_replayer/Replayer.h
new file mode 100644
index 000000000..f3bfa4da0
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Replayer.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H
+
+#include <string>
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+struct Replayer {
+ virtual ~Replayer() {}
+
+ virtual void destroy() = 0;
+
+ virtual void init(Context* on_finish) = 0;
+ virtual void shut_down(Context* on_finish) = 0;
+
+ virtual void flush(Context* on_finish) = 0;
+
+ virtual bool get_replay_status(std::string* description,
+ Context* on_finish) = 0;
+
+ virtual bool is_replaying() const = 0;
+ virtual bool is_resync_requested() const = 0;
+
+ virtual int get_error_code() const = 0;
+ virtual std::string get_error_description() const = 0;
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H
diff --git a/src/tools/rbd_mirror/image_replayer/ReplayerListener.h b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h
new file mode 100644
index 000000000..f17f401b1
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+struct ReplayerListener {
+ virtual ~ReplayerListener() {}
+
+ virtual void handle_notification() = 0;
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H
diff --git a/src/tools/rbd_mirror/image_replayer/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/StateBuilder.cc
new file mode 100644
index 000000000..55fb3509d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/StateBuilder.cc
@@ -0,0 +1,138 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "StateBuilder.h"
+#include "include/ceph_assert.h"
+#include "include/Context.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
+#include "tools/rbd_mirror/image_sync/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "StateBuilder: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename I>
+StateBuilder<I>::StateBuilder(const std::string& global_image_id)
+ : global_image_id(global_image_id) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+}
+
+template <typename I>
+StateBuilder<I>::~StateBuilder() {
+ ceph_assert(local_image_ctx == nullptr);
+ ceph_assert(remote_image_ctx == nullptr);
+ ceph_assert(m_sync_point_handler == nullptr);
+}
+
+template <typename I>
+bool StateBuilder<I>::is_local_primary() const {
+ if (local_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) {
+ ceph_assert(!local_image_id.empty());
+ return true;
+ }
+ return false;
+}
+
+template <typename I>
+bool StateBuilder<I>::is_remote_primary() const {
+ if (remote_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) {
+ ceph_assert(!remote_image_id.empty());
+ return true;
+ }
+ return false;
+}
+
+template <typename I>
+bool StateBuilder<I>::is_linked() const {
+ if (local_promotion_state == librbd::mirror::PROMOTION_STATE_NON_PRIMARY) {
+ ceph_assert(!local_image_id.empty());
+ return is_linked_impl();
+ }
+ return false;
+}
+
+template <typename I>
+void StateBuilder<I>::close_local_image(Context* on_finish) {
+ if (local_image_ctx == nullptr) {
+ on_finish->complete(0);
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_close_local_image(r, on_finish);
+ });
+ auto request = image_replayer::CloseImageRequest<I>::create(
+ &local_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void StateBuilder<I>::handle_close_local_image(int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(local_image_ctx == nullptr);
+ if (r < 0) {
+ derr << "failed to close local image for image " << global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void StateBuilder<I>::close_remote_image(Context* on_finish) {
+ if (remote_image_ctx == nullptr) {
+ on_finish->complete(0);
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_close_remote_image(r, on_finish);
+ });
+ auto request = image_replayer::CloseImageRequest<I>::create(
+ &remote_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void StateBuilder<I>::handle_close_remote_image(int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(remote_image_ctx == nullptr);
+ if (r < 0) {
+ derr << "failed to close remote image for image " << global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void StateBuilder<I>::destroy_sync_point_handler() {
+ if (m_sync_point_handler == nullptr) {
+ return;
+ }
+
+ dout(15) << dendl;
+ m_sync_point_handler->destroy();
+ m_sync_point_handler = nullptr;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::StateBuilder<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/StateBuilder.h
new file mode 100644
index 000000000..51cf8668c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/StateBuilder.h
@@ -0,0 +1,114 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/Types.h"
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+struct BaseRequest;
+template <typename> class InstanceWatcher;
+struct PoolMetaCache;
+struct ProgressContext;
+template <typename> class Threads;
+
+namespace image_sync { struct SyncPointHandler; }
+
+namespace image_replayer {
+
+struct Replayer;
+struct ReplayerListener;
+
+template <typename ImageCtxT>
+class StateBuilder {
+public:
+ StateBuilder(const StateBuilder&) = delete;
+ StateBuilder& operator=(const StateBuilder&) = delete;
+
+ virtual ~StateBuilder();
+
+ virtual void destroy() {
+ delete this;
+ }
+
+ virtual void close(Context* on_finish) = 0;
+
+ virtual bool is_disconnected() const = 0;
+
+ bool is_local_primary() const;
+ bool is_remote_primary() const;
+ bool is_linked() const;
+
+ virtual cls::rbd::MirrorImageMode get_mirror_image_mode() const = 0;
+
+ virtual image_sync::SyncPointHandler* create_sync_point_handler() = 0;
+ void destroy_sync_point_handler();
+
+ virtual bool replay_requires_remote_image() const = 0;
+
+ void close_remote_image(Context* on_finish);
+
+ virtual BaseRequest* create_local_image_request(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ Context* on_finish) = 0;
+
+ virtual BaseRequest* create_prepare_replay_request(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) = 0;
+
+ virtual Replayer* create_replayer(
+ Threads<ImageCtxT>* threads,
+ InstanceWatcher<ImageCtxT>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ ReplayerListener* replayer_listener) = 0;
+
+ std::string global_image_id;
+
+ std::string local_image_id;
+ librbd::mirror::PromotionState local_promotion_state =
+ librbd::mirror::PROMOTION_STATE_UNKNOWN;
+ ImageCtxT* local_image_ctx = nullptr;
+
+ std::string remote_mirror_uuid;
+ std::string remote_image_id;
+ librbd::mirror::PromotionState remote_promotion_state =
+ librbd::mirror::PROMOTION_STATE_UNKNOWN;
+ ImageCtxT* remote_image_ctx = nullptr;
+
+protected:
+ image_sync::SyncPointHandler* m_sync_point_handler = nullptr;
+
+ StateBuilder(const std::string& global_image_id);
+
+ void close_local_image(Context* on_finish);
+
+private:
+ virtual bool is_linked_impl() const = 0;
+
+ void handle_close_local_image(int r, Context* on_finish);
+ void handle_close_remote_image(int r, Context* on_finish);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::StateBuilder<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H
diff --git a/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc
new file mode 100644
index 000000000..5d9c9aca1
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h"
+#include "common/Clock.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+void TimeRollingMean::operator()(uint32_t value) {
+ auto time = ceph_clock_now();
+ if (m_last_time.is_zero()) {
+ m_last_time = time;
+ } else if (m_last_time.sec() < time.sec()) {
+ auto sec = m_last_time.sec();
+ while (sec++ < time.sec()) {
+ m_rolling_mean(m_sum);
+ m_sum = 0;
+ }
+
+ m_last_time = time;
+ }
+
+ m_sum += value;
+}
+
+double TimeRollingMean::get_average() const {
+ return boost::accumulators::rolling_mean(m_rolling_mean);
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h
new file mode 100644
index 000000000..139ef893f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h
@@ -0,0 +1,40 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H
+#define RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H
+
+#include "include/utime.h"
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/accumulators/statistics/rolling_mean.hpp>
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+class TimeRollingMean {
+public:
+
+ void operator()(uint32_t value);
+
+ double get_average() const;
+
+private:
+ typedef boost::accumulators::accumulator_set<
+ uint64_t, boost::accumulators::stats<
+ boost::accumulators::tag::rolling_mean>> RollingMean;
+
+ utime_t m_last_time;
+ uint64_t m_sum = 0;
+
+ RollingMean m_rolling_mean{
+ boost::accumulators::tag::rolling_window::window_size = 30};
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H
diff --git a/src/tools/rbd_mirror/image_replayer/Types.h b/src/tools/rbd_mirror/image_replayer/Types.h
new file mode 100644
index 000000000..6ab988a76
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Types.h
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+enum HealthState {
+ HEALTH_STATE_OK,
+ HEALTH_STATE_WARNING,
+ HEALTH_STATE_ERROR
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H
diff --git a/src/tools/rbd_mirror/image_replayer/Utils.cc b/src/tools/rbd_mirror/image_replayer/Utils.cc
new file mode 100644
index 000000000..55162a4e4
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Utils.cc
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "include/rados/librados.hpp"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::util::" \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace util {
+
+std::string compute_image_spec(librados::IoCtx& io_ctx,
+ const std::string& image_name) {
+ std::string name = io_ctx.get_namespace();
+ if (!name.empty()) {
+ name += "/";
+ }
+
+ return io_ctx.get_pool_name() + "/" + name + image_name;
+}
+
+bool decode_client_meta(const cls::journal::Client& client,
+ librbd::journal::MirrorPeerClientMeta* client_meta) {
+ dout(15) << dendl;
+
+ librbd::journal::ClientData client_data;
+ auto it = client.data.cbegin();
+ try {
+ decode(client_data, it);
+ } catch (const buffer::error &err) {
+ derr << "failed to decode client meta data: " << err.what() << dendl;
+ return false;
+ }
+
+ auto local_client_meta = boost::get<librbd::journal::MirrorPeerClientMeta>(
+ &client_data.client_meta);
+ if (local_client_meta == nullptr) {
+ derr << "unknown peer registration" << dendl;
+ return false;
+ }
+
+ *client_meta = *local_client_meta;
+ dout(15) << "client found: client_meta=" << *client_meta << dendl;
+ return true;
+}
+
+} // namespace util
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
diff --git a/src/tools/rbd_mirror/image_replayer/Utils.h b/src/tools/rbd_mirror/image_replayer/Utils.h
new file mode 100644
index 000000000..6c5352cd1
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Utils.h
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_UTILS_H
+#define RBD_MIRROR_IMAGE_REPLAYER_UTILS_H
+
+#include "include/rados/librados_fwd.hpp"
+#include <string>
+
+namespace cls { namespace journal { struct Client; } }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace util {
+
+std::string compute_image_spec(librados::IoCtx& io_ctx,
+ const std::string& image_name);
+
+bool decode_client_meta(const cls::journal::Client& client,
+ librbd::journal::MirrorPeerClientMeta* client_meta);
+
+} // namespace util
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_UTILS_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc
new file mode 100644
index 000000000..087cf4f5f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc
@@ -0,0 +1,162 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CreateLocalImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Types.h"
+#include "tools/rbd_mirror/PoolMetaCache.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/CreateImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "CreateLocalImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+template <typename I>
+void CreateLocalImageRequest<I>::send() {
+ unregister_client();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::unregister_client() {
+ dout(10) << dendl;
+ update_progress("UNREGISTER_CLIENT");
+
+ auto ctx = create_context_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_unregister_client>(this);
+ m_state_builder->remote_journaler->unregister_client(ctx);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_unregister_client(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to unregister with remote journal: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ m_state_builder->local_image_id = "";
+ m_state_builder->remote_client_meta = {};
+ register_client();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::register_client() {
+ ceph_assert(m_state_builder->local_image_id.empty());
+ m_state_builder->local_image_id =
+ librbd::util::generate_image_id<I>(m_local_io_ctx);
+ dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl;
+ update_progress("REGISTER_CLIENT");
+
+ librbd::journal::MirrorPeerClientMeta client_meta{
+ m_state_builder->local_image_id};
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
+
+ librbd::journal::ClientData client_data{client_meta};
+ bufferlist client_data_bl;
+ encode(client_data, client_data_bl);
+
+ auto ctx = create_context_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_register_client>(this);
+ m_state_builder->remote_journaler->register_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_register_client(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to register with remote journal: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ m_state_builder->remote_client_state = cls::journal::CLIENT_STATE_CONNECTED;
+ m_state_builder->remote_client_meta = {m_state_builder->local_image_id};
+ m_state_builder->remote_client_meta.state =
+ librbd::journal::MIRROR_PEER_STATE_SYNCING;
+
+ create_local_image();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::create_local_image() {
+ dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl;
+ update_progress("CREATE_LOCAL_IMAGE");
+
+ m_remote_image_ctx->image_lock.lock_shared();
+ std::string image_name = m_remote_image_ctx->name;
+ m_remote_image_ctx->image_lock.unlock_shared();
+
+ auto ctx = create_context_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_create_local_image>(this);
+ auto request = CreateImageRequest<I>::create(
+ m_threads, m_local_io_ctx, m_global_image_id,
+ m_state_builder->remote_mirror_uuid, image_name,
+ m_state_builder->local_image_id, m_remote_image_ctx,
+ m_pool_meta_cache, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, ctx);
+ request->send();
+}
+template <typename I>
+void CreateLocalImageRequest<I>::handle_create_local_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBADF) {
+ dout(5) << "image id " << m_state_builder->local_image_id << " "
+ << "already in-use" << dendl;
+ unregister_client();
+ return;
+ } else if (r < 0) {
+ if (r == -ENOENT) {
+ dout(10) << "parent image does not exist" << dendl;
+ } else {
+ derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+ }
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::update_progress(
+ const std::string& description) {
+ dout(15) << description << dendl;
+ if (m_progress_ctx != nullptr) {
+ m_progress_ctx->update_progress(description);
+ }
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::CreateLocalImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h
new file mode 100644
index 000000000..fc776ecc3
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h
@@ -0,0 +1,116 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include <string>
+
+struct Context;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class PoolMetaCache;
+class ProgressContext;
+template <typename> struct Threads;
+
+namespace image_replayer {
+namespace journal {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class CreateLocalImageRequest : public BaseRequest {
+public:
+ typedef rbd::mirror::ProgressContext ProgressContext;
+
+ static CreateLocalImageRequest* create(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ ImageCtxT* remote_image_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ Context* on_finish) {
+ return new CreateLocalImageRequest(threads, local_io_ctx, remote_image_ctx,
+ global_image_id, pool_meta_cache,
+ progress_ctx, state_builder, on_finish);
+ }
+
+ CreateLocalImageRequest(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ ImageCtxT* remote_image_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ Context* on_finish)
+ : BaseRequest(on_finish),
+ m_threads(threads),
+ m_local_io_ctx(local_io_ctx),
+ m_remote_image_ctx(remote_image_ctx),
+ m_global_image_id(global_image_id),
+ m_pool_meta_cache(pool_meta_cache),
+ m_progress_ctx(progress_ctx),
+ m_state_builder(state_builder) {
+ }
+
+  void send() override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * UNREGISTER_CLIENT < * * * * * * * *
+ * | *
+ * v *
+ * REGISTER_CLIENT *
+ * | *
+ * v (id exists) *
+ * CREATE_LOCAL_IMAGE * * * * * * * * *
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ Threads<ImageCtxT>* m_threads;
+ librados::IoCtx& m_local_io_ctx;
+ ImageCtxT* m_remote_image_ctx;
+ std::string m_global_image_id;
+ PoolMetaCache* m_pool_meta_cache;
+ ProgressContext* m_progress_ctx;
+ StateBuilder<ImageCtxT>* m_state_builder;
+
+ void unregister_client();
+ void handle_unregister_client(int r);
+
+ void register_client();
+ void handle_register_client(int r);
+
+ void create_local_image();
+ void handle_create_local_image(int r);
+
+ void update_progress(const std::string& description);
+
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::CreateLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc
new file mode 100644
index 000000000..f5d49048e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc
@@ -0,0 +1,206 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "EventPreprocessor.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/journal/Types.h"
+#include <boost/variant.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "EventPreprocessor: " << this << " " << __func__ \
+ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+EventPreprocessor<I>::EventPreprocessor(I &local_image_ctx,
+ Journaler &remote_journaler,
+ const std::string &local_mirror_uuid,
+ MirrorPeerClientMeta *client_meta,
+ librbd::asio::ContextWQ *work_queue)
+ : m_local_image_ctx(local_image_ctx), m_remote_journaler(remote_journaler),
+ m_local_mirror_uuid(local_mirror_uuid), m_client_meta(client_meta),
+ m_work_queue(work_queue) {
+}
+
+template <typename I>
+EventPreprocessor<I>::~EventPreprocessor() {
+ ceph_assert(!m_in_progress);
+}
+
+template <typename I>
+bool EventPreprocessor<I>::is_required(const EventEntry &event_entry) {
+ SnapSeqs snap_seqs(m_client_meta->snap_seqs);
+ return (prune_snap_map(&snap_seqs) ||
+ event_entry.get_event_type() ==
+ librbd::journal::EVENT_TYPE_SNAP_RENAME);
+}
+
+template <typename I>
+void EventPreprocessor<I>::preprocess(EventEntry *event_entry,
+ Context *on_finish) {
+ ceph_assert(!m_in_progress);
+ m_in_progress = true;
+ m_event_entry = event_entry;
+ m_on_finish = on_finish;
+
+ refresh_image();
+}
+
+template <typename I>
+void EventPreprocessor<I>::refresh_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ EventPreprocessor<I>, &EventPreprocessor<I>::handle_refresh_image>(this);
+ m_local_image_ctx.state->refresh(ctx);
+}
+
+template <typename I>
+void EventPreprocessor<I>::handle_refresh_image(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error encountered during image refresh: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ preprocess_event();
+}
+
+template <typename I>
+void EventPreprocessor<I>::preprocess_event() {
+ dout(20) << dendl;
+
+ m_snap_seqs = m_client_meta->snap_seqs;
+ m_snap_seqs_updated = prune_snap_map(&m_snap_seqs);
+
+ int r = boost::apply_visitor(PreprocessEventVisitor(this),
+ m_event_entry->event);
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ update_client();
+}
+
+template <typename I>
+int EventPreprocessor<I>::preprocess_snap_rename(
+ librbd::journal::SnapRenameEvent &event) {
+ dout(20) << "remote_snap_id=" << event.snap_id << ", "
+ << "src_snap_name=" << event.src_snap_name << ", "
+ << "dest_snap_name=" << event.dst_snap_name << dendl;
+
+ auto snap_seq_it = m_snap_seqs.find(event.snap_id);
+ if (snap_seq_it != m_snap_seqs.end()) {
+ dout(20) << "remapping remote snap id " << snap_seq_it->first << " "
+ << "to local snap id " << snap_seq_it->second << dendl;
+ event.snap_id = snap_seq_it->second;
+ return 0;
+ }
+
+ auto snap_id_it = m_local_image_ctx.snap_ids.find({cls::rbd::UserSnapshotNamespace(),
+ event.src_snap_name});
+ if (snap_id_it == m_local_image_ctx.snap_ids.end()) {
+ dout(20) << "cannot map remote snapshot '" << event.src_snap_name << "' "
+ << "to local snapshot" << dendl;
+ event.snap_id = CEPH_NOSNAP;
+ return -ENOENT;
+ }
+
+ dout(20) << "mapping remote snap id " << event.snap_id << " "
+ << "to local snap id " << snap_id_it->second << dendl;
+ m_snap_seqs_updated = true;
+ m_snap_seqs[event.snap_id] = snap_id_it->second;
+ event.snap_id = snap_id_it->second;
+ return 0;
+}
+
+template <typename I>
+void EventPreprocessor<I>::update_client() {
+ if (!m_snap_seqs_updated) {
+ finish(0);
+ return;
+ }
+
+ dout(20) << dendl;
+ librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta);
+ client_meta.snap_seqs = m_snap_seqs;
+
+ librbd::journal::ClientData client_data(client_meta);
+ bufferlist data_bl;
+ encode(client_data, data_bl);
+
+ Context *ctx = create_context_callback<
+ EventPreprocessor<I>, &EventPreprocessor<I>::handle_update_client>(
+ this);
+ m_remote_journaler.update_client(data_bl, ctx);
+}
+
+template <typename I>
+void EventPreprocessor<I>::handle_update_client(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update mirror peer journal client: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_client_meta->snap_seqs = m_snap_seqs;
+ finish(0);
+}
+
+template <typename I>
+bool EventPreprocessor<I>::prune_snap_map(SnapSeqs *snap_seqs) {
+ bool pruned = false;
+
+ std::shared_lock image_locker{m_local_image_ctx.image_lock};
+ for (auto it = snap_seqs->begin(); it != snap_seqs->end(); ) {
+ auto current_it(it++);
+ if (m_local_image_ctx.snap_info.count(current_it->second) == 0) {
+ snap_seqs->erase(current_it);
+ pruned = true;
+ }
+ }
+ return pruned;
+}
+
+template <typename I>
+void EventPreprocessor<I>::finish(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ Context *on_finish = m_on_finish;
+ m_on_finish = nullptr;
+ m_event_entry = nullptr;
+ m_in_progress = false;
+ m_snap_seqs_updated = false;
+ m_work_queue->queue(on_finish, r);
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::EventPreprocessor<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h
new file mode 100644
index 000000000..12f70eb93
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h
@@ -0,0 +1,127 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
+#define RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
+
+#include "include/int_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <map>
+#include <string>
+#include <boost/variant/static_visitor.hpp>
+
+struct Context;
+namespace journal { class Journaler; }
+namespace librbd {
+class ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class EventPreprocessor {
+public:
+ using Journaler = typename librbd::journal::TypeTraits<ImageCtxT>::Journaler;
+ using EventEntry = librbd::journal::EventEntry;
+ using MirrorPeerClientMeta = librbd::journal::MirrorPeerClientMeta;
+
+ static EventPreprocessor *create(ImageCtxT &local_image_ctx,
+ Journaler &remote_journaler,
+ const std::string &local_mirror_uuid,
+ MirrorPeerClientMeta *client_meta,
+ librbd::asio::ContextWQ *work_queue) {
+ return new EventPreprocessor(local_image_ctx, remote_journaler,
+ local_mirror_uuid, client_meta, work_queue);
+ }
+
+ static void destroy(EventPreprocessor* processor) {
+ delete processor;
+ }
+
+ EventPreprocessor(ImageCtxT &local_image_ctx, Journaler &remote_journaler,
+ const std::string &local_mirror_uuid,
+ MirrorPeerClientMeta *client_meta,
+ librbd::asio::ContextWQ *work_queue);
+ ~EventPreprocessor();
+
+ bool is_required(const EventEntry &event_entry);
+ void preprocess(EventEntry *event_entry, Context *on_finish);
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v (skip if not required)
+ * REFRESH_IMAGE
+ * |
+ * v (skip if not required)
+ * PREPROCESS_EVENT
+ * |
+ * v (skip if not required)
+ * UPDATE_CLIENT
+ *
+ * @endverbatim
+ */
+
+ typedef std::map<uint64_t, uint64_t> SnapSeqs;
+
+ class PreprocessEventVisitor : public boost::static_visitor<int> {
+ public:
+ EventPreprocessor *event_preprocessor;
+
+ PreprocessEventVisitor(EventPreprocessor *event_preprocessor)
+ : event_preprocessor(event_preprocessor) {
+ }
+
+ template <typename T>
+ inline int operator()(T&) const {
+ return 0;
+ }
+ inline int operator()(librbd::journal::SnapRenameEvent &event) const {
+ return event_preprocessor->preprocess_snap_rename(event);
+ }
+ };
+
+ ImageCtxT &m_local_image_ctx;
+ Journaler &m_remote_journaler;
+ std::string m_local_mirror_uuid;
+ MirrorPeerClientMeta *m_client_meta;
+ librbd::asio::ContextWQ *m_work_queue;
+
+ bool m_in_progress = false;
+ EventEntry *m_event_entry = nullptr;
+ Context *m_on_finish = nullptr;
+
+ SnapSeqs m_snap_seqs;
+ bool m_snap_seqs_updated = false;
+
+ bool prune_snap_map(SnapSeqs *snap_seqs);
+
+ void refresh_image();
+ void handle_refresh_image(int r);
+
+ void preprocess_event();
+ int preprocess_snap_rename(librbd::journal::SnapRenameEvent &event);
+
+ void update_client();
+ void handle_update_client(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::EventPreprocessor<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.cc b/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.cc
new file mode 100644
index 000000000..c8a96a4ad
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.cc
@@ -0,0 +1,316 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "PrepareReplayRequest.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "PrepareReplayRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+void PrepareReplayRequest<I>::send() {
+ *m_resync_requested = false;
+ *m_syncing = false;
+
+ if (m_state_builder->local_image_id !=
+ m_state_builder->remote_client_meta.image_id) {
+ // somehow our local image has a different image id than the image id
+ // registered in the remote image
+ derr << "split-brain detected: local_image_id="
+ << m_state_builder->local_image_id << ", "
+ << "registered local_image_id="
+ << m_state_builder->remote_client_meta.image_id << dendl;
+ finish(-EEXIST);
+ return;
+ }
+
+ std::shared_lock image_locker(m_state_builder->local_image_ctx->image_lock);
+ if (m_state_builder->local_image_ctx->journal == nullptr) {
+ image_locker.unlock();
+
+ derr << "local image does not support journaling" << dendl;
+ finish(-EINVAL);
+ return;
+ }
+
+ int r = m_state_builder->local_image_ctx->journal->is_resync_requested(
+ m_resync_requested);
+ if (r < 0) {
+ image_locker.unlock();
+
+    derr << "failed to check if a resync was requested: " << cpp_strerror(r)
+ finish(r);
+ return;
+ }
+
+ m_local_tag_tid = m_state_builder->local_image_ctx->journal->get_tag_tid();
+ m_local_tag_data = m_state_builder->local_image_ctx->journal->get_tag_data();
+ dout(10) << "local tag=" << m_local_tag_tid << ", "
+ << "local tag data=" << m_local_tag_data << dendl;
+ image_locker.unlock();
+
+ if (*m_resync_requested) {
+ finish(0);
+ return;
+ } else if (m_state_builder->remote_client_meta.state ==
+ librbd::journal::MIRROR_PEER_STATE_SYNCING &&
+ m_local_tag_data.mirror_uuid ==
+ m_state_builder->remote_mirror_uuid) {
+ // if the initial sync hasn't completed, we cannot replay
+ *m_syncing = true;
+ finish(0);
+ return;
+ }
+
+ update_client_state();
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::update_client_state() {
+ if (m_state_builder->remote_client_meta.state !=
+ librbd::journal::MIRROR_PEER_STATE_SYNCING ||
+ m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
+ get_remote_tag_class();
+ return;
+ }
+
+ // our local image is not primary, is flagged as syncing on the remote side,
+ // but is no longer tied to the remote -- this implies we were forced
+ // promoted and then demoted at some point
+ dout(15) << dendl;
+ update_progress("UPDATE_CLIENT_STATE");
+
+ auto client_meta = m_state_builder->remote_client_meta;
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+
+ librbd::journal::ClientData client_data(client_meta);
+ bufferlist data_bl;
+ encode(client_data, data_bl);
+
+ auto ctx = create_context_callback<
+ PrepareReplayRequest<I>,
+ &PrepareReplayRequest<I>::handle_update_client_state>(this);
+ m_state_builder->remote_journaler->update_client(data_bl, ctx);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::handle_update_client_state(int r) {
+ dout(15) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to update client: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_state_builder->remote_client_meta.state =
+ librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+ get_remote_tag_class();
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::get_remote_tag_class() {
+ dout(10) << dendl;
+ update_progress("GET_REMOTE_TAG_CLASS");
+
+ auto ctx = create_context_callback<
+ PrepareReplayRequest<I>,
+ &PrepareReplayRequest<I>::handle_get_remote_tag_class>(this);
+ m_state_builder->remote_journaler->get_client(
+ librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::handle_get_remote_tag_class(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ librbd::journal::ClientData client_data;
+ auto it = m_client.data.cbegin();
+ try {
+ decode(client_data, it);
+ } catch (const buffer::error &err) {
+ derr << "failed to decode remote client meta data: " << err.what()
+ << dendl;
+ finish(-EBADMSG);
+ return;
+ }
+
+ librbd::journal::ImageClientMeta *client_meta =
+ boost::get<librbd::journal::ImageClientMeta>(&client_data.client_meta);
+ if (client_meta == nullptr) {
+ derr << "unknown remote client registration" << dendl;
+ finish(-EINVAL);
+ return;
+ }
+
+ m_remote_tag_class = client_meta->tag_class;
+ dout(10) << "remote tag class=" << m_remote_tag_class << dendl;
+
+ get_remote_tags();
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::get_remote_tags() {
+ dout(10) << dendl;
+ update_progress("GET_REMOTE_TAGS");
+
+ auto ctx = create_context_callback<
+ PrepareReplayRequest<I>,
+ &PrepareReplayRequest<I>::handle_get_remote_tags>(this);
+ m_state_builder->remote_journaler->get_tags(m_remote_tag_class,
+ &m_remote_tags, ctx);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::handle_get_remote_tags(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ // At this point, the local image was existing, non-primary, and replaying;
+ // and the remote image is primary. Attempt to link the local image's most
+ // recent tag to the remote image's tag chain.
+ bool remote_tag_data_valid = false;
+ librbd::journal::TagData remote_tag_data;
+ boost::optional<uint64_t> remote_orphan_tag_tid =
+ boost::make_optional<uint64_t>(false, 0U);
+ bool reconnect_orphan = false;
+
+ // decode the remote tags
+ for (auto &remote_tag : m_remote_tags) {
+ if (m_local_tag_data.predecessor.commit_valid &&
+ m_local_tag_data.predecessor.mirror_uuid ==
+ m_state_builder->remote_mirror_uuid &&
+ m_local_tag_data.predecessor.tag_tid > remote_tag.tid) {
+ dout(10) << "skipping processed predecessor remote tag "
+ << remote_tag.tid << dendl;
+ continue;
+ }
+
+ try {
+ auto it = remote_tag.data.cbegin();
+ decode(remote_tag_data, it);
+ remote_tag_data_valid = true;
+ } catch (const buffer::error &err) {
+ derr << "failed to decode remote tag " << remote_tag.tid << ": "
+ << err.what() << dendl;
+ finish(-EBADMSG);
+ return;
+ }
+
+ dout(10) << "decoded remote tag " << remote_tag.tid << ": "
+ << remote_tag_data << dendl;
+
+ if (!m_local_tag_data.predecessor.commit_valid) {
+ // newly synced local image (no predecessor) replays from the first tag
+ if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ dout(10) << "skipping non-primary remote tag" << dendl;
+ continue;
+ }
+
+ dout(10) << "using initial primary remote tag" << dendl;
+ break;
+ }
+
+ if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
+ // demotion last available local epoch
+
+ if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid &&
+ remote_tag_data.predecessor.commit_valid &&
+ remote_tag_data.predecessor.tag_tid ==
+ m_local_tag_data.predecessor.tag_tid) {
+ // demotion matches remote epoch
+
+ if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid &&
+ m_local_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ // local demoted and remote has matching event
+ dout(10) << "found matching local demotion tag" << dendl;
+ remote_orphan_tag_tid = remote_tag.tid;
+ continue;
+ }
+
+ if (m_local_tag_data.predecessor.mirror_uuid ==
+ m_state_builder->remote_mirror_uuid &&
+ remote_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ // remote demoted and local has matching event
+ dout(10) << "found matching remote demotion tag" << dendl;
+ remote_orphan_tag_tid = remote_tag.tid;
+ continue;
+ }
+ }
+
+ if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID &&
+ remote_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::ORPHAN_MIRROR_UUID &&
+ remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid &&
+ remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) {
+ // remote promotion tag chained to remote/local demotion tag
+ dout(10) << "found chained remote promotion tag" << dendl;
+ reconnect_orphan = true;
+ break;
+ }
+
+ // promotion must follow demotion
+ remote_orphan_tag_tid = boost::none;
+ }
+ }
+
+ if (remote_tag_data_valid &&
+ m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
+ dout(10) << "local image is in clean replay state" << dendl;
+ } else if (reconnect_orphan) {
+ dout(10) << "remote image was demoted/promoted" << dendl;
+ } else {
+ derr << "split-brain detected -- skipping image replay" << dendl;
+ finish(-EEXIST);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::update_progress(const std::string &description) {
+ dout(10) << description << dendl;
+
+ if (m_progress_ctx != nullptr) {
+ m_progress_ctx->update_progress(description);
+ }
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::PrepareReplayRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h b/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h
new file mode 100644
index 000000000..2b6fb659b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h
@@ -0,0 +1,115 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
+
+#include "include/int_types.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/mirror/Types.h"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include <list>
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+
+namespace image_replayer {
+namespace journal {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class PrepareReplayRequest : public BaseRequest {
+public:
+ static PrepareReplayRequest* create(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) {
+ return new PrepareReplayRequest(
+ local_mirror_uuid, progress_ctx, state_builder, resync_requested,
+ syncing, on_finish);
+ }
+
+ PrepareReplayRequest(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish)
+ : BaseRequest(on_finish),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_progress_ctx(progress_ctx),
+ m_state_builder(state_builder),
+ m_resync_requested(resync_requested),
+ m_syncing(syncing) {
+ }
+
+ void send() override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * UPDATE_CLIENT_STATE
+ * |
+ * v
+ * GET_REMOTE_TAG_CLASS
+ * |
+ * v
+ * GET_REMOTE_TAGS
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ typedef std::list<cls::journal::Tag> Tags;
+
+ std::string m_local_mirror_uuid;
+ ProgressContext* m_progress_ctx;
+ StateBuilder<ImageCtxT>* m_state_builder;
+ bool* m_resync_requested;
+ bool* m_syncing;
+
+ uint64_t m_local_tag_tid = 0;
+ librbd::journal::TagData m_local_tag_data;
+
+ uint64_t m_remote_tag_class = 0;
+ Tags m_remote_tags;
+ cls::journal::Client m_client;
+
+ void update_client_state();
+ void handle_update_client_state(int r);
+
+ void get_remote_tag_class();
+ void handle_get_remote_tag_class(int r);
+
+ void get_remote_tags();
+ void handle_get_remote_tags(int r);
+
+ void update_progress(const std::string& description);
+
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::PrepareReplayRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc
new file mode 100644
index 000000000..eb99d5add
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc
@@ -0,0 +1,284 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ReplayStatusFormatter.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "json_spirit/json_spirit.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "ReplayStatusFormatter: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+using librbd::util::unique_lock_name;
+
+namespace {
+
+// Return |value| rounded to two decimal places, as an absolute value.
+// Qualify with std:: so the double overloads are selected -- an unqualified
+// abs() can resolve to the C int abs(int) overload and silently truncate.
+double round_to_two_places(double value) {
+  return std::abs(std::round(value * 100) / 100);
+}
+
+// Convert a journal object position into a JSON object with object_number,
+// tag_tid and entry_tid keys; a default-constructed (empty) position yields
+// an empty JSON object.
+json_spirit::mObject to_json_object(
+    const cls::journal::ObjectPosition& position) {
+  json_spirit::mObject object;
+  if (position != cls::journal::ObjectPosition{}) {
+    object["object_number"] = position.object_number;
+    object["tag_tid"] = position.tag_tid;
+    object["entry_tid"] = position.entry_tid;
+  }
+  return object;
+}
+
+} // anonymous namespace
+
+// Construct a formatter bound to a remote journaler and the local cluster's
+// mirror uuid; the lock name embeds the instance address for uniqueness.
+template <typename I>
+ReplayStatusFormatter<I>::ReplayStatusFormatter(Journaler *journaler,
+                                                const std::string &mirror_uuid)
+  : m_journaler(journaler),
+    m_mirror_uuid(mirror_uuid),
+    m_lock(ceph::make_mutex(unique_lock_name("ReplayStatusFormatter::m_lock", this))) {
+}
+
+// Record a replayed journal entry: feed its byte count and one entry into
+// the rolling throughput averages used by format().
+template <typename I>
+void ReplayStatusFormatter<I>::handle_entry_processed(uint32_t bytes) {
+  dout(20) << dendl;
+
+  m_bytes_per_second(bytes);
+  m_entries_per_second(1);
+}
+
+// Produce a JSON replay-status description, synchronously when possible.
+// Returns true after filling *description and completing on_finish with
+// -EEXIST.  Returns false when a previous request is still in flight
+// (on_finish completed immediately with -EAGAIN) or when the tag cache
+// must be refreshed first (on_finish completed later by the update path).
+template <typename I>
+bool ReplayStatusFormatter<I>::get_or_send_update(std::string *description,
+                                                  Context *on_finish) {
+  dout(20) << dendl;
+
+  // only one outstanding update request is allowed at a time
+  bool in_progress = false;
+  {
+    std::lock_guard locker{m_lock};
+    if (m_on_finish) {
+      in_progress = true;
+    } else {
+      m_on_finish = on_finish;
+    }
+  }
+
+  if (in_progress) {
+    dout(10) << "previous request is still in progress, ignoring" << dendl;
+    on_finish->complete(-EAGAIN);
+    return false;
+  }
+
+  m_master_position = cls::journal::ObjectPosition();
+  m_mirror_position = cls::journal::ObjectPosition();
+
+  // read the cached commit positions of the master (image) client and of
+  // this mirror peer's registered client
+  cls::journal::Client master_client, mirror_client;
+  int r;
+
+  r = m_journaler->get_cached_client(librbd::Journal<>::IMAGE_CLIENT_ID,
+                                     &master_client);
+  if (r < 0) {
+    derr << "error retrieving registered master client: "
+         << cpp_strerror(r) << dendl;
+  } else {
+    r = m_journaler->get_cached_client(m_mirror_uuid, &mirror_client);
+    if (r < 0) {
+      derr << "error retrieving registered mirror client: "
+           << cpp_strerror(r) << dendl;
+    }
+  }
+
+  if (!master_client.commit_position.object_positions.empty()) {
+    m_master_position =
+      *(master_client.commit_position.object_positions.begin());
+  }
+
+  if (!mirror_client.commit_position.object_positions.empty()) {
+    m_mirror_position =
+      *(mirror_client.commit_position.object_positions.begin());
+  }
+
+  if (!calculate_behind_master_or_send_update()) {
+    dout(20) << "need to update tag cache" << dendl;
+    return false;
+  }
+
+  format(description);
+
+  // release the slot before completing the caller
+  {
+    std::lock_guard locker{m_lock};
+    ceph_assert(m_on_finish == on_finish);
+    m_on_finish = nullptr;
+  }
+
+  on_finish->complete(-EEXIST);
+  return true;
+}
+
+// Compute how many journal entries the local (mirror) position lags behind
+// the master position by walking the master's tag ancestry via the tag
+// cache.  Returns false when a tag is missing from the cache and an async
+// cache update was scheduled (the pending m_on_finish will be completed by
+// that path); returns true when m_entries_behind_master is valid.
+template <typename I>
+bool ReplayStatusFormatter<I>::calculate_behind_master_or_send_update() {
+  dout(20) << "m_master_position=" << m_master_position
+           << ", m_mirror_position=" << m_mirror_position << dendl;
+
+  m_entries_behind_master = 0;
+
+  // nothing to compute when there is no master position or the mirror's
+  // tag is already ahead of the master's
+  if (m_master_position == cls::journal::ObjectPosition() ||
+      m_master_position.tag_tid < m_mirror_position.tag_tid) {
+    return true;
+  }
+
+  cls::journal::ObjectPosition master = m_master_position;
+  uint64_t mirror_tag_tid = m_mirror_position.tag_tid;
+
+  // walk backwards through tag predecessors until reaching the mirror's
+  // tag, accumulating the entry counts of each intermediate tag
+  while (master.tag_tid > mirror_tag_tid) {
+    auto tag_it = m_tag_cache.find(master.tag_tid);
+    if (tag_it == m_tag_cache.end()) {
+      send_update_tag_cache(master.tag_tid, mirror_tag_tid);
+      return false;
+    }
+    librbd::journal::TagData &tag_data = tag_it->second;
+    m_entries_behind_master += master.entry_tid;
+    master = {0, tag_data.predecessor.tag_tid, tag_data.predecessor.entry_tid};
+  }
+  if (master.tag_tid == mirror_tag_tid &&
+      master.entry_tid > m_mirror_position.entry_tid) {
+    m_entries_behind_master += master.entry_tid - m_mirror_position.entry_tid;
+  }
+
+  dout(20) << "clearing tags not needed any more (below mirror position)"
+           << dendl;
+
+  // prune cached tags at or below the mirror position -- they will never
+  // be consulted again
+  uint64_t tag_tid = mirror_tag_tid;
+  size_t old_size = m_tag_cache.size();
+  while (tag_tid != 0) {
+    auto tag_it = m_tag_cache.find(tag_tid);
+    if (tag_it == m_tag_cache.end()) {
+      break;
+    }
+    librbd::journal::TagData &tag_data = tag_it->second;
+
+    dout(20) << "erasing tag " << tag_data << " for tag_tid " << tag_tid
+             << dendl;
+
+    tag_tid = tag_data.predecessor.tag_tid;
+    m_tag_cache.erase(tag_it);
+  }
+
+  dout(20) << old_size - m_tag_cache.size() << " entries cleared" << dendl;
+
+  return true;
+}
+
+// Fetch the tag for master_tag_tid from the remote journaler so it can be
+// cached; recursion through handle_update_tag_cache() walks predecessors
+// toward mirror_tag_tid.  Once the walk is done (or the tag is already
+// cached) the pending m_on_finish callback is completed.
+template <typename I>
+void ReplayStatusFormatter<I>::send_update_tag_cache(uint64_t master_tag_tid,
+                                                     uint64_t mirror_tag_tid) {
+  if (master_tag_tid <= mirror_tag_tid ||
+      m_tag_cache.find(master_tag_tid) != m_tag_cache.end()) {
+    // walk complete: hand the result back to the waiting caller
+    Context *on_finish = nullptr;
+    {
+      std::lock_guard locker{m_lock};
+      std::swap(m_on_finish, on_finish);
+    }
+
+    ceph_assert(on_finish);
+    on_finish->complete(0);
+    return;
+  }
+
+  dout(20) << "master_tag_tid=" << master_tag_tid << ", mirror_tag_tid="
+           << mirror_tag_tid << dendl;
+
+  auto ctx = new LambdaContext(
+    [this, master_tag_tid, mirror_tag_tid](int r) {
+      handle_update_tag_cache(master_tag_tid, mirror_tag_tid, r);
+    });
+  m_journaler->get_tag(master_tag_tid, &m_tag, ctx);
+}
+
+// Completion for a get_tag() fetch: decode the tag, cache it, and continue
+// walking its predecessor chain.  Fetch/decode errors leave a default
+// TagData (empty predecessor), which terminates the walk.
+template <typename I>
+void ReplayStatusFormatter<I>::handle_update_tag_cache(uint64_t master_tag_tid,
+                                                       uint64_t mirror_tag_tid,
+                                                       int r) {
+  librbd::journal::TagData tag_data;
+
+  if (r < 0) {
+    derr << "error retrieving tag " << master_tag_tid << ": " << cpp_strerror(r)
+         << dendl;
+  } else {
+    dout(20) << "retrieved tag " << master_tag_tid << ": " << m_tag << dendl;
+
+    auto it = m_tag.data.cbegin();
+    try {
+      decode(tag_data, it);
+    } catch (const buffer::error &err) {
+      derr << "error decoding tag " << master_tag_tid << ": " << err.what()
+           << dendl;
+    }
+  }
+
+  // a predecessor owned by neither the local nor the orphan mirror uuid
+  // belongs to a remote non-primary epoch: stop the walk there
+  if (tag_data.predecessor.mirror_uuid !=
+        librbd::Journal<>::LOCAL_MIRROR_UUID &&
+      tag_data.predecessor.mirror_uuid !=
+        librbd::Journal<>::ORPHAN_MIRROR_UUID) {
+    dout(20) << "hit remote image non-primary epoch" << dendl;
+    tag_data.predecessor = {};
+  }
+
+  dout(20) << "decoded tag " << master_tag_tid << ": " << tag_data << dendl;
+
+  m_tag_cache[master_tag_tid] = tag_data;
+  send_update_tag_cache(tag_data.predecessor.tag_tid, mirror_tag_tid);
+}
+
+// Render the replay status (commit positions, entries behind primary and
+// throughput averages) as a JSON string into *description.
+template <typename I>
+void ReplayStatusFormatter<I>::format(std::string *description) {
+  dout(20) << "m_master_position=" << m_master_position
+           << ", m_mirror_position=" << m_mirror_position
+           << ", m_entries_behind_master=" << m_entries_behind_master << dendl;
+
+  json_spirit::mObject root_obj;
+  root_obj["primary_position"] = to_json_object(m_master_position);
+  root_obj["non_primary_position"] = to_json_object(m_mirror_position);
+  root_obj["entries_behind_primary"] = (
+    m_entries_behind_master > 0 ? m_entries_behind_master : 0);
+
+  // poke the rolling averages with a zero sample so stale windows decay
+  m_bytes_per_second(0);
+  root_obj["bytes_per_second"] = round_to_two_places(
+    m_bytes_per_second.get_average());
+
+  m_entries_per_second(0);
+  auto entries_per_second = m_entries_per_second.get_average();
+  root_obj["entries_per_second"] = round_to_two_places(entries_per_second);
+
+  if (m_entries_behind_master > 0 && entries_per_second > 0) {
+    // clamp in floating point *before* converting: converting a double
+    // that exceeds uint64_t's range to uint64_t is undefined behavior, so
+    // a post-conversion range check cannot work
+    double seconds_until_synced = round_to_two_places(
+      m_entries_behind_master / entries_per_second);
+    uint64_t clamped_seconds_until_synced;
+    if (seconds_until_synced >=
+          static_cast<double>(std::numeric_limits<uint64_t>::max())) {
+      clamped_seconds_until_synced = std::numeric_limits<uint64_t>::max();
+    } else {
+      clamped_seconds_until_synced =
+        static_cast<uint64_t>(seconds_until_synced);
+    }
+
+    root_obj["seconds_until_synced"] = clamped_seconds_until_synced;
+  }
+
+  *description = json_spirit::write(
+    root_obj, json_spirit::remove_trailing_zeros);
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::ReplayStatusFormatter<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h
new file mode 100644
index 000000000..5dbbfe10d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h
@@ -0,0 +1,70 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H
+
+#include "include/Context.h"
+#include "common/ceph_mutex.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h"
+
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+// Formats an image's journal replay status as a JSON description: primary
+// and non-primary commit positions, entries behind primary and rolling
+// replay throughput.  m_lock guards the single in-flight update request.
+template <typename ImageCtxT = librbd::ImageCtx>
+class ReplayStatusFormatter {
+public:
+  typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler;
+
+  static ReplayStatusFormatter* create(Journaler *journaler,
+                                       const std::string &mirror_uuid) {
+    return new ReplayStatusFormatter(journaler, mirror_uuid);
+  }
+
+  static void destroy(ReplayStatusFormatter* formatter) {
+    delete formatter;
+  }
+
+  ReplayStatusFormatter(Journaler *journaler, const std::string &mirror_uuid);
+
+  // record a replayed entry for the throughput averages
+  void handle_entry_processed(uint32_t bytes);
+
+  // fill *description synchronously (returns true) or schedule an async
+  // tag-cache refresh / reject a concurrent request (returns false)
+  bool get_or_send_update(std::string *description, Context *on_finish);
+
+private:
+  Journaler *m_journaler;
+  std::string m_mirror_uuid;
+  ceph::mutex m_lock;
+  Context *m_on_finish = nullptr;  // pending get_or_send_update() callback
+  cls::journal::ObjectPosition m_master_position;
+  cls::journal::ObjectPosition m_mirror_position;
+  int64_t m_entries_behind_master = 0;
+  cls::journal::Tag m_tag;
+  // tag_tid -> decoded tag data for walking predecessor chains
+  std::map<uint64_t, librbd::journal::TagData> m_tag_cache;
+
+  TimeRollingMean m_bytes_per_second;
+  TimeRollingMean m_entries_per_second;
+
+  bool calculate_behind_master_or_send_update();
+  void send_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid);
+  void handle_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid,
+                               int r);
+  void format(std::string *description);
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::ReplayStatusFormatter<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc
new file mode 100644
index 000000000..3ce9104d2
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc
@@ -0,0 +1,1303 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Replayer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/journal/Replay.h"
+#include "journal/Journaler.h"
+#include "journal/JournalMetadataListener.h"
+#include "journal/ReplayHandler.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/ReplayerListener.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h"
+#include "tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "Replayer: " << this << " " << __func__ << ": "
+
+extern PerfCounters *g_journal_perf_counters;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+namespace {
+
+// Return the number of seconds to wait before replaying an event so it is
+// applied no earlier than mirroring_replay_delay seconds after event_time;
+// returns 0 when no delay is configured or it has already elapsed.
+uint32_t calculate_replay_delay(const utime_t &event_time,
+                                int mirroring_replay_delay) {
+  if (mirroring_replay_delay <= 0) {
+    return 0;
+  }
+
+  utime_t now = ceph_clock_now();
+  if (event_time + mirroring_replay_delay <= now) {
+    return 0;
+  }
+
+  // ensure it is rounded up when converting to integer
+  return (event_time + mirroring_replay_delay - now) + 1;
+}
+
+} // anonymous namespace
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+// Completion fired when a replayed journal entry has been committed
+// locally; forwards the entry, its size, its replay start time and the
+// result to Replayer::handle_process_entry_safe().
+template <typename I>
+struct Replayer<I>::C_ReplayCommitted : public Context {
+  Replayer* replayer;
+  ReplayEntry replay_entry;
+  uint64_t replay_bytes;
+  utime_t replay_start_time;
+
+  C_ReplayCommitted(Replayer* replayer, ReplayEntry &&replay_entry,
+                    uint64_t replay_bytes, const utime_t &replay_start_time)
+    : replayer(replayer), replay_entry(std::move(replay_entry)),
+      replay_bytes(replay_bytes), replay_start_time(replay_start_time) {
+  }
+
+  void finish(int r) override {
+    replayer->handle_process_entry_safe(replay_entry, replay_bytes,
+                                        replay_start_time, r);
+  }
+};
+
+// Forwards remote journal metadata-update notifications back to the
+// replayer on its work queue; each notification is tracked by the
+// in-flight op tracker so shut down can drain pending callbacks.
+template <typename I>
+struct Replayer<I>::RemoteJournalerListener
+  : public ::journal::JournalMetadataListener {
+  Replayer* replayer;
+
+  RemoteJournalerListener(Replayer* replayer) : replayer(replayer) {}
+
+  void handle_update(::journal::JournalMetadata*) override {
+    auto ctx = new C_TrackedOp(
+      replayer->m_in_flight_op_tracker,
+      new LambdaContext([this](int r) {
+        replayer->handle_remote_journal_metadata_updated();
+      }));
+    replayer->m_threads->work_queue->queue(ctx, 0);
+  }
+};
+
+// Receives remote journal replay events: new entries available, and replay
+// completion (with -ENOMEM mapped to a cache-autotune error message).
+template <typename I>
+struct Replayer<I>::RemoteReplayHandler : public ::journal::ReplayHandler {
+  Replayer* replayer;
+
+  RemoteReplayHandler(Replayer* replayer) : replayer(replayer) {}
+  ~RemoteReplayHandler() override {};
+
+  void handle_entries_available() override {
+    replayer->handle_replay_ready();
+  }
+
+  void handle_complete(int r) override {
+    std::string error;
+    if (r == -ENOMEM) {
+      error = "not enough memory in autotune cache";
+    } else if (r < 0) {
+      error = "replay completed with error: " + cpp_strerror(r);
+    }
+    replayer->handle_replay_complete(r, error);
+  }
+};
+
+// Reacts to local journal events: journal close, forced promotion and
+// resync requests all terminate the current replay.
+template <typename I>
+struct Replayer<I>::LocalJournalListener
+  : public librbd::journal::Listener {
+  Replayer* replayer;
+
+  LocalJournalListener(Replayer* replayer) : replayer(replayer) {
+  }
+
+  void handle_close() override {
+    replayer->handle_replay_complete(0, "");
+  }
+
+  void handle_promoted() override {
+    replayer->handle_replay_complete(0, "force promoted");
+  }
+
+  void handle_resync() override {
+    replayer->handle_resync_image();
+  }
+};
+
+// Construct the journal replayer; all pointers are caller-owned and must
+// outlive this instance.  The lock name embeds the instance address.
+template <typename I>
+Replayer<I>::Replayer(
+    Threads<I>* threads,
+    const std::string& local_mirror_uuid,
+    StateBuilder<I>* state_builder,
+    ReplayerListener* replayer_listener)
+  : m_threads(threads),
+    m_local_mirror_uuid(local_mirror_uuid),
+    m_state_builder(state_builder),
+    m_replayer_listener(replayer_listener),
+    m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+      "rbd::mirror::image_replayer::journal::Replayer", this))) {
+  dout(10) << dendl;
+}
+
+// Destructor: unregisters perf counters and asserts that shut_down()
+// already tore down every helper object and closed the local image.
+template <typename I>
+Replayer<I>::~Replayer() {
+  dout(10) << dendl;
+
+  {
+    std::unique_lock locker{m_lock};
+    unregister_perf_counters();
+  }
+
+  ceph_assert(m_remote_listener == nullptr);
+  ceph_assert(m_local_journal_listener == nullptr);
+  ceph_assert(m_local_journal_replay == nullptr);
+  ceph_assert(m_remote_replay_handler == nullptr);
+  ceph_assert(m_event_preprocessor == nullptr);
+  ceph_assert(m_replay_status_formatter == nullptr);
+  ceph_assert(m_delayed_preprocess_task == nullptr);
+  ceph_assert(m_flush_local_replay_task == nullptr);
+  ceph_assert(m_state_builder->local_image_ctx == nullptr);
+}
+
+// Begin initialization: compute the image spec, register perf counters and
+// start the remote journaler init state machine.  on_finish is completed
+// once init succeeds or fails (via m_on_init_shutdown).
+template <typename I>
+void Replayer<I>::init(Context* on_finish) {
+  dout(10) << dendl;
+
+  {
+    auto local_image_ctx = m_state_builder->local_image_ctx;
+    std::shared_lock image_locker{local_image_ctx->image_lock};
+    m_image_spec = util::compute_image_spec(local_image_ctx->md_ctx,
+                                            local_image_ctx->name);
+  }
+
+  {
+    std::unique_lock locker{m_lock};
+    register_perf_counters();
+  }
+
+  ceph_assert(m_on_init_shutdown == nullptr);
+  m_on_init_shutdown = on_finish;
+
+  init_remote_journaler();
+}
+
+// Begin shut down: record on_finish, cancel pending timer tasks and start
+// draining the flush tracker.  If init is still running, the init path
+// notices m_on_init_shutdown and takes over the shut down.
+template <typename I>
+void Replayer<I>::shut_down(Context* on_finish) {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  ceph_assert(m_on_init_shutdown == nullptr);
+  m_on_init_shutdown = on_finish;
+
+  if (m_state == STATE_INIT) {
+    // raced with the last piece of the init state machine
+    return;
+  } else if (m_state == STATE_REPLAYING) {
+    m_state = STATE_COMPLETE;
+  }
+
+  // if shutting down due to an error notification, we don't
+  // need to propagate the same error again
+  m_error_code = 0;
+  m_error_description = "";
+
+  cancel_delayed_preprocess_task();
+  cancel_flush_local_replay_task();
+  wait_for_flush();
+}
+
+// Flush the local journal replay (and remote commit position), tracking
+// the callback so shut down waits for it.
+template <typename I>
+void Replayer<I>::flush(Context* on_finish) {
+  dout(10) << dendl;
+
+  flush_local_replay(new C_TrackedOp(m_in_flight_op_tracker, on_finish));
+}
+
+// Fetch a JSON replay status via the status formatter.  Completes
+// on_finish with -EAGAIN when replay is not running; otherwise delegates
+// to ReplayStatusFormatter::get_or_send_update() (see its contract).
+template <typename I>
+bool Replayer<I>::get_replay_status(std::string* description,
+                                    Context* on_finish) {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (m_replay_status_formatter == nullptr) {
+    derr << "replay not running" << dendl;
+    locker.unlock();
+
+    on_finish->complete(-EAGAIN);
+    return false;
+  }
+
+  on_finish = new C_TrackedOp(m_in_flight_op_tracker, on_finish);
+  return m_replay_status_formatter->get_or_send_update(description,
+                                                       on_finish);
+}
+
+// Initialize the remote journaler; continues in
+// handle_init_remote_journaler().
+template <typename I>
+void Replayer<I>::init_remote_journaler() {
+  dout(10) << dendl;
+
+  Context *ctx = create_context_callback<
+    Replayer, &Replayer<I>::handle_init_remote_journaler>(this);
+  m_state_builder->remote_journaler->init(ctx);
+}
+
+// Completion for remote journaler init: register the metadata listener,
+// fetch and validate this peer's registered client state, then start
+// external replay.  Any failure completes the replay and closes the local
+// image.
+template <typename I>
+void Replayer<I>::handle_init_remote_journaler(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (r < 0) {
+    derr << "failed to initialize remote journal: " << cpp_strerror(r) << dendl;
+    handle_replay_complete(locker, r, "error initializing remote journal");
+    close_local_image();
+    return;
+  }
+
+  // listen for metadata updates to check for disconnect events
+  ceph_assert(m_remote_listener == nullptr);
+  m_remote_listener = new RemoteJournalerListener(this);
+  m_state_builder->remote_journaler->add_listener(m_remote_listener);
+
+  cls::journal::Client remote_client;
+  r = m_state_builder->remote_journaler->get_cached_client(m_local_mirror_uuid,
+                                                           &remote_client);
+  if (r < 0) {
+    derr << "error retrieving remote journal client: " << cpp_strerror(r)
+         << dendl;
+    handle_replay_complete(locker, r, "error retrieving remote journal client");
+    close_local_image();
+    return;
+  }
+
+  std::string error;
+  r = validate_remote_client_state(remote_client,
+                                   &m_state_builder->remote_client_meta,
+                                   &m_resync_requested, &error);
+  if (r < 0) {
+    handle_replay_complete(locker, r, error);
+    close_local_image();
+    return;
+  }
+
+  start_external_replay(locker);
+}
+
+// Grab the local image's journal and start external replay on it; fails
+// the replay if the local journal has already been closed.
+template <typename I>
+void Replayer<I>::start_external_replay(std::unique_lock<ceph::mutex>& locker) {
+  dout(10) << dendl;
+
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  std::shared_lock local_image_locker{local_image_ctx->image_lock};
+
+  ceph_assert(m_local_journal == nullptr);
+  m_local_journal = local_image_ctx->journal;
+  if (m_local_journal == nullptr) {
+    local_image_locker.unlock();
+
+    derr << "local image journal closed" << dendl;
+    handle_replay_complete(locker, -EINVAL, "error accessing local journal");
+    close_local_image();
+    return;
+  }
+
+  // safe to hold pointer to journal after external playback starts
+  Context *start_ctx = create_context_callback<
+    Replayer, &Replayer<I>::handle_start_external_replay>(this);
+  m_local_journal->start_external_replay(&m_local_journal_replay, start_ctx);
+}
+
+// Completion for start_external_replay(): notify init complete, move to
+// STATE_REPLAYING, install the local journal listener, create the event
+// preprocessor and status formatter, and start remote live replay.
+template <typename I>
+void Replayer<I>::handle_start_external_replay(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (r < 0) {
+    ceph_assert(m_local_journal_replay == nullptr);
+    derr << "error starting external replay on local image "
+         << m_state_builder->local_image_ctx->id << ": "
+         << cpp_strerror(r) << dendl;
+
+    handle_replay_complete(locker, r, "error starting replay on local image");
+    close_local_image();
+    return;
+  }
+
+  if (!notify_init_complete(locker)) {
+    return;
+  }
+
+  m_state = STATE_REPLAYING;
+
+  // check for resync/promotion state after adding listener
+  if (!add_local_journal_listener(locker)) {
+    return;
+  }
+
+  // start remote journal replay
+  m_event_preprocessor = EventPreprocessor<I>::create(
+    *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler,
+    m_local_mirror_uuid, &m_state_builder->remote_client_meta,
+    m_threads->work_queue);
+  m_replay_status_formatter = ReplayStatusFormatter<I>::create(
+    m_state_builder->remote_journaler, m_local_mirror_uuid);
+
+  auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
+  double poll_seconds = cct->_conf.get_val<double>(
+    "rbd_mirror_journal_poll_age");
+  m_remote_replay_handler = new RemoteReplayHandler(this);
+  m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler,
+                                                       poll_seconds);
+
+  notify_status_updated();
+}
+
+// Install the local journal listener and re-check for force promotion or
+// a resync request that may have raced the installation.  Returns false
+// when replay was completed by one of those conditions (or an error).
+template <typename I>
+bool Replayer<I>::add_local_journal_listener(
+    std::unique_lock<ceph::mutex>& locker) {
+  dout(10) << dendl;
+
+  // listen for promotion and resync requests against local journal
+  ceph_assert(m_local_journal_listener == nullptr);
+  m_local_journal_listener = new LocalJournalListener(this);
+  m_local_journal->add_listener(m_local_journal_listener);
+
+  // verify that the local image wasn't force-promoted and that a resync hasn't
+  // been requested now that we are listening for events
+  if (m_local_journal->is_tag_owner()) {
+    dout(10) << "local image force-promoted" << dendl;
+    handle_replay_complete(locker, 0, "force promoted");
+    return false;
+  }
+
+  bool resync_requested = false;
+  int r = m_local_journal->is_resync_requested(&resync_requested);
+  if (r < 0) {
+    dout(10) << "failed to determine resync state: " << cpp_strerror(r)
+             << dendl;
+    handle_replay_complete(locker, r, "error parsing resync state");
+    return false;
+  } else if (resync_requested) {
+    dout(10) << "local image resync requested" << dendl;
+    handle_replay_complete(locker, 0, "resync requested");
+    return false;
+  }
+
+  return true;
+}
+
+// Complete the init callback (dropping m_lock while doing so) and detect a
+// shut down that raced the notification; returns false when a shut down
+// arrived and the local image close was started.
+template <typename I>
+bool Replayer<I>::notify_init_complete(std::unique_lock<ceph::mutex>& locker) {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  ceph_assert(m_state == STATE_INIT);
+
+  // notify that init has completed
+  Context *on_finish = nullptr;
+  std::swap(m_on_init_shutdown, on_finish);
+
+  locker.unlock();
+  on_finish->complete(0);
+  locker.lock();
+
+  if (m_on_init_shutdown != nullptr) {
+    // shut down requested after we notified init complete but before we
+    // grabbed the lock
+    close_local_image();
+    return false;
+  }
+
+  return true;
+}
+
+// Shut down step: wait for outstanding flushes to drain before stopping
+// the local journal replay state machine.
+template <typename I>
+void Replayer<I>::wait_for_flush() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  // ensure that we don't have two concurrent local journal replay shut downs
+  dout(10) << dendl;
+  auto ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      Replayer<I>, &Replayer<I>::handle_wait_for_flush>(this));
+  m_flush_tracker.wait_for_ops(ctx);
+}
+
+// Flushes drained: continue shut down with the local journal replay.
+template <typename I>
+void Replayer<I>::handle_wait_for_flush(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  shut_down_local_journal_replay();
+}
+
+// Shut down step: stop the local journal replay state machine (if one was
+// started), then wait for in-flight event replays.
+template <typename I>
+void Replayer<I>::shut_down_local_journal_replay() {
+  std::unique_lock locker{m_lock};
+
+  if (m_local_journal_replay == nullptr) {
+    wait_for_event_replay();
+    return;
+  }
+
+  // It's required to stop the local journal replay state machine prior to
+  // waiting for the events to complete. This is to ensure that IO is properly
+  // flushed (it might be batched), wait for any running ops to complete, and
+  // to cancel any ops waiting for their associated OnFinish events.
+  dout(10) << dendl;
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_shut_down_local_journal_replay>(this);
+  m_local_journal_replay->shut_down(true, ctx);
+}
+
+// Completion for local journal replay shut down: record any error and
+// continue by waiting for in-flight event replays.
+template <typename I>
+void Replayer<I>::handle_shut_down_local_journal_replay(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (r < 0) {
+    derr << "error shutting down journal replay: " << cpp_strerror(r) << dendl;
+    handle_replay_error(r, "failed to shut down local journal replay");
+  }
+
+  wait_for_event_replay();
+}
+
+// Shut down step: wait for in-flight event replays to complete before
+// closing the local image.
+template <typename I>
+void Replayer<I>::wait_for_event_replay() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  dout(10) << dendl;
+  auto ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      Replayer<I>, &Replayer<I>::handle_wait_for_event_replay>(this));
+  m_event_replay_tracker.wait_for_ops(ctx);
+}
+
+// Event replays drained: continue shut down by closing the local image.
+template <typename I>
+void Replayer<I>::handle_wait_for_event_replay(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::unique_lock locker{m_lock};
+  close_local_image();
+}
+
+// Shut down step: tear down the local-side helpers (journal listener,
+// external replay, event preprocessor) and close the local image; skips
+// straight to stopping remote replay when no local image is open.
+template <typename I>
+void Replayer<I>::close_local_image() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  if (m_state_builder->local_image_ctx == nullptr) {
+    stop_remote_journaler_replay();
+    return;
+  }
+
+  dout(10) << dendl;
+  if (m_local_journal_listener != nullptr) {
+    // blocks if listener notification is in-progress
+    m_local_journal->remove_listener(m_local_journal_listener);
+    delete m_local_journal_listener;
+    m_local_journal_listener = nullptr;
+  }
+
+  if (m_local_journal_replay != nullptr) {
+    m_local_journal->stop_external_replay();
+    m_local_journal_replay = nullptr;
+  }
+
+  if (m_event_preprocessor != nullptr) {
+    image_replayer::journal::EventPreprocessor<I>::destroy(
+      m_event_preprocessor);
+    m_event_preprocessor = nullptr;
+  }
+
+  m_local_journal.reset();
+
+  // NOTE: it's important to ensure that the local image is fully
+  // closed before attempting to close the remote journal in
+  // case the remote cluster is unreachable
+  ceph_assert(m_state_builder->local_image_ctx != nullptr);
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_close_local_image>(this);
+  auto request = image_replayer::CloseImageRequest<I>::create(
+    &m_state_builder->local_image_ctx, ctx);
+  request->send();
+}
+
+
+// Completion for CloseImageRequest: record any close failure and continue
+// the shut down sequence by stopping remote journaler replay.
+template <typename I>
+void Replayer<I>::handle_close_local_image(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (r < 0) {
+    derr << "error closing local image: " << cpp_strerror(r) << dendl;
+    handle_replay_error(r, "failed to close local image");
+  }
+
+  ceph_assert(m_state_builder->local_image_ctx == nullptr);
+  stop_remote_journaler_replay();
+}
+
+// Shut down step: stop live replay on the remote journaler.  When the
+// remote journaler (or its replay handler) was never set up there is
+// nothing to stop, so proceed directly to draining in-flight ops.
+template <typename I>
+void Replayer<I>::stop_remote_journaler_replay() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  if (m_state_builder->remote_journaler == nullptr ||
+      m_remote_replay_handler == nullptr) {
+    wait_for_in_flight_ops();
+    return;
+  }
+
+  dout(10) << dendl;
+  auto ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      Replayer<I>, &Replayer<I>::handle_stop_remote_journaler_replay>(this));
+  m_state_builder->remote_journaler->stop_replay(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_stop_remote_journaler_replay(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (r < 0) {
+ derr << "failed to stop remote journaler replay : " << cpp_strerror(r)
+ << dendl;
+ handle_replay_error(r, "failed to stop remote journaler replay");
+ }
+
+ delete m_remote_replay_handler;
+ m_remote_replay_handler = nullptr;
+
+ wait_for_in_flight_ops();
+}
+
+// Shut down step: remove the remote metadata listener and wait for all
+// tracked in-flight ops (status updates, flushes, notifications) to drain.
+template <typename I>
+void Replayer<I>::wait_for_in_flight_ops() {
+  dout(10) << dendl;
+  if (m_remote_listener != nullptr) {
+    m_state_builder->remote_journaler->remove_listener(m_remote_listener);
+    delete m_remote_listener;
+    m_remote_listener = nullptr;
+  }
+
+  auto ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      Replayer<I>, &Replayer<I>::handle_wait_for_in_flight_ops>(this));
+  m_in_flight_op_tracker.wait_for_ops(ctx);
+}
+
+// Final shut down step: destroy the status formatter, mark the state
+// machine complete and fire the stored init/shutdown callback with the
+// first recorded error code.
+template <typename I>
+void Replayer<I>::handle_wait_for_in_flight_ops(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  ReplayStatusFormatter<I>::destroy(m_replay_status_formatter);
+  m_replay_status_formatter = nullptr;
+
+  Context* on_init_shutdown = nullptr;
+  {
+    std::unique_lock locker{m_lock};
+    ceph_assert(m_on_init_shutdown != nullptr);
+    std::swap(m_on_init_shutdown, on_init_shutdown);
+    m_state = STATE_COMPLETE;
+  }
+  on_init_shutdown->complete(m_error_code);
+}
+
+// Remote journal metadata changed: re-validate this peer's registered
+// client state and stop replay if the client was flagged disconnected.
+template <typename I>
+void Replayer<I>::handle_remote_journal_metadata_updated() {
+  dout(20) << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (m_state != STATE_REPLAYING) {
+    return;
+  }
+
+  cls::journal::Client remote_client;
+  int r = m_state_builder->remote_journaler->get_cached_client(
+    m_local_mirror_uuid, &remote_client);
+  if (r < 0) {
+    derr << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+    return;
+  }
+
+  librbd::journal::MirrorPeerClientMeta remote_client_meta;
+  std::string error;
+  r = validate_remote_client_state(remote_client, &remote_client_meta,
+                                   &m_resync_requested, &error);
+  if (r < 0) {
+    dout(0) << "client flagged disconnected, stopping image replay" << dendl;
+    handle_replay_complete(locker, r, error);
+  }
+}
+
+// Schedule a 30-second timer to flush the local replay while replaying;
+// no-op when not replaying or when a flush task is already pending.
+template <typename I>
+void Replayer<I>::schedule_flush_local_replay_task() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  std::unique_lock timer_locker{m_threads->timer_lock};
+  if (m_state != STATE_REPLAYING || m_flush_local_replay_task != nullptr) {
+    return;
+  }
+
+  dout(15) << dendl;
+  m_flush_local_replay_task = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      Replayer<I>, &Replayer<I>::handle_flush_local_replay_task>(this));
+  m_threads->timer->add_event_after(30, m_flush_local_replay_task);
+}
+
+// Cancel a pending scheduled flush task, if any (used during shut down).
+template <typename I>
+void Replayer<I>::cancel_flush_local_replay_task() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  std::unique_lock timer_locker{m_threads->timer_lock};
+  if (m_flush_local_replay_task != nullptr) {
+    dout(10) << dendl;
+    m_threads->timer->cancel_event(m_flush_local_replay_task);
+    m_flush_local_replay_task = nullptr;
+  }
+}
+
+// Timer callback: flush the local replay, then clear the task slot and
+// publish a status update; tracked so shut down waits for it.
+template <typename I>
+void Replayer<I>::handle_flush_local_replay_task(int) {
+  dout(15) << dendl;
+
+  m_in_flight_op_tracker.start_op();
+  auto on_finish = new LambdaContext([this](int) {
+    std::unique_lock locker{m_lock};
+
+    {
+      std::unique_lock timer_locker{m_threads->timer_lock};
+      m_flush_local_replay_task = nullptr;
+    }
+
+    notify_status_updated();
+    m_in_flight_op_tracker.finish_op();
+  });
+  flush_local_replay(on_finish);
+}
+
+// Flush the local journal replay; degenerates to a commit-position flush
+// (or a no-op) when replay is not active.
+template <typename I>
+void Replayer<I>::flush_local_replay(Context* on_flush) {
+  std::unique_lock locker{m_lock};
+  if (m_state != STATE_REPLAYING) {
+    locker.unlock();
+    on_flush->complete(0);
+    return;
+  } else if (m_local_journal_replay == nullptr) {
+    // raced w/ a tag creation stop/start, which implies that
+    // the replay is flushed
+    locker.unlock();
+    flush_commit_position(on_flush);
+    return;
+  }
+
+  dout(15) << dendl;
+  auto ctx = new LambdaContext(
+    [this, on_flush](int r) {
+      handle_flush_local_replay(on_flush, r);
+    });
+  m_local_journal_replay->flush(ctx);
+}
+
+// Completion for the local replay flush: on success continue with the
+// remote commit-position flush, otherwise propagate the error.
+template <typename I>
+void Replayer<I>::handle_flush_local_replay(Context* on_flush, int r) {
+  dout(15) << "r=" << r << dendl;
+  if (r < 0) {
+    derr << "error flushing local replay: " << cpp_strerror(r) << dendl;
+    on_flush->complete(r);
+    return;
+  }
+
+  flush_commit_position(on_flush);
+}
+
+// Flush the remote journaler's commit position so replay progress is
+// persisted remotely.  Completes on_flush with 0 if replay has already
+// stopped.
+template <typename I>
+void Replayer<I>::flush_commit_position(Context* on_flush) {
+  std::unique_lock locker{m_lock};
+  if (m_state != STATE_REPLAYING) {
+    locker.unlock();
+    on_flush->complete(0);
+    return;
+  }
+
+  dout(15) << dendl;
+  auto ctx = new LambdaContext(
+    [this, on_flush](int r) {
+      handle_flush_commit_position(on_flush, r);
+    });
+  m_state_builder->remote_journaler->flush_commit_position(ctx);
+}
+
+// Completion of the remote commit-position flush; errors are logged
+// and passed through to on_flush unchanged.
+template <typename I>
+void Replayer<I>::handle_flush_commit_position(Context* on_flush, int r) {
+  dout(15) << "r=" << r << dendl;
+  if (r < 0) {
+    derr << "error flushing remote journal commit position: "
+         << cpp_strerror(r) << dendl;
+  }
+
+  on_flush->complete(r);
+}
+
+// Record the first replay error; later errors are ignored so the
+// original failure cause is the one reported.
+template <typename I>
+void Replayer<I>::handle_replay_error(int r, const std::string &error) {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  if (m_error_code == 0) {
+    m_error_code = r;
+    m_error_description = error;
+  }
+}
+
+// Locking wrapper around the overload below.
+template <typename I>
+bool Replayer<I>::is_replay_complete() const {
+  std::unique_lock locker{m_lock};
+  return is_replay_complete(locker);
+}
+
+// True once the replayer has transitioned to STATE_COMPLETE.  The
+// unused lock parameter documents that m_lock must be held.
+template <typename I>
+bool Replayer<I>::is_replay_complete(
+    const std::unique_lock<ceph::mutex>&) const {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  return (m_state == STATE_COMPLETE);
+}
+
+// Locking wrapper around the overload below.
+template <typename I>
+void Replayer<I>::handle_replay_complete(int r, const std::string &error) {
+  std::unique_lock locker{m_lock};
+  handle_replay_complete(locker, r, error);
+}
+
+// Record a (possibly error) replay completion.  Idempotent: only the
+// first transition out of STATE_REPLAYING marks the state complete and
+// notifies the listener; errors are still recorded either way.
+template <typename I>
+void Replayer<I>::handle_replay_complete(
+    const std::unique_lock<ceph::mutex>&, int r, const std::string &error) {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  dout(10) << "r=" << r << ", error=" << error << dendl;
+  if (r < 0) {
+    derr << "replay encountered an error: " << cpp_strerror(r) << dendl;
+    handle_replay_error(r, error);
+  }
+
+  if (m_state != STATE_REPLAYING) {
+    return;
+  }
+
+  m_state = STATE_COMPLETE;
+  notify_status_updated();
+}
+
+// Locking wrapper around the overload below.
+template <typename I>
+void Replayer<I>::handle_replay_ready() {
+  std::unique_lock locker{m_lock};
+  handle_replay_ready(locker);
+}
+
+// Try to pop the next entry from the remote journaler and dispatch it
+// for replay.  If the entry belongs to a new tag epoch, replay_flush()
+// is invoked first to allocate a matching local tag.  The lock is
+// released once the entry is tracked by m_event_replay_tracker.
+template <typename I>
+void Replayer<I>::handle_replay_ready(
+    std::unique_lock<ceph::mutex>& locker) {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  dout(20) << dendl;
+  if (is_replay_complete(locker)) {
+    return;
+  }
+
+  if (!m_state_builder->remote_journaler->try_pop_front(&m_replay_entry,
+                                                        &m_replay_tag_tid)) {
+    dout(20) << "no entries ready for replay" << dendl;
+    return;
+  }
+
+  // can safely drop lock once the entry is tracked
+  m_event_replay_tracker.start_op();
+  locker.unlock();
+
+  // fix: separator was missing between the two fields, producing
+  // "entry tid=<N>tag_tid=<M>" in the debug log
+  dout(20) << "entry tid=" << m_replay_entry.get_commit_tid()
+           << ", tag_tid=" << m_replay_tag_tid << dendl;
+  if (!m_replay_tag_valid || m_replay_tag.tid != m_replay_tag_tid) {
+    // must allocate a new local journal tag prior to processing
+    replay_flush();
+    return;
+  }
+
+  preprocess_entry();
+}
+
+// Begin a tag-epoch transition: shut down the current local journal
+// replay (flushing all in-flight IO and ops) before a new external
+// replay is started for the new tag epoch.
+template <typename I>
+void Replayer<I>::replay_flush() {
+  dout(10) << dendl;
+  m_flush_tracker.start_op();
+
+  // shut down the replay to flush all IO and ops and create a new
+  // replayer to handle the new tag epoch
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_replay_flush_shut_down>(this);
+  ceph_assert(m_local_journal_replay != nullptr);
+  m_local_journal_replay->shut_down(false, ctx);
+}
+
+// The old local journal replay has been shut down: detach our journal
+// listener, stop external replay, then re-fetch the image's journal
+// (it may have been closed meanwhile) and restart external replay.
+template <typename I>
+void Replayer<I>::handle_replay_flush_shut_down(int r) {
+  std::unique_lock locker{m_lock};
+  dout(10) << "r=" << r << dendl;
+
+  ceph_assert(m_local_journal != nullptr);
+  ceph_assert(m_local_journal_listener != nullptr);
+
+  // blocks if listener notification is in-progress
+  m_local_journal->remove_listener(m_local_journal_listener);
+  delete m_local_journal_listener;
+  m_local_journal_listener = nullptr;
+
+  m_local_journal->stop_external_replay();
+  m_local_journal_replay = nullptr;
+  m_local_journal.reset();
+
+  if (r < 0) {
+    locker.unlock();
+
+    handle_replay_flush(r);
+    return;
+  }
+
+  // journal might have been closed now that we stopped external replay
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  std::shared_lock local_image_locker{local_image_ctx->image_lock};
+  m_local_journal = local_image_ctx->journal;
+  if (m_local_journal == nullptr) {
+    local_image_locker.unlock();
+    locker.unlock();
+
+    derr << "local image journal closed" << dendl;
+    handle_replay_flush(-EINVAL);
+    return;
+  }
+
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_replay_flush>(this);
+  m_local_journal->start_external_replay(&m_local_journal_replay, ctx);
+}
+
+// External replay restarted (or failed).  On success, re-register the
+// local journal listener and continue by fetching the remote tag; on
+// any early-out the tracked replay entry must be released.
+template <typename I>
+void Replayer<I>::handle_replay_flush(int r) {
+  std::unique_lock locker{m_lock};
+  dout(10) << "r=" << r << dendl;
+  m_flush_tracker.finish_op();
+
+  if (r < 0) {
+    derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl;
+    handle_replay_complete(locker, r, "replay flush encountered an error");
+    m_event_replay_tracker.finish_op();
+    return;
+  } else if (is_replay_complete(locker)) {
+    m_event_replay_tracker.finish_op();
+    return;
+  }
+
+  // check for resync/promotion state after adding listener
+  if (!add_local_journal_listener(locker)) {
+    m_event_replay_tracker.finish_op();
+    return;
+  }
+  locker.unlock();
+
+  get_remote_tag();
+}
+
+// Fetch the remote journal tag for the current replay entry's tag tid
+// so a matching local tag can be allocated.
+template <typename I>
+void Replayer<I>::get_remote_tag() {
+  dout(15) << "tag_tid: " << m_replay_tag_tid << dendl;
+
+  Context *ctx = create_context_callback<
+    Replayer, &Replayer<I>::handle_get_remote_tag>(this);
+  m_state_builder->remote_journaler->get_tag(m_replay_tag_tid, &m_replay_tag,
+                                             ctx);
+}
+
+// Decode the fetched remote tag.  A decode failure is mapped to
+// -EBADMSG; any failure completes the replay and releases the tracked
+// entry.  On success the tag is cached and local tag allocation runs.
+template <typename I>
+void Replayer<I>::handle_get_remote_tag(int r) {
+  dout(15) << "r=" << r << dendl;
+
+  if (r == 0) {
+    try {
+      auto it = m_replay_tag.data.cbegin();
+      decode(m_replay_tag_data, it);
+    } catch (const buffer::error &err) {
+      r = -EBADMSG;
+    }
+  }
+
+  if (r < 0) {
+    derr << "failed to retrieve remote tag " << m_replay_tag_tid << ": "
+         << cpp_strerror(r) << dendl;
+    handle_replay_complete(r, "failed to retrieve remote tag");
+    m_event_replay_tracker.finish_op();
+    return;
+  }
+
+  m_replay_tag_valid = true;
+  dout(15) << "decoded remote tag " << m_replay_tag_tid << ": "
+           << m_replay_tag_data << dendl;
+
+  allocate_local_tag();
+}
+
+// Allocate a local journal tag mirroring the remote tag: remote/local
+// mirror uuids are swapped so ownership is expressed from the local
+// image's point of view, and orphan (demotion) tags are special-cased.
+template <typename I>
+void Replayer<I>::allocate_local_tag() {
+  dout(15) << dendl;
+
+  std::string mirror_uuid = m_replay_tag_data.mirror_uuid;
+  if (mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) {
+    mirror_uuid = m_state_builder->remote_mirror_uuid;
+  } else if (mirror_uuid == m_local_mirror_uuid) {
+    mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID;
+  } else if (mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
+    // handle possible edge condition where daemon can failover and
+    // the local image has already been promoted/demoted
+    auto local_tag_data = m_local_journal->get_tag_data();
+    if (local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID &&
+        (local_tag_data.predecessor.commit_valid &&
+         local_tag_data.predecessor.mirror_uuid ==
+           librbd::Journal<>::LOCAL_MIRROR_UUID)) {
+      dout(15) << "skipping stale demotion event" << dendl;
+      handle_process_entry_safe(m_replay_entry, m_replay_bytes,
+                                m_replay_start_time, 0);
+      handle_replay_ready();
+      return;
+    } else {
+      dout(5) << "encountered image demotion: stopping" << dendl;
+      handle_replay_complete(0, "");
+      // NOTE(review): execution falls through to tag allocation below
+      // even after flagging completion -- confirm this is intended
+    }
+  }
+
+  // remap the predecessor's mirror uuid the same way as the tag owner
+  librbd::journal::TagPredecessor predecessor(m_replay_tag_data.predecessor);
+  if (predecessor.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) {
+    predecessor.mirror_uuid = m_state_builder->remote_mirror_uuid;
+  } else if (predecessor.mirror_uuid == m_local_mirror_uuid) {
+    predecessor.mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID;
+  }
+
+  dout(15) << "mirror_uuid=" << mirror_uuid << ", "
+           << "predecessor=" << predecessor << ", "
+           << "replay_tag_tid=" << m_replay_tag_tid << dendl;
+  Context *ctx = create_context_callback<
+    Replayer, &Replayer<I>::handle_allocate_local_tag>(this);
+  m_local_journal->allocate_tag(mirror_uuid, predecessor, ctx);
+}
+
+// Local tag allocated: continue with entry preprocessing, or complete
+// the replay (and release the tracked entry) on failure.
+template <typename I>
+void Replayer<I>::handle_allocate_local_tag(int r) {
+  dout(15) << "r=" << r << ", "
+           << "tag_tid=" << m_local_journal->get_tag_tid() << dendl;
+  if (r < 0) {
+    derr << "failed to allocate journal tag: " << cpp_strerror(r) << dendl;
+    handle_replay_complete(r, "failed to allocate journal tag");
+    m_event_replay_tracker.finish_op();
+    return;
+  }
+
+  preprocess_entry();
+}
+
+// Decode the popped replay entry into an event and either process it
+// immediately or, if a mirroring replay delay is configured, schedule
+// a delayed preprocess task on the timer.
+template <typename I>
+void Replayer<I>::preprocess_entry() {
+  dout(20) << "preprocessing entry tid=" << m_replay_entry.get_commit_tid()
+           << dendl;
+
+  bufferlist data = m_replay_entry.get_data();
+  auto it = data.cbegin();
+  int r = m_local_journal_replay->decode(&it, &m_event_entry);
+  if (r < 0) {
+    derr << "failed to decode journal event" << dendl;
+    handle_replay_complete(r, "failed to decode journal event");
+    m_event_replay_tracker.finish_op();
+    return;
+  }
+
+  m_replay_bytes = data.length();
+  uint32_t delay = calculate_replay_delay(
+    m_event_entry.timestamp,
+    m_state_builder->local_image_ctx->mirroring_replay_delay);
+  if (delay == 0) {
+    handle_preprocess_entry_ready(0);
+    return;
+  }
+
+  std::unique_lock locker{m_lock};
+  if (is_replay_complete(locker)) {
+    // don't schedule a delayed replay task if a shut-down is in-progress
+    m_event_replay_tracker.finish_op();
+    return;
+  }
+
+  dout(20) << "delaying replay by " << delay << " sec" << dendl;
+  std::unique_lock timer_locker{m_threads->timer_lock};
+  ceph_assert(m_delayed_preprocess_task == nullptr);
+  m_delayed_preprocess_task = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_delayed_preprocess_task>(this);
+  m_threads->timer->add_event_after(delay, m_delayed_preprocess_task);
+}
+
+// Timer callback (runs with timer_lock held) that re-queues entry
+// preprocessing onto the work queue.
+template <typename I>
+void Replayer<I>::handle_delayed_preprocess_task(int r) {
+  dout(20) << "r=" << r << dendl;
+
+  ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+  m_delayed_preprocess_task = nullptr;
+
+  m_threads->work_queue->queue(create_context_callback<
+    Replayer, &Replayer<I>::handle_preprocess_entry_ready>(this), 0);
+}
+
+// Ready to replay the decoded event: run the event preprocessor if the
+// event requires it, otherwise process the entry directly.  Also marks
+// the replay start time used for latency accounting.
+template <typename I>
+void Replayer<I>::handle_preprocess_entry_ready(int r) {
+  dout(20) << "r=" << r << dendl;
+  ceph_assert(r == 0);
+
+  m_replay_start_time = ceph_clock_now();
+  if (!m_event_preprocessor->is_required(m_event_entry)) {
+    process_entry();
+    return;
+  }
+
+  Context *ctx = create_context_callback<
+    Replayer, &Replayer<I>::handle_preprocess_entry_safe>(this);
+  m_event_preprocessor->preprocess(&m_event_entry, ctx);
+}
+
+// Preprocessing completed.  -ECANCELED (lost exclusive lock) ends the
+// replay without recording an error; other failures are fatal.
+template <typename I>
+void Replayer<I>::handle_preprocess_entry_safe(int r) {
+  dout(20) << "r=" << r << dendl;
+
+  if (r < 0) {
+    if (r == -ECANCELED) {
+      handle_replay_complete(0, "lost exclusive lock");
+    } else {
+      derr << "failed to preprocess journal event" << dendl;
+      handle_replay_complete(r, "failed to preprocess journal event");
+    }
+
+    m_event_replay_tracker.finish_op();
+    return;
+  }
+
+  process_entry();
+}
+
+// Apply the decoded event through the local journal replay.  on_ready
+// fires when the next entry may be fetched; on_commit fires once the
+// event is safely committed (see handle_process_entry_safe).
+template <typename I>
+void Replayer<I>::process_entry() {
+  dout(20) << "processing entry tid=" << m_replay_entry.get_commit_tid()
+           << dendl;
+
+  Context *on_ready = create_context_callback<
+    Replayer, &Replayer<I>::handle_process_entry_ready>(this);
+  Context *on_commit = new C_ReplayCommitted(this, std::move(m_replay_entry),
+                                             m_replay_bytes,
+                                             m_replay_start_time);
+
+  m_local_journal_replay->process(m_event_entry, on_ready, on_commit);
+}
+
+// The replay is ready for the next entry: refresh the image spec (and
+// re-register perf counters if it changed, e.g. after a rename),
+// update replay statistics, then attempt to pop the next event.
+template <typename I>
+void Replayer<I>::handle_process_entry_ready(int r) {
+  std::unique_lock locker{m_lock};
+
+  dout(20) << dendl;
+  ceph_assert(r == 0);
+
+  bool update_status = false;
+  {
+    auto local_image_ctx = m_state_builder->local_image_ctx;
+    std::shared_lock image_locker{local_image_ctx->image_lock};
+    auto image_spec = util::compute_image_spec(local_image_ctx->md_ctx,
+                                               local_image_ctx->name);
+    if (m_image_spec != image_spec) {
+      m_image_spec = image_spec;
+      update_status = true;
+    }
+  }
+
+  m_replay_status_formatter->handle_entry_processed(m_replay_bytes);
+
+  if (update_status) {
+    unregister_perf_counters();
+    register_perf_counters();
+    notify_status_updated();
+  }
+
+  // attempt to process the next event
+  handle_replay_ready(locker);
+}
+
+// The event was committed locally (or failed): acknowledge the entry
+// to the remote journaler, update perf counters, schedule a deferred
+// commit-position flush, and release the tracked entry.
+template <typename I>
+void Replayer<I>::handle_process_entry_safe(
+    const ReplayEntry &replay_entry, uint64_t replay_bytes,
+    const utime_t &replay_start_time, int r) {
+  dout(20) << "commit_tid=" << replay_entry.get_commit_tid() << ", r=" << r
+           << dendl;
+
+  if (r < 0) {
+    derr << "failed to commit journal event: " << cpp_strerror(r) << dendl;
+    handle_replay_complete(r, "failed to commit journal event");
+  } else {
+    ceph_assert(m_state_builder->remote_journaler != nullptr);
+    m_state_builder->remote_journaler->committed(replay_entry);
+  }
+
+  auto latency = ceph_clock_now() - replay_start_time;
+  if (g_journal_perf_counters) {
+    g_journal_perf_counters->inc(l_rbd_mirror_replay);
+    g_journal_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes);
+    g_journal_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
+  }
+
+  // counter updates and tracker release are deferred to the work queue
+  // so they run outside the journal commit callback context
+  auto ctx = new LambdaContext(
+    [this, replay_bytes, latency](int r) {
+      std::unique_lock locker{m_lock};
+      schedule_flush_local_replay_task();
+
+      if (m_perf_counters) {
+        m_perf_counters->inc(l_rbd_mirror_replay);
+        m_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes);
+        m_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
+      }
+
+      m_event_replay_tracker.finish_op();
+    });
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Local journal listener callback: a resync was requested, so flag it
+// and end the replay without an error.
+template <typename I>
+void Replayer<I>::handle_resync_image() {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  m_resync_requested = true;
+  handle_replay_complete(locker, 0, "resync requested");
+}
+
+// Asynchronously notify the listener of a status change.  Queued on
+// the work queue (and tracked) so the callback runs without m_lock.
+template <typename I>
+void Replayer<I>::notify_status_updated() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  dout(10) << dendl;
+
+  auto ctx = new C_TrackedOp(m_in_flight_op_tracker, new LambdaContext(
+    [this](int) {
+      m_replayer_listener->handle_notification();
+    }));
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Cancel a pending delayed-preprocess timer task.  If one was actually
+// canceled, its tracked replay entry is released here since the task
+// callback will never run.
+template <typename I>
+void Replayer<I>::cancel_delayed_preprocess_task() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  bool canceled_delayed_preprocess_task = false;
+  {
+    std::unique_lock timer_locker{m_threads->timer_lock};
+    if (m_delayed_preprocess_task != nullptr) {
+      dout(10) << dendl;
+      canceled_delayed_preprocess_task = m_threads->timer->cancel_event(
+        m_delayed_preprocess_task);
+      ceph_assert(canceled_delayed_preprocess_task);
+      m_delayed_preprocess_task = nullptr;
+    }
+  }
+
+  if (canceled_delayed_preprocess_task) {
+    // wake up sleeping replay
+    m_event_replay_tracker.finish_op();
+  }
+}
+
+// Decode and validate the registered remote journal client.  Returns
+// -EBADMSG when the client metadata cannot be decoded, -ENOTCONN when
+// the client was flagged disconnected (optionally requesting an
+// automatic resync per rbd_mirroring_resync_after_disconnect), else 0.
+template <typename I>
+int Replayer<I>::validate_remote_client_state(
+    const cls::journal::Client& remote_client,
+    librbd::journal::MirrorPeerClientMeta* remote_client_meta,
+    bool* resync_requested, std::string* error) {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  if (!util::decode_client_meta(remote_client, remote_client_meta)) {
+    // require operator intervention since the data is corrupt
+    *error = "error retrieving remote journal client";
+    return -EBADMSG;
+  }
+
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  dout(5) << "image_id=" << local_image_ctx->id << ", "
+          << "remote_client_meta.image_id="
+          << remote_client_meta->image_id << ", "
+          << "remote_client.state=" << remote_client.state << dendl;
+  if (remote_client_meta->image_id == local_image_ctx->id &&
+      remote_client.state != cls::journal::CLIENT_STATE_CONNECTED) {
+    dout(5) << "client flagged disconnected, stopping image replay" << dendl;
+    if (local_image_ctx->config.template get_val<bool>(
+          "rbd_mirroring_resync_after_disconnect")) {
+      dout(10) << "disconnected: automatic resync" << dendl;
+      *resync_requested = true;
+      *error = "disconnected: automatic resync";
+      return -ENOTCONN;
+    } else {
+      dout(10) << "disconnected" << dendl;
+      *error = "disconnected";
+      return -ENOTCONN;
+    }
+  }
+
+  return 0;
+}
+
+// Create and register per-image replay perf counters, named after the
+// current image spec.  Caller must hold m_lock; counters must not
+// already be registered.
+template <typename I>
+void Replayer<I>::register_perf_counters() {
+  dout(5) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  ceph_assert(m_perf_counters == nullptr);
+
+  auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
+  auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
+  PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_image_" + m_image_spec,
+                          l_rbd_mirror_journal_first, l_rbd_mirror_journal_last);
+  plb.add_u64_counter(l_rbd_mirror_replay, "replay", "Replays", "r", prio);
+  plb.add_u64_counter(l_rbd_mirror_replay_bytes, "replay_bytes",
+                      "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
+  plb.add_time_avg(l_rbd_mirror_replay_latency, "replay_latency",
+                   "Replay latency", "rl", prio);
+  m_perf_counters = plb.create_perf_counters();
+  g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
+}
+
+// Remove and free the per-image perf counters, if registered.  Safe to
+// call when none exist.  Caller must hold m_lock.
+template <typename I>
+void Replayer<I>::unregister_perf_counters() {
+  dout(5) << dendl;
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  PerfCounters *perf_counters = nullptr;
+  std::swap(perf_counters, m_perf_counters);
+
+  if (perf_counters != nullptr) {
+    g_ceph_context->get_perfcounters_collection()->remove(perf_counters);
+    delete perf_counters;
+  }
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::Replayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.h b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h
new file mode 100644
index 000000000..6b1f36d9c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h
@@ -0,0 +1,323 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H
+
+#include "tools/rbd_mirror/image_replayer/Replayer.h"
+#include "include/utime.h"
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "common/RefCountedObj.h"
+#include "cls/journal/cls_journal_types.h"
+#include "journal/ReplayEntry.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <string>
+#include <type_traits>
+
+namespace journal { class Journaler; }
+namespace librbd {
+
+struct ImageCtx;
+namespace journal { template <typename I> class Replay; }
+
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+struct ReplayerListener;
+
+namespace journal {
+
+template <typename> class EventPreprocessor;
+template <typename> class ReplayStatusFormatter;
+template <typename> class StateBuilder;
+
+// Journal-based image replayer: tails the remote image's journal and
+// applies the replayed events to the local image via external journal
+// replay.  See the state diagram below for the replay lifecycle.
+template <typename ImageCtxT>
+class Replayer : public image_replayer::Replayer {
+public:
+  typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler;
+
+  static Replayer* create(
+      Threads<ImageCtxT>* threads,
+      const std::string& local_mirror_uuid,
+      StateBuilder<ImageCtxT>* state_builder,
+      ReplayerListener* replayer_listener) {
+    return new Replayer(threads, local_mirror_uuid, state_builder,
+                        replayer_listener);
+  }
+
+  Replayer(
+      Threads<ImageCtxT>* threads,
+      const std::string& local_mirror_uuid,
+      StateBuilder<ImageCtxT>* state_builder,
+      ReplayerListener* replayer_listener);
+  ~Replayer();
+
+  void destroy() override {
+    delete this;
+  }
+
+  void init(Context* on_finish) override;
+  void shut_down(Context* on_finish) override;
+
+  void flush(Context* on_finish) override;
+
+  bool get_replay_status(std::string* description, Context* on_finish) override;
+
+  bool is_replaying() const override {
+    std::unique_lock locker{m_lock};
+    return (m_state == STATE_REPLAYING);
+  }
+
+  bool is_resync_requested() const override {
+    std::unique_lock locker(m_lock);
+    return m_resync_requested;
+  }
+
+  int get_error_code() const override {
+    std::unique_lock locker(m_lock);
+    return m_error_code;
+  }
+
+  std::string get_error_description() const override {
+    std::unique_lock locker(m_lock);
+    return m_error_description;
+  }
+
+  std::string get_image_spec() const {
+    std::unique_lock locker(m_lock);
+    return m_image_spec;
+  }
+
+private:
+  /**
+   * @verbatim
+   *
+   * <init>
+   *    |
+   *    v                                       (error)
+   * INIT_REMOTE_JOURNALER  * * * * * * * * * * * * * * * * * * *
+   *    |                                                       *
+   *    v                                       (error)         *
+   * START_EXTERNAL_REPLAY  * * * * * * * * * * * * * * * * * * *
+   *    |                                                       *
+   *    |  /--------------------------------------------\       *
+   *    |  |                                            |       *
+   *    v  v   (asok flush)                             |       *
+   * REPLAYING -------------> LOCAL_REPLAY_FLUSH        |       *
+   *    |       \                 |                     |       *
+   *    |       |                 v                     |       *
+   *    |       |             FLUSH_COMMIT_POSITION     |       *
+   *    |       |                 |                     |       *
+   *    |       |                 \--------------------/|       *
+   *    |       |                                       |       *
+   *    |       | (entries available)                   |       *
+   *    |       \-----------> REPLAY_READY              |       *
+   *    |                         |                     |       *
+   *    |                         | (skip if not        |       *
+   *    |                         v  needed)        (error)     *
+   *    |                     REPLAY_FLUSH  * * * * * * * * *   *
+   *    |                         |                     |   *   *
+   *    |                         | (skip if not        |   *   *
+   *    |                         v  needed)        (error) *   *
+   *    |                     GET_REMOTE_TAG  * * * * * * * *   *
+   *    |                         |                     |   *   *
+   *    |                         | (skip if not        |   *   *
+   *    |                         v  needed)        (error) *   *
+   *    |                     ALLOCATE_LOCAL_TAG  * * * * * *   *
+   *    |                         |                     |   *   *
+   *    |                         v                 (error) *   *
+   *    |                     PREPROCESS_ENTRY  * * * * * * *   *
+   *    |                         |                     |   *   *
+   *    |                         v                 (error) *   *
+   *    |                     PROCESS_ENTRY * * * * * * * * *   *
+   *    |                         |                     |   *   *
+   *    |                         \---------------------/   *   *
+   *    v (shutdown)                                        *   *
+   * REPLAY_COMPLETE  < * * * * * * * * * * * * * * * * * * *   *
+   *    |                                                       *
+   *    v                                                       *
+   * WAIT_FOR_FLUSH                                             *
+   *    |                                                       *
+   *    v                                                       *
+   * SHUT_DOWN_LOCAL_JOURNAL_REPLAY                             *
+   *    |                                                       *
+   *    v                                                       *
+   * WAIT_FOR_REPLAY                                            *
+   *    |                                                       *
+   *    v                                                       *
+   * CLOSE_LOCAL_IMAGE  < * * * * * * * * * * * * * * * * * * * *
+   *    |
+   *    v (skip if not started)
+   * STOP_REMOTE_JOURNALER_REPLAY
+   *    |
+   *    v
+   * WAIT_FOR_IN_FLIGHT_OPS
+   *    |
+   *    v
+   * <shutdown>
+   *
+   * @endverbatim
+   */
+
+  typedef typename librbd::journal::TypeTraits<ImageCtxT>::ReplayEntry ReplayEntry;
+
+  enum State {
+    STATE_INIT,
+    STATE_REPLAYING,
+    STATE_COMPLETE
+  };
+
+  struct C_ReplayCommitted;
+  struct RemoteJournalerListener;
+  struct RemoteReplayHandler;
+  struct LocalJournalListener;
+
+  Threads<ImageCtxT>* m_threads;
+  std::string m_local_mirror_uuid;
+  StateBuilder<ImageCtxT>* m_state_builder;
+  ReplayerListener* m_replayer_listener;
+
+  // guards all mutable state below
+  mutable ceph::mutex m_lock;
+
+  std::string m_image_spec;
+  Context* m_on_init_shutdown = nullptr;
+
+  State m_state = STATE_INIT;
+  int m_error_code = 0;
+  std::string m_error_description;
+  bool m_resync_requested = false;
+
+  ceph::ref_t<typename std::remove_pointer<decltype(ImageCtxT::journal)>::type>
+    m_local_journal;
+  RemoteJournalerListener* m_remote_listener = nullptr;
+
+  librbd::journal::Replay<ImageCtxT>* m_local_journal_replay = nullptr;
+  EventPreprocessor<ImageCtxT>* m_event_preprocessor = nullptr;
+  ReplayStatusFormatter<ImageCtxT>* m_replay_status_formatter = nullptr;
+  RemoteReplayHandler* m_remote_replay_handler = nullptr;
+  LocalJournalListener* m_local_journal_listener = nullptr;
+
+  PerfCounters *m_perf_counters = nullptr;
+
+  // state for the entry currently being replayed
+  ReplayEntry m_replay_entry;
+  uint64_t m_replay_bytes = 0;
+  utime_t m_replay_start_time;
+  bool m_replay_tag_valid = false;
+  uint64_t m_replay_tag_tid = 0;
+  cls::journal::Tag m_replay_tag;
+  librbd::journal::TagData m_replay_tag_data;
+  librbd::journal::EventEntry m_event_entry;
+
+  AsyncOpTracker m_flush_tracker;
+
+  AsyncOpTracker m_event_replay_tracker;
+  Context *m_delayed_preprocess_task = nullptr;
+
+  AsyncOpTracker m_in_flight_op_tracker;
+  // pending deferred commit-position flush (timer event)
+  Context *m_flush_local_replay_task = nullptr;
+
+  void handle_remote_journal_metadata_updated();
+
+  void schedule_flush_local_replay_task();
+  void cancel_flush_local_replay_task();
+  void handle_flush_local_replay_task(int r);
+
+  void flush_local_replay(Context* on_flush);
+  void handle_flush_local_replay(Context* on_flush, int r);
+
+  void flush_commit_position(Context* on_flush);
+  void handle_flush_commit_position(Context* on_flush, int r);
+
+  void init_remote_journaler();
+  void handle_init_remote_journaler(int r);
+
+  void start_external_replay(std::unique_lock<ceph::mutex>& locker);
+  void handle_start_external_replay(int r);
+
+  bool add_local_journal_listener(std::unique_lock<ceph::mutex>& locker);
+
+  bool notify_init_complete(std::unique_lock<ceph::mutex>& locker);
+
+  void wait_for_flush();
+  void handle_wait_for_flush(int r);
+
+  void shut_down_local_journal_replay();
+  void handle_shut_down_local_journal_replay(int r);
+
+  void wait_for_event_replay();
+  void handle_wait_for_event_replay(int r);
+
+  void close_local_image();
+  void handle_close_local_image(int r);
+
+  void stop_remote_journaler_replay();
+  void handle_stop_remote_journaler_replay(int r);
+
+  void wait_for_in_flight_ops();
+  void handle_wait_for_in_flight_ops(int r);
+
+  void replay_flush();
+  void handle_replay_flush_shut_down(int r);
+  void handle_replay_flush(int r);
+
+  void get_remote_tag();
+  void handle_get_remote_tag(int r);
+
+  void allocate_local_tag();
+  void handle_allocate_local_tag(int r);
+
+  void handle_replay_error(int r, const std::string &error);
+
+  bool is_replay_complete() const;
+  bool is_replay_complete(const std::unique_lock<ceph::mutex>& locker) const;
+
+  void handle_replay_complete(int r, const std::string &error_desc);
+  void handle_replay_complete(const std::unique_lock<ceph::mutex>&,
+                              int r, const std::string &error_desc);
+  void handle_replay_ready();
+  void handle_replay_ready(std::unique_lock<ceph::mutex>& locker);
+
+  void preprocess_entry();
+  void handle_delayed_preprocess_task(int r);
+  void handle_preprocess_entry_ready(int r);
+  void handle_preprocess_entry_safe(int r);
+
+  void process_entry();
+  void handle_process_entry_ready(int r);
+  // fix: parameter was misspelled "relay_bytes"; renamed to match the
+  // definition in Replayer.cc (declaration-only change)
+  void handle_process_entry_safe(const ReplayEntry& replay_entry,
+                                 uint64_t replay_bytes,
+                                 const utime_t &replay_start_time, int r);
+
+  void handle_resync_image();
+
+  void notify_status_updated();
+
+  void cancel_delayed_preprocess_task();
+
+  int validate_remote_client_state(
+      const cls::journal::Client& remote_client,
+      librbd::journal::MirrorPeerClientMeta* remote_client_meta,
+      bool* resync_requested, std::string* error);
+
+  void register_perf_counters();
+  void unregister_perf_counters();
+
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::Replayer<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc
new file mode 100644
index 000000000..5f1fb0e2f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc
@@ -0,0 +1,149 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "StateBuilder.h"
+#include "include/ceph_assert.h"
+#include "include/Context.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h"
+#include "tools/rbd_mirror/image_replayer/journal/Replayer.h"
+#include "tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "StateBuilder: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+// Construct a journal-mode state builder for the given global image id.
+template <typename I>
+StateBuilder<I>::StateBuilder(const std::string& global_image_id)
+  : image_replayer::StateBuilder<I>(global_image_id) {
+}
+
+// The remote journaler must have been shut down (via close()) before
+// destruction.
+template <typename I>
+StateBuilder<I>::~StateBuilder() {
+  ceph_assert(remote_journaler == nullptr);
+}
+
+// Tear down in order: remote image, then local image, then the remote
+// journaler (contexts are chained in reverse order of execution).
+template <typename I>
+void StateBuilder<I>::close(Context* on_finish) {
+  dout(10) << dendl;
+
+  // close the remote journaler after closing the local image
+  // in case we have lost contact w/ the remote cluster and
+  // will block
+  on_finish = new LambdaContext([this, on_finish](int) {
+    shut_down_remote_journaler(on_finish);
+  });
+  on_finish = new LambdaContext([this, on_finish](int) {
+    this->close_local_image(on_finish);
+  });
+  this->close_remote_image(on_finish);
+}
+
+// True when the remote journal client was flagged disconnected.
+template <typename I>
+bool StateBuilder<I>::is_disconnected() const {
+  return (remote_client_state == cls::journal::CLIENT_STATE_DISCONNECTED);
+}
+
+// True when the local image's primary mirror uuid matches the remote
+// mirror uuid (i.e. the local image is linked to this remote).
+template <typename I>
+bool StateBuilder<I>::is_linked_impl() const {
+  ceph_assert(!this->remote_mirror_uuid.empty());
+  return (local_primary_mirror_uuid == this->remote_mirror_uuid);
+}
+
+// Journal-mode state builder always mirrors in journal mode.
+template <typename I>
+cls::rbd::MirrorImageMode StateBuilder<I>::get_mirror_image_mode() const {
+  return cls::rbd::MIRROR_IMAGE_MODE_JOURNAL;
+}
+
+// Create (and cache on the base class) the journal sync-point handler.
+template <typename I>
+image_sync::SyncPointHandler* StateBuilder<I>::create_sync_point_handler() {
+  dout(10) << dendl;
+
+  this->m_sync_point_handler = SyncPointHandler<I>::create(this);
+  return this->m_sync_point_handler;
+}
+
+// Build the request that creates the local image copy.
+// NOTE(review): the global_image_id parameter is shadowed by the
+// member this->global_image_id passed below -- presumably identical;
+// confirm against callers.
+template <typename I>
+BaseRequest* StateBuilder<I>::create_local_image_request(
+    Threads<I>* threads,
+    librados::IoCtx& local_io_ctx,
+    const std::string& global_image_id,
+    PoolMetaCache* pool_meta_cache,
+    ProgressContext* progress_ctx,
+    Context* on_finish) {
+  return CreateLocalImageRequest<I>::create(
+    threads, local_io_ctx, this->remote_image_ctx, this->global_image_id,
+    pool_meta_cache, progress_ctx, this, on_finish);
+}
+
+// Build the request that validates the journal state prior to replay,
+// reporting whether a resync or an in-progress sync was detected.
+template <typename I>
+BaseRequest* StateBuilder<I>::create_prepare_replay_request(
+    const std::string& local_mirror_uuid,
+    ProgressContext* progress_ctx,
+    bool* resync_requested,
+    bool* syncing,
+    Context* on_finish) {
+  return PrepareReplayRequest<I>::create(
+    local_mirror_uuid, progress_ctx, this, resync_requested, syncing,
+    on_finish);
+}
+
+// Build the journal-based replayer.  The instance watcher and pool
+// meta cache parameters are unused in journal mode.
+template <typename I>
+image_replayer::Replayer* StateBuilder<I>::create_replayer(
+    Threads<I>* threads,
+    InstanceWatcher<I>* instance_watcher,
+    const std::string& local_mirror_uuid,
+    PoolMetaCache* pool_meta_cache,
+    ReplayerListener* replayer_listener) {
+  return Replayer<I>::create(
+    threads, local_mirror_uuid, this, replayer_listener);
+}
+
+// Shut down the remote journaler, if one exists; otherwise complete
+// on_finish immediately with success.
+template <typename I>
+void StateBuilder<I>::shut_down_remote_journaler(Context* on_finish) {
+  if (remote_journaler == nullptr) {
+    on_finish->complete(0);
+    return;
+  }
+
+  dout(10) << dendl;
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+    handle_shut_down_remote_journaler(r, on_finish);
+  });
+  remote_journaler->shut_down(ctx);
+}
+
+// Journaler shutdown finished: free it (even on error, which is only
+// logged) and propagate the result to on_finish.
+template <typename I>
+void StateBuilder<I>::handle_shut_down_remote_journaler(int r,
+                                                        Context* on_finish) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to shut down remote journaler: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  delete remote_journaler;
+  remote_journaler = nullptr;
+  on_finish->complete(r);
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::StateBuilder<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h
new file mode 100644
index 000000000..790d1390b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H
+
+#include "tools/rbd_mirror/image_replayer/StateBuilder.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <string>
+
+struct Context;
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename> class SyncPointHandler;
+
+template <typename ImageCtxT>
+class StateBuilder : public image_replayer::StateBuilder<ImageCtxT> {
+public:
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+
+ static StateBuilder* create(const std::string& global_image_id) {
+ return new StateBuilder(global_image_id);
+ }
+
+ StateBuilder(const std::string& global_image_id);
+ ~StateBuilder() override;
+
+ void close(Context* on_finish) override;
+
+ bool is_disconnected() const override;
+
+ cls::rbd::MirrorImageMode get_mirror_image_mode() const override;
+
+ image_sync::SyncPointHandler* create_sync_point_handler() override;
+
+ bool replay_requires_remote_image() const override {
+ return false;
+ }
+
+ BaseRequest* create_local_image_request(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ Context* on_finish) override;
+
+ BaseRequest* create_prepare_replay_request(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) override;
+
+ image_replayer::Replayer* create_replayer(
+ Threads<ImageCtxT>* threads,
+ InstanceWatcher<ImageCtxT>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ ReplayerListener* replayer_listener) override;
+
+ std::string local_primary_mirror_uuid;
+
+ Journaler* remote_journaler = nullptr;
+ cls::journal::ClientState remote_client_state =
+ cls::journal::CLIENT_STATE_CONNECTED;
+ librbd::journal::MirrorPeerClientMeta remote_client_meta;
+
+ SyncPointHandler<ImageCtxT>* sync_point_handler = nullptr;
+
+private:
+ bool is_linked_impl() const override;
+
+ void shut_down_remote_journaler(Context* on_finish);
+ void handle_shut_down_remote_journaler(int r, Context* on_finish);
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::StateBuilder<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc
new file mode 100644
index 000000000..66d13e555
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc
@@ -0,0 +1,109 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SyncPointHandler.h"
+#include "StateBuilder.h"
+#include "include/ceph_assert.h"
+#include "include/Context.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "SyncPointHandler: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename I>
+SyncPointHandler<I>::SyncPointHandler(StateBuilder<I>* state_builder)
+ : m_state_builder(state_builder),
+ m_client_meta_copy(state_builder->remote_client_meta) {
+}
+
+template <typename I>
+typename SyncPointHandler<I>::SyncPoints
+SyncPointHandler<I>::get_sync_points() const {
+ SyncPoints sync_points;
+ for (auto& sync_point : m_client_meta_copy.sync_points) {
+ sync_points.emplace_back(
+ sync_point.snap_namespace,
+ sync_point.snap_name,
+ sync_point.from_snap_name,
+ sync_point.object_number);
+ }
+ return sync_points;
+}
+
+template <typename I>
+librbd::SnapSeqs SyncPointHandler<I>::get_snap_seqs() const {
+ return m_client_meta_copy.snap_seqs;
+}
+
+template <typename I>
+void SyncPointHandler<I>::update_sync_points(
+ const librbd::SnapSeqs& snap_seqs, const SyncPoints& sync_points,
+ bool sync_complete, Context* on_finish) {
+ dout(10) << dendl;
+
+ if (sync_complete && sync_points.empty()) {
+ m_client_meta_copy.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+ }
+
+ m_client_meta_copy.snap_seqs = snap_seqs;
+ m_client_meta_copy.sync_points.clear();
+ for (auto& sync_point : sync_points) {
+ m_client_meta_copy.sync_points.emplace_back(
+ sync_point.snap_namespace,
+ sync_point.snap_name,
+ sync_point.from_snap_name,
+ sync_point.object_number);
+
+ if (sync_point.object_number) {
+ m_client_meta_copy.sync_object_count = std::max(
+ m_client_meta_copy.sync_object_count, *sync_point.object_number + 1);
+ }
+ }
+
+ dout(20) << "client_meta=" << m_client_meta_copy << dendl;
+ bufferlist client_data_bl;
+ librbd::journal::ClientData client_data{m_client_meta_copy};
+ encode(client_data, client_data_bl);
+
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_update_sync_points(r, on_finish);
+ });
+ m_state_builder->remote_journaler->update_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void SyncPointHandler<I>::handle_update_sync_points(int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r >= 0) {
+ m_state_builder->remote_client_meta.snap_seqs =
+ m_client_meta_copy.snap_seqs;
+ m_state_builder->remote_client_meta.sync_points =
+ m_client_meta_copy.sync_points;
+ } else {
+ derr << "failed to update remote journal client meta for image "
+ << m_state_builder->global_image_id << ": " << cpp_strerror(r)
+ << dendl;
+ }
+
+ on_finish->complete(r);
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::SyncPointHandler<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h
new file mode 100644
index 000000000..b4f492c19
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H
+
+#include "tools/rbd_mirror/image_sync/Types.h"
+#include "librbd/journal/Types.h"
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class SyncPointHandler : public image_sync::SyncPointHandler {
+public:
+ using SyncPoint = image_sync::SyncPoint;
+ using SyncPoints = image_sync::SyncPoints;
+
+ static SyncPointHandler* create(StateBuilder<ImageCtxT>* state_builder) {
+ return new SyncPointHandler(state_builder);
+ }
+ SyncPointHandler(StateBuilder<ImageCtxT>* state_builder);
+
+ SyncPoints get_sync_points() const override;
+ librbd::SnapSeqs get_snap_seqs() const override;
+
+ void update_sync_points(const librbd::SnapSeqs& snap_seqs,
+ const SyncPoints& sync_points,
+ bool sync_complete,
+ Context* on_finish) override;
+
+private:
+ StateBuilder<ImageCtxT>* m_state_builder;
+
+ librbd::journal::MirrorPeerClientMeta m_client_meta_copy;
+
+ void handle_update_sync_points(int r, Context* on_finish);
+
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::SyncPointHandler<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc
new file mode 100644
index 000000000..2ed321738
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc
@@ -0,0 +1,658 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ApplyImageStateRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/image/GetMetadataRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/Utils.h"
+#include <boost/algorithm/string/predicate.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "ApplyImageStateRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+ApplyImageStateRequest<I>::ApplyImageStateRequest(
+ const std::string& local_mirror_uuid,
+ const std::string& remote_mirror_uuid,
+ I* local_image_ctx,
+ I* remote_image_ctx,
+ librbd::mirror::snapshot::ImageState image_state,
+ Context* on_finish)
+ : m_local_mirror_uuid(local_mirror_uuid),
+ m_remote_mirror_uuid(remote_mirror_uuid),
+ m_local_image_ctx(local_image_ctx),
+ m_remote_image_ctx(remote_image_ctx),
+ m_image_state(image_state),
+ m_on_finish(on_finish) {
+ dout(15) << "image_state=" << m_image_state << dendl;
+
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+ m_features = m_local_image_ctx->features & ~RBD_FEATURES_IMPLICIT_ENABLE;
+ compute_local_to_remote_snap_ids();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::send() {
+ rename_image();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::rename_image() {
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+ if (m_local_image_ctx->name == m_image_state.name) {
+ image_locker.unlock();
+ owner_locker.unlock();
+
+ update_features();
+ return;
+ }
+ image_locker.unlock();
+
+ dout(15) << "local_image_name=" << m_local_image_ctx->name << ", "
+ << "remote_image_name=" << m_image_state.name << dendl;
+
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_rename_image>(this);
+ m_local_image_ctx->operations->execute_rename(m_image_state.name, ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_rename_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to rename image to '" << m_image_state.name << "': "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ update_features();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::update_features() {
+ uint64_t feature_updates = 0UL;
+ bool enabled = false;
+
+ auto image_state_features =
+ m_image_state.features & ~RBD_FEATURES_IMPLICIT_ENABLE;
+ feature_updates = (m_features & ~image_state_features);
+ if (feature_updates == 0UL) {
+ feature_updates = (image_state_features & ~m_features);
+ enabled = (feature_updates != 0UL);
+ }
+
+ if (feature_updates == 0UL) {
+ get_image_meta();
+ return;
+ }
+
+ dout(15) << "image_features=" << m_features << ", "
+ << "state_features=" << image_state_features << ", "
+ << "feature_updates=" << feature_updates << ", "
+ << "enabled=" << enabled << dendl;
+
+ if (enabled) {
+ m_features |= feature_updates;
+ } else {
+ m_features &= ~feature_updates;
+ }
+
+ std::shared_lock owner_lock{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_update_features>(this);
+ m_local_image_ctx->operations->execute_update_features(
+ feature_updates, enabled, ctx, 0U);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_update_features(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update image features: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ update_features();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::get_image_meta() {
+ dout(15) << dendl;
+
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_get_image_meta>(this);
+ auto req = librbd::image::GetMetadataRequest<I>::create(
+ m_local_image_ctx->md_ctx, m_local_image_ctx->header_oid, true, "", "", 0U,
+ &m_metadata, ctx);
+ req->send();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_get_image_meta(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to fetch local image metadata: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ update_image_meta();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::update_image_meta() {
+ std::set<std::string> keys_to_remove;
+ for (const auto& [key, value] : m_metadata) {
+ if (m_image_state.metadata.count(key) == 0) {
+ dout(15) << "removing image-meta key '" << key << "'" << dendl;
+ keys_to_remove.insert(key);
+ }
+ }
+
+ std::map<std::string, bufferlist> metadata_to_update;
+ for (const auto& [key, value] : m_image_state.metadata) {
+ auto it = m_metadata.find(key);
+ if (it == m_metadata.end() || !it->second.contents_equal(value)) {
+ dout(15) << "updating image-meta key '" << key << "'" << dendl;
+ metadata_to_update.insert({key, value});
+ }
+ }
+
+ if (keys_to_remove.empty() && metadata_to_update.empty()) {
+ unprotect_snapshot();
+ return;
+ }
+
+ dout(15) << dendl;
+
+ librados::ObjectWriteOperation op;
+ for (const auto& key : keys_to_remove) {
+ librbd::cls_client::metadata_remove(&op, key);
+ }
+ if (!metadata_to_update.empty()) {
+ librbd::cls_client::metadata_set(&op, metadata_to_update);
+ }
+
+ auto aio_comp = create_rados_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_update_image_meta>(this);
+ int r = m_local_image_ctx->md_ctx.aio_operate(m_local_image_ctx->header_oid, aio_comp,
+ &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_update_image_meta(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update image metadata: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_metadata.clear();
+
+ m_prev_snap_id = CEPH_NOSNAP;
+ unprotect_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::unprotect_snapshot() {
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+
+ auto snap_it = m_local_image_ctx->snap_info.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id);
+ }
+
+ for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ const auto& snap_info = snap_it->second;
+
+ auto user_ns = boost::get<cls::rbd::UserSnapshotNamespace>(
+ &snap_info.snap_namespace);
+ if (user_ns == nullptr) {
+ dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl;
+ continue;
+ }
+
+ if (snap_info.protection_status == RBD_PROTECTION_STATUS_UNPROTECTED) {
+ dout(20) << "snapshot " << snap_id << " is already unprotected" << dendl;
+ continue;
+ }
+
+ auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id);
+ if (snap_id_map_it == m_local_to_remote_snap_ids.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image"
+ << dendl;
+ break;
+ }
+
+ auto remote_snap_id = snap_id_map_it->second;
+ auto snap_state_it = m_image_state.snapshots.find(remote_snap_id);
+ if (snap_state_it == m_image_state.snapshots.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image "
+ << "state" << dendl;
+ break;
+ }
+
+ const auto& snap_state = snap_state_it->second;
+ if (snap_state.protection_status == RBD_PROTECTION_STATUS_UNPROTECTED) {
+ dout(15) << "snapshot " << snap_id << " is unprotected in remote image"
+ << dendl;
+ break;
+ }
+ }
+
+ if (snap_it == m_local_image_ctx->snap_info.end()) {
+ image_locker.unlock();
+
+ // no local snapshots to unprotect
+ m_prev_snap_id = CEPH_NOSNAP;
+ remove_snapshot();
+ return;
+ }
+
+ m_prev_snap_id = snap_it->first;
+ m_snap_name = snap_it->second.name;
+ image_locker.unlock();
+
+ dout(15) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_unprotect_snapshot>(this);
+ m_local_image_ctx->operations->execute_snap_unprotect(
+ cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_unprotect_snapshot(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to unprotect snapshot " << m_snap_name << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ unprotect_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::remove_snapshot() {
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+
+ auto snap_it = m_local_image_ctx->snap_info.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id);
+ }
+
+ for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ const auto& snap_info = snap_it->second;
+
+ auto user_ns = boost::get<cls::rbd::UserSnapshotNamespace>(
+ &snap_info.snap_namespace);
+ if (user_ns == nullptr) {
+ dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl;
+ continue;
+ }
+
+ auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id);
+ if (snap_id_map_it == m_local_to_remote_snap_ids.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image"
+ << dendl;
+ break;
+ }
+
+ auto remote_snap_id = snap_id_map_it->second;
+ auto snap_state_it = m_image_state.snapshots.find(remote_snap_id);
+ if (snap_state_it == m_image_state.snapshots.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image "
+ << "state" << dendl;
+ break;
+ }
+ }
+
+ if (snap_it == m_local_image_ctx->snap_info.end()) {
+ image_locker.unlock();
+
+ // no local snapshots to remove
+ m_prev_snap_id = CEPH_NOSNAP;
+ protect_snapshot();
+ return;
+ }
+
+ m_prev_snap_id = snap_it->first;
+ m_snap_name = snap_it->second.name;
+ image_locker.unlock();
+
+ dout(15) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_remove_snapshot>(this);
+ m_local_image_ctx->operations->execute_snap_remove(
+ cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_remove_snapshot(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to remove snapshot " << m_snap_name << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ remove_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::protect_snapshot() {
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+
+ auto snap_it = m_local_image_ctx->snap_info.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id);
+ }
+
+ for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ const auto& snap_info = snap_it->second;
+
+ auto user_ns = boost::get<cls::rbd::UserSnapshotNamespace>(
+ &snap_info.snap_namespace);
+ if (user_ns == nullptr) {
+ dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl;
+ continue;
+ }
+
+ if (snap_info.protection_status == RBD_PROTECTION_STATUS_PROTECTED) {
+ dout(20) << "snapshot " << snap_id << " is already protected" << dendl;
+ continue;
+ }
+
+ auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id);
+ if (snap_id_map_it == m_local_to_remote_snap_ids.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image"
+ << dendl;
+ continue;
+ }
+
+ auto remote_snap_id = snap_id_map_it->second;
+ auto snap_state_it = m_image_state.snapshots.find(remote_snap_id);
+ if (snap_state_it == m_image_state.snapshots.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image "
+ << "state" << dendl;
+ continue;
+ }
+
+ const auto& snap_state = snap_state_it->second;
+ if (snap_state.protection_status == RBD_PROTECTION_STATUS_PROTECTED) {
+ dout(15) << "snapshot " << snap_id << " is protected in remote image"
+ << dendl;
+ break;
+ }
+ }
+
+ if (snap_it == m_local_image_ctx->snap_info.end()) {
+ image_locker.unlock();
+
+ // no local snapshots to protect
+ m_prev_snap_id = CEPH_NOSNAP;
+ rename_snapshot();
+ return;
+ }
+
+ m_prev_snap_id = snap_it->first;
+ m_snap_name = snap_it->second.name;
+ image_locker.unlock();
+
+ dout(15) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_protect_snapshot>(this);
+ m_local_image_ctx->operations->execute_snap_protect(
+ cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_protect_snapshot(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to protect snapshot " << m_snap_name << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ protect_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::rename_snapshot() {
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+
+ auto snap_it = m_local_image_ctx->snap_info.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id);
+ }
+
+ for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ const auto& snap_info = snap_it->second;
+
+ auto user_ns = boost::get<cls::rbd::UserSnapshotNamespace>(
+ &snap_info.snap_namespace);
+ if (user_ns == nullptr) {
+ dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl;
+ continue;
+ }
+
+ auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id);
+ if (snap_id_map_it == m_local_to_remote_snap_ids.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image"
+ << dendl;
+ continue;
+ }
+
+ auto remote_snap_id = snap_id_map_it->second;
+ auto snap_state_it = m_image_state.snapshots.find(remote_snap_id);
+ if (snap_state_it == m_image_state.snapshots.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image "
+ << "state" << dendl;
+ continue;
+ }
+
+ const auto& snap_state = snap_state_it->second;
+ if (snap_info.name != snap_state.name) {
+ dout(15) << "snapshot " << snap_id << " has been renamed from '"
+ << snap_info.name << "' to '" << snap_state.name << "'"
+ << dendl;
+ m_snap_name = snap_state.name;
+ break;
+ }
+ }
+
+ if (snap_it == m_local_image_ctx->snap_info.end()) {
+ image_locker.unlock();
+
+    // no local snapshots to rename
+ m_prev_snap_id = CEPH_NOSNAP;
+ set_snapshot_limit();
+ return;
+ }
+
+ m_prev_snap_id = snap_it->first;
+ image_locker.unlock();
+
+ dout(15) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_rename_snapshot>(this);
+ m_local_image_ctx->operations->execute_snap_rename(
+ m_prev_snap_id, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_rename_snapshot(int r) {
+  dout(15) << "r=" << r << dendl;
+
+  if (r < 0) {
+    // fix copy-paste from handle_protect_snapshot: this is a rename failure
+    derr << "failed to rename snapshot " << m_snap_name << ": "
+         << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  rename_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::set_snapshot_limit() {
+ dout(15) << "snap_limit=" << m_image_state.snap_limit << dendl;
+
+ // no need to even check the current limit -- just set it
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_set_snapshot_limit>(this);
+ m_local_image_ctx->operations->execute_snap_set_limit(
+ m_image_state.snap_limit, ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_set_snapshot_limit(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update snapshot limit: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ finish(r);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::finish(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+template <typename I>
+uint64_t ApplyImageStateRequest<I>::compute_remote_snap_id(
+ uint64_t local_snap_id) {
+ ceph_assert(ceph_mutex_is_locked(m_local_image_ctx->image_lock));
+ ceph_assert(ceph_mutex_is_locked(m_remote_image_ctx->image_lock));
+
+ // Search our local non-primary snapshots for a mapping to the remote
+ // snapshot. The non-primary mirror snapshot with the mappings will always
+ // come at or after the snapshot we are searching against
+ auto remote_snap_id = util::compute_remote_snap_id(
+ m_local_image_ctx->image_lock, m_local_image_ctx->snap_info,
+ local_snap_id, m_remote_mirror_uuid);
+ if (remote_snap_id != CEPH_NOSNAP) {
+ return remote_snap_id;
+ }
+
+ // if we failed to find a match to a remote snapshot in our local non-primary
+ // snapshots, check the remote image for non-primary snapshot mappings back
+ // to our snapshot
+ for (auto snap_it = m_remote_image_ctx->snap_info.begin();
+ snap_it != m_remote_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ auto mirror_ns = boost::get<cls::rbd::MirrorSnapshotNamespace>(
+ &snap_it->second.snap_namespace);
+ if (mirror_ns == nullptr || !mirror_ns->is_non_primary()) {
+ continue;
+ }
+
+ if (mirror_ns->primary_mirror_uuid != m_local_mirror_uuid) {
+ dout(20) << "remote snapshot " << snap_id << " not tied to local"
+ << dendl;
+ continue;
+ } else if (mirror_ns->primary_snap_id == local_snap_id) {
+ dout(15) << "local snapshot " << local_snap_id << " maps to "
+ << "remote snapshot " << snap_id << dendl;
+ return snap_id;
+ }
+
+ const auto& snap_seqs = mirror_ns->snap_seqs;
+ for (auto [local_snap_id_seq, remote_snap_id_seq] : snap_seqs) {
+ if (local_snap_id_seq == local_snap_id) {
+ dout(15) << "local snapshot " << local_snap_id << " maps to "
+ << "remote snapshot " << remote_snap_id_seq << dendl;
+ return remote_snap_id_seq;
+ }
+ }
+ }
+
+ return CEPH_NOSNAP;
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::compute_local_to_remote_snap_ids() {
+ ceph_assert(ceph_mutex_is_locked(m_local_image_ctx->image_lock));
+ std::shared_lock remote_image_locker{m_remote_image_ctx->image_lock};
+
+ for (const auto& [snap_id, snap_info] : m_local_image_ctx->snap_info) {
+ m_local_to_remote_snap_ids[snap_id] = compute_remote_snap_id(snap_id);
+ }
+
+ dout(15) << "local_to_remote_snap_ids=" << m_local_to_remote_snap_ids
+ << dendl;
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::ApplyImageStateRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h
new file mode 100644
index 000000000..0e2d09ddf
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h
@@ -0,0 +1,155 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H
+
+#include "common/ceph_mutex.h"
+#include "librbd/mirror/snapshot/Types.h"
+#include <map>
+#include <string>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+template <typename> class EventPreprocessor;
+template <typename> class ReplayStatusFormatter;
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class ApplyImageStateRequest {
+public:
+ static ApplyImageStateRequest* create(
+ const std::string& local_mirror_uuid,
+ const std::string& remote_mirror_uuid,
+ ImageCtxT* local_image_ctx,
+ ImageCtxT* remote_image_ctx,
+ librbd::mirror::snapshot::ImageState image_state,
+ Context* on_finish) {
+ return new ApplyImageStateRequest(local_mirror_uuid, remote_mirror_uuid,
+ local_image_ctx, remote_image_ctx,
+ image_state, on_finish);
+ }
+
+ ApplyImageStateRequest(
+ const std::string& local_mirror_uuid,
+ const std::string& remote_mirror_uuid,
+ ImageCtxT* local_image_ctx,
+ ImageCtxT* remote_image_ctx,
+ librbd::mirror::snapshot::ImageState image_state,
+ Context* on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * RENAME_IMAGE
+ * |
+ * | /---------\
+ * | | |
+ * v v |
+ * UPDATE_FEATURES -----/
+ * |
+ * v
+ * GET_IMAGE_META
+ * |
+ * | /---------\
+ * | | |
+ * v v |
+ * UPDATE_IMAGE_META ---/
+ * |
+ * | /---------\
+ * | | |
+ * v v |
+ * UNPROTECT_SNAPSHOT |
+ * | |
+ * v |
+ * REMOVE_SNAPSHOT |
+ * | |
+ * v |
+ * PROTECT_SNAPSHOT |
+ * | |
+ * v |
+ * RENAME_SNAPSHOT -----/
+ * |
+ * v
+ * SET_SNAPSHOT_LIMIT
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ std::string m_local_mirror_uuid;
+ std::string m_remote_mirror_uuid;
+ ImageCtxT* m_local_image_ctx;
+ ImageCtxT* m_remote_image_ctx;
+ librbd::mirror::snapshot::ImageState m_image_state;
+ Context* m_on_finish;
+
+ std::map<uint64_t, uint64_t> m_local_to_remote_snap_ids;
+
+ uint64_t m_features = 0;
+
+ std::map<std::string, bufferlist> m_metadata;
+
+ uint64_t m_prev_snap_id = 0;
+ std::string m_snap_name;
+
+ void rename_image();
+ void handle_rename_image(int r);
+
+ void update_features();
+ void handle_update_features(int r);
+
+ void get_image_meta();
+ void handle_get_image_meta(int r);
+
+ void update_image_meta();
+ void handle_update_image_meta(int r);
+
+ void unprotect_snapshot();
+ void handle_unprotect_snapshot(int r);
+
+ void remove_snapshot();
+ void handle_remove_snapshot(int r);
+
+ void protect_snapshot();
+ void handle_protect_snapshot(int r);
+
+ void rename_snapshot();
+ void handle_rename_snapshot(int r);
+
+ void set_snapshot_limit();
+ void handle_set_snapshot_limit(int r);
+
+ void finish(int r);
+
+ uint64_t compute_remote_snap_id(uint64_t snap_id);
+ void compute_local_to_remote_snap_ids();
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::ApplyImageStateRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc
new file mode 100644
index 000000000..c923395c9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc
@@ -0,0 +1,204 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CreateLocalImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/image_replayer/CreateImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "CreateLocalImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void CreateLocalImageRequest<I>::send() {
+  // Entry point: begin by tearing down any stale mirror-image registration
+  // left over from a previous failed creation attempt.
+  this->disable_mirror_image();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::disable_mirror_image() {
+  // Mark any previously registered (partially created) local image as
+  // 'disabling' so its registration can be removed before re-creation.
+  if (m_state_builder->local_image_id.empty()) {
+    // no stale registration -- go straight to registering a new image id
+    add_mirror_image();
+    return;
+  }
+
+  dout(10) << dendl;
+  update_progress("DISABLE_MIRROR_IMAGE");
+
+  // need to send 'disabling' since the cls methods will fail if we aren't
+  // in that state
+  cls::rbd::MirrorImage mirror_image{
+    cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, m_global_image_id,
+    cls::rbd::MIRROR_IMAGE_STATE_DISABLING};
+  librados::ObjectWriteOperation op;
+  librbd::cls_client::mirror_image_set(&op, m_state_builder->local_image_id,
+                                       mirror_image);
+
+  // completion fires handle_disable_mirror_image(); release() drops our
+  // local reference once the op is queued
+  auto aio_comp = create_rados_callback<
+    CreateLocalImageRequest<I>,
+    &CreateLocalImageRequest<I>::handle_disable_mirror_image>(this);
+  int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_disable_mirror_image(int r) {
+  // Completion for the 'disabling' state update on the stale registration.
+  dout(10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    // registration flipped to DISABLING -- safe to remove it now
+    remove_mirror_image();
+    return;
+  }
+
+  derr << "failed to disable mirror image " << m_global_image_id << ": "
+       << cpp_strerror(r) << dendl;
+  finish(r);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::remove_mirror_image() {
+  // Delete the (now 'disabling') mirror-image registration for the stale
+  // local image id so a fresh id can be registered.
+  dout(10) << dendl;
+  update_progress("REMOVE_MIRROR_IMAGE");
+
+  librados::ObjectWriteOperation op;
+  librbd::cls_client::mirror_image_remove(&op, m_state_builder->local_image_id);
+
+  auto aio_comp = create_rados_callback<
+    CreateLocalImageRequest<I>,
+    &CreateLocalImageRequest<I>::handle_remove_mirror_image>(this);
+  int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_remove_mirror_image(int r) {
+  // Completion for removing the stale mirror-image registration.
+  dout(10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    // stale registration is gone; forget the old id and register a new one
+    m_state_builder->local_image_id.clear();
+    add_mirror_image();
+    return;
+  }
+
+  derr << "failed to remove mirror image " << m_global_image_id << ": "
+       << cpp_strerror(r) << dendl;
+  finish(r);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::add_mirror_image() {
+  // Generate a brand-new local image id and register it in the pool's
+  // mirroring directory before the image itself is created.
+  ceph_assert(m_state_builder->local_image_id.empty());
+  m_state_builder->local_image_id =
+    librbd::util::generate_image_id<I>(m_local_io_ctx);
+
+  dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl;
+  update_progress("ADD_MIRROR_IMAGE");
+
+  // use 'creating' to track a partially constructed image. it will
+  // be switched to 'enabled' once the image is fully created
+  cls::rbd::MirrorImage mirror_image{
+    cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, m_global_image_id,
+    cls::rbd::MIRROR_IMAGE_STATE_CREATING};
+  librados::ObjectWriteOperation op;
+  librbd::cls_client::mirror_image_set(&op, m_state_builder->local_image_id,
+                                       mirror_image);
+
+  auto aio_comp = create_rados_callback<
+    CreateLocalImageRequest<I>,
+    &CreateLocalImageRequest<I>::handle_add_mirror_image>(this);
+  int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_add_mirror_image(int r) {
+  // Completion for registering the new local image id in the mirroring
+  // directory; on success proceed to creating the image itself.
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to register mirror image " << m_global_image_id << ": "
+         << cpp_strerror(r) << dendl;
+    // unqualified call for consistency with the sibling handlers
+    finish(r);
+    return;
+  }
+
+  create_local_image();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::create_local_image() {
+  // Create the local image (using the pre-registered image id) as a clone
+  // of the remote image's name/layout via CreateImageRequest.
+  dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl;
+  update_progress("CREATE_LOCAL_IMAGE");
+
+  // image name is protected by the remote image's lock; copy it out under
+  // a shared lock
+  m_remote_image_ctx->image_lock.lock_shared();
+  std::string image_name = m_remote_image_ctx->name;
+  m_remote_image_ctx->image_lock.unlock_shared();
+
+  auto ctx = create_context_callback<
+    CreateLocalImageRequest<I>,
+    &CreateLocalImageRequest<I>::handle_create_local_image>(this);
+  auto request = CreateImageRequest<I>::create(
+    m_threads, m_local_io_ctx, m_global_image_id,
+    m_state_builder->remote_mirror_uuid, image_name,
+    m_state_builder->local_image_id, m_remote_image_ctx,
+    m_pool_meta_cache, cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, ctx);
+  request->send();
+}
+template <typename I>
+void CreateLocalImageRequest<I>::handle_create_local_image(int r) {
+  // Completion for the image creation request.
+  dout(10) << "r=" << r << dendl;
+
+  if (r == -EBADF) {
+    // the generated image id collided with an existing image; loop back to
+    // the start of the state machine to pick a new id
+    dout(5) << "image id " << m_state_builder->local_image_id << " "
+            << "already in-use" << dendl;
+    disable_mirror_image();
+    return;
+  } else if (r < 0) {
+    if (r == -ENOENT) {
+      // expected when the clone source (parent) is not present locally
+      dout(10) << "parent image does not exist" << dendl;
+    } else {
+      derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+    }
+    finish(r);
+    return;
+  }
+
+  finish(0);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::update_progress(
+    const std::string& description) {
+  // Log the current state-machine step and forward it to the optional
+  // progress listener.
+  dout(15) << description << dendl;
+  if (m_progress_ctx == nullptr) {
+    return;
+  }
+  m_progress_ctx->update_progress(description);
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::CreateLocalImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h
new file mode 100644
index 000000000..3345154b4
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h
@@ -0,0 +1,121 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include <string>
+
+struct Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class PoolMetaCache;
+class ProgressContext;
+template <typename> struct Threads;
+
+namespace image_replayer {
+namespace snapshot {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class CreateLocalImageRequest : public BaseRequest {
+public:
+  typedef rbd::mirror::ProgressContext ProgressContext;
+
+  /// Factory helper mirroring the constructor arguments.
+  static CreateLocalImageRequest* create(
+      Threads<ImageCtxT>* threads,
+      librados::IoCtx& local_io_ctx,
+      ImageCtxT* remote_image_ctx,
+      const std::string& global_image_id,
+      PoolMetaCache* pool_meta_cache,
+      ProgressContext* progress_ctx,
+      StateBuilder<ImageCtxT>* state_builder,
+      Context* on_finish) {
+    return new CreateLocalImageRequest(threads, local_io_ctx, remote_image_ctx,
+                                       global_image_id, pool_meta_cache,
+                                       progress_ctx, state_builder, on_finish);
+  }
+
+  CreateLocalImageRequest(
+      Threads<ImageCtxT>* threads,
+      librados::IoCtx& local_io_ctx,
+      ImageCtxT* remote_image_ctx,
+      const std::string& global_image_id,
+      PoolMetaCache* pool_meta_cache,
+      ProgressContext* progress_ctx,
+      StateBuilder<ImageCtxT>* state_builder,
+      Context* on_finish)
+    : BaseRequest(on_finish),
+      m_threads(threads),
+      m_local_io_ctx(local_io_ctx),
+      m_remote_image_ctx(remote_image_ctx),
+      m_global_image_id(global_image_id),
+      m_pool_meta_cache(pool_meta_cache),
+      m_progress_ctx(progress_ctx),
+      m_state_builder(state_builder) {
+  }
+
+  // mark the virtual BaseRequest entry point as overridden (matches the
+  // sibling snapshot request classes)
+  void send() override;
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * DISABLE_MIRROR_IMAGE < * * * * * *
+   *    |                             *
+   *    v                             *
+   * REMOVE_MIRROR_IMAGE              *
+   *    |                             *
+   *    v                             *
+   * ADD_MIRROR_IMAGE                 *
+   *    |                             *
+   *    v (id exists)                 *
+   * CREATE_LOCAL_IMAGE * * * * * * * *
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  Threads<ImageCtxT>* m_threads;
+  librados::IoCtx& m_local_io_ctx;
+  ImageCtxT* m_remote_image_ctx;
+  std::string m_global_image_id;
+  PoolMetaCache* m_pool_meta_cache;
+  ProgressContext* m_progress_ctx;
+  StateBuilder<ImageCtxT>* m_state_builder;
+
+  void disable_mirror_image();
+  void handle_disable_mirror_image(int r);
+
+  void remove_mirror_image();
+  void handle_remove_mirror_image(int r);
+
+  void add_mirror_image();
+  void handle_add_mirror_image(int r);
+
+  void create_local_image();
+  void handle_create_local_image(int r);
+
+  void update_progress(const std::string& description);
+
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::CreateLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc
new file mode 100644
index 000000000..575eb8534
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc
@@ -0,0 +1,70 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "PrepareReplayRequest.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/mirror/snapshot/ImageMeta.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "PrepareReplayRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+void PrepareReplayRequest<I>::send() {
+  // Reset both output flags before probing the local image-meta.
+  *m_syncing = false;
+  *m_resync_requested = false;
+
+  load_local_image_meta();
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::load_local_image_meta() {
+  // Construct and load the mirror-snapshot image-meta for the local image;
+  // the ImageMeta object is stored in (and owned via) the state builder.
+  dout(15) << dendl;
+
+  ceph_assert(m_state_builder->local_image_meta == nullptr);
+  m_state_builder->local_image_meta =
+    librbd::mirror::snapshot::ImageMeta<I>::create(
+      m_state_builder->local_image_ctx, m_local_mirror_uuid);
+
+  auto ctx = create_context_callback<
+    PrepareReplayRequest<I>,
+    &PrepareReplayRequest<I>::handle_load_local_image_meta>(this);
+  m_state_builder->local_image_meta->load(ctx);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::handle_load_local_image_meta(int r) {
+  // Completion for the image-meta load; propagates the resync flag.
+  dout(15) << "r=" << r << dendl;
+
+  // -ENOENT (no image-meta stored yet) is tolerated; presumably the freshly
+  // created ImageMeta then reports resync_requested == false -- TODO confirm
+  // against librbd::mirror::snapshot::ImageMeta
+  if (r < 0 && r != -ENOENT) {
+    derr << "failed to load local image-meta: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  *m_resync_requested = m_state_builder->local_image_meta->resync_requested;
+  finish(0);
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::PrepareReplayRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h
new file mode 100644
index 000000000..4e9246acd
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h
@@ -0,0 +1,92 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+// NOTE: guard renamed from ..._JOURNAL_... (copy-paste from the journal
+// variant) -- the old name collided with the journal header's guard
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_PREPARE_REPLAY_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_PREPARE_REPLAY_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/mirror/Types.h"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include <list>
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+
+namespace image_replayer {
+namespace snapshot {
+
+template <typename> class StateBuilder;
+
+/// Determines whether snapshot-based replay can proceed for the local
+/// image: loads the local image-meta and reports whether a resync was
+/// requested and whether a sync is in progress.
+template <typename ImageCtxT>
+class PrepareReplayRequest : public BaseRequest {
+public:
+  static PrepareReplayRequest* create(
+      const std::string& local_mirror_uuid,
+      ProgressContext* progress_ctx,
+      StateBuilder<ImageCtxT>* state_builder,
+      bool* resync_requested,
+      bool* syncing,
+      Context* on_finish) {
+    return new PrepareReplayRequest(
+      local_mirror_uuid, progress_ctx, state_builder, resync_requested,
+      syncing, on_finish);
+  }
+
+  PrepareReplayRequest(
+      const std::string& local_mirror_uuid,
+      ProgressContext* progress_ctx,
+      StateBuilder<ImageCtxT>* state_builder,
+      bool* resync_requested,
+      bool* syncing,
+      Context* on_finish)
+    : BaseRequest(on_finish),
+      m_local_mirror_uuid(local_mirror_uuid),
+      m_progress_ctx(progress_ctx),
+      m_state_builder(state_builder),
+      m_resync_requested(resync_requested),
+      m_syncing(syncing) {
+  }
+
+  void send() override;
+
+private:
+  // TODO
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * LOAD_LOCAL_IMAGE_META
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  std::string m_local_mirror_uuid;
+  ProgressContext* m_progress_ctx;
+  StateBuilder<ImageCtxT>* m_state_builder;
+  bool* m_resync_requested;  ///< out: resync flag from local image-meta
+  bool* m_syncing;           ///< out: sync-in-progress flag
+
+  void load_local_image_meta();
+  void handle_load_local_image_meta(int r);
+
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::PrepareReplayRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_PREPARE_REPLAY_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
new file mode 100644
index 000000000..4a44a57bc
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
@@ -0,0 +1,1586 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Replayer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "common/Timer.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "json_spirit/json_spirit.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/deep_copy/Handler.h"
+#include "librbd/deep_copy/ImageCopyRequest.h"
+#include "librbd/deep_copy/SnapshotCopyRequest.h"
+#include "librbd/mirror/ImageStateUpdateRequest.h"
+#include "librbd/mirror/snapshot/CreateNonPrimaryRequest.h"
+#include "librbd/mirror/snapshot/GetImageStateRequest.h"
+#include "librbd/mirror/snapshot/ImageMeta.h"
+#include "librbd/mirror/snapshot/UnlinkPeerRequest.h"
+#include "tools/rbd_mirror/InstanceWatcher.h"
+#include "tools/rbd_mirror/PoolMetaCache.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/ReplayerListener.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/Utils.h"
+#include <cmath>
+#include <set>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "Replayer: " << this << " " << __func__ << ": "
+
+extern PerfCounters *g_snapshot_perf_counters;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+namespace {
+
+// Round a throughput value to two decimal places (absolute value).
+// Qualified std::abs/std::round: the unqualified calls could bind to the
+// C `int abs(int)` overload on some platforms, silently truncating the
+// double argument.
+double round_to_two_places(double value) {
+  return std::abs(std::round(value * 100) / 100);
+}
+
+// Return the id and info of the newest *complete* mirror snapshot of the
+// image, or {CEPH_NOSNAP, nullptr} if none exists.  Caller is expected to
+// hold the image lock (snap_info is iterated without locking here).
+template<typename I>
+std::pair<uint64_t, librbd::SnapInfo*> get_newest_mirror_snapshot(
+    I* image_ctx) {
+  // iterate newest-to-oldest; snap_info is ordered by snap id
+  for (auto snap_info_it = image_ctx->snap_info.rbegin();
+       snap_info_it != image_ctx->snap_info.rend(); ++snap_info_it) {
+    const auto& snap_ns = snap_info_it->second.snap_namespace;
+    auto mirror_ns = boost::get<
+      cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
+    if (mirror_ns == nullptr || !mirror_ns->complete) {
+      continue;
+    }
+
+    return {snap_info_it->first, &snap_info_it->second};
+  }
+
+  return {CEPH_NOSNAP, nullptr};
+}
+
+} // anonymous namespace
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+// Adapter that forwards librbd image update notifications to the replayer
+// so it can re-scan for new mirror snapshots.
+template <typename I>
+struct Replayer<I>::C_UpdateWatchCtx : public librbd::UpdateWatchCtx {
+  Replayer<I>* replayer;
+
+  C_UpdateWatchCtx(Replayer<I>* replayer) : replayer(replayer) {
+  }
+
+  void handle_notify() override {
+    replayer->handle_image_update_notify();
+  }
+};
+
+// Adapter that forwards deep-copy progress callbacks (bytes read and
+// object-count progress) from the image copy state machine to the replayer.
+template <typename I>
+struct Replayer<I>::DeepCopyHandler : public librbd::deep_copy::Handler {
+  Replayer *replayer;
+
+  DeepCopyHandler(Replayer* replayer) : replayer(replayer) {
+  }
+
+  void handle_read(uint64_t bytes_read) override {
+    replayer->handle_copy_image_read(bytes_read);
+  }
+
+  int update_progress(uint64_t object_number, uint64_t object_count) override {
+    replayer->handle_copy_image_progress(object_number, object_count);
+    // always continue the copy
+    return 0;
+  }
+};
+
+// Construct a snapshot-based replayer; all collaborators are borrowed
+// pointers owned by the caller and must outlive this object.
+template <typename I>
+Replayer<I>::Replayer(
+    Threads<I>* threads,
+    InstanceWatcher<I>* instance_watcher,
+    const std::string& local_mirror_uuid,
+    PoolMetaCache* pool_meta_cache,
+    StateBuilder<I>* state_builder,
+    ReplayerListener* replayer_listener)
+  : m_threads(threads),
+    m_instance_watcher(instance_watcher),
+    m_local_mirror_uuid(local_mirror_uuid),
+    m_pool_meta_cache(pool_meta_cache),
+    m_state_builder(state_builder),
+    m_replayer_listener(replayer_listener),
+    // per-instance lock name aids lockdep diagnostics
+    m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+      "rbd::mirror::image_replayer::snapshot::Replayer", this))) {
+  dout(10) << dendl;
+}
+
+template <typename I>
+Replayer<I>::~Replayer() {
+  dout(10) << dendl;
+
+  {
+    // perf counter registration is guarded by m_lock
+    std::unique_lock locker{m_lock};
+    unregister_perf_counters();
+  }
+
+  // destruction is only legal after shut_down() has fully completed
+  ceph_assert(m_state == STATE_COMPLETE);
+  ceph_assert(m_update_watch_ctx == nullptr);
+  ceph_assert(m_deep_copy_handler == nullptr);
+}
+
+// Initialize the replayer: resolve the remote mirror peer uuid, compute the
+// status image spec, register perf counters, and start registering the
+// local image update watcher.  on_finish fires when init completes/fails.
+template <typename I>
+void Replayer<I>::init(Context* on_finish) {
+  dout(10) << dendl;
+
+  ceph_assert(m_state == STATE_INIT);
+
+  RemotePoolMeta remote_pool_meta;
+  int r = m_pool_meta_cache->get_remote_pool_meta(
+    m_state_builder->remote_image_ctx->md_ctx.get_id(), &remote_pool_meta);
+  if (r < 0 || remote_pool_meta.mirror_peer_uuid.empty()) {
+    derr << "failed to retrieve mirror peer uuid from remote pool" << dendl;
+    // fail fast: mark complete and complete the callback asynchronously
+    m_state = STATE_COMPLETE;
+    m_threads->work_queue->queue(on_finish, r);
+    return;
+  }
+
+  m_remote_mirror_peer_uuid = remote_pool_meta.mirror_peer_uuid;
+  dout(10) << "remote_mirror_peer_uuid=" << m_remote_mirror_peer_uuid << dendl;
+
+  {
+    // image name/pool for status reporting, read under the image lock
+    auto local_image_ctx = m_state_builder->local_image_ctx;
+    std::shared_lock image_locker{local_image_ctx->image_lock};
+    m_image_spec = image_replayer::util::compute_image_spec(
+      local_image_ctx->md_ctx, local_image_ctx->name);
+  }
+
+  {
+    std::unique_lock locker{m_lock};
+    register_perf_counters();
+  }
+
+  ceph_assert(m_on_init_shutdown == nullptr);
+  m_on_init_shutdown = on_finish;
+
+  register_local_update_watcher();
+}
+
+// Shut the replayer down.  If a replay is currently in-flight, the shutdown
+// is deferred (state already swapped to COMPLETE) until the replay notices
+// and unwinds; otherwise teardown starts immediately.
+template <typename I>
+void Replayer<I>::shut_down(Context* on_finish) {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  ceph_assert(m_on_init_shutdown == nullptr);
+  m_on_init_shutdown = on_finish;
+  m_error_code = 0;
+  m_error_description = "";
+
+  ceph_assert(m_state != STATE_INIT);
+  // atomically flip to COMPLETE while capturing the previous state
+  auto state = STATE_COMPLETE;
+  std::swap(m_state, state);
+
+  if (state == STATE_REPLAYING) {
+    // if a sync request was pending, request a cancelation
+    m_instance_watcher->cancel_sync_request(
+      m_state_builder->local_image_ctx->id);
+
+    // TODO interrupt snapshot copy and image copy state machines even if remote
+    // cluster is unreachable
+    dout(10) << "shut down pending on completion of snapshot replay" << dendl;
+    return;
+  }
+  locker.unlock();
+
+  unregister_remote_update_watcher();
+}
+
+template <typename I>
+void Replayer<I>::flush(Context* on_finish) {
+  // TODO: nothing to drain yet -- complete asynchronously on the work queue
+  dout(10) << dendl;
+
+  m_threads->work_queue->queue(on_finish, 0);
+}
+
+// Build a JSON status description (replay state, timestamps, throughput,
+// ETA) for the mirror daemon's status reporting.  Returns false (and
+// completes on_finish with -EAGAIN) when replay is not running.
+// Lock order: m_lock, then local image lock, then remote image lock.
+template <typename I>
+bool Replayer<I>::get_replay_status(std::string* description,
+                                    Context* on_finish) {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) {
+    locker.unlock();
+
+    derr << "replay not running" << dendl;
+    on_finish->complete(-EAGAIN);
+    return false;
+  }
+
+  std::shared_lock local_image_locker{
+    m_state_builder->local_image_ctx->image_lock};
+  auto [local_snap_id, local_snap_info] = get_newest_mirror_snapshot(
+    m_state_builder->local_image_ctx);
+
+  std::shared_lock remote_image_locker{
+    m_state_builder->remote_image_ctx->image_lock};
+  auto [remote_snap_id, remote_snap_info] = get_newest_mirror_snapshot(
+    m_state_builder->remote_image_ctx);
+
+  if (remote_snap_info == nullptr) {
+    remote_image_locker.unlock();
+    local_image_locker.unlock();
+    locker.unlock();
+
+    derr << "remote image does not contain mirror snapshots" << dendl;
+    on_finish->complete(-EAGAIN);
+    return false;
+  }
+
+  // a non-NOSNAP end snapshot means a sync is in-flight
+  std::string replay_state = "idle";
+  if (m_remote_snap_id_end != CEPH_NOSNAP) {
+    replay_state = "syncing";
+  }
+
+  json_spirit::mObject root_obj;
+  root_obj["replay_state"] = replay_state;
+  root_obj["remote_snapshot_timestamp"] = remote_snap_info->timestamp.sec();
+
+  auto matching_remote_snap_id = util::compute_remote_snap_id(
+    m_state_builder->local_image_ctx->image_lock,
+    m_state_builder->local_image_ctx->snap_info,
+    local_snap_id, m_state_builder->remote_mirror_uuid);
+  auto matching_remote_snap_it =
+    m_state_builder->remote_image_ctx->snap_info.find(matching_remote_snap_id);
+  if (matching_remote_snap_id != CEPH_NOSNAP &&
+      matching_remote_snap_it !=
+        m_state_builder->remote_image_ctx->snap_info.end()) {
+    // use the timestamp from the matching remote image since
+    // the local snapshot would just be the time the snapshot was
+    // synced and not the consistency point in time.
+    root_obj["local_snapshot_timestamp"] =
+      matching_remote_snap_it->second.timestamp.sec();
+  }
+
+  matching_remote_snap_it = m_state_builder->remote_image_ctx->snap_info.find(
+    m_remote_snap_id_end);
+  if (m_remote_snap_id_end != CEPH_NOSNAP &&
+      matching_remote_snap_it !=
+        m_state_builder->remote_image_ctx->snap_info.end()) {
+    root_obj["syncing_snapshot_timestamp"] = remote_snap_info->timestamp.sec();
+    root_obj["syncing_percent"] = static_cast<uint64_t>(
+        100 * m_local_mirror_snap_ns.last_copied_object_number /
+        static_cast<float>(std::max<uint64_t>(1U, m_local_object_count)));
+  }
+
+  // feed a zero sample so the rolling average decays while idle
+  m_bytes_per_second(0);
+  auto bytes_per_second = m_bytes_per_second.get_average();
+  root_obj["bytes_per_second"] = round_to_two_places(bytes_per_second);
+
+  auto bytes_per_snapshot = boost::accumulators::rolling_mean(
+    m_bytes_per_snapshot);
+  root_obj["bytes_per_snapshot"] = round_to_two_places(bytes_per_snapshot);
+
+  auto pending_bytes = bytes_per_snapshot * m_pending_snapshots;
+  if (bytes_per_second > 0 && m_pending_snapshots > 0) {
+    std::uint64_t seconds_until_synced = round_to_two_places(
+      pending_bytes / bytes_per_second);
+    if (seconds_until_synced >= std::numeric_limits<uint64_t>::max()) {
+      seconds_until_synced = std::numeric_limits<uint64_t>::max();
+    }
+
+    root_obj["seconds_until_synced"] = seconds_until_synced;
+  }
+
+  *description = json_spirit::write(
+    root_obj, json_spirit::remove_trailing_zeros);
+
+  local_image_locker.unlock();
+  remote_image_locker.unlock();
+  locker.unlock();
+  // NOTE(review): -EEXIST appears to signal "description already provided"
+  // to the caller rather than an error -- confirm against the caller's
+  // contract before changing
+  on_finish->complete(-EEXIST);
+  return true;
+}
+
+// First step of each replay iteration: clear the pending-update flag,
+// refresh the status image spec if the image was renamed, then (re)load the
+// local mirror-snapshot image-meta.
+template <typename I>
+void Replayer<I>::load_local_image_meta() {
+  dout(10) << dendl;
+
+  {
+    // reset state in case new snapshot is added while we are scanning
+    std::unique_lock locker{m_lock};
+    m_image_updated = false;
+  }
+
+  bool update_status = false;
+  {
+    auto local_image_ctx = m_state_builder->local_image_ctx;
+    std::shared_lock image_locker{local_image_ctx->image_lock};
+    auto image_spec = image_replayer::util::compute_image_spec(
+      local_image_ctx->md_ctx, local_image_ctx->name);
+    if (m_image_spec != image_spec) {
+      // image was renamed/moved -- re-register counters under the new spec
+      m_image_spec = image_spec;
+      update_status = true;
+    }
+  }
+  if (update_status) {
+    std::unique_lock locker{m_lock};
+    unregister_perf_counters();
+    register_perf_counters();
+    notify_status_updated();
+  }
+
+  ceph_assert(m_state_builder->local_image_meta != nullptr);
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_load_local_image_meta>(this);
+  m_state_builder->local_image_meta->load(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_load_local_image_meta(int r) {
+  // Completion for the image-meta load; -ENOENT (no meta yet) is tolerated.
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0 && r != -ENOENT) {
+    derr << "failed to load local image-meta: " << cpp_strerror(r) << dendl;
+    handle_replay_complete(r, "failed to load local image-meta");
+    return;
+  }
+
+  if (r >= 0 && m_state_builder->local_image_meta->resync_requested) {
+    // operator requested a resync -- stop replaying so the image can be
+    // rebuilt from scratch
+    m_resync_requested = true;
+
+    dout(10) << "local image resync requested" << dendl;
+    handle_replay_complete(0, "resync requested");
+    return;
+  }
+
+  refresh_local_image();
+}
+
+template <typename I>
+void Replayer<I>::refresh_local_image() {
+  // Skip the refresh round-trip when the local image is already current.
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  if (!local_image_ctx->state->is_refresh_required()) {
+    refresh_remote_image();
+    return;
+  }
+
+  dout(10) << dendl;
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_refresh_local_image>(this);
+  local_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_refresh_local_image(int r) {
+  // Completion for the local image refresh.
+  dout(10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    refresh_remote_image();
+    return;
+  }
+
+  derr << "failed to refresh local image: " << cpp_strerror(r) << dendl;
+  handle_replay_complete(r, "failed to refresh local image");
+}
+
+template <typename I>
+void Replayer<I>::refresh_remote_image() {
+  if (!m_state_builder->remote_image_ctx->state->is_refresh_required()) {
+    // scan_local_mirror_snapshots expects m_lock held (passed by pointer so
+    // it can be dropped/re-taken internally)
+    std::unique_lock locker{m_lock};
+    scan_local_mirror_snapshots(&locker);
+    return;
+  }
+
+  dout(10) << dendl;
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_refresh_remote_image>(this);
+  m_state_builder->remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_refresh_remote_image(int r) {
+  // Completion for the remote image refresh.
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to refresh remote image: " << cpp_strerror(r) << dendl;
+    handle_replay_complete(r, "failed to refresh remote image");
+    return;
+  }
+
+  // proceed to snapshot scanning with m_lock held (see refresh_remote_image)
+  std::unique_lock locker{m_lock};
+  scan_local_mirror_snapshots(&locker);
+}
+
+// Walk the local image's snapshots (oldest-to-newest) to determine the
+// sync window [m_local_snap_id_start, m_local_snap_id_end], prune unneeded
+// non-primary snapshots, and detect promotion / unknown-peer conditions.
+// Called with *locker (m_lock) held; may release it on some exit paths.
+template <typename I>
+void Replayer<I>::scan_local_mirror_snapshots(
+    std::unique_lock<ceph::mutex>* locker) {
+  if (is_replay_interrupted(locker)) {
+    return;
+  }
+
+  dout(10) << dendl;
+
+  // reset the per-iteration sync window state
+  m_local_snap_id_start = 0;
+  m_local_snap_id_end = CEPH_NOSNAP;
+  m_local_mirror_snap_ns = {};
+  m_local_object_count = 0;
+
+  m_remote_snap_id_start = 0;
+  m_remote_snap_id_end = CEPH_NOSNAP;
+  m_remote_mirror_snap_ns = {};
+
+  std::set<uint64_t> prune_snap_ids;
+
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  std::shared_lock image_locker{local_image_ctx->image_lock};
+  for (auto snap_info_it = local_image_ctx->snap_info.begin();
+       snap_info_it != local_image_ctx->snap_info.end(); ++snap_info_it) {
+    const auto& snap_ns = snap_info_it->second.snap_namespace;
+    auto mirror_ns = boost::get<
+      cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
+    if (mirror_ns == nullptr) {
+      // ignore user/trash snapshots
+      continue;
+    }
+
+    dout(15) << "local mirror snapshot: id=" << snap_info_it->first << ", "
+             << "mirror_ns=" << *mirror_ns << dendl;
+    m_local_mirror_snap_ns = *mirror_ns;
+
+    auto local_snap_id = snap_info_it->first;
+    if (mirror_ns->is_non_primary()) {
+      if (mirror_ns->complete) {
+        // if remote has new snapshots, we would sync from here
+        m_local_snap_id_start = local_snap_id;
+        ceph_assert(m_local_snap_id_end == CEPH_NOSNAP);
+
+        if (mirror_ns->mirror_peer_uuids.empty()) {
+          // no other peer will attempt to sync to this snapshot so store as
+          // a candidate for removal
+          prune_snap_ids.insert(local_snap_id);
+        }
+      } else if (mirror_ns->last_copied_object_number == 0 &&
+                 m_local_snap_id_start > 0) {
+        // snapshot might be missing image state, object-map, etc, so just
+        // delete and re-create it if we haven't started copying data
+        // objects. Also only prune this snapshot since we will need the
+        // previous mirror snapshot for syncing. Special case exception for
+        // the first non-primary snapshot since we know its snapshot is
+        // well-formed because otherwise the mirror-image-state would have
+        // forced an image deletion.
+        prune_snap_ids.clear();
+        prune_snap_ids.insert(local_snap_id);
+        break;
+      } else {
+        // start snap will be last complete mirror snapshot or initial
+        // image revision
+        m_local_snap_id_end = local_snap_id;
+        break;
+      }
+    } else if (mirror_ns->is_primary()) {
+      if (mirror_ns->complete) {
+        m_local_snap_id_start = local_snap_id;
+        ceph_assert(m_local_snap_id_end == CEPH_NOSNAP);
+      } else {
+        derr << "incomplete local primary snapshot" << dendl;
+        handle_replay_complete(locker, -EINVAL,
+                               "incomplete local primary snapshot");
+        return;
+      }
+    } else {
+      derr << "unknown local mirror snapshot state" << dendl;
+      handle_replay_complete(locker, -EINVAL,
+                             "invalid local mirror snapshot state");
+      return;
+    }
+  }
+  image_locker.unlock();
+
+  if (m_local_snap_id_start > 0) {
+    // remove candidate that is required for delta snapshot sync
+    prune_snap_ids.erase(m_local_snap_id_start);
+  }
+  if (!prune_snap_ids.empty()) {
+    locker->unlock();
+
+    // prune one snapshot per pass; the scan restarts afterwards
+    auto prune_snap_id = *prune_snap_ids.begin();
+    dout(5) << "pruning unused non-primary snapshot " << prune_snap_id << dendl;
+    prune_non_primary_snapshot(prune_snap_id);
+    return;
+  }
+
+  if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) {
+    if (m_local_mirror_snap_ns.is_non_primary() &&
+        m_local_mirror_snap_ns.primary_mirror_uuid !=
+          m_state_builder->remote_mirror_uuid) {
+      // TODO support multiple peers
+      derr << "local image linked to unknown peer: "
+           << m_local_mirror_snap_ns.primary_mirror_uuid << dendl;
+      handle_replay_complete(locker, -EEXIST,
+                             "local image linked to unknown peer");
+      return;
+    } else if (m_local_mirror_snap_ns.state ==
+                 cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
+      // local image was force-promoted out from under us; stop replaying
+      dout(5) << "local image promoted" << dendl;
+      handle_replay_complete(locker, 0, "force promoted");
+      return;
+    }
+
+    dout(10) << "found local mirror snapshot: "
+             << "local_snap_id_start=" << m_local_snap_id_start << ", "
+             << "local_snap_id_end=" << m_local_snap_id_end << ", "
+             << "local_snap_ns=" << m_local_mirror_snap_ns << dendl;
+    if (!m_local_mirror_snap_ns.is_primary() &&
+        m_local_mirror_snap_ns.complete) {
+      // our remote sync should start after this completed snapshot
+      m_remote_snap_id_start = m_local_mirror_snap_ns.primary_snap_id;
+    }
+  }
+
+  // we don't have any mirror snapshots or only completed non-primary
+  // mirror snapshots
+  scan_remote_mirror_snapshots(locker);
+}
+
+// Walk the remote image's mirror snapshots (under its image_lock) to decide
+// the next replay action: unlink a stale peer reference, resume or start a
+// snapshot sync, restart the scan after a racing update, fail with
+// split-brain, or idle until a new snapshot notification arrives.
+// Precondition: *locker holds m_lock; the lock may be released before
+// chaining to the next state-machine step.
+template <typename I>
+void Replayer<I>::scan_remote_mirror_snapshots(
+    std::unique_lock<ceph::mutex>* locker) {
+  dout(10) << dendl;
+
+  // number of remote snapshots still requiring sync (reported via status)
+  m_pending_snapshots = 0;
+
+  std::set<uint64_t> unlink_snap_ids;
+  bool split_brain = false;
+  bool remote_demoted = false;
+  auto remote_image_ctx = m_state_builder->remote_image_ctx;
+  std::shared_lock image_locker{remote_image_ctx->image_lock};
+  for (auto snap_info_it = remote_image_ctx->snap_info.begin();
+       snap_info_it != remote_image_ctx->snap_info.end(); ++snap_info_it) {
+    const auto& snap_ns = snap_info_it->second.snap_namespace;
+    auto mirror_ns = boost::get<
+      cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
+    if (mirror_ns == nullptr) {
+      // ignore non-mirror (e.g. user) snapshots
+      continue;
+    }
+
+    dout(15) << "remote mirror snapshot: id=" << snap_info_it->first << ", "
+             << "mirror_ns=" << *mirror_ns << dendl;
+    // deliberately overwritten every iteration: only the most recent mirror
+    // snapshot's demotion state matters after the loop
+    remote_demoted = mirror_ns->is_demoted();
+    if (!mirror_ns->is_primary() && !mirror_ns->is_non_primary()) {
+      derr << "unknown remote mirror snapshot state" << dendl;
+      handle_replay_complete(locker, -EINVAL,
+                             "invalid remote mirror snapshot state");
+      return;
+    } else if (mirror_ns->mirror_peer_uuids.count(m_remote_mirror_peer_uuid) ==
+                 0) {
+      // not a snapshot this peer participates in
+      dout(15) << "skipping remote snapshot due to missing mirror peer"
+               << dendl;
+      continue;
+    }
+
+    auto remote_snap_id = snap_info_it->first;
+    if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) {
+      // we have a local mirror snapshot
+      if (m_local_mirror_snap_ns.is_non_primary()) {
+        // previously validated that it was linked to remote
+        ceph_assert(m_local_mirror_snap_ns.primary_mirror_uuid ==
+                      m_state_builder->remote_mirror_uuid);
+
+        if (m_remote_snap_id_end == CEPH_NOSNAP) {
+          // haven't found the end snap so treat this as a candidate for unlink
+          unlink_snap_ids.insert(remote_snap_id);
+        }
+        if (m_local_mirror_snap_ns.complete &&
+            m_local_mirror_snap_ns.primary_snap_id >= remote_snap_id) {
+          // skip past completed remote snapshot
+          m_remote_snap_id_start = remote_snap_id;
+          m_remote_mirror_snap_ns = *mirror_ns;
+          dout(15) << "skipping synced remote snapshot " << remote_snap_id
+                   << dendl;
+          continue;
+        } else if (!m_local_mirror_snap_ns.complete &&
+                   m_local_mirror_snap_ns.primary_snap_id > remote_snap_id) {
+          // skip until we get to the in-progress remote snapshot
+          dout(15) << "skipping synced remote snapshot " << remote_snap_id
+                   << " while search for in-progress sync" << dendl;
+          m_remote_snap_id_start = remote_snap_id;
+          m_remote_mirror_snap_ns = *mirror_ns;
+          continue;
+        }
+      } else if (m_local_mirror_snap_ns.state ==
+                   cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) {
+        // find the matching demotion snapshot in remote image
+        ceph_assert(m_local_snap_id_start > 0);
+        if (mirror_ns->state ==
+              cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED &&
+            mirror_ns->primary_mirror_uuid == m_local_mirror_uuid &&
+            mirror_ns->primary_snap_id == m_local_snap_id_start) {
+          dout(10) << "located matching demotion snapshot: "
+                   << "remote_snap_id=" << remote_snap_id << ", "
+                   << "local_snap_id=" << m_local_snap_id_start << dendl;
+          m_remote_snap_id_start = remote_snap_id;
+          // a matching demotion clears any split-brain suspicion raised by
+          // earlier non-matching snapshots
+          split_brain = false;
+          continue;
+        } else if (m_remote_snap_id_start == 0) {
+          // still looking for our matching demotion snapshot
+          dout(15) << "skipping remote snapshot " << remote_snap_id << " "
+                   << "while searching for demotion" << dendl;
+          split_brain = true;
+          continue;
+        }
+      } else {
+        // should not have been able to reach this
+        ceph_assert(false);
+      }
+    } else if (!mirror_ns->is_primary()) {
+      dout(15) << "skipping non-primary remote snapshot" << dendl;
+      continue;
+    }
+
+    // found candidate snapshot to sync
+    ++m_pending_snapshots;
+    if (m_remote_snap_id_end != CEPH_NOSNAP) {
+      continue;
+    }
+
+    // first primary snapshot where were are listed as a peer
+    m_remote_snap_id_end = remote_snap_id;
+    m_remote_mirror_snap_ns = *mirror_ns;
+  }
+
+  if (m_remote_snap_id_start != 0 &&
+      remote_image_ctx->snap_info.count(m_remote_snap_id_start) == 0) {
+    // the remote start snapshot was deleted out from under us
+    derr << "failed to locate remote start snapshot: "
+         << "snap_id=" << m_remote_snap_id_start << dendl;
+    split_brain = true;
+  }
+
+  image_locker.unlock();
+
+  if (!split_brain) {
+    // never unlink the snapshots delimiting the current sync window
+    unlink_snap_ids.erase(m_remote_snap_id_start);
+    unlink_snap_ids.erase(m_remote_snap_id_end);
+    if (!unlink_snap_ids.empty()) {
+      locker->unlock();
+
+      // retry the unlinking process for a remote snapshot that we do not
+      // need anymore
+      auto remote_snap_id = *unlink_snap_ids.begin();
+      dout(10) << "unlinking from remote snapshot " << remote_snap_id << dendl;
+      unlink_peer(remote_snap_id);
+      return;
+    }
+
+    if (m_remote_snap_id_end != CEPH_NOSNAP) {
+      dout(10) << "found remote mirror snapshot: "
+               << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+               << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
+               << "remote_snap_ns=" << m_remote_mirror_snap_ns << dendl;
+      if (m_remote_mirror_snap_ns.complete) {
+        locker->unlock();
+
+        if (m_local_snap_id_end != CEPH_NOSNAP &&
+            !m_local_mirror_snap_ns.complete) {
+          // attempt to resume image-sync
+          dout(10) << "local image contains in-progress mirror snapshot"
+                   << dendl;
+          get_local_image_state();
+        } else {
+          copy_snapshots();
+        }
+        return;
+      } else {
+        // might have raced with the creation of a remote mirror snapshot
+        // so we will need to refresh and rescan once it completes
+        dout(15) << "remote mirror snapshot not complete" << dendl;
+      }
+    }
+  }
+
+  if (m_image_updated) {
+    // received update notification while scanning image, restart ...
+    m_image_updated = false;
+    locker->unlock();
+
+    dout(10) << "restarting snapshot scan due to remote update notification"
+             << dendl;
+    load_local_image_meta();
+    return;
+  }
+
+  if (is_replay_interrupted(locker)) {
+    return;
+  } else if (split_brain) {
+    derr << "split-brain detected: failed to find matching non-primary "
+         << "snapshot in remote image: "
+         << "local_snap_id_start=" << m_local_snap_id_start << ", "
+         << "local_snap_ns=" << m_local_mirror_snap_ns << dendl;
+    handle_replay_complete(locker, -EEXIST, "split-brain");
+    return;
+  } else if (remote_demoted) {
+    dout(10) << "remote image demoted" << dendl;
+    handle_replay_complete(locker, -EREMOTEIO, "remote image demoted");
+    return;
+  }
+
+  dout(10) << "all remote snapshots synced: idling waiting for new snapshot"
+           << dendl;
+  ceph_assert(m_state == STATE_REPLAYING);
+  m_state = STATE_IDLE;
+
+  notify_status_updated();
+}
+
+// Remove an unused non-primary (mirror) snapshot from the local image; the
+// completion handler restarts the local metadata scan. If the snapshot has
+// already disappeared the scan is restarted immediately.
+template <typename I>
+void Replayer<I>::prune_non_primary_snapshot(uint64_t snap_id) {
+  dout(10) << "snap_id=" << snap_id << dendl;
+
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+  bool snap_valid = false;
+  cls::rbd::SnapshotNamespace snap_namespace;
+  std::string snap_name;
+
+  {
+    // resolve the snapshot's namespace/name while holding the image lock
+    std::shared_lock image_locker{local_image_ctx->image_lock};
+    auto snap_info = local_image_ctx->get_snap_info(snap_id);
+    if (snap_info != nullptr) {
+      snap_valid = true;
+      snap_namespace = snap_info->snap_namespace;
+      snap_name = snap_info->name;
+
+      // only mirror snapshots should ever be prune candidates
+      ceph_assert(boost::get<cls::rbd::MirrorSnapshotNamespace>(
+                    &snap_namespace) != nullptr);
+    }
+  }
+
+  if (!snap_valid) {
+    // raced with an external snapshot removal -- rescan
+    load_local_image_meta();
+    return;
+  }
+
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_prune_non_primary_snapshot>(this);
+  local_image_ctx->operations->snap_remove(snap_namespace, snap_name, ctx);
+}
+
+// Completion handler for prune_non_primary_snapshot(): restart the local
+// metadata scan unless the removal failed or a shut down is pending.
+template <typename I>
+void Replayer<I>::handle_prune_non_primary_snapshot(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  // a snapshot that already disappeared counts as successfully pruned
+  if (r == -ENOENT) {
+    r = 0;
+  }
+
+  if (r < 0) {
+    derr << "failed to prune non-primary snapshot: " << cpp_strerror(r)
+         << dendl;
+    handle_replay_complete(r, "failed to prune non-primary snapshot");
+    return;
+  }
+
+  if (!is_replay_interrupted()) {
+    load_local_image_meta();
+  }
+}
+
+// Copy the snapshot history in the window (m_remote_snap_id_start,
+// m_remote_snap_id_end] from the remote image to the local image, producing
+// the remote-to-local snap_seqs mapping used by the subsequent image copy.
+template <typename I>
+void Replayer<I>::copy_snapshots() {
+  dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+           << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
+           << "local_snap_id_start=" << m_local_snap_id_start << dendl;
+
+  // scan_remote_mirror_snapshots() must have established a valid sync window
+  ceph_assert(m_remote_snap_id_start != CEPH_NOSNAP);
+  ceph_assert(m_remote_snap_id_end > 0 &&
+              m_remote_snap_id_end != CEPH_NOSNAP);
+  ceph_assert(m_local_snap_id_start != CEPH_NOSNAP);
+
+  // reset the local namespace state; the request populates snap_seqs in place
+  m_local_mirror_snap_ns = {};
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_copy_snapshots>(this);
+  auto req = librbd::deep_copy::SnapshotCopyRequest<I>::create(
+    m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx,
+    m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start,
+    false, m_threads->work_queue, &m_local_mirror_snap_ns.snap_seqs,
+    ctx);
+  req->send();
+}
+
+// Completion handler for copy_snapshots(): on success continue by fetching
+// the remote snapshot's image state; on failure complete the replay.
+template <typename I>
+void Replayer<I>::handle_copy_snapshots(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+             << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
+             << "local_snap_id_start=" << m_local_snap_id_start << ", "
+             << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
+    get_remote_image_state();
+    return;
+  }
+
+  derr << "failed to copy snapshots from remote to local image: "
+       << cpp_strerror(r) << dendl;
+  handle_replay_complete(
+    r, "failed to copy snapshots from remote to local image");
+}
+
+// Fetch the image state recorded in the remote end snapshot into
+// m_image_state; it is later applied to the local image via
+// apply_image_state().
+template <typename I>
+void Replayer<I>::get_remote_image_state() {
+  dout(10) << dendl;
+
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_get_remote_image_state>(this);
+  auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create(
+    m_state_builder->remote_image_ctx, m_remote_snap_id_end,
+    &m_image_state, ctx);
+  req->send();
+}
+
+// Completion handler for get_remote_image_state(): proceed to create the
+// matching local non-primary snapshot, or complete the replay on error.
+template <typename I>
+void Replayer<I>::handle_get_remote_image_state(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    create_non_primary_snapshot();
+    return;
+  }
+
+  derr << "failed to retrieve remote snapshot image state: "
+       << cpp_strerror(r) << dendl;
+  handle_replay_complete(r, "failed to retrieve remote snapshot image state");
+}
+
+// Fetch the image state stored with the in-progress local end snapshot so an
+// interrupted image-sync can be resumed rather than restarted.
+template <typename I>
+void Replayer<I>::get_local_image_state() {
+  dout(10) << dendl;
+
+  // only valid when resuming an existing (incomplete) local mirror snapshot
+  ceph_assert(m_local_snap_id_end != CEPH_NOSNAP);
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_get_local_image_state>(this);
+  auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create(
+    m_state_builder->local_image_ctx, m_local_snap_id_end,
+    &m_image_state, ctx);
+  req->send();
+}
+
+// Completion handler for get_local_image_state(): request an image-sync slot
+// to resume the copy, or complete the replay on error.
+template <typename I>
+void Replayer<I>::handle_get_local_image_state(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    request_sync();
+    return;
+  }
+
+  derr << "failed to retrieve local snapshot image state: "
+       << cpp_strerror(r) << dendl;
+  handle_replay_complete(r, "failed to retrieve local snapshot image state");
+}
+
+// Create the local non-primary mirror snapshot corresponding to
+// m_remote_snap_id_end. When a previous local snapshot exists, first
+// (re)build the remote-to-local user-snapshot id mapping (snap_seqs) needed
+// to apply the remote image state later.
+template <typename I>
+void Replayer<I>::create_non_primary_snapshot() {
+  auto local_image_ctx = m_state_builder->local_image_ctx;
+
+  if (m_local_snap_id_start > 0) {
+    std::shared_lock local_image_locker{local_image_ctx->image_lock};
+
+    auto local_snap_info_it = local_image_ctx->snap_info.find(
+      m_local_snap_id_start);
+    if (local_snap_info_it == local_image_ctx->snap_info.end()) {
+      local_image_locker.unlock();
+
+      derr << "failed to locate local snapshot " << m_local_snap_id_start
+           << dendl;
+      handle_replay_complete(-ENOENT, "failed to locate local start snapshot");
+      return;
+    }
+
+    auto mirror_ns = boost::get<cls::rbd::MirrorSnapshotNamespace>(
+      &local_snap_info_it->second.snap_namespace);
+    ceph_assert(mirror_ns != nullptr);
+
+    // NOTE: local image_lock is held while the remote image_lock is acquired
+    auto remote_image_ctx = m_state_builder->remote_image_ctx;
+    std::shared_lock remote_image_locker{remote_image_ctx->image_lock};
+
+    // (re)build a full mapping from remote to local snap ids for all user
+    // snapshots to support applying image state in the future
+    for (auto& [remote_snap_id, remote_snap_info] :
+           remote_image_ctx->snap_info) {
+      if (remote_snap_id >= m_remote_snap_id_end) {
+        break;
+      }
+
+      // we can ignore all non-user snapshots since image state only includes
+      // user snapshots
+      if (boost::get<cls::rbd::UserSnapshotNamespace>(
+            &remote_snap_info.snap_namespace) == nullptr) {
+        continue;
+      }
+
+      uint64_t local_snap_id = CEPH_NOSNAP;
+      if (mirror_ns->is_demoted() && !m_remote_mirror_snap_ns.is_demoted()) {
+        // if we are creating a non-primary snapshot following a demotion,
+        // re-build the full snapshot sequence since we don't have a valid
+        // snapshot mapping
+        auto local_snap_id_it = local_image_ctx->snap_ids.find(
+          {remote_snap_info.snap_namespace, remote_snap_info.name});
+        if (local_snap_id_it != local_image_ctx->snap_ids.end()) {
+          local_snap_id = local_snap_id_it->second;
+        }
+      } else {
+        auto snap_seq_it = mirror_ns->snap_seqs.find(remote_snap_id);
+        if (snap_seq_it != mirror_ns->snap_seqs.end()) {
+          local_snap_id = snap_seq_it->second;
+        }
+      }
+
+      // only add mappings that aren't already known (e.g. from SnapshotCopy)
+      if (m_local_mirror_snap_ns.snap_seqs.count(remote_snap_id) == 0 &&
+          local_snap_id != CEPH_NOSNAP) {
+        dout(15) << "mapping remote snapshot " << remote_snap_id << " to "
+                 << "local snapshot " << local_snap_id << dendl;
+        m_local_mirror_snap_ns.snap_seqs[remote_snap_id] = local_snap_id;
+      }
+    }
+  }
+
+  dout(10) << "demoted=" << m_remote_mirror_snap_ns.is_demoted() << ", "
+           << "primary_mirror_uuid="
+           << m_state_builder->remote_mirror_uuid << ", "
+           << "primary_snap_id=" << m_remote_snap_id_end << ", "
+           << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
+
+  // the request records the new snapshot id into m_local_snap_id_end
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_create_non_primary_snapshot>(this);
+  auto req = librbd::mirror::snapshot::CreateNonPrimaryRequest<I>::create(
+    local_image_ctx, m_remote_mirror_snap_ns.is_demoted(),
+    m_state_builder->remote_mirror_uuid, m_remote_snap_id_end,
+    m_local_mirror_snap_ns.snap_seqs, m_image_state, &m_local_snap_id_end, ctx);
+  req->send();
+}
+
+// Completion handler for create_non_primary_snapshot(): advance to updating
+// the local mirror image state, or complete the replay on error.
+template <typename I>
+void Replayer<I>::handle_create_non_primary_snapshot(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    dout(15) << "local_snap_id_end=" << m_local_snap_id_end << dendl;
+    update_mirror_image_state();
+    return;
+  }
+
+  derr << "failed to create local mirror snapshot: " << cpp_strerror(r)
+       << dendl;
+  handle_replay_complete(r, "failed to create local mirror snapshot");
+}
+
+// Promote the local mirror image record from CREATING to ENABLED once the
+// first non-primary snapshot has linked the local and remote images; for
+// subsequent snapshots (m_local_snap_id_start > 0) skip straight to
+// requesting an image-sync slot.
+template <typename I>
+void Replayer<I>::update_mirror_image_state() {
+  if (m_local_snap_id_start > 0) {
+    request_sync();
+    return;
+  }
+
+  // a newly created non-primary image has a local mirror state of CREATING
+  // until this point so that we could avoid preserving the image until
+  // the first non-primary snapshot linked the two images together.
+  dout(10) << dendl;
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_update_mirror_image_state>(this);
+  auto req = librbd::mirror::ImageStateUpdateRequest<I>::create(
+    m_state_builder->local_image_ctx->md_ctx,
+    m_state_builder->local_image_ctx->id,
+    cls::rbd::MIRROR_IMAGE_STATE_ENABLED, {}, ctx);
+  req->send();
+}
+
+// Completion handler for update_mirror_image_state(): request an image-sync
+// slot, or complete the replay on error.
+template <typename I>
+void Replayer<I>::handle_update_mirror_image_state(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    request_sync();
+    return;
+  }
+
+  derr << "failed to update local mirror image state: " << cpp_strerror(r)
+       << dendl;
+  handle_replay_complete(r, "failed to update local mirror image state");
+}
+
+// Ask the instance watcher for an image-sync throttle slot before copying
+// image data. If the remote snapshot is clean relative to the sync start
+// point there is no data to copy, so jump straight to applying image state.
+template <typename I>
+void Replayer<I>::request_sync() {
+  if (m_remote_mirror_snap_ns.clean_since_snap_id == m_remote_snap_id_start) {
+    dout(10) << "skipping unnecessary image copy: "
+             << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+             << "remote_mirror_snap_ns=" << m_remote_mirror_snap_ns << dendl;
+    apply_image_state();
+    return;
+  }
+
+  dout(10) << dendl;
+  std::unique_lock locker{m_lock};
+  if (is_replay_interrupted(&locker)) {
+    return;
+  }
+
+  // async callback avoids re-entrancy if the sync slot is granted inline
+  auto ctx = create_async_context_callback(
+    m_threads->work_queue, create_context_callback<
+      Replayer<I>, &Replayer<I>::handle_request_sync>(this));
+  m_instance_watcher->notify_sync_request(m_state_builder->local_image_ctx->id,
+                                          ctx);
+}
+
+// Completion handler for request_sync(): once a sync slot is granted, mark
+// the sync in-progress (so it is released on completion/interruption) and
+// begin the image copy.
+template <typename I>
+void Replayer<I>::handle_request_sync(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (is_replay_interrupted(&locker)) {
+    return;
+  } else if (r == -ECANCELED) {
+    dout(5) << "image-sync canceled" << dendl;
+    handle_replay_complete(&locker, r, "image-sync canceled");
+    return;
+  } else if (r < 0) {
+    derr << "failed to request image-sync: " << cpp_strerror(r) << dendl;
+    handle_replay_complete(&locker, r, "failed to request image-sync");
+    return;
+  }
+
+  // the granted slot must later be returned via notify_sync_complete()
+  m_sync_in_progress = true;
+  locker.unlock();
+
+  copy_image();
+}
+
+// Deep-copy image data for the current sync window from the remote image to
+// the local image, resuming from last_copied_object_number when a previous
+// sync was interrupted. Progress/read events arrive via m_deep_copy_handler.
+template <typename I>
+void Replayer<I>::copy_image() {
+  dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+           << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
+           << "local_snap_id_start=" << m_local_snap_id_start << ", "
+           << "last_copied_object_number="
+           << m_local_mirror_snap_ns.last_copied_object_number << ", "
+           << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
+
+  // reset per-snapshot accounting for the perf counters
+  m_snapshot_bytes = 0;
+  m_snapshot_replay_start = ceph_clock_now();
+  m_deep_copy_handler = new DeepCopyHandler(this);
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_copy_image>(this);
+  auto req = librbd::deep_copy::ImageCopyRequest<I>::create(
+    m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx,
+    m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start, false,
+    (m_local_mirror_snap_ns.last_copied_object_number > 0 ?
+       librbd::deep_copy::ObjectNumber{
+         m_local_mirror_snap_ns.last_copied_object_number} :
+       librbd::deep_copy::ObjectNumber{}),
+    m_local_mirror_snap_ns.snap_seqs, m_deep_copy_handler, ctx);
+  req->send();
+}
+
+// Completion handler for copy_image(): tear down the deep-copy handler,
+// record per-snapshot perf statistics, then apply the remote image state.
+template <typename I>
+void Replayer<I>::handle_copy_image(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  // the handler is only needed while the copy request is in flight
+  delete m_deep_copy_handler;
+  m_deep_copy_handler = nullptr;
+
+  if (r < 0) {
+    derr << "failed to copy remote image to local image: " << cpp_strerror(r)
+         << dendl;
+    handle_replay_complete(r, "failed to copy remote image");
+    return;
+  }
+
+  {
+    // m_snapshot_bytes is shared with handle_copy_image_read()
+    std::unique_lock locker{m_lock};
+    m_bytes_per_snapshot(m_snapshot_bytes);
+    auto time = ceph_clock_now() - m_snapshot_replay_start;
+    if (g_snapshot_perf_counters) {
+      g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes,
+                                    m_snapshot_bytes);
+      g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots);
+      g_snapshot_perf_counters->tinc(
+        l_rbd_mirror_snapshot_replay_snapshots_time, time);
+    }
+    if (m_perf_counters) {
+      m_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes, m_snapshot_bytes);
+      m_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots);
+      m_perf_counters->tinc(l_rbd_mirror_snapshot_replay_snapshots_time, time);
+    }
+    m_snapshot_bytes = 0;
+  }
+
+  apply_image_state();
+}
+
+// Deep-copy progress callback: persist the sync point (last copied object)
+// into the in-progress local mirror snapshot so an interrupted sync can be
+// resumed later.
+template <typename I>
+void Replayer<I>::handle_copy_image_progress(uint64_t object_number,
+                                             uint64_t object_count) {
+  dout(10) << "object_number=" << object_number << ", "
+           << "object_count=" << object_count << dendl;
+
+  std::unique_lock locker{m_lock};
+  // clamp in case the reported object number overshoots the total count
+  m_local_mirror_snap_ns.last_copied_object_number = std::min(
+    object_number, object_count);
+  m_local_object_count = object_count;
+
+  // periodic (non-final) sync-point update
+  update_non_primary_snapshot(false);
+}
+
+// Deep-copy read callback: account freshly read bytes towards the throughput
+// estimate and the running per-snapshot byte counter.
+template <typename I>
+void Replayer<I>::handle_copy_image_read(uint64_t bytes_read) {
+  dout(20) << "bytes_read=" << bytes_read << dendl;
+
+  std::unique_lock guard{m_lock};
+  m_snapshot_bytes += bytes_read;
+  m_bytes_per_second(bytes_read);
+}
+
+// Apply the previously fetched remote image state (m_image_state) to the
+// local image (e.g. user snapshots/metadata) before finalizing the snapshot.
+template <typename I>
+void Replayer<I>::apply_image_state() {
+  dout(10) << dendl;
+
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_apply_image_state>(this);
+  auto req = ApplyImageStateRequest<I>::create(
+    m_local_mirror_uuid,
+    m_state_builder->remote_mirror_uuid,
+    m_state_builder->local_image_ctx,
+    m_state_builder->remote_image_ctx,
+    m_image_state, ctx);
+  req->send();
+}
+
+// Completion handler for apply_image_state(): mark the local mirror snapshot
+// complete (final sync-point update), or complete the replay on error.
+template <typename I>
+void Replayer<I>::handle_apply_image_state(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  // a missing snapshot is tolerated at this stage
+  if (r == -ENOENT) {
+    r = 0;
+  }
+
+  if (r < 0) {
+    derr << "failed to apply remote image state to local image: "
+         << cpp_strerror(r) << dendl;
+    handle_replay_complete(r, "failed to apply remote image state");
+    return;
+  }
+
+  // update_non_primary_snapshot() requires m_lock to be held
+  std::unique_lock locker{m_lock};
+  update_non_primary_snapshot(true);
+}
+
+// Persist copy progress (and, when complete=true, the completion flag) into
+// the local non-primary mirror snapshot via a cls call on the image header.
+// Caller must hold m_lock. Periodic (complete=false) updates are throttled
+// to a single in-flight operation via m_updating_sync_point.
+template <typename I>
+void Replayer<I>::update_non_primary_snapshot(bool complete) {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  if (!complete) {
+    // disallow two in-flight updates if this isn't the completion of the sync
+    if (m_updating_sync_point) {
+      return;
+    }
+    m_updating_sync_point = true;
+  } else {
+    m_local_mirror_snap_ns.complete = true;
+  }
+
+  dout(10) << dendl;
+
+  librados::ObjectWriteOperation op;
+  librbd::cls_client::mirror_image_snapshot_set_copy_progress(
+    &op, m_local_snap_id_end, m_local_mirror_snap_ns.complete,
+    m_local_mirror_snap_ns.last_copied_object_number);
+
+  // tracked so shut down waits for the in-flight header update
+  auto ctx = new C_TrackedOp(
+    m_in_flight_op_tracker, new LambdaContext([this, complete](int r) {
+      handle_update_non_primary_snapshot(complete, r);
+    }));
+  auto aio_comp = create_rados_callback(ctx);
+  int r = m_state_builder->local_image_ctx->md_ctx.aio_operate(
+    m_state_builder->local_image_ctx->header_oid, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// Completion handler for update_non_primary_snapshot(). Periodic sync-point
+// updates only clear the in-flight flag (errors are non-fatal and retried on
+// the next progress callback); the final update advances the state machine.
+template <typename I>
+void Replayer<I>::handle_update_non_primary_snapshot(bool complete, int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to update local snapshot progress: " << cpp_strerror(r)
+         << dendl;
+    if (complete) {
+      // only fail if this was the final update
+      handle_replay_complete(r, "failed to update local snapshot progress");
+      return;
+    }
+  }
+
+  if (!complete) {
+    // periodic sync-point update -- do not advance state machine
+    std::unique_lock locker{m_lock};
+
+    ceph_assert(m_updating_sync_point);
+    m_updating_sync_point = false;
+    return;
+  }
+
+  notify_image_update();
+}
+
+// Broadcast an update notification on the local image so that any watchers
+// observe the newly synced snapshot.
+template <typename I>
+void Replayer<I>::notify_image_update() {
+  dout(10) << dendl;
+
+  m_state_builder->local_image_ctx->notify_update(
+    create_context_callback<
+      Replayer<I>, &Replayer<I>::handle_notify_image_update>(this));
+}
+
+// Completion handler for notify_image_update(): notification failures are
+// non-fatal (log only); continue by unlinking the now-obsolete sync start
+// snapshot from the remote image.
+template <typename I>
+void Replayer<I>::handle_notify_image_update(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to notify local image update: " << cpp_strerror(r) << dendl;
+  }
+
+  unlink_peer(m_remote_snap_id_start);
+}
+
+// Remove this peer's link from the given remote snapshot (allowing the
+// remote side to prune it). A snap id of 0 means there is nothing to unlink,
+// so finish the sync immediately.
+template <typename I>
+void Replayer<I>::unlink_peer(uint64_t remote_snap_id) {
+  if (remote_snap_id == 0) {
+    finish_sync();
+    return;
+  }
+
+  // local snapshot fully synced -- we no longer depend on the sync
+  // start snapshot in the remote image
+  dout(10) << "remote_snap_id=" << remote_snap_id << dendl;
+
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_unlink_peer>(this);
+  auto req = librbd::mirror::snapshot::UnlinkPeerRequest<I>::create(
+    m_state_builder->remote_image_ctx, remote_snap_id,
+    m_remote_mirror_peer_uuid, ctx);
+  req->send();
+}
+
+// Completion handler for unlink_peer(): finish the sync, treating an
+// already-removed snapshot as success.
+template <typename I>
+void Replayer<I>::handle_unlink_peer(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  // the snapshot vanishing out from under us is equivalent to success
+  if (r == -ENOENT) {
+    r = 0;
+  }
+
+  if (r < 0) {
+    derr << "failed to unlink local peer from remote image: " << cpp_strerror(r)
+         << dendl;
+    handle_replay_complete(r, "failed to unlink local peer from remote image");
+    return;
+  }
+
+  finish_sync();
+}
+
+// Finalize one snapshot sync: publish the status update, release the
+// image-sync throttle slot, and (unless shutting down) restart the scan to
+// pick up the next snapshot.
+template <typename I>
+void Replayer<I>::finish_sync() {
+  dout(10) << dendl;
+
+  {
+    std::unique_lock locker{m_lock};
+    notify_status_updated();
+
+    if (m_sync_in_progress) {
+      // return the throttle slot acquired in handle_request_sync()
+      m_sync_in_progress = false;
+      m_instance_watcher->notify_sync_complete(
+        m_state_builder->local_image_ctx->id);
+    }
+  }
+
+  if (is_replay_interrupted()) {
+    return;
+  }
+
+  load_local_image_meta();
+}
+
+// Init step: register an update watcher on the local image. The (shared)
+// C_UpdateWatchCtx routes notifications to handle_image_update_notify().
+// The synchronous result is delivered through the work queue to keep the
+// callback asynchronous.
+template <typename I>
+void Replayer<I>::register_local_update_watcher() {
+  dout(10) << dendl;
+
+  m_update_watch_ctx = new C_UpdateWatchCtx(this);
+
+  int r = m_state_builder->local_image_ctx->state->register_update_watcher(
+    m_update_watch_ctx, &m_local_update_watcher_handle);
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_register_local_update_watcher>(this);
+  m_threads->work_queue->queue(ctx, r);
+}
+
+// Completion handler for register_local_update_watcher(). On failure the
+// init is aborted: state is forced to COMPLETE, the watch context is freed,
+// and the pending init context (m_on_init_shutdown) is completed with the
+// error.
+template <typename I>
+void Replayer<I>::handle_register_local_update_watcher(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to register local update watcher: " << cpp_strerror(r)
+         << dendl;
+    handle_replay_complete(r, "failed to register local image update watcher");
+    m_state = STATE_COMPLETE;
+
+    delete m_update_watch_ctx;
+    m_update_watch_ctx = nullptr;
+
+    // hand the init context back to the caller outside of m_lock
+    Context* on_init = nullptr;
+    std::swap(on_init, m_on_init_shutdown);
+    on_init->complete(r);
+    return;
+  }
+
+  register_remote_update_watcher();
+}
+
+// Init step: register the shared update watcher on the remote image as well,
+// reusing the C_UpdateWatchCtx created for the local image.
+template <typename I>
+void Replayer<I>::register_remote_update_watcher() {
+  dout(10) << dendl;
+
+  int r = m_state_builder->remote_image_ctx->state->register_update_watcher(
+    m_update_watch_ctx, &m_remote_update_watcher_handle);
+  auto ctx = create_context_callback<
+    Replayer<I>, &Replayer<I>::handle_register_remote_update_watcher>(this);
+  m_threads->work_queue->queue(ctx, r);
+}
+
+// Completion handler for register_remote_update_watcher(). On success the
+// replayer transitions to REPLAYING, completes the init context, and kicks
+// off the first snapshot scan; on failure init is aborted and the already
+// registered local watcher is torn down.
+template <typename I>
+void Replayer<I>::handle_register_remote_update_watcher(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to register remote update watcher: " << cpp_strerror(r)
+         << dendl;
+    handle_replay_complete(r, "failed to register remote image update watcher");
+    m_state = STATE_COMPLETE;
+
+    // unwinds the local watcher and eventually completes m_on_init_shutdown
+    unregister_local_update_watcher();
+    return;
+  }
+
+  m_state = STATE_REPLAYING;
+
+  Context* on_init = nullptr;
+  std::swap(on_init, m_on_init_shutdown);
+  on_init->complete(0);
+
+  // delay initial snapshot scan until after we have alerted
+  // image replayer that we have initialized in case an error
+  // occurs
+  {
+    std::unique_lock locker{m_lock};
+    notify_status_updated();
+  }
+
+  load_local_image_meta();
+}
+
+// Shut down step: tear down the remote update watcher first; the local
+// watcher is unregistered from the completion handler.
+template <typename I>
+void Replayer<I>::unregister_remote_update_watcher() {
+  dout(10) << dendl;
+
+  m_state_builder->remote_image_ctx->state->unregister_update_watcher(
+    m_remote_update_watcher_handle,
+    create_context_callback<
+      Replayer<I>,
+      &Replayer<I>::handle_unregister_remote_update_watcher>(this));
+}
+
+// Completion handler for unregister_remote_update_watcher(): failures are
+// non-fatal during shut down (log only); continue tearing down the local
+// watcher.
+template <typename I>
+void Replayer<I>::handle_unregister_remote_update_watcher(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to unregister remote update watcher: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  unregister_local_update_watcher();
+}
+
+// Shut down step: tear down the local update watcher; the shared watch
+// context is released in the completion handler.
+template <typename I>
+void Replayer<I>::unregister_local_update_watcher() {
+  dout(10) << dendl;
+
+  m_state_builder->local_image_ctx->state->unregister_update_watcher(
+    m_local_update_watcher_handle,
+    create_context_callback<
+      Replayer<I>,
+      &Replayer<I>::handle_unregister_local_update_watcher>(this));
+}
+
+// Completion handler for unregister_local_update_watcher(): failures are
+// non-fatal (log only). Both watchers are now gone, so the shared watch
+// context can be freed before draining in-flight operations.
+template <typename I>
+void Replayer<I>::handle_unregister_local_update_watcher(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to unregister local update watcher: " << cpp_strerror(r)
+         << dendl;
+  }
+
+  delete m_update_watch_ctx;
+  m_update_watch_ctx = nullptr;
+
+  wait_for_in_flight_ops();
+}
+
+// Shut down step: wait for all tracked in-flight operations to drain; the
+// completion is fired on the work queue to avoid re-entrancy.
+template <typename I>
+void Replayer<I>::wait_for_in_flight_ops() {
+  dout(10) << dendl;
+
+  m_in_flight_op_tracker.wait_for_ops(create_async_context_callback(
+    m_threads->work_queue,
+    create_context_callback<
+      Replayer<I>, &Replayer<I>::handle_wait_for_in_flight_ops>(this)));
+}
+
+// Final shut down step: all in-flight ops have drained, so complete the
+// pending shut-down context with the first recorded error (if any).
+template <typename I>
+void Replayer<I>::handle_wait_for_in_flight_ops(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  Context* on_shutdown = nullptr;
+  {
+    // claim the context under m_lock; complete it outside the lock
+    std::unique_lock locker{m_lock};
+    ceph_assert(m_on_init_shutdown != nullptr);
+    std::swap(on_shutdown, m_on_init_shutdown);
+  }
+  on_shutdown->complete(m_error_code);
+}
+
+// Update-watcher callback (local or remote image changed). While a scan is
+// in progress just flag a rescan; when idle, restart the replay loop.
+template <typename I>
+void Replayer<I>::handle_image_update_notify() {
+  dout(10) << dendl;
+
+  std::unique_lock locker{m_lock};
+  if (m_state == STATE_REPLAYING) {
+    // scan_remote_mirror_snapshots() checks this flag before idling
+    dout(15) << "flagging snapshot rescan required" << dendl;
+    m_image_updated = true;
+  } else if (m_state == STATE_IDLE) {
+    m_state = STATE_REPLAYING;
+    locker.unlock();
+
+    dout(15) << "restarting idle replayer" << dendl;
+    load_local_image_meta();
+  }
+}
+
+// Convenience overload: acquires m_lock and delegates to the locked variant.
+template <typename I>
+void Replayer<I>::handle_replay_complete(int r,
+                                         const std::string& description) {
+  std::unique_lock locker{m_lock};
+  handle_replay_complete(&locker, r, description);
+}
+
+// Record a terminal replay result: release any held image-sync slot, stash
+// the first error code/description, and transition to STATE_COMPLETE. If a
+// shut down is already pending, resume it instead (via
+// is_replay_interrupted). Caller must hold m_lock through *locker.
+template <typename I>
+void Replayer<I>::handle_replay_complete(std::unique_lock<ceph::mutex>* locker,
+                                         int r,
+                                         const std::string& description) {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  if (m_sync_in_progress) {
+    m_sync_in_progress = false;
+    m_instance_watcher->notify_sync_complete(
+      m_state_builder->local_image_ctx->id);
+  }
+
+  // don't set error code and description if resuming a pending
+  // shutdown
+  if (is_replay_interrupted(locker)) {
+    return;
+  }
+
+  // keep only the first recorded error
+  if (m_error_code == 0) {
+    m_error_code = r;
+    m_error_description = description;
+  }
+
+  if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) {
+    return;
+  }
+
+  m_state = STATE_COMPLETE;
+  notify_status_updated();
+}
+
+// Queue an asynchronous notification to the replayer listener; tracked so
+// shut down waits for it. Caller must hold m_lock.
+template <typename I>
+void Replayer<I>::notify_status_updated() {
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  dout(10) << dendl;
+  auto ctx = new C_TrackedOp(m_in_flight_op_tracker, new LambdaContext(
+    [this](int) {
+      m_replayer_listener->handle_notification();
+    }));
+  m_threads->work_queue->queue(ctx, 0);
+}
+
+// Convenience overload: acquires m_lock and delegates to the locked variant.
+template <typename I>
+bool Replayer<I>::is_replay_interrupted() {
+  std::unique_lock locker{m_lock};
+  return is_replay_interrupted(&locker);
+}
+
+// Returns true (and resumes the pending shut down) if the replayer has been
+// moved to STATE_COMPLETE. NOTE: on the true path the caller's lock is
+// released before the shut-down sequence is resumed.
+template <typename I>
+bool Replayer<I>::is_replay_interrupted(std::unique_lock<ceph::mutex>* locker) {
+  if (m_state == STATE_COMPLETE) {
+    locker->unlock();
+
+    dout(10) << "resuming pending shut down" << dendl;
+    unregister_remote_update_watcher();
+    return true;
+  }
+  return false;
+}
+
+// Create and register the per-image snapshot replay perf counters
+// (snapshots, snapshot time, replayed bytes). Caller must hold m_lock;
+// counters must not already exist.
+template <typename I>
+void Replayer<I>::register_perf_counters() {
+  dout(5) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  ceph_assert(m_perf_counters == nullptr);
+
+  // priority is configurable so per-image stats can be suppressed
+  auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
+  auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
+  PerfCountersBuilder plb(g_ceph_context,
+                          "rbd_mirror_snapshot_image_" + m_image_spec,
+                          l_rbd_mirror_snapshot_first,
+                          l_rbd_mirror_snapshot_last);
+  plb.add_u64_counter(l_rbd_mirror_snapshot_replay_snapshots,
+                      "snapshots", "Snapshots", "r", prio);
+  plb.add_time_avg(l_rbd_mirror_snapshot_replay_snapshots_time,
+                   "snapshots_time", "Snapshots time", "rl", prio);
+  plb.add_u64_counter(l_rbd_mirror_snapshot_replay_bytes, "replay_bytes",
+                      "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
+  m_perf_counters = plb.create_perf_counters();
+  g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
+}
+
+// Detach and destroy the per-image perf counters. Caller must hold m_lock;
+// resetting the member first makes repeated calls a no-op.
+template <typename I>
+void Replayer<I>::unregister_perf_counters() {
+  dout(5) << dendl;
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+  auto counters = std::exchange(m_perf_counters, nullptr);
+  if (counters != nullptr) {
+    g_ceph_context->get_perfcounters_collection()->remove(counters);
+    delete counters;
+  }
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::Replayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h
new file mode 100644
index 000000000..e3c4c2089
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h
@@ -0,0 +1,346 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H
+
+#include "tools/rbd_mirror/image_replayer/Replayer.h"
+#include "common/ceph_mutex.h"
+#include "common/AsyncOpTracker.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/snapshot/Types.h"
+#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h"
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/accumulators/statistics/rolling_mean.hpp>
+#include <string>
+#include <type_traits>
+
+namespace librbd {
+
+struct ImageCtx;
+namespace snapshot { template <typename I> class Replay; }
+
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct InstanceWatcher;
+class PoolMetaCache;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+struct ReplayerListener;
+
+namespace snapshot {
+
+template <typename> class EventPreprocessor;
+template <typename> class ReplayStatusFormatter;
+template <typename> class StateBuilder;
+
+// Snapshot-based image replayer: applies mirror-snapshot updates from the
+// remote (primary) image to the local (non-primary) image.  The full replay
+// flow is documented in the state diagram below.  Thread-safety: m_lock
+// guards the mutable state accessed by the public accessors.
+template <typename ImageCtxT>
+class Replayer : public image_replayer::Replayer {
+public:
+  static Replayer* create(
+      Threads<ImageCtxT>* threads,
+      InstanceWatcher<ImageCtxT>* instance_watcher,
+      const std::string& local_mirror_uuid,
+      PoolMetaCache* pool_meta_cache,
+      StateBuilder<ImageCtxT>* state_builder,
+      ReplayerListener* replayer_listener) {
+    return new Replayer(threads, instance_watcher, local_mirror_uuid,
+                        pool_meta_cache, state_builder, replayer_listener);
+  }
+
+  Replayer(
+      Threads<ImageCtxT>* threads,
+      InstanceWatcher<ImageCtxT>* instance_watcher,
+      const std::string& local_mirror_uuid,
+      PoolMetaCache* pool_meta_cache,
+      StateBuilder<ImageCtxT>* state_builder,
+      ReplayerListener* replayer_listener);
+  ~Replayer();
+
+  void destroy() override {
+    delete this;
+  }
+
+  void init(Context* on_finish) override;
+  void shut_down(Context* on_finish) override;
+
+  void flush(Context* on_finish) override;
+
+  bool get_replay_status(std::string* description, Context* on_finish) override;
+
+  // replaying covers both actively applying a snapshot and idling while
+  // waiting for the next update notification
+  bool is_replaying() const override {
+    std::unique_lock locker{m_lock};
+    return (m_state == STATE_REPLAYING || m_state == STATE_IDLE);
+  }
+
+  bool is_resync_requested() const override {
+    std::unique_lock locker{m_lock};
+    return m_resync_requested;
+  }
+
+  int get_error_code() const override {
+    std::unique_lock locker(m_lock);
+    return m_error_code;
+  }
+
+  std::string get_error_description() const override {
+    std::unique_lock locker(m_lock);
+    return m_error_description;
+  }
+
+  std::string get_image_spec() const {
+    std::unique_lock locker(m_lock);
+    return m_image_spec;
+  }
+
+private:
+  /**
+   * @verbatim
+   *
+   * <init>
+   *    |
+   *    v
+   * REGISTER_LOCAL_UPDATE_WATCHER
+   *    |
+   *    v
+   * REGISTER_REMOTE_UPDATE_WATCHER
+   *    |
+   *    v
+   * LOAD_LOCAL_IMAGE_META <----------------------------\
+   *    |                                               |
+   *    v (skip if not needed)                          |
+   * REFRESH_LOCAL_IMAGE                                |
+   *    |                                               |
+   *    v (skip if not needed)                          |
+   * REFRESH_REMOTE_IMAGE                               |
+   *    |                                               |
+   *    | (unused non-primary snapshot)                 |
+   *    |\--------------> PRUNE_NON_PRIMARY_SNAPSHOT---/|
+   *    |                                               |
+   *    | (interrupted sync)                            |
+   *    |\--------------> GET_LOCAL_IMAGE_STATE ------\ |
+   *    |                                             | |
+   *    | (new snapshot)                              | |
+   *    |\--------------> COPY_SNAPSHOTS              | |
+   *    |                      |                      | |
+   *    |                      v                      | |
+   *    |                 GET_REMOTE_IMAGE_STATE      | |
+   *    |                      |                      | |
+   *    |                      v                      | |
+   *    |                 CREATE_NON_PRIMARY_SNAPSHOT | |
+   *    |                      |                      | |
+   *    |                      v (skip if not needed)| |
+   *    |                 UPDATE_MIRROR_IMAGE_STATE   | |
+   *    |                      |                      | |
+   *    |                      |/--------------------/ |
+   *    |                      |                       |
+   *    |                      v                       |
+   *    |                 REQUEST_SYNC                 |
+   *    |                      |                       |
+   *    |                      v                       |
+   *    |                 COPY_IMAGE                   |
+   *    |                      |                       |
+   *    |                      v                       |
+   *    |                 APPLY_IMAGE_STATE            |
+   *    |                      |                       |
+   *    |                      v                       |
+   *    |                 UPDATE_NON_PRIMARY_SNAPSHOT  |
+   *    |                      |                       |
+   *    |                      v                       |
+   *    |                 NOTIFY_IMAGE_UPDATE          |
+   *    |                      |                       |
+   *    | (interrupted unlink) v                       |
+   *    |\--------------> UNLINK_PEER                  |
+   *    |                      |                       |
+   *    |                      v                       |
+   *    |                 NOTIFY_LISTENER              |
+   *    |                      |                       |
+   *    |                      \----------------------/|
+   *    |                                              |
+   *    | (remote demoted)                             |
+   *    \---------------> NOTIFY_LISTENER              |
+   *    |                      |                       |
+   *    |/--------------------/                        |
+   *    |                                              |
+   *    | (update notification)                        |
+   * <idle> --------------------------------------------/
+   *    |
+   *    v
+   * <shut down>
+   *    |
+   *    v
+   * UNREGISTER_REMOTE_UPDATE_WATCHER
+   *    |
+   *    v
+   * UNREGISTER_LOCAL_UPDATE_WATCHER
+   *    |
+   *    v
+   * WAIT_FOR_IN_FLIGHT_OPS
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  enum State {
+    STATE_INIT,       // initializing (watcher registration in progress)
+    STATE_REPLAYING,  // actively scanning/applying mirror snapshots
+    STATE_IDLE,       // waiting for the next update notification
+    STATE_COMPLETE    // shut down (or failed) -- no further replay
+  };
+
+  struct C_UpdateWatchCtx;
+  struct DeepCopyHandler;
+
+  Threads<ImageCtxT>* m_threads;
+  InstanceWatcher<ImageCtxT>* m_instance_watcher;
+  std::string m_local_mirror_uuid;
+  PoolMetaCache* m_pool_meta_cache;
+  StateBuilder<ImageCtxT>* m_state_builder;
+  ReplayerListener* m_replayer_listener;
+
+  // guards the mutable replayer state below
+  mutable ceph::mutex m_lock;
+
+  State m_state = STATE_INIT;
+
+  // image identifier used for perf counter naming and exposed via
+  // get_image_spec()
+  std::string m_image_spec;
+  Context* m_on_init_shutdown = nullptr;
+
+  bool m_resync_requested = false;
+  int m_error_code = 0;
+  std::string m_error_description;
+
+  // update watch registrations on the local and remote images
+  C_UpdateWatchCtx* m_update_watch_ctx = nullptr;
+  uint64_t m_local_update_watcher_handle = 0;
+  uint64_t m_remote_update_watcher_handle = 0;
+  bool m_image_updated = false;
+
+  // tracks async operations so shut down can wait for them to drain
+  // (WAIT_FOR_IN_FLIGHT_OPS)
+  AsyncOpTracker m_in_flight_op_tracker;
+
+  // snapshot window for the in-flight sync (CEPH_NOSNAP == open-ended)
+  uint64_t m_local_snap_id_start = 0;
+  uint64_t m_local_snap_id_end = CEPH_NOSNAP;
+  cls::rbd::MirrorSnapshotNamespace m_local_mirror_snap_ns;
+  uint64_t m_local_object_count = 0;
+
+  std::string m_remote_mirror_peer_uuid;
+  uint64_t m_remote_snap_id_start = 0;
+  uint64_t m_remote_snap_id_end = CEPH_NOSNAP;
+  cls::rbd::MirrorSnapshotNamespace m_remote_mirror_snap_ns;
+
+  librbd::mirror::snapshot::ImageState m_image_state;
+  DeepCopyHandler* m_deep_copy_handler = nullptr;
+
+  // throughput estimate fed by handle_copy_image_read()
+  TimeRollingMean m_bytes_per_second;
+
+  uint64_t m_snapshot_bytes = 0;
+  // rolling mean of bytes transferred over the last two snapshot syncs
+  boost::accumulators::accumulator_set<
+    uint64_t, boost::accumulators::stats<
+      boost::accumulators::tag::rolling_mean>> m_bytes_per_snapshot{
+    boost::accumulators::tag::rolling_window::window_size = 2};
+  utime_t m_snapshot_replay_start;
+
+  uint32_t m_pending_snapshots = 0;
+
+  bool m_remote_image_updated = false;
+  bool m_updating_sync_point = false;
+  bool m_sync_in_progress = false;
+
+  // per-image perf counters; registered/unregistered under m_lock
+  PerfCounters *m_perf_counters = nullptr;
+
+  void load_local_image_meta();
+  void handle_load_local_image_meta(int r);
+
+  void refresh_local_image();
+  void handle_refresh_local_image(int r);
+
+  void refresh_remote_image();
+  void handle_refresh_remote_image(int r);
+
+  void scan_local_mirror_snapshots(std::unique_lock<ceph::mutex>* locker);
+  void scan_remote_mirror_snapshots(std::unique_lock<ceph::mutex>* locker);
+
+  void prune_non_primary_snapshot(uint64_t snap_id);
+  void handle_prune_non_primary_snapshot(int r);
+
+  void copy_snapshots();
+  void handle_copy_snapshots(int r);
+
+  void get_remote_image_state();
+  void handle_get_remote_image_state(int r);
+
+  void get_local_image_state();
+  void handle_get_local_image_state(int r);
+
+  void create_non_primary_snapshot();
+  void handle_create_non_primary_snapshot(int r);
+
+  void update_mirror_image_state();
+  void handle_update_mirror_image_state(int r);
+
+  void request_sync();
+  void handle_request_sync(int r);
+
+  void copy_image();
+  void handle_copy_image(int r);
+  void handle_copy_image_progress(uint64_t object_number,
+                                  uint64_t object_count);
+  void handle_copy_image_read(uint64_t bytes_read);
+
+  void apply_image_state();
+  void handle_apply_image_state(int r);
+
+  void update_non_primary_snapshot(bool complete);
+  void handle_update_non_primary_snapshot(bool complete, int r);
+
+  void notify_image_update();
+  void handle_notify_image_update(int r);
+
+  void unlink_peer(uint64_t remote_snap_id);
+  void handle_unlink_peer(int r);
+
+  void finish_sync();
+
+  void register_local_update_watcher();
+  void handle_register_local_update_watcher(int r);
+
+  void register_remote_update_watcher();
+  void handle_register_remote_update_watcher(int r);
+
+  void unregister_remote_update_watcher();
+  void handle_unregister_remote_update_watcher(int r);
+
+  void unregister_local_update_watcher();
+  void handle_unregister_local_update_watcher(int r);
+
+  void wait_for_in_flight_ops();
+  void handle_wait_for_in_flight_ops(int r);
+
+  void handle_image_update_notify();
+
+  void handle_replay_complete(int r, const std::string& description);
+  void handle_replay_complete(std::unique_lock<ceph::mutex>* locker,
+                              int r, const std::string& description);
+  void notify_status_updated();
+
+  bool is_replay_interrupted();
+  bool is_replay_interrupted(std::unique_lock<ceph::mutex>* lock);
+
+  void register_perf_counters();
+  void unregister_perf_counters();
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::Replayer<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc
new file mode 100644
index 000000000..ca3e6918b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc
@@ -0,0 +1,120 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "StateBuilder.h"
+#include "include/ceph_assert.h"
+#include "include/Context.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/mirror/snapshot/ImageMeta.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/Replayer.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "StateBuilder: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+template <typename I>
+StateBuilder<I>::StateBuilder(const std::string& global_image_id)
+  : image_replayer::StateBuilder<I>(global_image_id) {
+}
+
+template <typename I>
+StateBuilder<I>::~StateBuilder() {
+  // close() must have released the image meta before destruction
+  ceph_assert(local_image_meta == nullptr);
+}
+
+// Release the image meta and close the local then the remote image.
+template <typename I>
+void StateBuilder<I>::close(Context* on_finish) {
+  dout(10) << dendl;
+
+  delete local_image_meta;
+  local_image_meta = nullptr;
+
+  // close the remote image after closing the local
+  // image in case the remote cluster is unreachable and
+  // we cannot close it.
+  on_finish = new LambdaContext([this, on_finish](int) {
+      this->close_remote_image(on_finish);
+    });
+  this->close_local_image(on_finish);
+}
+
+// snapshot-based mirroring has no journal client that could become
+// disconnected
+template <typename I>
+bool StateBuilder<I>::is_disconnected() const {
+  return false;
+}
+
+template <typename I>
+bool StateBuilder<I>::is_linked_impl() const {
+  // the remote has to have us registered as a peer
+  return !remote_mirror_peer_uuid.empty();
+}
+
+template <typename I>
+cls::rbd::MirrorImageMode StateBuilder<I>::get_mirror_image_mode() const {
+  return cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT;
+}
+
+template <typename I>
+image_sync::SyncPointHandler* StateBuilder<I>::create_sync_point_handler() {
+  dout(10) << dendl;
+
+  // TODO: not implemented for snapshot-based mirroring; callers must not
+  // request a sync-point handler in this mode
+  ceph_assert(false);
+  return nullptr;
+}
+
+// Build the request that creates the local (non-primary) image copy.
+template <typename I>
+BaseRequest* StateBuilder<I>::create_local_image_request(
+    Threads<I>* threads,
+    librados::IoCtx& local_io_ctx,
+    const std::string& global_image_id,
+    PoolMetaCache* pool_meta_cache,
+    ProgressContext* progress_ctx,
+    Context* on_finish) {
+  return CreateLocalImageRequest<I>::create(
+    threads, local_io_ctx, this->remote_image_ctx, global_image_id,
+    pool_meta_cache, progress_ctx, this, on_finish);
+}
+
+// Build the request that determines whether replay can start (or whether a
+// resync / in-progress sync was detected).
+template <typename I>
+BaseRequest* StateBuilder<I>::create_prepare_replay_request(
+    const std::string& local_mirror_uuid,
+    ProgressContext* progress_ctx,
+    bool* resync_requested,
+    bool* syncing,
+    Context* on_finish) {
+  return PrepareReplayRequest<I>::create(
+    local_mirror_uuid, progress_ctx, this, resync_requested, syncing,
+    on_finish);
+}
+
+// Build the snapshot-mode Replayer for this image.
+template <typename I>
+image_replayer::Replayer* StateBuilder<I>::create_replayer(
+    Threads<I>* threads,
+    InstanceWatcher<I>* instance_watcher,
+    const std::string& local_mirror_uuid,
+    PoolMetaCache* pool_meta_cache,
+    ReplayerListener* replayer_listener) {
+  return Replayer<I>::create(
+    threads, instance_watcher, local_mirror_uuid, pool_meta_cache, this,
+    replayer_listener);
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::StateBuilder<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h
new file mode 100644
index 000000000..a4ab82982
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h
@@ -0,0 +1,93 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H
+
+#include "tools/rbd_mirror/image_replayer/StateBuilder.h"
+#include <string>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace mirror {
+namespace snapshot {
+
+template <typename> class ImageMeta;
+
+} // namespace snapshot
+} // namespace mirror
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+template <typename> class SyncPointHandler;
+
+// Snapshot-mode specialization of the image-replayer state builder: owns the
+// local image's mirror-snapshot metadata and constructs the snapshot-based
+// Replayer and its supporting requests.
+template <typename ImageCtxT>
+class StateBuilder : public image_replayer::StateBuilder<ImageCtxT> {
+public:
+  static StateBuilder* create(const std::string& global_image_id) {
+    return new StateBuilder(global_image_id);
+  }
+
+  StateBuilder(const std::string& global_image_id);
+  ~StateBuilder() override;
+
+  void close(Context* on_finish) override;
+
+  bool is_disconnected() const override;
+
+  cls::rbd::MirrorImageMode get_mirror_image_mode() const override;
+
+  image_sync::SyncPointHandler* create_sync_point_handler() override;
+
+  // snapshot replay always needs the remote image open
+  bool replay_requires_remote_image() const override {
+    return true;
+  }
+
+  BaseRequest* create_local_image_request(
+      Threads<ImageCtxT>* threads,
+      librados::IoCtx& local_io_ctx,
+      const std::string& global_image_id,
+      PoolMetaCache* pool_meta_cache,
+      ProgressContext* progress_ctx,
+      Context* on_finish) override;
+
+  BaseRequest* create_prepare_replay_request(
+      const std::string& local_mirror_uuid,
+      ProgressContext* progress_ctx,
+      bool* resync_requested,
+      bool* syncing,
+      Context* on_finish) override;
+
+  image_replayer::Replayer* create_replayer(
+      Threads<ImageCtxT>* threads,
+      InstanceWatcher<ImageCtxT>* instance_watcher,
+      const std::string& local_mirror_uuid,
+      PoolMetaCache* pool_meta_cache,
+      ReplayerListener* replayer_listener) override;
+
+  // NOTE(review): appears unused by the visible code -- create_sync_point_handler
+  // asserts; confirm against the rest of the tree before relying on it
+  SyncPointHandler<ImageCtxT>* sync_point_handler = nullptr;
+
+  // uuid of our peer registration on the remote cluster; empty when the
+  // remote is not linked to us (see is_linked_impl)
+  std::string remote_mirror_peer_uuid;
+
+  // local image's mirror-snapshot metadata; owned here and freed in close()
+  librbd::mirror::snapshot::ImageMeta<ImageCtxT>* local_image_meta = nullptr;
+
+private:
+  bool is_linked_impl() const override;
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::StateBuilder<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc
new file mode 100644
index 000000000..7c20410cb
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Utils.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::util::" \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+namespace util {
+
+// Map a local snapshot id to the remote (primary) snapshot id it mirrors,
+// by scanning the local image's non-primary mirror snapshots -- either a
+// direct match on the snapshot itself or an entry in a later snapshot's
+// snap_seqs mapping.  Returns CEPH_NOSNAP if no mapping exists.
+// Precondition: local_image_lock held (asserted).
+uint64_t compute_remote_snap_id(
+    const ceph::shared_mutex& local_image_lock,
+    const std::map<librados::snap_t, librbd::SnapInfo>& local_snap_infos,
+    uint64_t local_snap_id, const std::string& remote_mirror_uuid) {
+  ceph_assert(ceph_mutex_is_locked(local_image_lock));
+
+  // Search our local non-primary snapshots for a mapping to the remote
+  // snapshot. The non-primary mirror snapshot with the mappings will always
+  // come at or after the snapshot we are searching against
+  for (auto snap_it = local_snap_infos.lower_bound(local_snap_id);
+       snap_it != local_snap_infos.end(); ++snap_it) {
+    auto mirror_ns = boost::get<cls::rbd::MirrorSnapshotNamespace>(
+      &snap_it->second.snap_namespace);
+    if (mirror_ns == nullptr || !mirror_ns->is_non_primary()) {
+      // skip user snapshots and primary mirror snapshots
+      continue;
+    }
+
+    if (mirror_ns->primary_mirror_uuid != remote_mirror_uuid) {
+      dout(20) << "local snapshot " << snap_it->first << " not tied to remote"
+               << dendl;
+      continue;
+    } else if (local_snap_id == snap_it->first) {
+      // direct hit: the snapshot itself records its primary counterpart
+      dout(15) << "local snapshot " << local_snap_id << " maps to "
+               << "remote snapshot " << mirror_ns->primary_snap_id << dendl;
+      return mirror_ns->primary_snap_id;
+    }
+
+    // otherwise look for the local snap id in this snapshot's
+    // remote-to-local snap_seqs mapping
+    const auto& snap_seqs = mirror_ns->snap_seqs;
+    for (auto [remote_snap_id_seq, local_snap_id_seq] : snap_seqs) {
+      if (local_snap_id_seq == local_snap_id) {
+        dout(15) << "local snapshot " << local_snap_id << " maps to "
+                 << "remote snapshot " << remote_snap_id_seq << dendl;
+        return remote_snap_id_seq;
+      }
+    }
+  }
+
+  return CEPH_NOSNAP;
+}
+
+} // namespace util
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h
new file mode 100644
index 000000000..8efc58685
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h
@@ -0,0 +1,30 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H
+#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/ceph_mutex.h"
+#include "librbd/Types.h"
+#include <map>
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+namespace util {
+
+uint64_t compute_remote_snap_id(
+ const ceph::shared_mutex& local_image_lock,
+ const std::map<librados::snap_t, librbd::SnapInfo>& local_snap_infos,
+ uint64_t local_snap_id, const std::string& remote_mirror_uuid);
+
+} // namespace util
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc
new file mode 100644
index 000000000..1bd5d77f0
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc
@@ -0,0 +1,172 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SyncPointCreateRequest.h"
+#include "include/uuid.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/image_sync/Types.h"
+#include "tools/rbd_mirror/image_sync/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointCreateRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+SyncPointCreateRequest<I>::SyncPointCreateRequest(
+    I *remote_image_ctx,
+    const std::string &local_mirror_uuid,
+    SyncPointHandler* sync_point_handler,
+    Context *on_finish)
+  : m_remote_image_ctx(remote_image_ctx),
+    m_local_mirror_uuid(local_mirror_uuid),
+    m_sync_point_handler(sync_point_handler),
+    m_on_finish(on_finish) {
+  // at most one pre-existing sync point is supported
+  m_sync_points_copy = m_sync_point_handler->get_sync_points();
+  ceph_assert(m_sync_points_copy.size() < 2);
+
+  // initialize the updated client meta with the new sync point
+  m_sync_points_copy.emplace_back();
+  if (m_sync_points_copy.size() > 1) {
+    // chain the new sync point off the existing sync point's snapshot
+    m_sync_points_copy.back().from_snap_name =
+      m_sync_points_copy.front().snap_name;
+  }
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send() {
+  send_update_sync_points();
+}
+
+// Pick a unique snapshot name for the new sync point and persist the
+// updated sync-point list before creating the snapshot itself.
+template <typename I>
+void SyncPointCreateRequest<I>::send_update_sync_points() {
+  uuid_d uuid_gen;
+  uuid_gen.generate_random();
+
+  auto& sync_point = m_sync_points_copy.back();
+  sync_point.snap_name = util::get_snapshot_name_prefix(m_local_mirror_uuid) +
+                         uuid_gen.to_string();
+
+  auto ctx = create_context_callback<
+    SyncPointCreateRequest<I>,
+    &SyncPointCreateRequest<I>::handle_update_sync_points>(this);
+  m_sync_point_handler->update_sync_points(
+    m_sync_point_handler->get_snap_seqs(), m_sync_points_copy, false, ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_update_sync_points(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to update client data: " << cpp_strerror(r)
+         << dendl;
+    finish(r);
+    return;
+  }
+
+  send_refresh_image();
+}
+
+// Refresh the remote image before attempting the snapshot create.
+template <typename I>
+void SyncPointCreateRequest<I>::send_refresh_image() {
+  dout(20) << dendl;
+
+  Context *ctx = create_context_callback<
+    SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_refresh_image>(
+      this);
+  m_remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_refresh_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  send_create_snap();
+}
+
+// Create the sync-point snapshot (user namespace) on the remote image,
+// skipping quiesce notifications.
+template <typename I>
+void SyncPointCreateRequest<I>::send_create_snap() {
+  dout(20) << dendl;
+
+  auto& sync_point = m_sync_points_copy.back();
+
+  Context *ctx = create_context_callback<
+    SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_create_snap>(
+      this);
+  m_remote_image_ctx->operations->snap_create(
+    cls::rbd::UserSnapshotNamespace(), sync_point.snap_name.c_str(),
+    librbd::SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE, m_prog_ctx, ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_create_snap(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r == -EEXIST) {
+    // name collision: regenerate a new snapshot name and retry
+    send_update_sync_points();
+    return;
+  } else if (r < 0) {
+    derr << ": failed to create snapshot: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  send_final_refresh_image();
+}
+
+// Refresh again so the newly created snapshot is visible in the image ctx.
+template <typename I>
+void SyncPointCreateRequest<I>::send_final_refresh_image() {
+  dout(20) << dendl;
+
+  Context *ctx = create_context_callback<
+    SyncPointCreateRequest<I>,
+    &SyncPointCreateRequest<I>::handle_final_refresh_image>(this);
+  m_remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_final_refresh_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to refresh image for snapshot: " << cpp_strerror(r)
+         << dendl;
+    finish(r);
+    return;
+  }
+
+  finish(0);
+}
+
+// Complete the caller's context and destroy this (self-deleting) request.
+template <typename I>
+void SyncPointCreateRequest<I>::finish(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h
new file mode 100644
index 000000000..9b52b8374
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h
@@ -0,0 +1,93 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H
+#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H
+
+#include "librbd/internal.h"
+#include "Types.h"
+#include <string>
+
+class Context;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+// Creates a new sync-point snapshot on the remote (primary) image and
+// records it via the supplied SyncPointHandler.  The request is
+// self-deleting: it destroys itself after completing on_finish.
+template <typename ImageCtxT = librbd::ImageCtx>
+class SyncPointCreateRequest {
+public:
+  static SyncPointCreateRequest* create(
+      ImageCtxT *remote_image_ctx,
+      const std::string &local_mirror_uuid,
+      SyncPointHandler* sync_point_handler,
+      Context *on_finish) {
+    return new SyncPointCreateRequest(remote_image_ctx, local_mirror_uuid,
+                                      sync_point_handler, on_finish);
+  }
+
+  SyncPointCreateRequest(
+      ImageCtxT *remote_image_ctx,
+      const std::string &local_mirror_uuid,
+      SyncPointHandler* sync_point_handler,
+      Context *on_finish);
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * UPDATE_SYNC_POINTS < . .
+   *    |                   .
+   *    v                   .
+   * REFRESH_IMAGE          .
+   *    |                   . (repeat on EEXIST)
+   *    v                   .
+   * CREATE_SNAP  . . . . . .
+   *    |
+   *    v
+   * REFRESH_IMAGE
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  ImageCtxT *m_remote_image_ctx;
+  std::string m_local_mirror_uuid;
+  SyncPointHandler* m_sync_point_handler;
+  Context *m_on_finish;
+
+  // working copy of the recorded sync points with the new point appended
+  SyncPoints m_sync_points_copy;
+  librbd::NoOpProgressContext m_prog_ctx;
+
+  void send_update_sync_points();
+  void handle_update_sync_points(int r);
+
+  void send_refresh_image();
+  void handle_refresh_image(int r);
+
+  void send_create_snap();
+  void handle_create_snap(int r);
+
+  void send_final_refresh_image();
+  void handle_final_refresh_image(int r);
+
+  void finish(int r);
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc
new file mode 100644
index 000000000..d1cd32b39
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc
@@ -0,0 +1,213 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SyncPointPruneRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include <set>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointPruneRequest: " \
+ << this << " " << __func__
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+SyncPointPruneRequest<I>::SyncPointPruneRequest(
+    I *remote_image_ctx,
+    bool sync_complete,
+    SyncPointHandler* sync_point_handler,
+    Context *on_finish)
+  : m_remote_image_ctx(remote_image_ctx),
+    m_sync_complete(sync_complete),
+    m_sync_point_handler(sync_point_handler),
+    m_on_finish(on_finish) {
+  // work on a local copy; the handler's view is updated at the end
+  m_sync_points_copy = m_sync_point_handler->get_sync_points();
+}
+
+// Decide which sync-point snapshots can be removed, then start removal.
+template <typename I>
+void SyncPointPruneRequest<I>::send() {
+  if (m_sync_points_copy.empty()) {
+    send_remove_snap();
+    return;
+  }
+
+  if (m_sync_complete) {
+    // if sync is complete, we can remove the master sync point
+    auto it = m_sync_points_copy.begin();
+    auto& sync_point = *it;
+
+    ++it;
+    if (it == m_sync_points_copy.end() ||
+        it->from_snap_name != sync_point.snap_name) {
+      // snapshot is not referenced as the base of the next sync point
+      m_snap_names.push_back(sync_point.snap_name);
+    }
+
+    if (!sync_point.from_snap_name.empty()) {
+      m_snap_names.push_back(sync_point.from_snap_name);
+    }
+  } else {
+    // if we have more than one sync point or invalid sync points,
+    // trim them off
+    std::shared_lock image_locker{m_remote_image_ctx->image_lock};
+    std::set<std::string> snap_names;
+    for (auto it = m_sync_points_copy.rbegin();
+         it != m_sync_points_copy.rend(); ++it) {
+      auto& sync_point = *it;
+      if (&sync_point == &m_sync_points_copy.front()) {
+        // validate the primary (front) sync point; keep it only if its
+        // snapshot exists and it has no unexpected base snapshot
+        if (m_remote_image_ctx->get_snap_id(
+              cls::rbd::UserSnapshotNamespace(), sync_point.snap_name) ==
+                CEPH_NOSNAP) {
+          derr << ": failed to locate sync point snapshot: "
+               << sync_point.snap_name << dendl;
+        } else if (!sync_point.from_snap_name.empty()) {
+          derr << ": unexpected from_snap_name in primary sync point: "
+               << sync_point.from_snap_name << dendl;
+        } else {
+          // first sync point is OK -- keep it
+          break;
+        }
+        m_invalid_master_sync_point = true;
+      }
+
+      // de-duplicate snapshot names across sync points
+      if (snap_names.count(sync_point.snap_name) == 0) {
+        snap_names.insert(sync_point.snap_name);
+        m_snap_names.push_back(sync_point.snap_name);
+      }
+
+      auto& front_sync_point = m_sync_points_copy.front();
+      if (!sync_point.from_snap_name.empty() &&
+          snap_names.count(sync_point.from_snap_name) == 0 &&
+          sync_point.from_snap_name != front_sync_point.snap_name) {
+        snap_names.insert(sync_point.from_snap_name);
+        m_snap_names.push_back(sync_point.from_snap_name);
+      }
+    }
+  }
+
+  send_remove_snap();
+}
+
+// Remove the next queued snapshot; when the queue drains, move on to the
+// image refresh.
+template <typename I>
+void SyncPointPruneRequest<I>::send_remove_snap() {
+  if (m_snap_names.empty()) {
+    send_refresh_image();
+    return;
+  }
+
+  const std::string &snap_name = m_snap_names.front();
+
+  dout(20) << ": snap_name=" << snap_name << dendl;
+
+  Context *ctx = create_context_callback<
+    SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_remove_snap>(
+      this);
+  m_remote_image_ctx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(),
+                                              snap_name.c_str(),
+                                              ctx);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::handle_remove_snap(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  ceph_assert(!m_snap_names.empty());
+  std::string snap_name = m_snap_names.front();
+  m_snap_names.pop_front();
+
+  // already-removed snapshots are fine (idempotent prune)
+  if (r == -ENOENT) {
+    r = 0;
+  }
+  if (r < 0) {
+    derr << ": failed to remove snapshot '" << snap_name << "': "
+         << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  send_remove_snap();
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::send_refresh_image() {
+  dout(20) << dendl;
+
+  Context *ctx = create_context_callback<
+    SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_refresh_image>(
+      this);
+  m_remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::handle_refresh_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  send_update_sync_points();
+}
+
+// Persist the pruned sync-point list through the handler.
+template <typename I>
+void SyncPointPruneRequest<I>::send_update_sync_points() {
+  dout(20) << dendl;
+
+  if (m_sync_complete) {
+    // NOTE(review): pop_front() on an empty list is UB -- presumably callers
+    // only pass sync_complete=true when a sync point exists; confirm
+    m_sync_points_copy.pop_front();
+  } else {
+    while (m_sync_points_copy.size() > 1) {
+      m_sync_points_copy.pop_back();
+    }
+    if (m_invalid_master_sync_point) {
+      // all subsequent sync points would have been pruned
+      m_sync_points_copy.clear();
+    }
+  }
+
+  auto ctx = create_context_callback<
+    SyncPointPruneRequest<I>,
+    &SyncPointPruneRequest<I>::handle_update_sync_points>(this);
+  m_sync_point_handler->update_sync_points(
+    m_sync_point_handler->get_snap_seqs(), m_sync_points_copy,
+    m_sync_complete, ctx);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::handle_update_sync_points(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to update client data: " << cpp_strerror(r)
+         << dendl;
+    finish(r);
+    return;
+  }
+
+  finish(0);
+}
+
+// Complete the caller's context and destroy this (self-deleting) request.
+template <typename I>
+void SyncPointPruneRequest<I>::finish(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
new file mode 100644
index 000000000..08bf840b1
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
@@ -0,0 +1,91 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H
+#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H
+
+#include "tools/rbd_mirror/image_sync/Types.h"
+#include <list>
+#include <string>
+
+class Context;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SyncPointPruneRequest {
+public:
+ static SyncPointPruneRequest* create(
+ ImageCtxT *remote_image_ctx,
+ bool sync_complete,
+ SyncPointHandler* sync_point_handler,
+ Context *on_finish) {
+ return new SyncPointPruneRequest(remote_image_ctx, sync_complete,
+ sync_point_handler, on_finish);
+ }
+
+ SyncPointPruneRequest(
+ ImageCtxT *remote_image_ctx,
+ bool sync_complete,
+ SyncPointHandler* sync_point_handler,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * | . . . . .
+ * | . .
+ * v v . (repeat if from snap
+ * REMOVE_SNAP . . . unused by other sync)
+ * |
+ * v
+ * REFRESH_IMAGE
+ * |
+ * v
+ * UPDATE_CLIENT
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT *m_remote_image_ctx;
+ bool m_sync_complete;
+ SyncPointHandler* m_sync_point_handler;
+ Context *m_on_finish;
+
+ SyncPoints m_sync_points_copy;
+ std::list<std::string> m_snap_names;
+
+ bool m_invalid_master_sync_point = false;
+
+ void send_remove_snap();
+ void handle_remove_snap(int r);
+
+ void send_refresh_image();
+ void handle_refresh_image(int r);
+
+ void send_update_sync_points();
+ void handle_update_sync_points(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_sync/Types.h b/src/tools/rbd_mirror/image_sync/Types.h
new file mode 100644
index 000000000..d748dc93e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/Types.h
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_TYPES_H
+#define RBD_MIRROR_IMAGE_SYNC_TYPES_H
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/Types.h"
+#include <list>
+#include <string>
+#include <boost/optional.hpp>
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+struct SyncPoint {
+ typedef boost::optional<uint64_t> ObjectNumber;
+
+ SyncPoint() {
+ }
+ SyncPoint(const cls::rbd::SnapshotNamespace& snap_namespace,
+ const std::string& snap_name,
+ const std::string& from_snap_name,
+ const ObjectNumber& object_number)
+ : snap_namespace(snap_namespace), snap_name(snap_name),
+ from_snap_name(from_snap_name), object_number(object_number) {
+ }
+
+ cls::rbd::SnapshotNamespace snap_namespace =
+ {cls::rbd::UserSnapshotNamespace{}};
+ std::string snap_name;
+ std::string from_snap_name;
+ ObjectNumber object_number = boost::none;
+
+ bool operator==(const SyncPoint& rhs) const {
+ return (snap_namespace == rhs.snap_namespace &&
+ snap_name == rhs.snap_name &&
+ from_snap_name == rhs.from_snap_name &&
+ object_number == rhs.object_number);
+ }
+};
+
+typedef std::list<SyncPoint> SyncPoints;
+
+struct SyncPointHandler {
+public:
+ SyncPointHandler(const SyncPointHandler&) = delete;
+ SyncPointHandler& operator=(const SyncPointHandler&) = delete;
+
+ virtual ~SyncPointHandler() {}
+ virtual void destroy() {
+ delete this;
+ }
+
+ virtual SyncPoints get_sync_points() const = 0;
+ virtual librbd::SnapSeqs get_snap_seqs() const = 0;
+
+ virtual void update_sync_points(const librbd::SnapSeqs& snap_seq,
+ const SyncPoints& sync_points,
+ bool sync_complete,
+ Context* on_finish) = 0;
+
+protected:
+ SyncPointHandler() {}
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_SYNC_TYPES_H
diff --git a/src/tools/rbd_mirror/image_sync/Utils.cc b/src/tools/rbd_mirror/image_sync/Utils.cc
new file mode 100644
index 000000000..6a3eae72d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/Utils.cc
@@ -0,0 +1,24 @@
+// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Utils.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+namespace util {
+
+namespace {
+
+static const std::string SNAP_NAME_PREFIX(".rbd-mirror");
+
+} // anonymous namespace
+
+std::string get_snapshot_name_prefix(const std::string& local_mirror_uuid) {
+ return SNAP_NAME_PREFIX + "." + local_mirror_uuid + ".";
+}
+
+} // namespace util
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_sync/Utils.h b/src/tools/rbd_mirror/image_sync/Utils.h
new file mode 100644
index 000000000..139699daa
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/Utils.h
@@ -0,0 +1,23 @@
+// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_UTILS_H
+#define RBD_MIRROR_IMAGE_SYNC_UTILS_H
+
+#include <string>
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+namespace util {
+
+// Build the common name prefix (".rbd-mirror.<local_mirror_uuid>.")
+// used for snapshots created by rbd-mirror for the local cluster.
+std::string get_snapshot_name_prefix(const std::string& local_mirror_uuid);
+
+} // namespace util
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_SYNC_UTILS_H
diff --git a/src/tools/rbd_mirror/instance_watcher/Types.cc b/src/tools/rbd_mirror/instance_watcher/Types.cc
new file mode 100644
index 000000000..0e9922733
--- /dev/null
+++ b/src/tools/rbd_mirror/instance_watcher/Types.cc
@@ -0,0 +1,245 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+
+namespace rbd {
+namespace mirror {
+namespace instance_watcher {
+
+namespace {
+
+class EncodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ using ceph::encode;
+ encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl);
+ payload.encode(m_bl);
+ }
+
+private:
+ bufferlist &m_bl;
+};
+
+class DecodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter)
+ : m_version(version), m_iter(iter) {}
+
+ template <typename Payload>
+ inline void operator()(Payload &payload) const {
+ payload.decode(m_version, m_iter);
+ }
+
+private:
+ __u8 m_version;
+ bufferlist::const_iterator &m_iter;
+};
+
+class DumpPayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ NotifyOp notify_op = Payload::NOTIFY_OP;
+ m_formatter->dump_string("notify_op", stringify(notify_op));
+ payload.dump(m_formatter);
+ }
+
+private:
+ ceph::Formatter *m_formatter;
+};
+
+} // anonymous namespace
+
+void PayloadBase::encode(bufferlist &bl) const {
+ using ceph::encode;
+ encode(request_id, bl);
+}
+
+void PayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ decode(request_id, iter);
+}
+
+void PayloadBase::dump(Formatter *f) const {
+ f->dump_unsigned("request_id", request_id);
+}
+
+void ImagePayloadBase::encode(bufferlist &bl) const {
+ using ceph::encode;
+ PayloadBase::encode(bl);
+ encode(global_image_id, bl);
+}
+
+void ImagePayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ PayloadBase::decode(version, iter);
+ decode(global_image_id, iter);
+}
+
+void ImagePayloadBase::dump(Formatter *f) const {
+ PayloadBase::dump(f);
+ f->dump_string("global_image_id", global_image_id);
+}
+
+void PeerImageRemovedPayload::encode(bufferlist &bl) const {
+ using ceph::encode;
+ PayloadBase::encode(bl);
+ encode(global_image_id, bl);
+ encode(peer_mirror_uuid, bl);
+}
+
+void PeerImageRemovedPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ PayloadBase::decode(version, iter);
+ decode(global_image_id, iter);
+ decode(peer_mirror_uuid, iter);
+}
+
+void PeerImageRemovedPayload::dump(Formatter *f) const {
+ PayloadBase::dump(f);
+ f->dump_string("global_image_id", global_image_id);
+ f->dump_string("peer_mirror_uuid", peer_mirror_uuid);
+}
+
+void SyncPayloadBase::encode(bufferlist &bl) const {
+ using ceph::encode;
+ PayloadBase::encode(bl);
+ encode(sync_id, bl);
+}
+
+void SyncPayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ PayloadBase::decode(version, iter);
+ decode(sync_id, iter);
+}
+
+void SyncPayloadBase::dump(Formatter *f) const {
+ PayloadBase::dump(f);
+ f->dump_string("sync_id", sync_id);
+}
+
+void UnknownPayload::encode(bufferlist &bl) const {
+ ceph_abort();
+}
+
+void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void UnknownPayload::dump(Formatter *f) const {
+}
+
+void NotifyMessage::encode(bufferlist& bl) const {
+ ENCODE_START(2, 2, bl);
+ boost::apply_visitor(EncodePayloadVisitor(bl), payload);
+ ENCODE_FINISH(bl);
+}
+
+void NotifyMessage::decode(bufferlist::const_iterator& iter) {
+ DECODE_START(2, iter);
+
+ uint32_t notify_op;
+ decode(notify_op, iter);
+
+ // select the correct payload variant based upon the encoded op
+ switch (notify_op) {
+ case NOTIFY_OP_IMAGE_ACQUIRE:
+ payload = ImageAcquirePayload();
+ break;
+ case NOTIFY_OP_IMAGE_RELEASE:
+ payload = ImageReleasePayload();
+ break;
+ case NOTIFY_OP_PEER_IMAGE_REMOVED:
+ payload = PeerImageRemovedPayload();
+ break;
+ case NOTIFY_OP_SYNC_REQUEST:
+ payload = SyncRequestPayload();
+ break;
+ case NOTIFY_OP_SYNC_START:
+ payload = SyncStartPayload();
+ break;
+ default:
+ payload = UnknownPayload();
+ break;
+ }
+
+ apply_visitor(DecodePayloadVisitor(struct_v, iter), payload);
+ DECODE_FINISH(iter);
+}
+
+void NotifyMessage::dump(Formatter *f) const {
+ apply_visitor(DumpPayloadVisitor(f), payload);
+}
+
+void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
+ o.push_back(new NotifyMessage(ImageAcquirePayload()));
+ o.push_back(new NotifyMessage(ImageAcquirePayload(1, "gid")));
+
+ o.push_back(new NotifyMessage(ImageReleasePayload()));
+ o.push_back(new NotifyMessage(ImageReleasePayload(1, "gid")));
+
+ o.push_back(new NotifyMessage(PeerImageRemovedPayload()));
+ o.push_back(new NotifyMessage(PeerImageRemovedPayload(1, "gid", "uuid")));
+
+ o.push_back(new NotifyMessage(SyncRequestPayload()));
+ o.push_back(new NotifyMessage(SyncRequestPayload(1, "sync_id")));
+
+ o.push_back(new NotifyMessage(SyncStartPayload()));
+ o.push_back(new NotifyMessage(SyncStartPayload(1, "sync_id")));
+}
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op) {
+ switch (op) {
+ case NOTIFY_OP_IMAGE_ACQUIRE:
+ out << "ImageAcquire";
+ break;
+ case NOTIFY_OP_IMAGE_RELEASE:
+ out << "ImageRelease";
+ break;
+ case NOTIFY_OP_PEER_IMAGE_REMOVED:
+ out << "PeerImageRemoved";
+ break;
+ case NOTIFY_OP_SYNC_REQUEST:
+ out << "SyncRequest";
+ break;
+ case NOTIFY_OP_SYNC_START:
+ out << "SyncStart";
+ break;
+ default:
+ out << "Unknown (" << static_cast<uint32_t>(op) << ")";
+ break;
+ }
+ return out;
+}
+
+// Serialize the ack payload (raw encoding; not versioned -- the caller
+// wraps it in the watch/notify ack response).
+void NotifyAckPayload::encode(bufferlist &bl) const {
+  using ceph::encode;
+  encode(instance_id, bl);
+  encode(request_id, bl);
+  encode(ret_val, bl);
+}
+
+// Deserialize; field order must match encode() above.
+void NotifyAckPayload::decode(bufferlist::const_iterator &iter) {
+  using ceph::decode;
+  decode(instance_id, iter);
+  decode(request_id, iter);
+  decode(ret_val, iter);
+}
+
+void NotifyAckPayload::dump(Formatter *f) const {
+  f->dump_string("instance_id", instance_id);
+  f->dump_unsigned("request_id", request_id);
+  // fixed: ret_val was previously dumped under the duplicate key
+  // "request_id", hiding the return value in formatted output
+  f->dump_int("ret_val", ret_val);
+}
+
+} // namespace instance_watcher
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/instance_watcher/Types.h b/src/tools/rbd_mirror/instance_watcher/Types.h
new file mode 100644
index 000000000..b0b7b7791
--- /dev/null
+++ b/src/tools/rbd_mirror/instance_watcher/Types.h
@@ -0,0 +1,197 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
+#define RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
+
+#include <string>
+#include <set>
+#include <boost/variant.hpp>
+
+#include "include/buffer_fwd.h"
+#include "include/encoding.h"
+#include "include/int_types.h"
+
+namespace ceph { class Formatter; }
+
+namespace rbd {
+namespace mirror {
+namespace instance_watcher {
+
+enum NotifyOp {
+ NOTIFY_OP_IMAGE_ACQUIRE = 0,
+ NOTIFY_OP_IMAGE_RELEASE = 1,
+ NOTIFY_OP_PEER_IMAGE_REMOVED = 2,
+ NOTIFY_OP_SYNC_REQUEST = 3,
+ NOTIFY_OP_SYNC_START = 4
+};
+
+struct PayloadBase {
+ uint64_t request_id;
+
+ PayloadBase() : request_id(0) {
+ }
+
+ PayloadBase(uint64_t request_id) : request_id(request_id) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct ImagePayloadBase : public PayloadBase {
+ std::string global_image_id;
+
+ ImagePayloadBase() : PayloadBase() {
+ }
+
+ ImagePayloadBase(uint64_t request_id, const std::string &global_image_id)
+ : PayloadBase(request_id), global_image_id(global_image_id) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct ImageAcquirePayload : public ImagePayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_ACQUIRE;
+
+ ImageAcquirePayload() {
+ }
+ ImageAcquirePayload(uint64_t request_id, const std::string &global_image_id)
+ : ImagePayloadBase(request_id, global_image_id) {
+ }
+};
+
+struct ImageReleasePayload : public ImagePayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_RELEASE;
+
+ ImageReleasePayload() {
+ }
+ ImageReleasePayload(uint64_t request_id, const std::string &global_image_id)
+ : ImagePayloadBase(request_id, global_image_id) {
+ }
+};
+
+struct PeerImageRemovedPayload : public PayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_PEER_IMAGE_REMOVED;
+
+ std::string global_image_id;
+ std::string peer_mirror_uuid;
+
+ PeerImageRemovedPayload() {
+ }
+ PeerImageRemovedPayload(uint64_t request_id,
+ const std::string& global_image_id,
+ const std::string& peer_mirror_uuid)
+ : PayloadBase(request_id),
+ global_image_id(global_image_id), peer_mirror_uuid(peer_mirror_uuid) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct SyncPayloadBase : public PayloadBase {
+ std::string sync_id;
+
+ SyncPayloadBase() : PayloadBase() {
+ }
+
+ SyncPayloadBase(uint64_t request_id, const std::string &sync_id)
+ : PayloadBase(request_id), sync_id(sync_id) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct SyncRequestPayload : public SyncPayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_REQUEST;
+
+ SyncRequestPayload() : SyncPayloadBase() {
+ }
+
+ SyncRequestPayload(uint64_t request_id, const std::string &sync_id)
+ : SyncPayloadBase(request_id, sync_id) {
+ }
+};
+
+struct SyncStartPayload : public SyncPayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_START;
+
+ SyncStartPayload() : SyncPayloadBase() {
+ }
+
+ SyncStartPayload(uint64_t request_id, const std::string &sync_id)
+ : SyncPayloadBase(request_id, sync_id) {
+ }
+};
+
+struct UnknownPayload {
+ static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1);
+
+ UnknownPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+typedef boost::variant<ImageAcquirePayload,
+ ImageReleasePayload,
+ PeerImageRemovedPayload,
+ SyncRequestPayload,
+ SyncStartPayload,
+ UnknownPayload> Payload;
+
+struct NotifyMessage {
+ NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) {
+ }
+
+ Payload payload;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+
+ static void generate_test_instances(std::list<NotifyMessage *> &o);
+};
+
+WRITE_CLASS_ENCODER(NotifyMessage);
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op);
+
+struct NotifyAckPayload {
+ std::string instance_id;
+ uint64_t request_id;
+ int ret_val;
+
+ NotifyAckPayload() : request_id(0), ret_val(0) {
+ }
+
+ NotifyAckPayload(const std::string &instance_id, uint64_t request_id,
+ int ret_val)
+ : instance_id(instance_id), request_id(request_id), ret_val(ret_val) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+};
+
+WRITE_CLASS_ENCODER(NotifyAckPayload);
+
+} // namespace instance_watcher
+} // namespace mirror
+} // namespace rbd
+
+using rbd::mirror::instance_watcher::encode;
+using rbd::mirror::instance_watcher::decode;
+
+#endif // RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/instances/Types.h b/src/tools/rbd_mirror/instances/Types.h
new file mode 100644
index 000000000..8b0a68fc3
--- /dev/null
+++ b/src/tools/rbd_mirror/instances/Types.h
@@ -0,0 +1,28 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCES_TYPES_H
+#define CEPH_RBD_MIRROR_INSTANCES_TYPES_H
+
+#include <string>
+#include <vector>
+
+namespace rbd {
+namespace mirror {
+namespace instances {
+
+struct Listener {
+ typedef std::vector<std::string> InstanceIds;
+
+ virtual ~Listener() {
+ }
+
+ virtual void handle_added(const InstanceIds& instance_ids) = 0;
+ virtual void handle_removed(const InstanceIds& instance_ids) = 0;
+};
+
+} // namespace instances
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCES_TYPES_H
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.cc b/src/tools/rbd_mirror/leader_watcher/Types.cc
new file mode 100644
index 000000000..d2fb7908f
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.cc
@@ -0,0 +1,161 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+
+namespace rbd {
+namespace mirror {
+namespace leader_watcher {
+
+namespace {
+
+class EncodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ using ceph::encode;
+ encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl);
+ payload.encode(m_bl);
+ }
+
+private:
+ bufferlist &m_bl;
+};
+
+class DecodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter)
+ : m_version(version), m_iter(iter) {}
+
+ template <typename Payload>
+ inline void operator()(Payload &payload) const {
+ payload.decode(m_version, m_iter);
+ }
+
+private:
+ __u8 m_version;
+ bufferlist::const_iterator &m_iter;
+};
+
+class DumpPayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ NotifyOp notify_op = Payload::NOTIFY_OP;
+ m_formatter->dump_string("notify_op", stringify(notify_op));
+ payload.dump(m_formatter);
+ }
+
+private:
+ ceph::Formatter *m_formatter;
+};
+
+} // anonymous namespace
+
+void HeartbeatPayload::encode(bufferlist &bl) const {
+}
+
+void HeartbeatPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void HeartbeatPayload::dump(Formatter *f) const {
+}
+
+void LockAcquiredPayload::encode(bufferlist &bl) const {
+}
+
+void LockAcquiredPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void LockAcquiredPayload::dump(Formatter *f) const {
+}
+
+void LockReleasedPayload::encode(bufferlist &bl) const {
+}
+
+void LockReleasedPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void LockReleasedPayload::dump(Formatter *f) const {
+}
+
+void UnknownPayload::encode(bufferlist &bl) const {
+ ceph_abort();
+}
+
+void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void UnknownPayload::dump(Formatter *f) const {
+}
+
+void NotifyMessage::encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ boost::apply_visitor(EncodePayloadVisitor(bl), payload);
+ ENCODE_FINISH(bl);
+}
+
+void NotifyMessage::decode(bufferlist::const_iterator& iter) {
+ DECODE_START(1, iter);
+
+ uint32_t notify_op;
+ decode(notify_op, iter);
+
+ // select the correct payload variant based upon the encoded op
+ switch (notify_op) {
+ case NOTIFY_OP_HEARTBEAT:
+ payload = HeartbeatPayload();
+ break;
+ case NOTIFY_OP_LOCK_ACQUIRED:
+ payload = LockAcquiredPayload();
+ break;
+ case NOTIFY_OP_LOCK_RELEASED:
+ payload = LockReleasedPayload();
+ break;
+ default:
+ payload = UnknownPayload();
+ break;
+ }
+
+ apply_visitor(DecodePayloadVisitor(struct_v, iter), payload);
+ DECODE_FINISH(iter);
+}
+
+void NotifyMessage::dump(Formatter *f) const {
+ apply_visitor(DumpPayloadVisitor(f), payload);
+}
+
+void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
+ o.push_back(new NotifyMessage(HeartbeatPayload()));
+ o.push_back(new NotifyMessage(LockAcquiredPayload()));
+ o.push_back(new NotifyMessage(LockReleasedPayload()));
+}
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op) {
+ switch (op) {
+ case NOTIFY_OP_HEARTBEAT:
+ out << "Heartbeat";
+ break;
+ case NOTIFY_OP_LOCK_ACQUIRED:
+ out << "LockAcquired";
+ break;
+ case NOTIFY_OP_LOCK_RELEASED:
+ out << "LockReleased";
+ break;
+ default:
+ out << "Unknown (" << static_cast<uint32_t>(op) << ")";
+ break;
+ }
+ return out;
+}
+
+} // namespace leader_watcher
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.h b/src/tools/rbd_mirror/leader_watcher/Types.h
new file mode 100644
index 000000000..1278e54b7
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.h
@@ -0,0 +1,117 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_LEADER_WATCHER_TYPES_H
+#define RBD_MIRROR_LEADER_WATCHER_TYPES_H
+
+#include "include/int_types.h"
+#include "include/buffer_fwd.h"
+#include "include/encoding.h"
+#include <string>
+#include <vector>
+#include <boost/variant.hpp>
+
+struct Context;
+
+namespace ceph { class Formatter; }
+
+namespace rbd {
+namespace mirror {
+namespace leader_watcher {
+
+struct Listener {
+ typedef std::vector<std::string> InstanceIds;
+
+ virtual ~Listener() {
+ }
+
+ virtual void post_acquire_handler(Context *on_finish) = 0;
+ virtual void pre_release_handler(Context *on_finish) = 0;
+
+ virtual void update_leader_handler(
+ const std::string &leader_instance_id) = 0;
+
+ virtual void handle_instances_added(const InstanceIds& instance_ids) = 0;
+ virtual void handle_instances_removed(const InstanceIds& instance_ids) = 0;
+};
+
+enum NotifyOp {
+ NOTIFY_OP_HEARTBEAT = 0,
+ NOTIFY_OP_LOCK_ACQUIRED = 1,
+ NOTIFY_OP_LOCK_RELEASED = 2,
+};
+
+struct HeartbeatPayload {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_HEARTBEAT;
+
+ HeartbeatPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct LockAcquiredPayload {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_ACQUIRED;
+
+ LockAcquiredPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct LockReleasedPayload {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_RELEASED;
+
+ LockReleasedPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct UnknownPayload {
+ static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1);
+
+ UnknownPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+typedef boost::variant<HeartbeatPayload,
+ LockAcquiredPayload,
+ LockReleasedPayload,
+ UnknownPayload> Payload;
+
+struct NotifyMessage {
+ NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) {
+ }
+
+ Payload payload;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+
+ static void generate_test_instances(std::list<NotifyMessage *> &o);
+};
+
+WRITE_CLASS_ENCODER(NotifyMessage);
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op);
+
+} // namespace leader_watcher
+} // namespace mirror
+} // namespace rbd
+
+using rbd::mirror::leader_watcher::encode;
+using rbd::mirror::leader_watcher::decode;
+
+#endif // RBD_MIRROR_LEADER_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/main.cc b/src/tools/rbd_mirror/main.cc
new file mode 100644
index 000000000..74c97272e
--- /dev/null
+++ b/src/tools/rbd_mirror/main.cc
@@ -0,0 +1,123 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/perf_counters.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+#include "Mirror.h"
+#include "Types.h"
+
+#include <vector>
+
+rbd::mirror::Mirror *mirror = nullptr;
+PerfCounters *g_journal_perf_counters = nullptr;
+PerfCounters *g_snapshot_perf_counters = nullptr;
+
+void usage() {
+ std::cout << "usage: rbd-mirror [options...]" << std::endl;
+ std::cout << "options:\n";
+ std::cout << " -m monaddress[:port] connect to specified monitor\n";
+ std::cout << " --keyring=<path> path to keyring for local cluster\n";
+ std::cout << " --log-file=<logfile> file to log debug output\n";
+ std::cout << " --debug-rbd-mirror=<log-level>/<memory-level> set rbd-mirror debug level\n";
+ generic_server_usage();
+}
+
+static void handle_signal(int signum)
+{
+ if (mirror)
+ mirror->handle_signal(signum);
+}
+
+int main(int argc, const char **argv)
+{
+  std::vector<const char*> args;
+  argv_to_vec(argc, argv, args);
+  if (args.empty()) {
+    std::cerr << argv[0] << ": -h or --help for usage" << std::endl;
+    exit(1);
+  }
+  if (ceph_argparse_need_usage(args)) {
+    usage();
+    exit(0);
+  }
+
+  auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_DAEMON,
+                         CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+
+  if (g_conf()->daemonize) {
+    global_init_daemonize(g_ceph_context);
+  }
+
+  common_init_finish(g_ceph_context);
+
+  init_async_signal_handler();
+  register_async_signal_handler(SIGHUP, handle_signal);
+  register_async_signal_handler_oneshot(SIGINT, handle_signal);
+  register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+  std::vector<const char*> cmd_args;
+  argv_to_vec(argc, argv, cmd_args);
+
+  // disable unnecessary librbd cache
+  g_ceph_context->_conf.set_val_or_die("rbd_cache", "false");
+
+  auto prio =
+    g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio");
+  {
+    PerfCountersBuilder plb(g_ceph_context, "rbd_mirror",
+                            rbd::mirror::l_rbd_mirror_journal_first,
+                            rbd::mirror::l_rbd_mirror_journal_last);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays",
+                        "r", prio);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes",
+                        "Replayed data", "rb", prio, unit_t(UNIT_BYTES));
+    plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency",
+                     "Replay latency", "rl", prio);
+    g_journal_perf_counters = plb.create_perf_counters();
+  }
+  {
+    PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_snapshot",
+                            rbd::mirror::l_rbd_mirror_snapshot_first,
+                            rbd::mirror::l_rbd_mirror_snapshot_last);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots,
+                        "snapshots", "Snapshots", "r", prio);
+    plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots_time,
+                     "snapshots_time", "Snapshots time", "rl", prio);
+    plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_bytes,
+                        "replay_bytes", "Replayed data", "rb", prio,
+                        unit_t(UNIT_BYTES));
+    g_snapshot_perf_counters = plb.create_perf_counters();
+  }
+  g_ceph_context->get_perfcounters_collection()->add(g_journal_perf_counters);
+  g_ceph_context->get_perfcounters_collection()->add(g_snapshot_perf_counters);
+
+  mirror = new rbd::mirror::Mirror(g_ceph_context, cmd_args);
+  int r = mirror->init();
+  if (r < 0) {
+    std::cerr << "failed to initialize: " << cpp_strerror(r) << std::endl;
+    goto cleanup;
+  }
+
+  mirror->run();
+
+ cleanup:
+  unregister_async_signal_handler(SIGHUP, handle_signal);
+  unregister_async_signal_handler(SIGINT, handle_signal);
+  unregister_async_signal_handler(SIGTERM, handle_signal);
+  shutdown_async_signal_handler();
+
+  g_ceph_context->get_perfcounters_collection()->remove(g_journal_perf_counters);
+  g_ceph_context->get_perfcounters_collection()->remove(g_snapshot_perf_counters);
+
+  delete mirror;
+  delete g_journal_perf_counters;
+  delete g_snapshot_perf_counters;
+
+  return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;  // fixed: was inverted
+}
diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc
new file mode 100644
index 000000000..a1d9c1b54
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+#include <map>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::pool_watcher::RefreshImagesRequest " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+static const uint32_t MAX_RETURN = 1024;
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void RefreshImagesRequest<I>::send() {
+ m_image_ids->clear();
+ mirror_image_list();
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::mirror_image_list() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN);
+
+ m_out_bl.clear();
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ RefreshImagesRequest<I>,
+ &RefreshImagesRequest<I>::handle_mirror_image_list>(this);
+ int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::handle_mirror_image_list(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::map<std::string, std::string> ids;
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_list_finish(&it, &ids);
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ // store as global -> local image ids
+ for (auto &id : ids) {
+ m_image_ids->emplace(id.second, id.first);
+ }
+
+ if (ids.size() == MAX_RETURN) {
+ m_start_after = ids.rbegin()->first;
+ mirror_image_list();
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h
new file mode 100644
index 000000000..8bfeabe29
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+struct Context;
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+// One-shot async request that pages through the remote pool's mirror image
+// directory and fills *image_ids with the full set of mirrored images.
+// The request deletes itself after invoking on_finish.
+template <typename ImageCtxT = librbd::ImageCtx>
+class RefreshImagesRequest {
+public:
+  // Factory helper matching the project's request-object convention.
+  static RefreshImagesRequest *create(librados::IoCtx &remote_io_ctx,
+                                      ImageIds *image_ids, Context *on_finish) {
+    return new RefreshImagesRequest(remote_io_ctx, image_ids, on_finish);
+  }
+
+  // remote_io_ctx: pool to list; image_ids: output set (cleared on send);
+  // on_finish: completed with 0 or a negative errno.
+  RefreshImagesRequest(librados::IoCtx &remote_io_ctx, ImageIds *image_ids,
+                       Context *on_finish)
+    : m_remote_io_ctx(remote_io_ctx), m_image_ids(image_ids),
+      m_on_finish(on_finish) {
+  }
+
+  // Start the request; completion is reported via on_finish.
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    |          /-------------\
+   *    |          |             |
+   *    v          v             | (more images)
+   * MIRROR_IMAGE_LIST ----------/
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  librados::IoCtx &m_remote_io_ctx;
+  ImageIds *m_image_ids;      // caller-owned output set
+  Context *m_on_finish;       // completed exactly once
+
+  bufferlist m_out_bl;        // raw reply from the last list op
+  std::string m_start_after;  // pagination cursor (last key of prior page)
+
+  void mirror_image_list();
+  void handle_mirror_image_list(int r);
+
+  void finish(int r);
+
+};
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
diff --git a/src/tools/rbd_mirror/pool_watcher/Types.h b/src/tools/rbd_mirror/pool_watcher/Types.h
new file mode 100644
index 000000000..52dfc342d
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/Types.h
@@ -0,0 +1,27 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
+
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+// Callback interface implemented by PoolWatcher consumers to receive
+// incremental updates of the pool's mirrored image set.
+struct Listener {
+  virtual ~Listener() {
+  }
+
+  // Invoked with the peer mirror uuid plus the image ids added to and
+  // removed from the pool since the previous notification.
+  virtual void handle_update(const std::string &mirror_uuid,
+                             ImageIds &&added_image_ids,
+                             ImageIds &&removed_image_ids) = 0;
+};
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/service_daemon/Types.cc b/src/tools/rbd_mirror/service_daemon/Types.cc
new file mode 100644
index 000000000..7dc6537c5
--- /dev/null
+++ b/src/tools/rbd_mirror/service_daemon/Types.cc
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <iostream>
+
+namespace rbd {
+namespace mirror {
+namespace service_daemon {
+
+std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level) {
+  // Render the callout severity as its lowercase display name. An
+  // out-of-range enumerator produces no output, matching a switch with
+  // no default case.
+  const char* name = nullptr;
+  switch (callout_level) {
+  case CALLOUT_LEVEL_INFO:
+    name = "info";
+    break;
+  case CALLOUT_LEVEL_WARNING:
+    name = "warning";
+    break;
+  case CALLOUT_LEVEL_ERROR:
+    name = "error";
+    break;
+  }
+  if (name != nullptr) {
+    os << name;
+  }
+  return os;
+}
+
+} // namespace service_daemon
+} // namespace mirror
+} // namespace rbd
+
diff --git a/src/tools/rbd_mirror/service_daemon/Types.h b/src/tools/rbd_mirror/service_daemon/Types.h
new file mode 100644
index 000000000..3aab72016
--- /dev/null
+++ b/src/tools/rbd_mirror/service_daemon/Types.h
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
+#define CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
+
+#include "include/int_types.h"
+#include <iosfwd>
+#include <string>
+#include <boost/variant.hpp>
+
+namespace rbd {
+namespace mirror {
+namespace service_daemon {
+
+// Handle for a health callout raised against the service daemon;
+// CALLOUT_ID_NONE denotes "no callout allocated".
+typedef uint64_t CalloutId;
+const uint64_t CALLOUT_ID_NONE {0};
+
+// Severity of a daemon health callout, ordered least to most severe.
+enum CalloutLevel {
+  CALLOUT_LEVEL_INFO,
+  CALLOUT_LEVEL_WARNING,
+  CALLOUT_LEVEL_ERROR
+};
+
+std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level);
+
+// Free-form attribute value reported to the cluster service daemon map.
+typedef boost::variant<bool, uint64_t, std::string> AttributeValue;
+
+} // namespace service_daemon
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H