summaryrefslogtreecommitdiffstats
path: root/src/tools/rbd
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/tools/rbd/ArgumentTypes.cc576
-rw-r--r--src/tools/rbd/ArgumentTypes.h244
-rw-r--r--src/tools/rbd/CMakeLists.txt80
-rw-r--r--src/tools/rbd/IndentStream.cc59
-rw-r--r--src/tools/rbd/IndentStream.h60
-rw-r--r--src/tools/rbd/MirrorDaemonServiceInfo.cc307
-rw-r--r--src/tools/rbd/MirrorDaemonServiceInfo.h78
-rw-r--r--src/tools/rbd/OptionPrinter.cc161
-rw-r--r--src/tools/rbd/OptionPrinter.h43
-rw-r--r--src/tools/rbd/Schedule.cc367
-rw-r--r--src/tools/rbd/Schedule.h67
-rw-r--r--src/tools/rbd/Shell.cc487
-rw-r--r--src/tools/rbd/Shell.h76
-rw-r--r--src/tools/rbd/Utils.cc1203
-rw-r--r--src/tools/rbd/Utils.h283
-rw-r--r--src/tools/rbd/action/Bench.cc589
-rw-r--r--src/tools/rbd/action/Children.cc167
-rw-r--r--src/tools/rbd/action/Clone.cc99
-rw-r--r--src/tools/rbd/action/Config.cc891
-rw-r--r--src/tools/rbd/action/Copy.cc195
-rw-r--r--src/tools/rbd/action/Create.cc257
-rw-r--r--src/tools/rbd/action/Device.cc285
-rw-r--r--src/tools/rbd/action/Diff.cc142
-rw-r--r--src/tools/rbd/action/DiskUsage.cc377
-rw-r--r--src/tools/rbd/action/Encryption.cc117
-rw-r--r--src/tools/rbd/action/Export.cc653
-rw-r--r--src/tools/rbd/action/Feature.cc116
-rw-r--r--src/tools/rbd/action/Flatten.cc92
-rw-r--r--src/tools/rbd/action/Ggate.cc180
-rw-r--r--src/tools/rbd/action/Group.cc912
-rw-r--r--src/tools/rbd/action/ImageMeta.cc345
-rw-r--r--src/tools/rbd/action/Import.cc1036
-rw-r--r--src/tools/rbd/action/Info.cc471
-rw-r--r--src/tools/rbd/action/Journal.cc1251
-rw-r--r--src/tools/rbd/action/Kernel.cc679
-rw-r--r--src/tools/rbd/action/List.cc346
-rw-r--r--src/tools/rbd/action/Lock.cc279
-rw-r--r--src/tools/rbd/action/MergeDiff.cc456
-rw-r--r--src/tools/rbd/action/Migration.cc429
-rw-r--r--src/tools/rbd/action/MirrorImage.cc605
-rw-r--r--src/tools/rbd/action/MirrorPool.cc1772
-rw-r--r--src/tools/rbd/action/MirrorSnapshotSchedule.cc322
-rw-r--r--src/tools/rbd/action/Namespace.cc191
-rw-r--r--src/tools/rbd/action/Nbd.cc389
-rw-r--r--src/tools/rbd/action/ObjectMap.cc131
-rw-r--r--src/tools/rbd/action/Perf.cc717
-rw-r--r--src/tools/rbd/action/PersistentCache.cc122
-rw-r--r--src/tools/rbd/action/Pool.cc162
-rw-r--r--src/tools/rbd/action/Remove.cc161
-rw-r--r--src/tools/rbd/action/Rename.cc94
-rw-r--r--src/tools/rbd/action/Resize.cc123
-rw-r--r--src/tools/rbd/action/Snap.cc972
-rw-r--r--src/tools/rbd/action/Sparsify.cc82
-rw-r--r--src/tools/rbd/action/Status.cc365
-rw-r--r--src/tools/rbd/action/Trash.cc543
-rw-r--r--src/tools/rbd/action/TrashPurgeSchedule.cc355
-rw-r--r--src/tools/rbd/action/Watch.cc149
-rw-r--r--src/tools/rbd/action/Wnbd.cc172
-rw-r--r--src/tools/rbd/rbd.cc10
-rw-r--r--src/tools/rbd_ggate/CMakeLists.txt9
-rw-r--r--src/tools/rbd_ggate/Driver.cc165
-rw-r--r--src/tools/rbd_ggate/Driver.h50
-rw-r--r--src/tools/rbd_ggate/Request.h55
-rw-r--r--src/tools/rbd_ggate/Server.cc262
-rw-r--r--src/tools/rbd_ggate/Server.h88
-rw-r--r--src/tools/rbd_ggate/Watcher.cc48
-rw-r--r--src/tools/rbd_ggate/Watcher.h34
-rw-r--r--src/tools/rbd_ggate/debug.cc55
-rw-r--r--src/tools/rbd_ggate/debug.h17
-rw-r--r--src/tools/rbd_ggate/ggate_drv.c379
-rw-r--r--src/tools/rbd_ggate/ggate_drv.h64
-rw-r--r--src/tools/rbd_ggate/main.cc516
-rw-r--r--src/tools/rbd_mirror/BaseRequest.h33
-rw-r--r--src/tools/rbd_mirror/CMakeLists.txt91
-rw-r--r--src/tools/rbd_mirror/CancelableRequest.h44
-rw-r--r--src/tools/rbd_mirror/ClusterWatcher.cc252
-rw-r--r--src/tools/rbd_mirror/ClusterWatcher.h73
-rw-r--r--src/tools/rbd_mirror/ImageDeleter.cc549
-rw-r--r--src/tools/rbd_mirror/ImageDeleter.h189
-rw-r--r--src/tools/rbd_mirror/ImageMap.cc604
-rw-r--r--src/tools/rbd_mirror/ImageMap.h175
-rw-r--r--src/tools/rbd_mirror/ImageReplayer.cc1201
-rw-r--r--src/tools/rbd_mirror/ImageReplayer.h273
-rw-r--r--src/tools/rbd_mirror/ImageSync.cc469
-rw-r--r--src/tools/rbd_mirror/ImageSync.h151
-rw-r--r--src/tools/rbd_mirror/InstanceReplayer.cc543
-rw-r--r--src/tools/rbd_mirror/InstanceReplayer.h138
-rw-r--r--src/tools/rbd_mirror/InstanceWatcher.cc1290
-rw-r--r--src/tools/rbd_mirror/InstanceWatcher.h269
-rw-r--r--src/tools/rbd_mirror/Instances.cc356
-rw-r--r--src/tools/rbd_mirror/Instances.h168
-rw-r--r--src/tools/rbd_mirror/LeaderWatcher.cc1069
-rw-r--r--src/tools/rbd_mirror/LeaderWatcher.h313
-rw-r--r--src/tools/rbd_mirror/Mirror.cc763
-rw-r--r--src/tools/rbd_mirror/Mirror.h89
-rw-r--r--src/tools/rbd_mirror/MirrorStatusUpdater.cc397
-rw-r--r--src/tools/rbd_mirror/MirrorStatusUpdater.h119
-rw-r--r--src/tools/rbd_mirror/MirrorStatusWatcher.cc74
-rw-r--r--src/tools/rbd_mirror/MirrorStatusWatcher.h43
-rw-r--r--src/tools/rbd_mirror/NamespaceReplayer.cc862
-rw-r--r--src/tools/rbd_mirror/NamespaceReplayer.h308
-rw-r--r--src/tools/rbd_mirror/PoolMetaCache.cc83
-rw-r--r--src/tools/rbd_mirror/PoolMetaCache.h47
-rw-r--r--src/tools/rbd_mirror/PoolReplayer.cc1110
-rw-r--r--src/tools/rbd_mirror/PoolReplayer.h288
-rw-r--r--src/tools/rbd_mirror/PoolWatcher.cc473
-rw-r--r--src/tools/rbd_mirror/PoolWatcher.h161
-rw-r--r--src/tools/rbd_mirror/ProgressContext.h21
-rw-r--r--src/tools/rbd_mirror/RemotePoolPoller.cc267
-rw-r--r--src/tools/rbd_mirror/RemotePoolPoller.h133
-rw-r--r--src/tools/rbd_mirror/ServiceDaemon.cc327
-rw-r--r--src/tools/rbd_mirror/ServiceDaemon.h94
-rw-r--r--src/tools/rbd_mirror/Threads.cc38
-rw-r--r--src/tools/rbd_mirror/Threads.h45
-rw-r--r--src/tools/rbd_mirror/Throttler.cc240
-rw-r--r--src/tools/rbd_mirror/Throttler.h74
-rw-r--r--src/tools/rbd_mirror/Types.cc32
-rw-r--r--src/tools/rbd_mirror/Types.h171
-rw-r--r--src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc299
-rw-r--r--src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h105
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc419
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h142
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc265
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h117
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashWatcher.cc384
-rw-r--r--src/tools/rbd_mirror/image_deleter/TrashWatcher.h139
-rw-r--r--src/tools/rbd_mirror/image_deleter/Types.h54
-rw-r--r--src/tools/rbd_mirror/image_map/LoadRequest.cc174
-rw-r--r--src/tools/rbd_mirror/image_map/LoadRequest.h77
-rw-r--r--src/tools/rbd_mirror/image_map/Policy.cc407
-rw-r--r--src/tools/rbd_mirror/image_map/Policy.h122
-rw-r--r--src/tools/rbd_mirror/image_map/SimplePolicy.cc89
-rw-r--r--src/tools/rbd_mirror/image_map/SimplePolicy.h39
-rw-r--r--src/tools/rbd_mirror/image_map/StateTransition.cc94
-rw-r--r--src/tools/rbd_mirror/image_map/StateTransition.h76
-rw-r--r--src/tools/rbd_mirror/image_map/Types.cc138
-rw-r--r--src/tools/rbd_mirror/image_map/Types.h130
-rw-r--r--src/tools/rbd_mirror/image_map/UpdateRequest.cc100
-rw-r--r--src/tools/rbd_mirror/image_map/UpdateRequest.h65
-rw-r--r--src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc485
-rw-r--r--src/tools/rbd_mirror/image_replayer/BootstrapRequest.h181
-rw-r--r--src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc62
-rw-r--r--src/tools/rbd_mirror/image_replayer/CloseImageRequest.h56
-rw-r--r--src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc451
-rw-r--r--src/tools/rbd_mirror/image_replayer/CreateImageRequest.h144
-rw-r--r--src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc85
-rw-r--r--src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h75
-rw-r--r--src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc79
-rw-r--r--src/tools/rbd_mirror/image_replayer/OpenImageRequest.h71
-rw-r--r--src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc292
-rw-r--r--src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h97
-rw-r--r--src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc197
-rw-r--r--src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h115
-rw-r--r--src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc283
-rw-r--r--src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h153
-rw-r--r--src/tools/rbd_mirror/image_replayer/Replayer.h39
-rw-r--r--src/tools/rbd_mirror/image_replayer/ReplayerListener.h21
-rw-r--r--src/tools/rbd_mirror/image_replayer/StateBuilder.cc138
-rw-r--r--src/tools/rbd_mirror/image_replayer/StateBuilder.h114
-rw-r--r--src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc34
-rw-r--r--src/tools/rbd_mirror/image_replayer/TimeRollingMean.h40
-rw-r--r--src/tools/rbd_mirror/image_replayer/Types.h21
-rw-r--r--src/tools/rbd_mirror/image_replayer/Utils.cc61
-rw-r--r--src/tools/rbd_mirror/image_replayer/Utils.h29
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc162
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h116
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc206
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h127
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.cc316
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h115
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc284
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h70
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/Replayer.cc1317
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/Replayer.h323
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc149
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h94
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc109
-rw-r--r--src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h55
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc658
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h155
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc204
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h121
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc70
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h92
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc1633
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h349
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc120
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h93
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc65
-rw-r--r--src/tools/rbd_mirror/image_replayer/snapshot/Utils.h30
-rw-r--r--src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc172
-rw-r--r--src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h93
-rw-r--r--src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc213
-rw-r--r--src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h91
-rw-r--r--src/tools/rbd_mirror/image_sync/Types.h74
-rw-r--r--src/tools/rbd_mirror/image_sync/Utils.cc24
-rw-r--r--src/tools/rbd_mirror/image_sync/Utils.h16
-rw-r--r--src/tools/rbd_mirror/instance_watcher/Types.cc245
-rw-r--r--src/tools/rbd_mirror/instance_watcher/Types.h197
-rw-r--r--src/tools/rbd_mirror/instances/Types.h28
-rw-r--r--src/tools/rbd_mirror/leader_watcher/Types.cc161
-rw-r--r--src/tools/rbd_mirror/leader_watcher/Types.h117
-rw-r--r--src/tools/rbd_mirror/main.cc124
-rw-r--r--src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc89
-rw-r--r--src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h73
-rw-r--r--src/tools/rbd_mirror/pool_watcher/Types.h27
-rw-r--r--src/tools/rbd_mirror/service_daemon/Types.cc29
-rw-r--r--src/tools/rbd_mirror/service_daemon/Types.h33
-rw-r--r--src/tools/rbd_nbd/CMakeLists.txt4
-rw-r--r--src/tools/rbd_nbd/nbd-netlink.h100
-rw-r--r--src/tools/rbd_nbd/rbd-nbd.cc2441
-rwxr-xr-xsrc/tools/rbd_nbd/rbd-nbd_quiesce31
-rw-r--r--src/tools/rbd_recover_tool/FAQ16
-rw-r--r--src/tools/rbd_recover_tool/README97
-rw-r--r--src/tools/rbd_recover_tool/TODO2
-rw-r--r--src/tools/rbd_recover_tool/common_h412
-rw-r--r--src/tools/rbd_recover_tool/config/mds_host0
-rw-r--r--src/tools/rbd_recover_tool/config/mon_host0
-rw-r--r--src/tools/rbd_recover_tool/config/osd_host_path0
-rw-r--r--src/tools/rbd_recover_tool/database_h1134
-rw-r--r--src/tools/rbd_recover_tool/epoch_h119
-rw-r--r--src/tools/rbd_recover_tool/metadata_h368
-rwxr-xr-xsrc/tools/rbd_recover_tool/osd_job170
-rwxr-xr-xsrc/tools/rbd_recover_tool/rbd-recover-tool327
-rwxr-xr-xsrc/tools/rbd_recover_tool/test_rbd_recover_tool.sh542
-rw-r--r--src/tools/rbd_wnbd/CMakeLists.txt11
-rw-r--r--src/tools/rbd_wnbd/rbd_wnbd.cc1871
-rw-r--r--src/tools/rbd_wnbd/rbd_wnbd.h193
-rw-r--r--src/tools/rbd_wnbd/wnbd_handler.cc456
-rw-r--r--src/tools/rbd_wnbd/wnbd_handler.h188
-rw-r--r--src/tools/rbd_wnbd/wnbd_wmi.cc261
-rw-r--r--src/tools/rbd_wnbd/wnbd_wmi.h109
232 files changed, 62592 insertions, 0 deletions
diff --git a/src/tools/rbd/ArgumentTypes.cc b/src/tools/rbd/ArgumentTypes.cc
new file mode 100644
index 000000000..17a06c805
--- /dev/null
+++ b/src/tools/rbd/ArgumentTypes.cc
@@ -0,0 +1,576 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/rbd/features.h"
+#include "common/config_proxy.h"
+#include "common/strtol.h"
+#include "common/Formatter.h"
+#include "global/global_context.h"
+#include <iostream>
+#include <boost/tokenizer.hpp>
+
+namespace rbd {
+namespace argument_types {
+
+namespace po = boost::program_options;
+
+const std::map<uint64_t, std::string> ImageFeatures::FEATURE_MAPPING = {
+ {RBD_FEATURE_LAYERING, RBD_FEATURE_NAME_LAYERING},
+ {RBD_FEATURE_STRIPINGV2, RBD_FEATURE_NAME_STRIPINGV2},
+ {RBD_FEATURE_EXCLUSIVE_LOCK, RBD_FEATURE_NAME_EXCLUSIVE_LOCK},
+ {RBD_FEATURE_OBJECT_MAP, RBD_FEATURE_NAME_OBJECT_MAP},
+ {RBD_FEATURE_FAST_DIFF, RBD_FEATURE_NAME_FAST_DIFF},
+ {RBD_FEATURE_DEEP_FLATTEN, RBD_FEATURE_NAME_DEEP_FLATTEN},
+ {RBD_FEATURE_JOURNALING, RBD_FEATURE_NAME_JOURNALING},
+ {RBD_FEATURE_DATA_POOL, RBD_FEATURE_NAME_DATA_POOL},
+ {RBD_FEATURE_OPERATIONS, RBD_FEATURE_NAME_OPERATIONS},
+ {RBD_FEATURE_MIGRATING, RBD_FEATURE_NAME_MIGRATING},
+ {RBD_FEATURE_NON_PRIMARY, RBD_FEATURE_NAME_NON_PRIMARY},
+ {RBD_FEATURE_DIRTY_CACHE, RBD_FEATURE_NAME_DIRTY_CACHE},
+};
+
+Format::Formatter Format::create_formatter(bool pretty) const {
+ if (value == "json") {
+ return Formatter(new JSONFormatter(pretty));
+ } else if (value == "xml") {
+ return Formatter(new XMLFormatter(pretty));
+ }
+ return Formatter();
+}
+
+std::string get_name_prefix(ArgumentModifier modifier) {
+ switch (modifier) {
+ case ARGUMENT_MODIFIER_SOURCE:
+ return SOURCE_PREFIX;
+ case ARGUMENT_MODIFIER_DEST:
+ return DEST_PREFIX;
+ default:
+ return "";
+ }
+}
+
+std::string get_description_prefix(ArgumentModifier modifier) {
+ switch (modifier) {
+ case ARGUMENT_MODIFIER_SOURCE:
+ return "source ";
+ case ARGUMENT_MODIFIER_DEST:
+ return "destination ";
+ default:
+ return "";
+ }
+}
+
+void add_pool_option(po::options_description *opt,
+ ArgumentModifier modifier,
+ const std::string &desc_suffix) {
+ std::string name = POOL_NAME + ",p";
+ std::string description = "pool name";
+ switch (modifier) {
+ case ARGUMENT_MODIFIER_NONE:
+ break;
+ case ARGUMENT_MODIFIER_SOURCE:
+ description = "source " + description;
+ break;
+ case ARGUMENT_MODIFIER_DEST:
+ name = DEST_POOL_NAME;
+ description = "destination " + description;
+ break;
+ }
+ description += desc_suffix;
+
+ // TODO add validator
+ opt->add_options()
+ (name.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_namespace_option(boost::program_options::options_description *opt,
+ ArgumentModifier modifier) {
+ std::string name = NAMESPACE_NAME;
+ std::string description = "namespace name";
+ switch (modifier) {
+ case ARGUMENT_MODIFIER_NONE:
+ break;
+ case ARGUMENT_MODIFIER_SOURCE:
+ description = "source " + description;
+ break;
+ case ARGUMENT_MODIFIER_DEST:
+ name = DEST_NAMESPACE_NAME;
+ description = "destination " + description;
+ break;
+ }
+
+ // TODO add validator
+ opt->add_options()
+ (name.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_image_option(po::options_description *opt,
+ ArgumentModifier modifier,
+ const std::string &desc_suffix) {
+ std::string name = IMAGE_NAME;
+ std::string description = "image name";
+ switch (modifier) {
+ case ARGUMENT_MODIFIER_NONE:
+ break;
+ case ARGUMENT_MODIFIER_SOURCE:
+ description = "source " + description;
+ break;
+ case ARGUMENT_MODIFIER_DEST:
+ name = DEST_IMAGE_NAME;
+ description = "destination " + description;
+ break;
+ }
+ description += desc_suffix;
+
+ // TODO add validator
+ opt->add_options()
+ (name.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_image_id_option(po::options_description *opt,
+ const std::string &desc_suffix) {
+ std::string name = IMAGE_ID;
+ std::string description = "image id";
+ description += desc_suffix;
+
+ // TODO add validator
+ opt->add_options()
+ (name.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_snap_option(po::options_description *opt,
+ ArgumentModifier modifier) {
+
+ std::string name = SNAPSHOT_NAME;
+ std::string description = "snapshot name";
+ switch (modifier) {
+ case ARGUMENT_MODIFIER_NONE:
+ break;
+ case ARGUMENT_MODIFIER_DEST:
+ name = DEST_SNAPSHOT_NAME;
+ description = "destination " + description;
+ break;
+ case ARGUMENT_MODIFIER_SOURCE:
+ description = "source " + description;
+ break;
+ }
+
+ // TODO add validator
+ opt->add_options()
+ (name.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_snap_id_option(po::options_description *opt) {
+ opt->add_options()
+ (SNAPSHOT_ID.c_str(), po::value<uint64_t>(), "snapshot id");
+}
+
+void add_pool_options(boost::program_options::options_description *pos,
+ boost::program_options::options_description *opt,
+ bool namespaces_supported) {
+ opt->add_options()
+ ((POOL_NAME + ",p").c_str(), po::value<std::string>(), "pool name");
+ if (namespaces_supported) {
+ add_namespace_option(opt, ARGUMENT_MODIFIER_NONE);
+ pos->add_options()
+ ("pool-spec", "pool specification\n"
+ "(example: <pool-name>[/<namespace>]");
+ } else {
+ pos->add_options()
+ ("pool-name", "pool name");
+ }
+}
+
+void add_image_spec_options(po::options_description *pos,
+ po::options_description *opt,
+ ArgumentModifier modifier) {
+ pos->add_options()
+ ((get_name_prefix(modifier) + IMAGE_SPEC).c_str(),
+ (get_description_prefix(modifier) + "image specification\n" +
+ "(example: [<pool-name>/[<namespace>/]]<image-name>)").c_str());
+ add_pool_option(opt, modifier);
+ add_namespace_option(opt, modifier);
+ add_image_option(opt, modifier);
+}
+
+void add_snap_spec_options(po::options_description *pos,
+ po::options_description *opt,
+ ArgumentModifier modifier) {
+ pos->add_options()
+ ((get_name_prefix(modifier) + SNAPSHOT_SPEC).c_str(),
+ (get_description_prefix(modifier) + "snapshot specification\n" +
+ "(example: [<pool-name>/[<namespace>/]]<image-name>@<snap-name>)").c_str());
+ add_pool_option(opt, modifier);
+ add_namespace_option(opt, modifier);
+ add_image_option(opt, modifier);
+ add_snap_option(opt, modifier);
+}
+
+void add_image_or_snap_spec_options(po::options_description *pos,
+ po::options_description *opt,
+ ArgumentModifier modifier) {
+ pos->add_options()
+ ((get_name_prefix(modifier) + IMAGE_OR_SNAPSHOT_SPEC).c_str(),
+ (get_description_prefix(modifier) + "image or snapshot specification\n" +
+ "(example: [<pool-name>/[<namespace>/]]<image-name>[@<snap-name>])").c_str());
+ add_pool_option(opt, modifier);
+ add_namespace_option(opt, modifier);
+ add_image_option(opt, modifier);
+ add_snap_option(opt, modifier);
+}
+
+void add_create_image_options(po::options_description *opt,
+ bool include_format) {
+ // TODO get default image format from conf
+ if (include_format) {
+ opt->add_options()
+ (IMAGE_FORMAT.c_str(), po::value<ImageFormat>(),
+ "image format [default: 2]")
+ (IMAGE_NEW_FORMAT.c_str(),
+ po::value<ImageNewFormat>()->zero_tokens(),
+ "deprecated[:image-format 2]");
+ }
+
+ opt->add_options()
+ (IMAGE_ORDER.c_str(), po::value<ImageOrder>(),
+ "deprecated[:object-size]")
+ (IMAGE_OBJECT_SIZE.c_str(), po::value<ImageObjectSize>(),
+ "object size in B/K/M [4K <= object size <= 32M]")
+ (IMAGE_FEATURES.c_str(), po::value<ImageFeatures>()->composing(),
+ ("image features\n" + get_short_features_help(true)).c_str())
+ (IMAGE_SHARED.c_str(), po::bool_switch(), "shared image")
+ (IMAGE_STRIPE_UNIT.c_str(), po::value<ImageObjectSize>(), "stripe unit in B/K/M")
+ (IMAGE_STRIPE_COUNT.c_str(), po::value<uint64_t>(), "stripe count")
+ (IMAGE_DATA_POOL.c_str(), po::value<std::string>(), "data pool")
+ (IMAGE_MIRROR_IMAGE_MODE.c_str(), po::value<MirrorImageMode>(),
+ "mirror image mode [journal or snapshot]");
+
+ add_create_journal_options(opt);
+}
+
+void add_create_journal_options(po::options_description *opt) {
+ opt->add_options()
+ (JOURNAL_SPLAY_WIDTH.c_str(), po::value<uint64_t>(),
+ "number of active journal objects")
+ (JOURNAL_OBJECT_SIZE.c_str(), po::value<JournalObjectSize>(),
+ "size of journal objects [4K <= size <= 64M]")
+ (JOURNAL_POOL.c_str(), po::value<std::string>(),
+ "pool for journal objects");
+}
+
+void add_size_option(boost::program_options::options_description *opt) {
+ opt->add_options()
+ ((IMAGE_SIZE + ",s").c_str(), po::value<ImageSize>()->required(),
+ "image size (in M/G/T) [default: M]");
+}
+
+void add_sparse_size_option(boost::program_options::options_description *opt) {
+ opt->add_options()
+ (IMAGE_SPARSE_SIZE.c_str(), po::value<ImageObjectSize>(),
+ "sparse size in B/K/M [default: 4K]");
+}
+
+void add_path_options(boost::program_options::options_description *pos,
+ boost::program_options::options_description *opt,
+ const std::string &description) {
+ pos->add_options()
+ (PATH_NAME.c_str(), po::value<std::string>(), description.c_str());
+ opt->add_options()
+ (PATH.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_limit_option(po::options_description *opt) {
+ std::string description = "maximum allowed snapshot count";
+
+ opt->add_options()
+ (LIMIT.c_str(), po::value<uint64_t>(), description.c_str());
+}
+
+void add_no_progress_option(boost::program_options::options_description *opt) {
+ opt->add_options()
+ (NO_PROGRESS.c_str(), po::bool_switch(), "disable progress output");
+}
+
+void add_format_options(boost::program_options::options_description *opt) {
+ opt->add_options()
+ (FORMAT.c_str(), po::value<Format>(), "output format (plain, json, or xml) [default: plain]")
+ (PRETTY_FORMAT.c_str(), po::bool_switch(),
+ "pretty formatting (json and xml)");
+}
+
+void add_verbose_option(boost::program_options::options_description *opt) {
+ opt->add_options()
+ (VERBOSE.c_str(), po::bool_switch(), "be verbose");
+}
+
+void add_no_error_option(boost::program_options::options_description *opt) {
+ opt->add_options()
+ (NO_ERR.c_str(), po::bool_switch(), "continue after error");
+}
+
+void add_export_format_option(boost::program_options::options_description *opt) {
+ opt->add_options()
+ ("export-format", po::value<ExportFormat>(), "format of image file");
+}
+
+void add_flatten_option(boost::program_options::options_description *opt) {
+ opt->add_options()
+ (IMAGE_FLATTEN.c_str(), po::bool_switch(),
+ "fill clone with parent data (make it independent)");
+}
+
+void add_snap_create_options(po::options_description *opt) {
+ opt->add_options()
+ (SKIP_QUIESCE.c_str(), po::bool_switch(), "do not run quiesce hooks")
+ (IGNORE_QUIESCE_ERROR.c_str(), po::bool_switch(),
+ "ignore quiesce hook error");
+}
+
+void add_encryption_options(boost::program_options::options_description *opt) {
+ opt->add_options()
+ (ENCRYPTION_FORMAT.c_str(),
+ po::value<std::vector<EncryptionFormat>>(),
+ "encryption format (luks, luks1, luks2) [default: luks]");
+
+ opt->add_options()
+ (ENCRYPTION_PASSPHRASE_FILE.c_str(),
+ po::value<std::vector<std::string>>(),
+ "path to file containing passphrase for unlocking the image");
+}
+
+std::string get_short_features_help(bool append_suffix) {
+ std::ostringstream oss;
+ bool first_feature = true;
+ oss << "[";
+ for (auto &pair : ImageFeatures::FEATURE_MAPPING) {
+ if ((pair.first & RBD_FEATURES_IMPLICIT_ENABLE) != 0ULL) {
+ // hide implicitly enabled features from list
+ continue;
+ } else if (!append_suffix && (pair.first & RBD_FEATURES_MUTABLE) == 0ULL) {
+ // hide non-mutable features for the 'rbd feature XYZ' command
+ continue;
+ }
+
+ if (!first_feature) {
+ oss << ", ";
+ }
+ first_feature = false;
+
+ std::string suffix;
+ if (append_suffix) {
+ if ((pair.first & rbd::utils::get_rbd_default_features(g_ceph_context)) != 0) {
+ suffix += "+";
+ }
+ if ((pair.first & RBD_FEATURES_MUTABLE) != 0) {
+ suffix += "*";
+ } else if ((pair.first & RBD_FEATURES_DISABLE_ONLY) != 0) {
+ suffix += "-";
+ }
+ if (!suffix.empty()) {
+ suffix = "(" + suffix + ")";
+ }
+ }
+ oss << pair.second << suffix;
+ }
+ oss << "]";
+ return oss.str();
+}
+
+std::string get_long_features_help() {
+ std::ostringstream oss;
+ oss << "Image Features:" << std::endl
+ << " (*) supports enabling/disabling on existing images" << std::endl
+ << " (-) supports disabling-only on existing images" << std::endl
+ << " (+) enabled by default for new images if features not specified"
+ << std::endl;
+ return oss.str();
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageSize *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+
+ std::string parse_error;
+ uint64_t size = strict_iecstrtoll(s, &parse_error);
+ if (!parse_error.empty()) {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+
+ //NOTE: We can remove below given three lines of code once all applications,
+ //which use this CLI will adopt B/K/M/G/T/P/E with size value
+ if (isdigit(*s.rbegin())) {
+ size = size << 20; // Default MB to Bytes
+ }
+ v = boost::any(size);
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageOrder *target_type, int dummy) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+ try {
+ uint64_t order = boost::lexical_cast<uint64_t>(s);
+ if (order >= 12 && order <= 25) {
+ v = boost::any(order);
+ return;
+ }
+ } catch (const boost::bad_lexical_cast &) {
+ }
+ throw po::validation_error(po::validation_error::invalid_option_value);
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageObjectSize *target_type, int dummy) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+
+ std::string parse_error;
+ uint64_t objectsize = strict_iecstrtoll(s, &parse_error);
+ if (!parse_error.empty()) {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+ v = boost::any(objectsize);
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageFormat *target_type, int dummy) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+ try {
+ uint32_t format = boost::lexical_cast<uint32_t>(s);
+ if (format == 1 || format == 2) {
+ v = boost::any(format);
+ return;
+ }
+ } catch (const boost::bad_lexical_cast &) {
+ }
+ throw po::validation_error(po::validation_error::invalid_option_value);
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageNewFormat *target_type, int dummy) {
+ v = boost::any(true);
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageFeatures *target_type, int) {
+ if (v.empty()) {
+ v = boost::any(static_cast<uint64_t>(0));
+ }
+
+ uint64_t &features = boost::any_cast<uint64_t &>(v);
+ for (auto &value : values) {
+ boost::char_separator<char> sep(",");
+ boost::tokenizer<boost::char_separator<char> > tok(value, sep);
+ for (auto &token : tok) {
+ bool matched = false;
+ for (auto &it : ImageFeatures::FEATURE_MAPPING) {
+ if (token == it.second) {
+ features |= it.first;
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched) {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+ }
+ }
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ MirrorImageMode* mirror_image_mode, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+ if (s == "journal") {
+ v = boost::any(RBD_MIRROR_IMAGE_MODE_JOURNAL);
+ } else if (s == "snapshot") {
+ v = boost::any(RBD_MIRROR_IMAGE_MODE_SNAPSHOT);
+ } else {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ Format *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+ if (s == "plain" || s == "json" || s == "xml") {
+ v = boost::any(Format(s));
+ } else {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ JournalObjectSize *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+
+ std::string parse_error;
+ uint64_t size = strict_iecstrtoll(s, &parse_error);
+ if (parse_error.empty() && (size >= (1 << 12)) && (size <= (1 << 26))) {
+ v = boost::any(size);
+ return;
+ }
+ throw po::validation_error(po::validation_error::invalid_option_value);
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ EncryptionAlgorithm *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+ if (s == "aes-128") {
+ v = boost::any(RBD_ENCRYPTION_ALGORITHM_AES128);
+ } else if (s == "aes-256") {
+ v = boost::any(RBD_ENCRYPTION_ALGORITHM_AES256);
+ } else {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ EncryptionFormat *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+ if (s == "luks") {
+ v = boost::any(EncryptionFormat{RBD_ENCRYPTION_FORMAT_LUKS});
+ } else if (s == "luks1") {
+ v = boost::any(EncryptionFormat{RBD_ENCRYPTION_FORMAT_LUKS1});
+ } else if (s == "luks2") {
+ v = boost::any(EncryptionFormat{RBD_ENCRYPTION_FORMAT_LUKS2});
+ } else {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ExportFormat *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+
+ std::string parse_error;
+ uint64_t format = strict_iecstrtoll(s, &parse_error);
+ if (!parse_error.empty() || (format != 1 && format != 2)) {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+
+ v = boost::any(format);
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ Secret *target_type, int) {
+
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+ g_conf().set_val_or_die("keyfile", s.c_str());
+ v = boost::any(s);
+}
+
+} // namespace argument_types
+} // namespace rbd
diff --git a/src/tools/rbd/ArgumentTypes.h b/src/tools/rbd/ArgumentTypes.h
new file mode 100644
index 000000000..db16b4b3c
--- /dev/null
+++ b/src/tools/rbd/ArgumentTypes.h
@@ -0,0 +1,244 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_ARGUMENT_TYPES_H
+#define CEPH_RBD_ARGUMENT_TYPES_H
+
+#include "include/int_types.h"
+#include <set>
+#include <string>
+#include <vector>
+#include <boost/any.hpp>
+#include <boost/program_options.hpp>
+#include <boost/shared_ptr.hpp>
+
+namespace ceph { class Formatter; }
+
+namespace rbd {
+namespace argument_types {
+
+enum ArgumentModifier {
+ ARGUMENT_MODIFIER_NONE,
+ ARGUMENT_MODIFIER_SOURCE,
+ ARGUMENT_MODIFIER_DEST
+};
+
+enum SpecFormat {
+ SPEC_FORMAT_IMAGE,
+ SPEC_FORMAT_SNAPSHOT,
+ SPEC_FORMAT_IMAGE_OR_SNAPSHOT
+};
+
+static const std::string SOURCE_PREFIX("source-");
+static const std::string DEST_PREFIX("dest-");
+
+// positional arguments
+static const std::string POSITIONAL_COMMAND_SPEC("positional-command-spec");
+static const std::string POSITIONAL_ARGUMENTS("positional-arguments");
+static const std::string IMAGE_SPEC("image-spec");
+static const std::string SNAPSHOT_SPEC("snap-spec");
+static const std::string IMAGE_OR_SNAPSHOT_SPEC("image-or-snap-spec");
+static const std::string PATH_NAME("path-name");
+static const std::string IMAGE_ID("image-id");
+
+// optional arguments
+static const std::string CONFIG_PATH("conf");
+static const std::string POOL_NAME("pool");
+static const std::string DEST_POOL_NAME("dest-pool");
+static const std::string NAMESPACE_NAME("namespace");
+static const std::string DEST_NAMESPACE_NAME("dest-namespace");
+static const std::string IMAGE_NAME("image");
+static const std::string DEST_IMAGE_NAME("dest");
+static const std::string SNAPSHOT_NAME("snap");
+static const std::string SNAPSHOT_ID("snap-id");
+static const std::string DEST_SNAPSHOT_NAME("dest-snap");
+static const std::string PATH("path");
+static const std::string FROM_SNAPSHOT_NAME("from-snap");
+static const std::string WHOLE_OBJECT("whole-object");
+
+// encryption arguments
+static const std::string ENCRYPTION_FORMAT("encryption-format");
+static const std::string ENCRYPTION_PASSPHRASE_FILE("encryption-passphrase-file");
+
+static const std::string IMAGE_FORMAT("image-format");
+static const std::string IMAGE_NEW_FORMAT("new-format");
+static const std::string IMAGE_ORDER("order");
+static const std::string IMAGE_OBJECT_SIZE("object-size");
+static const std::string IMAGE_FEATURES("image-feature");
+static const std::string IMAGE_SHARED("image-shared");
+static const std::string IMAGE_SIZE("size");
+static const std::string IMAGE_STRIPE_UNIT("stripe-unit");
+static const std::string IMAGE_STRIPE_COUNT("stripe-count");
+static const std::string IMAGE_DATA_POOL("data-pool");
+static const std::string IMAGE_SPARSE_SIZE("sparse-size");
+static const std::string IMAGE_THICK_PROVISION("thick-provision");
+static const std::string IMAGE_FLATTEN("flatten");
+static const std::string IMAGE_MIRROR_IMAGE_MODE("mirror-image-mode");
+
+static const std::string JOURNAL_OBJECT_SIZE("journal-object-size");
+static const std::string JOURNAL_SPLAY_WIDTH("journal-splay-width");
+static const std::string JOURNAL_POOL("journal-pool");
+
+static const std::string NO_PROGRESS("no-progress");
+static const std::string FORMAT("format");
+static const std::string PRETTY_FORMAT("pretty-format");
+static const std::string VERBOSE("verbose");
+static const std::string NO_ERR("no-error");
+
+static const std::string LIMIT("limit");
+
+static const std::string SKIP_QUIESCE("skip-quiesce");
+static const std::string IGNORE_QUIESCE_ERROR("ignore-quiesce-error");
+
+static const std::set<std::string> SWITCH_ARGUMENTS = {
+ WHOLE_OBJECT, IMAGE_SHARED, IMAGE_THICK_PROVISION, IMAGE_FLATTEN,
+ NO_PROGRESS, PRETTY_FORMAT, VERBOSE, NO_ERR, SKIP_QUIESCE,
+ IGNORE_QUIESCE_ERROR
+};
+
+struct ImageSize {};
+struct ImageOrder {};
+struct ImageObjectSize {};
+struct ImageFormat {};
+struct ImageNewFormat {};
+
+struct ImageFeatures {
+ static const std::map<uint64_t, std::string> FEATURE_MAPPING;
+
+ uint64_t features;
+};
+
+struct MirrorImageMode {};
+
+template <typename T>
+struct TypedValue {
+ T value;
+ TypedValue(const T& t) : value(t) {}
+};
+
+struct Format : public TypedValue<std::string> {
+ typedef boost::shared_ptr<ceph::Formatter> Formatter;
+
+ Format(const std::string &format) : TypedValue<std::string>(format) {}
+
+ Formatter create_formatter(bool pretty) const;
+};
+
+struct JournalObjectSize {};
+
+struct ExportFormat {};
+
+struct Secret {};
+
+struct EncryptionAlgorithm {};
+struct EncryptionFormat {
+ uint64_t format;
+};
+
+void add_export_format_option(boost::program_options::options_description *opt);
+
+std::string get_name_prefix(ArgumentModifier modifier);
+std::string get_description_prefix(ArgumentModifier modifier);
+
+void add_all_option(boost::program_options::options_description *opt,
+ std::string description);
+
+void add_pool_option(boost::program_options::options_description *opt,
+ ArgumentModifier modifier,
+ const std::string &desc_suffix = "");
+void add_namespace_option(boost::program_options::options_description *opt,
+ ArgumentModifier modifier);
+
+void add_image_option(boost::program_options::options_description *opt,
+ ArgumentModifier modifier,
+ const std::string &desc_suffix = "");
+
+void add_image_id_option(boost::program_options::options_description *opt,
+ const std::string &desc_suffix = "");
+
+void add_snap_option(boost::program_options::options_description *opt,
+ ArgumentModifier modifier);
+void add_snap_id_option(boost::program_options::options_description *opt);
+
+void add_pool_options(boost::program_options::options_description *pos,
+ boost::program_options::options_description *opt,
+ bool namespaces_supported);
+
+void add_image_spec_options(boost::program_options::options_description *pos,
+ boost::program_options::options_description *opt,
+ ArgumentModifier modifier);
+
+void add_snap_spec_options(boost::program_options::options_description *pos,
+ boost::program_options::options_description *opt,
+ ArgumentModifier modifier);
+
+void add_image_or_snap_spec_options(
+ boost::program_options::options_description *pos,
+ boost::program_options::options_description *opt,
+ ArgumentModifier modifier);
+
+void add_create_image_options(boost::program_options::options_description *opt,
+ bool include_format);
+
+void add_create_journal_options(
+ boost::program_options::options_description *opt);
+
+void add_size_option(boost::program_options::options_description *opt);
+
+void add_sparse_size_option(boost::program_options::options_description *opt);
+
+void add_path_options(boost::program_options::options_description *pos,
+ boost::program_options::options_description *opt,
+ const std::string &description);
+
+void add_limit_option(boost::program_options::options_description *opt);
+
+void add_no_progress_option(boost::program_options::options_description *opt);
+
+void add_format_options(boost::program_options::options_description *opt);
+
+void add_verbose_option(boost::program_options::options_description *opt);
+
+void add_no_error_option(boost::program_options::options_description *opt);
+
+void add_flatten_option(boost::program_options::options_description *opt);
+
+void add_snap_create_options(boost::program_options::options_description *opt);
+
+void add_encryption_options(boost::program_options::options_description *opt);
+
+std::string get_short_features_help(bool append_suffix);
+std::string get_long_features_help();
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ExportFormat *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageSize *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageOrder *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageObjectSize *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageFormat *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageNewFormat *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ ImageFeatures *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ Format *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ JournalObjectSize *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ EncryptionAlgorithm *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ EncryptionFormat *target_type, int);
+void validate(boost::any& v, const std::vector<std::string>& values,
+ Secret *target_type, int);
+
+
+std::ostream &operator<<(std::ostream &os, const ImageFeatures &features);
+
+} // namespace argument_types
+} // namespace rbd
+
+#endif // CEPH_RBD_ARGUMENT_TYPES_H
diff --git a/src/tools/rbd/CMakeLists.txt b/src/tools/rbd/CMakeLists.txt
new file mode 100644
index 000000000..5a895354d
--- /dev/null
+++ b/src/tools/rbd/CMakeLists.txt
@@ -0,0 +1,80 @@
+set(CURSES_NEED_NCURSES TRUE)
+# libcurses may not be available on some platforms (e.g. Windows).
+find_package(Curses)
+
+set(rbd_srcs
+ rbd.cc
+ ArgumentTypes.cc
+ IndentStream.cc
+ MirrorDaemonServiceInfo.cc
+ OptionPrinter.cc
+ Schedule.cc
+ Shell.cc
+ Utils.cc
+ action/Bench.cc
+ action/Children.cc
+ action/Clone.cc
+ action/Config.cc
+ action/Copy.cc
+ action/Create.cc
+ action/Device.cc
+ action/Diff.cc
+ action/DiskUsage.cc
+ action/Encryption.cc
+ action/Export.cc
+ action/Feature.cc
+ action/Flatten.cc
+ action/Ggate.cc
+ action/Group.cc
+ action/ImageMeta.cc
+ action/Import.cc
+ action/Info.cc
+ action/Journal.cc
+ action/Kernel.cc
+ action/List.cc
+ action/Lock.cc
+ action/MergeDiff.cc
+ action/Migration.cc
+ action/MirrorImage.cc
+ action/MirrorPool.cc
+ action/MirrorSnapshotSchedule.cc
+ action/Namespace.cc
+ action/Nbd.cc
+ action/ObjectMap.cc
+ action/Perf.cc
+ action/PersistentCache.cc
+ action/Pool.cc
+ action/Remove.cc
+ action/Rename.cc
+ action/Resize.cc
+ action/Snap.cc
+ action/Sparsify.cc
+ action/Status.cc
+ action/TrashPurgeSchedule.cc
+ action/Trash.cc
+ action/Watch.cc
+ action/Wnbd.cc)
+
+add_executable(rbd ${rbd_srcs}
+ $<TARGET_OBJECTS:common_texttable_obj>)
+set_target_properties(rbd PROPERTIES OUTPUT_NAME rbd)
+target_link_libraries(rbd
+ cls_journal_client
+ cls_rbd_client
+ rbd_types
+ librbd
+ journal
+ libneorados
+ librados
+ ceph-common global
+ ${BLKID_LIBRARIES} ${CMAKE_DL_LIBS})
+if(CURSES_FOUND)
+ target_compile_definitions(rbd PRIVATE HAVE_CURSES)
+ target_link_libraries(rbd ${CURSES_LIBRARIES})
+endif()
+if(WITH_KRBD)
+ target_link_libraries(rbd
+ krbd)
+endif()
+
+install(TARGETS rbd DESTINATION bin)
diff --git a/src/tools/rbd/IndentStream.cc b/src/tools/rbd/IndentStream.cc
new file mode 100644
index 000000000..83591a8cb
--- /dev/null
+++ b/src/tools/rbd/IndentStream.cc
@@ -0,0 +1,59 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/IndentStream.h"
+
+namespace rbd {
+
+int IndentBuffer::overflow (int c) {
+ if (traits_type::eq_int_type(traits_type::eof(), c)) {
+ return traits_type::not_eof(c);
+ }
+
+ int r;
+ switch (c) {
+ case '\n':
+ m_buffer += c;
+ flush_line();
+ r = m_streambuf->sputn(m_buffer.c_str(), m_buffer.size());
+ m_buffer.clear();
+ return r;
+ case '\t':
+ // convert tab to single space and fall-through
+ c = ' ';
+ default:
+ if (m_indent + m_buffer.size() >= m_line_length) {
+ size_t word_offset = m_buffer.find_last_of(m_delim);
+ bool space_delim = (m_delim == " ");
+ if (word_offset == std::string::npos && !space_delim) {
+ word_offset = m_buffer.find_last_of(" ");
+ }
+
+ if (word_offset != std::string::npos) {
+ flush_line();
+ m_streambuf->sputn(m_buffer.c_str(), word_offset);
+ m_buffer = std::string(m_buffer,
+ word_offset + (space_delim ? 1 : 0));
+ } else {
+ flush_line();
+ m_streambuf->sputn(m_buffer.c_str(), m_buffer.size());
+ m_buffer.clear();
+ }
+ m_streambuf->sputc('\n');
+ }
+ m_buffer += c;
+ return c;
+ }
+}
+
+void IndentBuffer::flush_line() {
+ if (m_initial_offset >= m_indent) {
+ m_initial_offset = 0;
+ m_streambuf->sputc('\n');
+ }
+
+ m_streambuf->sputn(m_indent_prefix.c_str(), m_indent - m_initial_offset);
+ m_initial_offset = 0;
+}
+
+} // namespace rbd
diff --git a/src/tools/rbd/IndentStream.h b/src/tools/rbd/IndentStream.h
new file mode 100644
index 000000000..85ccc85b3
--- /dev/null
+++ b/src/tools/rbd/IndentStream.h
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_INDENT_STREAM_H
+#define CEPH_RBD_INDENT_STREAM_H
+
+#include "include/int_types.h"
+#include <iostream>
+#include <streambuf>
+#include <iomanip>
+
+namespace rbd {
+
+class IndentBuffer : public std::streambuf {
+public:
+ IndentBuffer(size_t indent, size_t initial_offset, size_t line_length,
+ std::streambuf *streambuf)
+ : m_indent(indent), m_initial_offset(initial_offset),
+ m_line_length(line_length), m_streambuf(streambuf),
+ m_delim(" "), m_indent_prefix(m_indent, ' ') {
+ }
+
+ void set_delimiter(const std::string &delim) {
+ m_delim = delim;
+ }
+
+protected:
+ int overflow (int c) override;
+
+private:
+ size_t m_indent;
+ size_t m_initial_offset;
+ size_t m_line_length;
+ std::streambuf *m_streambuf;
+
+ std::string m_delim;
+ std::string m_indent_prefix;
+ std::string m_buffer;
+
+ void flush_line();
+};
+
+class IndentStream : public std::ostream {
+public:
+ IndentStream(size_t indent, size_t initial_offset, size_t line_length,
+ std::ostream &os)
+ : std::ostream(&m_indent_buffer),
+ m_indent_buffer(indent, initial_offset, line_length, os.rdbuf()) {
+ }
+
+ void set_delimiter(const std::string &delim) {
+ m_indent_buffer.set_delimiter(delim);
+ }
+private:
+ IndentBuffer m_indent_buffer;
+};
+
+} // namespace rbd
+
+#endif // CEPH_RBD_INDENT_STREAM_ITERATOR_H
diff --git a/src/tools/rbd/MirrorDaemonServiceInfo.cc b/src/tools/rbd/MirrorDaemonServiceInfo.cc
new file mode 100644
index 000000000..e7422e66a
--- /dev/null
+++ b/src/tools/rbd/MirrorDaemonServiceInfo.cc
@@ -0,0 +1,307 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_json.h"
+#include "common/errno.h"
+#include "include/rados/librados.hpp"
+#include "include/stringify.h"
+#include "tools/rbd/MirrorDaemonServiceInfo.h"
+
+#include <boost/scope_exit.hpp>
+#include <iostream>
+
+#include "json_spirit/json_spirit.h"
+
+namespace rbd {
+
+std::ostream& operator<<(std::ostream& os, MirrorHealth mirror_health) {
+ switch (mirror_health) {
+ case MIRROR_HEALTH_OK:
+ os << "OK";
+ break;
+ case MIRROR_HEALTH_UNKNOWN:
+ os << "UNKNOWN";
+ break;
+ case MIRROR_HEALTH_WARNING:
+ os << "WARNING";
+ break;
+ case MIRROR_HEALTH_ERROR:
+ os << "ERROR";
+ break;
+ }
+ return os;
+}
+
+std::string MirrorService::get_image_description() const {
+ std::string description = (!client_id.empty() ? client_id :
+ stringify(service_id));
+ if (!hostname.empty()) {
+ description += " on " + hostname;
+ }
+ return description;
+}
+
+void MirrorService::dump_image(
+ argument_types::Format::Formatter formatter) const {
+ formatter->open_object_section("daemon_service");
+ formatter->dump_string("service_id", service_id);
+ formatter->dump_string("instance_id", instance_id);
+ formatter->dump_string("daemon_id", client_id);
+ formatter->dump_string("hostname", hostname);
+ formatter->close_section();
+}
+
+int MirrorDaemonServiceInfo::init() {
+ int r = get_mirror_service_dump();
+ if (r < 0) {
+ return r;
+ } else if (m_mirror_services.empty()) {
+ return 0;
+ }
+
+ r = get_mirror_service_status();
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+const MirrorService* MirrorDaemonServiceInfo::get_by_service_id(
+ const std::string& service_id) const {
+ auto it = m_mirror_services.find(service_id);
+ if (it == m_mirror_services.end()) {
+ return nullptr;
+ }
+
+ return &it->second;
+}
+
+const MirrorService* MirrorDaemonServiceInfo::get_by_instance_id(
+ const std::string& instance_id) const {
+ auto it = m_instance_to_service_ids.find(instance_id);
+ if (it == m_instance_to_service_ids.end()) {
+ return nullptr;
+ }
+
+ return get_by_service_id(it->second);
+}
+
+MirrorServices MirrorDaemonServiceInfo::get_mirror_services() const {
+ MirrorServices mirror_services;
+ for (auto& it : m_mirror_services) {
+ mirror_services.push_back(it.second);
+ }
+ return mirror_services;
+}
+
+int MirrorDaemonServiceInfo::get_mirror_service_dump() {
+ librados::Rados rados(m_io_ctx);
+ std::string cmd = R"({"prefix": "service dump", "format": "json"})";
+ bufferlist in_bl;
+ bufferlist out_bl;
+
+ int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r < 0) {
+ std::cerr << "rbd: failed to query services: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ json_spirit::mValue json_root;
+ if(!json_spirit::read(out_bl.to_str(), json_root)) {
+ std::cerr << "rbd: invalid service dump JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ try {
+ auto& services = json_root.get_obj()["services"];
+ if (services.is_null()) {
+ std::cerr << "rbd: missing services in service dump JSON" << std::endl;
+ return -EBADMSG;
+ }
+
+ auto& service = services.get_obj()["rbd-mirror"];
+ if (service.is_null()) {
+ // no rbd-mirror daemons running
+ return 0;
+ }
+
+ auto& daemons = service.get_obj()["daemons"];
+ if (daemons.is_null()) {
+ return 0;
+ }
+
+ for (auto& daemon_pair : daemons.get_obj()) {
+ // rbd-mirror instances will always be integers but other objects
+ // are included
+ auto& service_id = daemon_pair.first;
+ if (daemon_pair.second.type() != json_spirit::obj_type) {
+ continue;
+ }
+
+ auto& daemon = daemon_pair.second.get_obj();
+ auto& metadata_val = daemon["metadata"];
+ if (metadata_val.is_null()) {
+ continue;
+ }
+ auto& metadata = metadata_val.get_obj();
+
+ MirrorService mirror_service{service_id};
+
+ auto& client_id = metadata["id"];
+ if (!client_id.is_null()) {
+ mirror_service.client_id = client_id.get_str();
+ }
+
+ auto& ceph_version = metadata["ceph_version_short"];
+ if (!ceph_version.is_null()) {
+ mirror_service.ceph_version = ceph_version.get_str();
+ }
+
+ auto& hostname = metadata["hostname"];
+ if (!hostname.is_null()) {
+ mirror_service.hostname = hostname.get_str();
+ }
+
+ m_mirror_services[service_id] = mirror_service;
+ }
+
+ } catch (std::runtime_error&) {
+ std::cerr << "rbd: unexpected service dump JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+int MirrorDaemonServiceInfo::get_mirror_service_status() {
+ librados::Rados rados(m_io_ctx);
+ std::string cmd = R"({"prefix": "service status", "format": "json"})";
+ bufferlist in_bl;
+ bufferlist out_bl;
+
+ int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r < 0) {
+ std::cerr << "rbd: failed to query service status: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ json_spirit::mValue json_root;
+ if(!json_spirit::read(out_bl.to_str(), json_root)) {
+ std::cerr << "rbd: invalid service status JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ bool found_leader = false;
+ bool found_pool = false;
+
+ try {
+ auto& service = json_root.get_obj()["rbd-mirror"];
+ if (service.is_null()) {
+ return 0;
+ }
+
+ for (auto& daemon_pair : service.get_obj()) {
+ std::string service_id = daemon_pair.first;
+ auto it = m_mirror_services.find(service_id);
+ if (it == m_mirror_services.end()) {
+ continue;
+ }
+
+ auto& mirror_service = it->second;
+ auto& daemon = daemon_pair.second.get_obj();
+ auto& status = daemon["status"];
+ if (status.is_null()) {
+ mirror_service.callouts.push_back("not reporting status");
+ mirror_service.health = MIRROR_HEALTH_WARNING;
+ continue;
+ }
+
+ auto& json = status.get_obj()["json"];
+ if (json.is_null()) {
+ mirror_service.callouts.push_back("not reporting status");
+ mirror_service.health = MIRROR_HEALTH_WARNING;
+ continue;
+ }
+
+ json_spirit::mValue json_status;
+ if(!json_spirit::read(json.get_str(), json_status)) {
+ std::cerr << "rbd: invalid service status daemon status JSON received"
+ << std::endl;
+ return -EBADMSG;
+ }
+
+ auto& pool_val = json_status.get_obj()[stringify(m_io_ctx.get_id())];
+ if (pool_val.is_null()) {
+ mirror_service.callouts.push_back("not reporting status for pool");
+ mirror_service.health = MIRROR_HEALTH_WARNING;
+ continue;
+ }
+
+ auto& pool = pool_val.get_obj();
+ found_pool = true;
+
+ auto& instance_id = pool["instance_id"];
+ if (!instance_id.is_null()) {
+ mirror_service.instance_id = instance_id.get_str();
+ m_instance_to_service_ids[mirror_service.instance_id] = service_id;
+ }
+
+ auto& leader = pool["leader"];
+ if (!leader.is_null() && leader.get_bool()) {
+ mirror_service.leader = true;
+ found_leader = true;
+ }
+
+ MirrorHealth mirror_service_health = MIRROR_HEALTH_OK;
+ auto& callouts = pool["callouts"];
+ if (!callouts.is_null()) {
+ for (auto& callout_pair : callouts.get_obj()) {
+ auto& callout = callout_pair.second.get_obj();
+ auto& level = callout["level"];
+ if (level.is_null()) {
+ continue;
+ }
+
+ auto& level_str = level.get_str();
+ if (mirror_service_health < MIRROR_HEALTH_ERROR &&
+ level_str == "error") {
+ mirror_service_health = MIRROR_HEALTH_ERROR;
+ } else if (mirror_service_health < MIRROR_HEALTH_WARNING &&
+ level_str == "warning") {
+ mirror_service_health = MIRROR_HEALTH_WARNING;
+ }
+
+ auto& text = callout["text"];
+ if (!text.is_null()) {
+ mirror_service.callouts.push_back(text.get_str());
+ }
+ }
+ }
+ mirror_service.health = mirror_service_health;
+ }
+ } catch (std::runtime_error&) {
+ std::cerr << "rbd: unexpected service status JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ // compute overall daemon health
+ m_daemon_health = MIRROR_HEALTH_OK;
+ if (!found_pool) {
+ // no daemons are reporting status for this pool
+ m_daemon_health = MIRROR_HEALTH_ERROR;
+ } else if (!found_leader) {
+ // no daemons are reporting leader role for this pool
+ m_daemon_health = MIRROR_HEALTH_WARNING;
+ }
+
+ for (auto& pair : m_mirror_services) {
+ m_daemon_health = std::max(m_daemon_health, pair.second.health);
+ }
+
+ return 0;
+}
+
+} // namespace rbd
+
diff --git a/src/tools/rbd/MirrorDaemonServiceInfo.h b/src/tools/rbd/MirrorDaemonServiceInfo.h
new file mode 100644
index 000000000..d667332e5
--- /dev/null
+++ b/src/tools/rbd/MirrorDaemonServiceInfo.h
@@ -0,0 +1,78 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_DAEMON_SERVICE_INFO_H
+#define CEPH_RBD_MIRROR_DAEMON_SERVICE_INFO_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "tools/rbd/ArgumentTypes.h"
+
+#include <iosfwd>
+#include <list>
+#include <map>
+#include <string>
+
+namespace rbd {
+
+enum MirrorHealth {
+ MIRROR_HEALTH_OK = 0,
+ MIRROR_HEALTH_UNKNOWN = 1,
+ MIRROR_HEALTH_WARNING = 2,
+ MIRROR_HEALTH_ERROR = 3
+};
+
+std::ostream& operator<<(std::ostream& os, MirrorHealth mirror_health);
+
+struct MirrorService {
+ MirrorService() {}
+ explicit MirrorService(const std::string& service_id)
+ : service_id(service_id) {
+ }
+
+ std::string service_id;
+ std::string instance_id;
+ bool leader = false;
+ std::string client_id;
+ std::string ceph_version;
+ std::string hostname;
+ std::list<std::string> callouts;
+
+ MirrorHealth health = MIRROR_HEALTH_UNKNOWN;
+
+ std::string get_image_description() const;
+ void dump_image(argument_types::Format::Formatter formatter) const;
+};
+
+typedef std::list<MirrorService> MirrorServices;
+
+class MirrorDaemonServiceInfo {
+public:
+ MirrorDaemonServiceInfo(librados::IoCtx &io_ctx) : m_io_ctx(io_ctx) {
+ }
+
+ int init();
+
+ const MirrorService* get_by_service_id(const std::string& service_id) const;
+ const MirrorService* get_by_instance_id(const std::string& instance_id) const;
+
+ MirrorServices get_mirror_services() const;
+ MirrorHealth get_daemon_health() const {
+ return m_daemon_health;
+ }
+
+private:
+ librados::IoCtx &m_io_ctx;
+
+ std::map<std::string, MirrorService> m_mirror_services;
+ std::map<std::string, std::string> m_instance_to_service_ids;
+
+ MirrorHealth m_daemon_health = MIRROR_HEALTH_UNKNOWN;
+
+ int get_mirror_service_dump();
+ int get_mirror_service_status();
+
+};
+
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_DAEMON_SERVICE_INFO_H
diff --git a/src/tools/rbd/OptionPrinter.cc b/src/tools/rbd/OptionPrinter.cc
new file mode 100644
index 000000000..0fea6b691
--- /dev/null
+++ b/src/tools/rbd/OptionPrinter.cc
@@ -0,0 +1,161 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/OptionPrinter.h"
+#include "tools/rbd/IndentStream.h"
+#include "include/ceph_assert.h"
+
+namespace rbd {
+
+namespace po = boost::program_options;
+
+const std::string OptionPrinter::POSITIONAL_ARGUMENTS("Positional arguments");
+const std::string OptionPrinter::OPTIONAL_ARGUMENTS("Optional arguments");
+
+const size_t OptionPrinter::MAX_DESCRIPTION_OFFSET;
+
+OptionPrinter::OptionPrinter(const OptionsDescription &positional,
+ const OptionsDescription &optional)
+ : m_positional(positional), m_optional(optional) {
+}
+
+void OptionPrinter::print_short(std::ostream &os, size_t initial_offset) {
+ size_t max_option_width = 0;
+ std::vector<std::string> optionals;
+ for (size_t i = 0; i < m_optional.options().size(); ++i) {
+ std::stringstream option;
+
+ bool required = m_optional.options()[i]->semantic()->is_required();
+ if (!required) {
+ option << "[";
+ }
+ option << "--" << m_optional.options()[i]->long_name();
+ if (m_optional.options()[i]->semantic()->max_tokens() != 0) {
+ option << " <" << m_optional.options()[i]->long_name() << ">";
+ }
+ if (!required) {
+ option << "]";
+ }
+ max_option_width = std::max(max_option_width, option.str().size());
+ optionals.emplace_back(option.str());
+ }
+
+ std::vector<std::string> positionals;
+ for (size_t i = 0; i < m_positional.options().size(); ++i) {
+ std::stringstream option;
+
+ // we overload po::value<std::string>()->default_value("") to signify
+ // an optional positional argument (purely for help printing purposes)
+ boost::any v;
+ bool required = !m_positional.options()[i]->semantic()->apply_default(v);
+ if (!required) {
+ auto ptr = boost::any_cast<std::string>(&v);
+ ceph_assert(ptr && ptr->empty());
+ option << "[";
+ }
+ option << "<" << m_positional.options()[i]->long_name() << ">";
+ if (m_positional.options()[i]->semantic()->max_tokens() > 1) {
+ option << " [<" << m_positional.options()[i]->long_name() << "> ...]";
+ }
+ if (!required) {
+ option << "]";
+ }
+
+ max_option_width = std::max(max_option_width, option.str().size());
+ positionals.emplace_back(option.str());
+
+ if (m_positional.options()[i]->semantic()->max_tokens() > 1) {
+ break;
+ }
+ }
+
+ size_t indent = std::min(initial_offset, MAX_DESCRIPTION_OFFSET) + 1;
+ if (indent + max_option_width + 2 > LINE_WIDTH) {
+ // decrease the indent so that we don't wrap past the end of the line
+ indent = LINE_WIDTH - max_option_width - 2;
+ }
+
+ IndentStream indent_stream(indent, initial_offset, LINE_WIDTH, os);
+ indent_stream.set_delimiter("[");
+ for (auto& option : optionals) {
+ indent_stream << option << " ";
+ }
+
+ if (optionals.size() > 0 || positionals.size() == 0) {
+ indent_stream << std::endl;
+ }
+
+ if (positionals.size() > 0) {
+ indent_stream.set_delimiter(" ");
+ for (auto& option : positionals) {
+ indent_stream << option << " ";
+ }
+ indent_stream << std::endl;
+ }
+}
+
+void OptionPrinter::print_optional(const OptionsDescription &global_opts,
+ size_t &name_width, std::ostream &os) {
+ std::string indent2(2, ' ');
+
+ for (size_t i = 0; i < global_opts.options().size(); ++i) {
+ std::string description = global_opts.options()[i]->description();
+ auto result = boost::find_first(description, "deprecated");
+ if (!result.empty()) {
+ continue;
+ }
+ std::stringstream ss;
+ ss << indent2
+ << global_opts.options()[i]->format_name() << " "
+ << global_opts.options()[i]->format_parameter();
+
+ std::cout << ss.str();
+ IndentStream indent_stream(name_width, ss.str().size(), LINE_WIDTH, std::cout);
+ indent_stream << global_opts.options()[i]->description() << std::endl;
+ }
+
+}
+
+void OptionPrinter::print_detailed(std::ostream &os) {
+ std::string indent_prefix(2, ' ');
+ size_t name_width = compute_name_width(indent_prefix.size());
+
+ if (m_positional.options().size() > 0) {
+ std::cout << POSITIONAL_ARGUMENTS << std::endl;
+ for (size_t i = 0; i < m_positional.options().size(); ++i) {
+ std::stringstream ss;
+ ss << indent_prefix << "<" << m_positional.options()[i]->long_name()
+ << ">";
+
+ std::cout << ss.str();
+ IndentStream indent_stream(name_width, ss.str().size(), LINE_WIDTH, os);
+ indent_stream << m_positional.options()[i]->description() << std::endl;
+ }
+ std::cout << std::endl;
+ }
+
+ if (m_optional.options().size() > 0) {
+ std::cout << OPTIONAL_ARGUMENTS << std::endl;
+ print_optional(m_optional, name_width, os);
+ std::cout << std::endl;
+ }
+}
+
+size_t OptionPrinter::compute_name_width(size_t indent) {
+ size_t width = MIN_NAME_WIDTH;
+ std::vector<OptionsDescription> descs = {m_positional, m_optional};
+ for (size_t desc_idx = 0; desc_idx < descs.size(); ++desc_idx) {
+ const OptionsDescription &desc = descs[desc_idx];
+ for (size_t opt_idx = 0; opt_idx < desc.options().size(); ++opt_idx) {
+ size_t name_width = desc.options()[opt_idx]->format_name().size() +
+ desc.options()[opt_idx]->format_parameter().size()
+ + 1;
+ width = std::max(width, name_width);
+ }
+ }
+ width += indent;
+ width = std::min(width, MAX_DESCRIPTION_OFFSET) + 1;
+ return width;
+}
+
+} // namespace rbd
diff --git a/src/tools/rbd/OptionPrinter.h b/src/tools/rbd/OptionPrinter.h
new file mode 100644
index 000000000..06d3a3c99
--- /dev/null
+++ b/src/tools/rbd/OptionPrinter.h
@@ -0,0 +1,43 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_OPTION_PRINTER_H
+#define CEPH_RBD_OPTION_PRINTER_H
+
+#include "include/int_types.h"
+#include <string>
+#include <vector>
+#include <boost/algorithm/string.hpp>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+
+class OptionPrinter {
+public:
+ typedef boost::program_options::options_description OptionsDescription;
+
+ static const std::string POSITIONAL_ARGUMENTS;
+ static const std::string OPTIONAL_ARGUMENTS;
+
+ static const size_t LINE_WIDTH = 80;
+ static const size_t MIN_NAME_WIDTH = 20;
+ static const size_t MAX_DESCRIPTION_OFFSET = 37;
+
+ OptionPrinter(const OptionsDescription &positional,
+ const OptionsDescription &optional);
+
+ void print_short(std::ostream &os, size_t initial_offset);
+ void print_detailed(std::ostream &os);
+ static void print_optional(const OptionsDescription &global_opts,
+ size_t &name_width, std::ostream &os);
+
+private:
+ const OptionsDescription &m_positional;
+ const OptionsDescription &m_optional;
+
+ size_t compute_name_width(size_t indent);
+};
+
+} // namespace rbd
+
+#endif // CEPH_RBD_OPTION_PRINTER_H
diff --git a/src/tools/rbd/Schedule.cc b/src/tools/rbd/Schedule.cc
new file mode 100644
index 000000000..15dda3aee
--- /dev/null
+++ b/src/tools/rbd/Schedule.cc
@@ -0,0 +1,367 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "common/ceph_json.h"
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Schedule.h"
+#include "tools/rbd/Utils.h"
+
+#include <iostream>
+#include <regex>
+
+namespace rbd {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+int parse_schedule_name(const std::string &name, bool allow_images,
+ std::string *pool_name, std::string *namespace_name,
+ std::string *image_name) {
+ // parse names like:
+ // '', 'rbd/', 'rbd/ns/', 'rbd/image', 'rbd/ns/image'
+ std::regex pattern("^(?:([^/]+)/(?:(?:([^/]+)/|)(?:([^/@]+))?)?)?$");
+ std::smatch match;
+ if (!std::regex_match(name, match, pattern)) {
+ return -EINVAL;
+ }
+
+ if (match[1].matched) {
+ *pool_name = match[1];
+ } else {
+ *pool_name = "-";
+ }
+
+ if (match[2].matched) {
+ *namespace_name = match[2];
+ } else if (match[3].matched) {
+ *namespace_name = "";
+ } else {
+ *namespace_name = "-";
+ }
+
+ if (match[3].matched) {
+ if (!allow_images) {
+ return -EINVAL;
+ }
+ *image_name = match[3];
+ } else {
+ *image_name = "-";
+ }
+
+ return 0;
+}
+
+} // anonymous namespace
+
+void add_level_spec_options(po::options_description *options,
+ bool allow_image) {
+ at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE);
+ if (allow_image) {
+ at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);
+ }
+}
+
+int get_level_spec_args(const po::variables_map &vm,
+ std::map<std::string, std::string> *args) {
+ if (vm.count(at::IMAGE_NAME)) {
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+
+ int r = utils::extract_spec(vm[at::IMAGE_NAME].as<std::string>(),
+ &pool_name, &namespace_name, &image_name,
+ nullptr, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!pool_name.empty()) {
+ if (vm.count(at::POOL_NAME)) {
+ std::cerr << "rbd: pool is specified both via pool and image options"
+ << std::endl;
+ return -EINVAL;
+ }
+ if (vm.count(at::NAMESPACE_NAME)) {
+ std::cerr << "rbd: namespace is specified both via namespace and image"
+ << " options" << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ if (vm.count(at::POOL_NAME)) {
+ pool_name = vm[at::POOL_NAME].as<std::string>();
+ }
+
+ if (vm.count(at::NAMESPACE_NAME)) {
+ namespace_name = vm[at::NAMESPACE_NAME].as<std::string>();
+ }
+
+ if (namespace_name.empty()) {
+ (*args)["level_spec"] = pool_name + "/" + image_name;
+ } else {
+ (*args)["level_spec"] = pool_name + "/" + namespace_name + "/" +
+ image_name;
+ }
+ return 0;
+ }
+
+ if (vm.count(at::NAMESPACE_NAME)) {
+ std::string pool_name;
+ std::string namespace_name;
+
+ if (vm.count(at::POOL_NAME)) {
+ pool_name = vm[at::POOL_NAME].as<std::string>();
+ }
+
+ namespace_name = vm[at::NAMESPACE_NAME].as<std::string>();
+
+ (*args)["level_spec"] = pool_name + "/" + namespace_name + "/";
+
+ return 0;
+ }
+
+ if (vm.count(at::POOL_NAME)) {
+ std::string pool_name = vm[at::POOL_NAME].as<std::string>();
+
+ (*args)["level_spec"] = pool_name + "/";
+
+ return 0;
+ }
+
+ (*args)["level_spec"] = "";
+
+ return 0;
+}
+
+void normalize_level_spec_args(std::map<std::string, std::string> *args) {
+ std::map<std::string, std::string> raw_args;
+ std::swap(raw_args, *args);
+
+ auto default_pool_name = utils::get_default_pool_name();
+ for (auto [key, value] : raw_args) {
+ if (key == "level_spec" && !value.empty() && value[0] == '/') {
+ value = default_pool_name + value;
+ }
+
+ (*args)[key] = value;
+ }
+}
+
+void add_schedule_options(po::options_description *positional,
+ bool mandatory) {
+ if (mandatory) {
+ positional->add_options()
+ ("interval", "schedule interval");
+ } else {
+ positional->add_options()
+ ("interval", po::value<std::string>()->default_value(""),
+ "schedule interval");
+ }
+ positional->add_options()
+ ("start-time", po::value<std::string>()->default_value(""),
+ "schedule start time");
+}
+
+int get_schedule_args(const po::variables_map &vm, bool mandatory,
+ std::map<std::string, std::string> *args) {
+ size_t arg_index = 0;
+
+ std::string interval = utils::get_positional_argument(vm, arg_index++);
+ if (interval.empty()) {
+ if (mandatory) {
+ std::cerr << "rbd: missing 'interval' argument" << std::endl;
+ return -EINVAL;
+ }
+ return 0;
+ }
+ (*args)["interval"] = interval;
+
+ std::string start_time = utils::get_positional_argument(vm, arg_index++);
+ if (!start_time.empty()) {
+ (*args)["start_time"] = start_time;
+ }
+
+ return 0;
+}
+
+int Schedule::parse(json_spirit::mValue &schedule_val) {
+ if (schedule_val.type() != json_spirit::array_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "schedule is not array" << std::endl;
+ return -EBADMSG;
+ }
+
+ try {
+ for (auto &item_val : schedule_val.get_array()) {
+ if (item_val.type() != json_spirit::obj_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "schedule item is not object" << std::endl;
+ return -EBADMSG;
+ }
+
+ auto &item = item_val.get_obj();
+
+ if (item["interval"].type() != json_spirit::str_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "interval is not string" << std::endl;
+ return -EBADMSG;
+ }
+ auto interval = item["interval"].get_str();
+
+ std::string start_time;
+ if (item["start_time"].type() == json_spirit::str_type) {
+ start_time = item["start_time"].get_str();
+ }
+
+ items.push_back({interval, start_time});
+ }
+
+ } catch (std::runtime_error &) {
+ std::cerr << "rbd: invalid schedule JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+void Schedule::dump(ceph::Formatter *f) {
+ f->open_array_section("items");
+ for (auto &item : items) {
+ f->open_object_section("item");
+ f->dump_string("interval", item.first);
+ f->dump_string("start_time", item.second);
+ f->close_section(); // item
+ }
+ f->close_section(); // items
+}
+
+std::ostream& operator<<(std::ostream& os, Schedule &s) {
+ std::string delimiter;
+ for (auto &item : s.items) {
+ os << delimiter << "every " << item.first;
+ if (!item.second.empty()) {
+ os << " starting at " << item.second;
+ }
+ delimiter = ", ";
+ }
+ return os;
+}
+
+int ScheduleList::parse(const std::string &list) {
+ json_spirit::mValue json_root;
+ if (!json_spirit::read(list, json_root)) {
+ std::cerr << "rbd: invalid schedule list JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ try {
+ for (auto &[id, schedule_val] : json_root.get_obj()) {
+ if (schedule_val.type() != json_spirit::obj_type) {
+ std::cerr << "rbd: unexpected schedule list JSON received: "
+ << "schedule_val is not object" << std::endl;
+ return -EBADMSG;
+ }
+ auto &schedule = schedule_val.get_obj();
+ if (schedule["name"].type() != json_spirit::str_type) {
+ std::cerr << "rbd: unexpected schedule list JSON received: "
+ << "schedule name is not string" << std::endl;
+ return -EBADMSG;
+ }
+ auto name = schedule["name"].get_str();
+
+ if (schedule["schedule"].type() != json_spirit::array_type) {
+ std::cerr << "rbd: unexpected schedule list JSON received: "
+ << "schedule is not array" << std::endl;
+ return -EBADMSG;
+ }
+
+ Schedule s;
+ int r = s.parse(schedule["schedule"]);
+ if (r < 0) {
+ return r;
+ }
+ schedules[name] = s;
+ }
+ } catch (std::runtime_error &) {
+ std::cerr << "rbd: invalid schedule list JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+Schedule *ScheduleList::find(const std::string &name) {
+ auto it = schedules.find(name);
+ if (it == schedules.end()) {
+ return nullptr;
+ }
+
+ return &it->second;
+}
+
+void ScheduleList::dump(ceph::Formatter *f) {
+ f->open_array_section("schedules");
+ for (auto &[name, s] : schedules) {
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+
+ int r = parse_schedule_name(name, allow_images, &pool_name, &namespace_name,
+ &image_name);
+ if (r < 0) {
+ continue;
+ }
+
+ f->open_object_section("schedule");
+ f->dump_string("pool", pool_name);
+ f->dump_string("namespace", namespace_name);
+ if (allow_images) {
+ f->dump_string("image", image_name);
+ }
+ s.dump(f);
+ f->close_section();
+ }
+ f->close_section();
+}
+
+std::ostream& operator<<(std::ostream& os, ScheduleList &l) {
+ TextTable tbl;
+ tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("NAMESPACE", TextTable::LEFT, TextTable::LEFT);
+ if (l.allow_images) {
+ tbl.define_column("IMAGE", TextTable::LEFT, TextTable::LEFT);
+ }
+ tbl.define_column("SCHEDULE", TextTable::LEFT, TextTable::LEFT);
+
+ for (auto &[name, s] : l.schedules) {
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+
+ int r = parse_schedule_name(name, l.allow_images, &pool_name,
+ &namespace_name, &image_name);
+ if (r < 0) {
+ continue;
+ }
+
+ std::stringstream ss;
+ ss << s;
+
+ tbl << pool_name << namespace_name;
+ if (l.allow_images) {
+ tbl << image_name;
+ }
+ tbl << ss.str() << TextTable::endrow;
+ }
+
+ os << tbl;
+ return os;
+}
+
+} // namespace rbd
+
diff --git a/src/tools/rbd/Schedule.h b/src/tools/rbd/Schedule.h
new file mode 100644
index 000000000..bf0964bb1
--- /dev/null
+++ b/src/tools/rbd/Schedule.h
@@ -0,0 +1,67 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_SCHEDULE_H
+#define CEPH_RBD_SCHEDULE_H
+
+#include "json_spirit/json_spirit.h"
+
+#include <iostream>
+#include <list>
+#include <map>
+#include <string>
+#include <boost/program_options.hpp>
+
+namespace ceph { class Formatter; }
+
+namespace rbd {
+
+void add_level_spec_options(
+ boost::program_options::options_description *options, bool allow_image=true);
+int get_level_spec_args(const boost::program_options::variables_map &vm,
+ std::map<std::string, std::string> *args);
+void normalize_level_spec_args(std::map<std::string, std::string> *args);
+
+void add_schedule_options(
+ boost::program_options::options_description *positional, bool mandatory);
+int get_schedule_args(const boost::program_options::variables_map &vm,
+ bool mandatory, std::map<std::string, std::string> *args);
+
+class Schedule {
+public:
+ Schedule() {
+ }
+
+ int parse(json_spirit::mValue &schedule_val);
+ void dump(ceph::Formatter *f);
+
+ friend std::ostream& operator<<(std::ostream& os, Schedule &s);
+
+private:
+ std::string name;
+ std::list<std::pair<std::string, std::string>> items;
+};
+
+std::ostream& operator<<(std::ostream& os, Schedule &s);
+
+class ScheduleList {
+public:
+ ScheduleList(bool allow_images=true) : allow_images(allow_images) {
+ }
+
+ int parse(const std::string &list);
+ Schedule *find(const std::string &name);
+ void dump(ceph::Formatter *f);
+
+ friend std::ostream& operator<<(std::ostream& os, ScheduleList &l);
+
+private:
+ bool allow_images;
+ std::map<std::string, Schedule> schedules;
+};
+
+std::ostream& operator<<(std::ostream& os, ScheduleList &l);
+
+} // namespace rbd
+
+#endif // CEPH_RBD_SCHEDULE_H
diff --git a/src/tools/rbd/Shell.cc b/src/tools/rbd/Shell.cc
new file mode 100644
index 000000000..ab1d20331
--- /dev/null
+++ b/src/tools/rbd/Shell.cc
@@ -0,0 +1,487 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/IndentStream.h"
+#include "tools/rbd/OptionPrinter.h"
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "include/stringify.h"
+#include <algorithm>
+#include <iostream>
+#include <set>
+
+namespace rbd {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+static const std::string APP_NAME("rbd");
+static const std::string HELP_SPEC("help");
+static const std::string BASH_COMPLETION_SPEC("bash-completion");
+
+boost::intrusive_ptr<CephContext> global_init(
+ int argc, const char **argv, std::vector<std::string> *command_args,
+ std::vector<std::string> *global_init_args) {
+ auto cmd_args = argv_to_vec(argc, argv);
+ std::vector<const char*> args(cmd_args);
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_MON_CONFIG);
+
+ *command_args = {args.begin(), args.end()};
+
+ // Scan command line arguments for ceph global init args (those are
+ // filtered out from args vector by global_init).
+
+ auto cursor = args.begin();
+ for (auto &arg : cmd_args) {
+ auto iter = cursor;
+ for (; iter != args.end(); iter++) {
+ if (*iter == arg) {
+ break;
+ }
+ }
+ if (iter == args.end()) {
+ // filtered out by global_init
+ global_init_args->push_back(arg);
+ } else {
+ cursor = ++iter;
+ }
+ }
+
+ return cct;
+}
+
+std::string format_command_spec(const Shell::CommandSpec &spec) {
+ return joinify<std::string>(spec.begin(), spec.end(), " ");
+}
+
+std::string format_alias_spec(const Shell::CommandSpec &spec,
+ const Shell::CommandSpec &alias_spec) {
+ auto spec_it = spec.begin();
+ auto alias_it = alias_spec.begin();
+ int level = 0;
+ while (spec_it != spec.end() && alias_it != alias_spec.end() &&
+ *spec_it == *alias_it) {
+ spec_it++;
+ alias_it++;
+ level++;
+ }
+ ceph_assert(spec_it != spec.end() && alias_it != alias_spec.end());
+
+ if (level < 2) {
+ return joinify<std::string>(alias_spec.begin(), alias_spec.end(), " ");
+ } else {
+ return "... " + joinify<std::string>(alias_it, alias_spec.end(), " ");
+ }
+}
+
+std::string format_command_name(const Shell::CommandSpec &spec,
+ const Shell::CommandSpec &alias_spec) {
+ std::string name = format_command_spec(spec);
+ if (!alias_spec.empty()) {
+ name += " (" + format_alias_spec(spec, alias_spec) + ")";
+ }
+ return name;
+}
+
+std::string format_option_suffix(
+ const boost::shared_ptr<po::option_description> &option) {
+ std::string suffix;
+ if (option->semantic()->max_tokens() != 0) {
+ if (option->description().find("path") != std::string::npos ||
+ option->description().find("file") != std::string::npos) {
+ suffix += " path";
+ } else if (option->description().find("host") != std::string::npos) {
+ suffix += " host";
+ } else {
+ suffix += " arg";
+ }
+ }
+ return suffix;
+}
+
+} // anonymous namespace
+
+std::vector<Shell::Action *>& Shell::get_actions() {
+ static std::vector<Action *> actions;
+
+ return actions;
+}
+
+std::set<std::string>& Shell::get_switch_arguments() {
+ static std::set<std::string> switch_arguments;
+
+ return switch_arguments;
+}
+
+void print_deprecated_warning(po::option_description option, std::string description) {
+ auto pos = description.find_first_of(":");
+ if (pos != std::string::npos) {
+ std::string param = description.substr(pos + 1, description.size() - pos - 2);
+ std::cerr << "rbd: " << option.format_name() << " is deprecated, use --"
+ << param << std::endl;
+ }
+}
+
+int Shell::execute(int argc, const char **argv) {
+ std::vector<std::string> arguments;
+ std::vector<std::string> ceph_global_init_args;
+ auto cct = global_init(argc, argv, &arguments, &ceph_global_init_args);
+
+ std::vector<std::string> command_spec;
+ get_command_spec(arguments, &command_spec);
+ bool is_alias = true;
+
+ if (command_spec.empty() || command_spec == CommandSpec({"help"})) {
+ // list all available actions
+ print_help();
+ return 0;
+ } else if (command_spec[0] == HELP_SPEC) {
+ // list help for specific action
+ command_spec.erase(command_spec.begin());
+ Action *action = find_action(command_spec, NULL, &is_alias);
+ if (action == NULL) {
+ print_unknown_action(command_spec);
+ return EXIT_FAILURE;
+ } else {
+ print_action_help(action, is_alias);
+ return 0;
+ }
+ } else if (command_spec[0] == BASH_COMPLETION_SPEC) {
+ command_spec.erase(command_spec.begin());
+ print_bash_completion(command_spec);
+ return 0;
+ }
+
+ CommandSpec *matching_spec;
+ Action *action = find_action(command_spec, &matching_spec, &is_alias);
+ if (action == NULL) {
+ print_unknown_action(command_spec);
+ return EXIT_FAILURE;
+ }
+
+ po::variables_map vm;
+ try {
+ po::options_description positional_opts;
+ po::options_description command_opts;
+ (*action->get_arguments)(&positional_opts, &command_opts);
+
+ // dynamically allocate options for our command (e.g. snap list) and
+ // its associated positional arguments
+ po::options_description argument_opts;
+ argument_opts.add_options()
+ (at::POSITIONAL_COMMAND_SPEC.c_str(),
+ po::value<std::vector<std::string> >()->required(), "")
+ (at::POSITIONAL_ARGUMENTS.c_str(),
+ po::value<std::vector<std::string> >(), "");
+
+ po::positional_options_description positional_options;
+ positional_options.add(at::POSITIONAL_COMMAND_SPEC.c_str(),
+ matching_spec->size());
+ if (!positional_opts.options().empty()) {
+ int max_count = positional_opts.options().size();
+ if (positional_opts.options().back()->semantic()->max_tokens() > 1)
+ max_count = -1;
+ positional_options.add(at::POSITIONAL_ARGUMENTS.c_str(), max_count);
+ }
+
+ po::options_description group_opts;
+ group_opts.add(command_opts)
+ .add(argument_opts);
+
+ po::store(po::command_line_parser(arguments)
+ .style(po::command_line_style::default_style &
+ ~po::command_line_style::allow_guessing)
+ .options(group_opts)
+ .positional(positional_options)
+ .run(), vm);
+
+ if (vm[at::POSITIONAL_COMMAND_SPEC].as<std::vector<std::string> >() !=
+ *matching_spec) {
+ std::cerr << "rbd: failed to parse command" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ int r = (*action->execute)(vm, ceph_global_init_args);
+
+ if (vm.size() > 0) {
+ for (auto opt : vm) {
+ try {
+ auto option = command_opts.find(opt.first, false);
+ auto description = option.description();
+ auto result = boost::find_first(description, "deprecated");
+ if (!result.empty()) {
+ print_deprecated_warning(option, description);
+ }
+ } catch (std::exception& e) {
+ continue;
+ }
+ }
+ }
+
+ po::options_description global_opts;
+ get_global_options(&global_opts);
+ auto it = ceph_global_init_args.begin();
+ for ( ; it != ceph_global_init_args.end(); ++it) {
+ auto pos = (*it).find_last_of("-");
+ auto prefix_style = po::command_line_style::allow_long;
+ if (pos == 0) {
+ prefix_style = po::command_line_style::allow_dash_for_short;
+ } else if (pos == std::string::npos) {
+ continue;
+ }
+
+ for (size_t i = 0; i < global_opts.options().size(); ++i) {
+ std::string param_name = global_opts.options()[i]->canonical_display_name(
+ prefix_style);
+ auto description = global_opts.options()[i]->description();
+ auto result = boost::find_first(description, "deprecated");
+ if (!result.empty() && *it == param_name) {
+ print_deprecated_warning(*global_opts.options()[i], description);
+ break;
+ }
+ }
+ }
+
+ if (r != 0) {
+ return std::abs(r);
+ }
+ } catch (po::required_option& e) {
+ std::cerr << "rbd: " << e.what() << std::endl;
+ return EXIT_FAILURE;
+ } catch (po::too_many_positional_options_error& e) {
+ std::cerr << "rbd: too many arguments" << std::endl;
+ return EXIT_FAILURE;
+ } catch (po::error& e) {
+ std::cerr << "rbd: " << e.what() << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ return 0;
+}
+
+void Shell::get_command_spec(const std::vector<std::string> &arguments,
+ std::vector<std::string> *command_spec) {
+ for (size_t i = 0; i < arguments.size(); ++i) {
+ std::string arg(arguments[i]);
+ if (arg == "-h" || arg == "--help") {
+ *command_spec = {HELP_SPEC};
+ return;
+ } else if (arg == "--") {
+ // all arguments after a double-dash are positional
+ if (i + 1 < arguments.size()) {
+ command_spec->insert(command_spec->end(),
+ arguments.data() + i + 1,
+ arguments.data() + arguments.size());
+ }
+ return;
+ } else if (arg[0] == '-') {
+ // if the option is not a switch, skip its value
+ if (arg.size() >= 2 &&
+ (arg[1] == '-' ||
+ get_switch_arguments().count(arg.substr(1, 1)) == 0) &&
+ (arg[1] != '-' ||
+ get_switch_arguments().count(arg.substr(2, std::string::npos)) == 0) &&
+ at::SWITCH_ARGUMENTS.count(arg.substr(2, std::string::npos)) == 0 &&
+ arg.find('=') == std::string::npos) {
+ ++i;
+ }
+ } else {
+ command_spec->push_back(arg);
+ }
+ }
+}
+
+Shell::Action *Shell::find_action(const CommandSpec &command_spec,
+ CommandSpec **matching_spec, bool *is_alias) {
+ // sort such that all "trash purge schedule ..." actions come before
+ // "trash purge"
+ std::vector<Action *> actions(get_actions());
+ std::sort(actions.begin(), actions.end(), [](auto lhs, auto rhs) {
+ return lhs->command_spec.size() > rhs->command_spec.size();
+ });
+
+ for (Action *action : actions) {
+ if (action->command_spec.size() <= command_spec.size()) {
+ if (std::equal(action->command_spec.begin(),
+ action->command_spec.end(),
+ command_spec.begin())) {
+ if (matching_spec != NULL) {
+ *matching_spec = &action->command_spec;
+ }
+ *is_alias = false;
+ return action;
+ }
+ }
+ if (!action->alias_command_spec.empty() &&
+ action->alias_command_spec.size() <= command_spec.size()) {
+ if (std::equal(action->alias_command_spec.begin(),
+ action->alias_command_spec.end(),
+ command_spec.begin())) {
+ if (matching_spec != NULL) {
+ *matching_spec = &action->alias_command_spec;
+ }
+ *is_alias = true;
+ return action;
+ }
+ }
+ }
+ return NULL;
+}
+
+void Shell::get_global_options(po::options_description *opts) {
+ opts->add_options()
+ ((at::CONFIG_PATH + ",c").c_str(), po::value<std::string>(), "path to cluster configuration")
+ ("cluster", po::value<std::string>(), "cluster name")
+ ("id", po::value<std::string>(), "client id (without 'client.' prefix)")
+ ("user", po::value<std::string>(), "deprecated[:id]")
+ ("name,n", po::value<std::string>(), "client name")
+ ("mon_host,m", po::value<std::string>(), "monitor host")
+ ("secret", po::value<at::Secret>(), "deprecated[:keyfile]")
+ ("keyfile,K", po::value<std::string>(), "path to secret key")
+ ("keyring,k", po::value<std::string>(), "path to keyring");
+}
+
+void Shell::print_help() {
+ std::cout << "usage: " << APP_NAME << " <command> ..."
+ << std::endl << std::endl
+ << "Command-line interface for managing Ceph RBD images."
+ << std::endl << std::endl;
+
+ std::vector<Action *> actions(get_actions());
+ std::sort(actions.begin(), actions.end(),
+ [](Action *lhs, Action *rhs) { return lhs->command_spec <
+ rhs->command_spec; });
+
+ std::cout << OptionPrinter::POSITIONAL_ARGUMENTS << ":" << std::endl
+ << " <command>" << std::endl;
+
+ // since the commands have spaces, we have to build our own formatter
+ std::string indent(4, ' ');
+ size_t name_width = OptionPrinter::MIN_NAME_WIDTH;
+ for (size_t i = 0; i < actions.size(); ++i) {
+ Action *action = actions[i];
+ std::string name = format_command_name(action->command_spec,
+ action->alias_command_spec);
+ name_width = std::max(name_width, name.size());
+ }
+ name_width += indent.size();
+ name_width = std::min(name_width, OptionPrinter::MAX_DESCRIPTION_OFFSET) + 1;
+
+ for (size_t i = 0; i < actions.size(); ++i) {
+ Action *action = actions[i];
+ if (!action->visible)
+ continue;
+ std::stringstream ss;
+ ss << indent
+ << format_command_name(action->command_spec, action->alias_command_spec);
+
+ std::cout << ss.str();
+ if (!action->description.empty()) {
+ IndentStream indent_stream(name_width, ss.str().size(),
+ OptionPrinter::LINE_WIDTH,
+ std::cout);
+ indent_stream << action->description << std::endl;
+ } else {
+ std::cout << std::endl;
+ }
+ }
+
+ po::options_description global_opts;
+ get_global_options(&global_opts);
+
+ std::cout << std::endl << OptionPrinter::OPTIONAL_ARGUMENTS << ":" << std::endl;
+ OptionPrinter::print_optional(global_opts, name_width, std::cout);
+
+ std::cout << std::endl
+ << "See '" << APP_NAME << " help <command>' for help on a specific "
+ << "command." << std::endl;
+ }
+
+void Shell::print_action_help(Action *action, bool is_alias) {
+ std::stringstream ss;
+ ss << "usage: " << APP_NAME << " "
+ << format_command_spec(is_alias ? action->alias_command_spec : action->command_spec);
+ std::cout << ss.str();
+
+ po::options_description positional;
+ po::options_description options;
+ (*action->get_arguments)(&positional, &options);
+
+ OptionPrinter option_printer(positional, options);
+ option_printer.print_short(std::cout, ss.str().size());
+
+ if (!action->description.empty()) {
+ std::cout << std::endl << action->description << std::endl;
+ }
+
+ std::cout << std::endl;
+ option_printer.print_detailed(std::cout);
+
+ if (!action->help.empty()) {
+ std::cout << action->help << std::endl;
+ }
+}
+
+void Shell::print_unknown_action(const std::vector<std::string> &command_spec) {
+ std::cerr << "error: unknown option '"
+ << joinify<std::string>(command_spec.begin(),
+ command_spec.end(), " ") << "'"
+ << std::endl << std::endl;
+ print_help();
+}
+
+void Shell::print_bash_completion(const CommandSpec &command_spec) {
+
+ bool is_alias = true;
+
+ Action *action = find_action(command_spec, NULL, &is_alias);
+ po::options_description global_opts;
+ get_global_options(&global_opts);
+ print_bash_completion_options(global_opts);
+
+ if (action != nullptr) {
+ po::options_description positional_opts;
+ po::options_description command_opts;
+ (*action->get_arguments)(&positional_opts, &command_opts);
+ print_bash_completion_options(command_opts);
+ } else {
+ std::cout << "|help";
+ for (size_t i = 0; i < get_actions().size(); ++i) {
+ Action *action = get_actions()[i];
+ std::cout << "|"
+ << joinify<std::string>(action->command_spec.begin(),
+ action->command_spec.end(), " ");
+ if (!action->alias_command_spec.empty()) {
+ std::cout << "|"
+ << joinify<std::string>(action->alias_command_spec.begin(),
+ action->alias_command_spec.end(),
+ " ");
+ }
+ }
+ }
+ std::cout << "|" << std::endl;
+}
+
+void Shell::print_bash_completion_options(const po::options_description &ops) {
+ for (size_t i = 0; i < ops.options().size(); ++i) {
+ auto option = ops.options()[i];
+ std::string long_name(option->canonical_display_name(0));
+ std::string short_name(option->canonical_display_name(
+ po::command_line_style::allow_dash_for_short));
+
+ std::cout << "|--" << long_name << format_option_suffix(option);
+ if (long_name != short_name) {
+ std::cout << "|" << short_name << format_option_suffix(option);
+ }
+ }
+}
+
+} // namespace rbd
diff --git a/src/tools/rbd/Shell.h b/src/tools/rbd/Shell.h
new file mode 100644
index 000000000..fe3dee46b
--- /dev/null
+++ b/src/tools/rbd/Shell.h
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_SHELL_H
+#define CEPH_RBD_SHELL_H
+
+#include "include/int_types.h"
+#include <set>
+#include <string>
+#include <vector>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+
+class Shell {
+public:
+ typedef std::vector<std::string> CommandSpec;
+
+ struct Action {
+ typedef void (*GetArguments)(boost::program_options::options_description *,
+ boost::program_options::options_description *);
+ typedef int (*Execute)(const boost::program_options::variables_map &,
+ const std::vector<std::string> &);
+
+ CommandSpec command_spec;
+ CommandSpec alias_command_spec;
+ const std::string description;
+ const std::string help;
+ GetArguments get_arguments;
+ Execute execute;
+ bool visible;
+
+ template <typename Args, typename Execute>
+ Action(const std::initializer_list<std::string> &command_spec,
+ const std::initializer_list<std::string> &alias_command_spec,
+ const std::string &description, const std::string &help,
+ Args args, Execute execute, bool visible = true)
+ : command_spec(command_spec), alias_command_spec(alias_command_spec),
+ description(description), help(help), get_arguments(args),
+ execute(execute), visible(visible) {
+ Shell::get_actions().push_back(this);
+ }
+
+ };
+
+ struct SwitchArguments {
+ SwitchArguments(const std::initializer_list<std::string> &arguments) {
+ Shell::get_switch_arguments().insert(arguments.begin(), arguments.end());
+ }
+ };
+
+ int execute(int argc, const char **argv);
+
+private:
+ static std::vector<Action *>& get_actions();
+ static std::set<std::string>& get_switch_arguments();
+
+ void get_command_spec(const std::vector<std::string> &arguments,
+ std::vector<std::string> *command_spec);
+ Action *find_action(const CommandSpec &command_spec,
+ CommandSpec **matching_spec, bool *is_alias);
+
+ void get_global_options(boost::program_options::options_description *opts);
+
+ void print_help();
+ void print_action_help(Action *action, bool is_alias);
+ void print_unknown_action(const CommandSpec &command_spec);
+
+ void print_bash_completion(const CommandSpec &command_spec);
+ void print_bash_completion_options(
+ const boost::program_options::options_description &ops);
+};
+
+} // namespace rbd
+
+#endif // CEPH_RBD_SHELL_H
diff --git a/src/tools/rbd/Utils.cc b/src/tools/rbd/Utils.cc
new file mode 100644
index 000000000..71da0bd27
--- /dev/null
+++ b/src/tools/rbd/Utils.cc
@@ -0,0 +1,1203 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/Utils.h"
+#include "include/ceph_assert.h"
+#include "include/Context.h"
+#include "include/encoding.h"
+#include "common/common_init.h"
+#include "include/stringify.h"
+#include "include/rbd/features.h"
+#include "common/config.h"
+#include "common/errno.h"
+#include "common/escape.h"
+#include "common/safe_io.h"
+#include "global/global_context.h"
+#include <fstream>
+#include <iostream>
+#include <regex>
+#include <boost/algorithm/string.hpp>
+#include <boost/lexical_cast.hpp>
+
+namespace rbd {
+namespace utils {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+static std::string mgr_command_args_to_str(
+ const std::map<std::string, std::string> &args) {
+ std::string out = "";
+
+ std::string delimiter;
+ for (auto &it : args) {
+ out += delimiter + "\"" + it.first + "\": \"" +
+ stringify(json_stream_escaper(it.second)) + "\"";
+ delimiter = ",\n";
+ }
+
+ return out;
+}
+
+} // anonymous namespace
+
+int ProgressContext::update_progress(uint64_t offset, uint64_t total) {
+ if (progress) {
+ int pc = get_percentage(offset, total);
+ if (pc > last_pc) {
+ std::cerr << "\r" << operation << ": "
+ << pc << "% complete..." << std::flush;
+ last_pc = pc;
+ }
+ }
+ return 0;
+}
+
+void ProgressContext::finish() {
+ if (progress) {
+ std::cerr << "\r" << operation << ": 100% complete...done." << std::endl;
+ }
+}
+
+void ProgressContext::fail() {
+ if (progress) {
+ std::cerr << "\r" << operation << ": " << last_pc << "% complete...failed."
+ << std::endl;
+ }
+}
+
+int get_percentage(uint64_t part, uint64_t whole) {
+ return whole ? (100 * part / whole) : 0;
+}
+
+void aio_context_callback(librbd::completion_t completion, void *arg)
+{
+ librbd::RBD::AioCompletion *aio_completion =
+ reinterpret_cast<librbd::RBD::AioCompletion*>(completion);
+ Context *context = reinterpret_cast<Context *>(arg);
+ context->complete(aio_completion->get_return_value());
+ aio_completion->release();
+}
+
+int read_string(int fd, unsigned max, std::string *out) {
+ char buf[4];
+
+ int r = safe_read_exact(fd, buf, 4);
+ if (r < 0)
+ return r;
+
+ bufferlist bl;
+ bl.append(buf, 4);
+ auto p = bl.cbegin();
+ uint32_t len;
+ decode(len, p);
+ if (len > max)
+ return -EINVAL;
+
+ char sbuf[len];
+ r = safe_read_exact(fd, sbuf, len);
+ if (r < 0)
+ return r;
+ out->assign(sbuf, len);
+ return len;
+}
+
+int extract_spec(const std::string &spec, std::string *pool_name,
+ std::string *namespace_name, std::string *name,
+ std::string *snap_name, SpecValidation spec_validation) {
+ if (!g_ceph_context->_conf.get_val<bool>("rbd_validate_names")) {
+ spec_validation = SPEC_VALIDATION_NONE;
+ }
+
+ std::regex pattern;
+ switch (spec_validation) {
+ case SPEC_VALIDATION_FULL:
+ // disallow "/" and "@" in all names
+ pattern = "^(?:([^/@]+)/(?:([^/@]+)/)?)?([^/@]+)(?:@([^/@]+))?$";
+ break;
+ case SPEC_VALIDATION_SNAP:
+ // disallow "/" and "@" in snap name
+ pattern = "^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$";
+ break;
+ case SPEC_VALIDATION_NONE:
+ // relaxed pattern assumes pool is before first "/",
+ // namespace is before second "/", and snap name is after first "@"
+ pattern = "^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@(.+))?$";
+ break;
+ default:
+ ceph_abort();
+ break;
+ }
+
+ std::smatch match;
+ if (!std::regex_match(spec, match, pattern)) {
+ std::cerr << "rbd: invalid spec '" << spec << "'" << std::endl;
+ return -EINVAL;
+ }
+
+ if (match[1].matched) {
+ if (pool_name != nullptr) {
+ *pool_name = match[1];
+ } else {
+ std::cerr << "rbd: pool name specified for a command that doesn't use it"
+ << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ if (match[2].matched) {
+ if (namespace_name != nullptr) {
+ *namespace_name = match[2];
+ } else {
+ std::cerr << "rbd: namespace name specified for a command that doesn't "
+ << "use it" << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ if (name != nullptr) {
+ *name = match[3];
+ }
+
+ if (match[4].matched) {
+ if (snap_name != nullptr) {
+ *snap_name = match[4];
+ } else {
+ std::cerr << "rbd: snapshot name specified for a command that doesn't "
+ << "use it" << std::endl;
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+std::string get_positional_argument(const po::variables_map &vm, size_t index) {
+ if (vm.count(at::POSITIONAL_ARGUMENTS) == 0) {
+ return "";
+ }
+
+ const std::vector<std::string> &args =
+ boost::any_cast<std::vector<std::string> >(
+ vm[at::POSITIONAL_ARGUMENTS].value());
+ if (index < args.size()) {
+ return args[index];
+ }
+ return "";
+}
+
+void normalize_pool_name(std::string* pool_name) {
+ if (pool_name->empty()) {
+ *pool_name = get_default_pool_name();
+ }
+}
+
+std::string get_default_pool_name() {
+ return g_ceph_context->_conf.get_val<std::string>("rbd_default_pool");
+}
+
+int get_pool_and_namespace_names(
+ const boost::program_options::variables_map &vm, bool validate_pool_name,
+ std::string* pool_name, std::string* namespace_name, size_t *arg_index) {
+ if (namespace_name != nullptr && vm.count(at::NAMESPACE_NAME)) {
+ *namespace_name = vm[at::NAMESPACE_NAME].as<std::string>();
+ }
+
+ if (vm.count(at::POOL_NAME)) {
+ *pool_name = vm[at::POOL_NAME].as<std::string>();
+ } else {
+ *pool_name = get_positional_argument(vm, *arg_index);
+ if (!pool_name->empty()) {
+ if (namespace_name != nullptr) {
+ auto slash_pos = pool_name->find_last_of('/');
+ if (slash_pos != std::string::npos) {
+ *namespace_name = pool_name->substr(slash_pos + 1);
+ }
+ *pool_name = pool_name->substr(0, slash_pos);
+ }
+ ++(*arg_index);
+ }
+ }
+
+ if (!g_ceph_context->_conf.get_val<bool>("rbd_validate_names")) {
+ validate_pool_name = false;
+ }
+
+ if (validate_pool_name &&
+ pool_name->find_first_of("/@") != std::string::npos) {
+ std::cerr << "rbd: invalid pool '" << *pool_name << "'" << std::endl;
+ return -EINVAL;
+ } else if (namespace_name != nullptr &&
+ namespace_name->find_first_of("/@") != std::string::npos) {
+ std::cerr << "rbd: invalid namespace '" << *namespace_name << "'"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int get_pool_image_id(const po::variables_map &vm,
+ size_t *spec_arg_index,
+ std::string *pool_name,
+ std::string *namespace_name,
+ std::string *image_id) {
+
+ if (vm.count(at::POOL_NAME) && pool_name != nullptr) {
+ *pool_name = vm[at::POOL_NAME].as<std::string>();
+ }
+ if (vm.count(at::NAMESPACE_NAME) && namespace_name != nullptr) {
+ *namespace_name = vm[at::NAMESPACE_NAME].as<std::string>();
+ }
+ if (vm.count(at::IMAGE_ID) && image_id != nullptr) {
+ *image_id = vm[at::IMAGE_ID].as<std::string>();
+ }
+
+ int r;
+ if (image_id != nullptr && spec_arg_index != nullptr && image_id->empty()) {
+ std::string spec = get_positional_argument(vm, (*spec_arg_index)++);
+ if (!spec.empty()) {
+ r = extract_spec(spec, pool_name, namespace_name, image_id, nullptr,
+ SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+
+ if (image_id != nullptr && image_id->empty()) {
+ std::cerr << "rbd: image id was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int get_image_or_snap_spec(const po::variables_map &vm, std::string *spec) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string nspace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name,
+ &image_name, &snap_name, true, SNAPSHOT_PRESENCE_PERMITTED,
+ SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ if (pool_name.empty()) {
+ // connect to the cluster to get the default pool
+ librados::Rados rados;
+ r = init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ normalize_pool_name(&pool_name);
+ }
+
+ spec->append(pool_name);
+ spec->append("/");
+ if (!nspace_name.empty()) {
+ spec->append(nspace_name);
+ spec->append("/");
+ }
+ spec->append(image_name);
+ if (!snap_name.empty()) {
+ spec->append("@");
+ spec->append(snap_name);
+ }
+
+ return 0;
+}
+
+void append_options_as_args(const std::vector<std::string> &options,
+ std::vector<std::string> *args) {
+ for (auto &opts : options) {
+ std::vector<std::string> args_;
+ boost::split(args_, opts, boost::is_any_of(","));
+ for (auto &o : args_) {
+ args->push_back("--" + o);
+ }
+ }
+}
+
+int get_pool_image_snapshot_names(const po::variables_map &vm,
+ at::ArgumentModifier mod,
+ size_t *spec_arg_index,
+ std::string *pool_name,
+ std::string *namespace_name,
+ std::string *image_name,
+ std::string *snap_name,
+ bool image_name_required,
+ SnapshotPresence snapshot_presence,
+ SpecValidation spec_validation) {
+ std::string pool_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
+ at::DEST_POOL_NAME : at::POOL_NAME);
+ std::string image_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
+ at::DEST_IMAGE_NAME : at::IMAGE_NAME);
+ return get_pool_generic_snapshot_names(vm, mod, spec_arg_index, pool_key,
+ pool_name, namespace_name, image_key,
+ "image", image_name, snap_name,
+ image_name_required, snapshot_presence,
+ spec_validation);
+}
+
+int get_pool_generic_snapshot_names(const po::variables_map &vm,
+ at::ArgumentModifier mod,
+ size_t *spec_arg_index,
+ const std::string& pool_key,
+ std::string *pool_name,
+ std::string *namespace_name,
+ const std::string& generic_key,
+ const std::string& generic_key_desc,
+ std::string *generic_name,
+ std::string *snap_name,
+ bool generic_name_required,
+ SnapshotPresence snapshot_presence,
+ SpecValidation spec_validation) {
+ std::string namespace_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
+ at::DEST_NAMESPACE_NAME : at::NAMESPACE_NAME);
+ std::string snap_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
+ at::DEST_SNAPSHOT_NAME : at::SNAPSHOT_NAME);
+
+ if (vm.count(pool_key) && pool_name != nullptr) {
+ *pool_name = vm[pool_key].as<std::string>();
+ }
+ if (vm.count(namespace_key) && namespace_name != nullptr) {
+ *namespace_name = vm[namespace_key].as<std::string>();
+ }
+ if (vm.count(generic_key) && generic_name != nullptr) {
+ *generic_name = vm[generic_key].as<std::string>();
+ }
+ if (vm.count(snap_key) && snap_name != nullptr) {
+ *snap_name = vm[snap_key].as<std::string>();
+ }
+
+ int r;
+ if ((generic_key == at::IMAGE_NAME || generic_key == at::DEST_IMAGE_NAME) &&
+ generic_name != nullptr && !generic_name->empty()) {
+ // despite the separate pool and snapshot name options,
+ // we can also specify them via the image option
+ std::string image_name_copy(*generic_name);
+ r = extract_spec(image_name_copy, pool_name, namespace_name, generic_name,
+ snap_name, spec_validation);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (generic_name != nullptr && spec_arg_index != nullptr &&
+ generic_name->empty()) {
+ std::string spec = get_positional_argument(vm, (*spec_arg_index)++);
+ if (!spec.empty()) {
+ r = extract_spec(spec, pool_name, namespace_name, generic_name, snap_name,
+ spec_validation);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+
+ if (generic_name != nullptr && generic_name_required &&
+ generic_name->empty()) {
+ std::string prefix = at::get_description_prefix(mod);
+ std::cerr << "rbd: "
+ << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string())
+ << generic_key_desc << " name was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ std::regex pattern("^[^@/]*?$");
+ if (spec_validation == SPEC_VALIDATION_FULL) {
+ // validate pool name while creating/renaming/copying/cloning/importing/etc
+ if ((pool_name != nullptr) && !std::regex_match (*pool_name, pattern)) {
+ std::cerr << "rbd: invalid pool name '" << *pool_name << "'" << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ if (namespace_name != nullptr && !namespace_name->empty() &&
+ !std::regex_match (*namespace_name, pattern)) {
+ std::cerr << "rbd: invalid namespace name '" << *namespace_name << "'"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ if (snap_name != nullptr) {
+ r = validate_snapshot_name(mod, *snap_name, snapshot_presence,
+ spec_validation);
+ if (r < 0) {
+ return r;
+ }
+ }
+ return 0;
+}
+
+int validate_snapshot_name(at::ArgumentModifier mod,
+ const std::string &snap_name,
+ SnapshotPresence snapshot_presence,
+ SpecValidation spec_validation) {
+ std::string prefix = at::get_description_prefix(mod);
+ switch (snapshot_presence) {
+ case SNAPSHOT_PRESENCE_PERMITTED:
+ break;
+ case SNAPSHOT_PRESENCE_NONE:
+ if (!snap_name.empty()) {
+ std::cerr << "rbd: "
+ << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string())
+ << "snapshot name specified for a command that doesn't use it"
+ << std::endl;
+ return -EINVAL;
+ }
+ break;
+ case SNAPSHOT_PRESENCE_REQUIRED:
+ if (snap_name.empty()) {
+ std::cerr << "rbd: "
+ << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string())
+ << "snapshot name was not specified" << std::endl;
+ return -EINVAL;
+ }
+ break;
+ }
+
+ if (spec_validation == SPEC_VALIDATION_SNAP) {
+ // disallow "/" and "@" in snap name
+ std::regex pattern("^[^@/]*?$");
+ if (!std::regex_match (snap_name, pattern)) {
+ std::cerr << "rbd: invalid snap name '" << snap_name << "'" << std::endl;
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+int get_image_options(const boost::program_options::variables_map &vm,
+ bool get_format, librbd::ImageOptions *opts) {
+ uint64_t order = 0, stripe_unit = 0, stripe_count = 0, object_size = 0;
+ uint64_t features = 0, features_clear = 0;
+ std::string data_pool;
+ bool order_specified = true;
+ bool features_specified = false;
+ bool features_clear_specified = false;
+ bool stripe_specified = false;
+
+ if (vm.count(at::IMAGE_ORDER)) {
+ order = vm[at::IMAGE_ORDER].as<uint64_t>();
+ } else if (vm.count(at::IMAGE_OBJECT_SIZE)) {
+ object_size = vm[at::IMAGE_OBJECT_SIZE].as<uint64_t>();
+ order = std::round(std::log2(object_size));
+ } else {
+ order_specified = false;
+ }
+
+ if (vm.count(at::IMAGE_FEATURES)) {
+ features = vm[at::IMAGE_FEATURES].as<uint64_t>();
+ features_specified = true;
+ }
+
+ if (vm.count(at::IMAGE_STRIPE_UNIT)) {
+ stripe_unit = vm[at::IMAGE_STRIPE_UNIT].as<uint64_t>();
+ stripe_specified = true;
+ }
+
+ if (vm.count(at::IMAGE_STRIPE_COUNT)) {
+ stripe_count = vm[at::IMAGE_STRIPE_COUNT].as<uint64_t>();
+ stripe_specified = true;
+ }
+
+ if (vm.count(at::IMAGE_SHARED) && vm[at::IMAGE_SHARED].as<bool>()) {
+ if (features_specified) {
+ features &= ~RBD_FEATURES_SINGLE_CLIENT;
+ } else {
+ features_clear |= RBD_FEATURES_SINGLE_CLIENT;
+ features_clear_specified = true;
+ }
+ }
+
+ if (vm.count(at::IMAGE_DATA_POOL)) {
+ data_pool = vm[at::IMAGE_DATA_POOL].as<std::string>();
+ }
+
+ if (get_format) {
+ uint64_t format = 0;
+ bool format_specified = false;
+ if (vm.count(at::IMAGE_NEW_FORMAT)) {
+ format = 2;
+ format_specified = true;
+ } else if (vm.count(at::IMAGE_FORMAT)) {
+ format = vm[at::IMAGE_FORMAT].as<uint32_t>();
+ format_specified = true;
+ }
+ if (format == 1) {
+ std::cerr << "rbd: image format 1 is deprecated" << std::endl;
+ }
+
+ if (features_specified && features != 0) {
+ if (format_specified && format == 1) {
+ std::cerr << "rbd: features not allowed with format 1; "
+ << "use --image-format 2" << std::endl;
+ return -EINVAL;
+ } else {
+ format = 2;
+ format_specified = true;
+ }
+ }
+
+ if ((stripe_unit || stripe_count) &&
+ (stripe_unit != (1ull << order) && stripe_count != 1)) {
+ if (format_specified && format == 1) {
+ std::cerr << "rbd: non-default striping not allowed with format 1; "
+ << "use --image-format 2" << std::endl;
+ return -EINVAL;
+ } else {
+ format = 2;
+ format_specified = true;
+ }
+ }
+
+ if (!data_pool.empty()) {
+ if (format_specified && format == 1) {
+ std::cerr << "rbd: data pool not allowed with format 1; "
+ << "use --image-format 2" << std::endl;
+ return -EINVAL;
+ } else {
+ format = 2;
+ format_specified = true;
+ }
+ }
+
+ if (format_specified) {
+ int r = g_conf().set_val("rbd_default_format", stringify(format));
+ ceph_assert(r == 0);
+ opts->set(RBD_IMAGE_OPTION_FORMAT, format);
+ }
+ }
+
+ if (order_specified)
+ opts->set(RBD_IMAGE_OPTION_ORDER, order);
+ if (features_specified)
+ opts->set(RBD_IMAGE_OPTION_FEATURES, features);
+ if (features_clear_specified) {
+ opts->set(RBD_IMAGE_OPTION_FEATURES_CLEAR, features_clear);
+ }
+ if (stripe_specified) {
+ opts->set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
+ opts->set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
+ }
+ if (!data_pool.empty()) {
+ opts->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool);
+ }
+ int r = get_journal_options(vm, opts);
+ if (r < 0) {
+ return r;
+ }
+
+ r = get_flatten_option(vm, opts);
+ if (r < 0) {
+ return r;
+ }
+
+ if (vm.count(at::IMAGE_MIRROR_IMAGE_MODE)) {
+ opts->set(RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE,
+ vm[at::IMAGE_MIRROR_IMAGE_MODE].as<librbd::mirror_image_mode_t>());
+ }
+
+ return 0;
+}
+
+int get_journal_options(const boost::program_options::variables_map &vm,
+ librbd::ImageOptions *opts) {
+
+ if (vm.count(at::JOURNAL_OBJECT_SIZE)) {
+ uint64_t size = vm[at::JOURNAL_OBJECT_SIZE].as<uint64_t>();
+ uint64_t order = 12;
+ while ((1ULL << order) < size) {
+ order++;
+ }
+ opts->set(RBD_IMAGE_OPTION_JOURNAL_ORDER, order);
+
+ int r = g_conf().set_val("rbd_journal_order", stringify(order));
+ ceph_assert(r == 0);
+ }
+ if (vm.count(at::JOURNAL_SPLAY_WIDTH)) {
+ opts->set(RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH,
+ vm[at::JOURNAL_SPLAY_WIDTH].as<uint64_t>());
+
+ int r = g_conf().set_val("rbd_journal_splay_width",
+ stringify(
+ vm[at::JOURNAL_SPLAY_WIDTH].as<uint64_t>()));
+ ceph_assert(r == 0);
+ }
+ if (vm.count(at::JOURNAL_POOL)) {
+ opts->set(RBD_IMAGE_OPTION_JOURNAL_POOL,
+ vm[at::JOURNAL_POOL].as<std::string>());
+
+ int r = g_conf().set_val("rbd_journal_pool",
+ vm[at::JOURNAL_POOL].as<std::string>());
+ ceph_assert(r == 0);
+ }
+
+ return 0;
+}
+
+int get_flatten_option(const boost::program_options::variables_map &vm,
+ librbd::ImageOptions *opts) {
+ if (vm.count(at::IMAGE_FLATTEN) && vm[at::IMAGE_FLATTEN].as<bool>()) {
+ uint64_t flatten = 1;
+ opts->set(RBD_IMAGE_OPTION_FLATTEN, flatten);
+ }
+ return 0;
+}
+
+int get_image_size(const boost::program_options::variables_map &vm,
+ uint64_t *size) {
+ if (vm.count(at::IMAGE_SIZE) == 0) {
+ std::cerr << "rbd: must specify --size <M/G/T>" << std::endl;
+ return -EINVAL;
+ }
+
+ *size = vm[at::IMAGE_SIZE].as<uint64_t>();
+ return 0;
+}
+
+int get_path(const boost::program_options::variables_map &vm,
+ size_t *arg_index, std::string *path) {
+ if (vm.count(at::PATH)) {
+ *path = vm[at::PATH].as<std::string>();
+ } else {
+ *path = get_positional_argument(vm, *arg_index);
+ if (!path->empty()) {
+ ++(*arg_index);
+ }
+ }
+
+ if (path->empty()) {
+ std::cerr << "rbd: path was not specified" << std::endl;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int get_formatter(const po::variables_map &vm,
+ at::Format::Formatter *formatter) {
+ if (vm.count(at::FORMAT)) {
+ bool pretty = vm[at::PRETTY_FORMAT].as<bool>();
+ *formatter = vm[at::FORMAT].as<at::Format>().create_formatter(pretty);
+ if (*formatter == nullptr && pretty) {
+ std::cerr << "rbd: --pretty-format only works when --format "
+ << "is json or xml" << std::endl;
+ return -EINVAL;
+ } else if (*formatter != nullptr && !pretty) {
+ formatter->get()->enable_line_break();
+ }
+ } else if (vm[at::PRETTY_FORMAT].as<bool>()) {
+ std::cerr << "rbd: --pretty-format only works when --format "
+ << "is json or xml" << std::endl;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int get_snap_create_flags(const po::variables_map &vm, uint32_t *flags) {
+ if (vm[at::SKIP_QUIESCE].as<bool>() &&
+ vm[at::IGNORE_QUIESCE_ERROR].as<bool>()) {
+ std::cerr << "rbd: " << at::IGNORE_QUIESCE_ERROR
+ << " cannot be used together with " << at::SKIP_QUIESCE
+ << std::endl;
+ return -EINVAL;
+ }
+
+ *flags = 0;
+ if (vm[at::SKIP_QUIESCE].as<bool>()) {
+ *flags |= RBD_SNAP_CREATE_SKIP_QUIESCE;
+ } else if (vm[at::IGNORE_QUIESCE_ERROR].as<bool>()) {
+ *flags |= RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR;
+ }
+ return 0;
+}
+
+int get_encryption_options(const boost::program_options::variables_map &vm,
+ EncryptionOptions* result) {
+ std::vector<std::string> passphrase_files;
+ if (vm.count(at::ENCRYPTION_PASSPHRASE_FILE)) {
+ passphrase_files =
+ vm[at::ENCRYPTION_PASSPHRASE_FILE].as<std::vector<std::string>>();
+ }
+
+ std::vector<at::EncryptionFormat> formats;
+ if (vm.count(at::ENCRYPTION_FORMAT)) {
+ formats = vm[at::ENCRYPTION_FORMAT].as<decltype(formats)>();
+ } else if (vm.count(at::ENCRYPTION_PASSPHRASE_FILE)) {
+ formats.resize(passphrase_files.size(),
+ at::EncryptionFormat{RBD_ENCRYPTION_FORMAT_LUKS});
+ }
+
+ if (formats.size() != passphrase_files.size()) {
+ std::cerr << "rbd: encryption formats count does not match "
+ << "passphrase files count" << std::endl;
+ return -EINVAL;
+ }
+
+ result->specs.clear();
+ result->specs.reserve(formats.size());
+ for (size_t i = 0; i < formats.size(); ++i) {
+ std::ifstream file(passphrase_files[i], std::ios::in | std::ios::binary);
+ if (file.fail()) {
+ std::cerr << "rbd: unable to open passphrase file '"
+ << passphrase_files[i] << "': " << cpp_strerror(errno)
+ << std::endl;
+ return -errno;
+ }
+ std::string passphrase((std::istreambuf_iterator<char>(file)),
+ std::istreambuf_iterator<char>());
+ file.close();
+
+ switch (formats[i].format) {
+ case RBD_ENCRYPTION_FORMAT_LUKS: {
+ auto opts = new librbd::encryption_luks_format_options_t{
+ std::move(passphrase)};
+ result->specs.push_back(
+ {RBD_ENCRYPTION_FORMAT_LUKS, opts, sizeof(*opts)});
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS1: {
+ auto opts = new librbd::encryption_luks1_format_options_t{
+ .passphrase = std::move(passphrase)};
+ result->specs.push_back(
+ {RBD_ENCRYPTION_FORMAT_LUKS1, opts, sizeof(*opts)});
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS2: {
+ auto opts = new librbd::encryption_luks2_format_options_t{
+ .passphrase = std::move(passphrase)};
+ result->specs.push_back(
+ {RBD_ENCRYPTION_FORMAT_LUKS2, opts, sizeof(*opts)});
+ break;
+ }
+ default:
+ ceph_abort();
+ }
+ }
+
+ return 0;
+}
+
+void init_context() {
+ g_conf().set_val_or_die("rbd_cache_writethrough_until_flush", "false");
+ g_conf().apply_changes(nullptr);
+}
+
+int init_rados(librados::Rados *rados) {
+ init_context();
+
+ int r = rados->init_with_context(g_ceph_context);
+ if (r < 0) {
+ std::cerr << "rbd: couldn't initialize rados!" << std::endl;
+ return r;
+ }
+
+ r = rados->connect();
+ if (r < 0) {
+ std::cerr << "rbd: couldn't connect to the cluster!" << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int init(const std::string &pool_name, const std::string& namespace_name,
+ librados::Rados *rados, librados::IoCtx *io_ctx) {
+ init_context();
+
+ int r = init_rados(rados);
+ if (r < 0) {
+ return r;
+ }
+
+ r = init_io_ctx(*rados, pool_name, namespace_name, io_ctx);
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+int init_io_ctx(librados::Rados &rados, std::string pool_name,
+ const std::string& namespace_name, librados::IoCtx *io_ctx) {
+ normalize_pool_name(&pool_name);
+
+ int r = rados.ioctx_create(pool_name.c_str(), *io_ctx);
+ if (r < 0) {
+ if (r == -ENOENT && pool_name == get_default_pool_name()) {
+ std::cerr << "rbd: error opening default pool "
+ << "'" << pool_name << "'" << std::endl
+ << "Ensure that the default pool has been created or specify "
+ << "an alternate pool name." << std::endl;
+ } else {
+ std::cerr << "rbd: error opening pool '" << pool_name << "': "
+ << cpp_strerror(r) << std::endl;
+ }
+ return r;
+ }
+
+ return set_namespace(namespace_name, io_ctx);
+}
+
+int set_namespace(const std::string& namespace_name, librados::IoCtx *io_ctx) {
+ if (!namespace_name.empty()) {
+ librbd::RBD rbd;
+ bool exists = false;
+ int r = rbd.namespace_exists(*io_ctx, namespace_name.c_str(), &exists);
+ if (r < 0) {
+ std::cerr << "rbd: error asserting namespace: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ if (!exists) {
+ std::cerr << "rbd: namespace '" << namespace_name << "' does not exist."
+ << std::endl;
+ return -ENOENT;
+ }
+ }
+ io_ctx->set_namespace(namespace_name);
+ return 0;
+}
+
+void disable_cache() {
+ g_conf().set_val_or_die("rbd_cache", "false");
+}
+
+int open_image(librados::IoCtx &io_ctx, const std::string &image_name,
+ bool read_only, librbd::Image *image) {
+ int r;
+ librbd::RBD rbd;
+ if (read_only) {
+ r = rbd.open_read_only(io_ctx, *image, image_name.c_str(), NULL);
+ } else {
+ r = rbd.open(io_ctx, *image, image_name.c_str());
+ }
+
+ if (r < 0) {
+ std::cerr << "rbd: error opening image " << image_name << ": "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+int open_image_by_id(librados::IoCtx &io_ctx, const std::string &image_id,
+ bool read_only, librbd::Image *image) {
+ int r;
+ librbd::RBD rbd;
+ if (read_only) {
+ r = rbd.open_by_id_read_only(io_ctx, *image, image_id.c_str(), NULL);
+ } else {
+ r = rbd.open_by_id(io_ctx, *image, image_id.c_str());
+ }
+
+ if (r < 0) {
+ std::cerr << "rbd: error opening image with id " << image_id << ": "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+int init_and_open_image(const std::string &pool_name,
+ const std::string &namespace_name,
+ const std::string &image_name,
+ const std::string &image_id,
+ const std::string &snap_name, bool read_only,
+ librados::Rados *rados, librados::IoCtx *io_ctx,
+ librbd::Image *image) {
+ int r = init(pool_name, namespace_name, rados, io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ if (image_id.empty()) {
+ r = open_image(*io_ctx, image_name, read_only, image);
+ } else {
+ r = open_image_by_id(*io_ctx, image_id, read_only, image);
+ }
+ if (r < 0) {
+ return r;
+ }
+
+ if (!snap_name.empty()) {
+ r = snap_set(*image, snap_name);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int snap_set(librbd::Image &image, const std::string &snap_name) {
+ int r = image.snap_set(snap_name.c_str());
+ if (r < 0) {
+ std::cerr << "error setting snapshot context: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void calc_sparse_extent(const bufferptr &bp,
+ size_t sparse_size,
+ size_t buffer_offset,
+ uint64_t buffer_length,
+ size_t *write_length,
+ bool *zeroed) {
+ if (sparse_size == 0) {
+ // sparse writes are disabled -- write the full extent
+ ceph_assert(buffer_offset == 0);
+ *write_length = buffer_length;
+ *zeroed = false;
+ return;
+ }
+
+ *write_length = 0;
+ size_t original_offset = buffer_offset;
+ while (buffer_offset < buffer_length) {
+ size_t extent_size = std::min<size_t>(
+ sparse_size, buffer_length - buffer_offset);
+
+ bufferptr extent(bp, buffer_offset, extent_size);
+
+ bool extent_is_zero = extent.is_zero();
+ if (original_offset == buffer_offset) {
+ *zeroed = extent_is_zero;
+ } else if (*zeroed != extent_is_zero) {
+ ceph_assert(*write_length > 0);
+ return;
+ }
+
+ buffer_offset += extent_size;
+ *write_length += extent_size;
+ }
+}
+
+std::string image_id(librbd::Image& image) {
+ std::string id;
+ int r = image.get_id(&id);
+ if (r < 0) {
+ return std::string();
+ }
+ return id;
+}
+
+std::string mirror_image_mode(librbd::mirror_image_mode_t mode) {
+ switch (mode) {
+ case RBD_MIRROR_IMAGE_MODE_JOURNAL:
+ return "journal";
+ case RBD_MIRROR_IMAGE_MODE_SNAPSHOT:
+ return "snapshot";
+ default:
+ return "unknown";
+ }
+}
+
+std::string mirror_image_state(librbd::mirror_image_state_t state) {
+ switch (state) {
+ case RBD_MIRROR_IMAGE_DISABLING:
+ return "disabling";
+ case RBD_MIRROR_IMAGE_ENABLED:
+ return "enabled";
+ case RBD_MIRROR_IMAGE_DISABLED:
+ return "disabled";
+ default:
+ return "unknown";
+ }
+}
+
+std::string mirror_image_status_state(
+ librbd::mirror_image_status_state_t state) {
+ switch (state) {
+ case MIRROR_IMAGE_STATUS_STATE_UNKNOWN:
+ return "unknown";
+ case MIRROR_IMAGE_STATUS_STATE_ERROR:
+ return "error";
+ case MIRROR_IMAGE_STATUS_STATE_SYNCING:
+ return "syncing";
+ case MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY:
+ return "starting_replay";
+ case MIRROR_IMAGE_STATUS_STATE_REPLAYING:
+ return "replaying";
+ case MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY:
+ return "stopping_replay";
+ case MIRROR_IMAGE_STATUS_STATE_STOPPED:
+ return "stopped";
+ default:
+ return "unknown (" + stringify(static_cast<uint32_t>(state)) + ")";
+ }
+}
+
+std::string mirror_image_site_status_state(
+ const librbd::mirror_image_site_status_t& status) {
+ return (status.up ? "up+" : "down+") +
+ mirror_image_status_state(status.state);
+}
+
+std::string mirror_image_global_status_state(
+ const librbd::mirror_image_global_status_t& status) {
+ librbd::mirror_image_site_status_t local_status;
+ int r = get_local_mirror_image_status(status, &local_status);
+ if (r < 0) {
+ return "down+unknown";
+ }
+
+ return mirror_image_site_status_state(local_status);
+}
+
+int get_local_mirror_image_status(
+ const librbd::mirror_image_global_status_t& status,
+ librbd::mirror_image_site_status_t* local_status) {
+ auto it = std::find_if(status.site_statuses.begin(),
+ status.site_statuses.end(),
+ [](auto& site_status) {
+ return (site_status.mirror_uuid ==
+ RBD_MIRROR_IMAGE_STATUS_LOCAL_MIRROR_UUID);
+ });
+ if (it == status.site_statuses.end()) {
+ return -ENOENT;
+ }
+
+ *local_status = *it;
+ return 0;
+}
+
+std::string timestr(time_t t) {
+ if (t == 0) {
+ return "";
+ }
+
+ struct tm tm;
+
+ localtime_r(&t, &tm);
+
+ char buf[32];
+ strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &tm);
+
+ return buf;
+}
+
+uint64_t get_rbd_default_features(CephContext* cct) {
+ auto features = cct->_conf.get_val<std::string>("rbd_default_features");
+ return boost::lexical_cast<uint64_t>(features);
+}
+
+bool is_not_user_snap_namespace(librbd::Image* image,
+ const librbd::snap_info_t &snap_info)
+{
+ librbd::snap_namespace_type_t namespace_type;
+ int r = image->snap_get_namespace_type(snap_info.id, &namespace_type);
+ if (r < 0) {
+ return false;
+ }
+ return namespace_type != RBD_SNAP_NAMESPACE_TYPE_USER;
+}
+
+void get_mirror_peer_sites(
+ librados::IoCtx& io_ctx,
+ std::vector<librbd::mirror_peer_site_t>* mirror_peers) {
+ librados::IoCtx default_io_ctx;
+ default_io_ctx.dup(io_ctx);
+ default_io_ctx.set_namespace("");
+
+ mirror_peers->clear();
+
+ librbd::RBD rbd;
+ int r = rbd.mirror_peer_site_list(default_io_ctx, mirror_peers);
+ if (r < 0 && r != -ENOENT) {
+ std::cerr << "rbd: failed to list mirror peers" << std::endl;
+ }
+}
+
+void get_mirror_peer_mirror_uuids_to_names(
+ const std::vector<librbd::mirror_peer_site_t>& mirror_peers,
+ std::map<std::string, std::string>* mirror_uuids_to_name) {
+ mirror_uuids_to_name->clear();
+ for (auto& peer : mirror_peers) {
+ if (!peer.mirror_uuid.empty() && !peer.site_name.empty()) {
+ (*mirror_uuids_to_name)[peer.mirror_uuid] = peer.site_name;
+ }
+ }
+}
+
+void populate_unknown_mirror_image_site_statuses(
+ const std::vector<librbd::mirror_peer_site_t>& mirror_peers,
+ librbd::mirror_image_global_status_t* global_status) {
+ std::set<std::string> missing_mirror_uuids;
+ librbd::mirror_peer_direction_t mirror_peer_direction =
+ RBD_MIRROR_PEER_DIRECTION_RX_TX;
+ for (auto& peer : mirror_peers) {
+ if (peer.uuid == mirror_peers.begin()->uuid) {
+ mirror_peer_direction = peer.direction;
+ } else if (mirror_peer_direction != RBD_MIRROR_PEER_DIRECTION_RX_TX &&
+ mirror_peer_direction != peer.direction) {
+ mirror_peer_direction = RBD_MIRROR_PEER_DIRECTION_RX_TX;
+ }
+
+ if (!peer.mirror_uuid.empty() &&
+ peer.direction != RBD_MIRROR_PEER_DIRECTION_TX) {
+ missing_mirror_uuids.insert(peer.mirror_uuid);
+ }
+ }
+
+ if (mirror_peer_direction != RBD_MIRROR_PEER_DIRECTION_TX) {
+ missing_mirror_uuids.insert(RBD_MIRROR_IMAGE_STATUS_LOCAL_MIRROR_UUID);
+ }
+
+ std::vector<librbd::mirror_image_site_status_t> site_statuses;
+ site_statuses.reserve(missing_mirror_uuids.size());
+
+ for (auto& site_status : global_status->site_statuses) {
+ if (missing_mirror_uuids.count(site_status.mirror_uuid) > 0) {
+ missing_mirror_uuids.erase(site_status.mirror_uuid);
+ site_statuses.push_back(site_status);
+ }
+ }
+
+ for (auto& mirror_uuid : missing_mirror_uuids) {
+ site_statuses.push_back({mirror_uuid, MIRROR_IMAGE_STATUS_STATE_UNKNOWN,
+ "status not found", 0, false});
+ }
+
+ std::swap(global_status->site_statuses, site_statuses);
+}
+
+int mgr_command(librados::Rados& rados, const std::string& cmd,
+ const std::map<std::string, std::string> &args,
+ std::ostream *out_os, std::ostream *err_os) {
+ std::string command = R"(
+ {
+ "prefix": ")" + cmd + R"(", )" + mgr_command_args_to_str(args) + R"(
+ })";
+
+ bufferlist in_bl;
+ bufferlist out_bl;
+ std::string outs;
+ int r = rados.mgr_command(command, in_bl, &out_bl, &outs);
+ if (r < 0) {
+ (*err_os) << "rbd: " << cmd << " failed: " << cpp_strerror(r);
+ if (!outs.empty()) {
+ (*err_os) << ": " << outs;
+ }
+ (*err_os) << std::endl;
+ return r;
+ }
+
+ if (out_bl.length() != 0) {
+ (*out_os) << out_bl.c_str();
+ }
+
+ return 0;
+}
+
+} // namespace utils
+} // namespace rbd
diff --git a/src/tools/rbd/Utils.h b/src/tools/rbd/Utils.h
new file mode 100644
index 000000000..5076fd7fe
--- /dev/null
+++ b/src/tools/rbd/Utils.h
@@ -0,0 +1,283 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_UTILS_H
+#define CEPH_RBD_UTILS_H
+
+#include "include/compat.h"
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+#include "tools/rbd/ArgumentTypes.h"
+#include <map>
+#include <string>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace utils {
+
+namespace detail {
+
+template <typename T, void(T::*MF)(int)>
+void aio_completion_callback(librbd::completion_t completion,
+ void *arg) {
+ librbd::RBD::AioCompletion *aio_completion =
+ reinterpret_cast<librbd::RBD::AioCompletion*>(completion);
+
+ // complete the AIO callback in separate thread context
+ T *t = reinterpret_cast<T *>(arg);
+ int r = aio_completion->get_return_value();
+ aio_completion->release();
+
+ (t->*MF)(r);
+}
+
+} // namespace detail
+
+static const std::string RBD_DIFF_BANNER ("rbd diff v1\n");
+static const size_t RBD_DEFAULT_SPARSE_SIZE = 4096;
+
+static const std::string RBD_IMAGE_BANNER_V2 ("rbd image v2\n");
+static const std::string RBD_IMAGE_DIFFS_BANNER_V2 ("rbd image diffs v2\n");
+static const std::string RBD_DIFF_BANNER_V2 ("rbd diff v2\n");
+
+#define RBD_DIFF_FROM_SNAP 'f'
+#define RBD_DIFF_TO_SNAP 't'
+#define RBD_DIFF_IMAGE_SIZE 's'
+#define RBD_DIFF_WRITE 'w'
+#define RBD_DIFF_ZERO 'z'
+#define RBD_DIFF_END 'e'
+
+#define RBD_SNAP_PROTECTION_STATUS 'p'
+
+#define RBD_EXPORT_IMAGE_ORDER 'O'
+#define RBD_EXPORT_IMAGE_FEATURES 'T'
+#define RBD_EXPORT_IMAGE_STRIPE_UNIT 'U'
+#define RBD_EXPORT_IMAGE_STRIPE_COUNT 'C'
+#define RBD_EXPORT_IMAGE_META 'M'
+#define RBD_EXPORT_IMAGE_END 'E'
+
+enum SnapshotPresence {
+ SNAPSHOT_PRESENCE_NONE,
+ SNAPSHOT_PRESENCE_PERMITTED,
+ SNAPSHOT_PRESENCE_REQUIRED
+};
+
+enum SpecValidation {
+ SPEC_VALIDATION_FULL,
+ SPEC_VALIDATION_SNAP,
+ SPEC_VALIDATION_NONE
+};
+
+struct ProgressContext : public librbd::ProgressContext {
+ const char *operation;
+ bool progress;
+ int last_pc;
+
+ ProgressContext(const char *o, bool no_progress)
+ : operation(o), progress(!no_progress), last_pc(0) {
+ }
+
+ int update_progress(uint64_t offset, uint64_t total) override;
+ void finish();
+ void fail();
+};
+
+int get_percentage(uint64_t part, uint64_t whole);
+
+struct EncryptionOptions {
+ std::vector<librbd::encryption_spec_t> specs;
+
+ ~EncryptionOptions() {
+ for (auto& spec : specs) {
+ switch (spec.format) {
+ case RBD_ENCRYPTION_FORMAT_LUKS: {
+ auto opts =
+ static_cast<librbd::encryption_luks_format_options_t*>(spec.opts);
+ ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(),
+ opts->passphrase.size());
+ delete opts;
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS1: {
+ auto opts =
+ static_cast<librbd::encryption_luks1_format_options_t*>(spec.opts);
+ ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(),
+ opts->passphrase.size());
+ delete opts;
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS2: {
+ auto opts =
+ static_cast<librbd::encryption_luks2_format_options_t*>(spec.opts);
+ ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(),
+ opts->passphrase.size());
+ delete opts;
+ break;
+ }
+ default:
+ ceph_abort();
+ }
+ }
+ }
+};
+
+template <typename T, void(T::*MF)(int)>
+librbd::RBD::AioCompletion *create_aio_completion(T *t) {
+ return new librbd::RBD::AioCompletion(
+ t, &detail::aio_completion_callback<T, MF>);
+}
+
+void aio_context_callback(librbd::completion_t completion, void *arg);
+
+int read_string(int fd, unsigned max, std::string *out);
+
+int extract_spec(const std::string &spec, std::string *pool_name,
+ std::string *namespace_name, std::string *name,
+ std::string *snap_name, SpecValidation spec_validation);
+
+std::string get_positional_argument(
+ const boost::program_options::variables_map &vm, size_t index);
+
+void normalize_pool_name(std::string* pool_name);
+std::string get_default_pool_name();
+
+int get_image_or_snap_spec(const boost::program_options::variables_map &vm,
+ std::string *spec);
+
+void append_options_as_args(const std::vector<std::string> &options,
+ std::vector<std::string> *args);
+
+int get_pool_and_namespace_names(
+ const boost::program_options::variables_map &vm, bool validate_pool_name,
+ std::string* pool_name, std::string* namespace_name, size_t *arg_index);
+
+int get_pool_image_snapshot_names(
+ const boost::program_options::variables_map &vm,
+ argument_types::ArgumentModifier mod, size_t *spec_arg_index,
+ std::string *pool_name, std::string *namespace_name,
+ std::string *image_name, std::string *snap_name, bool image_name_required,
+ SnapshotPresence snapshot_presence, SpecValidation spec_validation);
+
+int get_pool_generic_snapshot_names(
+ const boost::program_options::variables_map &vm,
+ argument_types::ArgumentModifier mod, size_t *spec_arg_index,
+ const std::string& pool_key, std::string *pool_name,
+ std::string *namespace_name, const std::string& generic_key,
+ const std::string& generic_key_desc, std::string *generic_name,
+ std::string *snap_name, bool generic_name_required,
+ SnapshotPresence snapshot_presence, SpecValidation spec_validation);
+
+int get_pool_image_id(const boost::program_options::variables_map &vm,
+ size_t *spec_arg_index,
+ std::string *pool_name,
+ std::string *namespace_name,
+ std::string *image_id);
+
+int validate_snapshot_name(argument_types::ArgumentModifier mod,
+ const std::string &snap_name,
+ SnapshotPresence snapshot_presence,
+ SpecValidation spec_validation);
+
+int get_image_options(const boost::program_options::variables_map &vm,
+ bool get_format, librbd::ImageOptions* opts);
+
+int get_journal_options(const boost::program_options::variables_map &vm,
+ librbd::ImageOptions *opts);
+
+int get_flatten_option(const boost::program_options::variables_map &vm,
+ librbd::ImageOptions *opts);
+
+int get_image_size(const boost::program_options::variables_map &vm,
+ uint64_t *size);
+
+int get_path(const boost::program_options::variables_map &vm,
+ size_t *arg_index, std::string *path);
+
+int get_formatter(const boost::program_options::variables_map &vm,
+ argument_types::Format::Formatter *formatter);
+
+int get_snap_create_flags(const boost::program_options::variables_map &vm,
+ uint32_t *flags);
+
+int get_encryption_options(const boost::program_options::variables_map &vm,
+ EncryptionOptions* result);
+
+void init_context();
+
+int init_rados(librados::Rados *rados);
+
+int init(const std::string& pool_name, const std::string& namespace_name,
+ librados::Rados *rados, librados::IoCtx *io_ctx);
+int init_io_ctx(librados::Rados &rados, std::string pool_name,
+ const std::string& namespace_name, librados::IoCtx *io_ctx);
+int set_namespace(const std::string& namespace_name, librados::IoCtx *io_ctx);
+
+void disable_cache();
+
+int open_image(librados::IoCtx &io_ctx, const std::string &image_name,
+ bool read_only, librbd::Image *image);
+
+int open_image_by_id(librados::IoCtx &io_ctx, const std::string &image_id,
+ bool read_only, librbd::Image *image);
+
+int init_and_open_image(const std::string &pool_name,
+ const std::string &namespace_name,
+ const std::string &image_name,
+ const std::string &image_id,
+ const std::string &snap_name, bool read_only,
+ librados::Rados *rados, librados::IoCtx *io_ctx,
+ librbd::Image *image);
+
+int snap_set(librbd::Image &image, const std::string &snap_name);
+
+void calc_sparse_extent(const bufferptr &bp,
+ size_t sparse_size,
+ size_t buffer_offset,
+ uint64_t length,
+ size_t *write_length,
+ bool *zeroed);
+
+bool is_not_user_snap_namespace(librbd::Image* image,
+ const librbd::snap_info_t &snap_info);
+
+std::string image_id(librbd::Image& image);
+
+std::string mirror_image_mode(
+ librbd::mirror_image_mode_t mirror_image_mode);
+std::string mirror_image_state(
+ librbd::mirror_image_state_t mirror_image_state);
+std::string mirror_image_status_state(
+ librbd::mirror_image_status_state_t state);
+std::string mirror_image_site_status_state(
+ const librbd::mirror_image_site_status_t& status);
+std::string mirror_image_global_status_state(
+ const librbd::mirror_image_global_status_t& status);
+
+int get_local_mirror_image_status(
+ const librbd::mirror_image_global_status_t& status,
+ librbd::mirror_image_site_status_t* local_status);
+
+std::string timestr(time_t t);
+
+// duplicate here to not include librbd_internal lib
+uint64_t get_rbd_default_features(CephContext* cct);
+
+void get_mirror_peer_sites(
+ librados::IoCtx& io_ctx,
+ std::vector<librbd::mirror_peer_site_t>* mirror_peers);
+void get_mirror_peer_mirror_uuids_to_names(
+ const std::vector<librbd::mirror_peer_site_t>& mirror_peers,
+ std::map<std::string, std::string>* fsid_to_name);
+void populate_unknown_mirror_image_site_statuses(
+ const std::vector<librbd::mirror_peer_site_t>& mirror_peers,
+ librbd::mirror_image_global_status_t* global_status);
+
+int mgr_command(librados::Rados& rados, const std::string& cmd,
+ const std::map<std::string, std::string> &args,
+ std::ostream *out_os, std::ostream *err_os);
+
+} // namespace utils
+} // namespace rbd
+
+#endif // CEPH_RBD_UTILS_H
diff --git a/src/tools/rbd/action/Bench.cc b/src/tools/rbd/action/Bench.cc
new file mode 100644
index 000000000..061a76d33
--- /dev/null
+++ b/src/tools/rbd/action/Bench.cc
@@ -0,0 +1,589 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include "common/strtol.h"
+#include "common/ceph_mutex.h"
+#include "include/types.h"
+#include "global/signal_handler.h"
+#include <atomic>
+#include <chrono>
+#include <iostream>
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/accumulators/statistics/rolling_sum.hpp>
+#include <boost/program_options.hpp>
+
+using namespace std::chrono;
+
+static std::atomic<bool> terminating;
+static void handle_signal(int signum)
+{
+ ceph_assert(signum == SIGINT || signum == SIGTERM);
+ terminating = true;
+}
+
+namespace rbd {
+namespace action {
+namespace bench {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+enum io_type_t {
+ IO_TYPE_READ = 0,
+ IO_TYPE_WRITE,
+ IO_TYPE_RW,
+
+ IO_TYPE_NUM,
+};
+
+enum io_pattern_t {
+ IO_PATTERN_RAND,
+ IO_PATTERN_SEQ,
+ IO_PATTERN_FULL_SEQ
+};
+
+struct IOType {};
+struct Size {};
+struct IOPattern {};
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ Size *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+
+ std::string parse_error;
+ uint64_t size = strict_iecstrtoll(s, &parse_error);
+ if (!parse_error.empty()) {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+ v = boost::any(size);
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ IOPattern *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+ if (s == "rand") {
+ v = IO_PATTERN_RAND;
+ } else if (s == "seq") {
+ v = IO_PATTERN_SEQ;
+ } else if (s == "full-seq") {
+ v = IO_PATTERN_FULL_SEQ;
+ } else {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+}
+
+io_type_t get_io_type(std::string io_type_string) {
+ if (io_type_string == "read")
+ return IO_TYPE_READ;
+ else if (io_type_string == "write")
+ return IO_TYPE_WRITE;
+ else if (io_type_string == "readwrite" || io_type_string == "rw")
+ return IO_TYPE_RW;
+ else
+ return IO_TYPE_NUM;
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ IOType *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+ io_type_t io_type = get_io_type(s);
+ if (io_type >= IO_TYPE_NUM)
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ else
+ v = boost::any(io_type);
+}
+
+} // anonymous namespace
+
+static void rbd_bencher_completion(void *c, void *pc);
+struct rbd_bencher;
+
+struct bencher_completer {
+ rbd_bencher *bencher;
+ bufferlist *bl;
+
+public:
+ bencher_completer(rbd_bencher *bencher, bufferlist *bl)
+ : bencher(bencher), bl(bl)
+ { }
+
+ ~bencher_completer()
+ {
+ if (bl)
+ delete bl;
+ }
+};
+
+struct rbd_bencher {
+ librbd::Image *image;
+ ceph::mutex lock = ceph::make_mutex("rbd_bencher::lock");
+ ceph::condition_variable cond;
+ int in_flight;
+ io_type_t io_type;
+ uint64_t io_size;
+ bufferlist write_bl;
+
+ explicit rbd_bencher(librbd::Image *i, io_type_t io_type, uint64_t io_size)
+ : image(i),
+ in_flight(0),
+ io_type(io_type),
+ io_size(io_size)
+ {
+ if (io_type == IO_TYPE_WRITE || io_type == IO_TYPE_RW) {
+ bufferptr bp(io_size);
+ memset(bp.c_str(), rand() & 0xff, io_size);
+ write_bl.push_back(bp);
+ }
+ }
+
+ void start_io(int max, uint64_t off, uint64_t len, int op_flags, bool read_flag)
+ {
+ {
+ std::lock_guard l{lock};
+ in_flight++;
+ }
+
+ librbd::RBD::AioCompletion *c;
+ if (read_flag) {
+ bufferlist *read_bl = new bufferlist();
+ c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, read_bl)),
+ rbd_bencher_completion);
+ image->aio_read2(off, len, *read_bl, c, op_flags);
+ } else {
+ c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, NULL)),
+ rbd_bencher_completion);
+ image->aio_write2(off, len, write_bl, c, op_flags);
+ }
+ }
+
+ int wait_for(int max, bool interrupt_on_terminating) {
+ std::unique_lock l{lock};
+ while (in_flight > max && !(terminating && interrupt_on_terminating)) {
+ cond.wait_for(l, 200ms);
+ }
+
+ return terminating ? -EINTR : 0;
+ }
+
+};
+
+void rbd_bencher_completion(void *vc, void *pc)
+{
+ librbd::RBD::AioCompletion *c = (librbd::RBD::AioCompletion *)vc;
+ bencher_completer *bc = static_cast<bencher_completer *>(pc);
+ rbd_bencher *b = bc->bencher;
+ //cout << "complete " << c << std::endl;
+ int ret = c->get_return_value();
+ if (b->io_type == IO_TYPE_WRITE && ret != 0) {
+ std::cout << "write error: " << cpp_strerror(ret) << std::endl;
+ exit(ret < 0 ? -ret : ret);
+ } else if (b->io_type == IO_TYPE_READ && (unsigned int)ret != b->io_size) {
+ std::cout << "read error: " << cpp_strerror(ret) << std::endl;
+ exit(ret < 0 ? -ret : ret);
+ }
+ b->lock.lock();
+ b->in_flight--;
+ b->cond.notify_all();
+ b->lock.unlock();
+ c->release();
+ delete bc;
+}
+
+bool should_read(uint64_t read_proportion)
+{
+ uint64_t rand_num = rand() % 100;
+
+ if (rand_num < read_proportion)
+ return true;
+ else
+ return false;
+}
+
+int do_bench(librbd::Image& image, io_type_t io_type,
+ uint64_t io_size, uint64_t io_threads,
+ uint64_t io_bytes, io_pattern_t io_pattern,
+ uint64_t read_proportion)
+{
+ uint64_t size = 0;
+ image.size(&size);
+ if (io_size > size) {
+ std::cerr << "rbd: io-size " << byte_u_t(io_size) << " "
+ << "larger than image size " << byte_u_t(size) << std::endl;
+ return -EINVAL;
+ }
+
+ if (io_size > std::numeric_limits<uint32_t>::max()) {
+ std::cerr << "rbd: io-size should be less than 4G" << std::endl;
+ return -EINVAL;
+ }
+
+ int r = image.flush();
+ if (r < 0 && (r != -EROFS || io_type != IO_TYPE_READ)) {
+ std::cerr << "rbd: failed to flush: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ rbd_bencher b(&image, io_type, io_size);
+
+ std::cout << "bench "
+ << " type " << (io_type == IO_TYPE_READ ? "read" :
+ io_type == IO_TYPE_WRITE ? "write" : "readwrite")
+ << (io_type == IO_TYPE_RW ? " read:write=" +
+ std::to_string(read_proportion) + ":" +
+ std::to_string(100 - read_proportion) : "")
+ << " io_size " << io_size
+ << " io_threads " << io_threads
+ << " bytes " << io_bytes
+ << " pattern ";
+ switch (io_pattern) {
+ case IO_PATTERN_RAND:
+ std::cout << "random";
+ break;
+ case IO_PATTERN_SEQ:
+ std::cout << "sequential";
+ break;
+ case IO_PATTERN_FULL_SEQ:
+ std::cout << "full sequential";
+ break;
+ default:
+ ceph_assert(false);
+ break;
+ }
+ std::cout << std::endl;
+
+ srand(time(NULL) % (unsigned long) -1);
+
+ coarse_mono_time start = coarse_mono_clock::now();
+ std::chrono::duration<double> last = std::chrono::duration<double>::zero();
+ uint64_t ios = 0;
+
+ std::vector<uint64_t> thread_offset;
+ uint64_t i;
+ uint64_t seq_chunk_length = (size / io_size / io_threads) * io_size;;
+
+ // disturb all thread's offset
+ for (i = 0; i < io_threads; i++) {
+ uint64_t start_pos = 0;
+ switch (io_pattern) {
+ case IO_PATTERN_RAND:
+ start_pos = (rand() % (size / io_size)) * io_size;
+ break;
+ case IO_PATTERN_SEQ:
+ start_pos = seq_chunk_length * i;
+ break;
+ case IO_PATTERN_FULL_SEQ:
+ start_pos = i * io_size;
+ break;
+ default:
+ break;
+ }
+ thread_offset.push_back(start_pos);
+ }
+
+ const int WINDOW_SIZE = 5;
+ typedef boost::accumulators::accumulator_set<
+ double, boost::accumulators::stats<
+ boost::accumulators::tag::rolling_sum> > RollingSum;
+
+ RollingSum time_acc(
+ boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE);
+ RollingSum ios_acc(
+ boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE);
+ RollingSum off_acc(
+ boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE);
+ uint64_t cur_ios = 0;
+ uint64_t cur_off = 0;
+
+ int op_flags;
+ if (io_pattern == IO_PATTERN_RAND) {
+ op_flags = LIBRADOS_OP_FLAG_FADVISE_RANDOM;
+ } else {
+ op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL;
+ }
+
+ printf(" SEC OPS OPS/SEC BYTES/SEC\n");
+ uint64_t off;
+ int read_ops = 0;
+ int write_ops = 0;
+
+ for (off = 0; off < io_bytes; ) {
+ // Issue I/O
+ i = 0;
+ int r = 0;
+ while (i < io_threads && off < io_bytes) {
+ bool read_flag = should_read(read_proportion);
+
+ r = b.wait_for(io_threads - 1, true);
+ if (r < 0) {
+ break;
+ }
+ b.start_io(io_threads, thread_offset[i], io_size, op_flags, read_flag);
+
+ ++i;
+ ++ios;
+ off += io_size;
+
+ ++cur_ios;
+ cur_off += io_size;
+
+ if (read_flag)
+ read_ops++;
+ else
+ write_ops++;
+ }
+
+ if (r < 0) {
+ break;
+ }
+
+ // Set the thread_offsets of next I/O
+ for (i = 0; i < io_threads; ++i) {
+ switch (io_pattern) {
+ case IO_PATTERN_RAND:
+ thread_offset[i] = (rand() % (size / io_size)) * io_size;
+ continue;
+ case IO_PATTERN_SEQ:
+ if (off < (seq_chunk_length * io_threads)) {
+ thread_offset[i] += io_size;
+ } else {
+ // thread_offset is adjusted to the chunks unassigned to threads.
+ thread_offset[i] = off + (i * io_size);
+ }
+ if (thread_offset[i] + io_size > size) {
+ thread_offset[i] = seq_chunk_length * i;
+ }
+ break;
+ case IO_PATTERN_FULL_SEQ:
+ thread_offset[i] += (io_size * io_threads);
+ if (thread_offset[i] >= size) {
+ thread_offset[i] = i * io_size;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ coarse_mono_time now = coarse_mono_clock::now();
+ std::chrono::duration<double> elapsed = now - start;
+ if (last == std::chrono::duration<double>::zero()) {
+ last = elapsed;
+ } else if ((int)elapsed.count() != (int)last.count()) {
+ time_acc((elapsed - last).count());
+ ios_acc(static_cast<double>(cur_ios));
+ off_acc(static_cast<double>(cur_off));
+ cur_ios = 0;
+ cur_off = 0;
+
+ double time_sum = boost::accumulators::rolling_sum(time_acc);
+ std::cout.width(5);
+ std::cout << (int)elapsed.count();
+ std::cout.width(10);
+ std::cout << ios - io_threads;
+ std::cout.width(10);
+ std::cout << boost::accumulators::rolling_sum(ios_acc) / time_sum;
+ std::cout.width(10);
+ std::cout << byte_u_t(boost::accumulators::rolling_sum(off_acc) / time_sum) << "/s"
+ << std::endl;
+ last = elapsed;
+ }
+ }
+ b.wait_for(0, false);
+
+ if (io_type != IO_TYPE_READ) {
+ r = image.flush();
+ if (r < 0) {
+ std::cerr << "rbd: failed to flush at the end: " << cpp_strerror(r)
+ << std::endl;
+ }
+ }
+
+ coarse_mono_time now = coarse_mono_clock::now();
+ std::chrono::duration<double> elapsed = now - start;
+
+ std::cout << "elapsed: " << (int)elapsed.count() << " "
+ << "ops: " << ios << " "
+ << "ops/sec: " << (double)ios / elapsed.count() << " "
+ << "bytes/sec: " << byte_u_t((double)off / elapsed.count()) << "/s"
+ << std::endl;
+
+ if (io_type == IO_TYPE_RW) {
+ std::cout << "read_ops: " << read_ops << " "
+ << "read_ops/sec: " << (double)read_ops / elapsed.count() << " "
+ << "read_bytes/sec: " << byte_u_t((double)read_ops * io_size / elapsed.count()) << "/s"
+ << std::endl;
+
+ std::cout << "write_ops: " << write_ops << " "
+ << "write_ops/sec: " << (double)write_ops / elapsed.count() << " "
+ << "write_bytes/sec: " << byte_u_t((double)write_ops * io_size / elapsed.count()) << "/s"
+ << std::endl;
+
+ }
+
+ return 0;
+}
+
+void add_bench_common_options(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+
+ options->add_options()
+ ("io-size", po::value<Size>(), "IO size (in B/K/M/G) (< 4G) [default: 4K]")
+ ("io-threads", po::value<uint32_t>(), "ios in flight [default: 16]")
+ ("io-total", po::value<Size>(), "total size for IO (in B/K/M/G/T) [default: 1G]")
+ ("io-pattern", po::value<IOPattern>(), "IO pattern (rand, seq, or full-seq) [default: seq]")
+ ("rw-mix-read", po::value<uint64_t>(), "read proportion in readwrite (<= 100) [default: 50]");
+}
+
+void get_arguments_for_write(po::options_description *positional,
+ po::options_description *options) {
+ add_bench_common_options(positional, options);
+}
+
+void get_arguments_for_bench(po::options_description *positional,
+ po::options_description *options) {
+ add_bench_common_options(positional, options);
+
+ options->add_options()
+ ("io-type", po::value<IOType>()->required(), "IO type (read, write, or readwrite(rw))");
+}
+
+int bench_execute(const po::variables_map &vm, io_type_t bench_io_type) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ utils::SnapshotPresence snap_presence = utils::SNAPSHOT_PRESENCE_NONE;
+ if (bench_io_type == IO_TYPE_READ)
+ snap_presence = utils::SNAPSHOT_PRESENCE_PERMITTED;
+
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, snap_presence, utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t bench_io_size;
+ if (vm.count("io-size")) {
+ bench_io_size = vm["io-size"].as<uint64_t>();
+ } else {
+ bench_io_size = 4096;
+ }
+ if (bench_io_size == 0) {
+ std::cerr << "rbd: --io-size should be greater than zero." << std::endl;
+ return -EINVAL;
+ }
+
+ uint32_t bench_io_threads;
+ if (vm.count("io-threads")) {
+ bench_io_threads = vm["io-threads"].as<uint32_t>();
+ } else {
+ bench_io_threads = 16;
+ }
+ if (bench_io_threads == 0) {
+ std::cerr << "rbd: --io-threads should be greater than zero." << std::endl;
+ return -EINVAL;
+ }
+
+ uint64_t bench_bytes;
+ if (vm.count("io-total")) {
+ bench_bytes = vm["io-total"].as<uint64_t>();
+ } else {
+ bench_bytes = 1 << 30;
+ }
+
+ io_pattern_t bench_pattern;
+ if (vm.count("io-pattern")) {
+ bench_pattern = vm["io-pattern"].as<io_pattern_t>();
+ } else {
+ bench_pattern = IO_PATTERN_SEQ;
+ }
+
+ uint64_t bench_read_proportion;
+ if (bench_io_type == IO_TYPE_READ) {
+ bench_read_proportion = 100;
+ } else if (bench_io_type == IO_TYPE_WRITE) {
+ bench_read_proportion = 0;
+ } else {
+ if (vm.count("rw-mix-read")) {
+ bench_read_proportion = vm["rw-mix-read"].as<uint64_t>();
+ } else {
+ bench_read_proportion = 50;
+ }
+
+ if (bench_read_proportion > 100) {
+ std::cerr << "rbd: --rw-mix-read should not be larger than 100." << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
+ snap_name, false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ init_async_signal_handler();
+ register_async_signal_handler(SIGHUP, sighup_handler);
+ register_async_signal_handler_oneshot(SIGINT, handle_signal);
+ register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+ r = do_bench(image, bench_io_type, bench_io_size, bench_io_threads,
+ bench_bytes, bench_pattern, bench_read_proportion);
+
+ unregister_async_signal_handler(SIGHUP, sighup_handler);
+ unregister_async_signal_handler(SIGINT, handle_signal);
+ unregister_async_signal_handler(SIGTERM, handle_signal);
+ shutdown_async_signal_handler();
+
+ if (r < 0) {
+ std::cerr << "bench failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+int execute_for_write(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::cerr << "rbd: bench-write is deprecated, use rbd bench --io-type write ..." << std::endl;
+ return bench_execute(vm, IO_TYPE_WRITE);
+}
+
+int execute_for_bench(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ io_type_t bench_io_type;
+ if (vm.count("io-type")) {
+ bench_io_type = vm["io-type"].as<io_type_t>();
+ } else {
+ std::cerr << "rbd: --io-type must be specified." << std::endl;
+ return -EINVAL;
+ }
+
+ return bench_execute(vm, bench_io_type);
+}
+
+Shell::Action action_write(
+ {"bench-write"}, {}, "Simple write benchmark. (Deprecated, please use `rbd bench --io-type write` instead.)",
+ "", &get_arguments_for_write, &execute_for_write, false);
+
+Shell::Action action_bench(
+ {"bench"}, {}, "Simple benchmark.", "", &get_arguments_for_bench, &execute_for_bench);
+
+} // namespace bench
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Children.cc b/src/tools/rbd/action/Children.cc
new file mode 100644
index 000000000..58e861b69
--- /dev/null
+++ b/src/tools/rbd/action/Children.cc
@@ -0,0 +1,167 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace children {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+int do_list_children(librados::IoCtx &io_ctx, librbd::Image &image,
+ bool all_flag, bool descendants_flag, Formatter *f)
+{
+ std::vector<librbd::linked_image_spec_t> children;
+ librbd::RBD rbd;
+ int r;
+ if (descendants_flag) {
+ r = image.list_descendants(&children);
+ } else {
+ r = image.list_children3(&children);
+ }
+ if (r < 0)
+ return r;
+
+ if (f)
+ f->open_array_section("children");
+
+ for (auto& child : children) {
+ bool trash = child.trash;
+ if (f) {
+ if (all_flag) {
+ f->open_object_section("child");
+ f->dump_string("pool", child.pool_name);
+ f->dump_string("pool_namespace", child.pool_namespace);
+ f->dump_string("image", child.image_name);
+ f->dump_string("id", child.image_id);
+ f->dump_bool("trash", child.trash);
+ f->close_section();
+ } else if (!trash) {
+ f->open_object_section("child");
+ f->dump_string("pool", child.pool_name);
+ f->dump_string("pool_namespace", child.pool_namespace);
+ f->dump_string("image", child.image_name);
+ f->close_section();
+ }
+ } else if (all_flag || !trash) {
+ if (child.pool_name.empty()) {
+ std::cout << "(child missing " << child.pool_id << "/";
+ } else {
+ std::cout << child.pool_name << "/";
+ }
+ if (!child.pool_namespace.empty()) {
+ std::cout << child.pool_namespace << "/";
+ }
+ if (child.image_name.empty()) {
+ std::cout << child.image_id << ")";
+ } else {
+ std::cout << child.image_name;
+ if (trash) {
+ std::cout << " (trash " << child.image_id << ")";
+ }
+ }
+ std::cout << std::endl;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ }
+
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_NONE);
+ at::add_snap_id_option(options);
+ options->add_options()
+ ("all,a", po::bool_switch(), "list all children (include trash)");
+ options->add_options()
+ ("descendants", po::bool_switch(), "include all descendants");
+ at::add_format_options(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ uint64_t snap_id = LIBRADOS_SNAP_HEAD;
+ if (vm.count(at::SNAPSHOT_ID)) {
+ snap_id = vm[at::SNAPSHOT_ID].as<uint64_t>();
+ }
+
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ if (snap_id != LIBRADOS_SNAP_HEAD && !snap_name.empty()) {
+ std::cerr << "rbd: trying to access snapshot using both name and id."
+ << std::endl;
+ return -EINVAL;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!snap_name.empty()) {
+ r = image.snap_set(snap_name.c_str());
+ } else if (snap_id != LIBRADOS_SNAP_HEAD) {
+ r = image.snap_set_by_id(snap_id);
+ }
+ if (r == -ENOENT) {
+ std::cerr << "rbd: snapshot does not exist." << std::endl;
+ return r;
+ } else if (r < 0) {
+ std::cerr << "rbd: error setting snapshot: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ r = do_list_children(io_ctx, image, vm["all"].as<bool>(),
+ vm["descendants"].as<bool>(), formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: listing children failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::SwitchArguments switched_arguments({"all", "a", "descendants"});
+Shell::Action action(
+ {"children"}, {}, "Display children of an image or its snapshot.", "",
+ &get_arguments, &execute);
+
+} // namespace children
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Clone.cc b/src/tools/rbd/action/Clone.cc
new file mode 100644
index 000000000..6406c957e
--- /dev/null
+++ b/src/tools/rbd/action/Clone.cc
@@ -0,0 +1,99 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace clone {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+int do_clone(librbd::RBD &rbd, librados::IoCtx &p_ioctx,
+ const char *p_name, const char *p_snapname,
+ librados::IoCtx &c_ioctx, const char *c_name,
+ librbd::ImageOptions& opts) {
+ return rbd.clone3(p_ioctx, p_name, p_snapname, c_ioctx, c_name, opts);
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE);
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
+ at::add_create_image_options(options, false);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string dst_pool_name;
+ std::string dst_namespace_name;
+ std::string dst_image_name;
+ std::string dst_snap_name;
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name,
+ &dst_namespace_name, &dst_image_name, &dst_snap_name, true,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::ImageOptions opts;
+ r = utils::get_image_options(vm, false, &opts);
+ if (r < 0) {
+ return r;
+ }
+ opts.set(RBD_IMAGE_OPTION_FORMAT, static_cast<uint64_t>(2));
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx dst_io_ctx;
+ r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, &dst_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = do_clone(rbd, io_ctx, image_name.c_str(), snap_name.c_str(), dst_io_ctx,
+ dst_image_name.c_str(), opts);
+ if (r == -EXDEV) {
+ std::cerr << "rbd: clone v2 required for cross-namespace clones."
+ << std::endl;
+ return r;
+ } else if (r < 0) {
+ std::cerr << "rbd: clone error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"clone"}, {}, "Clone a snapshot into a CoW child image.",
+ at::get_long_features_help(), &get_arguments, &execute);
+
+} // namespace clone
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Config.cc b/src/tools/rbd/action/Config.cc
new file mode 100644
index 000000000..b038485ce
--- /dev/null
+++ b/src/tools/rbd/action/Config.cc
@@ -0,0 +1,891 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "common/ceph_context.h"
+#include "common/ceph_json.h"
+#include "common/escape.h"
+#include "common/errno.h"
+#include "common/options.h"
+#include "global/global_context.h"
+#include "include/stringify.h"
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+
+#include <iostream>
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/program_options.hpp>
+
+#include "json_spirit/json_spirit.h"
+
+namespace rbd {
+namespace action {
+namespace config {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+const std::string METADATA_CONF_PREFIX = "conf_";
+
+void add_config_entity_option(
+ boost::program_options::options_description *positional) {
+ positional->add_options()
+ ("config-entity", "config entity (global, client, client.<id>)");
+}
+
+void add_pool_option(boost::program_options::options_description *positional) {
+ positional->add_options()
+ ("pool-name", "pool name");
+}
+
+void add_key_option(po::options_description *positional) {
+ positional->add_options()
+ ("key", "config key");
+}
+
+int get_config_entity(const po::variables_map &vm, std::string *config_entity) {
+ *config_entity = utils::get_positional_argument(vm, 0);
+
+ if (*config_entity != "global" && *config_entity != "client" &&
+ !boost::starts_with(*config_entity, ("client."))) {
+ std::cerr << "rbd: invalid config entity: " << *config_entity
+ << " (must be global, client or client.<id>)" << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int get_pool(const po::variables_map &vm, std::string *pool_name) {
+ *pool_name = utils::get_positional_argument(vm, 0);
+ if (pool_name->empty()) {
+ std::cerr << "rbd: pool name was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int get_key(const po::variables_map &vm, size_t *arg_index,
+ std::string *key) {
+ *key = utils::get_positional_argument(vm, *arg_index);
+ if (key->empty()) {
+ std::cerr << "rbd: config key was not specified" << std::endl;
+ return -EINVAL;
+ } else {
+ ++(*arg_index);
+ }
+
+ if (!boost::starts_with(*key, "rbd_")) {
+ std::cerr << "rbd: not rbd option: " << *key << std::endl;
+ return -EINVAL;
+ }
+
+ std::string value;
+ int r = g_ceph_context->_conf.get_val(key->c_str(), &value);
+ if (r < 0) {
+ std::cerr << "rbd: invalid config key: " << *key << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+std::ostream& operator<<(std::ostream& os,
+ const librbd::config_source_t& source) {
+ switch (source) {
+ case RBD_CONFIG_SOURCE_CONFIG:
+ os << "config";
+ break;
+ case RBD_CONFIG_SOURCE_POOL:
+ os << "pool";
+ break;
+ case RBD_CONFIG_SOURCE_IMAGE:
+ os << "image";
+ break;
+ default:
+ os << "unknown (" << static_cast<uint32_t>(source) << ")";
+ break;
+ }
+ return os;
+}
+
+int config_global_list(
+ librados::Rados &rados, const std::string &config_entity,
+ std::map<std::string, std::pair<std::string, std::string>> *options) {
+ bool client_id_config_entity =
+ boost::starts_with(config_entity, ("client."));
+ std::string cmd =
+ "{"
+ "\"prefix\": \"config dump\", "
+ "\"format\": \"json\" "
+ "}";
+ bufferlist in_bl;
+ bufferlist out_bl;
+ std::string ss;
+ int r = rados.mon_command(cmd, in_bl, &out_bl, &ss);
+ if (r < 0) {
+ std::cerr << "rbd: error reading config: " << ss << std::endl;
+ return r;
+ }
+
+ json_spirit::mValue json_root;
+ if (!json_spirit::read(out_bl.to_str(), json_root)) {
+ std::cerr << "rbd: error parsing config dump" << std::endl;
+ return -EINVAL;
+ }
+
+ try {
+ auto &json_array = json_root.get_array();
+ for (auto& e : json_array) {
+ auto &json_obj = e.get_obj();
+ std::string section;
+ std::string name;
+ std::string value;
+
+ for (auto &pairs : json_obj) {
+ if (pairs.first == "section") {
+ section = pairs.second.get_str();
+ } else if (pairs.first == "name") {
+ name = pairs.second.get_str();
+ } else if (pairs.first == "value") {
+ value = pairs.second.get_str();
+ }
+ }
+
+ if (!boost::starts_with(name, "rbd_")) {
+ continue;
+ }
+ if (section != "global" && section != "client" &&
+ (!client_id_config_entity || section != config_entity)) {
+ continue;
+ }
+ if (config_entity == "global" && section != "global") {
+ continue;
+ }
+ auto it = options->find(name);
+ if (it == options->end()) {
+ (*options)[name] = {value, section};
+ continue;
+ }
+ if (section == "client") {
+ if (it->second.second == "global") {
+ it->second = {value, section};
+ }
+ } else if (client_id_config_entity) {
+ it->second = {value, section};
+ }
+ }
+ } catch (std::runtime_error &e) {
+ std::cerr << "rbd: error parsing config dump: " << e.what() << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+} // anonymous namespace
+
+void get_global_get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_config_entity_option(positional);
+ add_key_option(positional);
+}
+
+int execute_global_get(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string config_entity;
+ int r = get_config_entity(vm, &config_entity);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ size_t arg_index = 1;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ std::map<std::string, std::pair<std::string, std::string>> options;
+ r = config_global_list(rados, config_entity, &options);
+ if (r < 0) {
+ return r;
+ }
+
+ auto it = options.find(key);
+
+ if (it == options.end() || it->second.second != config_entity) {
+ std::cerr << "rbd: " << key << " is not set" << std::endl;
+ return -ENOENT;
+ }
+
+ std::cout << it->second.first << std::endl;
+ return 0;
+}
+
+void get_global_set_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_config_entity_option(positional);
+ add_key_option(positional);
+ positional->add_options()
+ ("value", "config value");
+}
+
+int execute_global_set(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string config_entity;
+ int r = get_config_entity(vm, &config_entity);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ size_t arg_index = 1;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string value = utils::get_positional_argument(vm, 2);
+ std::string cmd =
+ "{"
+ "\"prefix\": \"config set\", "
+ "\"who\": \"" + stringify(json_stream_escaper(config_entity)) + "\", "
+ "\"name\": \"" + key + "\", "
+ "\"value\": \"" + stringify(json_stream_escaper(value)) + "\""
+ "}";
+ bufferlist in_bl;
+ std::string ss;
+ r = rados.mon_command(cmd, in_bl, nullptr, &ss);
+ if (r < 0) {
+ std::cerr << "rbd: error setting " << key << ": " << ss << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_global_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_config_entity_option(positional);
+ add_key_option(positional);
+}
+
+int execute_global_remove(
+ const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string config_entity;
+ int r = get_config_entity(vm, &config_entity);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ size_t arg_index = 1;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string cmd =
+ "{"
+ "\"prefix\": \"config rm\", "
+ "\"who\": \"" + stringify(json_stream_escaper(config_entity)) + "\", "
+ "\"name\": \"" + key + "\""
+ "}";
+ bufferlist in_bl;
+ std::string ss;
+ r = rados.mon_command(cmd, in_bl, nullptr, &ss);
+ if (r < 0) {
+ std::cerr << "rbd: error removing " << key << ": " << ss << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_global_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_config_entity_option(positional);
+ at::add_format_options(options);
+}
+
+int execute_global_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string config_entity;
+ int r = get_config_entity(vm, &config_entity);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter f;
+ r = utils::get_formatter(vm, &f);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ std::map<std::string, std::pair<std::string, std::string>> options;
+ r = config_global_list(rados, config_entity, &options);
+ if (r < 0) {
+ return r;
+ }
+
+ if (options.empty() && !f) {
+ return 0;
+ }
+
+ TextTable tbl;
+
+ if (f) {
+ f->open_array_section("config");
+ } else {
+ tbl.define_column("Name", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("Section", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ for (const auto &it : options) {
+ if (f) {
+ f->open_object_section("option");
+ f->dump_string("name", it.first);
+ f->dump_string("value", it.second.first);
+ f->dump_string("section", it.second.second);
+ f->close_section();
+ } else {
+ tbl << it.first << it.second.first << it.second.second
+ << TextTable::endrow;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ std::cout << tbl;
+ }
+
+ return 0;
+}
+
+void get_pool_get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_pool_option(positional);
+ add_key_option(positional);
+}
+
+int execute_pool_get(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ int r = get_pool(vm, &pool_name);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ size_t arg_index = 1;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ std::string value;
+
+ r = rbd.pool_metadata_get(io_ctx, METADATA_CONF_PREFIX + key, &value);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ std::cerr << "rbd: " << key << " is not set" << std::endl;
+ } else {
+ std::cerr << "rbd: failed to get " << key << ": " << cpp_strerror(r)
+ << std::endl;
+ }
+ return r;
+ }
+
+ std::cout << value << std::endl;
+ return 0;
+}
+
+void get_pool_set_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_pool_option(positional);
+ add_key_option(positional);
+ positional->add_options()
+ ("value", "config value");
+}
+
+int execute_pool_set(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ int r = get_pool(vm, &pool_name);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ size_t arg_index = 1;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string value = utils::get_positional_argument(vm, 2);
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.pool_metadata_set(io_ctx, METADATA_CONF_PREFIX + key, value);
+ if (r < 0) {
+ std::cerr << "rbd: failed to set " << key << ": " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_pool_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_pool_option(positional);
+ add_key_option(positional);
+}
+
+int execute_pool_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ int r = get_pool(vm, &pool_name);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ size_t arg_index = 1;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.pool_metadata_remove(io_ctx, METADATA_CONF_PREFIX + key);
+ if (r < 0) {
+ std::cerr << "rbd: failed to remove " << key << ": " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_pool_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_pool_option(positional);
+ at::add_format_options(options);
+}
+
+int execute_pool_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ int r = get_pool(vm, &pool_name);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter f;
+ r = utils::get_formatter(vm, &f);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ TextTable tbl;
+ librbd::RBD rbd;
+ std::vector<librbd::config_option_t> options;
+
+ r = rbd.config_list(io_ctx, &options);
+ if (r < 0) {
+ std::cerr << "rbd: failed to list config: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ if (f) {
+ f->open_array_section("config");
+ } else {
+ tbl.define_column("Name", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("Source", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ for (auto &option : options) {
+ if (f) {
+ f->open_object_section("option");
+ f->dump_string("name", option.name);
+ f->dump_string("value", option.value);
+ f->dump_stream("source") << option.source;
+ f->close_section();
+ } else {
+ std::ostringstream source;
+ source << option.source;
+ tbl << option.name << option.value << source.str() << TextTable::endrow;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ std::cout << tbl;
+ }
+
+ return 0;
+}
+
+void get_image_get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ add_key_option(positional);
+}
+
+int execute_image_get(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string value;
+
+ r = image.metadata_get(METADATA_CONF_PREFIX + key, &value);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ std::cerr << "rbd: " << key << " is not set" << std::endl;
+ } else {
+ std::cerr << "rbd: failed to get " << key << ": " << cpp_strerror(r)
+ << std::endl;
+ }
+ return r;
+ }
+
+ std::cout << value << std::endl;
+ return 0;
+}
+
+void get_image_set_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ add_key_option(positional);
+ positional->add_options()
+ ("value", "config value");
+}
+
+int execute_image_set(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string value = utils::get_positional_argument(vm, arg_index);
+ if (value.empty()) {
+ std::cerr << "rbd: image config value was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = image.metadata_set(METADATA_CONF_PREFIX + key, value);
+ if (r < 0) {
+ std::cerr << "rbd: failed to set " << key << ": " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_image_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ add_key_option(positional);
+}
+
+int execute_image_remove(
+ const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = image.metadata_remove(METADATA_CONF_PREFIX + key);
+ if (r < 0) {
+ std::cerr << "rbd: failed to remove " << key << ": " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_image_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_format_options(options);
+}
+
+int execute_image_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter f;
+ r = utils::get_formatter(vm, &f);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ TextTable tbl;
+ std::vector<librbd::config_option_t> options;
+
+ r = image.config_list(&options);
+ if (r < 0) {
+ std::cerr << "rbd: failed to list config: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ if (options.empty()) {
+ if (f == nullptr) {
+ std::cout << "There are no values" << std::endl;
+ }
+ return 0;
+ }
+
+ if (f) {
+ f->open_array_section("config");
+ } else {
+ tbl.define_column("Name", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("Source", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ for (auto &option : options) {
+ if (f) {
+ f->open_object_section("option");
+ f->dump_string("name", option.name);
+ f->dump_string("value", option.value);
+ f->dump_stream("source") << option.source;
+ f->close_section();
+ } else {
+ std::ostringstream source;
+ source << option.source;
+ tbl << option.name << option.value << source.str() << TextTable::endrow;
+ }
+ }
+
+ if (f == nullptr) {
+ bool single = (options.size() == 1);
+ std::cout << "There " << (single ? "is" : "are") << " " << options.size()
+ << " " << (single ? "value" : "values") << ":" << std::endl;
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ std::cout << tbl;
+ }
+
+ return 0;
+}
+
+Shell::Action action_global_get(
+ {"config", "global", "get"}, {},
+ "Get a global-level configuration override.", "",
+ &get_global_get_arguments, &execute_global_get);
+Shell::Action action_global_set(
+ {"config", "global", "set"}, {},
+ "Set a global-level configuration override.", "",
+ &get_global_set_arguments, &execute_global_set);
+Shell::Action action_global_remove(
+ {"config", "global", "remove"}, {"config", "global", "rm"},
+ "Remove a global-level configuration override.", "",
+ &get_global_remove_arguments, &execute_global_remove);
+Shell::Action action_global_list(
+ {"config", "global", "list"}, {"config", "global", "ls"},
+ "List global-level configuration overrides.", "",
+ &get_global_list_arguments, &execute_global_list);
+
+Shell::Action action_pool_get(
+ {"config", "pool", "get"}, {}, "Get a pool-level configuration override.", "",
+ &get_pool_get_arguments, &execute_pool_get);
+Shell::Action action_pool_set(
+ {"config", "pool", "set"}, {}, "Set a pool-level configuration override.", "",
+ &get_pool_set_arguments, &execute_pool_set);
+Shell::Action action_pool_remove(
+ {"config", "pool", "remove"}, {"config", "pool", "rm"},
+ "Remove a pool-level configuration override.", "",
+ &get_pool_remove_arguments, &execute_pool_remove);
+Shell::Action action_pool_list(
+ {"config", "pool", "list"}, {"config", "pool", "ls"},
+ "List pool-level configuration overrides.", "",
+ &get_pool_list_arguments, &execute_pool_list);
+
+Shell::Action action_image_get(
+ {"config", "image", "get"}, {}, "Get an image-level configuration override.",
+ "", &get_image_get_arguments, &execute_image_get);
+Shell::Action action_image_set(
+ {"config", "image", "set"}, {}, "Set an image-level configuration override.",
+ "", &get_image_set_arguments, &execute_image_set);
+Shell::Action action_image_remove(
+ {"config", "image", "remove"}, {"config", "image", "rm"},
+ "Remove an image-level configuration override.", "",
+ &get_image_remove_arguments, &execute_image_remove);
+Shell::Action action_image_list(
+ {"config", "image", "list"}, {"config", "image", "ls"},
+ "List image-level configuration overrides.", "",
+ &get_image_list_arguments, &execute_image_list);
+
+} // namespace config
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Copy.cc b/src/tools/rbd/action/Copy.cc
new file mode 100644
index 000000000..9e42c0652
--- /dev/null
+++ b/src/tools/rbd/action/Copy.cc
@@ -0,0 +1,195 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace copy {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_copy(librbd::Image &src, librados::IoCtx& dest_pp,
+ const char *destname, librbd::ImageOptions& opts,
+ bool no_progress,
+ size_t sparse_size)
+{
+ utils::ProgressContext pc("Image copy", no_progress);
+ int r = src.copy_with_progress4(dest_pp, destname, opts, pc, sparse_size);
+ if (r < 0){
+ pc.fail();
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_SOURCE);
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
+ at::add_create_image_options(options, false);
+ at::add_sparse_size_option(options);
+ at::add_no_progress_option(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string dst_pool_name;
+ std::string dst_namespace_name;
+ std::string dst_image_name;
+ std::string dst_snap_name;
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name,
+ &dst_namespace_name, &dst_image_name, &dst_snap_name, true,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::ImageOptions opts;
+ r = utils::get_image_options(vm, false, &opts);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
+ snap_name, true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx dst_io_ctx;
+ r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, &dst_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE;
+ if (vm.count(at::IMAGE_SPARSE_SIZE)) {
+ sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>();
+ }
+ r = do_copy(image, dst_io_ctx, dst_image_name.c_str(), opts,
+ vm[at::NO_PROGRESS].as<bool>(), sparse_size);
+ if (r < 0) {
+ std::cerr << "rbd: copy failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"copy"}, {"cp"}, "Copy src image to dest.", at::get_long_features_help(),
+ &get_arguments, &execute);
+
+static int do_deep_copy(librbd::Image &src, librados::IoCtx& dest_pp,
+ const char *destname, librbd::ImageOptions& opts,
+ bool no_progress)
+{
+ utils::ProgressContext pc("Image deep copy", no_progress);
+ int r = src.deep_copy_with_progress(dest_pp, destname, opts, pc);
+ if (r < 0){
+ pc.fail();
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+void get_arguments_deep(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_SOURCE);
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
+ at::add_create_image_options(options, false);
+ at::add_flatten_option(options);
+ at::add_no_progress_option(options);
+}
+
+int execute_deep(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string dst_pool_name;
+ std::string dst_namespace_name;
+ std::string dst_image_name;
+ std::string dst_snap_name;
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name,
+ &dst_namespace_name, &dst_image_name, &dst_snap_name, true,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::ImageOptions opts;
+ r = utils::get_image_options(vm, false, &opts);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
+ snap_name, true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx dst_io_ctx;
+ r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, &dst_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_deep_copy(image, dst_io_ctx, dst_image_name.c_str(), opts,
+ vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: deep copy failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action_deep(
+ {"deep", "copy"}, {"deep", "cp"}, "Deep copy (including snapshots) src image to dest.",
+ at::get_long_features_help(), &get_arguments_deep, &execute_deep);
+
+} // namespace copy
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Create.cc b/src/tools/rbd/action/Create.cc
new file mode 100644
index 000000000..047a8cb77
--- /dev/null
+++ b/src/tools/rbd/action/Create.cc
@@ -0,0 +1,257 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/ceph_mutex.h"
+#include "common/config_proxy.h"
+#include "common/errno.h"
+#include "global/global_context.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace create {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_create(librbd::RBD &rbd, librados::IoCtx& io_ctx,
+ const char *imgname, uint64_t size,
+ librbd::ImageOptions& opts) {
+ return rbd.create4(io_ctx, imgname, size, opts);
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_create_image_options(options, true);
+ options->add_options()
+ (at::IMAGE_THICK_PROVISION.c_str(), po::bool_switch(), "fully allocate storage and zero image");
+ at::add_size_option(options);
+ at::add_no_progress_option(options);
+}
+
+void thick_provision_writer_completion(rbd_completion_t, void *);
+
+struct thick_provision_writer {
+ librbd::Image *image;
+ ceph::mutex lock = ceph::make_mutex("thick_provision_writer::lock");
+ ceph::condition_variable cond;
+ uint64_t chunk_size;
+ uint64_t concurr;
+ struct {
+ uint64_t in_flight;
+ int io_error;
+ } io_status;
+
+ // Constructor
+ explicit thick_provision_writer(librbd::Image *i, librbd::ImageOptions &o)
+ : image(i)
+ {
+ // If error cases occur, the code is aborted, because
+ // constructor cannot return error value.
+ ceph_assert(g_ceph_context != nullptr);
+
+ librbd::image_info_t info;
+ int r = image->stat(info, sizeof(info));
+ ceph_assert(r >= 0);
+
+ uint64_t order = info.order;
+ if (order == 0) {
+ order = g_conf().get_val<uint64_t>("rbd_default_order");
+ }
+
+ auto stripe_count = std::max<uint64_t>(1U, image->get_stripe_count());
+ chunk_size = (1ull << order) * stripe_count;
+
+ concurr = std::max<uint64_t>(
+ 1U, g_conf().get_val<uint64_t>("rbd_concurrent_management_ops") /
+ stripe_count);
+
+ io_status.in_flight = 0;
+ io_status.io_error = 0;
+ }
+
+ int start_io(uint64_t write_offset)
+ {
+ {
+ std::lock_guard l{lock};
+ io_status.in_flight++;
+ if (io_status.in_flight > concurr) {
+ io_status.in_flight--;
+ return -EINVAL;
+ }
+ }
+
+ librbd::RBD::AioCompletion *c;
+ c = new librbd::RBD::AioCompletion(this, thick_provision_writer_completion);
+ int r;
+ r = image->aio_write_zeroes(write_offset, chunk_size, c,
+ RBD_WRITE_ZEROES_FLAG_THICK_PROVISION,
+ LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL);
+ if (r < 0) {
+ std::lock_guard l{lock};
+ io_status.io_error = r;
+ }
+ return r;
+ }
+
+ int wait_for(uint64_t max) {
+ using namespace std::chrono_literals;
+ std::unique_lock l{lock};
+ int r = io_status.io_error;
+
+ while (io_status.in_flight > max) {
+ cond.wait_for(l, 200ms);
+ }
+ return r;
+ }
+};
+
+void thick_provision_writer_completion(rbd_completion_t rc, void *pc) {
+ librbd::RBD::AioCompletion *ac = (librbd::RBD::AioCompletion *)rc;
+ thick_provision_writer *tc = static_cast<thick_provision_writer *>(pc);
+
+ int r = ac->get_return_value();
+ tc->lock.lock();
+ if (r < 0 && tc->io_status.io_error >= 0) {
+ tc->io_status.io_error = r;
+ }
+ tc->io_status.in_flight--;
+ tc->cond.notify_all();
+ tc->lock.unlock();
+ ac->release();
+}
+
+int write_data(librbd::Image &image, librbd::ImageOptions &opts,
+ bool no_progress) {
+ uint64_t image_size;
+ int r = 0;
+ utils::ProgressContext pc("Thick provisioning", no_progress);
+
+ if (image.size(&image_size) != 0) {
+ return -EINVAL;
+ }
+
+ thick_provision_writer tpw(&image, opts);
+ uint64_t off;
+ uint64_t i;
+ for (off = 0; off < image_size;) {
+ i = 0;
+ while (i < tpw.concurr && off < image_size) {
+ tpw.wait_for(tpw.concurr - 1);
+ r = tpw.start_io(off);
+ if (r != 0) {
+ goto err_writesame;
+ }
+ ++i;
+ off += tpw.chunk_size;
+ if(off > image_size) {
+ off = image_size;
+ }
+ pc.update_progress(off, image_size);
+ }
+ }
+
+ tpw.wait_for(0);
+ r = image.flush();
+ if (r < 0) {
+ std::cerr << "rbd: failed to flush at the end: " << cpp_strerror(r)
+ << std::endl;
+ goto err_writesame;
+ }
+ pc.finish();
+
+ return r;
+
+err_writesame:
+ tpw.wait_for(0);
+ pc.fail();
+
+ return r;
+}
+
+int thick_write(const std::string &image_name,librados::IoCtx &io_ctx,
+ librbd::ImageOptions &opts, bool no_progress) {
+ int r;
+ librbd::Image image;
+
+ r = utils::open_image(io_ctx, image_name, false, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = write_data(image, opts, no_progress);
+
+ image.close();
+
+ return r;
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::ImageOptions opts;
+ r = utils::get_image_options(vm, true, &opts);
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t size;
+ r = utils::get_image_size(vm, &size);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = do_create(rbd, io_ctx, image_name.c_str(), size, opts);
+ if (!namespace_name.empty() && r == -ENOENT) {
+ std::cerr << "rbd: namespace not found - it must be created with "
+ << "'rbd namespace create' before creating an image."
+ << std::endl;
+ return r;
+ } else if (r < 0) {
+ std::cerr << "rbd: create error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ if (vm.count(at::IMAGE_THICK_PROVISION) && vm[at::IMAGE_THICK_PROVISION].as<bool>()) {
+ r = thick_write(image_name, io_ctx, opts, vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: image created but error encountered during thick provisioning: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"create"}, {}, "Create an empty image.", at::get_long_features_help(),
+ &get_arguments, &execute);
+
+} // namespace create
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Device.cc b/src/tools/rbd/action/Device.cc
new file mode 100644
index 000000000..878081438
--- /dev/null
+++ b/src/tools/rbd/action/Device.cc
@@ -0,0 +1,285 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "acconfig.h"
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+
+#include <boost/program_options.hpp>
+
+#include "include/ceph_assert.h"
+
+namespace rbd {
+namespace action {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+#define DECLARE_DEVICE_OPERATIONS(ns) \
+ namespace ns { \
+ int execute_list(const po::variables_map &vm, \
+ const std::vector<std::string> &ceph_global_args); \
+ int execute_map(const po::variables_map &vm, \
+ const std::vector<std::string> &ceph_global_args); \
+ int execute_unmap(const po::variables_map &vm, \
+ const std::vector<std::string> &ceph_global_args); \
+ int execute_attach(const po::variables_map &vm, \
+ const std::vector<std::string> &ceph_global_args); \
+ int execute_detach(const po::variables_map &vm, \
+ const std::vector<std::string> &ceph_global_args); \
+ }
+
+DECLARE_DEVICE_OPERATIONS(ggate);
+DECLARE_DEVICE_OPERATIONS(kernel);
+DECLARE_DEVICE_OPERATIONS(nbd);
+DECLARE_DEVICE_OPERATIONS(wnbd);
+
+namespace device {
+
+namespace {
+
+struct DeviceOperations {
+ int (*execute_list)(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_args);
+ int (*execute_map)(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_args);
+ int (*execute_unmap)(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_args);
+ int (*execute_attach)(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_args);
+ int (*execute_detach)(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_args);
+};
+
+const DeviceOperations ggate_operations = {
+ ggate::execute_list,
+ ggate::execute_map,
+ ggate::execute_unmap,
+ ggate::execute_attach,
+ ggate::execute_detach,
+};
+
+const DeviceOperations krbd_operations = {
+ kernel::execute_list,
+ kernel::execute_map,
+ kernel::execute_unmap,
+ kernel::execute_attach,
+ kernel::execute_detach,
+};
+
+const DeviceOperations nbd_operations = {
+ nbd::execute_list,
+ nbd::execute_map,
+ nbd::execute_unmap,
+ nbd::execute_attach,
+ nbd::execute_detach,
+};
+
+const DeviceOperations wnbd_operations = {
+ wnbd::execute_list,
+ wnbd::execute_map,
+ wnbd::execute_unmap,
+ wnbd::execute_attach,
+ wnbd::execute_detach,
+};
+
+enum device_type_t {
+ DEVICE_TYPE_GGATE,
+ DEVICE_TYPE_KRBD,
+ DEVICE_TYPE_NBD,
+ DEVICE_TYPE_WNBD,
+};
+
+struct DeviceType {};
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ DeviceType *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+
+ #ifdef _WIN32
+ if (s == "wnbd") {
+ v = boost::any(DEVICE_TYPE_WNBD);
+ #else
+ if (s == "nbd") {
+ v = boost::any(DEVICE_TYPE_NBD);
+ } else if (s == "ggate") {
+ v = boost::any(DEVICE_TYPE_GGATE);
+ } else if (s == "krbd") {
+ v = boost::any(DEVICE_TYPE_KRBD);
+ #endif /* _WIN32 */
+ } else {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+}
+
+void add_device_type_option(po::options_description *options) {
+ options->add_options()
+ ("device-type,t", po::value<DeviceType>(),
+#ifdef _WIN32
+ "device type [wnbd]");
+#else
+ "device type [ggate, krbd (default), nbd]");
+#endif
+}
+
+void add_device_specific_options(po::options_description *options) {
+ options->add_options()
+ ("options,o", po::value<std::vector<std::string>>(),
+ "device specific options");
+}
+
+device_type_t get_device_type(const po::variables_map &vm) {
+ if (vm.count("device-type")) {
+ return vm["device-type"].as<device_type_t>();
+ }
+ #ifndef _WIN32
+ return DEVICE_TYPE_KRBD;
+ #else
+ return DEVICE_TYPE_WNBD;
+ #endif
+}
+
+const DeviceOperations *get_device_operations(const po::variables_map &vm) {
+ switch (get_device_type(vm)) {
+ case DEVICE_TYPE_GGATE:
+ return &ggate_operations;
+ case DEVICE_TYPE_KRBD:
+ return &krbd_operations;
+ case DEVICE_TYPE_NBD:
+ return &nbd_operations;
+ case DEVICE_TYPE_WNBD:
+ return &wnbd_operations;
+ default:
+ ceph_abort();
+ return nullptr;
+ }
+}
+
+} // anonymous namespace
+
+void get_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_device_type_option(options);
+ at::add_format_options(options);
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ return (*get_device_operations(vm)->execute_list)(vm, ceph_global_init_args);
+}
+
+void get_map_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_device_type_option(options);
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_NONE);
+ options->add_options()
+ ("show-cookie", po::bool_switch(), "show device cookie")
+ ("cookie", po::value<std::string>(), "specify device cookie")
+ ("read-only", po::bool_switch(), "map read-only")
+ ("exclusive", po::bool_switch(), "disable automatic exclusive lock transitions")
+ ("quiesce", po::bool_switch(), "use quiesce hooks")
+ ("quiesce-hook", po::value<std::string>(), "quiesce hook path");
+ at::add_snap_id_option(options);
+ add_device_specific_options(options);
+}
+
+int execute_map(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ return (*get_device_operations(vm)->execute_map)(vm, ceph_global_init_args);
+}
+
+void get_unmap_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_device_type_option(options);
+ positional->add_options()
+ ("image-or-snap-or-device-spec",
+ "image, snapshot, or device specification\n"
+ "[<pool-name>/[<namespace>/]]<image-name>[@<snap-name>] or <device-path>");
+ at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_snap_id_option(options);
+ add_device_specific_options(options);
+}
+
+int execute_unmap(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ return (*get_device_operations(vm)->execute_unmap)(vm, ceph_global_init_args);
+}
+
+void get_attach_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_device_type_option(options);
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_NONE);
+ options->add_options()
+ ("device", po::value<std::string>()->required(), "specify device path")
+ ("show-cookie", po::bool_switch(), "show device cookie")
+ ("cookie", po::value<std::string>(), "specify device cookie")
+ ("read-only", po::bool_switch(), "attach read-only")
+ ("force", po::bool_switch(), "force attach")
+ ("exclusive", po::bool_switch(), "disable automatic exclusive lock transitions")
+ ("quiesce", po::bool_switch(), "use quiesce hooks")
+ ("quiesce-hook", po::value<std::string>(), "quiesce hook path");
+ at::add_snap_id_option(options);
+ add_device_specific_options(options);
+}
+
+int execute_attach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ return (*get_device_operations(vm)->execute_attach)(vm, ceph_global_init_args);
+}
+
+void get_detach_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_device_type_option(options);
+ positional->add_options()
+ ("image-or-snap-or-device-spec",
+ "image, snapshot, or device specification\n"
+ "[<pool-name>/[<namespace>/]]<image-name>[@<snap-name>] or <device-path>");
+ at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_snap_id_option(options);
+ add_device_specific_options(options);
+}
+
+int execute_detach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ return (*get_device_operations(vm)->execute_detach)(vm, ceph_global_init_args);
+}
+
+Shell::SwitchArguments switched_arguments({"exclusive", "force", "quiesce",
+ "read-only", "show-cookie"});
+
+Shell::Action action_list(
+ {"device", "list"}, {"showmapped"}, "List mapped rbd images.", "",
+ &get_list_arguments, &execute_list);
+// yet another alias for list command
+Shell::Action action_ls(
+ {"device", "ls"}, {}, "List mapped rbd images.", "",
+ &get_list_arguments, &execute_list, false);
+
+Shell::Action action_map(
+ {"device", "map"}, {"map"}, "Map an image to a block device.", "",
+ &get_map_arguments, &execute_map);
+
+Shell::Action action_unmap(
+ {"device", "unmap"}, {"unmap"}, "Unmap a rbd device.", "",
+ &get_unmap_arguments, &execute_unmap);
+
+Shell::Action action_attach(
+ {"device", "attach"}, {}, "Attach image to device.", "",
+ &get_attach_arguments, &execute_attach);
+
+Shell::Action action_detach(
+ {"device", "detach"}, {}, "Detach image from device.", "",
+ &get_detach_arguments, &execute_detach);
+
+} // namespace device
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Diff.cc b/src/tools/rbd/action/Diff.cc
new file mode 100644
index 000000000..838ef6cc5
--- /dev/null
+++ b/src/tools/rbd/action/Diff.cc
@@ -0,0 +1,142 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace diff {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+struct output_method {
+ output_method() : f(NULL), t(NULL), empty(true) {}
+ Formatter *f;
+ TextTable *t;
+ bool empty;
+};
+
+static int diff_cb(uint64_t ofs, size_t len, int exists, void *arg)
+{
+ output_method *om = static_cast<output_method *>(arg);
+ om->empty = false;
+ if (om->f) {
+ om->f->open_object_section("extent");
+ om->f->dump_unsigned("offset", ofs);
+ om->f->dump_unsigned("length", len);
+ om->f->dump_string("exists", exists ? "true" : "false");
+ om->f->close_section();
+ } else {
+ ceph_assert(om->t);
+ *(om->t) << ofs << len << (exists ? "data" : "zero") << TextTable::endrow;
+ }
+ return 0;
+}
+
+static int do_diff(librbd::Image& image, const char *fromsnapname,
+ bool whole_object, Formatter *f)
+{
+ int r;
+ librbd::image_info_t info;
+
+ r = image.stat(info, sizeof(info));
+ if (r < 0)
+ return r;
+
+ output_method om;
+ if (f) {
+ om.f = f;
+ f->open_array_section("extents");
+ } else {
+ om.t = new TextTable();
+ om.t->define_column("Offset", TextTable::LEFT, TextTable::LEFT);
+ om.t->define_column("Length", TextTable::LEFT, TextTable::LEFT);
+ om.t->define_column("Type", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ r = image.diff_iterate2(fromsnapname, 0, info.size, true, whole_object,
+ diff_cb, &om);
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ if (!om.empty)
+ std::cout << *om.t;
+ delete om.t;
+ }
+ return r;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_NONE);
+ options->add_options()
+ (at::FROM_SNAPSHOT_NAME.c_str(), po::value<std::string>(),
+ "snapshot starting point")
+ (at::WHOLE_OBJECT.c_str(), po::bool_switch(), "compare whole object");
+ at::add_format_options(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string from_snap_name;
+ if (vm.count(at::FROM_SNAPSHOT_NAME)) {
+ from_snap_name = vm[at::FROM_SNAPSHOT_NAME].as<std::string>();
+ }
+
+ bool diff_whole_object = vm[at::WHOLE_OBJECT].as<bool>();
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
+ snap_name, true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_diff(image, from_snap_name.empty() ? nullptr : from_snap_name.c_str(),
+ diff_whole_object, formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: diff error: " << cpp_strerror(r) << std::endl;
+ return -r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"diff"}, {},
+ "Print extents that differ since a previous snap, or image creation.", "",
+ &get_arguments, &execute);
+
+} // namespace diff
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/DiskUsage.cc b/src/tools/rbd/action/DiskUsage.cc
new file mode 100644
index 000000000..12fb8cfde
--- /dev/null
+++ b/src/tools/rbd/action/DiskUsage.cc
@@ -0,0 +1,377 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/types.h"
+#include "include/stringify.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include <algorithm>
+#include <iostream>
+#include <boost/bind/bind.hpp>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace disk_usage {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+using namespace boost::placeholders;
+
+static int disk_usage_callback(uint64_t offset, size_t len, int exists,
+ void *arg) {
+ uint64_t *used_size = reinterpret_cast<uint64_t *>(arg);
+ if (exists) {
+ (*used_size) += len;
+ }
+ return 0;
+}
+
+static int get_image_disk_usage(const std::string& name,
+ const std::string& snap_name,
+ const std::string& from_snap_name,
+ librbd::Image &image,
+ bool exact,
+ uint64_t size,
+ uint64_t *used_size){
+
+ const char* from = NULL;
+ if (!from_snap_name.empty()) {
+ from = from_snap_name.c_str();
+ }
+
+ uint64_t flags;
+ int r = image.get_flags(&flags);
+ if (r < 0) {
+ std::cerr << "rbd: failed to retrieve image flags: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
+ std::cerr << "warning: fast-diff map is invalid for " << name
+ << (snap_name.empty() ? "" : "@" + snap_name) << ". "
+ << "operation may be slow." << std::endl;
+ }
+
+ *used_size = 0;
+ r = image.diff_iterate2(from, 0, size, false, !exact,
+ &disk_usage_callback, used_size);
+ if (r < 0) {
+ std::cerr << "rbd: failed to iterate diffs: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void format_image_disk_usage(const std::string& name,
+ const std::string& id,
+ const std::string& snap_name,
+ uint64_t snap_id,
+ uint64_t size,
+ uint64_t used_size,
+ TextTable& tbl, Formatter *f) {
+ if (f) {
+ f->open_object_section("image");
+ f->dump_string("name", name);
+ f->dump_string("id", id);
+ if (!snap_name.empty()) {
+ f->dump_string("snapshot", snap_name);
+ f->dump_unsigned("snapshot_id", snap_id);
+ }
+ f->dump_unsigned("provisioned_size", size);
+ f->dump_unsigned("used_size" , used_size);
+ f->close_section();
+ } else {
+ std::string full_name = name;
+ if (!snap_name.empty()) {
+ full_name += "@" + snap_name;
+ }
+ tbl << full_name
+ << stringify(byte_u_t(size))
+ << stringify(byte_u_t(used_size))
+ << TextTable::endrow;
+ }
+}
+
+static int do_disk_usage(librbd::RBD &rbd, librados::IoCtx &io_ctx,
+ const char *imgname, const char *snapname,
+ const char *from_snapname, bool exact, Formatter *f,
+ bool merge_snap) {
+ std::vector<librbd::image_spec_t> images;
+ int r = rbd.list2(io_ctx, &images);
+ if (r == -ENOENT) {
+ r = 0;
+ } else if (r < 0) {
+ return r;
+ }
+
+ TextTable tbl;
+ if (f) {
+ f->open_object_section("stats");
+ f->open_array_section("images");
+ } else {
+ tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("PROVISIONED", TextTable::LEFT, TextTable::RIGHT);
+ tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
+ }
+
+ uint32_t count = 0;
+ uint64_t used_size = 0;
+ uint64_t total_prov = 0;
+ uint64_t total_used = 0;
+ uint64_t snap_id = CEPH_NOSNAP;
+ uint64_t from_id = CEPH_NOSNAP;
+ bool found = false;
+ for (auto& image_spec : images) {
+ if (imgname != NULL && image_spec.name != imgname) {
+ continue;
+ }
+ found = true;
+
+ librbd::Image image;
+ r = rbd.open_read_only(io_ctx, image, image_spec.name.c_str(), NULL);
+ if (r < 0) {
+ if (r != -ENOENT) {
+ std::cerr << "rbd: error opening " << image_spec.name << ": "
+ << cpp_strerror(r) << std::endl;
+ }
+ continue;
+ }
+
+ uint64_t features;
+ r = image.features(&features);
+ if (r < 0) {
+ std::cerr << "rbd: failed to retrieve image features: " << cpp_strerror(r)
+ << std::endl;
+ goto out;
+ }
+ if ((features & RBD_FEATURE_FAST_DIFF) == 0) {
+ std::cerr << "warning: fast-diff map is not enabled for "
+ << image_spec.name << ". " << "operation may be slow."
+ << std::endl;
+ }
+
+ librbd::image_info_t info;
+ if (image.stat(info, sizeof(info)) < 0) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ std::vector<librbd::snap_info_t> snap_list;
+ r = image.snap_list(snap_list);
+ if (r < 0) {
+ std::cerr << "rbd: error opening " << image_spec.name << " snapshots: "
+ << cpp_strerror(r) << std::endl;
+ continue;
+ }
+
+ snap_list.erase(remove_if(snap_list.begin(),
+ snap_list.end(),
+ boost::bind(utils::is_not_user_snap_namespace, &image, _1)),
+ snap_list.end());
+
+ bool found_from_snap = (from_snapname == nullptr);
+ bool found_snap = (snapname == nullptr);
+ bool found_from = (from_snapname == nullptr);
+ std::string last_snap_name;
+ std::sort(snap_list.begin(), snap_list.end(),
+ boost::bind(&librbd::snap_info_t::id, _1) <
+ boost::bind(&librbd::snap_info_t::id, _2));
+ if (!found_snap || !found_from) {
+ for (auto &snap_info : snap_list) {
+ if (!found_snap && snap_info.name == snapname) {
+ snap_id = snap_info.id;
+ found_snap = true;
+ }
+ if (!found_from && snap_info.name == from_snapname) {
+ from_id = snap_info.id;
+ found_from = true;
+ }
+ if (found_snap && found_from) {
+ break;
+ }
+ }
+ }
+ if ((snapname != nullptr && snap_id == CEPH_NOSNAP) ||
+ (from_snapname != nullptr && from_id == CEPH_NOSNAP)) {
+ std::cerr << "specified snapshot is not found." << std::endl;
+ return -ENOENT;
+ }
+ if (snap_id != CEPH_NOSNAP && from_id != CEPH_NOSNAP) {
+ if (from_id == snap_id) {
+ // no diskusage.
+ return 0;
+ }
+ if (from_id >= snap_id) {
+ return -EINVAL;
+ }
+ }
+
+ uint64_t image_full_used_size = 0;
+
+ for (std::vector<librbd::snap_info_t>::const_iterator snap =
+ snap_list.begin(); snap != snap_list.end(); ++snap) {
+ librbd::Image snap_image;
+ r = rbd.open_read_only(io_ctx, snap_image, image_spec.name.c_str(),
+ snap->name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: error opening snapshot " << image_spec.name << "@"
+ << snap->name << ": " << cpp_strerror(r) << std::endl;
+ goto out;
+ }
+
+ if (imgname == nullptr || found_from_snap ||
+ (found_from_snap && snapname != nullptr && snap->name == snapname)) {
+
+ r = get_image_disk_usage(image_spec.name, snap->name, last_snap_name, snap_image, exact, snap->size, &used_size);
+ if (r < 0) {
+ goto out;
+ }
+ if (!merge_snap) {
+ format_image_disk_usage(image_spec.name, image_spec.id, snap->name,
+ snap->id, snap->size, used_size, tbl, f);
+ }
+
+ image_full_used_size += used_size;
+
+ if (snapname != NULL) {
+ total_prov += snap->size;
+ }
+ total_used += used_size;
+ ++count;
+ }
+
+ if (!found_from_snap && from_snapname != nullptr &&
+ snap->name == from_snapname) {
+ found_from_snap = true;
+ }
+ if (snapname != nullptr && snap->name == snapname) {
+ break;
+ }
+ last_snap_name = snap->name;
+ }
+
+ if (snapname == NULL) {
+ r = get_image_disk_usage(image_spec.name, "", last_snap_name, image, exact, info.size, &used_size);
+ if (r < 0) {
+ goto out;
+ }
+
+ image_full_used_size += used_size;
+
+ if (!merge_snap) {
+ format_image_disk_usage(image_spec.name, image_spec.id, "", CEPH_NOSNAP,
+ info.size, used_size, tbl, f);
+ } else {
+ format_image_disk_usage(image_spec.name, image_spec.id, "", CEPH_NOSNAP,
+ info.size, image_full_used_size, tbl, f);
+ }
+
+ total_prov += info.size;
+ total_used += used_size;
+ ++count;
+ }
+ }
+ if (imgname != nullptr && !found) {
+ std::cerr << "specified image " << imgname << " is not found." << std::endl;
+ return -ENOENT;
+ }
+
+out:
+ if (f) {
+ f->close_section();
+ if (imgname == NULL) {
+ f->dump_unsigned("total_provisioned_size", total_prov);
+ f->dump_unsigned("total_used_size", total_used);
+ }
+ f->close_section();
+ f->flush(std::cout);
+ } else if (!images.empty()) {
+ if (count > 1) {
+ tbl << "<TOTAL>"
+ << stringify(byte_u_t(total_prov))
+ << stringify(byte_u_t(total_used))
+ << TextTable::endrow;
+ }
+ std::cout << tbl;
+ }
+
+ return r < 0 ? r : 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_NONE);
+ at::add_format_options(options);
+ options->add_options()
+ (at::FROM_SNAPSHOT_NAME.c_str(), po::value<std::string>(),
+ "snapshot starting point")
+ ("exact", po::bool_switch(), "compute exact disk usage (slow)")
+ ("merge-snapshots", po::bool_switch(),
+ "merge snapshot sizes with its image");
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, vm.count(at::FROM_SNAPSHOT_NAME),
+ utils::SNAPSHOT_PRESENCE_PERMITTED, utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string from_snap_name;
+ if (vm.count(at::FROM_SNAPSHOT_NAME)) {
+ from_snap_name = vm[at::FROM_SNAPSHOT_NAME].as<std::string>();
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::disable_cache();
+
+ librbd::RBD rbd;
+ r = do_disk_usage(rbd, io_ctx,
+ image_name.empty() ? nullptr: image_name.c_str(),
+ snap_name.empty() ? nullptr : snap_name.c_str(),
+ from_snap_name.empty() ? nullptr : from_snap_name.c_str(),
+ vm["exact"].as<bool>(), formatter.get(),
+ vm["merge-snapshots"].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: du failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::SwitchArguments switched_arguments({"exact", "merge-snapshots"});
+Shell::Action action(
+ {"disk-usage"}, {"du"}, "Show disk usage stats for pool, image or snapshot.",
+ "", &get_arguments, &execute);
+
+} // namespace disk_usage
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Encryption.cc b/src/tools/rbd/action/Encryption.cc
new file mode 100644
index 000000000..ecd4f0cb5
--- /dev/null
+++ b/src/tools/rbd/action/Encryption.cc
@@ -0,0 +1,117 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "include/scope_guard.h"
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include <fstream>
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace encryption {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ positional->add_options()
+ ("format", "encryption format [possible values: luks1, luks2]")
+ ("passphrase-file",
+ "path of file containing passphrase for unlocking the image");
+ options->add_options()
+ ("cipher-alg", po::value<at::EncryptionAlgorithm>(),
+ "encryption algorithm [possible values: aes-128, aes-256 (default)]");
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string format_str = utils::get_positional_argument(vm, arg_index++);
+ if (format_str.empty()) {
+ std::cerr << "rbd: must specify format." << std::endl;
+ return -EINVAL;
+ }
+
+ std::string passphrase_file =
+ utils::get_positional_argument(vm, arg_index++);
+ if (passphrase_file.empty()) {
+ std::cerr << "rbd: must specify passphrase-file." << std::endl;
+ return -EINVAL;
+ }
+
+ auto alg = RBD_ENCRYPTION_ALGORITHM_AES256;
+ if (vm.count("cipher-alg")) {
+ alg = vm["cipher-alg"].as<librbd::encryption_algorithm_t>();
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ std::ifstream file(passphrase_file, std::ios::in | std::ios::binary);
+ if (file.fail()) {
+ std::cerr << "rbd: unable to open passphrase file '" << passphrase_file
+ << "': " << cpp_strerror(errno) << std::endl;
+ return -errno;
+ }
+ std::string passphrase((std::istreambuf_iterator<char>(file)),
+ std::istreambuf_iterator<char>());
+ file.close();
+
+ if (format_str == "luks1") {
+ librbd::encryption_luks1_format_options_t opts = {
+ alg, std::move(passphrase)};
+ r = image.encryption_format(
+ RBD_ENCRYPTION_FORMAT_LUKS1, &opts, sizeof(opts));
+ ceph_memzero_s(opts.passphrase.data(), opts.passphrase.size(),
+ opts.passphrase.size());
+ } else if (format_str == "luks2") {
+ librbd::encryption_luks2_format_options_t opts = {
+ alg, std::move(passphrase)};
+ r = image.encryption_format(
+ RBD_ENCRYPTION_FORMAT_LUKS2, &opts, sizeof(opts));
+ ceph_memzero_s(opts.passphrase.data(), opts.passphrase.size(),
+ opts.passphrase.size());
+ } else {
+ std::cerr << "rbd: unsupported encryption format" << std::endl;
+ return -ENOTSUP;
+ }
+
+ if (r < 0) {
+ std::cerr << "rbd: encryption format error: " << cpp_strerror(r)
+ << std::endl;
+ }
+ return r;
+}
+
+Shell::Action action(
+ {"encryption", "format"}, {}, "Format image to an encrypted format.", "",
+ &get_arguments, &execute);
+
+} // namespace encryption
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Export.cc b/src/tools/rbd/action/Export.cc
new file mode 100644
index 000000000..ddcf0f2c3
--- /dev/null
+++ b/src/tools/rbd/action/Export.cc
@@ -0,0 +1,653 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/Context.h"
+#include "common/errno.h"
+#include "common/Throttle.h"
+#include "include/encoding.h"
+#include <iostream>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <boost/program_options.hpp>
+#include <boost/scope_exit.hpp>
+
+using std::cerr;
+using std::string;
+
+namespace rbd {
+namespace action {
+namespace export_full {
+
+struct ExportDiffContext {
+ librbd::Image *image;
+ int fd;
+ int export_format;
+ uint64_t totalsize;
+ utils::ProgressContext pc;
+ OrderedThrottle throttle;
+
+ ExportDiffContext(librbd::Image *i, int f, uint64_t t, int max_ops,
+ bool no_progress, int eformat) :
+ image(i), fd(f), export_format(eformat), totalsize(t), pc("Exporting image", no_progress),
+ throttle(max_ops, true) {
+ }
+};
+
+class C_ExportDiff : public Context {
+public:
+ C_ExportDiff(ExportDiffContext *edc, uint64_t offset, uint64_t length,
+ bool exists, int export_format)
+ : m_export_diff_context(edc), m_offset(offset), m_length(length),
+ m_exists(exists), m_export_format(export_format) {
+ }
+
+ int send() {
+ if (m_export_diff_context->throttle.pending_error()) {
+ return m_export_diff_context->throttle.wait_for_ret();
+ }
+
+ C_OrderedThrottle *ctx = m_export_diff_context->throttle.start_op(this);
+ if (m_exists) {
+ librbd::RBD::AioCompletion *aio_completion =
+ new librbd::RBD::AioCompletion(ctx, &utils::aio_context_callback);
+
+ int op_flags = LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
+ int r = m_export_diff_context->image->aio_read2(
+ m_offset, m_length, m_read_data, aio_completion, op_flags);
+ if (r < 0) {
+ aio_completion->release();
+ ctx->complete(r);
+ }
+ } else {
+ ctx->complete(0);
+ }
+ return 0;
+ }
+
+ static int export_diff_cb(uint64_t offset, size_t length, int exists,
+ void *arg) {
+ ExportDiffContext *edc = reinterpret_cast<ExportDiffContext *>(arg);
+
+ C_ExportDiff *context = new C_ExportDiff(edc, offset, length, exists, edc->export_format);
+ return context->send();
+ }
+
+protected:
+ void finish(int r) override {
+ if (r >= 0) {
+ if (m_exists) {
+ m_exists = !m_read_data.is_zero();
+ }
+ r = write_extent(m_export_diff_context, m_offset, m_length, m_exists, m_export_format);
+ if (r == 0 && m_exists) {
+ r = m_read_data.write_fd(m_export_diff_context->fd);
+ }
+ }
+ m_export_diff_context->throttle.end_op(r);
+ }
+
+private:
+ ExportDiffContext *m_export_diff_context;
+ uint64_t m_offset;
+ uint64_t m_length;
+ bool m_exists;
+ int m_export_format;
+ bufferlist m_read_data;
+
+ static int write_extent(ExportDiffContext *edc, uint64_t offset,
+ uint64_t length, bool exists, int export_format) {
+ // extent
+ bufferlist bl;
+ __u8 tag = exists ? RBD_DIFF_WRITE : RBD_DIFF_ZERO;
+ uint64_t len = 0;
+ encode(tag, bl);
+ if (export_format == 2) {
+ if (tag == RBD_DIFF_WRITE)
+ len = 8 + 8 + length;
+ else
+ len = 8 + 8;
+ encode(len, bl);
+ }
+ encode(offset, bl);
+ encode(length, bl);
+ int r = bl.write_fd(edc->fd);
+
+ edc->pc.update_progress(offset, edc->totalsize);
+ return r;
+ }
+};
+
+
+int do_export_diff_fd(librbd::Image& image, const char *fromsnapname,
+ const char *endsnapname, bool whole_object,
+ int fd, bool no_progress, int export_format)
+{
+ int r;
+ librbd::image_info_t info;
+
+ r = image.stat(info, sizeof(info));
+ if (r < 0)
+ return r;
+
+ {
+ // header
+ bufferlist bl;
+ if (export_format == 1)
+ bl.append(utils::RBD_DIFF_BANNER);
+ else
+ bl.append(utils::RBD_DIFF_BANNER_V2);
+
+ __u8 tag;
+ uint64_t len = 0;
+ if (fromsnapname) {
+ tag = RBD_DIFF_FROM_SNAP;
+ encode(tag, bl);
+ std::string from(fromsnapname);
+ if (export_format == 2) {
+ len = from.length() + 4;
+ encode(len, bl);
+ }
+ encode(from, bl);
+ }
+
+ if (endsnapname) {
+ tag = RBD_DIFF_TO_SNAP;
+ encode(tag, bl);
+ std::string to(endsnapname);
+ if (export_format == 2) {
+ len = to.length() + 4;
+ encode(len, bl);
+ }
+ encode(to, bl);
+ }
+
+ if (endsnapname && export_format == 2) {
+ tag = RBD_SNAP_PROTECTION_STATUS;
+ encode(tag, bl);
+ bool is_protected = false;
+ r = image.snap_is_protected(endsnapname, &is_protected);
+ if (r < 0) {
+ return r;
+ }
+ len = 1;
+ encode(len, bl);
+ encode(is_protected, bl);
+ }
+
+ tag = RBD_DIFF_IMAGE_SIZE;
+ encode(tag, bl);
+ uint64_t endsize = info.size;
+ if (export_format == 2) {
+ len = 8;
+ encode(len, bl);
+ }
+ encode(endsize, bl);
+
+ r = bl.write_fd(fd);
+ if (r < 0) {
+ return r;
+ }
+ }
+ ExportDiffContext edc(&image, fd, info.size,
+ g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"),
+ no_progress, export_format);
+ r = image.diff_iterate2(fromsnapname, 0, info.size, true, whole_object,
+ &C_ExportDiff::export_diff_cb, (void *)&edc);
+ if (r < 0) {
+ goto out;
+ }
+
+ r = edc.throttle.wait_for_ret();
+ if (r < 0) {
+ goto out;
+ }
+
+ {
+ __u8 tag = RBD_DIFF_END;
+ bufferlist bl;
+ encode(tag, bl);
+ r = bl.write_fd(fd);
+ }
+
+out:
+ if (r < 0)
+ edc.pc.fail();
+ else
+ edc.pc.finish();
+
+ return r;
+}
+
+int do_export_diff(librbd::Image& image, const char *fromsnapname,
+ const char *endsnapname, bool whole_object,
+ const char *path, bool no_progress)
+{
+ int r;
+ int fd;
+
+ if (strcmp(path, "-") == 0)
+ fd = STDOUT_FILENO;
+ else
+ fd = open(path, O_WRONLY | O_CREAT | O_EXCL | O_BINARY, 0644);
+ if (fd < 0)
+ return -errno;
+
+ r = do_export_diff_fd(image, fromsnapname, endsnapname, whole_object, fd, no_progress, 1);
+
+ if (fd != 1)
+ close(fd);
+ if (r < 0 && fd != 1) {
+ remove(path);
+ }
+
+ return r;
+}
+
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+void get_arguments_diff(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_SOURCE);
+ at::add_path_options(positional, options,
+ "export file (or '-' for stdout)");
+ options->add_options()
+ (at::FROM_SNAPSHOT_NAME.c_str(), po::value<std::string>(),
+ "snapshot starting point")
+ (at::WHOLE_OBJECT.c_str(), po::bool_switch(), "compare whole object");
+ at::add_no_progress_option(options);
+}
+
+int execute_diff(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string path;
+ r = utils::get_path(vm, &arg_index, &path);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string from_snap_name;
+ if (vm.count(at::FROM_SNAPSHOT_NAME)) {
+ from_snap_name = vm[at::FROM_SNAPSHOT_NAME].as<std::string>();
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
+ snap_name, true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_export_diff(image,
+ from_snap_name.empty() ? nullptr : from_snap_name.c_str(),
+ snap_name.empty() ? nullptr : snap_name.c_str(),
+ vm[at::WHOLE_OBJECT].as<bool>(), path.c_str(),
+ vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: export-diff error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action_diff(
+ {"export-diff"}, {}, "Export incremental diff to file.", "",
+ &get_arguments_diff, &execute_diff);
+
+class C_Export : public Context
+{
+public:
+ C_Export(OrderedThrottle &ordered_throttle, librbd::Image &image,
+ uint64_t fd_offset, uint64_t offset, uint64_t length, int fd)
+ : m_throttle(ordered_throttle), m_image(image), m_dest_offset(fd_offset),
+ m_offset(offset), m_length(length), m_fd(fd)
+ {
+ }
+
+ void send()
+ {
+ auto ctx = m_throttle.start_op(this);
+ auto aio_completion = new librbd::RBD::AioCompletion(
+ ctx, &utils::aio_context_callback);
+ int op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
+ LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
+ int r = m_image.aio_read2(m_offset, m_length, m_bufferlist,
+ aio_completion, op_flags);
+ if (r < 0) {
+ cerr << "rbd: error requesting read from source image" << std::endl;
+ aio_completion->release();
+ m_throttle.end_op(r);
+ }
+ }
+
+ void finish(int r) override
+ {
+ BOOST_SCOPE_EXIT((&m_throttle) (&r))
+ {
+ m_throttle.end_op(r);
+ } BOOST_SCOPE_EXIT_END
+
+ if (r < 0) {
+ cerr << "rbd: error reading from source image at offset "
+ << m_offset << ": " << cpp_strerror(r) << std::endl;
+ return;
+ }
+
+ ceph_assert(m_bufferlist.length() == static_cast<size_t>(r));
+ if (m_fd != STDOUT_FILENO) {
+ if (m_bufferlist.is_zero()) {
+ return;
+ }
+
+ uint64_t chkret = lseek64(m_fd, m_dest_offset, SEEK_SET);
+ if (chkret != m_dest_offset) {
+ cerr << "rbd: error seeking destination image to offset "
+ << m_dest_offset << std::endl;
+ r = -errno;
+ return;
+ }
+ }
+
+ r = m_bufferlist.write_fd(m_fd);
+ if (r < 0) {
+ cerr << "rbd: error writing to destination image at offset "
+ << m_dest_offset << std::endl;
+ }
+ }
+
+private:
+ OrderedThrottle &m_throttle;
+ librbd::Image &m_image;
+ bufferlist m_bufferlist;
+ uint64_t m_dest_offset;
+ uint64_t m_offset;
+ uint64_t m_length;
+ int m_fd;
+};
+
+const uint32_t MAX_KEYS = 64;
+
+static int do_export_v2(librbd::Image& image, librbd::image_info_t &info, int fd,
+ uint64_t period, int max_concurrent_ops, utils::ProgressContext &pc)
+{
+ int r = 0;
+ // header
+ bufferlist bl;
+ bl.append(utils::RBD_IMAGE_BANNER_V2);
+
+ __u8 tag;
+ uint64_t length;
+ // encode order
+ tag = RBD_EXPORT_IMAGE_ORDER;
+ length = 8;
+ encode(tag, bl);
+ encode(length, bl);
+ encode(uint64_t(info.order), bl);
+
+ // encode features
+ tag = RBD_EXPORT_IMAGE_FEATURES;
+ uint64_t features;
+ image.features(&features);
+ length = 8;
+ encode(tag, bl);
+ encode(length, bl);
+ encode(features, bl);
+
+ // encode stripe_unit and stripe_count
+ tag = RBD_EXPORT_IMAGE_STRIPE_UNIT;
+ uint64_t stripe_unit;
+ stripe_unit = image.get_stripe_unit();
+ length = 8;
+ encode(tag, bl);
+ encode(length, bl);
+ encode(stripe_unit, bl);
+
+ tag = RBD_EXPORT_IMAGE_STRIPE_COUNT;
+ uint64_t stripe_count;
+ stripe_count = image.get_stripe_count();
+ length = 8;
+ encode(tag, bl);
+ encode(length, bl);
+ encode(stripe_count, bl);
+
+ //retrieve metadata of image
+ std::map<std::string, string> imagemetas;
+ std::string last_key;
+ bool more_results = true;
+ while (more_results) {
+ std::map<std::string, bufferlist> pairs;
+ r = image.metadata_list(last_key, MAX_KEYS, &pairs);
+ if (r < 0) {
+ std::cerr << "failed to retrieve metadata of image : " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ if (!pairs.empty()) {
+ last_key = pairs.rbegin()->first;
+
+ for (auto kv : pairs) {
+ std::string key = kv.first;
+ std::string val(kv.second.c_str(), kv.second.length());
+ imagemetas[key] = val;
+ }
+ }
+ more_results = (pairs.size() == MAX_KEYS);
+ }
+
+ //encode imageMeta key and value
+ for (std::map<std::string, string>::iterator it = imagemetas.begin();
+ it != imagemetas.end(); ++it) {
+ string key = it->first;
+ string value = it->second;
+
+ tag = RBD_EXPORT_IMAGE_META;
+ length = key.length() + value.length() + 4 * 2;
+ encode(tag, bl);
+ encode(length, bl);
+ encode(key, bl);
+ encode(value, bl);
+ }
+
+ // encode end tag
+ tag = RBD_EXPORT_IMAGE_END;
+ encode(tag, bl);
+
+ // write bl to fd.
+ r = bl.write_fd(fd);
+ if (r < 0) {
+ return r;
+ }
+
+ // header for snapshots
+ bl.clear();
+ bl.append(utils::RBD_IMAGE_DIFFS_BANNER_V2);
+
+ std::vector<librbd::snap_info_t> snaps;
+ r = image.snap_list(snaps);
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t diff_num = snaps.size() + 1;
+ encode(diff_num, bl);
+
+ r = bl.write_fd(fd);
+ if (r < 0) {
+ return r;
+ }
+
+ const char *last_snap = NULL;
+ for (size_t i = 0; i < snaps.size(); ++i) {
+ utils::snap_set(image, snaps[i].name.c_str());
+ r = do_export_diff_fd(image, last_snap, snaps[i].name.c_str(), false, fd, true, 2);
+ if (r < 0) {
+ return r;
+ }
+ pc.update_progress(i, snaps.size() + 1);
+ last_snap = snaps[i].name.c_str();
+ }
+ utils::snap_set(image, std::string(""));
+ r = do_export_diff_fd(image, last_snap, nullptr, false, fd, true, 2);
+ if (r < 0) {
+ return r;
+ }
+ pc.update_progress(snaps.size() + 1, snaps.size() + 1);
+ return r;
+}
+
+static int do_export_v1(librbd::Image& image, librbd::image_info_t &info,
+ int fd, uint64_t period, int max_concurrent_ops,
+ utils::ProgressContext &pc)
+{
+ int r = 0;
+ size_t file_size = 0;
+ OrderedThrottle throttle(max_concurrent_ops, false);
+ for (uint64_t offset = 0; offset < info.size; offset += period) {
+ if (throttle.pending_error()) {
+ break;
+ }
+
+ uint64_t length = std::min(period, info.size - offset);
+ C_Export *ctx = new C_Export(throttle, image, file_size + offset, offset,
+ length, fd);
+ ctx->send();
+
+ pc.update_progress(offset, info.size);
+ }
+
+ file_size += info.size;
+ r = throttle.wait_for_ret();
+ if (fd != 1) {
+ if (r >= 0) {
+ r = ftruncate(fd, file_size);
+ if (r < 0)
+ return r;
+
+ uint64_t chkret = lseek64(fd, file_size, SEEK_SET);
+ if (chkret != file_size)
+ r = errno;
+ }
+ }
+ return r;
+}
+
+static int do_export(librbd::Image& image, const char *path, bool no_progress,
+ int export_format)
+{
+ librbd::image_info_t info;
+ int64_t r = image.stat(info, sizeof(info));
+ if (r < 0)
+ return r;
+
+ int fd;
+ int max_concurrent_ops = g_conf().get_val<uint64_t>("rbd_concurrent_management_ops");
+ bool to_stdout = (strcmp(path, "-") == 0);
+ if (to_stdout) {
+ fd = STDOUT_FILENO;
+ } else {
+ fd = open(path, O_WRONLY | O_CREAT | O_EXCL | O_BINARY, 0644);
+ if (fd < 0) {
+ return -errno;
+ }
+#ifdef HAVE_POSIX_FADVISE
+ posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+ }
+
+ utils::ProgressContext pc("Exporting image", no_progress);
+ uint64_t period = image.get_stripe_count() * (1ull << info.order);
+
+ if (export_format == 1)
+ r = do_export_v1(image, info, fd, period, max_concurrent_ops, pc);
+ else
+ r = do_export_v2(image, info, fd, period, max_concurrent_ops, pc);
+
+ if (r < 0)
+ pc.fail();
+ else
+ pc.finish();
+ if (!to_stdout)
+ close(fd);
+ return r;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_SOURCE);
+ at::add_path_options(positional, options,
+ "export file (or '-' for stdout)");
+ at::add_no_progress_option(options);
+ at::add_export_format_option(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string path;
+ r = utils::get_path(vm, &arg_index, &path);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
+ snap_name, true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ int format = 1;
+ if (vm.count("export-format"))
+ format = vm["export-format"].as<uint64_t>();
+
+ r = do_export(image, path.c_str(), vm[at::NO_PROGRESS].as<bool>(), format);
+ if (r < 0) {
+ std::cerr << "rbd: export error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"export"}, {}, "Export image to file.", "", &get_arguments, &execute);
+
+} // namespace export_full
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Feature.cc b/src/tools/rbd/action/Feature.cc
new file mode 100644
index 000000000..13a7b6ea7
--- /dev/null
+++ b/src/tools/rbd/action/Feature.cc
@@ -0,0 +1,116 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/stringify.h"
+#include "common/errno.h"
+#include <iostream>
+#include <map>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace feature {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options, bool enabled) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ positional->add_options()
+ ("features", po::value<at::ImageFeatures>()->multitoken(),
+ ("image features\n" + at::get_short_features_help(false)).c_str());
+ if (enabled) {
+ at::add_create_journal_options(options);
+ }
+}
+
+void get_arguments_disable(po::options_description *positional,
+ po::options_description *options) {
+ get_arguments(positional, options, false);
+}
+
+void get_arguments_enable(po::options_description *positional,
+ po::options_description *options) {
+ get_arguments(positional, options, true);
+}
+
+int execute(const po::variables_map &vm, bool enabled) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::ImageOptions opts;
+ r = utils::get_journal_options(vm, &opts);
+ if (r < 0) {
+ return r;
+ }
+
+ std::vector<std::string> feature_names;
+ if (vm.count(at::POSITIONAL_ARGUMENTS)) {
+ const std::vector<std::string> &args =
+ vm[at::POSITIONAL_ARGUMENTS].as<std::vector<std::string> >();
+ feature_names.insert(feature_names.end(), args.begin() + arg_index,
+ args.end());
+ }
+
+ if (feature_names.empty()) {
+ std::cerr << "rbd: at least one feature name must be specified"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ boost::any features_any(static_cast<uint64_t>(0));
+ at::ImageFeatures image_features;
+ at::validate(features_any, feature_names, &image_features, 0);
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = image.update_features(boost::any_cast<uint64_t>(features_any), enabled);
+ if (r < 0) {
+ std::cerr << "rbd: failed to update image features: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+int execute_disable(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ return execute(vm, false);
+}
+
+int execute_enable(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ return execute(vm, true);
+}
+
+Shell::Action action_disable(
+ {"feature", "disable"}, {}, "Disable the specified image feature.", "",
+ &get_arguments_disable, &execute_disable);
+Shell::Action action_enable(
+ {"feature", "enable"}, {}, "Enable the specified image feature.", "",
+ &get_arguments_enable, &execute_enable);
+
+} // namespace feature
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Flatten.cc b/src/tools/rbd/action/Flatten.cc
new file mode 100644
index 000000000..87fadc999
--- /dev/null
+++ b/src/tools/rbd/action/Flatten.cc
@@ -0,0 +1,92 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace flatten {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_flatten(librbd::Image& image, bool no_progress)
+{
+ utils::ProgressContext pc("Image flatten", no_progress);
+ int r = image.flatten_with_progress(pc);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_no_progress_option(options);
+ at::add_encryption_options(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::EncryptionOptions encryption_options;
+ r = utils::get_encryption_options(vm, &encryption_options);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ auto spec_count = encryption_options.specs.size();
+ if (spec_count > 0) {
+ r = image.encryption_load2(&encryption_options.specs[0], spec_count);
+
+ if (r < 0) {
+ std::cerr << "rbd: encryption load failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ }
+
+ r = do_flatten(image, vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: flatten error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"flatten"}, {}, "Fill clone with parent data (make it independent).", "",
+ &get_arguments, &execute);
+
+} // namespace flatten
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Ggate.cc b/src/tools/rbd/action/Ggate.cc
new file mode 100644
index 000000000..11782d70a
--- /dev/null
+++ b/src/tools/rbd/action/Ggate.cc
@@ -0,0 +1,180 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/param.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "include/stringify.h"
+#include "common/SubProcess.h"
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/program_options.hpp>
+
+#include <iostream>
+
+namespace rbd {
+namespace action {
+namespace ggate {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+#if defined(__FreeBSD__)
+static int call_ggate_cmd(const po::variables_map &vm,
+ const std::vector<std::string> &args,
+ const std::vector<std::string> &ceph_global_args) {
+ SubProcess process("rbd-ggate", SubProcess::KEEP, SubProcess::KEEP,
+ SubProcess::KEEP);
+
+ for (auto &arg : ceph_global_args) {
+ process.add_cmd_arg(arg.c_str());
+ }
+
+ for (auto &arg : args) {
+ process.add_cmd_arg(arg.c_str());
+ }
+
+ if (process.spawn()) {
+ std::cerr << "rbd: failed to run rbd-ggate: " << process.err() << std::endl;
+ return -EINVAL;
+ } else if (process.join()) {
+ std::cerr << "rbd: rbd-ggate failed with error: " << process.err()
+ << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(__FreeBSD__)
+ std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::vector<std::string> args;
+
+ args.push_back("list");
+
+ if (vm.count("format")) {
+ args.push_back("--format");
+ args.push_back(vm["format"].as<at::Format>().value);
+ }
+ if (vm["pretty-format"].as<bool>()) {
+ args.push_back("--pretty-format");
+ }
+
+ return call_ggate_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_map(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(__FreeBSD__)
+ std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::vector<std::string> args;
+
+ args.push_back("map");
+ std::string img;
+ int r = utils::get_image_or_snap_spec(vm, &img);
+ if (r < 0) {
+ return r;
+ }
+ args.push_back(img);
+
+ if (vm["quiesce"].as<bool>()) {
+ std::cerr << "rbd: warning: quiesce is not supported" << std::endl;
+ }
+
+ if (vm["read-only"].as<bool>()) {
+ args.push_back("--read-only");
+ }
+
+ if (vm["exclusive"].as<bool>()) {
+ args.push_back("--exclusive");
+ }
+
+ if (vm.count("quiesce-hook")) {
+ std::cerr << "rbd: warning: quiesce-hook is not supported" << std::endl;
+ }
+
+ if (vm.count("options")) {
+ utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(),
+ &args);
+ }
+
+ return call_ggate_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_unmap(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(__FreeBSD__)
+ std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::string device_name = utils::get_positional_argument(vm, 0);
+ if (!boost::starts_with(device_name, "/dev/")) {
+ device_name.clear();
+ }
+
+ std::string image_name;
+ if (device_name.empty()) {
+ int r = utils::get_image_or_snap_spec(vm, &image_name);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (device_name.empty() && image_name.empty()) {
+ std::cerr << "rbd: unmap requires either image name or device path"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ std::vector<std::string> args;
+
+ args.push_back("unmap");
+ args.push_back(device_name.empty() ? image_name : device_name);
+
+ if (vm.count("options")) {
+ utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(),
+ &args);
+ }
+
+ return call_ggate_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_attach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(__FreeBSD__)
+ std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl;
+#else
+ std::cerr << "rbd: ggate attach command not supported" << std::endl;
+#endif
+ return -EOPNOTSUPP;
+}
+
+int execute_detach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(__FreeBSD__)
+ std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl;
+#else
+ std::cerr << "rbd: ggate detach command not supported" << std::endl;
+#endif
+ return -EOPNOTSUPP;
+}
+
+} // namespace ggate
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Group.cc b/src/tools/rbd/action/Group.cc
new file mode 100644
index 000000000..5c2232a6f
--- /dev/null
+++ b/src/tools/rbd/action/Group.cc
@@ -0,0 +1,912 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <iostream>
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+
+namespace rbd {
+namespace action {
+namespace group {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static const std::string GROUP_SPEC("group-spec");
+static const std::string GROUP_SNAP_SPEC("group-snap-spec");
+
+static const std::string GROUP_NAME("group");
+static const std::string DEST_GROUP_NAME("dest-group");
+
+static const std::string GROUP_POOL_NAME("group-" + at::POOL_NAME);
+static const std::string IMAGE_POOL_NAME("image-" + at::POOL_NAME);
+
+void add_group_option(po::options_description *opt,
+ at::ArgumentModifier modifier) {
+ std::string name = GROUP_NAME;
+ std::string description = at::get_description_prefix(modifier) + "group name";
+ switch (modifier) {
+ case at::ARGUMENT_MODIFIER_NONE:
+ case at::ARGUMENT_MODIFIER_SOURCE:
+ break;
+ case at::ARGUMENT_MODIFIER_DEST:
+ name = DEST_GROUP_NAME;
+ break;
+ }
+
+ // TODO add validator
+ opt->add_options()
+ (name.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_prefixed_pool_option(po::options_description *opt,
+ const std::string &prefix) {
+ std::string name = prefix + "-" + at::POOL_NAME;
+ std::string description = prefix + " pool name";
+
+ opt->add_options()
+ (name.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_prefixed_namespace_option(po::options_description *opt,
+ const std::string &prefix) {
+ std::string name = prefix + "-" + at::NAMESPACE_NAME;
+ std::string description = prefix + " namespace name";
+
+ opt->add_options()
+ (name.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_group_spec_options(po::options_description *pos,
+ po::options_description *opt,
+ at::ArgumentModifier modifier,
+ bool snap) {
+ at::add_pool_option(opt, modifier);
+ at::add_namespace_option(opt, modifier);
+ add_group_option(opt, modifier);
+ if (!snap) {
+ pos->add_options()
+ ((get_name_prefix(modifier) + GROUP_SPEC).c_str(),
+ (get_description_prefix(modifier) + "group specification\n" +
+ "(example: [<pool-name>/[<namespace>/]]<group-name>)").c_str());
+ } else {
+ add_snap_option(opt, modifier);
+ pos->add_options()
+ ((get_name_prefix(modifier) + GROUP_SNAP_SPEC).c_str(),
+ (get_description_prefix(modifier) + "group specification\n" +
+ "(example: [<pool-name>/[<namespace>/]]<group-name>@<snap-name>)").c_str());
+ }
+}
+
+int execute_create(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+
+ std::string pool_name;
+ std::string namespace_name;
+ std::string group_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name,
+ &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+ librbd::RBD rbd;
+ r = rbd.group_create(io_ctx, group_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: create error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+ Formatter *f = formatter.get();
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ std::vector<std::string> names;
+ r = rbd.group_list(io_ctx, &names);
+ if (r < 0)
+ return r;
+
+ if (f)
+ f->open_array_section("groups");
+ for (auto i : names) {
+ if (f)
+ f->dump_string("name", i);
+ else
+ std::cout << i << std::endl;
+ }
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ }
+
+ return 0;
+}
+
+int execute_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+
+ std::string pool_name;
+ std::string namespace_name;
+ std::string group_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name,
+ &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+ librbd::RBD rbd;
+
+ r = rbd.group_remove(io_ctx, group_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: remove error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_rename(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+
+ std::string pool_name;
+ std::string namespace_name;
+ std::string group_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name,
+ &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string dest_pool_name;
+ std::string dest_namespace_name;
+ std::string dest_group_name;
+
+ r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, at::DEST_POOL_NAME,
+ &dest_pool_name, &dest_namespace_name, DEST_GROUP_NAME, "group",
+ &dest_group_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ if (pool_name != dest_pool_name) {
+ std::cerr << "rbd: group rename across pools not supported" << std::endl
+ << "source pool: " << pool_name << ", dest pool: "
+ << dest_pool_name << std::endl;
+ return -EINVAL;
+ } else if (namespace_name != dest_namespace_name) {
+ std::cerr << "rbd: group rename across namespaces not supported"
+ << std::endl
+ << "source namespace: " << namespace_name << ", dest namespace: "
+ << dest_namespace_name << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.group_rename(io_ctx, group_name.c_str(),
+ dest_group_name.c_str());
+
+ if (r < 0) {
+ std::cerr << "rbd: failed to rename group: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_add(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ // Parse group data.
+ std::string group_pool_name;
+ std::string group_namespace_name;
+ std::string group_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, GROUP_POOL_NAME,
+ &group_pool_name, &group_namespace_name, GROUP_NAME, "group", &group_name,
+ nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string image_pool_name;
+ std::string image_namespace_name;
+ std::string image_name;
+
+ r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, IMAGE_POOL_NAME,
+ &image_pool_name, &image_namespace_name, at::IMAGE_NAME, "image",
+ &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ if (group_namespace_name != image_namespace_name) {
+ std::cerr << "rbd: group and image namespace must match." << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx cg_io_ctx;
+ r = utils::init(group_pool_name, group_namespace_name, &rados, &cg_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx image_io_ctx;
+ r = utils::init(image_pool_name, group_namespace_name, &rados, &image_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.group_image_add(cg_io_ctx, group_name.c_str(),
+ image_io_ctx, image_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: add image error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_remove_image(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+
+ std::string group_pool_name;
+ std::string group_namespace_name;
+ std::string group_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, GROUP_POOL_NAME,
+ &group_pool_name, &group_namespace_name, GROUP_NAME, "group", &group_name,
+ nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string image_pool_name;
+ std::string image_namespace_name;
+ std::string image_name;
+ std::string image_id;
+
+ if (vm.count(at::IMAGE_ID)) {
+ image_id = vm[at::IMAGE_ID].as<std::string>();
+ }
+
+ r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, IMAGE_POOL_NAME,
+ &image_pool_name, &image_namespace_name, at::IMAGE_NAME, "image",
+ &image_name, nullptr, image_id.empty(), utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ if (group_namespace_name != image_namespace_name) {
+ std::cerr << "rbd: group and image namespace must match." << std::endl;
+ return -EINVAL;
+ } else if (!image_id.empty() && !image_name.empty()) {
+ std::cerr << "rbd: trying to access image using both name and id. "
+ << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx cg_io_ctx;
+ r = utils::init(group_pool_name, group_namespace_name, &rados, &cg_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx image_io_ctx;
+ r = utils::init(image_pool_name, group_namespace_name, &rados, &image_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ if (image_id.empty()) {
+ r = rbd.group_image_remove(cg_io_ctx, group_name.c_str(),
+ image_io_ctx, image_name.c_str());
+ } else {
+ r = rbd.group_image_remove_by_id(cg_io_ctx, group_name.c_str(),
+ image_io_ctx, image_id.c_str());
+ }
+ if (r < 0) {
+ std::cerr << "rbd: remove image error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_list_images(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string group_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name,
+ &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+ Formatter *f = formatter.get();
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ std::vector<librbd::group_image_info_t> images;
+
+ r = rbd.group_image_list(io_ctx, group_name.c_str(), &images,
+ sizeof(librbd::group_image_info_t));
+
+ if (r == -ENOENT)
+ r = 0;
+
+ if (r < 0)
+ return r;
+
+ std::sort(images.begin(), images.end(),
+ [](const librbd::group_image_info_t &lhs,
+ const librbd::group_image_info_t &rhs) {
+ if (lhs.pool != rhs.pool) {
+ return lhs.pool < rhs.pool;
+ }
+ return lhs.name < rhs.name;
+ }
+ );
+
+ if (f)
+ f->open_array_section("images");
+
+ for (auto image : images) {
+ std::string image_name = image.name;
+ int state = image.state;
+ std::string state_string;
+ if (RBD_GROUP_IMAGE_STATE_INCOMPLETE == state) {
+ state_string = "incomplete";
+ }
+
+ std::string pool_name = "";
+
+ librados::Rados rados(io_ctx);
+ librados::IoCtx pool_io_ctx;
+ r = rados.ioctx_create2(image.pool, pool_io_ctx);
+ if (r < 0) {
+ pool_name = "<missing image pool " + stringify(image.pool) + ">";
+ } else {
+ pool_name = pool_io_ctx.get_pool_name();
+ }
+
+ if (f) {
+ f->open_object_section("image");
+ f->dump_string("image", image_name);
+ f->dump_string("pool", pool_name);
+ f->dump_string("namespace", io_ctx.get_namespace());
+ f->dump_int("state", state);
+ f->close_section();
+ } else {
+ std::cout << pool_name << "/";
+ if (!io_ctx.get_namespace().empty()) {
+ std::cout << io_ctx.get_namespace() << "/";
+ }
+ std::cout << image_name << " " << state_string << std::endl;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ }
+
+ return 0;
+}
+
+int execute_group_snap_create(const po::variables_map &vm,
+ const std::vector<std::string> &global_args) {
+ size_t arg_index = 0;
+
+ std::string pool_name;
+ std::string namespace_name;
+ std::string group_name;
+ std::string snap_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name,
+ &namespace_name, GROUP_NAME, "group", &group_name, &snap_name, true,
+ utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ uint32_t flags;
+ r = utils::get_snap_create_flags(vm, &flags);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx io_ctx;
+ librados::Rados rados;
+
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.group_snap_create2(io_ctx, group_name.c_str(), snap_name.c_str(),
+ flags);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_group_snap_remove(const po::variables_map &vm,
+ const std::vector<std::string> &global_args) {
+ size_t arg_index = 0;
+
+ std::string pool_name;
+ std::string namespace_name;
+ std::string group_name;
+ std::string snap_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name,
+ &namespace_name, GROUP_NAME, "group", &group_name, &snap_name, true,
+ utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx io_ctx;
+ librados::Rados rados;
+
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.group_snap_remove(io_ctx, group_name.c_str(), snap_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: failed to remove group snapshot: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_group_snap_rename(const po::variables_map &vm,
+ const std::vector<std::string> &global_args) {
+ size_t arg_index = 0;
+
+ std::string pool_name;
+ std::string namespace_name;
+ std::string group_name;
+ std::string source_snap_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name,
+ &namespace_name, GROUP_NAME, "group", &group_name, &source_snap_name, true,
+ utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string dest_snap_name;
+ if (vm.count(at::DEST_SNAPSHOT_NAME)) {
+ dest_snap_name = vm[at::DEST_SNAPSHOT_NAME].as<std::string>();
+ }
+
+ if (dest_snap_name.empty()) {
+ dest_snap_name = utils::get_positional_argument(vm, arg_index++);
+ }
+
+ if (dest_snap_name.empty()) {
+ std::cerr << "rbd: destination snapshot name was not specified"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ r = utils::validate_snapshot_name(at::ARGUMENT_MODIFIER_DEST, dest_snap_name,
+ utils::SNAPSHOT_PRESENCE_REQUIRED,
+ utils::SPEC_VALIDATION_SNAP);
+ if (r < 0) {
+ return r;
+ }
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.group_snap_rename(io_ctx, group_name.c_str(),
+ source_snap_name.c_str(), dest_snap_name.c_str());
+
+ if (r < 0) {
+ std::cerr << "rbd: failed to rename group snapshot: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_group_snap_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string group_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name,
+ &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+ Formatter *f = formatter.get();
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ std::vector<librbd::group_snap_info_t> snaps;
+
+ r = rbd.group_snap_list(io_ctx, group_name.c_str(), &snaps,
+ sizeof(librbd::group_snap_info_t));
+
+ if (r == -ENOENT) {
+ r = 0;
+ }
+ if (r < 0) {
+ return r;
+ }
+
+ TextTable t;
+ if (f) {
+ f->open_array_section("group_snaps");
+ } else {
+ t.define_column("NAME", TextTable::LEFT, TextTable::LEFT);
+ t.define_column("STATUS", TextTable::LEFT, TextTable::RIGHT);
+ }
+
+ for (auto i : snaps) {
+ std::string snap_name = i.name;
+ int state = i.state;
+ std::string state_string;
+ if (RBD_GROUP_SNAP_STATE_INCOMPLETE == state) {
+ state_string = "incomplete";
+ } else {
+ state_string = "ok";
+ }
+ if (r < 0) {
+ return r;
+ }
+ if (f) {
+ f->open_object_section("group_snap");
+ f->dump_string("snapshot", snap_name);
+ f->dump_string("state", state_string);
+ f->close_section();
+ } else {
+ t << snap_name << state_string << TextTable::endrow;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else if (snaps.size()) {
+ std::cout << t;
+ }
+ return 0;
+}
+
+int execute_group_snap_rollback(const po::variables_map &vm,
+ const std::vector<std::string> &global_args) {
+ size_t arg_index = 0;
+
+ std::string group_name;
+ std::string namespace_name;
+ std::string pool_name;
+ std::string snap_name;
+
+ int r = utils::get_pool_generic_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name,
+ &namespace_name, GROUP_NAME, "group", &group_name, &snap_name, true,
+ utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx io_ctx;
+ librados::Rados rados;
+
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ utils::ProgressContext pc("Rolling back to group snapshot",
+ vm[at::NO_PROGRESS].as<bool>());
+ r = rbd.group_snap_rollback_with_progress(io_ctx, group_name.c_str(),
+ snap_name.c_str(), pc);
+ if (r < 0) {
+ pc.fail();
+ std::cerr << "rbd: rollback group to snapshot failed: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ pc.finish();
+ return 0;
+}
+
+void get_create_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE,
+ false);
+}
+
+void get_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE,
+ false);
+}
+
+void get_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+ at::add_format_options(options);
+}
+
+void get_rename_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE,
+ false);
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST,
+ false);
+}
+
+void get_add_arguments(po::options_description *positional,
+ po::options_description *options) {
+ positional->add_options()
+ (GROUP_SPEC.c_str(),
+ "group specification\n"
+ "(example: [<pool-name>/[<namespace>/]]<group-name>)");
+
+ add_prefixed_pool_option(options, "group");
+ add_prefixed_namespace_option(options, "group");
+ add_group_option(options, at::ARGUMENT_MODIFIER_NONE);
+
+ positional->add_options()
+ (at::IMAGE_SPEC.c_str(),
+ "image specification\n"
+ "(example: [<pool-name>/[<namespace>/]]<image-name>)");
+
+ add_prefixed_pool_option(options, "image");
+ add_prefixed_namespace_option(options, "image");
+ at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);
+
+ at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE,
+ " unless overridden");
+}
+
+void get_remove_image_arguments(po::options_description *positional,
+ po::options_description *options) {
+ positional->add_options()
+ (GROUP_SPEC.c_str(),
+ "group specification\n"
+ "(example: [<pool-name>/[<namespace>/]]<group-name>)");
+
+ add_prefixed_pool_option(options, "group");
+ add_prefixed_namespace_option(options, "group");
+ add_group_option(options, at::ARGUMENT_MODIFIER_NONE);
+
+ positional->add_options()
+ (at::IMAGE_SPEC.c_str(),
+ "image specification\n"
+ "(example: [<pool-name>/[<namespace>/]]<image-name>)");
+
+ add_prefixed_pool_option(options, "image");
+ add_prefixed_namespace_option(options, "image");
+ at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);
+
+ at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE,
+ " unless overridden");
+ at::add_image_id_option(options);
+}
+
+void get_list_images_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_format_options(options);
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE,
+ false);
+}
+
+void get_group_snap_create_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE,
+ true);
+ at::add_snap_create_options(options);
+}
+
+void get_group_snap_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE,
+ true);
+}
+
+void get_group_snap_rename_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE,
+ true);
+
+ positional->add_options()
+ (at::DEST_SNAPSHOT_NAME.c_str(),
+ "destination snapshot name\n(example: <snap-name>)");
+ at::add_snap_option(options, at::ARGUMENT_MODIFIER_DEST);
+}
+
+void get_group_snap_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_format_options(options);
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE,
+ false);
+}
+
+void get_group_snap_rollback_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_no_progress_option(options);
+ add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE,
+ true);
+}
+
+Shell::Action action_create(
+ {"group", "create"}, {}, "Create a group.",
+ "", &get_create_arguments, &execute_create);
+Shell::Action action_remove(
+ {"group", "remove"}, {"group", "rm"}, "Delete a group.",
+ "", &get_remove_arguments, &execute_remove);
+Shell::Action action_list(
+ {"group", "list"}, {"group", "ls"}, "List rbd groups.",
+ "", &get_list_arguments, &execute_list);
+Shell::Action action_rename(
+ {"group", "rename"}, {}, "Rename a group within pool.",
+ "", &get_rename_arguments, &execute_rename);
+Shell::Action action_add(
+ {"group", "image", "add"}, {}, "Add an image to a group.",
+ "", &get_add_arguments, &execute_add);
+Shell::Action action_remove_image(
+ {"group", "image", "remove"}, {"group", "image", "rm"},
+ "Remove an image from a group.", "",
+ &get_remove_image_arguments, &execute_remove_image);
+Shell::Action action_list_images(
+ {"group", "image", "list"}, {"group", "image", "ls"},
+ "List images in a group.", "",
+ &get_list_images_arguments, &execute_list_images);
+Shell::Action action_group_snap_create(
+ {"group", "snap", "create"}, {}, "Make a snapshot of a group.",
+ "", &get_group_snap_create_arguments, &execute_group_snap_create);
+Shell::Action action_group_snap_remove(
+ {"group", "snap", "remove"}, {"group", "snap", "rm"},
+ "Remove a snapshot from a group.",
+ "", &get_group_snap_remove_arguments, &execute_group_snap_remove);
+Shell::Action action_group_snap_rename(
+ {"group", "snap", "rename"}, {}, "Rename group's snapshot.",
+ "", &get_group_snap_rename_arguments, &execute_group_snap_rename);
+Shell::Action action_group_snap_list(
+ {"group", "snap", "list"}, {"group", "snap", "ls"},
+ "List snapshots of a group.",
+ "", &get_group_snap_list_arguments, &execute_group_snap_list);
+Shell::Action action_group_snap_rollback(
+ {"group", "snap", "rollback"}, {},
+ "Rollback group to snapshot.",
+ "", &get_group_snap_rollback_arguments, &execute_group_snap_rollback);
+
+} // namespace group
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/ImageMeta.cc b/src/tools/rbd/action/ImageMeta.cc
new file mode 100644
index 000000000..20c4555da
--- /dev/null
+++ b/src/tools/rbd/action/ImageMeta.cc
@@ -0,0 +1,345 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace image_meta {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+void add_key_option(po::options_description *positional) {
+ positional->add_options()
+ ("key", "image meta key");
+}
+
+int get_key(const po::variables_map &vm, size_t *arg_index,
+ std::string *key) {
+ *key = utils::get_positional_argument(vm, *arg_index);
+ if (key->empty()) {
+ std::cerr << "rbd: metadata key was not specified" << std::endl;
+ return -EINVAL;
+ } else {
+ ++(*arg_index);
+ }
+ return 0;
+}
+
+const uint32_t MAX_KEYS = 64;
+
+} // anonymous namespace
+
+static int do_metadata_list(librbd::Image& image, Formatter *f)
+{
+ int r;
+ TextTable tbl;
+
+ size_t count = 0;
+ std::string last_key;
+ bool more_results = true;
+ while (more_results) {
+ std::map<std::string, bufferlist> pairs;
+ r = image.metadata_list(last_key, MAX_KEYS, &pairs);
+ if (r < 0) {
+ std::cerr << "failed to list metadata of image : " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ more_results = (pairs.size() == MAX_KEYS);
+ if (!pairs.empty()) {
+ if (count == 0) {
+ if (f) {
+ f->open_object_section("metadatas");
+ } else {
+ tbl.define_column("Key", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT);
+ }
+ }
+
+ last_key = pairs.rbegin()->first;
+ count += pairs.size();
+
+ for (auto kv : pairs) {
+ std::string val(kv.second.c_str(), kv.second.length());
+ if (f) {
+ f->dump_string(kv.first.c_str(), val.c_str());
+ } else {
+ tbl << kv.first << val << TextTable::endrow;
+ }
+ }
+ }
+ }
+
+ if (f == nullptr) {
+ bool single = (count == 1);
+ std::cout << "There " << (single ? "is" : "are") << " " << count << " "
+ << (single ? "metadatum" : "metadata") << " on this image"
+ << (count == 0 ? "." : ":") << std::endl;
+ }
+
+ if (count > 0) {
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ std::cout << std::endl << tbl;
+ }
+ }
+ return 0;
+}
+
+static int do_metadata_set(librbd::Image& image, std::string &key,
+ std::string &value)
+{
+ int r = image.metadata_set(key, value);
+ if (r < 0) {
+ std::cerr << "failed to set metadata " << key << " of image : "
+ << cpp_strerror(r) << std::endl;
+ }
+ return r;
+}
+
+static int do_metadata_remove(librbd::Image& image, std::string &key)
+{
+ int r = image.metadata_remove(key);
+ if (r == -ENOENT) {
+ std::cerr << "rbd: no existing metadata key " << key << " of image : "
+ << cpp_strerror(r) << std::endl;
+ } else if(r < 0) {
+ std::cerr << "failed to remove metadata " << key << " of image : "
+ << cpp_strerror(r) << std::endl;
+ }
+ return r;
+}
+
+static int do_metadata_get(librbd::Image& image, std::string &key)
+{
+ std::string s;
+ int r = image.metadata_get(key, &s);
+ if (r < 0) {
+ std::cerr << "failed to get metadata " << key << " of image : "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ std::cout << s << std::endl;
+ return r;
+}
+
+void get_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_format_options(options);
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_metadata_list(image, formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: listing metadata failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ add_key_option(positional);
+}
+
+int execute_get(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_metadata_get(image, key);
+ if (r < 0) {
+ std::cerr << "rbd: getting metadata failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_set_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ add_key_option(positional);
+ positional->add_options()
+ ("value", "image meta value");
+}
+
+int execute_set(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string value = utils::get_positional_argument(vm, arg_index);
+ if (value.empty()) {
+ std::cerr << "rbd: metadata value was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_metadata_set(image, key, value);
+ if (r < 0) {
+ std::cerr << "rbd: setting metadata failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ add_key_option(positional);
+}
+
+int execute_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string key;
+ r = get_key(vm, &arg_index, &key);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_metadata_remove(image, key);
+ if (r < 0) {
+ std::cerr << "rbd: removing metadata failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action_list(
+ {"image-meta", "list"}, {"image-meta", "ls"}, "Image metadata list keys with values.", "",
+ &get_list_arguments, &execute_list);
+Shell::Action action_get(
+ {"image-meta", "get"}, {},
+ "Image metadata get the value associated with the key.", "",
+ &get_get_arguments, &execute_get);
+Shell::Action action_set(
+ {"image-meta", "set"}, {}, "Image metadata set key with value.", "",
+ &get_set_arguments, &execute_set);
+Shell::Action action_remove(
+ {"image-meta", "remove"}, {"image-meta", "rm"},
+ "Image metadata remove the key and value associated.", "",
+ &get_remove_arguments, &execute_remove);
+
+} // namespace image_meta
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Import.cc b/src/tools/rbd/action/Import.cc
new file mode 100644
index 000000000..3358c5bc6
--- /dev/null
+++ b/src/tools/rbd/action/Import.cc
@@ -0,0 +1,1036 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/Context.h"
+#include "common/blkdev.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Throttle.h"
+#include "include/compat.h"
+#include "include/encoding.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/safe_io.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+#include <boost/scoped_ptr.hpp>
+#include "include/ceph_assert.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+
+using std::cerr;
+using std::string;
+
+namespace rbd {
+namespace action {
+namespace import {
+
+struct ImportDiffContext {
+ librbd::Image *image;
+ int fd;
+ size_t size;
+ utils::ProgressContext pc;
+ OrderedThrottle throttle;
+ uint64_t last_offset;
+
+ ImportDiffContext(librbd::Image *image, int fd, size_t size, bool no_progress)
+ : image(image), fd(fd), size(size), pc("Importing image diff", no_progress),
+ throttle((fd == STDIN_FILENO) ? 1 :
+ g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"),
+ false),
+ last_offset(0) {
+ }
+
+ void update_size(size_t new_size)
+ {
+ if (fd == STDIN_FILENO) {
+ size = new_size;
+ }
+ }
+
+ void update_progress(uint64_t off)
+ {
+ if (size) {
+ pc.update_progress(off, size);
+ last_offset = off;
+ }
+ }
+
+ void update_progress()
+ {
+ uint64_t off = last_offset;
+ if (fd != STDIN_FILENO) {
+ off = lseek(fd, 0, SEEK_CUR);
+ }
+
+ update_progress(off);
+ }
+
+ void finish(int r)
+ {
+ if (r < 0) {
+ pc.fail();
+ } else {
+ pc.finish();
+ }
+ }
+};
+
+class C_ImportDiff : public Context {
+public:
+ C_ImportDiff(ImportDiffContext *idiffctx, bufferlist data, uint64_t offset,
+ uint64_t length, bool write_zeroes)
+ : m_idiffctx(idiffctx), m_data(data), m_offset(offset), m_length(length),
+ m_write_zeroes(write_zeroes) {
+ // use block offset (stdin) or import file position to report
+ // progress.
+ if (m_idiffctx->fd == STDIN_FILENO) {
+ m_prog_offset = offset;
+ } else {
+ m_prog_offset = lseek(m_idiffctx->fd, 0, SEEK_CUR);
+ }
+ }
+
+ int send()
+ {
+ if (m_idiffctx->throttle.pending_error()) {
+ return m_idiffctx->throttle.wait_for_ret();
+ }
+
+ C_OrderedThrottle *ctx = m_idiffctx->throttle.start_op(this);
+ librbd::RBD::AioCompletion *aio_completion =
+ new librbd::RBD::AioCompletion(ctx, &utils::aio_context_callback);
+
+ int r;
+ if (m_write_zeroes) {
+ r = m_idiffctx->image->aio_write_zeroes(m_offset, m_length,
+ aio_completion, 0U,
+ LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
+ } else {
+ r = m_idiffctx->image->aio_write2(m_offset, m_length, m_data,
+ aio_completion,
+ LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
+ }
+
+ if (r < 0) {
+ aio_completion->release();
+ ctx->complete(r);
+ }
+
+ return r;
+ }
+
+ void finish(int r) override
+ {
+ m_idiffctx->update_progress(m_prog_offset);
+ m_idiffctx->throttle.end_op(r);
+ }
+
+private:
+ ImportDiffContext *m_idiffctx;
+ bufferlist m_data;
+ uint64_t m_offset;
+ uint64_t m_length;
+ bool m_write_zeroes;
+ uint64_t m_prog_offset;
+};
+
+static int do_image_snap_from(ImportDiffContext *idiffctx)
+{
+ int r;
+ string from;
+ r = utils::read_string(idiffctx->fd, 4096, &from); // 4k limit to make sure we don't get a garbage string
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode start snap name" << std::endl;
+ return r;
+ }
+
+ bool exists;
+ r = idiffctx->image->snap_exists2(from.c_str(), &exists);
+ if (r < 0) {
+ std::cerr << "rbd: failed to query start snap state" << std::endl;
+ return r;
+ }
+
+ if (!exists) {
+ std::cerr << "start snapshot '" << from
+ << "' does not exist in the image, aborting" << std::endl;
+ return -EINVAL;
+ }
+
+ idiffctx->update_progress();
+ return 0;
+}
+
+static int do_image_snap_to(ImportDiffContext *idiffctx, std::string *tosnap)
+{
+ int r;
+ string to;
+ r = utils::read_string(idiffctx->fd, 4096, &to); // 4k limit to make sure we don't get a garbage string
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode end snap name" << std::endl;
+ return r;
+ }
+
+ bool exists;
+ r = idiffctx->image->snap_exists2(to.c_str(), &exists);
+ if (r < 0) {
+ std::cerr << "rbd: failed to query end snap state" << std::endl;
+ return r;
+ }
+
+ if (exists) {
+ std::cerr << "end snapshot '" << to << "' already exists, aborting"
+ << std::endl;
+ return -EEXIST;
+ }
+
+ *tosnap = to;
+ idiffctx->update_progress();
+
+ return 0;
+}
+
+static int get_snap_protection_status(ImportDiffContext *idiffctx,
+ bool *is_protected)
+{
+ int r;
+ char buf[sizeof(__u8)];
+ r = safe_read_exact(idiffctx->fd, buf, sizeof(buf));
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode snap protection status" << std::endl;
+ return r;
+ }
+
+ *is_protected = (buf[0] != 0);
+ idiffctx->update_progress();
+
+ return 0;
+}
+
+static int do_image_resize(ImportDiffContext *idiffctx)
+{
+ int r;
+ char buf[sizeof(uint64_t)];
+ uint64_t end_size;
+ r = safe_read_exact(idiffctx->fd, buf, sizeof(buf));
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode image size" << std::endl;
+ return r;
+ }
+
+ bufferlist bl;
+ bl.append(buf, sizeof(buf));
+ auto p = bl.cbegin();
+ decode(end_size, p);
+
+ uint64_t cur_size;
+ idiffctx->image->size(&cur_size);
+ if (cur_size != end_size) {
+ idiffctx->image->resize(end_size);
+ }
+
+ idiffctx->update_size(end_size);
+ idiffctx->update_progress();
+ return 0;
+}
+
+static int do_image_io(ImportDiffContext *idiffctx, bool write_zeroes,
+ size_t sparse_size)
+{
+ int r;
+ char buf[16];
+ r = safe_read_exact(idiffctx->fd, buf, sizeof(buf));
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode IO length" << std::endl;
+ return r;
+ }
+
+ bufferlist bl;
+ bl.append(buf, sizeof(buf));
+ auto p = bl.cbegin();
+
+ uint64_t image_offset, buffer_length;
+ decode(image_offset, p);
+ decode(buffer_length, p);
+
+ if (!write_zeroes) {
+ bufferptr bp = buffer::create(buffer_length);
+ r = safe_read_exact(idiffctx->fd, bp.c_str(), buffer_length);
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode write data" << std::endl;
+ return r;
+ }
+
+ size_t buffer_offset = 0;
+ while (buffer_offset < buffer_length) {
+ size_t write_length = 0;
+ bool zeroed = false;
+ utils::calc_sparse_extent(bp, sparse_size, buffer_offset, buffer_length,
+ &write_length, &zeroed);
+ ceph_assert(write_length > 0);
+
+ bufferlist write_bl;
+ if (!zeroed) {
+ bufferptr write_ptr(bp, buffer_offset, write_length);
+ write_bl.push_back(write_ptr);
+ ceph_assert(write_bl.length() == write_length);
+ }
+
+ C_ImportDiff *ctx = new C_ImportDiff(idiffctx, write_bl,
+ image_offset + buffer_offset,
+ write_length, zeroed);
+ r = ctx->send();
+ if (r < 0) {
+ return r;
+ }
+
+ buffer_offset += write_length;
+ }
+ } else {
+ bufferlist data;
+ C_ImportDiff *ctx = new C_ImportDiff(idiffctx, data, image_offset,
+ buffer_length, true);
+ return ctx->send();
+ }
+ return r;
+}
+
+static int validate_banner(int fd, std::string banner)
+{
+ int r;
+ char buf[banner.size() + 1];
+ memset(buf, 0, sizeof(buf));
+ r = safe_read_exact(fd, buf, banner.size());
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode diff banner" << std::endl;
+ return r;
+ }
+
+ buf[banner.size()] = '\0';
+ if (strcmp(buf, banner.c_str())) {
+ std::cerr << "rbd: invalid or unexpected diff banner" << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int skip_tag(int fd, uint64_t length)
+{
+ int r;
+
+ if (fd == STDIN_FILENO) {
+ // read the appending data out to skip this tag.
+ char buf[4096];
+ uint64_t len = std::min<uint64_t>(length, sizeof(buf));
+ while (len > 0) {
+ r = safe_read_exact(fd, buf, len);
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode skipped tag data" << std::endl;
+ return r;
+ }
+ length -= len;
+ len = std::min<uint64_t>(length, sizeof(buf));
+ }
+ } else {
+ // lseek to skip this tag
+ off64_t offs = lseek64(fd, length, SEEK_CUR);
+ if (offs < 0) {
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
+static int read_tag(int fd, __u8 end_tag, int format, __u8 *tag, uint64_t *readlen)
+{
+ int r;
+ __u8 read_tag;
+
+ r = safe_read_exact(fd, &read_tag, sizeof(read_tag));
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode tag" << std::endl;
+ return r;
+ }
+
+ *tag = read_tag;
+ if (read_tag != end_tag && format == 2) {
+ char buf[sizeof(uint64_t)];
+ r = safe_read_exact(fd, buf, sizeof(buf));
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode tag length" << std::endl;
+ return r;
+ }
+
+ bufferlist bl;
+ bl.append(buf, sizeof(buf));
+ auto p = bl.cbegin();
+ decode(*readlen, p);
+ }
+
+ return 0;
+}
+
+int do_import_diff_fd(librados::Rados &rados, librbd::Image &image, int fd,
+ bool no_progress, int format, size_t sparse_size)
+{
+ int r;
+
+ uint64_t size = 0;
+ bool from_stdin = (fd == STDIN_FILENO);
+ if (!from_stdin) {
+ struct stat stat_buf;
+ r = ::fstat(fd, &stat_buf);
+ if (r < 0) {
+ std::cerr << "rbd: failed to stat specified diff file" << std::endl;
+ return r;
+ }
+ size = (uint64_t)stat_buf.st_size;
+ }
+
+ r = validate_banner(fd, (format == 1 ? utils::RBD_DIFF_BANNER :
+ utils::RBD_DIFF_BANNER_V2));
+ if (r < 0) {
+ return r;
+ }
+
+ // begin image import
+ std::string tosnap;
+ bool is_protected = false;
+ ImportDiffContext idiffctx(&image, fd, size, no_progress);
+ while (r == 0) {
+ __u8 tag;
+ uint64_t length = 0;
+
+ r = read_tag(fd, RBD_DIFF_END, format, &tag, &length);
+ if (r < 0 || tag == RBD_DIFF_END) {
+ break;
+ }
+
+ if (tag == RBD_DIFF_FROM_SNAP) {
+ r = do_image_snap_from(&idiffctx);
+ } else if (tag == RBD_DIFF_TO_SNAP) {
+ r = do_image_snap_to(&idiffctx, &tosnap);
+ } else if (tag == RBD_SNAP_PROTECTION_STATUS) {
+ r = get_snap_protection_status(&idiffctx, &is_protected);
+ } else if (tag == RBD_DIFF_IMAGE_SIZE) {
+ r = do_image_resize(&idiffctx);
+ } else if (tag == RBD_DIFF_WRITE || tag == RBD_DIFF_ZERO) {
+ r = do_image_io(&idiffctx, (tag == RBD_DIFF_ZERO), sparse_size);
+ } else {
+ std::cerr << "unrecognized tag byte " << (int)tag << " in stream; skipping"
+ << std::endl;
+ r = skip_tag(fd, length);
+ }
+ }
+
+ int temp_r = idiffctx.throttle.wait_for_ret();
+ r = (r < 0) ? r : temp_r; // preserve original error
+ if (r == 0 && tosnap.length()) {
+ r = idiffctx.image->snap_create(tosnap.c_str());
+ if (r == 0 && is_protected) {
+ r = idiffctx.image->snap_protect(tosnap.c_str());
+ }
+ }
+
+ idiffctx.finish(r);
+ return r;
+}
+
+int do_import_diff(librados::Rados &rados, librbd::Image &image,
+ const char *path, bool no_progress, size_t sparse_size)
+{
+ int r;
+ int fd;
+
+ if (strcmp(path, "-") == 0) {
+ fd = STDIN_FILENO;
+ } else {
+ fd = open(path, O_RDONLY|O_BINARY);
+ if (fd < 0) {
+ r = -errno;
+ std::cerr << "rbd: error opening " << path << std::endl;
+ return r;
+ }
+ }
+ r = do_import_diff_fd(rados, image, fd, no_progress, 1, sparse_size);
+
+ if (fd != 0)
+ close(fd);
+ return r;
+}
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+void get_arguments_diff(po::options_description *positional,
+ po::options_description *options) {
+ at::add_path_options(positional, options,
+ "import file (or '-' for stdin)");
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_sparse_size_option(options);
+ at::add_no_progress_option(options);
+}
+
+int execute_diff(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string path;
+ size_t arg_index = 0;
+ int r = utils::get_path(vm, &arg_index, &path);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE;
+ if (vm.count(at::IMAGE_SPARSE_SIZE)) {
+ sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>();
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_import_diff(rados, image, path.c_str(),
+ vm[at::NO_PROGRESS].as<bool>(), sparse_size);
+ if (r == -EDOM) {
+ r = -EBADMSG;
+ }
+ if (r < 0) {
+ cerr << "rbd: import-diff failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action_diff(
+ {"import-diff"}, {}, "Import an incremental diff.", "", &get_arguments_diff,
+ &execute_diff);
+
+class C_Import : public Context {
+public:
+ C_Import(SimpleThrottle &simple_throttle, librbd::Image &image,
+ bufferlist &bl, uint64_t offset)
+ : m_throttle(simple_throttle), m_image(image),
+ m_aio_completion(
+ new librbd::RBD::AioCompletion(this, &utils::aio_context_callback)),
+ m_bufferlist(bl), m_offset(offset)
+ {
+ }
+
+ void send()
+ {
+ m_throttle.start_op();
+
+ int op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
+ LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
+ int r = m_image.aio_write2(m_offset, m_bufferlist.length(), m_bufferlist,
+ m_aio_completion, op_flags);
+ if (r < 0) {
+ std::cerr << "rbd: error requesting write to destination image"
+ << std::endl;
+ m_aio_completion->release();
+ m_throttle.end_op(r);
+ }
+ }
+
+ void finish(int r) override
+ {
+ if (r < 0) {
+ std::cerr << "rbd: error writing to destination image at offset "
+ << m_offset << ": " << cpp_strerror(r) << std::endl;
+ }
+ m_throttle.end_op(r);
+ }
+
+private:
+ SimpleThrottle &m_throttle;
+ librbd::Image &m_image;
+ librbd::RBD::AioCompletion *m_aio_completion;
+ bufferlist m_bufferlist;
+ uint64_t m_offset;
+};
+
+static int decode_and_set_image_option(int fd, uint64_t imageopt, librbd::ImageOptions& opts)
+{
+ int r;
+ char buf[sizeof(uint64_t)];
+
+ r = safe_read_exact(fd, buf, sizeof(buf));
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode image option" << std::endl;
+ return r;
+ }
+
+ bufferlist bl;
+ bl.append(buf, sizeof(buf));
+ auto it = bl.cbegin();
+
+ uint64_t val;
+ decode(val, it);
+
+ if (opts.get(imageopt, &val) != 0) {
+ opts.set(imageopt, val);
+ }
+
+ return 0;
+}
+
+static int do_import_metadata(int import_format, librbd::Image& image,
+ const std::map<std::string, std::string> &imagemetas)
+{
+ int r = 0;
+
+ //v1 format
+ if (import_format == 1) {
+ return 0;
+ }
+
+ for (std::map<std::string, std::string>::const_iterator it = imagemetas.begin();
+ it != imagemetas.end(); ++it) {
+ r = image.metadata_set(it->first, it->second);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int decode_imagemeta(int fd, uint64_t length, std::map<std::string, std::string>* imagemetas)
+{
+ int r;
+ string key;
+ string value;
+
+ r = utils::read_string(fd, length, &key);
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode metadata key" << std::endl;
+ return r;
+ }
+
+ r = utils::read_string(fd, length, &value);
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode metadata value" << std::endl;
+ return r;
+ }
+
+ (*imagemetas)[key] = value;
+ return 0;
+}
+
+static int do_import_header(int fd, int import_format, librbd::ImageOptions& opts,
+ std::map<std::string, std::string>* imagemetas)
+{
+ // There is no header in v1 image.
+ if (import_format == 1) {
+ return 0;
+ }
+
+ int r;
+ r = validate_banner(fd, utils::RBD_IMAGE_BANNER_V2);
+ if (r < 0) {
+ return r;
+ }
+
+ // As V1 format for image is already deprecated, import image in V2 by default.
+ uint64_t image_format = 2;
+ if (opts.get(RBD_IMAGE_OPTION_FORMAT, &image_format) != 0) {
+ opts.set(RBD_IMAGE_OPTION_FORMAT, image_format);
+ }
+
+ while (r == 0) {
+ __u8 tag;
+ uint64_t length = 0;
+ r = read_tag(fd, RBD_EXPORT_IMAGE_END, image_format, &tag, &length);
+ if (r < 0 || tag == RBD_EXPORT_IMAGE_END) {
+ break;
+ }
+
+ if (tag == RBD_EXPORT_IMAGE_ORDER) {
+ r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_ORDER, opts);
+ } else if (tag == RBD_EXPORT_IMAGE_FEATURES) {
+ r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_FEATURES, opts);
+ } else if (tag == RBD_EXPORT_IMAGE_STRIPE_UNIT) {
+ r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_STRIPE_UNIT, opts);
+ } else if (tag == RBD_EXPORT_IMAGE_STRIPE_COUNT) {
+ r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_STRIPE_COUNT, opts);
+ } else if (tag == RBD_EXPORT_IMAGE_META) {
+ r = decode_imagemeta(fd, length, imagemetas);
+ } else {
+ std::cerr << "rbd: invalid tag in image properties zone: " << tag << "Skip it."
+ << std::endl;
+ r = skip_tag(fd, length);
+ }
+ }
+
+ return r;
+}
+
+static int do_import_v2(librados::Rados &rados, int fd, librbd::Image &image,
+ uint64_t size, size_t imgblklen,
+ utils::ProgressContext &pc, size_t sparse_size)
+{
+ int r = 0;
+ r = validate_banner(fd, utils::RBD_IMAGE_DIFFS_BANNER_V2);
+ if (r < 0) {
+ return r;
+ }
+
+ char buf[sizeof(uint64_t)];
+ r = safe_read_exact(fd, buf, sizeof(buf));
+ if (r < 0) {
+ std::cerr << "rbd: failed to decode diff count" << std::endl;
+ return r;
+ }
+ bufferlist bl;
+ bl.append(buf, sizeof(buf));
+ auto p = bl.cbegin();
+ uint64_t diff_num;
+ decode(diff_num, p);
+ for (size_t i = 0; i < diff_num; i++) {
+ r = do_import_diff_fd(rados, image, fd, true, 2, sparse_size);
+ if (r < 0) {
+ pc.fail();
+ std::cerr << "rbd: import-diff failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ pc.update_progress(i + 1, diff_num);
+ }
+
+ return r;
+}
+
+static int do_import_v1(int fd, librbd::Image &image, uint64_t size,
+ size_t imgblklen, utils::ProgressContext &pc,
+ size_t sparse_size)
+{
+ int r = 0;
+ size_t reqlen = imgblklen; // amount requested from read
+ ssize_t readlen; // amount received from one read
+ size_t blklen = 0; // amount accumulated from reads to fill blk
+ char *p = new char[imgblklen];
+ uint64_t image_pos = 0;
+ bool from_stdin = (fd == STDIN_FILENO);
+ boost::scoped_ptr<SimpleThrottle> throttle;
+
+ if (from_stdin) {
+ throttle.reset(new SimpleThrottle(1, false));
+ } else {
+ throttle.reset(new SimpleThrottle(
+ g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"), false));
+ }
+
+ reqlen = std::min<uint64_t>(reqlen, size);
+ // loop body handles 0 return, as we may have a block to flush
+ while ((readlen = ::read(fd, p + blklen, reqlen)) >= 0) {
+ if (throttle->pending_error()) {
+ break;
+ }
+
+ blklen += readlen;
+ // if read was short, try again to fill the block before writing
+ if (readlen && ((size_t)readlen < reqlen)) {
+ reqlen -= readlen;
+ continue;
+ }
+ if (!from_stdin)
+ pc.update_progress(image_pos, size);
+
+ bufferptr blkptr(p, blklen);
+ // resize output image by binary expansion as we go for stdin
+ if (from_stdin && (image_pos + (size_t)blklen) > size) {
+ size *= 2;
+ r = image.resize(size);
+ if (r < 0) {
+ std::cerr << "rbd: can't resize image during import" << std::endl;
+ goto out;
+ }
+ }
+
+ // write as much as we got; perhaps less than imgblklen
+ // but skip writing zeros to create sparse images
+ size_t buffer_offset = 0;
+ while (buffer_offset < blklen) {
+ size_t write_length = 0;
+ bool zeroed = false;
+ utils::calc_sparse_extent(blkptr, sparse_size, buffer_offset, blklen,
+ &write_length, &zeroed);
+
+ if (!zeroed) {
+ bufferlist write_bl;
+ bufferptr write_ptr(blkptr, buffer_offset, write_length);
+ write_bl.push_back(write_ptr);
+ ceph_assert(write_bl.length() == write_length);
+
+ C_Import *ctx = new C_Import(*throttle, image, write_bl,
+ image_pos + buffer_offset);
+ ctx->send();
+ }
+
+ buffer_offset += write_length;
+ }
+
+ // done with whole block, whether written or not
+ image_pos += blklen;
+ if (!from_stdin && image_pos >= size)
+ break;
+ // if read had returned 0, we're at EOF and should quit
+ if (readlen == 0)
+ break;
+ blklen = 0;
+ reqlen = imgblklen;
+ }
+ r = throttle->wait_for_ret();
+ if (r < 0) {
+ goto out;
+ }
+
+ if (fd == STDIN_FILENO) {
+ r = image.resize(image_pos);
+ if (r < 0) {
+ std::cerr << "rbd: final image resize failed" << std::endl;
+ goto out;
+ }
+ }
+out:
+ delete[] p;
+ return r;
+}
+
+static int do_import(librados::Rados &rados, librbd::RBD &rbd,
+ librados::IoCtx& io_ctx, const char *imgname,
+ const char *path, librbd::ImageOptions& opts,
+ bool no_progress, int import_format, size_t sparse_size)
+{
+ int fd, r;
+ struct stat stat_buf;
+ utils::ProgressContext pc("Importing image", no_progress);
+ std::map<std::string, std::string> imagemetas;
+
+ ceph_assert(imgname);
+
+ uint64_t order;
+ if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) {
+ order = g_conf().get_val<uint64_t>("rbd_default_order");
+ }
+
+ // try to fill whole imgblklen blocks for sparsification
+ size_t imgblklen = 1 << order;
+ librbd::Image image;
+ uint64_t size = 0;
+
+ bool from_stdin = !strcmp(path, "-");
+ if (from_stdin) {
+ fd = STDIN_FILENO;
+ size = 1ULL << order;
+ } else {
+ if ((fd = open(path, O_RDONLY|O_BINARY)) < 0) {
+ r = -errno;
+ std::cerr << "rbd: error opening " << path << std::endl;
+ goto done2;
+ }
+
+ if ((fstat(fd, &stat_buf)) < 0) {
+ r = -errno;
+ std::cerr << "rbd: stat error " << path << std::endl;
+ goto done;
+ }
+ if (S_ISDIR(stat_buf.st_mode)) {
+ r = -EISDIR;
+ std::cerr << "rbd: cannot import a directory" << std::endl;
+ goto done;
+ }
+ if (stat_buf.st_size)
+ size = (uint64_t)stat_buf.st_size;
+
+ if (!size) {
+ int64_t bdev_size = 0;
+ BlkDev blkdev(fd);
+ r = blkdev.get_size(&bdev_size);
+ if (r < 0) {
+ std::cerr << "rbd: unable to get size of file/block device"
+ << std::endl;
+ goto done;
+ }
+ ceph_assert(bdev_size >= 0);
+ size = (uint64_t) bdev_size;
+ }
+#ifdef HAVE_POSIX_FADVISE
+ posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+ }
+
+ r = do_import_header(fd, import_format, opts, &imagemetas);
+ if (r < 0) {
+ std::cerr << "rbd: import header failed." << std::endl;
+ goto done;
+ }
+
+ r = rbd.create4(io_ctx, imgname, size, opts);
+ if (r < 0) {
+ std::cerr << "rbd: image creation failed" << std::endl;
+ goto done;
+ }
+
+ r = rbd.open(io_ctx, image, imgname);
+ if (r < 0) {
+ std::cerr << "rbd: failed to open image" << std::endl;
+ goto err;
+ }
+
+ r = do_import_metadata(import_format, image, imagemetas);
+ if (r < 0) {
+ std::cerr << "rbd: failed to import image-meta" << std::endl;
+ goto err;
+ }
+
+ if (import_format == 1) {
+ r = do_import_v1(fd, image, size, imgblklen, pc, sparse_size);
+ } else {
+ r = do_import_v2(rados, fd, image, size, imgblklen, pc, sparse_size);
+ }
+ if (r < 0) {
+ std::cerr << "rbd: failed to import image" << std::endl;
+ image.close();
+ goto err;
+ }
+
+ r = image.close();
+err:
+ if (r < 0)
+ rbd.remove(io_ctx, imgname);
+done:
+ if (r < 0)
+ pc.fail();
+ else
+ pc.finish();
+ if (!from_stdin)
+ close(fd);
+done2:
+ return r;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_path_options(positional, options,
+ "import file (or '-' for stdin)");
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
+ at::add_create_image_options(options, true);
+ at::add_sparse_size_option(options);
+ at::add_no_progress_option(options);
+ at::add_export_format_option(options);
+
+ // TODO legacy rbd allowed import to accept both 'image'/'dest' and
+ // 'pool'/'dest-pool'
+ at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE, " deprecated[:dest-pool]");
+ at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE, " deprecated[:dest]");
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string path;
+ size_t arg_index = 0;
+ int r = utils::get_path(vm, &arg_index, &path);
+ if (r < 0) {
+ return r;
+ }
+
+ // odd check to support legacy / deprecated behavior of import
+ std::string deprecated_pool_name;
+ if (vm.count(at::POOL_NAME)) {
+ deprecated_pool_name = vm[at::POOL_NAME].as<std::string>();
+ }
+
+ std::string deprecated_image_name;
+ if (vm.count(at::IMAGE_NAME)) {
+ deprecated_image_name = vm[at::IMAGE_NAME].as<std::string>();
+ } else {
+ deprecated_image_name = path.substr(path.find_last_of("/\\") + 1);
+ }
+
+ std::string deprecated_snap_name;
+ r = utils::extract_spec(deprecated_image_name, &deprecated_pool_name,
+ nullptr, &deprecated_image_name,
+ &deprecated_snap_name, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE;
+ if (vm.count(at::IMAGE_SPARSE_SIZE)) {
+ sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>();
+ }
+
+ std::string pool_name = deprecated_pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name = deprecated_snap_name;
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, false, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ if (image_name.empty()) {
+ image_name = deprecated_image_name;
+ }
+
+ librbd::ImageOptions opts;
+ r = utils::get_image_options(vm, true, &opts);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ int format = 1;
+ if (vm.count("export-format"))
+ format = vm["export-format"].as<uint64_t>();
+
+ librbd::RBD rbd;
+ r = do_import(rados, rbd, io_ctx, image_name.c_str(), path.c_str(),
+ opts, vm[at::NO_PROGRESS].as<bool>(), format, sparse_size);
+ if (r < 0) {
+ std::cerr << "rbd: import failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+Shell::Action action(
+ {"import"}, {}, "Import image from file.", at::get_long_features_help(),
+ &get_arguments, &execute);
+
+} // namespace import
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Info.cc b/src/tools/rbd/action/Info.cc
new file mode 100644
index 000000000..f8d053cd7
--- /dev/null
+++ b/src/tools/rbd/action/Info.cc
@@ -0,0 +1,471 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/types.h"
+#include "include/stringify.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+#include "common/Clock.h"
+
+namespace rbd {
+namespace action {
+namespace info {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static void format_bitmask(Formatter *f, const std::string &name,
+ const std::map<uint64_t, std::string>& mapping,
+ uint64_t bitmask)
+{
+ int count = 0;
+ std::string group_name(name + "s");
+ if (f == NULL) {
+ std::cout << "\t" << group_name << ": ";
+ } else {
+ f->open_array_section(group_name.c_str());
+ }
+ for (std::map<uint64_t, std::string>::const_iterator it = mapping.begin();
+ it != mapping.end(); ++it) {
+ if ((it->first & bitmask) == 0) {
+ continue;
+ }
+
+ if (f == NULL) {
+ if (count++ > 0) {
+ std::cout << ", ";
+ }
+ std::cout << it->second;
+ } else {
+ f->dump_string(name.c_str(), it->second);
+ }
+ }
+ if (f == NULL) {
+ std::cout << std::endl;
+ } else {
+ f->close_section();
+ }
+}
+
+static void format_features(Formatter *f, uint64_t features)
+{
+ format_bitmask(f, "feature", at::ImageFeatures::FEATURE_MAPPING, features);
+}
+
+static void format_op_features(Formatter *f, uint64_t op_features)
+{
+ static std::map<uint64_t, std::string> mapping = {
+ {RBD_OPERATION_FEATURE_CLONE_PARENT, RBD_OPERATION_FEATURE_NAME_CLONE_PARENT},
+ {RBD_OPERATION_FEATURE_CLONE_CHILD, RBD_OPERATION_FEATURE_NAME_CLONE_CHILD},
+ {RBD_OPERATION_FEATURE_GROUP, RBD_OPERATION_FEATURE_NAME_GROUP},
+ {RBD_OPERATION_FEATURE_SNAP_TRASH, RBD_OPERATION_FEATURE_NAME_SNAP_TRASH}};
+ format_bitmask(f, "op_feature", mapping, op_features);
+}
+
+static void format_flags(Formatter *f, uint64_t flags)
+{
+ std::map<uint64_t, std::string> mapping = {
+ {RBD_FLAG_OBJECT_MAP_INVALID, "object map invalid"},
+ {RBD_FLAG_FAST_DIFF_INVALID, "fast diff invalid"}};
+ format_bitmask(f, "flag", mapping, flags);
+}
+
+void format_timestamp(struct timespec timestamp, std::string &timestamp_str) {
+ if(timestamp.tv_sec != 0) {
+ time_t ts = timestamp.tv_sec;
+ timestamp_str = ctime(&ts);
+ timestamp_str = timestamp_str.substr(0, timestamp_str.length() - 1);
+ }
+}
+
+static int do_show_info(librados::IoCtx &io_ctx, librbd::Image& image,
+ const std::string &snapname, Formatter *f)
+{
+ librbd::image_info_t info;
+ uint8_t old_format;
+ uint64_t overlap, features, flags, snap_limit;
+ bool snap_protected = false;
+ librbd::mirror_image_info_t mirror_image;
+ librbd::mirror_image_mode_t mirror_mode = RBD_MIRROR_IMAGE_MODE_JOURNAL;
+ std::vector<librbd::snap_info_t> snaps;
+ int r;
+
+ std::string imgname;
+ r = image.get_name(&imgname);
+ if (r < 0)
+ return r;
+
+ r = image.snap_list(snaps);
+ if (r < 0)
+ return r;
+
+ r = image.stat(info, sizeof(info));
+ if (r < 0)
+ return r;
+
+ r = image.old_format(&old_format);
+ if (r < 0)
+ return r;
+
+ std::string imgid;
+ if (!old_format) {
+ r = image.get_id(&imgid);
+ if (r < 0)
+ return r;
+ }
+
+ std::string data_pool;
+ if (!old_format) {
+ int64_t data_pool_id = image.get_data_pool_id();
+ if (data_pool_id != io_ctx.get_id()) {
+ librados::Rados rados(io_ctx);
+ librados::IoCtx data_io_ctx;
+ r = rados.ioctx_create2(data_pool_id, data_io_ctx);
+ if (r < 0) {
+ data_pool = "<missing data pool " + stringify(data_pool_id) + ">";
+ } else {
+ data_pool = data_io_ctx.get_pool_name();
+ }
+ }
+ }
+
+ r = image.overlap(&overlap);
+ if (r < 0)
+ return r;
+
+ r = image.features(&features);
+ if (r < 0)
+ return r;
+
+ uint64_t op_features;
+ r = image.get_op_features(&op_features);
+ if (r < 0) {
+ return r;
+ }
+
+ r = image.get_flags(&flags);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!snapname.empty()) {
+ r = image.snap_is_protected(snapname.c_str(), &snap_protected);
+ if (r < 0)
+ return r;
+ }
+
+ mirror_image.state = RBD_MIRROR_IMAGE_DISABLED;
+ r = image.mirror_image_get_info(&mirror_image, sizeof(mirror_image));
+ if (r < 0) {
+ return r;
+ }
+
+ if (mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) {
+ r = image.mirror_image_get_mode(&mirror_mode);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ r = image.snap_get_limit(&snap_limit);
+ if (r < 0)
+ return r;
+
+ std::string prefix = image.get_block_name_prefix();
+
+ librbd::group_info_t group_info;
+ r = image.get_group(&group_info, sizeof(group_info));
+ if (r < 0) {
+ return r;
+ }
+
+ std::string group_string = "";
+ if (RBD_GROUP_INVALID_POOL != group_info.pool) {
+ std::string group_pool;
+ librados::Rados rados(io_ctx);
+ librados::IoCtx group_io_ctx;
+ r = rados.ioctx_create2(group_info.pool, group_io_ctx);
+ if (r < 0) {
+ group_pool = "<missing group pool " + stringify(group_info.pool) + ">";
+ } else {
+ group_pool = group_io_ctx.get_pool_name();
+ }
+
+ group_string = group_pool + "/";
+ if (!io_ctx.get_namespace().empty()) {
+ group_string += io_ctx.get_namespace() + "/";
+ }
+ group_string += group_info.name;
+ }
+
+ struct timespec create_timestamp;
+ image.get_create_timestamp(&create_timestamp);
+
+ std::string create_timestamp_str = "";
+ format_timestamp(create_timestamp, create_timestamp_str);
+
+ struct timespec access_timestamp;
+ image.get_access_timestamp(&access_timestamp);
+
+ std::string access_timestamp_str = "";
+ format_timestamp(access_timestamp, access_timestamp_str);
+
+ struct timespec modify_timestamp;
+ image.get_modify_timestamp(&modify_timestamp);
+
+ std::string modify_timestamp_str = "";
+ format_timestamp(modify_timestamp, modify_timestamp_str);
+
+ if (f) {
+ f->open_object_section("image");
+ f->dump_string("name", imgname);
+ f->dump_string("id", imgid);
+ f->dump_unsigned("size", info.size);
+ f->dump_unsigned("objects", info.num_objs);
+ f->dump_int("order", info.order);
+ f->dump_unsigned("object_size", info.obj_size);
+ f->dump_int("snapshot_count", snaps.size());
+ if (!data_pool.empty()) {
+ f->dump_string("data_pool", data_pool);
+ }
+ f->dump_string("block_name_prefix", prefix);
+ f->dump_int("format", (old_format ? 1 : 2));
+ } else {
+ std::cout << "rbd image '" << imgname << "':\n"
+ << "\tsize " << byte_u_t(info.size) << " in "
+ << info.num_objs << " objects"
+ << std::endl
+ << "\torder " << info.order
+ << " (" << byte_u_t(info.obj_size) << " objects)"
+ << std::endl
+ << "\tsnapshot_count: " << snaps.size()
+ << std::endl;
+ if (!imgid.empty()) {
+ std::cout << "\tid: " << imgid << std::endl;
+ }
+ if (!data_pool.empty()) {
+ std::cout << "\tdata_pool: " << data_pool << std::endl;
+ }
+ std::cout << "\tblock_name_prefix: " << prefix
+ << std::endl
+ << "\tformat: " << (old_format ? "1" : "2")
+ << std::endl;
+ }
+
+ if (!old_format) {
+ format_features(f, features);
+ format_op_features(f, op_features);
+ format_flags(f, flags);
+ }
+
+ if (!group_string.empty()) {
+ if (f) {
+ f->dump_string("group", group_string);
+ } else {
+ std::cout << "\tgroup: " << group_string
+ << std::endl;
+ }
+ }
+
+ if (!create_timestamp_str.empty()) {
+ if (f) {
+ f->dump_string("create_timestamp", create_timestamp_str);
+ } else {
+ std::cout << "\tcreate_timestamp: " << create_timestamp_str
+ << std::endl;
+ }
+ }
+
+ if (!access_timestamp_str.empty()) {
+ if (f) {
+ f->dump_string("access_timestamp", access_timestamp_str);
+ } else {
+ std::cout << "\taccess_timestamp: " << access_timestamp_str
+ << std::endl;
+ }
+ }
+
+ if (!modify_timestamp_str.empty()) {
+ if (f) {
+ f->dump_string("modify_timestamp", modify_timestamp_str);
+ } else {
+ std::cout << "\tmodify_timestamp: " << modify_timestamp_str
+ << std::endl;
+ }
+ }
+
+ // snapshot info, if present
+ if (!snapname.empty()) {
+ if (f) {
+ f->dump_string("protected", snap_protected ? "true" : "false");
+ } else {
+ std::cout << "\tprotected: " << (snap_protected ? "True" : "False")
+ << std::endl;
+ }
+ }
+
+ if (snap_limit < UINT64_MAX) {
+ if (f) {
+ f->dump_unsigned("snapshot_limit", snap_limit);
+ } else {
+ std::cout << "\tsnapshot_limit: " << snap_limit << std::endl;
+ }
+ }
+
+ // parent info, if present
+ librbd::linked_image_spec_t parent_image_spec;
+ librbd::snap_spec_t parent_snap_spec;
+ if ((image.get_parent(&parent_image_spec, &parent_snap_spec) == 0) &&
+ (parent_image_spec.image_name.length() > 0)) {
+ if (f) {
+ f->open_object_section("parent");
+ f->dump_string("pool", parent_image_spec.pool_name);
+ f->dump_string("pool_namespace", parent_image_spec.pool_namespace);
+ f->dump_string("image", parent_image_spec.image_name);
+ f->dump_string("id", parent_image_spec.image_id);
+ f->dump_string("snapshot", parent_snap_spec.name);
+ f->dump_bool("trash", parent_image_spec.trash);
+ f->dump_unsigned("overlap", overlap);
+ f->close_section();
+ } else {
+ std::cout << "\tparent: " << parent_image_spec.pool_name << "/";
+ if (!parent_image_spec.pool_namespace.empty()) {
+ std::cout << parent_image_spec.pool_namespace << "/";
+ }
+ std::cout << parent_image_spec.image_name << "@"
+ << parent_snap_spec.name;
+ if (parent_image_spec.trash) {
+ std::cout << " (trash " << parent_image_spec.image_id << ")";
+ }
+ std::cout << std::endl;
+ std::cout << "\toverlap: " << byte_u_t(overlap) << std::endl;
+ }
+ }
+
+ // striping info, if feature is set
+ if (features & RBD_FEATURE_STRIPINGV2) {
+ if (f) {
+ f->dump_unsigned("stripe_unit", image.get_stripe_unit());
+ f->dump_unsigned("stripe_count", image.get_stripe_count());
+ } else {
+ std::cout << "\tstripe unit: " << byte_u_t(image.get_stripe_unit())
+ << std::endl
+ << "\tstripe count: " << image.get_stripe_count() << std::endl;
+ }
+ }
+
+ if (features & RBD_FEATURE_JOURNALING) {
+ if (f) {
+ f->dump_string("journal", utils::image_id(image));
+ } else {
+ std::cout << "\tjournal: " << utils::image_id(image) << std::endl;
+ }
+ }
+
+ if (features & RBD_FEATURE_JOURNALING ||
+ mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) {
+ if (f) {
+ f->open_object_section("mirroring");
+ f->dump_string("mode",
+ utils::mirror_image_mode(mirror_mode));
+ f->dump_string("state",
+ utils::mirror_image_state(mirror_image.state));
+ if (mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) {
+ f->dump_string("global_id", mirror_image.global_id);
+ f->dump_bool("primary", mirror_image.primary);
+ }
+ f->close_section();
+ } else {
+ std::cout << "\tmirroring state: "
+ << utils::mirror_image_state(mirror_image.state) << std::endl;
+ if (mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) {
+ std::cout << "\tmirroring mode: "
+ << utils::mirror_image_mode(mirror_mode) << std::endl
+ << "\tmirroring global id: " << mirror_image.global_id
+ << std::endl
+ << "\tmirroring primary: "
+ << (mirror_image.primary ? "true" : "false") <<std::endl;
+ }
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ }
+
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_id_option(options);
+ at::add_format_options(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ std::string image_id;
+
+ if (vm.count(at::IMAGE_ID)) {
+ image_id = vm[at::IMAGE_ID].as<std::string>();
+ }
+
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, image_id.empty(),
+ utils::SNAPSHOT_PRESENCE_PERMITTED, utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!image_id.empty() && !image_name.empty()) {
+ std::cerr << "rbd: trying to access image using both name and id. "
+ << std::endl;
+ return -EINVAL;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name,
+ image_id, snap_name, true, &rados, &io_ctx,
+ &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_show_info(io_ctx, image, snap_name, formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: info: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"info"}, {}, "Show information about image size, striping, etc.", "",
+ &get_arguments, &execute);
+
+} // namespace info
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Journal.cc b/src/tools/rbd/action/Journal.cc
new file mode 100644
index 000000000..08606fcc3
--- /dev/null
+++ b/src/tools/rbd/action/Journal.cc
@@ -0,0 +1,1251 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/Cond.h"
+#include "common/Formatter.h"
+#include "common/ceph_json.h"
+#include "common/errno.h"
+#include "common/safe_io.h"
+#include "include/stringify.h"
+#include <fstream>
+#include <sstream>
+#include <boost/program_options.hpp>
+#include "cls/rbd/cls_rbd_client.h"
+#include "cls/journal/cls_journal_types.h"
+#include "cls/journal/cls_journal_client.h"
+
+#include "journal/Journaler.h"
+#include "journal/ReplayEntry.h"
+#include "journal/ReplayHandler.h"
+#include "journal/Settings.h"
+#include "librbd/journal/Types.h"
+
+namespace rbd {
+namespace action {
+namespace journal {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static const std::string JOURNAL_SPEC("journal-spec");
+static const std::string JOURNAL_NAME("journal");
+static const std::string DEST_JOURNAL_NAME("dest-journal");
+
+void add_journal_option(po::options_description *opt,
+ at::ArgumentModifier modifier) {
+ std::string name = JOURNAL_NAME;
+ std::string description = at::get_description_prefix(modifier) +
+ "journal name";
+ switch (modifier) {
+ case at::ARGUMENT_MODIFIER_NONE:
+ case at::ARGUMENT_MODIFIER_SOURCE:
+ break;
+ case at::ARGUMENT_MODIFIER_DEST:
+ name = DEST_JOURNAL_NAME;
+ break;
+ }
+
+ // TODO add validator
+ opt->add_options()
+ (name.c_str(), po::value<std::string>(), description.c_str());
+}
+
+void add_journal_spec_options(po::options_description *pos,
+ po::options_description *opt,
+ at::ArgumentModifier modifier) {
+
+ pos->add_options()
+ ((get_name_prefix(modifier) + JOURNAL_SPEC).c_str(),
+ (get_description_prefix(modifier) + "journal specification\n" +
+ "(example: [<pool-name>/[<namespace>/]]<journal-name>)").c_str());
+ add_pool_option(opt, modifier);
+ add_namespace_option(opt, modifier);
+ add_image_option(opt, modifier);
+ add_journal_option(opt, modifier);
+}
+
+int get_pool_journal_names(const po::variables_map &vm,
+ at::ArgumentModifier mod,
+ size_t *spec_arg_index,
+ std::string *pool_name,
+ std::string *namespace_name,
+ std::string *journal_name) {
+ std::string pool_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
+ at::DEST_POOL_NAME : at::POOL_NAME);
+ std::string namespace_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
+ at::DEST_NAMESPACE_NAME : at::NAMESPACE_NAME);
+ std::string image_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
+ at::DEST_IMAGE_NAME : at::IMAGE_NAME);
+ std::string journal_key = (mod == at::ARGUMENT_MODIFIER_DEST ?
+ DEST_JOURNAL_NAME : JOURNAL_NAME);
+
+ if (vm.count(pool_key) && pool_name != nullptr) {
+ *pool_name = vm[pool_key].as<std::string>();
+ }
+ if (vm.count(namespace_key) && namespace_name != nullptr) {
+ *namespace_name = vm[namespace_key].as<std::string>();
+ }
+ if (vm.count(journal_key) && journal_name != nullptr) {
+ *journal_name = vm[journal_key].as<std::string>();
+ }
+
+ std::string image_name;
+ if (vm.count(image_key)) {
+ image_name = vm[image_key].as<std::string>();
+ }
+
+ int r;
+ if (journal_name != nullptr && !journal_name->empty()) {
+ // despite the separate pool option,
+ // we can also specify them via the journal option
+ std::string journal_name_copy(*journal_name);
+ r = extract_spec(journal_name_copy, pool_name, namespace_name, journal_name,
+ nullptr, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (!image_name.empty()) {
+ // despite the separate pool option,
+ // we can also specify them via the image option
+ std::string image_name_copy(image_name);
+ r = extract_spec(image_name_copy, pool_name, namespace_name, &image_name,
+ nullptr, utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (journal_name != nullptr && spec_arg_index != nullptr &&
+ journal_name->empty()) {
+ std::string spec = utils::get_positional_argument(vm, (*spec_arg_index)++);
+ if (!spec.empty()) {
+ r = extract_spec(spec, pool_name, namespace_name, journal_name, nullptr,
+ utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+
+ if (pool_name != nullptr && pool_name->empty()) {
+ *pool_name = utils::get_default_pool_name();
+ }
+
+ if (pool_name != nullptr && namespace_name != nullptr &&
+ journal_name != nullptr && journal_name->empty() && !image_name.empty()) {
+ // Try to get journal name from image info.
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ int r = utils::init_and_open_image(*pool_name, *namespace_name, image_name,
+ "", "", true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ std::cerr << "rbd: failed to open image " << image_name
+ << " to get journal name: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ uint64_t features;
+ r = image.features(&features);
+ if (r < 0) {
+ return r;
+ }
+ if ((features & RBD_FEATURE_JOURNALING) == 0) {
+ std::cerr << "rbd: journaling is not enabled for image " << image_name
+ << std::endl;
+ return -EINVAL;
+ }
+ *journal_name = utils::image_id(image);
+ }
+
+ if (journal_name != nullptr && journal_name->empty()) {
+ std::string prefix = at::get_description_prefix(mod);
+ std::cerr << "rbd: "
+ << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string())
+ << "journal was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int do_show_journal_info(librados::Rados& rados, librados::IoCtx& io_ctx,
+ const std::string& journal_id, Formatter *f)
+{
+ int r;
+ C_SaferCond cond;
+
+ std::string header_oid = ::journal::Journaler::header_oid(journal_id);
+ std::string object_oid_prefix = ::journal::Journaler::object_oid_prefix(
+ io_ctx.get_id(), journal_id);
+ uint8_t order;
+ uint8_t splay_width;
+ int64_t pool_id;
+
+ cls::journal::client::get_immutable_metadata(io_ctx, header_oid, &order,
+ &splay_width, &pool_id, &cond);
+ r = cond.wait();
+ if (r < 0) {
+ std::cerr << "failed to get journal metadata: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ std::string object_pool_name;
+ if (pool_id >= 0) {
+ r = rados.pool_reverse_lookup(pool_id, &object_pool_name);
+ if (r < 0) {
+ std::cerr << "error looking up pool name for pool_id=" << pool_id << ": "
+ << cpp_strerror(r) << std::endl;
+ }
+ }
+
+ if (f) {
+ f->open_object_section("journal");
+ f->dump_string("journal_id", journal_id);
+ f->dump_string("header_oid", header_oid);
+ f->dump_string("object_oid_prefix", object_oid_prefix);
+ f->dump_int("order", order);
+ f->dump_int("splay_width", splay_width);
+ if (!object_pool_name.empty()) {
+ f->dump_string("object_pool", object_pool_name);
+ }
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ std::cout << "rbd journal '" << journal_id << "':" << std::endl;
+ std::cout << "\theader_oid: " << header_oid << std::endl;
+ std::cout << "\tobject_oid_prefix: " << object_oid_prefix << std::endl;
+ std::cout << "\torder: " << static_cast<int>(order) << " ("
+ << byte_u_t(1ull << order) << " objects)"<< std::endl;
+ std::cout << "\tsplay_width: " << static_cast<int>(splay_width) << std::endl;
+ if (!object_pool_name.empty()) {
+ std::cout << "\tobject_pool: " << object_pool_name << std::endl;
+ }
+ }
+ return 0;
+}
+
+static int do_show_journal_status(librados::IoCtx& io_ctx,
+ const std::string& journal_id, Formatter *f)
+{
+ int r;
+
+ C_SaferCond cond;
+ uint64_t minimum_set;
+ uint64_t active_set;
+ std::set<cls::journal::Client> registered_clients;
+ std::string oid = ::journal::Journaler::header_oid(journal_id);
+
+ cls::journal::client::get_mutable_metadata(io_ctx, oid, &minimum_set,
+ &active_set, &registered_clients,
+ &cond);
+ r = cond.wait();
+ if (r < 0) {
+ std::cerr << "warning: failed to get journal metadata" << std::endl;
+ return r;
+ }
+
+ if (f) {
+ f->open_object_section("status");
+ f->dump_unsigned("minimum_set", minimum_set);
+ f->dump_unsigned("active_set", active_set);
+ f->open_array_section("registered_clients");
+ for (std::set<cls::journal::Client>::iterator c =
+ registered_clients.begin(); c != registered_clients.end(); ++c) {
+ f->open_object_section("client");
+ c->dump(f);
+ f->close_section();
+ }
+ f->close_section();
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ std::cout << "minimum_set: " << minimum_set << std::endl;
+ std::cout << "active_set: " << active_set << std::endl;
+ std::cout << "registered clients: " << std::endl;
+ for (std::set<cls::journal::Client>::iterator c =
+ registered_clients.begin(); c != registered_clients.end(); ++c) {
+ std::cout << "\t" << *c << std::endl;
+ }
+ }
+ return 0;
+}
+
+static int do_reset_journal(librados::IoCtx& io_ctx,
+ const std::string& journal_id)
+{
+ // disable/re-enable journaling to delete/re-create the journal
+ // to properly handle mirroring constraints
+ std::string image_name;
+ int r = librbd::cls_client::dir_get_name(&io_ctx, RBD_DIRECTORY, journal_id,
+ &image_name);
+ if (r < 0) {
+ std::cerr << "failed to locate journal's image: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ librbd::Image image;
+ r = utils::open_image(io_ctx, image_name, false, &image);
+ if (r < 0) {
+ std::cerr << "failed to open image: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ r = image.update_features(RBD_FEATURE_JOURNALING, false);
+ if (r < 0) {
+ std::cerr << "failed to disable image journaling: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ r = image.update_features(RBD_FEATURE_JOURNALING, true);
+ if (r < 0) {
+ std::cerr << "failed to re-enable image journaling: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+static int do_disconnect_journal_client(librados::IoCtx& io_ctx,
+ const std::string& journal_id,
+ const std::string& client_id)
+{
+ int r;
+
+ C_SaferCond cond;
+ uint64_t minimum_set;
+ uint64_t active_set;
+ std::set<cls::journal::Client> registered_clients;
+ std::string oid = ::journal::Journaler::header_oid(journal_id);
+
+ cls::journal::client::get_mutable_metadata(io_ctx, oid, &minimum_set,
+ &active_set, &registered_clients,
+ &cond);
+ r = cond.wait();
+ if (r < 0) {
+ std::cerr << "warning: failed to get journal metadata" << std::endl;
+ return r;
+ }
+
+ static const std::string IMAGE_CLIENT_ID("");
+
+ bool found = false;
+ for (auto &c : registered_clients) {
+ if (c.id == IMAGE_CLIENT_ID || (!client_id.empty() && client_id != c.id)) {
+ continue;
+ }
+ r = cls::journal::client::client_update_state(io_ctx, oid, c.id,
+ cls::journal::CLIENT_STATE_DISCONNECTED);
+ if (r < 0) {
+ std::cerr << "warning: failed to disconnect client " << c.id << ": "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ std::cout << "client " << c.id << " disconnected" << std::endl;
+ found = true;
+ }
+
+ if (!found) {
+ if (!client_id.empty()) {
+ std::cerr << "warning: client " << client_id << " is not registered"
+ << std::endl;
+ } else {
+ std::cerr << "no registered clients to disconnect" << std::endl;
+ }
+ return -ENOENT;
+ }
+
+ bufferlist bl;
+ r = io_ctx.notify2(oid, bl, 5000, NULL);
+ if (r < 0) {
+ std::cerr << "warning: failed to notify state change:" << ": "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+class Journaler : public ::journal::Journaler {
+public:
+ Journaler(librados::IoCtx& io_ctx, const std::string& journal_id,
+ const std::string &client_id) :
+ ::journal::Journaler(io_ctx, journal_id, client_id, {}, nullptr) {
+ }
+
+ int init() {
+ int r;
+
+ // TODO register with librbd payload
+ r = register_client(bufferlist());
+ if (r < 0) {
+ std::cerr << "failed to register client: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ C_SaferCond cond;
+
+ ::journal::Journaler::init(&cond);
+ r = cond.wait();
+ if (r < 0) {
+ std::cerr << "failed to initialize journal: " << cpp_strerror(r)
+ << std::endl;
+ (void) unregister_client();
+ return r;
+ }
+
+ return 0;
+ }
+
+ int shut_down() {
+ int r = unregister_client();
+ if (r < 0) {
+ std::cerr << "rbd: failed to unregister journal client: "
+ << cpp_strerror(r) << std::endl;
+ }
+ ::journal::Journaler::shut_down();
+
+ return r;
+ }
+};
+
+class JournalPlayer {
+public:
+ JournalPlayer(librados::IoCtx& io_ctx, const std::string& journal_id,
+ const std::string &client_id) :
+ m_journaler(io_ctx, journal_id, client_id),
+ m_cond(),
+ m_r(0) {
+ }
+
+ virtual ~JournalPlayer() {}
+
+ virtual int exec() {
+ int r;
+
+ r = m_journaler.init();
+ if (r < 0) {
+ return r;
+ }
+
+ ReplayHandler replay_handler(this);
+
+ m_journaler.start_replay(&replay_handler);
+
+ r = m_cond.wait();
+ if (r < 0) {
+ std::cerr << "rbd: failed to process journal: " << cpp_strerror(r)
+ << std::endl;
+ if (m_r == 0) {
+ m_r = r;
+ }
+ }
+ return m_r;
+ }
+
+ int shut_down() {
+ return m_journaler.shut_down();
+ }
+
+protected:
+ struct ReplayHandler : public ::journal::ReplayHandler {
+ JournalPlayer *journal;
+ explicit ReplayHandler(JournalPlayer *_journal) : journal(_journal) {}
+
+ void handle_entries_available() override {
+ journal->handle_replay_ready();
+ }
+ void handle_complete(int r) override {
+ journal->handle_replay_complete(r);
+ }
+ };
+
+ void handle_replay_ready() {
+ int r = 0;
+ while (true) {
+ ::journal::ReplayEntry replay_entry;
+ uint64_t tag_id;
+ if (!m_journaler.try_pop_front(&replay_entry, &tag_id)) {
+ break;
+ }
+
+ r = process_entry(replay_entry, tag_id);
+ if (r < 0) {
+ break;
+ }
+ }
+ }
+
+ virtual int process_entry(::journal::ReplayEntry replay_entry,
+ uint64_t tag_id) = 0;
+
+ void handle_replay_complete(int r) {
+ if (m_r == 0 && r < 0) {
+ m_r = r;
+ }
+ m_journaler.stop_replay(&m_cond);
+ }
+
+ Journaler m_journaler;
+ C_SaferCond m_cond;
+ int m_r;
+};
+
+static int inspect_entry(bufferlist& data,
+ librbd::journal::EventEntry& event_entry,
+ bool verbose) {
+ try {
+ auto it = data.cbegin();
+ decode(event_entry, it);
+ } catch (const buffer::error &err) {
+ std::cerr << "failed to decode event entry: " << err.what() << std::endl;
+ return -EINVAL;
+ }
+ if (verbose) {
+ JSONFormatter f(true);
+ f.open_object_section("event_entry");
+ event_entry.dump(&f);
+ f.close_section();
+ f.flush(std::cout);
+ }
+ return 0;
+}
+
+class JournalInspector : public JournalPlayer {
+public:
+ JournalInspector(librados::IoCtx& io_ctx, const std::string& journal_id,
+ bool verbose) :
+ JournalPlayer(io_ctx, journal_id, "INSPECT"),
+ m_verbose(verbose),
+ m_s() {
+ }
+
+ int exec() override {
+ int r = JournalPlayer::exec();
+ m_s.print();
+ return r;
+ }
+
+private:
+ struct Stats {
+ Stats() : total(0), error(0) {}
+
+ void print() {
+ std::cout << "Summary:" << std::endl
+ << " " << total << " entries inspected, " << error << " errors"
+ << std::endl;
+ }
+
+ int total;
+ int error;
+ };
+
+ int process_entry(::journal::ReplayEntry replay_entry,
+ uint64_t tag_id) override {
+ m_s.total++;
+ if (m_verbose) {
+ std::cout << "Entry: tag_id=" << tag_id << ", commit_tid="
+ << replay_entry.get_commit_tid() << std::endl;
+ }
+ bufferlist data = replay_entry.get_data();
+ librbd::journal::EventEntry event_entry;
+ int r = inspect_entry(data, event_entry, m_verbose);
+ if (r < 0) {
+ m_r = r;
+ m_s.error++;
+ }
+ return 0;
+ }
+
+ bool m_verbose;
+ Stats m_s;
+};
+
+static int do_inspect_journal(librados::IoCtx& io_ctx,
+ const std::string& journal_id,
+ bool verbose) {
+ JournalInspector inspector(io_ctx, journal_id, verbose);
+ int r = inspector.exec();
+ if (r < 0) {
+ inspector.shut_down();
+ return r;
+ }
+
+ r = inspector.shut_down();
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+struct ExportEntry {
+ uint64_t tag_id;
+ uint64_t commit_tid;
+ int type;
+ bufferlist entry;
+
+ ExportEntry() : tag_id(0), commit_tid(0), type(0), entry() {}
+
+ ExportEntry(uint64_t tag_id, uint64_t commit_tid, int type,
+ const bufferlist& entry)
+ : tag_id(tag_id), commit_tid(commit_tid), type(type), entry(entry) {
+ }
+
+ void dump(Formatter *f) const {
+ ::encode_json("tag_id", tag_id, f);
+ ::encode_json("commit_tid", commit_tid, f);
+ ::encode_json("type", type, f);
+ ::encode_json("entry", entry, f);
+ }
+
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("tag_id", tag_id, obj);
+ JSONDecoder::decode_json("commit_tid", commit_tid, obj);
+ JSONDecoder::decode_json("type", type, obj);
+ JSONDecoder::decode_json("entry", entry, obj);
+ }
+};
+
+class JournalExporter : public JournalPlayer {
+public:
+ JournalExporter(librados::IoCtx& io_ctx, const std::string& journal_id,
+ int fd, bool no_error, bool verbose) :
+ JournalPlayer(io_ctx, journal_id, "EXPORT"),
+ m_journal_id(journal_id),
+ m_fd(fd),
+ m_no_error(no_error),
+ m_verbose(verbose),
+ m_s() {
+ }
+
+ int exec() override {
+ std::string header("# journal_id: " + m_journal_id + "\n");
+ int r;
+ r = safe_write(m_fd, header.c_str(), header.size());
+ if (r < 0) {
+ std::cerr << "rbd: failed to write to export file: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ r = JournalPlayer::exec();
+ m_s.print();
+ return r;
+ }
+
+private:
+ struct Stats {
+ Stats() : total(0), error(0) {}
+
+ void print() {
+ std::cout << total << " entries processed, " << error << " errors"
+ << std::endl;
+ }
+
+ int total;
+ int error;
+ };
+
+ int process_entry(::journal::ReplayEntry replay_entry,
+ uint64_t tag_id) override {
+ m_s.total++;
+ int type = -1;
+ bufferlist entry = replay_entry.get_data();
+ librbd::journal::EventEntry event_entry;
+ int r = inspect_entry(entry, event_entry, m_verbose);
+ if (r < 0) {
+ m_s.error++;
+ m_r = r;
+ return m_no_error ? 0 : r;
+ } else {
+ type = event_entry.get_event_type();
+ }
+ ExportEntry export_entry(tag_id, replay_entry.get_commit_tid(), type,
+ entry);
+ JSONFormatter f;
+ ::encode_json("event_entry", export_entry, &f);
+ std::ostringstream oss;
+ f.flush(oss);
+ std::string objstr = oss.str();
+ std::string header = stringify(objstr.size()) + " ";
+ r = safe_write(m_fd, header.c_str(), header.size());
+ if (r == 0) {
+ r = safe_write(m_fd, objstr.c_str(), objstr.size());
+ }
+ if (r == 0) {
+ r = safe_write(m_fd, "\n", 1);
+ }
+ if (r < 0) {
+ std::cerr << "rbd: failed to write to export file: " << cpp_strerror(r)
+ << std::endl;
+ m_s.error++;
+ return r;
+ }
+ return 0;
+ }
+
+ std::string m_journal_id;
+ int m_fd;
+ bool m_no_error;
+ bool m_verbose;
+ Stats m_s;
+};
+
+static int do_export_journal(librados::IoCtx& io_ctx,
+ const std::string& journal_id,
+ const std::string& path,
+ bool no_error, bool verbose) {
+ int r;
+ int fd;
+ bool to_stdout = path == "-";
+ if (to_stdout) {
+ fd = STDOUT_FILENO;
+ } else {
+ fd = open(path.c_str(), O_WRONLY | O_CREAT | O_EXCL | O_BINARY, 0644);
+ if (fd < 0) {
+ r = -errno;
+ std::cerr << "rbd: error creating " << path << std::endl;
+ return r;
+ }
+#ifdef HAVE_POSIX_FADVISE
+ posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+ }
+
+ JournalExporter exporter(io_ctx, journal_id, fd, no_error, verbose);
+ r = exporter.exec();
+
+ if (!to_stdout) {
+ close(fd);
+ }
+
+ int shut_down_r = exporter.shut_down();
+ if (r == 0 && shut_down_r < 0) {
+ r = shut_down_r;
+ }
+
+ return r;
+}
+
+class JournalImporter {
+public:
+ JournalImporter(librados::IoCtx& io_ctx, const std::string& journal_id,
+ int fd, bool no_error, bool verbose) :
+ m_journaler(io_ctx, journal_id, "IMPORT"),
+ m_fd(fd),
+ m_no_error(no_error),
+ m_verbose(verbose) {
+ }
+
+ bool read_entry(bufferlist& bl, int& r) {
+ // Entries are stored in the file using the following format:
+ //
+ // # Optional comments
+ // NNN {json encoded entry}
+ // ...
+ //
+ // Where NNN is the encoded entry size.
+ bl.clear();
+ char buf[80];
+ // Skip line feed and comments (lines started with #).
+ while ((r = safe_read_exact(m_fd, buf, 1)) == 0) {
+ if (buf[0] == '\n') {
+ continue;
+ } else if (buf[0] == '#') {
+ while ((r = safe_read_exact(m_fd, buf, 1)) == 0) {
+ if (buf[0] == '\n') {
+ break;
+ }
+ }
+ } else {
+ break;
+ }
+ }
+ if (r < 0) {
+ if (r == -EDOM) {
+ r = 0;
+ }
+ return false;
+ }
+ // Read entry size to buf.
+ if (!isdigit(buf[0])) {
+ r = -EINVAL;
+ std::cerr << "rbd: import data invalid format (digit expected)"
+ << std::endl;
+ return false;
+ }
+ for (size_t i = 1; i < sizeof(buf); i++) {
+ r = safe_read_exact(m_fd, buf + i, 1);
+ if (r < 0) {
+ std::cerr << "rbd: error reading import data" << std::endl;
+ return false;
+ }
+ if (!isdigit(buf[i])) {
+ if (buf[i] != ' ') {
+ r = -EINVAL;
+ std::cerr << "rbd: import data invalid format (space expected)"
+ << std::endl;
+ return false;
+ }
+ buf[i] = '\0';
+ break;
+ }
+ }
+ int entry_size = atoi(buf);
+ if (entry_size == 0) {
+ r = -EINVAL;
+ std::cerr << "rbd: import data invalid format (zero entry size)"
+ << std::endl;
+ return false;
+ }
+ ceph_assert(entry_size > 0);
+ // Read entry.
+ r = bl.read_fd(m_fd, entry_size);
+ if (r < 0) {
+ std::cerr << "rbd: error reading from stdin: " << cpp_strerror(r)
+ << std::endl;
+ return false;
+ }
+ if (r != entry_size) {
+ std::cerr << "rbd: error reading from stdin: truncated"
+ << std::endl;
+ r = -EINVAL;
+ return false;
+ }
+ r = 0;
+ return true;
+ }
+
+ int exec() {
+ int r = m_journaler.init();
+ if (r < 0) {
+ return r;
+ }
+ m_journaler.start_append(0);
+
+ int r1 = 0;
+ bufferlist bl;
+ int n = 0;
+ int error_count = 0;
+ while (read_entry(bl, r)) {
+ n++;
+ error_count++;
+ JSONParser p;
+ if (!p.parse(bl.c_str(), bl.length())) {
+ std::cerr << "rbd: error parsing input (entry " << n << ")"
+ << std::endl;
+ r = -EINVAL;
+ if (m_no_error) {
+ r1 = r;
+ continue;
+ } else {
+ break;
+ }
+ }
+ ExportEntry e;
+ try {
+ decode_json_obj(e, &p);
+ } catch (const JSONDecoder::err& err) {
+ std::cerr << "rbd: error json decoding import data (entry " << n << "):"
+ << err.what() << std::endl;
+ r = -EINVAL;
+ if (m_no_error) {
+ r1 = r;
+ continue;
+ } else {
+ break;
+ }
+ }
+ librbd::journal::EventEntry event_entry;
+ r = inspect_entry(e.entry, event_entry, m_verbose);
+ if (r < 0) {
+ std::cerr << "rbd: corrupted entry " << n << ": tag_tid=" << e.tag_id
+ << ", commit_tid=" << e.commit_tid << std::endl;
+ if (m_no_error) {
+ r1 = r;
+ continue;
+ } else {
+ break;
+ }
+ }
+ m_journaler.append(e.tag_id, e.entry);
+ error_count--;
+ }
+
+ std::cout << n << " entries processed, " << error_count << " errors" << std::endl;
+
+ std::cout << "Waiting for journal append to complete..." << std::endl;
+
+ C_SaferCond cond;
+ m_journaler.stop_append(&cond);
+ r = cond.wait();
+
+ if (r < 0) {
+ std::cerr << "failed to append journal: " << cpp_strerror(r) << std::endl;
+ }
+
+ if (r1 < 0 && r == 0) {
+ r = r1;
+ }
+ return r;
+ }
+
+ int shut_down() {
+ return m_journaler.shut_down();
+ }
+
+private:
+ Journaler m_journaler;
+ int m_fd;
+ bool m_no_error;
+ bool m_verbose;
+};
+
+static int do_import_journal(librados::IoCtx& io_ctx,
+ const std::string& journal_id,
+ const std::string& path,
+ bool no_error, bool verbose) {
+ int r;
+
+ int fd;
+ bool from_stdin = path == "-";
+ if (from_stdin) {
+ fd = STDIN_FILENO;
+ } else {
+ if ((fd = open(path.c_str(), O_RDONLY|O_BINARY)) < 0) {
+ r = -errno;
+ std::cerr << "rbd: error opening " << path << std::endl;
+ return r;
+ }
+#ifdef HAVE_POSIX_FADVISE
+ posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+ }
+
+ JournalImporter importer(io_ctx, journal_id, fd, no_error, verbose);
+ r = importer.exec();
+
+ if (!from_stdin) {
+ close(fd);
+ }
+
+ int shut_down_r = importer.shut_down();
+ if (r == 0 && shut_down_r < 0) {
+ r = shut_down_r;
+ }
+
+ return r;
+}
+
+void get_info_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_format_options(options);
+}
+
+int execute_info(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string journal_name;
+ int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
+ &pool_name, &namespace_name, &journal_name);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_show_journal_info(rados, io_ctx, journal_name, formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: journal info: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+
+}
+
+void get_status_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_format_options(options);
+}
+
+int execute_status(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string journal_name;
+ int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
+ &pool_name, &namespace_name, &journal_name);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_show_journal_status(io_ctx, journal_name, formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: journal status: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_reset_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+}
+
+int execute_reset(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string journal_name;
+ int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
+ &pool_name, &namespace_name, &journal_name);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_reset_journal(io_ctx, journal_name);
+ if (r < 0) {
+ std::cerr << "rbd: journal reset: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_client_disconnect_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ options->add_options()
+ ("client-id", po::value<std::string>(),
+ "client ID (or leave unspecified to disconnect all)");
+}
+
+int execute_client_disconnect(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string journal_name;
+ int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
+ &pool_name, &namespace_name, &journal_name);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string client_id;
+ if (vm.count("client-id")) {
+ client_id = vm["client-id"].as<std::string>();
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_disconnect_journal_client(io_ctx, journal_name, client_id);
+ if (r < 0) {
+ std::cerr << "rbd: journal client disconnect: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_inspect_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_verbose_option(options);
+}
+
+int execute_inspect(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string journal_name;
+ int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
+ &pool_name, &namespace_name, &journal_name);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_inspect_journal(io_ctx, journal_name, vm[at::VERBOSE].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: journal inspect: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_export_arguments(po::options_description *positional,
+ po::options_description *options) {
+ add_journal_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_SOURCE);
+ at::add_path_options(positional, options,
+ "export file (or '-' for stdout)");
+ at::add_verbose_option(options);
+ at::add_no_error_option(options);
+}
+
+int execute_export(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string journal_name;
+ int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index,
+ &pool_name, &namespace_name, &journal_name);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string path;
+ r = utils::get_path(vm, &arg_index, &path);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_export_journal(io_ctx, journal_name, path, vm[at::NO_ERR].as<bool>(),
+ vm[at::VERBOSE].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: journal export: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_import_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_path_options(positional, options,
+ "import file (or '-' for stdin)");
+ add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
+ at::add_verbose_option(options);
+ at::add_no_error_option(options);
+}
+
+int execute_import(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string path;
+ size_t arg_index = 0;
+ int r = utils::get_path(vm, &arg_index, &path);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string pool_name;
+ std::string namespace_name;
+ std::string journal_name;
+ r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_DEST, &arg_index,
+ &pool_name, &namespace_name, &journal_name);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_import_journal(io_ctx, journal_name, path, vm[at::NO_ERR].as<bool>(),
+ vm[at::VERBOSE].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: journal import: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action_info(
+ {"journal", "info"}, {}, "Show information about image journal.", "",
+ &get_info_arguments, &execute_info);
+
+Shell::Action action_status(
+ {"journal", "status"}, {}, "Show status of image journal.", "",
+ &get_status_arguments, &execute_status);
+
+Shell::Action action_reset(
+ {"journal", "reset"}, {}, "Reset image journal.", "",
+ &get_reset_arguments, &execute_reset);
+
+Shell::Action action_inspect(
+ {"journal", "inspect"}, {}, "Inspect image journal for structural errors.", "",
+ &get_inspect_arguments, &execute_inspect);
+
+Shell::Action action_export(
+ {"journal", "export"}, {}, "Export image journal.", "",
+ &get_export_arguments, &execute_export);
+
+Shell::Action action_import(
+ {"journal", "import"}, {}, "Import image journal.", "",
+ &get_import_arguments, &execute_import);
+
+Shell::Action action_disconnect(
+ {"journal", "client", "disconnect"}, {},
+ "Flag image journal client as disconnected.", "",
+ &get_client_disconnect_arguments, &execute_client_disconnect);
+
+} // namespace journal
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Kernel.cc b/src/tools/rbd/action/Kernel.cc
new file mode 100644
index 000000000..117f9492d
--- /dev/null
+++ b/src/tools/rbd/action/Kernel.cc
@@ -0,0 +1,679 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "acconfig.h"
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/krbd.h"
+#include "include/stringify.h"
+#include "include/uuid.h"
+#include "common/config_proxy.h"
+#include "common/errno.h"
+#include "common/safe_io.h"
+#include "common/strtol.h"
+#include "common/Formatter.h"
+#include "msg/msg_types.h"
+#include "global/global_context.h"
+#include <iostream>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/scope_exit.hpp>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace kernel {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+typedef std::map<std::string, std::string> MapOptions;
+
+static std::string map_option_uuid_cb(const char *value_char)
+{
+ uuid_d u;
+ if (!u.parse(value_char))
+ return "";
+
+ return stringify(u);
+}
+
+static std::string map_option_ip_cb(const char *value_char)
+{
+ entity_addr_t a;
+ if (!a.parse(value_char)) {
+ return "";
+ }
+
+ return stringify(a.get_sockaddr());
+}
+
+static std::string map_option_int_cb(const char *value_char)
+{
+ std::string err;
+ int d = strict_strtol(value_char, 10, &err);
+ if (!err.empty() || d < 0)
+ return "";
+
+ return stringify(d);
+}
+
+static std::string map_option_string_cb(const char *value_char)
+{
+ return value_char;
+}
+
+static std::string map_option_read_from_replica_cb(const char *value_char)
+{
+ if (!strcmp(value_char, "no") || !strcmp(value_char, "balance") ||
+ !strcmp(value_char, "localize")) {
+ return value_char;
+ }
+ return "";
+}
+
+static std::string map_option_compression_hint_cb(const char *value_char)
+{
+ if (!strcmp(value_char, "none") || !strcmp(value_char, "compressible") ||
+ !strcmp(value_char, "incompressible")) {
+ return value_char;
+ }
+ return "";
+}
+
+static std::string map_option_ms_mode_cb(const char *value_char)
+{
+ if (!strcmp(value_char, "legacy") || !strcmp(value_char, "crc") ||
+ !strcmp(value_char, "secure") || !strcmp(value_char, "prefer-crc") ||
+ !strcmp(value_char, "prefer-secure")) {
+ return value_char;
+ }
+ return "";
+}
+
+static void put_map_option(const std::string &key, const std::string &val,
+ MapOptions* map_options)
+{
+ (*map_options)[key] = val;
+}
+
+static int put_map_option_value(const std::string &opt, const char *value_char,
+ std::string (*parse_cb)(const char *),
+ MapOptions* map_options)
+{
+ if (!value_char || *value_char == '\0') {
+ std::cerr << "rbd: " << opt << " option requires a value" << std::endl;
+ return -EINVAL;
+ }
+
+ std::string value = parse_cb(value_char);
+ if (value.empty()) {
+ std::cerr << "rbd: invalid " << opt << " value '" << value_char << "'"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ put_map_option(opt, opt + "=" + value, map_options);
+ return 0;
+}
+
+static int parse_map_options(const std::string &options_string,
+ MapOptions* map_options)
+{
+ char *options = strdup(options_string.c_str());
+ BOOST_SCOPE_EXIT(options) {
+ free(options);
+ } BOOST_SCOPE_EXIT_END;
+
+ for (char *this_char = strtok(options, ", ");
+ this_char != NULL;
+ this_char = strtok(NULL, ",")) {
+ char *value_char;
+
+ if ((value_char = strchr(this_char, '=')) != NULL)
+ *value_char++ = '\0';
+
+ if (!strcmp(this_char, "fsid")) {
+ if (put_map_option_value("fsid", value_char, map_option_uuid_cb,
+ map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "ip")) {
+ if (put_map_option_value("ip", value_char, map_option_ip_cb,
+ map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "share") || !strcmp(this_char, "noshare")) {
+ put_map_option("share", this_char, map_options);
+ } else if (!strcmp(this_char, "crc") || !strcmp(this_char, "nocrc")) {
+ put_map_option("crc", this_char, map_options);
+ } else if (!strcmp(this_char, "cephx_require_signatures") ||
+ !strcmp(this_char, "nocephx_require_signatures")) {
+ put_map_option("cephx_require_signatures", this_char, map_options);
+ } else if (!strcmp(this_char, "tcp_nodelay") ||
+ !strcmp(this_char, "notcp_nodelay")) {
+ put_map_option("tcp_nodelay", this_char, map_options);
+ } else if (!strcmp(this_char, "cephx_sign_messages") ||
+ !strcmp(this_char, "nocephx_sign_messages")) {
+ put_map_option("cephx_sign_messages", this_char, map_options);
+ } else if (!strcmp(this_char, "mount_timeout")) {
+ if (put_map_option_value("mount_timeout", value_char, map_option_int_cb,
+ map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "osd_request_timeout")) {
+ if (put_map_option_value("osd_request_timeout", value_char,
+ map_option_int_cb, map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "lock_timeout")) {
+ if (put_map_option_value("lock_timeout", value_char, map_option_int_cb,
+ map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "osdkeepalive")) {
+ if (put_map_option_value("osdkeepalive", value_char, map_option_int_cb,
+ map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "osd_idle_ttl")) {
+ if (put_map_option_value("osd_idle_ttl", value_char, map_option_int_cb,
+ map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "rw") || !strcmp(this_char, "ro")) {
+ put_map_option("rw", this_char, map_options);
+ } else if (!strcmp(this_char, "queue_depth")) {
+ if (put_map_option_value("queue_depth", value_char, map_option_int_cb,
+ map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "lock_on_read")) {
+ put_map_option("lock_on_read", this_char, map_options);
+ } else if (!strcmp(this_char, "exclusive")) {
+ put_map_option("exclusive", this_char, map_options);
+ } else if (!strcmp(this_char, "notrim")) {
+ put_map_option("notrim", this_char, map_options);
+ } else if (!strcmp(this_char, "abort_on_full")) {
+ put_map_option("abort_on_full", this_char, map_options);
+ } else if (!strcmp(this_char, "alloc_size")) {
+ if (put_map_option_value("alloc_size", value_char, map_option_int_cb,
+ map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "crush_location")) {
+ if (put_map_option_value("crush_location", value_char,
+ map_option_string_cb, map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "read_from_replica")) {
+ if (put_map_option_value("read_from_replica", value_char,
+ map_option_read_from_replica_cb, map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "compression_hint")) {
+ if (put_map_option_value("compression_hint", value_char,
+ map_option_compression_hint_cb, map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "ms_mode")) {
+ if (put_map_option_value("ms_mode", value_char, map_option_ms_mode_cb,
+ map_options))
+ return -EINVAL;
+ } else if (!strcmp(this_char, "rxbounce")) {
+ put_map_option("rxbounce", this_char, map_options);
+ } else if (!strcmp(this_char, "udev") || !strcmp(this_char, "noudev")) {
+ put_map_option("udev", this_char, map_options);
+ } else {
+ std::cerr << "rbd: unknown map option '" << this_char << "'" << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int parse_unmap_options(const std::string &options_string,
+ MapOptions* unmap_options)
+{
+ char *options = strdup(options_string.c_str());
+ BOOST_SCOPE_EXIT(options) {
+ free(options);
+ } BOOST_SCOPE_EXIT_END;
+
+ for (char *this_char = strtok(options, ", ");
+ this_char != NULL;
+ this_char = strtok(NULL, ",")) {
+ char *value_char;
+
+ if ((value_char = strchr(this_char, '=')) != NULL)
+ *value_char++ = '\0';
+
+ if (!strcmp(this_char, "force")) {
+ put_map_option("force", this_char, unmap_options);
+ } else if (!strcmp(this_char, "udev") || !strcmp(this_char, "noudev")) {
+ put_map_option("udev", this_char, unmap_options);
+ } else {
+ std::cerr << "rbd: unknown unmap option '" << this_char << "'"
+ << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int do_kernel_list(Formatter *f) {
+#if defined(WITH_KRBD)
+ struct krbd_ctx *krbd;
+ int r;
+
+ r = krbd_create_from_context(g_ceph_context, 0, &krbd);
+ if (r < 0)
+ return r;
+
+ r = krbd_showmapped(krbd, f);
+
+ krbd_destroy(krbd);
+ return r;
+#else
+ std::cerr << "rbd: kernel device is not supported" << std::endl;
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int get_unsupported_features(librbd::Image &image,
+ uint64_t *unsupported_features)
+{
+ char buf[20];
+ uint64_t features, supported_features;
+ int r;
+
+ r = safe_read_file("/sys/bus/rbd/", "supported_features", buf,
+ sizeof(buf) - 1);
+ if (r < 0)
+ return r;
+
+ buf[r] = '\0';
+ try {
+ supported_features = std::stoull(buf, nullptr, 16);
+ } catch (...) {
+ return -EINVAL;
+ }
+
+ r = image.features(&features);
+ if (r < 0)
+ return r;
+
+ *unsupported_features = features & ~supported_features;
+ return 0;
+}
+
+/*
+ * hint user to check syslog for krbd related messages and provide suggestions
+ * based on errno return by krbd_map(). also note that even if some librbd calls
+ * fail, we at least dump the "try dmesg..." message to aid debugging.
+ */
+static void print_error_description(const char *poolname,
+ const char *nspace_name,
+ const char *imgname,
+ const char *snapname,
+ int maperrno)
+{
+ int r;
+ uint8_t oldformat;
+ librados::Rados rados;
+ librados::IoCtx ioctx;
+ librbd::Image image;
+
+ if (maperrno == -ENOENT)
+ goto done;
+
+ r = utils::init_and_open_image(poolname, nspace_name, imgname, "", snapname,
+ true, &rados, &ioctx, &image);
+ if (r < 0)
+ goto done;
+
+ r = image.old_format(&oldformat);
+ if (r < 0)
+ goto done;
+
+ /*
+ * kernel returns -ENXIO when mapping a V2 image due to unsupported feature
+ * set - so, hint about that too...
+ */
+ if (!oldformat && (maperrno == -ENXIO)) {
+ uint64_t unsupported_features;
+ bool need_terminate = true;
+
+ std::cout << "RBD image feature set mismatch. ";
+ r = get_unsupported_features(image, &unsupported_features);
+ if (r == 0 && (unsupported_features & ~RBD_FEATURES_ALL) == 0) {
+ uint64_t immutable = RBD_FEATURES_ALL & ~(RBD_FEATURES_MUTABLE |
+ RBD_FEATURES_DISABLE_ONLY);
+ if (unsupported_features & immutable) {
+ std::cout << "This image cannot be mapped because the following "
+ << "immutable features are unsupported by the kernel:";
+ unsupported_features &= immutable;
+ need_terminate = false;
+ } else {
+ std::cout << "You can disable features unsupported by the kernel "
+ << "with \"rbd feature disable ";
+ if (poolname != utils::get_default_pool_name() || *nspace_name) {
+ std::cout << poolname << "/";
+ }
+ if (*nspace_name) {
+ std::cout << nspace_name << "/";
+ }
+ std::cout << imgname;
+ }
+ } else {
+ std::cout << "Try disabling features unsupported by the kernel "
+ << "with \"rbd feature disable";
+ unsupported_features = 0;
+ }
+ for (auto it : at::ImageFeatures::FEATURE_MAPPING) {
+ if (it.first & unsupported_features) {
+ std::cout << " " << it.second;
+ }
+ }
+ if (need_terminate)
+ std::cout << "\"";
+ std::cout << "." << std::endl;
+ }
+
+ done:
+ std::cout << "In some cases useful info is found in syslog - try \"dmesg | tail\"." << std::endl;
+}
+
+static int do_kernel_map(const char *poolname, const char *nspace_name,
+ const char *imgname, const char *snapname,
+ MapOptions&& map_options)
+{
+#if defined(WITH_KRBD)
+ struct krbd_ctx *krbd;
+ std::ostringstream oss;
+ uint32_t flags = 0;
+ char *devnode;
+ int r;
+
+ for (auto it = map_options.begin(); it != map_options.end(); ) {
+ // for compatibility with < 3.7 kernels, assume that rw is on by
+ // default and omit it even if it was specified by the user
+ // (see ceph.git commit fb0f1986449b)
+ if (it->first == "rw" && it->second == "rw") {
+ it = map_options.erase(it);
+ } else if (it->first == "udev") {
+ if (it->second == "noudev") {
+ flags |= KRBD_CTX_F_NOUDEV;
+ }
+ it = map_options.erase(it);
+ } else {
+ if (it != map_options.begin())
+ oss << ",";
+ oss << it->second;
+ ++it;
+ }
+ }
+
+ r = krbd_create_from_context(g_ceph_context, flags, &krbd);
+ if (r < 0)
+ return r;
+
+ r = krbd_is_mapped(krbd, poolname, nspace_name, imgname, snapname, &devnode);
+ if (r < 0) {
+ std::cerr << "rbd: warning: can't get image map information: "
+ << cpp_strerror(r) << std::endl;
+ } else if (r > 0) {
+ std::cerr << "rbd: warning: image already mapped as " << devnode
+ << std::endl;
+ free(devnode);
+ }
+
+ r = krbd_map(krbd, poolname, nspace_name, imgname, snapname,
+ oss.str().c_str(), &devnode);
+ if (r < 0) {
+ print_error_description(poolname, nspace_name, imgname, snapname, r);
+ goto out;
+ }
+
+ std::cout << devnode << std::endl;
+
+ free(devnode);
+out:
+ krbd_destroy(krbd);
+ return r;
+#else
+ std::cerr << "rbd: kernel device is not supported" << std::endl;
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int do_kernel_unmap(const char *dev, const char *poolname,
+ const char *nspace_name, const char *imgname,
+ const char *snapname, MapOptions&& unmap_options)
+{
+#if defined(WITH_KRBD)
+ struct krbd_ctx *krbd;
+ std::ostringstream oss;
+ uint32_t flags = 0;
+ int r;
+
+ for (auto it = unmap_options.begin(); it != unmap_options.end(); ) {
+ if (it->first == "udev") {
+ if (it->second == "noudev") {
+ flags |= KRBD_CTX_F_NOUDEV;
+ }
+ it = unmap_options.erase(it);
+ } else {
+ if (it != unmap_options.begin())
+ oss << ",";
+ oss << it->second;
+ ++it;
+ }
+ }
+
+ r = krbd_create_from_context(g_ceph_context, flags, &krbd);
+ if (r < 0)
+ return r;
+
+ if (dev)
+ r = krbd_unmap(krbd, dev, oss.str().c_str());
+ else
+ r = krbd_unmap_by_spec(krbd, poolname, nspace_name, imgname, snapname,
+ oss.str().c_str());
+
+ krbd_destroy(krbd);
+ return r;
+#else
+ std::cerr << "rbd: kernel device is not supported" << std::endl;
+ return -EOPNOTSUPP;
+#endif
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ at::Format::Formatter formatter;
+ int r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::init_context();
+
+ r = do_kernel_list(formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: device list failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+int execute_map(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string nspace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ MapOptions map_options;
+ if (vm.count("options")) {
+ for (auto &options : vm["options"].as<std::vector<std::string>>()) {
+ r = parse_map_options(options, &map_options);
+ if (r < 0) {
+ std::cerr << "rbd: couldn't parse map options" << std::endl;
+ return r;
+ }
+ }
+ }
+
+ // parse options common to all device types after parsing krbd-specific
+ // options so that common options win (in particular "-o rw --read-only"
+ // should result in read-only mapping)
+ if (vm["read-only"].as<bool>()) {
+ put_map_option("rw", "ro", &map_options);
+ }
+ if (vm["exclusive"].as<bool>()) {
+ put_map_option("exclusive", "exclusive", &map_options);
+ }
+ if (vm["quiesce"].as<bool>()) {
+ std::cerr << "rbd: warning: quiesce is not supported" << std::endl;
+ }
+ if (vm.count("quiesce-hook")) {
+ std::cerr << "rbd: warning: quiesce-hook is not supported" << std::endl;
+ }
+
+ // connect to the cluster to get the default pool and the default map
+ // options
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::normalize_pool_name(&pool_name);
+
+ librados::IoCtx ioctx;
+ librbd::Image image;
+ r = utils::init_io_ctx(rados, pool_name, nspace_name, &ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = utils::open_image(ioctx, image_name, true, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ MapOptions default_map_options;
+ std::vector<librbd::config_option_t> options;
+ image.config_list(&options);
+ for (const auto &option : options) {
+ if (option.name == "rbd_default_map_options") {
+ r = parse_map_options(option.value, &default_map_options);
+ if (r < 0) {
+ std::cerr << "rbd: couldn't parse default map options" << std::endl;
+ return r;
+ }
+
+ break;
+ }
+ }
+
+ for (auto& [key, value] : default_map_options) {
+ if (map_options.count(key) == 0) {
+ map_options[key] = value;
+ }
+ }
+
+ r = do_kernel_map(pool_name.c_str(), nspace_name.c_str(), image_name.c_str(),
+ snap_name.c_str(), std::move(map_options));
+ if (r < 0) {
+ std::cerr << "rbd: map failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_unmap(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string device_name = utils::get_positional_argument(vm, 0);
+ if (!boost::starts_with(device_name, "/dev/")) {
+ device_name.clear();
+ }
+
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string nspace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r;
+ if (device_name.empty()) {
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name,
+ &image_name, &snap_name, false, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (device_name.empty() && image_name.empty()) {
+ std::cerr << "rbd: unmap requires either image name or device path"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ MapOptions unmap_options;
+ if (vm.count("options")) {
+ for (auto &options : vm["options"].as<std::vector<std::string>>()) {
+ r = parse_unmap_options(options, &unmap_options);
+ if (r < 0) {
+ std::cerr << "rbd: couldn't parse unmap options" << std::endl;
+ return r;
+ }
+ }
+ }
+
+ if (device_name.empty() && pool_name.empty()) {
+ // connect to the cluster to get the default pool
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::normalize_pool_name(&pool_name);
+ }
+
+ r = do_kernel_unmap(device_name.empty() ? nullptr : device_name.c_str(),
+ pool_name.c_str(), nspace_name.c_str(),
+ image_name.c_str(), snap_name.c_str(),
+ std::move(unmap_options));
+ if (r < 0) {
+ std::cerr << "rbd: unmap failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+int execute_attach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if defined(WITH_KRBD)
+ std::cerr << "rbd: krbd does not support attach" << std::endl;
+#else
+ std::cerr << "rbd: kernel device is not supported" << std::endl;
+#endif
+ return -EOPNOTSUPP;
+}
+
+int execute_detach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if defined(WITH_KRBD)
+ std::cerr << "rbd: krbd does not support detach" << std::endl;
+#else
+ std::cerr << "rbd: kernel device is not supported" << std::endl;
+#endif
+ return -EOPNOTSUPP;
+}
+
+} // namespace kernel
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/List.cc b/src/tools/rbd/action/List.cc
new file mode 100644
index 000000000..8fdc3c1a7
--- /dev/null
+++ b/src/tools/rbd/action/List.cc
@@ -0,0 +1,346 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/Context.h"
+#include "include/stringify.h"
+#include "include/types.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include <iostream>
+#include <boost/bind/bind.hpp>
+#include <boost/program_options.hpp>
+#include "global/global_context.h"
+
+namespace rbd {
+
+namespace action {
+namespace list {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+using namespace boost::placeholders;
+
+enum WorkerState {
+ STATE_IDLE = 0,
+ STATE_OPENED,
+ STATE_DONE
+} ;
+
+struct WorkerEntry {
+ librbd::Image img;
+ librbd::RBD::AioCompletion* completion;
+ WorkerState state;
+ std::string name;
+ std::string id;
+
+ WorkerEntry() {
+ state = STATE_IDLE;
+ completion = nullptr;
+ }
+};
+
+
+int list_process_image(librados::Rados* rados, WorkerEntry* w, bool lflag, Formatter *f, TextTable &tbl)
+{
+ int r = 0;
+ librbd::image_info_t info;
+ std::string parent;
+
+ // handle second-nth trips through loop
+ librbd::linked_image_spec_t parent_image_spec;
+ librbd::snap_spec_t parent_snap_spec;
+ r = w->img.get_parent(&parent_image_spec, &parent_snap_spec);
+ if (r < 0 && r != -ENOENT) {
+ return r;
+ }
+
+ bool has_parent = false;
+ if (r != -ENOENT) {
+ parent = parent_image_spec.pool_name + "/";
+ if (!parent_image_spec.pool_namespace.empty()) {
+ parent += parent_image_spec.pool_namespace + "/";
+ }
+ parent += parent_image_spec.image_name + "@" + parent_snap_spec.name;
+ has_parent = true;
+ }
+
+ if (w->img.stat(info, sizeof(info)) < 0) {
+ return -EINVAL;
+ }
+
+ uint8_t old_format;
+ w->img.old_format(&old_format);
+
+ std::list<librbd::locker_t> lockers;
+ bool exclusive;
+ r = w->img.list_lockers(&lockers, &exclusive, NULL);
+ if (r < 0)
+ return r;
+ std::string lockstr;
+ if (!lockers.empty()) {
+ lockstr = (exclusive) ? "excl" : "shr";
+ }
+
+ if (f) {
+ f->open_object_section("image");
+ f->dump_string("image", w->name);
+ f->dump_string("id", w->id);
+ f->dump_unsigned("size", info.size);
+ if (has_parent) {
+ f->open_object_section("parent");
+ f->dump_string("pool", parent_image_spec.pool_name);
+ f->dump_string("pool_namespace", parent_image_spec.pool_namespace);
+ f->dump_string("image", parent_image_spec.image_name);
+ f->dump_string("snapshot", parent_snap_spec.name);
+ f->close_section();
+ }
+ f->dump_int("format", old_format ? 1 : 2);
+ if (!lockers.empty())
+ f->dump_string("lock_type", exclusive ? "exclusive" : "shared");
+ f->close_section();
+ } else {
+ tbl << w->name
+ << stringify(byte_u_t(info.size))
+ << parent
+ << ((old_format) ? '1' : '2')
+ << "" // protect doesn't apply to images
+ << lockstr
+ << TextTable::endrow;
+ }
+
+ std::vector<librbd::snap_info_t> snaplist;
+ if (w->img.snap_list(snaplist) >= 0 && !snaplist.empty()) {
+ snaplist.erase(remove_if(snaplist.begin(),
+ snaplist.end(),
+ boost::bind(utils::is_not_user_snap_namespace, &w->img, _1)),
+ snaplist.end());
+ for (std::vector<librbd::snap_info_t>::iterator s = snaplist.begin();
+ s != snaplist.end(); ++s) {
+ bool is_protected;
+ bool has_parent = false;
+ parent.clear();
+ w->img.snap_set(s->name.c_str());
+ r = w->img.snap_is_protected(s->name.c_str(), &is_protected);
+ if (r < 0)
+ return r;
+ if (w->img.get_parent(&parent_image_spec, &parent_snap_spec) >= 0) {
+ parent = parent_image_spec.pool_name + "/";
+ if (!parent_image_spec.pool_namespace.empty()) {
+ parent += parent_image_spec.pool_namespace + "/";
+ }
+ parent += parent_image_spec.image_name + "@" + parent_snap_spec.name;
+ has_parent = true;
+ }
+ if (f) {
+ f->open_object_section("snapshot");
+ f->dump_string("image", w->name);
+ f->dump_string("id", w->id);
+ f->dump_string("snapshot", s->name);
+ f->dump_unsigned("snapshot_id", s->id);
+ f->dump_unsigned("size", s->size);
+ if (has_parent) {
+ f->open_object_section("parent");
+ f->dump_string("pool", parent_image_spec.pool_name);
+ f->dump_string("pool_namespace", parent_image_spec.pool_namespace);
+ f->dump_string("image", parent_image_spec.image_name);
+ f->dump_string("snapshot", parent_snap_spec.name);
+ f->close_section();
+ }
+ f->dump_int("format", old_format ? 1 : 2);
+ f->dump_string("protected", is_protected ? "true" : "false");
+ f->close_section();
+ } else {
+ tbl << w->name + "@" + s->name
+ << stringify(byte_u_t(s->size))
+ << parent
+ << ((old_format) ? '1' : '2')
+ << (is_protected ? "yes" : "")
+ << "" // locks don't apply to snaps
+ << TextTable::endrow;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int do_list(const std::string &pool_name, const std::string& namespace_name,
+ bool lflag, Formatter *f) {
+ std::vector<WorkerEntry*> workers;
+ std::vector<librbd::image_spec_t> images;
+ librados::Rados rados;
+ librbd::RBD rbd;
+ librados::IoCtx ioctx;
+
+ int r = utils::init(pool_name, namespace_name, &rados, &ioctx);
+ if (r < 0) {
+ return r;
+ }
+
+ int threads = g_conf().get_val<uint64_t>("rbd_concurrent_management_ops");
+ if (threads < 1) {
+ threads = 1;
+ }
+ if (threads > 32) {
+ threads = 32;
+ }
+
+ utils::disable_cache();
+
+ r = rbd.list2(ioctx, &images);
+ if (r < 0)
+ return r;
+
+ if (!lflag) {
+ if (f)
+ f->open_array_section("images");
+ for (auto& image : images) {
+ if (f)
+ f->dump_string("name", image.name);
+ else
+ std::cout << image.name << std::endl;
+ }
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ }
+ return 0;
+ }
+
+ TextTable tbl;
+
+ if (f) {
+ f->open_array_section("images");
+ } else {
+ tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT);
+ tbl.define_column("PARENT", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("FMT", TextTable::LEFT, TextTable::RIGHT);
+ tbl.define_column("PROT", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("LOCK", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ for (size_t left = 0; left < std::min<size_t>(threads, images.size());
+ left++) {
+ workers.push_back(new WorkerEntry());
+ }
+
+ auto i = images.begin();
+ while (true) {
+ size_t workers_idle = 0;
+ for (auto comp : workers) {
+ switch (comp->state) {
+ case STATE_DONE:
+ comp->completion->wait_for_complete();
+ comp->state = STATE_IDLE;
+ comp->completion->release();
+ comp->completion = nullptr;
+ // we want it to fall through in this case
+ case STATE_IDLE:
+ if (i == images.end()) {
+ workers_idle++;
+ continue;
+ }
+ comp->name = i->name;
+ comp->id = i->id;
+ comp->completion = new librbd::RBD::AioCompletion(nullptr, nullptr);
+ r = rbd.aio_open_read_only(ioctx, comp->img, i->name.c_str(), nullptr,
+ comp->completion);
+ i++;
+ comp->state = STATE_OPENED;
+ break;
+ case STATE_OPENED:
+ comp->completion->wait_for_complete();
+ // image might disappear between rbd.list() and rbd.open(); ignore
+ // that, warn about other possible errors (EPERM, say, for opening
+ // an old-format image, because you need execute permission for the
+ // class method)
+ r = comp->completion->get_return_value();
+ comp->completion->release();
+ if (r < 0) {
+ std::cerr << "rbd: error opening " << comp->name << ": "
+ << cpp_strerror(r) << std::endl;
+
+ // in any event, continue to next image
+ comp->state = STATE_IDLE;
+ continue;
+ }
+ r = list_process_image(&rados, comp, lflag, f, tbl);
+ if (r < 0) {
+ std::cerr << "rbd: error processing image " << comp->name << ": "
+ << cpp_strerror(r) << std::endl;
+ }
+ comp->completion = new librbd::RBD::AioCompletion(nullptr, nullptr);
+ r = comp->img.aio_close(comp->completion);
+ comp->state = STATE_DONE;
+ break;
+ }
+ }
+ if (workers_idle == workers.size()) {
+ break;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else if (!images.empty()) {
+ std::cout << tbl;
+ }
+
+ rados.shutdown();
+
+ for (auto comp : workers) {
+ delete comp;
+ }
+
+ return r < 0 ? r : 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ options->add_options()
+ ("long,l", po::bool_switch(), "long listing format");
+ at::add_pool_options(positional, options, true);
+ at::add_format_options(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_list(pool_name, namespace_name, vm["long"].as<bool>(),
+ formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: listing images failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+Shell::SwitchArguments switched_arguments({"long", "l"});
+Shell::Action action(
+ {"list"}, {"ls"}, "List rbd images.", "", &get_arguments, &execute);
+
+} // namespace list
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Lock.cc b/src/tools/rbd/action/Lock.cc
new file mode 100644
index 000000000..754cb384c
--- /dev/null
+++ b/src/tools/rbd/action/Lock.cc
@@ -0,0 +1,279 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace lock {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+void add_id_option(po::options_description *positional) {
+ positional->add_options()
+ ("lock-id", "unique lock id");
+}
+
+int get_id(const po::variables_map &vm, size_t *arg_index,
+ std::string *id) {
+ *id = utils::get_positional_argument(vm, *arg_index);
+ if (id->empty()) {
+ std::cerr << "rbd: lock id was not specified" << std::endl;
+ return -EINVAL;
+ } else {
+ ++(*arg_index);
+ }
+ return 0;
+}
+
+} // anonymous namespace
+
+static int do_lock_list(librbd::Image& image, Formatter *f)
+{
+ std::list<librbd::locker_t> lockers;
+ bool exclusive;
+ std::string tag;
+ TextTable tbl;
+ int r;
+
+ r = image.list_lockers(&lockers, &exclusive, &tag);
+ if (r < 0)
+ return r;
+
+ if (f) {
+ f->open_array_section("locks");
+ } else {
+ tbl.define_column("Locker", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("ID", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("Address", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ if (lockers.size()) {
+ bool one = (lockers.size() == 1);
+
+ if (!f) {
+ std::cout << "There " << (one ? "is " : "are ") << lockers.size()
+ << (exclusive ? " exclusive" : " shared")
+ << " lock" << (one ? "" : "s") << " on this image.\n";
+ if (!exclusive)
+ std::cout << "Lock tag: " << tag << "\n";
+ }
+
+ for (std::list<librbd::locker_t>::const_iterator it = lockers.begin();
+ it != lockers.end(); ++it) {
+ if (f) {
+ f->open_object_section("lock");
+ f->dump_string("id", it->cookie);
+ f->dump_string("locker", it->client);
+ f->dump_string("address", it->address);
+ f->close_section();
+ } else {
+ tbl << it->client << it->cookie << it->address << TextTable::endrow;
+ }
+ }
+ if (!f)
+ std::cout << tbl;
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ }
+ return 0;
+}
+
+static int do_lock_add(librbd::Image& image, const char *cookie,
+ const char *tag)
+{
+ if (tag)
+ return image.lock_shared(cookie, tag);
+ else
+ return image.lock_exclusive(cookie);
+}
+
+static int do_lock_remove(librbd::Image& image, const char *client,
+ const char *cookie)
+{
+ return image.break_lock(client, cookie);
+}
+
+void get_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_format_options(options);
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_lock_list(image, formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: listing locks failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_add_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ add_id_option(positional);
+ options->add_options()
+ ("shared", po::value<std::string>(), "shared lock tag");
+}
+
+int execute_add(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string lock_cookie;
+ r = get_id(vm, &arg_index, &lock_cookie);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string lock_tag;
+ if (vm.count("shared")) {
+ lock_tag = vm["shared"].as<std::string>();
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_lock_add(image, lock_cookie.c_str(),
+ lock_tag.empty() ? nullptr : lock_tag.c_str());
+ if (r < 0) {
+ if (r == -EBUSY || r == -EEXIST) {
+ if (!lock_tag.empty()) {
+ std::cerr << "rbd: lock is already held by someone else"
+ << " with a different tag" << std::endl;
+ } else {
+ std::cerr << "rbd: lock is already held by someone else" << std::endl;
+ }
+ } else {
+ std::cerr << "rbd: taking lock failed: " << cpp_strerror(r) << std::endl;
+ }
+ return r;
+ }
+ return 0;
+}
+
+void get_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ add_id_option(positional);
+ positional->add_options()
+ ("locker", "locker client");
+}
+
+int execute_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string lock_cookie;
+ r = get_id(vm, &arg_index, &lock_cookie);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string lock_client = utils::get_positional_argument(vm, arg_index);
+ if (lock_client.empty()) {
+ std::cerr << "rbd: locker was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_lock_remove(image, lock_client.c_str(), lock_cookie.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: releasing lock failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action_list(
+ {"lock", "list"}, {"lock", "ls"}, "Show locks held on an image.", "",
+ &get_list_arguments, &execute_list);
+Shell::Action action_add(
+ {"lock", "add"}, {}, "Take a lock on an image.", "",
+ &get_add_arguments, &execute_add);
+Shell::Action action_remove(
+ {"lock", "remove"}, {"lock", "rm"}, "Release a lock on an image.", "",
+ &get_remove_arguments, &execute_remove);
+
+} // namespace lock
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/MergeDiff.cc b/src/tools/rbd/action/MergeDiff.cc
new file mode 100644
index 000000000..c387be9a4
--- /dev/null
+++ b/src/tools/rbd/action/MergeDiff.cc
@@ -0,0 +1,456 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#define _LARGEFILE64_SOURCE
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "include/compat.h"
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/safe_io.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+
+using std::string;
+
+namespace rbd {
+namespace action {
+namespace merge_diff {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int parse_diff_header(int fd, __u8 *tag, string *from, string *to, uint64_t *size)
+{
+ int r;
+
+ {//header
+ char buf[utils::RBD_DIFF_BANNER.size() + 1];
+ r = safe_read_exact(fd, buf, utils::RBD_DIFF_BANNER.size());
+ if (r < 0)
+ return r;
+
+ buf[utils::RBD_DIFF_BANNER.size()] = '\0';
+ if (strcmp(buf, utils::RBD_DIFF_BANNER.c_str())) {
+ std::cerr << "invalid banner '" << buf << "', expected '"
+ << utils::RBD_DIFF_BANNER << "'" << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ while (true) {
+ r = safe_read_exact(fd, tag, 1);
+ if (r < 0)
+ return r;
+
+ if (*tag == RBD_DIFF_FROM_SNAP) {
+ r = utils::read_string(fd, 4096, from); // 4k limit to make sure we don't get a garbage string
+ if (r < 0)
+ return r;
+ dout(2) << " from snap " << *from << dendl;
+ } else if (*tag == RBD_DIFF_TO_SNAP) {
+ r = utils::read_string(fd, 4096, to); // 4k limit to make sure we don't get a garbage string
+ if (r < 0)
+ return r;
+ dout(2) << " to snap " << *to << dendl;
+ } else if (*tag == RBD_DIFF_IMAGE_SIZE) {
+ char buf[8];
+ r = safe_read_exact(fd, buf, 8);
+ if (r < 0)
+ return r;
+
+ bufferlist bl;
+ bl.append(buf, 8);
+ auto p = bl.cbegin();
+ decode(*size, p);
+ } else {
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int parse_diff_body(int fd, __u8 *tag, uint64_t *offset, uint64_t *length)
+{
+ int r;
+
+ if (!(*tag)) {
+ r = safe_read_exact(fd, tag, 1);
+ if (r < 0)
+ return r;
+ }
+
+ if (*tag == RBD_DIFF_END) {
+ offset = 0;
+ length = 0;
+ return 0;
+ }
+
+ if (*tag != RBD_DIFF_WRITE && *tag != RBD_DIFF_ZERO)
+ return -ENOTSUP;
+
+ char buf[16];
+ r = safe_read_exact(fd, buf, 16);
+ if (r < 0)
+ return r;
+
+ bufferlist bl;
+ bl.append(buf, 16);
+ auto p = bl.cbegin();
+ decode(*offset, p);
+ decode(*length, p);
+
+ if (!(*length))
+ return -ENOTSUP;
+
+ return 0;
+}
+
+/*
+ * fd: the diff file to read from
+ * pd: the diff file to be written into
+ */
+static int accept_diff_body(int fd, int pd, __u8 tag, uint64_t offset, uint64_t length)
+{
+ if (tag == RBD_DIFF_END)
+ return 0;
+
+ bufferlist bl;
+ encode(tag, bl);
+ encode(offset, bl);
+ encode(length, bl);
+ int r;
+ r = bl.write_fd(pd);
+ if (r < 0)
+ return r;
+
+ if (tag == RBD_DIFF_WRITE) {
+ bufferptr bp = buffer::create(length);
+ r = safe_read_exact(fd, bp.c_str(), length);
+ if (r < 0)
+ return r;
+ bufferlist data;
+ data.append(bp);
+ r = data.write_fd(pd);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * Merge two diff files into one single file
+ * Note: It does not do the merging work if
+ * either of the source diff files is stripped,
+ * since which complicates the process and is
+ * rarely used
+ */
+static int do_merge_diff(const char *first, const char *second,
+ const char *path, bool no_progress)
+{
+ utils::ProgressContext pc("Merging image diff", no_progress);
+ int fd = -1, sd = -1, pd = -1, r;
+
+ string f_from, f_to;
+ string s_from, s_to;
+ uint64_t f_size = 0;
+ uint64_t s_size = 0;
+ uint64_t pc_size;
+
+ __u8 f_tag = 0, s_tag = 0;
+ uint64_t f_off = 0, f_len = 0;
+ uint64_t s_off = 0, s_len = 0;
+ bool f_end = false, s_end = false;
+
+ bool first_stdin = !strcmp(first, "-");
+ if (first_stdin) {
+ fd = STDIN_FILENO;
+ } else {
+ fd = open(first, O_RDONLY|O_BINARY);
+ if (fd < 0) {
+ r = -errno;
+ std::cerr << "rbd: error opening " << first << std::endl;
+ goto done;
+ }
+ }
+
+ sd = open(second, O_RDONLY|O_BINARY);
+ if (sd < 0) {
+ r = -errno;
+ std::cerr << "rbd: error opening " << second << std::endl;
+ goto done;
+ }
+
+ if (strcmp(path, "-") == 0) {
+ pd = 1;
+ } else {
+ pd = open(path, O_WRONLY | O_CREAT | O_EXCL | O_BINARY, 0644);
+ if (pd < 0) {
+ r = -errno;
+ std::cerr << "rbd: error create " << path << std::endl;
+ goto done;
+ }
+ }
+
+ //We just handle the case like 'banner, [ftag], [ttag], stag, [wztag]*,etag',
+ // and the (offset,length) in wztag must be ascending order.
+ r = parse_diff_header(fd, &f_tag, &f_from, &f_to, &f_size);
+ if (r < 0) {
+ std::cerr << "rbd: failed to parse first diff header" << std::endl;
+ goto done;
+ }
+
+ r = parse_diff_header(sd, &s_tag, &s_from, &s_to, &s_size);
+ if (r < 0) {
+ std::cerr << "rbd: failed to parse second diff header" << std::endl;
+ goto done;
+ }
+
+ if (f_to != s_from) {
+ r = -EINVAL;
+ std::cerr << "The first TO snapshot must be equal with the second FROM "
+ << "snapshot, aborting" << std::endl;
+ goto done;
+ }
+
+ {
+ // header
+ bufferlist bl;
+ bl.append(utils::RBD_DIFF_BANNER);
+
+ __u8 tag;
+ if (f_from.size()) {
+ tag = RBD_DIFF_FROM_SNAP;
+ encode(tag, bl);
+ encode(f_from, bl);
+ }
+
+ if (s_to.size()) {
+ tag = RBD_DIFF_TO_SNAP;
+ encode(tag, bl);
+ encode(s_to, bl);
+ }
+
+ tag = RBD_DIFF_IMAGE_SIZE;
+ encode(tag, bl);
+ encode(s_size, bl);
+
+ r = bl.write_fd(pd);
+ if (r < 0) {
+ std::cerr << "rbd: failed to write merged diff header" << std::endl;
+ goto done;
+ }
+ }
+ if (f_size > s_size)
+ pc_size = f_size << 1;
+ else
+ pc_size = s_size << 1;
+
+ //data block
+ while (!f_end || !s_end) {
+ // progress through input
+ pc.update_progress(f_off + s_off, pc_size);
+
+ if (!f_end && !f_len) {
+ uint64_t last_off = f_off;
+
+ r = parse_diff_body(fd, &f_tag, &f_off, &f_len);
+ dout(2) << "first diff data chunk: tag=" << f_tag << ", "
+ << "off=" << f_off << ", "
+ << "len=" << f_len << dendl;
+ if (r < 0) {
+ std::cerr << "rbd: failed to read first diff data chunk header"
+ << std::endl;
+ goto done;
+ }
+
+ if (f_tag == RBD_DIFF_END) {
+ f_end = true;
+ f_tag = RBD_DIFF_ZERO;
+ f_off = f_size;
+ if (f_size < s_size)
+ f_len = s_size - f_size;
+ else
+ f_len = 0;
+ }
+
+ if (last_off > f_off) {
+ r = -ENOTSUP;
+ std::cerr << "rbd: out-of-order offset from first diff ("
+ << last_off << " > " << f_off << ")" << std::endl;
+ goto done;
+ }
+ }
+
+ if (!s_end && !s_len) {
+ uint64_t last_off = s_off;
+
+ r = parse_diff_body(sd, &s_tag, &s_off, &s_len);
+ dout(2) << "second diff data chunk: tag=" << s_tag << ", "
+ << "off=" << s_off << ", "
+ << "len=" << s_len << dendl;
+ if (r < 0) {
+ std::cerr << "rbd: failed to read second diff data chunk header"
+ << std::endl;
+ goto done;
+ }
+
+ if (s_tag == RBD_DIFF_END) {
+ s_end = true;
+ s_off = s_size;
+ if (s_size < f_size)
+ s_len = f_size - s_size;
+ else
+ s_len = 0;
+ }
+
+ if (last_off > s_off) {
+ r = -ENOTSUP;
+ std::cerr << "rbd: out-of-order offset from second diff ("
+ << last_off << " > " << s_off << ")" << std::endl;
+ goto done;
+ }
+ }
+
+ if (f_off < s_off && f_len) {
+ uint64_t delta = s_off - f_off;
+ if (delta > f_len)
+ delta = f_len;
+ r = accept_diff_body(fd, pd, f_tag, f_off, delta);
+ if (r < 0) {
+ std::cerr << "rbd: failed to merge diff chunk" << std::endl;
+ goto done;
+ }
+ f_off += delta;
+ f_len -= delta;
+
+ if (!f_len) {
+ f_tag = 0;
+ continue;
+ }
+ }
+ ceph_assert(f_off >= s_off);
+
+ if (f_off < s_off + s_len && f_len) {
+ uint64_t delta = s_off + s_len - f_off;
+ if (delta > f_len)
+ delta = f_len;
+ if (f_tag == RBD_DIFF_WRITE) {
+ if (first_stdin) {
+ bufferptr bp = buffer::create(delta);
+ r = safe_read_exact(fd, bp.c_str(), delta);
+ } else {
+ off64_t l = lseek64(fd, delta, SEEK_CUR);
+ r = l < 0 ? -errno : 0;
+ }
+ if (r < 0) {
+ std::cerr << "rbd: failed to skip first diff data" << std::endl;
+ goto done;
+ }
+ }
+ f_off += delta;
+ f_len -= delta;
+
+ if (!f_len) {
+ f_tag = 0;
+ continue;
+ }
+ }
+ ceph_assert(f_off >= s_off + s_len);
+ if (s_len) {
+ r = accept_diff_body(sd, pd, s_tag, s_off, s_len);
+ if (r < 0) {
+ std::cerr << "rbd: failed to merge diff chunk" << std::endl;
+ goto done;
+ }
+ s_off += s_len;
+ s_len = 0;
+ s_tag = 0;
+ } else {
+ ceph_assert(f_end && s_end);
+ }
+ continue;
+ }
+
+ {//tail
+ __u8 tag = RBD_DIFF_END;
+ bufferlist bl;
+ encode(tag, bl);
+ r = bl.write_fd(pd);
+ }
+
+done:
+ if (pd > 2)
+ close(pd);
+ if (sd > 2)
+ close(sd);
+ if (fd > 2)
+ close(fd);
+
+ if(r < 0) {
+ pc.fail();
+ if (pd > 2)
+ unlink(path);
+ } else
+ pc.finish();
+
+ return r;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ positional->add_options()
+ ("diff1-path", "path to first diff (or '-' for stdin)")
+ ("diff2-path", "path to second diff");
+ at::add_path_options(positional, options,
+ "path to merged diff (or '-' for stdout)");
+ at::add_no_progress_option(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string first_diff = utils::get_positional_argument(vm, 0);
+ if (first_diff.empty()) {
+ std::cerr << "rbd: first diff was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ std::string second_diff = utils::get_positional_argument(vm, 1);
+ if (second_diff.empty()) {
+ std::cerr << "rbd: second diff was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ std::string path;
+ size_t arg_index = 2;
+ int r = utils::get_path(vm, &arg_index, &path);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_merge_diff(first_diff.c_str(), second_diff.c_str(), path.c_str(),
+ vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: merge-diff error" << std::endl;
+ return -r;
+ }
+
+ return 0;
+}
+
+Shell::Action action(
+ {"merge-diff"}, {}, "Merge two diff exports together.", "",
+ &get_arguments, &execute);
+
+} // namespace merge_diff
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Migration.cc b/src/tools/rbd/action/Migration.cc
new file mode 100644
index 000000000..1ce6201d9
--- /dev/null
+++ b/src/tools/rbd/action/Migration.cc
@@ -0,0 +1,429 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "common/errno.h"
+#include "common/safe_io.h"
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+
+#include <sys/types.h>
+#include <fcntl.h>
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace migration {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_execute(librados::IoCtx& io_ctx, const std::string &image_name,
+ bool no_progress) {
+ utils::ProgressContext pc("Image migration", no_progress);
+ int r = librbd::RBD().migration_execute_with_progress(io_ctx,
+ image_name.c_str(), pc);
+ if (r < 0) {
+ pc.fail();
+ std::cerr << "rbd: migration failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+static int do_abort(librados::IoCtx& io_ctx, const std::string &image_name,
+ bool no_progress) {
+ utils::ProgressContext pc("Abort image migration", no_progress);
+ int r = librbd::RBD().migration_abort_with_progress(io_ctx,
+ image_name.c_str(), pc);
+ if (r < 0) {
+ pc.fail();
+ std::cerr << "rbd: aborting migration failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+static int do_commit(librados::IoCtx& io_ctx, const std::string &image_name,
+ bool force, bool no_progress) {
+ librbd::image_migration_status_t migration_status;
+ int r = librbd::RBD().migration_status(io_ctx, image_name.c_str(),
+ &migration_status,
+ sizeof(migration_status));
+ if (r < 0) {
+ std::cerr << "rbd: getting migration status failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ librados::IoCtx dst_io_ctx;
+ r = librados::Rados(io_ctx).ioctx_create2(migration_status.dest_pool_id, dst_io_ctx);
+ if (r < 0) {
+ std::cerr << "rbd: accessing source pool id="
+ << migration_status.dest_pool_id << " failed: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ r = utils::set_namespace(migration_status.dest_pool_namespace, &dst_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::Image image;
+ r = utils::open_image_by_id(dst_io_ctx, migration_status.dest_image_id,
+ true, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ std::vector<librbd::linked_image_spec_t> children;
+ r = image.list_descendants(&children);
+ if (r < 0) {
+ std::cerr << "rbd: listing descendants failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ if (children.size() > 0) {
+ std::cerr << "rbd: the image has "
+ << (children.size() == 1 ? "a descendant" : "descendants") << ": "
+ << std::endl;
+ for (auto& child : children) {
+ std::cerr << " " << child.pool_name << "/";
+ if (!child.pool_namespace.empty()) {
+ std::cerr << child.pool_namespace << "/";
+ }
+ std::cerr << child.image_name;
+ if (child.trash) {
+ std::cerr << " (trash " << child.image_id << ")";
+ }
+ std::cerr << std::endl;
+ }
+ std::cerr << "Warning: in-use, read-only descendant images"
+ << " will not detect the parent update." << std::endl;
+ if (force) {
+ std::cerr << "Proceeding anyway due to force flag set." << std::endl;
+ } else {
+ std::cerr << "Ensure no descendant images are opened read-only"
+ << " and run again with force flag." << std::endl;
+ return -EBUSY;
+ }
+ }
+
+ utils::ProgressContext pc("Commit image migration", no_progress);
+ r = librbd::RBD().migration_commit_with_progress(io_ctx, image_name.c_str(),
+ pc);
+ if (r < 0) {
+ pc.fail();
+ std::cerr << "rbd: committing migration failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+void get_prepare_arguments(po::options_description *positional,
+ po::options_description *options) {
+ options->add_options()
+ ("import-only", po::bool_switch(), "only import data from source")
+ ("source-spec-path", po::value<std::string>(),
+ "source-spec file (or '-' for stdin)")
+ ("source-spec", po::value<std::string>(),
+ "source-spec");
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_SOURCE);
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
+ at::add_create_image_options(options, true);
+ at::add_flatten_option(options);
+}
+
+int execute_prepare(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ bool import_only = vm["import-only"].as<bool>();
+
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
+ &image_name, import_only ? &snap_name : nullptr, true,
+ import_only ? utils::SNAPSHOT_PRESENCE_PERMITTED :
+ utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string dst_pool_name;
+ std::string dst_namespace_name;
+ std::string dst_image_name;
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name,
+ &dst_namespace_name, &dst_image_name, nullptr, false,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string source_spec;
+ if (vm.count("source-spec") && vm.count("source-spec-path")) {
+ std::cerr << "rbd: cannot specify both source-spec and source-spec-path"
+ << std::endl;
+ return -EINVAL;
+ } else if (vm.count("source-spec-path")) {
+ std::string source_spec_path = vm["source-spec-path"].as<std::string>();
+
+ int fd = STDIN_FILENO;
+ if (source_spec_path != "-") {
+ fd = open(source_spec_path.c_str(), O_RDONLY);
+ if (fd < 0) {
+ r = -errno;
+ std::cerr << "rbd: error opening " << source_spec_path << std::endl;
+ return r;
+ }
+ }
+
+ source_spec.resize(4096);
+ r = safe_read(fd, source_spec.data(), source_spec.size() - 1);
+ if (fd != STDIN_FILENO) {
+ VOID_TEMP_FAILURE_RETRY(close(fd));
+ }
+
+ if (r >= 0) {
+ source_spec.resize(r);
+ } else {
+ std::cerr << "rbd: error reading source-spec file: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ } else if (vm.count("source-spec")) {
+ source_spec = vm["source-spec"].as<std::string>();
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx dst_io_ctx;
+ if (source_spec.empty()) {
+ utils::normalize_pool_name(&dst_pool_name);
+ r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name,
+ &dst_io_ctx);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (import_only && source_spec.empty()) {
+ if (snap_name.empty()) {
+ std::cerr << "rbd: snapshot name was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ std::stringstream ss;
+ ss << R"({)"
+ << R"("type":"native",)"
+ << R"("pool_id":)" << io_ctx.get_id() << R"(,)"
+ << R"("pool_namespace":")" << io_ctx.get_namespace() << R"(",)"
+ << R"("image_name":")" << image_name << R"(",)"
+ << R"("snap_name":")" << snap_name << R"(")"
+ << R"(})";
+ source_spec = ss.str();
+
+ if (dst_image_name.empty()) {
+ std::cerr << "rbd: destination image name must be provided" << std::endl;
+ return -EINVAL;
+ }
+ io_ctx = dst_io_ctx;
+ image_name = dst_image_name;
+ snap_name = "";
+ } else if (!import_only && !source_spec.empty()) {
+ std::cerr << "rbd: --import-only must be used in combination with "
+ << "source-spec/source-spec-path" << std::endl;
+ return -EINVAL;
+ }
+
+ if (!snap_name.empty()) {
+ std::cerr << "rbd: snapshot name specified for a command that doesn't "
+ << "use it" << std::endl;
+ return -EINVAL;
+ }
+
+ librbd::ImageOptions opts;
+ r = utils::get_image_options(vm, true, &opts);
+ if (r < 0) {
+ return r;
+ }
+
+ if (source_spec.empty()) {
+ if (dst_image_name.empty()) {
+ dst_image_name = image_name;
+ }
+
+ int r = librbd::RBD().migration_prepare(io_ctx, image_name.c_str(),
+ dst_io_ctx, dst_image_name.c_str(),
+ opts);
+ if (r < 0) {
+ std::cerr << "rbd: preparing migration failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ } else {
+ ceph_assert(import_only);
+ r = librbd::RBD().migration_prepare_import(source_spec.c_str(), io_ctx,
+ image_name.c_str(), opts);
+ if (r < 0) {
+ std::cerr << "rbd: preparing import migration failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+void get_execute_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_no_progress_option(options);
+}
+
+int execute_execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+ io_ctx.set_pool_full_try();
+
+ r = do_execute(io_ctx, image_name, vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+void get_abort_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_no_progress_option(options);
+}
+
+int execute_abort(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+ io_ctx.set_pool_full_try();
+
+ r = do_abort(io_ctx, image_name, vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+void get_commit_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_no_progress_option(options);
+ options->add_options()
+ ("force", po::bool_switch(), "proceed even if the image has children");
+}
+
+int execute_commit(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+ io_ctx.set_pool_full_try();
+
+ r = do_commit(io_ctx, image_name, vm["force"].as<bool>(),
+ vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+Shell::SwitchArguments switched_arguments({"import-only"});
+
+Shell::Action action_prepare(
+ {"migration", "prepare"}, {}, "Prepare image migration.",
+ at::get_long_features_help(), &get_prepare_arguments, &execute_prepare);
+
+Shell::Action action_execute(
+ {"migration", "execute"}, {}, "Execute image migration.", "",
+ &get_execute_arguments, &execute_execute);
+
+Shell::Action action_abort(
+ {"migration", "abort"}, {}, "Cancel interrupted image migration.", "",
+ &get_abort_arguments, &execute_abort);
+
+Shell::Action action_commit(
+ {"migration", "commit"}, {}, "Commit image migration.", "",
+ &get_commit_arguments, &execute_commit);
+
+} // namespace migration
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/MirrorImage.cc b/src/tools/rbd/action/MirrorImage.cc
new file mode 100644
index 000000000..505d377f4
--- /dev/null
+++ b/src/tools/rbd/action/MirrorImage.cc
@@ -0,0 +1,605 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/MirrorDaemonServiceInfo.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/stringify.h"
+#include "common/config.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "global/global_context.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace mirror_image {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+int validate_mirroring_enabled(librbd::Image &image, bool snapshot = false) {
+ librbd::mirror_image_info_t mirror_image;
+ int r = image.mirror_image_get_info(&mirror_image, sizeof(mirror_image));
+ if (r < 0) {
+ std::cerr << "rbd: failed to retrieve mirror info: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ if (mirror_image.state != RBD_MIRROR_IMAGE_ENABLED) {
+ std::cerr << "rbd: mirroring not enabled on the image" << std::endl;
+ return -EINVAL;
+ }
+
+ if (snapshot) {
+ librbd::mirror_image_mode_t mode;
+ r = image.mirror_image_get_mode(&mode);
+ if (r < 0) {
+ std::cerr << "rbd: failed to retrieve mirror mode: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ if (mode != RBD_MIRROR_IMAGE_MODE_SNAPSHOT) {
+ std::cerr << "rbd: snapshot based mirroring not enabled on the image"
+ << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+} // anonymous namespace
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+}
+
+void get_arguments_enable(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ positional->add_options()
+ ("mode", po::value<std::string>()->default_value(""),
+ "mirror image mode (journal or snapshot) [default: journal]");
+}
+
+void get_arguments_disable(po::options_description *positional,
+ po::options_description *options) {
+ options->add_options()
+ ("force", po::bool_switch(), "disable even if not primary");
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+}
+
+int execute_enable_disable(const po::variables_map &vm, bool enable,
+ bool force) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ if (enable) {
+ librbd::mirror_image_mode_t mode = RBD_MIRROR_IMAGE_MODE_JOURNAL;
+ std::string mode_arg = utils::get_positional_argument(vm, arg_index++);
+ if (mode_arg == "journal") {
+ mode = RBD_MIRROR_IMAGE_MODE_JOURNAL;
+ } else if (mode_arg == "snapshot") {
+ mode = RBD_MIRROR_IMAGE_MODE_SNAPSHOT;
+ } else if (!mode_arg.empty()) {
+ std::cerr << "rbd: invalid mode name: " << mode_arg << std::endl;
+ return -EINVAL;
+ }
+ r = image.mirror_image_enable2(mode);
+ } else {
+ r = image.mirror_image_disable(force);
+ }
+ if (r < 0) {
+ return r;
+ }
+
+ std::cout << (enable ? "Mirroring enabled" : "Mirroring disabled")
+ << std::endl;
+
+ return 0;
+}
+
+int execute_disable(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ return execute_enable_disable(vm, false, vm["force"].as<bool>());
+}
+
+int execute_enable(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ return execute_enable_disable(vm, true, false);
+}
+
+void get_arguments_promote(po::options_description *positional,
+ po::options_description *options) {
+ options->add_options()
+ ("force", po::bool_switch(), "promote even if not cleanly demoted by remote cluster");
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+}
+
+int execute_promote(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ bool force = vm["force"].as<bool>();
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = image.mirror_image_promote(force);
+ if (r < 0) {
+ std::cerr << "rbd: error promoting image to primary" << std::endl;
+ return r;
+ }
+
+ std::cout << "Image promoted to primary" << std::endl;
+ return 0;
+}
+
+int execute_demote(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = image.mirror_image_demote();
+ if (r < 0) {
+ std::cerr << "rbd: error demoting image to non-primary" << std::endl;
+ return r;
+ }
+
+ std::cout << "Image demoted to non-primary" << std::endl;
+ return 0;
+}
+
+int execute_resync(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = image.mirror_image_resync();
+ if (r < 0) {
+ std::cerr << "rbd: error flagging image resync" << std::endl;
+ return r;
+ }
+
+ std::cout << "Flagged image for resync from primary" << std::endl;
+ return 0;
+}
+
+void get_status_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_format_options(options);
+}
+
+int execute_status(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ at::Format::Formatter formatter;
+ int r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(image);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::IoCtx default_ns_io_ctx;
+ default_ns_io_ctx.dup(io_ctx);
+ default_ns_io_ctx.set_namespace("");
+
+ std::vector<librbd::mirror_peer_site_t> mirror_peers;
+ utils::get_mirror_peer_sites(default_ns_io_ctx, &mirror_peers);
+
+ std::map<std::string, std::string> peer_mirror_uuids_to_name;
+ utils::get_mirror_peer_mirror_uuids_to_names(mirror_peers,
+ &peer_mirror_uuids_to_name);
+
+ librbd::mirror_image_global_status_t status;
+ r = image.mirror_image_get_global_status(&status, sizeof(status));
+ if (r < 0) {
+ std::cerr << "rbd: failed to get status for image " << image_name << ": "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ utils::populate_unknown_mirror_image_site_statuses(mirror_peers, &status);
+
+ std::string instance_id;
+ MirrorDaemonServiceInfo daemon_service_info(io_ctx);
+
+ librbd::mirror_image_site_status_t local_status;
+ int local_site_r = utils::get_local_mirror_image_status(
+ status, &local_status);
+ status.site_statuses.erase(
+ std::remove_if(status.site_statuses.begin(),
+ status.site_statuses.end(),
+ [](auto& status) {
+ return (status.mirror_uuid ==
+ RBD_MIRROR_IMAGE_STATUS_LOCAL_MIRROR_UUID);
+ }),
+ status.site_statuses.end());
+
+ if (local_site_r >= 0 && local_status.up) {
+ r = image.mirror_image_get_instance_id(&instance_id);
+ if (r == -EOPNOTSUPP) {
+ std::cerr << "rbd: newer release of Ceph OSDs required to map image "
+ << "to rbd-mirror daemon instance" << std::endl;
+ // not fatal
+ } else if (r < 0 && r != -ENOENT) {
+ std::cerr << "rbd: failed to get service id for image "
+ << image_name << ": " << cpp_strerror(r) << std::endl;
+ // not fatal
+ } else if (!instance_id.empty()) {
+ daemon_service_info.init();
+ }
+ }
+
+ std::vector<librbd::snap_info_t> snaps;
+ if (status.info.primary && status.info.state == RBD_MIRROR_IMAGE_ENABLED) {
+ librbd::mirror_image_mode_t mode = RBD_MIRROR_IMAGE_MODE_JOURNAL;
+ r = image.mirror_image_get_mode(&mode);
+ if (r < 0) {
+ std::cerr << "rbd: failed to retrieve mirror mode: "
+ << cpp_strerror(r) << std::endl;
+ // not fatal
+ }
+
+ if (mode == RBD_MIRROR_IMAGE_MODE_SNAPSHOT) {
+ image.snap_list(snaps);
+ snaps.erase(
+ remove_if(snaps.begin(),
+ snaps.end(),
+ [&image](const librbd::snap_info_t &snap) {
+ librbd::snap_namespace_type_t type;
+ int r = image.snap_get_namespace_type(snap.id, &type);
+ if (r < 0) {
+ return false;
+ }
+ return type != RBD_SNAP_NAMESPACE_TYPE_MIRROR;
+ }),
+ snaps.end());
+ }
+ }
+
+ auto mirror_service = daemon_service_info.get_by_instance_id(instance_id);
+
+ if (formatter != nullptr) {
+ formatter->open_object_section("image");
+ formatter->dump_string("name", image_name);
+ formatter->dump_string("global_id", status.info.global_id);
+ if (local_site_r >= 0) {
+ formatter->dump_string("state", utils::mirror_image_site_status_state(
+ local_status));
+ formatter->dump_string("description", local_status.description);
+ if (mirror_service != nullptr) {
+ mirror_service->dump_image(formatter);
+ }
+ formatter->dump_string("last_update", utils::timestr(
+ local_status.last_update));
+ }
+ if (!status.site_statuses.empty()) {
+ formatter->open_array_section("peer_sites");
+ for (auto& status : status.site_statuses) {
+ formatter->open_object_section("peer_site");
+
+ auto name_it = peer_mirror_uuids_to_name.find(status.mirror_uuid);
+ formatter->dump_string("site_name",
+ (name_it != peer_mirror_uuids_to_name.end() ? name_it->second : ""));
+ formatter->dump_string("mirror_uuids", status.mirror_uuid);
+
+ formatter->dump_string("state", utils::mirror_image_site_status_state(
+ status));
+ formatter->dump_string("description", status.description);
+ formatter->dump_string("last_update", utils::timestr(
+ status.last_update));
+ formatter->close_section(); // peer_site
+ }
+ formatter->close_section(); // peer_sites
+ }
+ if (!snaps.empty()) {
+ formatter->open_array_section("snapshots");
+ for (auto &snap : snaps) {
+ librbd::snap_mirror_namespace_t info;
+ r = image.snap_get_mirror_namespace(snap.id, &info, sizeof(info));
+ if (r < 0 ||
+ (info.state != RBD_SNAP_MIRROR_STATE_PRIMARY &&
+ info.state != RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED)) {
+ continue;
+ }
+ formatter->open_object_section("snapshot");
+ formatter->dump_unsigned("id", snap.id);
+ formatter->dump_string("name", snap.name);
+ formatter->dump_bool("demoted",
+ info.state == RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED);
+ formatter->open_array_section("mirror_peer_uuids");
+ for (auto &peer : info.mirror_peer_uuids) {
+ formatter->dump_string("peer_uuid", peer);
+ }
+ formatter->close_section(); // mirror_peer_uuids
+ formatter->close_section(); // snapshot
+ }
+ formatter->close_section(); // snapshots
+ }
+ formatter->close_section(); // image
+ formatter->flush(std::cout);
+ } else {
+ std::cout << image_name << ":\n"
+ << " global_id: " << status.info.global_id << "\n";
+ if (local_site_r >= 0) {
+ std::cout << " state: " << utils::mirror_image_site_status_state(
+ local_status) << "\n"
+ << " description: " << local_status.description << "\n";
+ if (mirror_service != nullptr) {
+ std::cout << " service: " <<
+ mirror_service->get_image_description() << "\n";
+ }
+ std::cout << " last_update: " << utils::timestr(
+ local_status.last_update) << std::endl;
+ }
+ if (!status.site_statuses.empty()) {
+ std::cout << " peer_sites:" << std::endl;
+
+ bool first_site = true;
+ for (auto& site : status.site_statuses) {
+ if (!first_site) {
+ std::cout << std::endl;
+ }
+ first_site = false;
+
+ auto name_it = peer_mirror_uuids_to_name.find(site.mirror_uuid);
+ std::cout << " name: "
+ << (name_it != peer_mirror_uuids_to_name.end() ?
+ name_it->second : site.mirror_uuid)
+ << std::endl
+ << " state: " << utils::mirror_image_site_status_state(
+ site) << std::endl
+ << " description: " << site.description << std::endl
+ << " last_update: " << utils::timestr(
+ site.last_update) << std::endl;
+ }
+ }
+ if (!snaps.empty()) {
+ std::cout << " snapshots:" << std::endl;
+
+ bool first_site = true;
+ for (auto &snap : snaps) {
+ librbd::snap_mirror_namespace_t info;
+ r = image.snap_get_mirror_namespace(snap.id, &info, sizeof(info));
+ if (r < 0 ||
+ (info.state != RBD_SNAP_MIRROR_STATE_PRIMARY &&
+ info.state != RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED)) {
+ continue;
+ }
+
+ if (!first_site) {
+ std::cout << std::endl;
+ }
+
+ first_site = false;
+ std::cout << " " << snap.id << " " << snap.name << " ("
+ << (info.state == RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED ?
+ "demoted " : "")
+ << "peer_uuids:[" << info.mirror_peer_uuids << "])";
+ }
+ std::cout << std::endl;
+ }
+ }
+
+ return 0;
+}
+
+void get_snapshot_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_snap_create_options(options);
+}
+
+int execute_snapshot(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ uint32_t flags;
+ r = utils::get_snap_create_flags(vm, &flags);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(image, true);
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t snap_id;
+ r = image.mirror_image_create_snapshot2(flags, &snap_id);
+ if (r < 0) {
+ std::cerr << "rbd: error creating snapshot: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ std::cout << "Snapshot ID: " << snap_id << std::endl;
+ return 0;
+}
+
+Shell::Action action_enable(
+ {"mirror", "image", "enable"}, {},
+ "Enable RBD mirroring for an image.", "",
+ &get_arguments_enable, &execute_enable);
+Shell::Action action_disable(
+ {"mirror", "image", "disable"}, {},
+ "Disable RBD mirroring for an image.", "",
+ &get_arguments_disable, &execute_disable);
+Shell::Action action_promote(
+ {"mirror", "image", "promote"}, {},
+ "Promote an image to primary for RBD mirroring.", "",
+ &get_arguments_promote, &execute_promote);
+Shell::Action action_demote(
+ {"mirror", "image", "demote"}, {},
+ "Demote an image to non-primary for RBD mirroring.", "",
+ &get_arguments, &execute_demote);
+Shell::Action action_resync(
+ {"mirror", "image", "resync"}, {},
+ "Force resync to primary image for RBD mirroring.", "",
+ &get_arguments, &execute_resync);
+Shell::Action action_status(
+ {"mirror", "image", "status"}, {},
+ "Show RBD mirroring status for an image.", "",
+ &get_status_arguments, &execute_status);
+Shell::Action action_snapshot(
+ {"mirror", "image", "snapshot"}, {},
+ "Create RBD mirroring image snapshot.", "",
+ &get_snapshot_arguments, &execute_snapshot);
+
+} // namespace mirror_image
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/MirrorPool.cc b/src/tools/rbd/action/MirrorPool.cc
new file mode 100644
index 000000000..b714c3bab
--- /dev/null
+++ b/src/tools/rbd/action/MirrorPool.cc
@@ -0,0 +1,1772 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/MirrorDaemonServiceInfo.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/buffer.h"
+#include "include/Context.h"
+#include "include/stringify.h"
+#include "include/rbd/librbd.hpp"
+#include "common/ceph_json.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "common/Throttle.h"
+#include "global/global_context.h"
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <regex>
+#include <set>
+#include <boost/program_options.hpp>
+#include "include/ceph_assert.h"
+
+#include <atomic>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::action::MirrorPool: "
+
+namespace rbd {
+namespace action {
+namespace mirror_pool {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static const std::string ALL_NAME("all");
+static const std::string SITE_NAME("site-name");
+
+namespace {
+
+void add_site_name_optional(po::options_description *options) {
+ options->add_options()
+ (SITE_NAME.c_str(), po::value<std::string>(), "local site name");
+}
+
+int set_site_name(librados::Rados& rados, const std::string& site_name) {
+ librbd::RBD rbd;
+ int r = rbd.mirror_site_name_set(rados, site_name);
+ if (r == -EOPNOTSUPP) {
+ std::cerr << "rbd: cluster does not support site names" << std::endl;
+ return r;
+ } else if (r < 0) {
+ std::cerr << "rbd: failed to set site name" << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+struct MirrorPeerDirection {};
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ MirrorPeerDirection *target_type, int permit_tx) {
+ po::validators::check_first_occurrence(v);
+ const std::string &s = po::validators::get_single_string(values);
+
+ if (s == "rx-only") {
+ v = boost::any(RBD_MIRROR_PEER_DIRECTION_RX);
+ } else if (s == "rx-tx") {
+ v = boost::any(RBD_MIRROR_PEER_DIRECTION_RX_TX);
+ } else if (permit_tx != 0 && s == "tx-only") {
+ v = boost::any(RBD_MIRROR_PEER_DIRECTION_TX);
+ } else {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+}
+
+void add_direction_optional(po::options_description *options) {
+ options->add_options()
+ ("direction", po::value<MirrorPeerDirection>(),
+ "mirroring direction (rx-only, rx-tx)\n"
+ "[default: rx-tx]");
+}
+
+int validate_mirroring_enabled(librados::IoCtx& io_ctx) {
+ librbd::RBD rbd;
+ rbd_mirror_mode_t mirror_mode;
+ int r = rbd.mirror_mode_get(io_ctx, &mirror_mode);
+ if (r < 0) {
+ std::cerr << "rbd: failed to retrieve mirror mode: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ if (mirror_mode == RBD_MIRROR_MODE_DISABLED) {
+ std::cerr << "rbd: mirroring not enabled on the pool" << std::endl;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int validate_uuid(const std::string &uuid) {
+ std::regex pattern("^[A-F0-9]{8}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{12}$",
+ std::regex::icase);
+ std::smatch match;
+ if (!std::regex_match(uuid, match, pattern)) {
+ std::cerr << "rbd: invalid uuid '" << uuid << "'" << std::endl;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int read_key_file(std::string path, std::string* key) {
+ std::ifstream key_file;
+ key_file.open(path);
+ if (key_file.fail()) {
+ std::cerr << "rbd: failed to open " << path << std::endl;
+ return -EINVAL;
+ }
+
+ std::getline(key_file, *key);
+ if (key_file.bad()) {
+ std::cerr << "rbd: failed to read key from " << path << std::endl;
+ return -EINVAL;
+ }
+
+ key_file.close();
+ return 0;
+}
+
+void add_uuid_option(po::options_description *positional) {
+ positional->add_options()
+ ("uuid", po::value<std::string>(), "peer uuid");
+}
+
+int get_uuid(const po::variables_map &vm, size_t arg_index,
+ std::string *uuid) {
+ *uuid = utils::get_positional_argument(vm, arg_index);
+ if (uuid->empty()) {
+ std::cerr << "rbd: must specify peer uuid" << std::endl;
+ return -EINVAL;
+ }
+ return validate_uuid(*uuid);
+}
+
+int get_remote_cluster_spec(const po::variables_map &vm,
+ const std::string &spec,
+ std::string *remote_client_name,
+ std::string *remote_cluster,
+ std::map<std::string, std::string>* attributes) {
+ if (vm.count("remote-client-name")) {
+ *remote_client_name = vm["remote-client-name"].as<std::string>();
+ }
+ if (vm.count("remote-cluster")) {
+ *remote_cluster = vm["remote-cluster"].as<std::string>();
+ }
+ if (vm.count("remote-mon-host")) {
+ (*attributes)["mon_host"] = vm["remote-mon-host"].as<std::string>();
+ }
+ if (vm.count("remote-key-file")) {
+ std::string key;
+ int r = read_key_file(vm["remote-key-file"].as<std::string>(), &key);
+ if (r < 0) {
+ return r;
+ }
+ (*attributes)["key"] = key;
+ }
+
+ if (!spec.empty()) {
+ std::regex pattern("^(?:(client\\.[^@]+)@)?([^/@]+)$");
+ std::smatch match;
+ if (!std::regex_match(spec, match, pattern)) {
+ std::cerr << "rbd: invalid spec '" << spec << "'" << std::endl;
+ return -EINVAL;
+ }
+ if (match[1].matched) {
+ *remote_client_name = match[1];
+ }
+ *remote_cluster = match[2];
+ }
+
+ if (remote_cluster->empty()) {
+ std::cerr << "rbd: remote cluster was not specified" << std::endl;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int set_peer_config_key(librados::IoCtx& io_ctx, const std::string& peer_uuid,
+ std::map<std::string, std::string>&& attributes) {
+ librbd::RBD rbd;
+ int r = rbd.mirror_peer_site_set_attributes(io_ctx, peer_uuid, attributes);
+ if (r == -EPERM) {
+ std::cerr << "rbd: permission denied attempting to set peer "
+ << "config-key secrets in the monitor" << std::endl;
+ return r;
+ } else if (r < 0) {
+ std::cerr << "rbd: failed to update mirroring peer config: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+int get_peer_config_key(librados::IoCtx& io_ctx, const std::string& peer_uuid,
+ std::map<std::string, std::string>* attributes) {
+ librbd::RBD rbd;
+ int r = rbd.mirror_peer_site_get_attributes(io_ctx, peer_uuid, attributes);
+ if (r == -ENOENT) {
+ return r;
+ } else if (r == -EPERM) {
+ std::cerr << "rbd: permission denied attempting to access peer "
+ << "config-key secrets from the monitor" << std::endl;
+ return r;
+ } else if (r == -EINVAL) {
+ std::cerr << "rbd: corrupt mirroring peer config" << std::endl;
+ return r;
+ } else if (r < 0) {
+ std::cerr << "rbd: error reading mirroring peer config: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+int update_peer_config_key(librados::IoCtx& io_ctx,
+ const std::string& peer_uuid,
+ const std::string& key,
+ const std::string& value) {
+ std::map<std::string, std::string> attributes;
+ int r = get_peer_config_key(io_ctx, peer_uuid, &attributes);
+ if (r == -ENOENT) {
+ return set_peer_config_key(io_ctx, peer_uuid, {{key, value}});
+ } else if (r < 0) {
+ return r;
+ }
+
+ if (value.empty()) {
+ attributes.erase(key);
+ } else {
+ attributes[key] = value;
+ }
+ return set_peer_config_key(io_ctx, peer_uuid, std::move(attributes));
+}
+
+int format_mirror_peers(librados::IoCtx& io_ctx,
+ at::Format::Formatter formatter,
+ const std::vector<librbd::mirror_peer_site_t> &peers,
+ bool config_key) {
+ if (formatter != nullptr) {
+ formatter->open_array_section("peers");
+ } else {
+ std::cout << "Peer Sites: ";
+ if (peers.empty()) {
+ std::cout << "none";
+ }
+ std::cout << std::endl;
+ }
+
+ for (auto &peer : peers) {
+ std::map<std::string, std::string> attributes;
+ if (config_key) {
+ int r = get_peer_config_key(io_ctx, peer.uuid, &attributes);
+ if (r < 0 && r != -ENOENT) {
+ return r;
+ }
+ }
+
+ std::string direction;
+ switch (peer.direction) {
+ case RBD_MIRROR_PEER_DIRECTION_RX:
+ direction = "rx-only";
+ break;
+ case RBD_MIRROR_PEER_DIRECTION_TX:
+ direction = "tx-only";
+ break;
+ case RBD_MIRROR_PEER_DIRECTION_RX_TX:
+ direction = "rx-tx";
+ break;
+ default:
+ direction = "unknown";
+ break;
+ }
+
+ if (formatter != nullptr) {
+ formatter->open_object_section("peer");
+ formatter->dump_string("uuid", peer.uuid);
+ formatter->dump_string("direction", direction);
+ formatter->dump_string("site_name", peer.site_name);
+ formatter->dump_string("mirror_uuid", peer.mirror_uuid);
+ formatter->dump_string("client_name", peer.client_name);
+ for (auto& pair : attributes) {
+ formatter->dump_string(pair.first.c_str(), pair.second);
+ }
+ formatter->close_section();
+ } else {
+ std::cout << std::endl
+ << "UUID: " << peer.uuid << std::endl
+ << "Name: " << peer.site_name << std::endl;
+ if (peer.direction != RBD_MIRROR_PEER_DIRECTION_RX ||
+ !peer.mirror_uuid.empty()) {
+ std::cout << "Mirror UUID: " << peer.mirror_uuid << std::endl;
+ }
+ std::cout << "Direction: " << direction << std::endl;
+ if (peer.direction != RBD_MIRROR_PEER_DIRECTION_TX ||
+ !peer.client_name.empty()) {
+ std::cout << "Client: " << peer.client_name << std::endl;
+ }
+ if (config_key) {
+ std::cout << "Mon Host: " << attributes["mon_host"] << std::endl
+ << "Key: " << attributes["key"] << std::endl;
+ }
+ if (peer.site_name != peers.rbegin()->site_name) {
+ std::cout << std::endl;
+ }
+ }
+ }
+
+ if (formatter != nullptr) {
+ formatter->close_section();
+ }
+ return 0;
+}
+
+class ImageRequestBase {
+public:
+ void send() {
+ dout(20) << this << " " << __func__ << ": image_name=" << m_image_name
+ << dendl;
+
+ auto ctx = new LambdaContext([this](int r) {
+ handle_finalize(r);
+ });
+
+ // will pause here until slots are available
+ m_finalize_ctx = m_throttle.start_op(ctx);
+
+ open_image();
+ }
+
+protected:
+ ImageRequestBase(librados::IoCtx &io_ctx, OrderedThrottle &throttle,
+ const std::string &image_name)
+ : m_io_ctx(io_ctx), m_throttle(throttle), m_image_name(image_name) {
+ }
+ virtual ~ImageRequestBase() {
+ }
+
+ virtual bool skip_get_info() const {
+ return false;
+ }
+ virtual void get_info(librbd::Image &image, librbd::mirror_image_info_t *info,
+ librbd::RBD::AioCompletion *aio_comp) {
+ image.aio_mirror_image_get_info(info, sizeof(librbd::mirror_image_info_t),
+ aio_comp);
+ }
+
+ virtual bool skip_action(const librbd::mirror_image_info_t &info) const {
+ return false;
+ }
+ virtual void execute_action(librbd::Image &image,
+ librbd::RBD::AioCompletion *aio_comp) = 0;
+ virtual void handle_execute_action(int r) {
+ dout(20) << this << " " << __func__ << ": r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ std::cerr << "rbd: failed to " << get_action_type() << " image "
+ << m_image_name << ": " << cpp_strerror(r) << std::endl;
+ m_ret_val = r;
+ }
+
+ close_image();
+ }
+
+ virtual void finalize_action() {
+ }
+ virtual std::string get_action_type() const = 0;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * OPEN_IMAGE
+ * |
+ * v
+ * GET_INFO
+ * |
+ * v
+ * EXECUTE_ACTION
+ * |
+ * v
+ * CLOSE_IMAGE
+ * |
+ * v
+ * FINALIZE_ACTION
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ OrderedThrottle &m_throttle;
+ const std::string m_image_name;
+
+ librbd::Image m_image;
+ Context *m_finalize_ctx = nullptr;
+
+ librbd::mirror_image_info_t m_mirror_image_info;
+
+ int m_ret_val = 0;
+
+ void open_image() {
+ dout(20) << this << " " << __func__ << dendl;
+
+ librbd::RBD rbd;
+ auto aio_completion = utils::create_aio_completion<
+ ImageRequestBase, &ImageRequestBase::handle_open_image>(this);
+ rbd.aio_open(m_io_ctx, m_image, m_image_name.c_str(), nullptr,
+ aio_completion);
+ }
+
+ void handle_open_image(int r) {
+ dout(20) << this << " " << __func__ << ": r=" << r << dendl;
+
+ if (r < 0) {
+ std::cerr << "rbd: failed to open image "
+ << m_image_name << ": " << cpp_strerror(r) << std::endl;
+ m_finalize_ctx->complete(r);
+ return;
+ }
+
+ get_info();
+ }
+
+ void get_info() {
+ if (skip_get_info()) {
+ execute_action();
+ return;
+ }
+ dout(20) << this << " " << __func__ << dendl;
+
+ auto aio_completion = utils::create_aio_completion<
+ ImageRequestBase, &ImageRequestBase::handle_get_info>(this);
+ get_info(m_image, &m_mirror_image_info, aio_completion);
+ }
+
+ void handle_get_info(int r) {
+ dout(20) << this << " " << __func__ << ": r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ close_image();
+ return;
+ } else if (r < 0) {
+ std::cerr << "rbd: failed to retrieve mirror image info for "
+ << m_image_name << ": " << cpp_strerror(r) << std::endl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ execute_action();
+ }
+
+ void execute_action() {
+ if (skip_action(m_mirror_image_info)) {
+ close_image();
+ return;
+ }
+ dout(20) << this << " " << __func__ << dendl;
+
+ auto aio_completion = utils::create_aio_completion<
+ ImageRequestBase, &ImageRequestBase::handle_execute_action>(this);
+ execute_action(m_image, aio_completion);
+ }
+
+ void close_image() {
+ dout(20) << this << " " << __func__ << dendl;
+
+ auto aio_completion = utils::create_aio_completion<
+ ImageRequestBase, &ImageRequestBase::handle_close_image>(this);
+ m_image.aio_close(aio_completion);
+ }
+
+ void handle_close_image(int r) {
+ dout(20) << this << " " << __func__ << ": r=" << r << dendl;
+
+ if (r < 0) {
+ std::cerr << "rbd: failed to close image "
+ << m_image_name << ": " << cpp_strerror(r) << std::endl;
+ }
+
+ m_finalize_ctx->complete(r);
+ }
+
+ void handle_finalize(int r) {
+ dout(20) << this << " " << __func__ << ": r=" << r << dendl;
+
+ if (r == 0 && m_ret_val < 0) {
+ r = m_ret_val;
+ }
+ if (r >= 0) {
+ finalize_action();
+ }
+ m_throttle.end_op(r);
+ delete this;
+ }
+
+};
+
+class PromoteImageRequest : public ImageRequestBase {
+public:
+ PromoteImageRequest(librados::IoCtx &io_ctx, OrderedThrottle &throttle,
+ const std::string &image_name, std::atomic<unsigned> *counter,
+ bool force)
+ : ImageRequestBase(io_ctx, throttle, image_name), m_counter(counter),
+ m_force(force) {
+ }
+
+protected:
+ bool skip_action(const librbd::mirror_image_info_t &info) const override {
+ return (info.state != RBD_MIRROR_IMAGE_ENABLED || info.primary);
+ }
+
+ void execute_action(librbd::Image &image,
+ librbd::RBD::AioCompletion *aio_comp) override {
+ image.aio_mirror_image_promote(m_force, aio_comp);
+ }
+
+ void handle_execute_action(int r) override {
+ if (r >= 0) {
+ (*m_counter)++;
+ }
+ ImageRequestBase::handle_execute_action(r);
+ }
+
+ std::string get_action_type() const override {
+ return "promote";
+ }
+
+private:
+ std::atomic<unsigned> *m_counter = nullptr;
+ bool m_force;
+};
+
+class DemoteImageRequest : public ImageRequestBase {
+public:
+ DemoteImageRequest(librados::IoCtx &io_ctx, OrderedThrottle &throttle,
+ const std::string &image_name, std::atomic<unsigned> *counter)
+ : ImageRequestBase(io_ctx, throttle, image_name), m_counter(counter) {
+ }
+
+protected:
+ bool skip_action(const librbd::mirror_image_info_t &info) const override {
+ return (info.state != RBD_MIRROR_IMAGE_ENABLED || !info.primary);
+ }
+
+ void execute_action(librbd::Image &image,
+ librbd::RBD::AioCompletion *aio_comp) override {
+ image.aio_mirror_image_demote(aio_comp);
+ }
+ void handle_execute_action(int r) override {
+ if (r >= 0) {
+ (*m_counter)++;
+ }
+ ImageRequestBase::handle_execute_action(r);
+ }
+
+ std::string get_action_type() const override {
+ return "demote";
+ }
+
+private:
+ std::atomic<unsigned> *m_counter = nullptr;
+};
+
+class StatusImageRequest : public ImageRequestBase {
+public:
+ StatusImageRequest(
+ librados::IoCtx &io_ctx, OrderedThrottle &throttle,
+ const std::string &image_name,
+ const std::map<std::string, std::string> &instance_ids,
+ const std::vector<librbd::mirror_peer_site_t>& mirror_peers,
+ const std::map<std::string, std::string> &peer_mirror_uuids_to_name,
+ const MirrorDaemonServiceInfo &daemon_service_info,
+ at::Format::Formatter formatter)
+ : ImageRequestBase(io_ctx, throttle, image_name),
+ m_instance_ids(instance_ids), m_mirror_peers(mirror_peers),
+ m_peer_mirror_uuids_to_name(peer_mirror_uuids_to_name),
+ m_daemon_service_info(daemon_service_info), m_formatter(formatter) {
+ }
+
+protected:
+ bool skip_get_info() const override {
+ return true;
+ }
+
+ void execute_action(librbd::Image &image,
+ librbd::RBD::AioCompletion *aio_comp) override {
+ image.get_id(&m_image_id);
+ image.aio_mirror_image_get_global_status(
+ &m_mirror_image_global_status, sizeof(m_mirror_image_global_status),
+ aio_comp);
+ }
+
+ void finalize_action() override {
+ if (m_mirror_image_global_status.info.global_id.empty()) {
+ return;
+ }
+
+ utils::populate_unknown_mirror_image_site_statuses(
+ m_mirror_peers, &m_mirror_image_global_status);
+
+ librbd::mirror_image_site_status_t local_status;
+ int local_site_r = utils::get_local_mirror_image_status(
+ m_mirror_image_global_status, &local_status);
+ m_mirror_image_global_status.site_statuses.erase(
+ std::remove_if(m_mirror_image_global_status.site_statuses.begin(),
+ m_mirror_image_global_status.site_statuses.end(),
+ [](auto& status) {
+ return (status.mirror_uuid ==
+ RBD_MIRROR_IMAGE_STATUS_LOCAL_MIRROR_UUID);
+ }),
+ m_mirror_image_global_status.site_statuses.end());
+
+ std::string instance_id = (local_site_r >= 0 && local_status.up &&
+ m_instance_ids.count(m_image_id)) ?
+ m_instance_ids.find(m_image_id)->second : "";
+
+ auto mirror_service = m_daemon_service_info.get_by_instance_id(instance_id);
+ if (m_formatter != nullptr) {
+ m_formatter->open_object_section("image");
+ m_formatter->dump_string("name", m_mirror_image_global_status.name);
+ m_formatter->dump_string(
+ "global_id", m_mirror_image_global_status.info.global_id);
+ if (local_site_r >= 0) {
+ m_formatter->dump_string("state", utils::mirror_image_site_status_state(
+ local_status));
+ m_formatter->dump_string("description", local_status.description);
+ if (mirror_service != nullptr) {
+ mirror_service->dump_image(m_formatter);
+ }
+ m_formatter->dump_string("last_update", utils::timestr(
+ local_status.last_update));
+ }
+ if (!m_mirror_image_global_status.site_statuses.empty()) {
+ m_formatter->open_array_section("peer_sites");
+ for (auto& status : m_mirror_image_global_status.site_statuses) {
+ m_formatter->open_object_section("peer_site");
+
+ auto name_it = m_peer_mirror_uuids_to_name.find(status.mirror_uuid);
+ m_formatter->dump_string("site_name",
+ (name_it != m_peer_mirror_uuids_to_name.end() ?
+ name_it->second : ""));
+ m_formatter->dump_string("mirror_uuids", status.mirror_uuid);
+
+ m_formatter->dump_string(
+ "state", utils::mirror_image_site_status_state(status));
+ m_formatter->dump_string("description", status.description);
+ m_formatter->dump_string("last_update", utils::timestr(
+ status.last_update));
+ m_formatter->close_section(); // peer_site
+ }
+ m_formatter->close_section(); // peer_sites
+ }
+ m_formatter->close_section(); // image
+ } else {
+ std::cout << std::endl
+ << m_mirror_image_global_status.name << ":" << std::endl
+ << " global_id: "
+ << m_mirror_image_global_status.info.global_id << std::endl;
+ if (local_site_r >= 0) {
+ std::cout << " state: " << utils::mirror_image_site_status_state(
+ local_status) << std::endl
+ << " description: " << local_status.description << std::endl;
+ if (mirror_service != nullptr) {
+ std::cout << " service: " <<
+ mirror_service->get_image_description() << std::endl;
+ }
+ std::cout << " last_update: " << utils::timestr(
+ local_status.last_update) << std::endl;
+ }
+ if (!m_mirror_image_global_status.site_statuses.empty()) {
+ std::cout << " peer_sites:" << std::endl;
+ bool first_site = true;
+ for (auto& site : m_mirror_image_global_status.site_statuses) {
+ if (!first_site) {
+ std::cout << std::endl;
+ }
+ first_site = false;
+
+ auto name_it = m_peer_mirror_uuids_to_name.find(site.mirror_uuid);
+ std::cout << " name: "
+ << (name_it != m_peer_mirror_uuids_to_name.end() ?
+ name_it->second : site.mirror_uuid)
+ << std::endl
+ << " state: " << utils::mirror_image_site_status_state(
+ site) << std::endl
+ << " description: " << site.description << std::endl
+ << " last_update: " << utils::timestr(
+ site.last_update) << std::endl;
+ }
+ }
+ }
+ }
+
+ std::string get_action_type() const override {
+ return "status";
+ }
+
+private:
+ const std::map<std::string, std::string> &m_instance_ids;
+ const std::vector<librbd::mirror_peer_site_t> &m_mirror_peers;
+ const std::map<std::string, std::string> &m_peer_mirror_uuids_to_name;
+ const MirrorDaemonServiceInfo &m_daemon_service_info;
+ at::Format::Formatter m_formatter;
+ std::string m_image_id;
+ librbd::mirror_image_global_status_t m_mirror_image_global_status;
+};
+
+template <typename RequestT>
+class ImageRequestAllocator {
+public:
+ template <class... Args>
+ RequestT *operator()(librados::IoCtx &io_ctx, OrderedThrottle &throttle,
+ const std::string &image_name, Args&&... args) {
+ return new RequestT(io_ctx, throttle, image_name,
+ std::forward<Args>(args)...);
+ }
+};
+
+template <typename RequestT>
+class ImageRequestGenerator {
+public:
+ template <class... Args>
+ ImageRequestGenerator(librados::IoCtx &io_ctx, Args&&... args)
+ : m_io_ctx(io_ctx),
+ m_factory(std::bind(ImageRequestAllocator<RequestT>(),
+ std::ref(m_io_ctx), std::ref(m_throttle),
+ std::placeholders::_1, std::forward<Args>(args)...)),
+ m_throttle(g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"),
+ true) {
+ }
+
+ int execute() {
+ // use the alphabetical list of image names for pool-level
+ // mirror image operations
+ librbd::RBD rbd;
+ int r = rbd.list2(m_io_ctx, &m_images);
+ if (r < 0 && r != -ENOENT) {
+ std::cerr << "rbd: failed to list images within pool" << std::endl;
+ return r;
+ }
+
+ for (auto &image : m_images) {
+ auto request = m_factory(image.name);
+ request->send();
+ }
+
+ return m_throttle.wait_for_ret();
+ }
+private:
+ typedef std::function<RequestT*(const std::string&)> Factory;
+
+ librados::IoCtx &m_io_ctx;
+ Factory m_factory;
+
+ OrderedThrottle m_throttle;
+
+ std::vector<librbd::image_spec_t> m_images;
+
+};
+
+int get_mirror_image_status(
+ librados::IoCtx& io_ctx, uint32_t* total_images,
+ std::map<librbd::mirror_image_status_state_t, int>* mirror_image_states,
+ MirrorHealth* mirror_image_health) {
+ librbd::RBD rbd;
+ int r = rbd.mirror_image_status_summary(io_ctx, mirror_image_states);
+ if (r < 0) {
+ std::cerr << "rbd: failed to get status summary for mirrored images: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ *mirror_image_health = MIRROR_HEALTH_OK;
+ for (auto &it : *mirror_image_states) {
+ auto &state = it.first;
+ if (*mirror_image_health < MIRROR_HEALTH_WARNING &&
+ (state != MIRROR_IMAGE_STATUS_STATE_REPLAYING &&
+ state != MIRROR_IMAGE_STATUS_STATE_STOPPED)) {
+ *mirror_image_health = MIRROR_HEALTH_WARNING;
+ }
+ if (*mirror_image_health < MIRROR_HEALTH_ERROR &&
+ state == MIRROR_IMAGE_STATUS_STATE_ERROR) {
+ *mirror_image_health = MIRROR_HEALTH_ERROR;
+ }
+ *total_images += it.second;
+ }
+
+ return 0;
+}
+
+} // anonymous namespace
+
+void get_peer_bootstrap_create_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, false);
+ add_site_name_optional(options);
+}
+
+int execute_peer_bootstrap_create(
+ const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ nullptr, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ if (vm.count(SITE_NAME)) {
+ r = set_site_name(rados, vm[SITE_NAME].as<std::string>());
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ librbd::RBD rbd;
+ std::string token;
+ r = rbd.mirror_peer_bootstrap_create(io_ctx, &token);
+ if (r == -EEXIST) {
+ std::cerr << "rbd: mismatch with pre-existing RBD mirroring peer user caps"
+ << std::endl;
+ } else if (r < 0) {
+ std::cerr << "rbd: failed to create mirroring bootstrap token: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ std::cout << token << std::endl;
+ return 0;
+}
+
+void get_peer_bootstrap_import_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, false);
+ add_site_name_optional(options);
+ positional->add_options()
+ ("token-path", po::value<std::string>(),
+ "bootstrap token file (or '-' for stdin)");
+ options->add_options()
+ ("token-path", po::value<std::string>(),
+ "bootstrap token file (or '-' for stdin)");
+ add_direction_optional(options);
+}
+
+int execute_peer_bootstrap_import(
+ const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ nullptr, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string token_path;
+ if (vm.count("token-path")) {
+ token_path = vm["token-path"].as<std::string>();
+ } else {
+ token_path = utils::get_positional_argument(vm, arg_index++);
+ }
+
+ if (token_path.empty()) {
+ std::cerr << "rbd: token path was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ rbd_mirror_peer_direction_t mirror_peer_direction =
+ RBD_MIRROR_PEER_DIRECTION_RX_TX;
+ if (vm.count("direction")) {
+ mirror_peer_direction = vm["direction"].as<rbd_mirror_peer_direction_t>();
+ }
+
+ int fd = STDIN_FILENO;
+ if (token_path != "-") {
+ fd = open(token_path.c_str(), O_RDONLY|O_BINARY);
+ if (fd < 0) {
+ r = -errno;
+ std::cerr << "rbd: error opening " << token_path << std::endl;
+ return r;
+ }
+ }
+
+ char token[1024];
+ memset(token, 0, sizeof(token));
+ r = safe_read(fd, token, sizeof(token) - 1);
+ if (fd != STDIN_FILENO) {
+ VOID_TEMP_FAILURE_RETRY(close(fd));
+ }
+
+ if (r < 0) {
+ std::cerr << "rbd: error reading token file: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ if (vm.count(SITE_NAME)) {
+ r = set_site_name(rados, vm[SITE_NAME].as<std::string>());
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ librbd::RBD rbd;
+ r = rbd.mirror_peer_bootstrap_import(io_ctx, mirror_peer_direction, token);
+ if (r == -ENOSYS) {
+ std::cerr << "rbd: mirroring is not enabled on remote peer" << std::endl;
+ return r;
+ } else if (r < 0) {
+ std::cerr << "rbd: failed to import peer bootstrap token" << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_peer_add_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, false);
+ positional->add_options()
+ ("remote-cluster-spec", "remote cluster spec\n"
+ "(example: [<client name>@]<cluster name>)");
+ options->add_options()
+ ("remote-client-name", po::value<std::string>(), "remote client name")
+ ("remote-cluster", po::value<std::string>(), "remote cluster name")
+ ("remote-mon-host", po::value<std::string>(), "remote mon host(s)")
+ ("remote-key-file", po::value<std::string>(),
+ "path to file containing remote key");
+ add_direction_optional(options);
+}
+
+int execute_peer_add(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ nullptr, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string remote_client_name = g_ceph_context->_conf->name.to_str();
+ std::string remote_cluster;
+ std::map<std::string, std::string> attributes;
+ r = get_remote_cluster_spec(
+ vm, utils::get_positional_argument(vm, arg_index),
+ &remote_client_name, &remote_cluster, &attributes);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ // TODO: temporary restriction to prevent adding multiple peers
+ // until rbd-mirror daemon can properly handle the scenario
+ librbd::RBD rbd;
+ std::vector<librbd::mirror_peer_site_t> mirror_peers;
+ r = rbd.mirror_peer_site_list(io_ctx, &mirror_peers);
+ if (r < 0) {
+ std::cerr << "rbd: failed to list mirror peers" << std::endl;
+ return r;
+ }
+
+ // ignore tx-only peers since the restriction is for rx
+ mirror_peers.erase(
+ std::remove_if(
+ mirror_peers.begin(), mirror_peers.end(),
+ [](const librbd::mirror_peer_site_t& peer) {
+ return (peer.direction == RBD_MIRROR_PEER_DIRECTION_TX);
+ }),
+ mirror_peers.end());
+
+ if (!mirror_peers.empty()) {
+ std::cerr << "rbd: multiple RX peers are not currently supported"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ rbd_mirror_peer_direction_t mirror_peer_direction =
+ RBD_MIRROR_PEER_DIRECTION_RX_TX;
+ if (vm.count("direction")) {
+ mirror_peer_direction = vm["direction"].as<rbd_mirror_peer_direction_t>();
+ }
+
+ std::string uuid;
+ r = rbd.mirror_peer_site_add(
+ io_ctx, &uuid, mirror_peer_direction, remote_cluster, remote_client_name);
+ if (r == -EEXIST) {
+ std::cerr << "rbd: mirror peer already exists" << std::endl;
+ return r;
+ } else if (r < 0) {
+ std::cerr << "rbd: error adding mirror peer" << std::endl;
+ return r;
+ }
+
+ if (!attributes.empty()) {
+ r = set_peer_config_key(io_ctx, uuid, std::move(attributes));
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ std::cout << uuid << std::endl;
+ return 0;
+}
+
+void get_peer_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, false);
+ add_uuid_option(positional);
+}
+
+int execute_peer_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ nullptr, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string uuid;
+ r = get_uuid(vm, arg_index, &uuid);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.mirror_peer_site_remove(io_ctx, uuid);
+ if (r < 0) {
+ std::cerr << "rbd: error removing mirror peer" << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_peer_set_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, false);
+ add_uuid_option(positional);
+ positional->add_options()
+ ("key", "peer parameter\n"
+ "(direction, site-name, client, mon-host, key-file)")
+ ("value", "new value for specified key\n"
+ "(rx-only, tx-only, or rx-tx for direction)");
+}
+
+int execute_peer_set(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ nullptr, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string uuid;
+ r = get_uuid(vm, arg_index++, &uuid);
+ if (r < 0) {
+ return r;
+ }
+
+ std::set<std::string> valid_keys{{"direction", "site-name", "cluster",
+ "client", "mon-host", "key-file"}};
+ std::string key = utils::get_positional_argument(vm, arg_index++);
+ if (valid_keys.find(key) == valid_keys.end()) {
+ std::cerr << "rbd: must specify ";
+ for (auto& valid_key : valid_keys) {
+ std::cerr << "'" << valid_key << "'";
+ if (&valid_key != &(*valid_keys.rbegin())) {
+ std::cerr << ", ";
+ }
+ }
+ std::cerr << " key." << std::endl;
+ return -EINVAL;
+ }
+
+ std::string value = utils::get_positional_argument(vm, arg_index++);
+ if (value.empty() && (key == "client" || key == "cluster")) {
+ std::cerr << "rbd: must specify new " << key << " value." << std::endl;
+ } else if (key == "key-file") {
+ key = "key";
+ r = read_key_file(value, &value);
+ if (r < 0) {
+ return r;
+ }
+ } else if (key == "mon-host") {
+ key = "mon_host";
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ if (key == "client") {
+ r = rbd.mirror_peer_site_set_client_name(io_ctx, uuid.c_str(),
+ value.c_str());
+ } else if (key == "site-name" || key == "cluster") {
+ r = rbd.mirror_peer_site_set_name(io_ctx, uuid.c_str(), value.c_str());
+ } else if (key == "direction") {
+ MirrorPeerDirection tag;
+ boost::any direction;
+ try {
+ validate(direction, {value}, &tag, 1);
+ } catch (...) {
+ std::cerr << "rbd: invalid direction" << std::endl;
+ return -EINVAL;
+ }
+
+ auto peer_direction = boost::any_cast<rbd_mirror_peer_direction_t>(
+ direction);
+ if (peer_direction != RBD_MIRROR_PEER_DIRECTION_TX) {
+ // TODO: temporary restriction to prevent adding multiple peers
+ // until rbd-mirror daemon can properly handle the scenario
+ std::vector<librbd::mirror_peer_site_t> mirror_peers;
+ r = rbd.mirror_peer_site_list(io_ctx, &mirror_peers);
+ if (r < 0) {
+ std::cerr << "rbd: failed to list mirror peers" << std::endl;
+ return r;
+ }
+
+ // ignore peer to be updated and tx-only peers since the restriction is
+ // for rx
+ mirror_peers.erase(
+ std::remove_if(
+ mirror_peers.begin(), mirror_peers.end(),
+ [uuid](const librbd::mirror_peer_site_t& peer) {
+ return (peer.uuid == uuid ||
+ peer.direction == RBD_MIRROR_PEER_DIRECTION_TX);
+ }),
+ mirror_peers.end());
+
+ if (!mirror_peers.empty()) {
+ std::cerr << "rbd: multiple RX peers are not currently supported"
+ << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ r = rbd.mirror_peer_site_set_direction(io_ctx, uuid, peer_direction);
+ } else {
+ r = update_peer_config_key(io_ctx, uuid, key, value);
+ }
+
+ if (r == -ENOENT) {
+ std::cerr << "rbd: mirror peer " << uuid << " does not exist"
+ << std::endl;
+ }
+
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+void get_disable_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+}
+
+void get_enable_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+ positional->add_options()
+ ("mode", "mirror mode [image or pool]");
+ add_site_name_optional(options);
+}
+
+int execute_enable_disable(librados::IoCtx& io_ctx,
+ rbd_mirror_mode_t next_mirror_mode,
+ const std::string &mode, bool ignore_no_update) {
+ librbd::RBD rbd;
+ rbd_mirror_mode_t current_mirror_mode;
+ int r = rbd.mirror_mode_get(io_ctx, &current_mirror_mode);
+ if (r < 0) {
+ std::cerr << "rbd: failed to retrieve mirror mode: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ if (current_mirror_mode == next_mirror_mode) {
+ if (!ignore_no_update) {
+ if (mode == "disabled") {
+ std::cout << "rbd: mirroring is already " << mode << std::endl;
+ } else {
+ std::cout << "rbd: mirroring is already configured for "
+ << mode << " mode" << std::endl;
+ }
+ }
+ return 0;
+ } else if (next_mirror_mode == RBD_MIRROR_MODE_IMAGE &&
+ current_mirror_mode == RBD_MIRROR_MODE_POOL) {
+ std::cout << "note: changing mirroring mode from pool to image"
+ << std::endl;
+ } else if (next_mirror_mode == RBD_MIRROR_MODE_POOL &&
+ current_mirror_mode == RBD_MIRROR_MODE_IMAGE) {
+ std::cout << "note: changing mirroring mode from image to pool"
+ << std::endl;
+ }
+
+ r = rbd.mirror_mode_set(io_ctx, next_mirror_mode);
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
+
+int execute_disable(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ return execute_enable_disable(io_ctx, RBD_MIRROR_MODE_DISABLED, "disabled",
+ false);
+}
+
+int execute_enable(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ rbd_mirror_mode_t mirror_mode;
+ std::string mode = utils::get_positional_argument(vm, arg_index++);
+ if (mode == "image") {
+ mirror_mode = RBD_MIRROR_MODE_IMAGE;
+ } else if (mode == "pool") {
+ mirror_mode = RBD_MIRROR_MODE_POOL;
+ } else {
+ std::cerr << "rbd: must specify 'image' or 'pool' mode." << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ bool updated = false;
+ if (vm.count(SITE_NAME)) {
+ librbd::RBD rbd;
+
+ auto site_name = vm[SITE_NAME].as<std::string>();
+ std::string original_site_name;
+ r = rbd.mirror_site_name_get(rados, &original_site_name);
+ updated = (r >= 0 && site_name != original_site_name);
+
+ r = set_site_name(rados, site_name);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ return execute_enable_disable(io_ctx, mirror_mode, mode, updated);
+}
+
+void get_info_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+ at::add_format_options(options);
+ options->add_options()
+ (ALL_NAME.c_str(), po::bool_switch(), "list all attributes");
+}
+
+int execute_info(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ rbd_mirror_mode_t mirror_mode;
+ r = rbd.mirror_mode_get(io_ctx, &mirror_mode);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string site_name;
+ r = rbd.mirror_site_name_get(rados, &site_name);
+ if (r < 0 && r != -EOPNOTSUPP) {
+ return r;
+ }
+
+ std::vector<librbd::mirror_peer_site_t> mirror_peers;
+ if (namespace_name.empty()) {
+ r = rbd.mirror_peer_site_list(io_ctx, &mirror_peers);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ std::string mirror_mode_desc;
+ switch (mirror_mode) {
+ case RBD_MIRROR_MODE_DISABLED:
+ mirror_mode_desc = "disabled";
+ break;
+ case RBD_MIRROR_MODE_IMAGE:
+ mirror_mode_desc = "image";
+ break;
+ case RBD_MIRROR_MODE_POOL:
+ mirror_mode_desc = "pool";
+ break;
+ default:
+ mirror_mode_desc = "unknown";
+ break;
+ }
+
+ if (formatter != nullptr) {
+ formatter->open_object_section("mirror");
+ formatter->dump_string("mode", mirror_mode_desc);
+ } else {
+ std::cout << "Mode: " << mirror_mode_desc << std::endl;
+ }
+
+ if (mirror_mode != RBD_MIRROR_MODE_DISABLED && namespace_name.empty()) {
+ if (formatter != nullptr) {
+ formatter->dump_string("site_name", site_name);
+ } else {
+ std::cout << "Site Name: " << site_name << std::endl
+ << std::endl;
+ }
+
+ r = format_mirror_peers(io_ctx, formatter, mirror_peers,
+ vm[ALL_NAME].as<bool>());
+ if (r < 0) {
+ return r;
+ }
+ }
+ if (formatter != nullptr) {
+ formatter->close_section();
+ formatter->flush(std::cout);
+ }
+ return 0;
+}
+
+void get_status_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+ at::add_format_options(options);
+ at::add_verbose_option(options);
+}
+
+int execute_status(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ bool verbose = vm[at::VERBOSE].as<bool>();
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+
+ uint32_t total_images = 0;
+ std::map<librbd::mirror_image_status_state_t, int> mirror_image_states;
+ MirrorHealth mirror_image_health = MIRROR_HEALTH_UNKNOWN;
+ r = get_mirror_image_status(io_ctx, &total_images, &mirror_image_states,
+ &mirror_image_health);
+ if (r < 0) {
+ return r;
+ }
+
+ MirrorDaemonServiceInfo daemon_service_info(io_ctx);
+ daemon_service_info.init();
+
+ MirrorHealth mirror_daemon_health = daemon_service_info.get_daemon_health();
+ auto mirror_services = daemon_service_info.get_mirror_services();
+
+ auto mirror_health = std::max(mirror_image_health, mirror_daemon_health);
+
+ if (formatter != nullptr) {
+ formatter->open_object_section("status");
+ formatter->open_object_section("summary");
+ formatter->dump_stream("health") << mirror_health;
+ formatter->dump_stream("daemon_health") << mirror_daemon_health;
+ formatter->dump_stream("image_health") << mirror_image_health;
+ formatter->open_object_section("states");
+ for (auto &it : mirror_image_states) {
+ std::string state_name = utils::mirror_image_status_state(it.first);
+ formatter->dump_int(state_name.c_str(), it.second);
+ }
+ formatter->close_section(); // states
+ formatter->close_section(); // summary
+ } else {
+ std::cout << "health: " << mirror_health << std::endl;
+ std::cout << "daemon health: " << mirror_daemon_health << std::endl;
+ std::cout << "image health: " << mirror_image_health << std::endl;
+ std::cout << "images: " << total_images << " total" << std::endl;
+ for (auto &it : mirror_image_states) {
+ std::cout << " " << it.second << " "
+ << utils::mirror_image_status_state(it.first) << std::endl;
+ }
+ }
+
+ int ret = 0;
+
+ if (verbose) {
+ // dump per-daemon status
+ if (formatter != nullptr) {
+ formatter->open_array_section("daemons");
+ for (auto& mirror_service : mirror_services) {
+ formatter->open_object_section("daemon");
+ formatter->dump_string("service_id", mirror_service.service_id);
+ formatter->dump_string("instance_id", mirror_service.instance_id);
+ formatter->dump_string("client_id", mirror_service.client_id);
+ formatter->dump_string("hostname", mirror_service.hostname);
+ formatter->dump_string("ceph_version", mirror_service.ceph_version);
+ formatter->dump_bool("leader", mirror_service.leader);
+ formatter->dump_stream("health") << mirror_service.health;
+ if (!mirror_service.callouts.empty()) {
+ formatter->open_array_section("callouts");
+ for (auto& callout : mirror_service.callouts) {
+ formatter->dump_string("callout", callout);
+ }
+ formatter->close_section(); // callouts
+ }
+ formatter->close_section(); // daemon
+ }
+ formatter->close_section(); // daemons
+ } else {
+ std::cout << std::endl << "DAEMONS" << std::endl;
+ if (mirror_services.empty()) {
+ std::cout << " none" << std::endl;
+ }
+ for (auto& mirror_service : mirror_services) {
+ std::cout << "service " << mirror_service.service_id << ":"
+ << std::endl
+ << " instance_id: " << mirror_service.instance_id
+ << std::endl
+ << " client_id: " << mirror_service.client_id << std::endl
+ << " hostname: " << mirror_service.hostname << std::endl
+ << " version: " << mirror_service.ceph_version << std::endl
+ << " leader: " << (mirror_service.leader ? "true" : "false")
+ << std::endl
+ << " health: " << mirror_service.health << std::endl;
+ if (!mirror_service.callouts.empty()) {
+ std::cout << " callouts: " << mirror_service.callouts << std::endl;
+ }
+ std::cout << std::endl;
+ }
+ std::cout << std::endl;
+ }
+
+ // dump per-image status
+ librados::IoCtx default_ns_io_ctx;
+ default_ns_io_ctx.dup(io_ctx);
+ default_ns_io_ctx.set_namespace("");
+ std::vector<librbd::mirror_peer_site_t> mirror_peers;
+ utils::get_mirror_peer_sites(default_ns_io_ctx, &mirror_peers);
+
+ std::map<std::string, std::string> peer_mirror_uuids_to_name;
+ utils::get_mirror_peer_mirror_uuids_to_names(mirror_peers,
+ &peer_mirror_uuids_to_name);
+
+ if (formatter != nullptr) {
+ formatter->open_array_section("images");
+ } else {
+ std::cout << "IMAGES";
+ }
+
+ std::map<std::string, std::string> instance_ids;
+
+ std::string start_image_id;
+ while (true) {
+ std::map<std::string, std::string> ids;
+ r = rbd.mirror_image_instance_id_list(io_ctx, start_image_id, 1024, &ids);
+ if (r < 0) {
+ if (r == -EOPNOTSUPP) {
+ std::cerr << "rbd: newer release of Ceph OSDs required to map image "
+ << "to rbd-mirror daemon instance" << std::endl;
+ } else {
+ std::cerr << "rbd: failed to get instance id list: "
+ << cpp_strerror(r) << std::endl;
+ }
+ // not fatal
+ break;
+ }
+ if (ids.empty()) {
+ break;
+ }
+ instance_ids.insert(ids.begin(), ids.end());
+ start_image_id = ids.rbegin()->first;
+ }
+
+ ImageRequestGenerator<StatusImageRequest> generator(
+ io_ctx, instance_ids, mirror_peers, peer_mirror_uuids_to_name,
+ daemon_service_info, formatter);
+ ret = generator.execute();
+
+ if (formatter != nullptr) {
+ formatter->close_section(); // images
+ }
+ }
+
+ if (formatter != nullptr) {
+ formatter->close_section(); // status
+ formatter->flush(std::cout);
+ }
+
+ return ret;
+}
+
+void get_promote_arguments(po::options_description *positional,
+ po::options_description *options) {
+ options->add_options()
+ ("force", po::bool_switch(),
+ "promote even if not cleanly demoted by remote cluster");
+ at::add_pool_options(positional, options, true);
+}
+
+int execute_promote(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::disable_cache();
+
+ std::atomic<unsigned> counter = { 0 };
+ ImageRequestGenerator<PromoteImageRequest> generator(io_ctx, &counter,
+ vm["force"].as<bool>());
+ r = generator.execute();
+
+ std::cout << "Promoted " << counter.load() << " mirrored images" << std::endl;
+ return r;
+}
+
+void get_demote_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+}
+
+int execute_demote(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ r = validate_mirroring_enabled(io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::disable_cache();
+
+ std::atomic<unsigned> counter { 0 };
+ ImageRequestGenerator<DemoteImageRequest> generator(io_ctx, &counter);
+ r = generator.execute();
+
+ std::cout << "Demoted " << counter.load() << " mirrored images" << std::endl;
+ return r;
+}
+
+Shell::Action action_bootstrap_create(
+ {"mirror", "pool", "peer", "bootstrap", "create"}, {},
+ "Create a peer bootstrap token to import in a remote cluster", "",
+ &get_peer_bootstrap_create_arguments, &execute_peer_bootstrap_create);
+Shell::Action action_bootstreap_import(
+ {"mirror", "pool", "peer", "bootstrap", "import"}, {},
+ "Import a peer bootstrap token created from a remote cluster", "",
+ &get_peer_bootstrap_import_arguments, &execute_peer_bootstrap_import);
+
+Shell::Action action_add(
+ {"mirror", "pool", "peer", "add"}, {},
+ "Add a mirroring peer to a pool.", "",
+ &get_peer_add_arguments, &execute_peer_add);
+Shell::Action action_remove(
+ {"mirror", "pool", "peer", "remove"}, {},
+ "Remove a mirroring peer from a pool.", "",
+ &get_peer_remove_arguments, &execute_peer_remove);
+Shell::Action action_set(
+ {"mirror", "pool", "peer", "set"}, {},
+ "Update mirroring peer settings.", "",
+ &get_peer_set_arguments, &execute_peer_set);
+
+Shell::Action action_disable(
+ {"mirror", "pool", "disable"}, {},
+ "Disable RBD mirroring by default within a pool.", "",
+ &get_disable_arguments, &execute_disable);
+Shell::Action action_enable(
+ {"mirror", "pool", "enable"}, {},
+ "Enable RBD mirroring by default within a pool.", "",
+ &get_enable_arguments, &execute_enable);
+Shell::Action action_info(
+ {"mirror", "pool", "info"}, {},
+ "Show information about the pool mirroring configuration.", {},
+ &get_info_arguments, &execute_info);
+Shell::Action action_status(
+ {"mirror", "pool", "status"}, {},
+ "Show status for all mirrored images in the pool.", {},
+ &get_status_arguments, &execute_status);
+Shell::Action action_promote(
+ {"mirror", "pool", "promote"}, {},
+ "Promote all non-primary images in the pool.", {},
+ &get_promote_arguments, &execute_promote);
+Shell::Action action_demote(
+ {"mirror", "pool", "demote"}, {},
+ "Demote all primary images in the pool.", {},
+ &get_demote_arguments, &execute_demote);
+
+} // namespace mirror_pool
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/MirrorSnapshotSchedule.cc b/src/tools/rbd/action/MirrorSnapshotSchedule.cc
new file mode 100644
index 000000000..3f269c2ad
--- /dev/null
+++ b/src/tools/rbd/action/MirrorSnapshotSchedule.cc
@@ -0,0 +1,322 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Schedule.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/ceph_context.h"
+#include "common/ceph_json.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "global/global_context.h"
+#include "include/stringify.h"
+
+#include <iostream>
+#include <list>
+#include <map>
+#include <string>
+#include <boost/program_options.hpp>
+
+#include "json_spirit/json_spirit.h"
+
+namespace rbd {
+namespace action {
+namespace mirror_snapshot_schedule {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+class ScheduleStatus {
+public:
+ ScheduleStatus() {
+ }
+
+ int parse(const std::string &status) {
+ json_spirit::mValue json_root;
+ if(!json_spirit::read(status, json_root)) {
+ std::cerr << "rbd: invalid schedule status JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ try {
+ auto &s = json_root.get_obj();
+
+ if (s["scheduled_images"].type() != json_spirit::array_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "scheduled_images is not array" << std::endl;
+ return -EBADMSG;
+ }
+
+ for (auto &item_val : s["scheduled_images"].get_array()) {
+ if (item_val.type() != json_spirit::obj_type) {
+ std::cerr << "rbd: unexpected schedule status JSON received: "
+ << "schedule item is not object" << std::endl;
+ return -EBADMSG;
+ }
+
+ auto &item = item_val.get_obj();
+
+ if (item["schedule_time"].type() != json_spirit::str_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "schedule_time is not string" << std::endl;
+ return -EBADMSG;
+ }
+ auto schedule_time = item["schedule_time"].get_str();
+
+ if (item["image"].type() != json_spirit::str_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "image is not string" << std::endl;
+ return -EBADMSG;
+ }
+ auto image = item["image"].get_str();
+
+ scheduled_images.push_back({schedule_time, image});
+ }
+
+ } catch (std::runtime_error &) {
+ std::cerr << "rbd: invalid schedule JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ return 0;
+ }
+
+ void dump(Formatter *f) {
+ f->open_array_section("scheduled_images");
+ for (auto &image : scheduled_images) {
+ f->open_object_section("image");
+ f->dump_string("schedule_time", image.first);
+ f->dump_string("image", image.second);
+ f->close_section(); // image
+ }
+ f->close_section(); // scheduled_images
+ }
+
+ friend std::ostream& operator<<(std::ostream& os, ScheduleStatus &d);
+
+private:
+
+ std::list<std::pair<std::string, std::string>> scheduled_images;
+};
+
+std::ostream& operator<<(std::ostream& os, ScheduleStatus &s) {
+ TextTable tbl;
+ tbl.define_column("SCHEDULE TIME", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("IMAGE", TextTable::LEFT, TextTable::LEFT);
+
+ for (auto &[schedule_time, image] : s.scheduled_images) {
+ tbl << schedule_time << image << TextTable::endrow;
+ }
+
+ os << tbl;
+ return os;
+}
+
+} // anonymous namespace
+
+void get_arguments_add(po::options_description *positional,
+ po::options_description *options) {
+ add_level_spec_options(options);
+ add_schedule_options(positional, true);
+}
+
+int execute_add(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::map<std::string, std::string> args;
+
+ int r = get_level_spec_args(vm, &args);
+ if (r < 0) {
+ return r;
+ }
+ r = get_schedule_args(vm, true, &args);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ normalize_level_spec_args(&args);
+ r = utils::mgr_command(rados, "rbd mirror snapshot schedule add", args,
+ &std::cout, &std::cerr);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+void get_arguments_remove(po::options_description *positional,
+ po::options_description *options) {
+ add_level_spec_options(options);
+ add_schedule_options(positional, false);
+}
+
+int execute_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::map<std::string, std::string> args;
+
+ int r = get_level_spec_args(vm, &args);
+ if (r < 0) {
+ return r;
+ }
+ r = get_schedule_args(vm, false, &args);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ normalize_level_spec_args(&args);
+ r = utils::mgr_command(rados, "rbd mirror snapshot schedule remove", args,
+ &std::cout, &std::cerr);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+void get_arguments_list(po::options_description *positional,
+ po::options_description *options) {
+ add_level_spec_options(options);
+ options->add_options()
+ ("recursive,R", po::bool_switch(), "list all schedules");
+ at::add_format_options(options);
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::map<std::string, std::string> args;
+
+ int r = get_level_spec_args(vm, &args);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ normalize_level_spec_args(&args);
+ std::stringstream out;
+ r = utils::mgr_command(rados, "rbd mirror snapshot schedule list", args, &out,
+ &std::cerr);
+ if (r < 0) {
+ return r;
+ }
+
+ ScheduleList schedule_list;
+ r = schedule_list.parse(out.str());
+ if (r < 0) {
+ return r;
+ }
+
+ if (vm["recursive"].as<bool>()) {
+ if (formatter.get()) {
+ schedule_list.dump(formatter.get());
+ formatter->flush(std::cout);
+ } else {
+ std::cout << schedule_list;
+ }
+ } else {
+ auto schedule = schedule_list.find(args["level_spec"]);
+ if (schedule == nullptr) {
+ return -ENOENT;
+ }
+
+ if (formatter.get()) {
+ schedule->dump(formatter.get());
+ formatter->flush(std::cout);
+ } else {
+ std::cout << *schedule << std::endl;
+ }
+ }
+
+ return 0;
+}
+
+void get_arguments_status(po::options_description *positional,
+ po::options_description *options) {
+ add_level_spec_options(options);
+ at::add_format_options(options);
+}
+
+int execute_status(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::map<std::string, std::string> args;
+
+ int r = get_level_spec_args(vm, &args);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ normalize_level_spec_args(&args);
+ std::stringstream out;
+ r = utils::mgr_command(rados, "rbd mirror snapshot schedule status", args,
+ &out, &std::cerr);
+ ScheduleStatus schedule_status;
+ r = schedule_status.parse(out.str());
+ if (r < 0) {
+ return r;
+ }
+
+ if (formatter.get()) {
+ schedule_status.dump(formatter.get());
+ formatter->flush(std::cout);
+ } else {
+ std::cout << schedule_status;
+ }
+
+ return 0;
+}
+
+Shell::Action add_action(
+ {"mirror", "snapshot", "schedule", "add"}, {},
+ "Add mirror snapshot schedule.", "", &get_arguments_add, &execute_add);
+Shell::Action remove_action(
+ {"mirror", "snapshot", "schedule", "remove"},
+ {"mirror", "snapshot", "schedule", "rm"}, "Remove mirror snapshot schedule.",
+ "", &get_arguments_remove, &execute_remove);
+Shell::Action list_action(
+ {"mirror", "snapshot", "schedule", "list"},
+ {"mirror", "snapshot", "schedule", "ls"}, "List mirror snapshot schedule.",
+ "", &get_arguments_list, &execute_list);
+Shell::Action status_action(
+ {"mirror", "snapshot", "schedule", "status"}, {},
+ "Show mirror snapshot schedule status.", "", &get_arguments_status, &execute_status);
+
+} // namespace mirror_snapshot_schedule
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Namespace.cc b/src/tools/rbd/action/Namespace.cc
new file mode 100644
index 000000000..12d92bff8
--- /dev/null
+++ b/src/tools/rbd/action/Namespace.cc
@@ -0,0 +1,191 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include <algorithm>
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace ns {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+void get_create_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+}
+
+int execute_create(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ if (namespace_name.empty()) {
+ std::cerr << "rbd: namespace name was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.namespace_create(io_ctx, namespace_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: failed to created namespace: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+}
+
+int execute_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ if (namespace_name.empty()) {
+ std::cerr << "rbd: namespace name was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.namespace_remove(io_ctx, namespace_name.c_str());
+ if (r == -EBUSY) {
+ std::cerr << "rbd: namespace contains images which must be deleted first."
+ << std::endl;
+ return r;
+ } else if (r == -ENOENT) {
+ std::cerr << "rbd: namespace does not exist." << std::endl;
+ return r;
+ } else if (r < 0) {
+ std::cerr << "rbd: failed to remove namespace: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, false);
+ at::add_format_options(options);
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
+ nullptr, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ std::vector<std::string> names;
+ r = rbd.namespace_list(io_ctx, &names);
+ if (r < 0 && r != -ENOENT) {
+ std::cerr << "rbd: failed to list namespaces: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ std::sort(names.begin(), names.end());
+
+ TextTable tbl;
+ if (formatter) {
+ formatter->open_array_section("namespaces");
+ } else {
+ tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ for (auto& name : names) {
+ if (formatter) {
+ formatter->open_object_section("namespace");
+ formatter->dump_string("name", name);
+ formatter->close_section();
+ } else {
+ tbl << name << TextTable::endrow;
+ }
+ }
+
+ if (formatter) {
+ formatter->close_section();
+ formatter->flush(std::cout);
+ } else if (!names.empty()) {
+ std::cout << tbl;
+ }
+
+ return 0;
+}
+
+Shell::Action action_create(
+ {"namespace", "create"}, {},
+ "Create an RBD image namespace.", "",
+ &get_create_arguments, &execute_create);
+
+Shell::Action action_remove(
+ {"namespace", "remove"}, {"namespace", "rm"},
+ "Remove an RBD image namespace.", "",
+ &get_remove_arguments, &execute_remove);
+
+Shell::Action action_list(
+ {"namespace", "list"}, {"namespace", "ls"}, "List RBD image namespaces.", "",
+ &get_list_arguments, &execute_list);
+
+} // namespace ns
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Nbd.cc b/src/tools/rbd/action/Nbd.cc
new file mode 100644
index 000000000..dd5ef3290
--- /dev/null
+++ b/src/tools/rbd/action/Nbd.cc
@@ -0,0 +1,389 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/stringify.h"
+#include "common/SubProcess.h"
+#include <iostream>
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace nbd {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int call_nbd_cmd(const po::variables_map &vm,
+ const std::vector<std::string> &args,
+ const std::vector<std::string> &ceph_global_init_args) {
+ #if defined(__FreeBSD__) || defined(_WIN32)
+ std::cerr << "rbd: nbd device is not supported" << std::endl;
+ return -EOPNOTSUPP;
+ #else
+ char exe_path[PATH_MAX];
+ ssize_t exe_path_bytes = readlink("/proc/self/exe", exe_path,
+ sizeof(exe_path) - 1);
+ if (exe_path_bytes < 0) {
+ strcpy(exe_path, "rbd-nbd");
+ } else {
+ if (snprintf(exe_path + exe_path_bytes,
+ sizeof(exe_path) - exe_path_bytes,
+ "-nbd") < 0) {
+ return -EOVERFLOW;
+ }
+ }
+
+ SubProcess process(exe_path, SubProcess::KEEP, SubProcess::KEEP, SubProcess::KEEP);
+
+ for (auto &arg : ceph_global_init_args) {
+ process.add_cmd_arg(arg.c_str());
+ }
+
+ for (auto &arg : args) {
+ process.add_cmd_arg(arg.c_str());
+ }
+
+ if (process.spawn()) {
+ std::cerr << "rbd: failed to run rbd-nbd: " << process.err() << std::endl;
+ return -EINVAL;
+ } else if (process.join()) {
+ std::cerr << "rbd: rbd-nbd failed with error: " << process.err() << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+ #endif
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if defined(__FreeBSD__) || defined(_WIN32)
+ std::cerr << "rbd: nbd device is not supported" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::vector<std::string> args;
+
+ args.push_back("list-mapped");
+
+ if (vm.count("format")) {
+ args.push_back("--format");
+ args.push_back(vm["format"].as<at::Format>().value);
+ }
+ if (vm["pretty-format"].as<bool>()) {
+ args.push_back("--pretty-format");
+ }
+
+ return call_nbd_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_attach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if defined(__FreeBSD__) || defined(_WIN32)
+ std::cerr << "rbd: nbd device is not supported" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::vector<std::string> args;
+ std::string device_path;
+
+ args.push_back("attach");
+ std::string img;
+ int r = utils::get_image_or_snap_spec(vm, &img);
+ if (r < 0) {
+ return r;
+ }
+ args.push_back(img);
+
+ if (vm.count("device")) {
+ device_path = vm["device"].as<std::string>();
+ args.push_back("--device");
+ args.push_back(device_path);
+ } else {
+ std::cerr << "rbd: device was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ if (vm["show-cookie"].as<bool>()) {
+ args.push_back("--show-cookie");
+ }
+
+ if (vm.count("cookie")) {
+ args.push_back("--cookie");
+ args.push_back(vm["cookie"].as<std::string>());
+ } else if (!vm["force"].as<bool>()) {
+ std::cerr << "rbd: could not validate attach request\n";
+ std::cerr << "rbd: mismatching the image and the device may lead to data corruption\n";
+ std::cerr << "rbd: must specify --cookie <arg> or --force to proceed" << std::endl;
+ return -EINVAL;
+ }
+
+ if (vm.count(at::SNAPSHOT_ID)) {
+ args.push_back("--snap-id");
+ args.push_back(std::to_string(vm[at::SNAPSHOT_ID].as<uint64_t>()));
+ }
+
+ if (vm["quiesce"].as<bool>()) {
+ args.push_back("--quiesce");
+ }
+
+ if (vm["read-only"].as<bool>()) {
+ args.push_back("--read-only");
+ }
+
+ if (vm["exclusive"].as<bool>()) {
+ args.push_back("--exclusive");
+ }
+
+ if (vm.count("quiesce-hook")) {
+ args.push_back("--quiesce-hook");
+ args.push_back(vm["quiesce-hook"].as<std::string>());
+ }
+
+ if (vm.count("options")) {
+ utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(),
+ &args);
+ }
+
+ return call_nbd_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_detach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if defined(__FreeBSD__) || defined(_WIN32)
+ std::cerr << "rbd: nbd device is not supported" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::string device_name = utils::get_positional_argument(vm, 0);
+ if (!boost::starts_with(device_name, "/dev/")) {
+ device_name.clear();
+ }
+
+ std::vector<std::string> args;
+
+ args.push_back("detach");
+ std::string image_name;
+ if (device_name.empty()) {
+ int r = utils::get_image_or_snap_spec(vm, &image_name);
+ if (r < 0) {
+ return r;
+ }
+
+ if (image_name.empty()) {
+ std::cerr << "rbd: detach requires either image name or device path"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ if (vm.count(at::SNAPSHOT_ID)) {
+ args.push_back("--snap-id");
+ args.push_back(std::to_string(vm[at::SNAPSHOT_ID].as<uint64_t>()));
+ }
+ }
+
+ args.push_back(device_name.empty() ? image_name : device_name);
+
+ if (vm.count("options")) {
+ utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(),
+ &args);
+ }
+
+ return call_nbd_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_map(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if defined(__FreeBSD__) || defined(_WIN32)
+ std::cerr << "rbd: nbd device is not supported" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::vector<std::string> args;
+
+ args.push_back("map");
+ std::string img;
+ int r = utils::get_image_or_snap_spec(vm, &img);
+ if (r < 0) {
+ return r;
+ }
+ args.push_back(img);
+
+ if (vm["quiesce"].as<bool>()) {
+ args.push_back("--quiesce");
+ }
+
+ if (vm["show-cookie"].as<bool>()) {
+ args.push_back("--show-cookie");
+ }
+
+ if (vm.count("cookie")) {
+ args.push_back("--cookie");
+ args.push_back(vm["cookie"].as<std::string>());
+ }
+
+ if (vm.count(at::SNAPSHOT_ID)) {
+ args.push_back("--snap-id");
+ args.push_back(std::to_string(vm[at::SNAPSHOT_ID].as<uint64_t>()));
+ }
+
+ if (vm["read-only"].as<bool>()) {
+ args.push_back("--read-only");
+ }
+
+ if (vm["exclusive"].as<bool>()) {
+ args.push_back("--exclusive");
+ }
+
+ if (vm.count("quiesce-hook")) {
+ args.push_back("--quiesce-hook");
+ args.push_back(vm["quiesce-hook"].as<std::string>());
+ }
+
+ if (vm.count("options")) {
+ utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(),
+ &args);
+ }
+
+ return call_nbd_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_unmap(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if defined(__FreeBSD__) || defined(_WIN32)
+ std::cerr << "rbd: nbd device is not supported" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::string device_name = utils::get_positional_argument(vm, 0);
+ if (!boost::starts_with(device_name, "/dev/")) {
+ device_name.clear();
+ }
+
+ std::vector<std::string> args;
+
+ args.push_back("unmap");
+ std::string image_name;
+ if (device_name.empty()) {
+ int r = utils::get_image_or_snap_spec(vm, &image_name);
+ if (r < 0) {
+ return r;
+ }
+
+ if (image_name.empty()) {
+ std::cerr << "rbd: unmap requires either image name or device path"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ if (vm.count(at::SNAPSHOT_ID)) {
+ args.push_back("--snap-id");
+ args.push_back(std::to_string(vm[at::SNAPSHOT_ID].as<uint64_t>()));
+ }
+ }
+
+ args.push_back(device_name.empty() ? image_name : device_name);
+
+ if (vm.count("options")) {
+ utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(),
+ &args);
+ }
+
+ return call_nbd_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+void get_list_arguments_deprecated(po::options_description *positional,
+ po::options_description *options) {
+ at::add_format_options(options);
+}
+
+int execute_list_deprecated(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_args) {
+ std::cerr << "rbd: 'nbd list' command is deprecated, "
+ << "use 'device list -t nbd' instead" << std::endl;
+ return execute_list(vm, ceph_global_args);
+}
+
+void get_map_arguments_deprecated(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_NONE);
+ options->add_options()
+ ("read-only", po::bool_switch(), "map read-only")
+ ("exclusive", po::bool_switch(), "forbid writes by other clients")
+ ("device", po::value<std::string>(), "specify nbd device")
+ ("nbds_max", po::value<std::string>(), "override module param nbds_max")
+ ("max_part", po::value<std::string>(), "override module param max_part")
+ ("timeout", po::value<std::string>(), "set nbd request timeout (seconds)");
+}
+
+int execute_map_deprecated(const po::variables_map &vm_deprecated,
+ const std::vector<std::string> &ceph_global_args) {
+ std::cerr << "rbd: 'nbd map' command is deprecated, "
+ << "use 'device map -t nbd' instead" << std::endl;
+
+ po::options_description options;
+ options.add_options()
+ ("options,o", po::value<std::vector<std::string>>()
+ ->default_value(std::vector<std::string>(), ""), "");
+
+ po::variables_map vm = vm_deprecated;
+ po::store(po::command_line_parser({}).options(options).run(), vm);
+
+ std::vector<std::string> opts;
+ if (vm_deprecated.count("device")) {
+ opts.push_back("device=" + vm_deprecated["device"].as<std::string>());
+ }
+ if (vm_deprecated.count("nbds_max")) {
+ opts.push_back("nbds_max=" + vm_deprecated["nbds_max"].as<std::string>());
+ }
+ if (vm_deprecated.count("max_part")) {
+ opts.push_back("max_part=" + vm_deprecated["max_part"].as<std::string>());
+ }
+ if (vm_deprecated.count("timeout")) {
+ opts.push_back("timeout=" + vm_deprecated["timeout"].as<std::string>());
+ }
+
+ vm.at("options").value() = boost::any(opts);
+
+ return execute_map(vm, ceph_global_args);
+}
+
+void get_unmap_arguments_deprecated(po::options_description *positional,
+ po::options_description *options) {
+ positional->add_options()
+ ("image-or-snap-or-device-spec",
+ "image, snapshot, or device specification\n"
+ "[<pool-name>/]<image-name>[@<snap-name>] or <device-path>");
+ at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE);
+}
+
+int execute_unmap_deprecated(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_args) {
+ std::cerr << "rbd: 'nbd unmap' command is deprecated, "
+ << "use 'device unmap -t nbd' instead" << std::endl;
+ return execute_unmap(vm, ceph_global_args);
+}
+
+Shell::Action action_show_deprecated(
+ {"nbd", "list"}, {"nbd", "ls"}, "List the nbd devices already used.", "",
+ &get_list_arguments_deprecated, &execute_list_deprecated, false);
+
+Shell::Action action_map_deprecated(
+ {"nbd", "map"}, {}, "Map image to a nbd device.", "",
+ &get_map_arguments_deprecated, &execute_map_deprecated, false);
+
+Shell::Action action_unmap_deprecated(
+ {"nbd", "unmap"}, {}, "Unmap a nbd device.", "",
+ &get_unmap_arguments_deprecated, &execute_unmap_deprecated, false);
+
+} // namespace nbd
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/ObjectMap.cc b/src/tools/rbd/action/ObjectMap.cc
new file mode 100644
index 000000000..40ee2d472
--- /dev/null
+++ b/src/tools/rbd/action/ObjectMap.cc
@@ -0,0 +1,131 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace object_map {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_object_map_rebuild(librbd::Image &image, bool no_progress)
+{
+ utils::ProgressContext pc("Object Map Rebuild", no_progress);
+ int r = image.rebuild_object_map(pc);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+void get_rebuild_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_NONE);
+ at::add_no_progress_option(options);
+}
+
+int execute_rebuild(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
+ snap_name, false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_object_map_rebuild(image, vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: rebuilding object map failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+static int do_object_map_check(librbd::Image &image, bool no_progress)
+{
+ utils::ProgressContext pc("Object Map Check", no_progress);
+ int r = image.check_object_map(pc);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+void get_check_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_or_snap_spec_options(positional, options,
+ at::ARGUMENT_MODIFIER_NONE);
+ at::add_no_progress_option(options);
+}
+
+int execute_check(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
+ snap_name, false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_object_map_check(image, vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: checking object map failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action_rebuild(
+ {"object-map", "rebuild"}, {}, "Rebuild an invalid object map.", "",
+ &get_rebuild_arguments, &execute_rebuild);
+Shell::Action action_check(
+ {"object-map", "check"}, {}, "Verify the object map is correct.", "",
+ &get_check_arguments, &execute_check);
+
+} // namespace object_map
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Perf.cc b/src/tools/rbd/action/Perf.cc
new file mode 100644
index 000000000..b39beac91
--- /dev/null
+++ b/src/tools/rbd/action/Perf.cc
@@ -0,0 +1,717 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/stringify.h"
+#include "common/ceph_context.h"
+#include "common/ceph_json.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "global/global_context.h"
+#ifdef HAVE_CURSES
+#include <ncurses.h>
+#endif
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <iostream>
+#include <vector>
+#include <boost/algorithm/string.hpp>
+#include <boost/assign.hpp>
+#include <boost/bimap.hpp>
+#include <boost/program_options.hpp>
+#include "json_spirit/json_spirit.h"
+
+namespace rbd {
+namespace action {
+namespace perf {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+enum class StatDescriptor {
+ WRITE_OPS = 0,
+ READ_OPS,
+ WRITE_BYTES,
+ READ_BYTES,
+ WRITE_LATENCY,
+ READ_LATENCY
+};
+
+typedef boost::bimap<StatDescriptor, std::string> StatDescriptors;
+
+static const StatDescriptors STAT_DESCRIPTORS =
+ boost::assign::list_of<StatDescriptors::relation>
+ (StatDescriptor::WRITE_OPS, "write_ops")
+ (StatDescriptor::READ_OPS, "read_ops")
+ (StatDescriptor::WRITE_BYTES, "write_bytes")
+ (StatDescriptor::READ_BYTES, "read_bytes")
+ (StatDescriptor::WRITE_LATENCY, "write_latency")
+ (StatDescriptor::READ_LATENCY, "read_latency");
+
+std::ostream& operator<<(std::ostream& os, const StatDescriptor& val) {
+ auto it = STAT_DESCRIPTORS.left.find(val);
+ if (it == STAT_DESCRIPTORS.left.end()) {
+ os << "unknown (" << static_cast<int>(val) << ")";
+ } else {
+ os << it->second;
+ }
+ return os;
+}
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ StatDescriptor *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ std::string s = po::validators::get_single_string(values);
+ boost::replace_all(s, "_", " ");
+ boost::replace_all(s, "-", "_");
+
+ auto it = STAT_DESCRIPTORS.right.find(s);
+ if (it == STAT_DESCRIPTORS.right.end()) {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+ v = boost::any(it->second);
+}
+
+struct ImageStat {
+ ImageStat(const std::string& pool_name, const std::string& pool_namespace,
+ const std::string& image_name)
+ : pool_name(pool_name), pool_namespace(pool_namespace),
+ image_name(image_name) {
+ stats.resize(STAT_DESCRIPTORS.size());
+ }
+
+ std::string pool_name;
+ std::string pool_namespace;
+ std::string image_name;
+ std::vector<double> stats;
+};
+
+typedef std::vector<ImageStat> ImageStats;
+
+typedef std::pair<std::string, std::string> SpecPair;
+
+std::string format_pool_spec(const std::string& pool,
+ const std::string& pool_namespace) {
+ std::string pool_spec{pool};
+ if (!pool_namespace.empty()) {
+ pool_spec += "/" + pool_namespace;
+ }
+ return pool_spec;
+}
+
+int query_iostats(librados::Rados& rados, const std::string& pool_spec,
+ StatDescriptor sort_by, ImageStats* image_stats,
+ std::ostream& err_os) {
+ auto sort_by_str = STAT_DESCRIPTORS.left.find(sort_by)->second;
+
+ std::string cmd = R"(
+ {
+ "prefix": "rbd perf image stats",
+ "pool_spec": ")" + pool_spec + R"(",
+ "sort_by": ")" + sort_by_str + R"(",
+ "format": "json"
+ }")";
+
+ bufferlist in_bl;
+ bufferlist out_bl;
+ std::string outs;
+ int r = rados.mgr_command(cmd, in_bl, &out_bl, &outs);
+ if (r == -EOPNOTSUPP) {
+ err_os << "rbd: 'rbd_support' mgr module is not enabled."
+ << std::endl << std::endl
+ << "Use 'ceph mgr module enable rbd_support' to enable."
+ << std::endl;
+ return r;
+ } else if (r < 0) {
+ err_os << "rbd: mgr command failed: " << cpp_strerror(r);
+ if (!outs.empty()) {
+ err_os << ": " << outs;
+ }
+ err_os << std::endl;
+ return r;
+ }
+
+ json_spirit::mValue json_root;
+ if (!json_spirit::read(out_bl.to_str(), json_root)) {
+ err_os << "rbd: error parsing perf stats" << std::endl;
+ return -EINVAL;
+ }
+
+ image_stats->clear();
+ try {
+ auto& root = json_root.get_obj();
+
+ // map JSON stat descriptor order to our internal order
+ std::map<uint32_t, uint32_t> json_to_internal_stats;
+ auto& json_stat_descriptors = root["stat_descriptors"].get_array();
+ for (size_t idx = 0; idx < json_stat_descriptors.size(); ++idx) {
+ auto it = STAT_DESCRIPTORS.right.find(
+ json_stat_descriptors[idx].get_str());
+ if (it == STAT_DESCRIPTORS.right.end()) {
+ continue;
+ }
+ json_to_internal_stats[idx] = static_cast<uint32_t>(it->second);
+ }
+
+ // cache a mapping from pool descriptors back to pool-specs
+ std::map<std::string, SpecPair> json_to_internal_pools;
+ auto& pool_descriptors = root["pool_descriptors"].get_obj();
+ for (auto& pool : pool_descriptors) {
+ auto& pool_spec = pool.second.get_str();
+ auto pos = pool_spec.rfind("/");
+
+ SpecPair pair{pool_spec.substr(0, pos), ""};
+ if (pos != std::string::npos) {
+ pair.second = pool_spec.substr(pos + 1);
+ }
+
+ json_to_internal_pools[pool.first] = pair;
+ }
+
+ auto& stats = root["stats"].get_array();
+ for (auto& stat : stats) {
+ auto& stat_obj = stat.get_obj();
+ if (!stat_obj.empty()) {
+ auto& image_spec = stat_obj.begin()->first;
+
+ auto pos = image_spec.find("/");
+ SpecPair pair{image_spec.substr(0, pos), ""};
+ if (pos != std::string::npos) {
+ pair.second = image_spec.substr(pos + 1);
+ }
+
+ const auto pool_it = json_to_internal_pools.find(pair.first);
+ if (pool_it == json_to_internal_pools.end()) {
+ continue;
+ }
+
+ image_stats->emplace_back(
+ pool_it->second.first, pool_it->second.second, pair.second);
+
+ auto& image_stat = image_stats->back();
+ auto& data = stat_obj.begin()->second.get_array();
+ for (auto& indexes : json_to_internal_stats) {
+ image_stat.stats[indexes.second] = data[indexes.first].get_real();
+ }
+ }
+ }
+ } catch (std::runtime_error &e) {
+ err_os << "rbd: error parsing perf stats: " << e.what() << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void format_stat(StatDescriptor stat_descriptor, double stat,
+ std::ostream& os) {
+ switch (stat_descriptor) {
+ case StatDescriptor::WRITE_OPS:
+ case StatDescriptor::READ_OPS:
+ os << si_u_t(stat) << "/s";
+ break;
+ case StatDescriptor::WRITE_BYTES:
+ case StatDescriptor::READ_BYTES:
+ os << byte_u_t(stat) << "/s";
+ break;
+ case StatDescriptor::WRITE_LATENCY:
+ case StatDescriptor::READ_LATENCY:
+ os << std::fixed << std::setprecision(2);
+ if (stat >= 1000000000) {
+ os << (stat / 1000000000) << " s";
+ } else if (stat >= 1000000) {
+ os << (stat / 1000000) << " ms";
+ } else if (stat >= 1000) {
+ os << (stat / 1000) << " us";
+ } else {
+ os << stat << " ns";
+ }
+ break;
+ default:
+ ceph_assert(false);
+ break;
+ }
+}
+
+} // anonymous namespace
+
+namespace iostat {
+
+struct Iterations {};
+
+void validate(boost::any& v, const std::vector<std::string>& values,
+ Iterations *target_type, int) {
+ po::validators::check_first_occurrence(v);
+ auto& s = po::validators::get_single_string(values);
+
+ try {
+ auto iterations = boost::lexical_cast<uint32_t>(s);
+ if (iterations > 0) {
+ v = boost::any(iterations);
+ return;
+ }
+ } catch (const boost::bad_lexical_cast &) {
+ }
+ throw po::validation_error(po::validation_error::invalid_option_value);
+}
+
+void format(const ImageStats& image_stats, Formatter* f, bool global_search) {
+ TextTable tbl;
+ if (f) {
+ f->open_array_section("images");
+ } else {
+ tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT);
+ for (auto& stat : STAT_DESCRIPTORS.left) {
+ std::string title;
+ switch (stat.first) {
+ case StatDescriptor::WRITE_OPS:
+ title = "WR ";
+ break;
+ case StatDescriptor::READ_OPS:
+ title = "RD ";
+ break;
+ case StatDescriptor::WRITE_BYTES:
+ title = "WR_BYTES ";
+ break;
+ case StatDescriptor::READ_BYTES:
+ title = "RD_BYTES ";
+ break;
+ case StatDescriptor::WRITE_LATENCY:
+ title = "WR_LAT ";
+ break;
+ case StatDescriptor::READ_LATENCY:
+ title = "RD_LAT ";
+ break;
+ default:
+ ceph_assert(false);
+ break;
+ }
+ tbl.define_column(title, TextTable::RIGHT, TextTable::RIGHT);
+ }
+ }
+
+ for (auto& image_stat : image_stats) {
+ if (f) {
+ f->open_object_section("image");
+ f->dump_string("pool", image_stat.pool_name);
+ f->dump_string("pool_namespace", image_stat.pool_namespace);
+ f->dump_string("image", image_stat.image_name);
+ for (auto& pair : STAT_DESCRIPTORS.left) {
+ f->dump_float(pair.second.c_str(),
+ image_stat.stats[static_cast<size_t>(pair.first)]);
+ }
+ f->close_section();
+ } else {
+ std::string name;
+ if (global_search) {
+ name += image_stat.pool_name + "/";
+ if (!image_stat.pool_namespace.empty()) {
+ name += image_stat.pool_namespace + "/";
+ }
+ }
+ name += image_stat.image_name;
+
+ tbl << name;
+ for (auto& pair : STAT_DESCRIPTORS.left) {
+ std::stringstream str;
+ format_stat(pair.first,
+ image_stat.stats[static_cast<size_t>(pair.first)], str);
+ str << ' ';
+ tbl << str.str();
+ }
+ tbl << TextTable::endrow;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ std::cout << tbl << std::endl;
+ }
+}
+
+} // namespace iostat
+
+#ifdef HAVE_CURSES
+namespace iotop {
+
+class MainWindow {
+public:
+ MainWindow(librados::Rados& rados, const std::string& pool_spec)
+ : m_rados(rados), m_pool_spec(pool_spec) {
+ initscr();
+ curs_set(0);
+ cbreak();
+ noecho();
+ keypad(stdscr, TRUE);
+ nodelay(stdscr, TRUE);
+
+ init_columns();
+ }
+
+ int run() {
+ redraw();
+
+ int r = 0;
+ std::stringstream err_str;
+ while (true) {
+ r = query_iostats(m_rados, m_pool_spec, m_sort_by, &m_image_stats,
+ err_str);
+ if (r < 0) {
+ break;
+ return r;
+ }
+
+ redraw();
+ wait_for_key_or_delay();
+
+ int ch = getch();
+ if (ch == 'q' || ch == 'Q') {
+ break;
+ } else if (ch == '<' || ch == KEY_LEFT) {
+ auto it = STAT_DESCRIPTORS.left.find(m_sort_by);
+ if (it != STAT_DESCRIPTORS.left.begin()) {
+ m_sort_by = (--it)->first;
+ }
+ } else if (ch == '>' || ch == KEY_RIGHT) {
+ auto it = STAT_DESCRIPTORS.left.find(m_sort_by);
+ if (it != STAT_DESCRIPTORS.left.end() &&
+ ++it != STAT_DESCRIPTORS.left.end()) {
+ m_sort_by = it->first;
+ }
+ }
+ }
+
+ endwin();
+
+ if (r < 0) {
+ std::cerr << err_str.str() << std::endl;
+ }
+ return r;
+ }
+
+private:
+ static const size_t STAT_COLUMN_WIDTH = 12;
+
+ librados::Rados& m_rados;
+ std::string m_pool_spec;
+
+ ImageStats m_image_stats;
+ StatDescriptor m_sort_by = StatDescriptor::WRITE_OPS;
+
+ bool m_pending_win_opened = false;
+ WINDOW* m_pending_win = nullptr;
+
+ int m_height = 1;
+ int m_width = 1;
+
+ std::map<StatDescriptor, std::string> m_columns;
+
+ void init_columns() {
+ m_columns.clear();
+ for (auto& pair : STAT_DESCRIPTORS.left) {
+ std::string title;
+ switch (pair.first) {
+ case StatDescriptor::WRITE_OPS:
+ title = "WRITES OPS";
+ break;
+ case StatDescriptor::READ_OPS:
+ title = "READS OPS";
+ break;
+ case StatDescriptor::WRITE_BYTES:
+ title = "WRITE BYTES";
+ break;
+ case StatDescriptor::READ_BYTES:
+ title = "READ BYTES";
+ break;
+ case StatDescriptor::WRITE_LATENCY:
+ title = "WRITE LAT";
+ break;
+ case StatDescriptor::READ_LATENCY:
+ title = "READ LAT";
+ break;
+ default:
+ ceph_assert(false);
+ break;
+ }
+ m_columns[pair.first] = (title);
+ }
+ }
+
+ void redraw() {
+ getmaxyx(stdscr, m_height, m_width);
+
+ redraw_main_window();
+ redraw_pending_window();
+
+ doupdate();
+ }
+
+ void redraw_main_window() {
+ werase(stdscr);
+ mvhline(0, 0, ' ' | A_REVERSE, m_width);
+
+ // print header for all metrics
+ int remaining_cols = m_width;
+ std::stringstream str;
+ for (auto& pair : m_columns) {
+ int attr = A_REVERSE;
+ std::string title;
+ if (pair.first == m_sort_by) {
+ title += '>';
+ attr |= A_BOLD;
+ } else {
+ title += ' ';
+ }
+ title += pair.second;
+
+ str.str("");
+ str << std::right << std::setfill(' ')
+ << std::setw(STAT_COLUMN_WIDTH)
+ << title << ' ';
+
+ attrset(attr);
+ addstr(str.str().c_str());
+ remaining_cols -= title.size();
+ }
+
+ attrset(A_REVERSE);
+ addstr("IMAGE");
+ attrset(A_NORMAL);
+
+ // print each image (one per line)
+ int row = 1;
+ int remaining_lines = m_height - 1;
+ for (auto& image_stat : m_image_stats) {
+ if (remaining_lines <= 0) {
+ break;
+ }
+ --remaining_lines;
+
+ move(row++, 0);
+ for (auto& pair : m_columns) {
+ str.str("");
+ format_stat(pair.first,
+ image_stat.stats[static_cast<size_t>(pair.first)], str);
+ auto value = str.str().substr(0, STAT_COLUMN_WIDTH);
+
+ str.str("");
+ str << std::right << std::setfill(' ')
+ << std::setw(STAT_COLUMN_WIDTH)
+ << value << ' ';
+ addstr(str.str().c_str());
+ }
+
+ std::string image;
+ if (m_pool_spec.empty()) {
+ image = format_pool_spec(image_stat.pool_name,
+ image_stat.pool_namespace) + "/";
+ }
+ image += image_stat.image_name;
+ addstr(image.substr(0, remaining_cols).c_str());
+ }
+
+ wnoutrefresh(stdscr);
+ }
+
+ void redraw_pending_window() {
+ // draw a "please by patient" window while waiting
+ const char* msg = "Waiting for initial stats";
+ int height = 5;
+ int width = strlen(msg) + 4;;
+ int starty = (m_height - height) / 2;
+ int startx = (m_width - width) / 2;
+
+ if (m_image_stats.empty() && !m_pending_win_opened) {
+ m_pending_win_opened = true;
+ m_pending_win = newwin(height, width, starty, startx);
+ }
+
+ if (m_pending_win != nullptr) {
+ if (m_image_stats.empty()) {
+ box(m_pending_win, 0 , 0);
+ mvwaddstr(m_pending_win, 2, 2, msg);
+ wnoutrefresh(m_pending_win);
+ } else {
+ delwin(m_pending_win);
+ m_pending_win = nullptr;
+ }
+ }
+ }
+
+ void wait_for_key_or_delay() {
+ fd_set fds;
+ FD_ZERO(&fds);
+ FD_SET(STDIN_FILENO, &fds);
+
+ // no point to refreshing faster than the stats period
+ struct timeval tval;
+ tval.tv_sec = std::min<uint32_t>(
+ 10, g_conf().get_val<int64_t>("mgr_stats_period"));
+ tval.tv_usec = 0;
+
+ select(STDIN_FILENO + 1, &fds, NULL, NULL, &tval);
+ }
+};
+
+} // namespace iotop
+#endif // HAVE_CURSES
+
+
+void get_arguments_iostat(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+ options->add_options()
+ ("iterations", po::value<iostat::Iterations>(),
+ "iterations of metric collection [> 0]")
+ ("sort-by", po::value<StatDescriptor>()->default_value(StatDescriptor::WRITE_OPS),
+ "sort-by IO metric "
+ "(write-ops, read-ops, write-bytes, read-bytes, write-latency, read-latency) "
+ "[default: write-ops]");
+ at::add_format_options(options);
+}
+
+int execute_iostat(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool;
+ std::string pool_namespace;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool,
+ &pool_namespace, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ uint32_t iterations = 0;
+ if (vm.count("iterations")) {
+ iterations = vm["iterations"].as<uint32_t>();
+ }
+ auto sort_by = vm["sort-by"].as<StatDescriptor>();
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ auto f = formatter.get();
+ if (iterations > 1 && f != nullptr) {
+ std::cerr << "rbd: specifing iterations is not valid with formatted output"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ r = rados.wait_for_latest_osdmap();
+ if (r < 0) {
+ std::cerr << "rbd: failed to retrieve OSD map" << std::endl;
+ return r;
+ }
+
+ if (!pool_namespace.empty()) {
+ // default empty pool name only if namespace is specified to allow
+ // for an empty pool_spec (-> GLOBAL_POOL_KEY)
+ utils::normalize_pool_name(&pool);
+ }
+ std::string pool_spec = format_pool_spec(pool, pool_namespace);
+
+ // no point to refreshing faster than the stats period
+ auto delay = std::min<uint32_t>(10, g_conf().get_val<int64_t>("mgr_stats_period"));
+
+ ImageStats image_stats;
+ uint32_t count = 0;
+ bool printed_notice = false;
+ while (count++ < iterations || iterations == 0) {
+ r = query_iostats(rados, pool_spec, sort_by, &image_stats, std::cerr);
+ if (r < 0) {
+ return r;
+ }
+
+ if (count == 1 && image_stats.empty()) {
+ count = 0;
+ if (!printed_notice) {
+ std::cerr << "rbd: waiting for initial image stats"
+ << std::endl << std::endl;;
+ printed_notice = true;
+ }
+ } else {
+ iostat::format(image_stats, f, pool_spec.empty());
+ if (f != nullptr) {
+ break;
+ }
+ }
+
+ sleep(delay);
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_CURSES
+void get_arguments_iotop(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+}
+
+int execute_iotop(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool;
+ std::string pool_namespace;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool,
+ &pool_namespace, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ r = rados.wait_for_latest_osdmap();
+ if (r < 0) {
+ std::cerr << "rbd: failed to retrieve OSD map" << std::endl;
+ return r;
+ }
+
+ if (!pool_namespace.empty()) {
+ // default empty pool name only if namespace is specified to allow
+ // for an empty pool_spec (-> GLOBAL_POOL_KEY)
+ utils::normalize_pool_name(&pool);
+ }
+ iotop::MainWindow mainWindow(rados, format_pool_spec(pool, pool_namespace));
+ r = mainWindow.run();
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+Shell::Action top_action(
+ {"perf", "image", "iotop"}, {}, "Display a top-like IO monitor.", "",
+ &get_arguments_iotop, &execute_iotop);
+
+#endif // HAVE_CURSES
+
+Shell::Action stat_action(
+ {"perf", "image", "iostat"}, {}, "Display image IO statistics.", "",
+ &get_arguments_iostat, &execute_iostat);
+} // namespace perf
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/PersistentCache.cc b/src/tools/rbd/action/PersistentCache.cc
new file mode 100644
index 000000000..949006b82
--- /dev/null
+++ b/src/tools/rbd/action/PersistentCache.cc
@@ -0,0 +1,122 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/types.h"
+#include "include/rbd_types.h"
+#include "include/stringify.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace persistent_cache {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+void get_arguments_invalidate(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_id_option(options);
+}
+
+int execute_invalidate(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = image.invalidate_cache();
+ if (r < 0) {
+ std::cerr << "rbd: invalidating persistent cache failed: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_arguments_flush(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_id_option(options);
+}
+
+int execute_flush(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t features;
+ r = image.features(&features);
+ if (r < 0) {
+ return r;
+ }
+
+ if (features & RBD_FEATURE_DIRTY_CACHE) {
+ r = image.flush();
+ if (r < 0) {
+ std::cerr << "rbd: flushing persistent cache failed: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ } else {
+ std::cout << "rbd: persistent cache is clean or disabled" << std::endl;
+ }
+
+ return 0;
+}
+
+Shell::Action action_invalidate(
+ {"persistent-cache", "invalidate"}, {},
+ "Invalidate (discard) existing / dirty persistent cache.", "",
+ &get_arguments_invalidate, &execute_invalidate);
+Shell::Action action_flush(
+ {"persistent-cache", "flush"}, {}, "Flush persistent cache.", "",
+ &get_arguments_flush, &execute_flush);
+
+} // namespace persistent_cache
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Pool.cc b/src/tools/rbd/action/Pool.cc
new file mode 100644
index 000000000..2ad8e17ff
--- /dev/null
+++ b/src/tools/rbd/action/Pool.cc
@@ -0,0 +1,162 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/stringify.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace pool {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+void get_arguments_init(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, false);
+ options->add_options()
+ ("force", po::bool_switch(),
+ "force initialize pool for RBD use if registered by another application");
+}
+
+int execute_init(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
+ nullptr, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, "", &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.pool_init(io_ctx, vm["force"].as<bool>());
+ if (r == -EOPNOTSUPP) {
+ std::cerr << "rbd: luminous or later release required." << std::endl;
+ } else if (r == -EPERM) {
+ std::cerr << "rbd: pool already registered to a different application."
+ << std::endl;
+ } else if (r < 0) {
+ std::cerr << "rbd: error registered application: " << cpp_strerror(r)
+ << std::endl;
+ }
+
+ return 0;
+}
+
+void get_arguments_stats(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+ at::add_format_options(options);
+}
+
+int execute_stats(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ uint64_t image_count;
+ uint64_t provisioned_bytes;
+ uint64_t snap_count;
+ uint64_t trash_count;
+ uint64_t trash_provisioned_bytes;
+ uint64_t trash_snap_count;
+
+ librbd::PoolStats pool_stats;
+ pool_stats.add(RBD_POOL_STAT_OPTION_IMAGES, &image_count);
+ pool_stats.add(RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES,
+ &provisioned_bytes);
+ pool_stats.add(RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS, &snap_count);
+ pool_stats.add(RBD_POOL_STAT_OPTION_TRASH_IMAGES, &trash_count);
+ pool_stats.add(RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES,
+ &trash_provisioned_bytes);
+ pool_stats.add(RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS, &trash_snap_count);
+
+ r = rbd.pool_stats_get(io_ctx, &pool_stats);
+ if (r < 0) {
+ std::cerr << "rbd: failed to query pool stats: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+
+ if (formatter) {
+ formatter->open_object_section("stats");
+ formatter->open_object_section("images");
+ formatter->dump_unsigned("count", image_count);
+ formatter->dump_unsigned("provisioned_bytes", provisioned_bytes);
+ formatter->dump_unsigned("snap_count", snap_count);
+ formatter->close_section();
+ formatter->open_object_section("trash");
+ formatter->dump_unsigned("count", trash_count);
+ formatter->dump_unsigned("provisioned_bytes", trash_provisioned_bytes);
+ formatter->dump_unsigned("snap_count", trash_snap_count);
+ formatter->close_section();
+ formatter->close_section();
+ formatter->flush(std::cout);
+ } else {
+ std::cout << "Total Images: " << image_count;
+ if (trash_count > 0) {
+ std::cout << " (" << trash_count << " in trash)";
+ }
+ std::cout << std::endl;
+
+ std::cout << "Total Snapshots: " << snap_count;
+ if (trash_count > 0) {
+ std::cout << " (" << trash_snap_count << " in trash)";
+ }
+ std::cout << std::endl;
+
+ std::cout << "Provisioned Size: " << byte_u_t(provisioned_bytes);
+ if (trash_count > 0) {
+ std::cout << " (" << byte_u_t(trash_provisioned_bytes) << " in trash)";
+ }
+ std::cout << std::endl;
+ }
+
+ return 0;
+}
+
+Shell::Action init_action(
+ {"pool", "init"}, {}, "Initialize pool for use by RBD.", "",
+ &get_arguments_init, &execute_init);
+Shell::Action stat_action(
+ {"pool", "stats"}, {}, "Display pool statistics.",
+ "Note: legacy v1 images are not included in stats",
+ &get_arguments_stats, &execute_stats);
+
+} // namespace pool
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Remove.cc b/src/tools/rbd/action/Remove.cc
new file mode 100644
index 000000000..c5dcf2323
--- /dev/null
+++ b/src/tools/rbd/action/Remove.cc
@@ -0,0 +1,161 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace remove {
+
+namespace {
+
+bool is_auto_delete_snapshot(librbd::Image* image,
+ const librbd::snap_info_t &snap_info) {
+ librbd::snap_namespace_type_t namespace_type;
+ int r = image->snap_get_namespace_type(snap_info.id, &namespace_type);
+ if (r < 0) {
+ return false;
+ }
+
+ switch (namespace_type) {
+ case RBD_SNAP_NAMESPACE_TYPE_TRASH:
+ return true;
+ default:
+ return false;
+ }
+}
+
+} // anonymous namespace
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_delete(librbd::RBD &rbd, librados::IoCtx& io_ctx,
+ const char *imgname, bool no_progress)
+{
+ utils::ProgressContext pc("Removing image", no_progress);
+ int r = rbd.remove_with_progress(io_ctx, imgname, pc);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_no_progress_option(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ io_ctx.set_pool_full_try();
+
+ librbd::RBD rbd;
+ r = do_delete(rbd, io_ctx, image_name.c_str(),
+ vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ if (r == -ENOTEMPTY) {
+ librbd::Image image;
+ std::vector<librbd::snap_info_t> snaps;
+ int image_r = utils::open_image(io_ctx, image_name, true, &image);
+ if (image_r >= 0) {
+ image_r = image.snap_list(snaps);
+ }
+ if (image_r >= 0) {
+ snaps.erase(std::remove_if(snaps.begin(), snaps.end(),
+ [&image](const librbd::snap_info_t& snap) {
+ return is_auto_delete_snapshot(&image,
+ snap);
+ }),
+ snaps.end());
+ }
+
+ if (!snaps.empty()) {
+ std::cerr << "rbd: image has snapshots - these must be deleted"
+ << " with 'rbd snap purge' before the image can be removed."
+ << std::endl;
+ } else {
+ std::cerr << "rbd: image has snapshots with linked clones - these must "
+ << "be deleted or flattened before the image can be removed."
+ << std::endl;
+ }
+ } else if (r == -EBUSY) {
+ std::cerr << "rbd: error: image still has watchers"
+ << std::endl
+ << "This means the image is still open or the client using "
+ << "it crashed. Try again after closing/unmapping it or "
+ << "waiting 30s for the crashed client to timeout."
+ << std::endl;
+ } else if (r == -EMLINK) {
+ librbd::Image image;
+ int image_r = utils::open_image(io_ctx, image_name, true, &image);
+ librbd::group_info_t group_info;
+ if (image_r == 0) {
+ image_r = image.get_group(&group_info, sizeof(group_info));
+ }
+ if (image_r == 0) {
+ std::string pool_name = "";
+ librados::Rados rados(io_ctx);
+ librados::IoCtx pool_io_ctx;
+ image_r = rados.ioctx_create2(group_info.pool, pool_io_ctx);
+ if (image_r < 0) {
+ pool_name = "<missing group pool " + stringify(group_info.pool) + ">";
+ } else {
+ pool_name = pool_io_ctx.get_pool_name();
+ }
+ std::cerr << "rbd: error: image belongs to a group "
+ << pool_name << "/";
+ if (!io_ctx.get_namespace().empty()) {
+ std::cerr << io_ctx.get_namespace() << "/";
+ }
+ std::cerr << group_info.name;
+ } else
+ std::cerr << "rbd: error: image belongs to a group";
+
+ std::cerr << std::endl
+ << "Remove the image from the group and try again."
+ << std::endl;
+ image.close();
+ } else {
+ std::cerr << "rbd: delete error: " << cpp_strerror(r) << std::endl;
+ }
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"remove"}, {"rm"}, "Delete an image.", "", &get_arguments, &execute);
+
+} // namespace remove
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Rename.cc b/src/tools/rbd/action/Rename.cc
new file mode 100644
index 000000000..b4954bcbb
--- /dev/null
+++ b/src/tools/rbd/action/Rename.cc
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace rename {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_rename(librbd::RBD &rbd, librados::IoCtx& io_ctx,
+ const char *imgname, const char *destname)
+{
+ int r = rbd.rename(io_ctx, imgname, destname);
+ if (r < 0)
+ return r;
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE);
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string dst_image_name;
+ std::string dst_snap_name;
+ std::string dst_pool_name = pool_name;
+ std::string dst_namespace_name = namespace_name;
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name,
+ &dst_namespace_name, &dst_image_name, &dst_snap_name, true,
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL);
+ if (r < 0) {
+ return r;
+ }
+
+ if (pool_name != dst_pool_name) {
+ std::cerr << "rbd: mv/rename across pools not supported" << std::endl
+ << "source pool: " << pool_name << " dest pool: " << dst_pool_name
+ << std::endl;
+ return -EINVAL;
+ } else if (namespace_name != dst_namespace_name) {
+ std::cerr << "rbd: mv/rename across namespaces not supported" << std::endl
+ << "source namespace: " << namespace_name << " dest namespace: "
+ << dst_namespace_name << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::RBD rbd;
+ r = do_rename(rbd, io_ctx, image_name.c_str(), dst_image_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: rename error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"rename"}, {"mv"}, "Rename image within pool.", "", &get_arguments,
+ &execute);
+
+} // namespace rename
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Resize.cc b/src/tools/rbd/action/Resize.cc
new file mode 100644
index 000000000..79fbbd127
--- /dev/null
+++ b/src/tools/rbd/action/Resize.cc
@@ -0,0 +1,123 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace resize {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_resize(librbd::Image& image, uint64_t size, bool allow_shrink, bool no_progress)
+{
+ utils::ProgressContext pc("Resizing image", no_progress);
+ int r = image.resize2(size, allow_shrink, pc);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_size_option(options);
+ options->add_options()
+ ("allow-shrink", po::bool_switch(), "permit shrinking");
+ at::add_no_progress_option(options);
+ at::add_encryption_options(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t size;
+ r = utils::get_image_size(vm, &size);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::EncryptionOptions encryption_options;
+ r = utils::get_encryption_options(vm, &encryption_options);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
+ snap_name, false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!encryption_options.specs.empty()) {
+ r = image.encryption_load2(encryption_options.specs.data(),
+ encryption_options.specs.size());
+ if (r < 0) {
+ std::cerr << "rbd: encryption load failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ }
+
+ librbd::image_info_t info;
+ r = image.stat(info, sizeof(info));
+ if (r < 0) {
+ std::cerr << "rbd: resize error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ if (info.size == size) {
+ std::cerr << "rbd: new size is equal to original size " << std::endl;
+ return -EINVAL;
+ }
+
+ if (info.size > size && !vm["allow-shrink"].as<bool>()) {
+ r = -EINVAL;
+ } else {
+ r = do_resize(image, size, vm["allow-shrink"].as<bool>(), vm[at::NO_PROGRESS].as<bool>());
+ }
+
+ if (r < 0) {
+ if (r == -EINVAL && !vm["allow-shrink"].as<bool>()) {
+ std::cerr << "rbd: shrinking an image is only allowed with the "
+ << "--allow-shrink flag" << std::endl;
+ return r;
+ }
+ std::cerr << "rbd: resize error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::SwitchArguments switched_arguments({"allow-shrink"});
+Shell::Action action(
+ {"resize"}, {}, "Resize (expand or shrink) image.", "", &get_arguments,
+ &execute);
+
+} // namespace resize
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Snap.cc b/src/tools/rbd/action/Snap.cc
new file mode 100644
index 000000000..cb87735f9
--- /dev/null
+++ b/src/tools/rbd/action/Snap.cc
@@ -0,0 +1,972 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/types.h"
+#include "include/stringify.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+#include <boost/bind/bind.hpp>
+
+namespace rbd {
+namespace action {
+namespace snap {
+
+using namespace boost::placeholders;
+
+static const std::string ALL_NAME("all");
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+int do_list_snaps(librbd::Image& image, Formatter *f, bool all_snaps, librados::Rados& rados)
+{
+ std::vector<librbd::snap_info_t> snaps;
+ TextTable t;
+ int r;
+
+ r = image.snap_list(snaps);
+ if (r < 0) {
+ std::cerr << "rbd: unable to list snapshots" << std::endl;
+ return r;
+ }
+
+ librbd::image_info_t info;
+ if (!all_snaps) {
+ snaps.erase(remove_if(snaps.begin(),
+ snaps.end(),
+ boost::bind(utils::is_not_user_snap_namespace, &image, _1)),
+ snaps.end());
+ } else if (!f) {
+ r = image.stat(info, sizeof(info));
+ if (r < 0) {
+ std::cerr << "rbd: unable to get image info" << std::endl;
+ return r;
+ }
+ }
+
+ if (f) {
+ f->open_array_section("snapshots");
+ } else {
+ t.define_column("SNAPID", TextTable::LEFT, TextTable::RIGHT);
+ t.define_column("NAME", TextTable::LEFT, TextTable::LEFT);
+ t.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT);
+ t.define_column("PROTECTED", TextTable::LEFT, TextTable::LEFT);
+ t.define_column("TIMESTAMP", TextTable::LEFT, TextTable::RIGHT);
+ if (all_snaps) {
+ t.define_column("NAMESPACE", TextTable::LEFT, TextTable::LEFT);
+ }
+ }
+
+ std::list<std::pair<int64_t, std::string>> pool_list;
+ rados.pool_list2(pool_list);
+ std::map<int64_t, std::string> pool_map(pool_list.begin(), pool_list.end());
+
+ for (std::vector<librbd::snap_info_t>::iterator s = snaps.begin();
+ s != snaps.end(); ++s) {
+ struct timespec timestamp;
+ bool snap_protected = false;
+ image.snap_get_timestamp(s->id, &timestamp);
+ std::string tt_str = "";
+ if(timestamp.tv_sec != 0) {
+ time_t tt = timestamp.tv_sec;
+ tt_str = ctime(&tt);
+ tt_str = tt_str.substr(0, tt_str.length() - 1);
+ }
+
+ librbd::snap_namespace_type_t snap_namespace;
+ r = image.snap_get_namespace_type(s->id, &snap_namespace);
+ if (r < 0) {
+ std::cerr << "rbd: unable to retrieve snap namespace" << std::endl;
+ return r;
+ }
+
+ std::string snap_namespace_name = "Unknown";
+ switch (snap_namespace) {
+ case RBD_SNAP_NAMESPACE_TYPE_USER:
+ snap_namespace_name = "user";
+ break;
+ case RBD_SNAP_NAMESPACE_TYPE_GROUP:
+ snap_namespace_name = "group";
+ break;
+ case RBD_SNAP_NAMESPACE_TYPE_TRASH:
+ snap_namespace_name = "trash";
+ break;
+ case RBD_SNAP_NAMESPACE_TYPE_MIRROR:
+ snap_namespace_name = "mirror";
+ break;
+ }
+
+ int get_trash_res = -ENOENT;
+ std::string trash_original_name;
+ int get_group_res = -ENOENT;
+ librbd::snap_group_namespace_t group_snap;
+ int get_mirror_res = -ENOENT;
+ librbd::snap_mirror_namespace_t mirror_snap;
+ std::string mirror_snap_state = "unknown";
+ if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_GROUP) {
+ get_group_res = image.snap_get_group_namespace(s->id, &group_snap,
+ sizeof(group_snap));
+ } else if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_TRASH) {
+ get_trash_res = image.snap_get_trash_namespace(
+ s->id, &trash_original_name);
+ } else if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_MIRROR) {
+ get_mirror_res = image.snap_get_mirror_namespace(
+ s->id, &mirror_snap, sizeof(mirror_snap));
+
+ switch (mirror_snap.state) {
+ case RBD_SNAP_MIRROR_STATE_PRIMARY:
+ mirror_snap_state = "primary";
+ break;
+ case RBD_SNAP_MIRROR_STATE_NON_PRIMARY:
+ mirror_snap_state = "non-primary";
+ break;
+ case RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED:
+ case RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED:
+ mirror_snap_state = "demoted";
+ break;
+ }
+ }
+
+ std::string protected_str = "";
+ if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_USER) {
+ r = image.snap_is_protected(s->name.c_str(), &snap_protected);
+ if (r < 0) {
+ std::cerr << "rbd: unable to retrieve snap protection" << std::endl;
+ return r;
+ }
+ }
+
+ if (f) {
+ protected_str = snap_protected ? "true" : "false";
+ f->open_object_section("snapshot");
+ f->dump_unsigned("id", s->id);
+ f->dump_string("name", s->name);
+ f->dump_unsigned("size", s->size);
+ f->dump_string("protected", protected_str);
+ f->dump_string("timestamp", tt_str);
+ if (all_snaps) {
+ f->open_object_section("namespace");
+ f->dump_string("type", snap_namespace_name);
+ if (get_group_res == 0) {
+ std::string pool_name = pool_map[group_snap.group_pool];
+ f->dump_string("pool", pool_name);
+ f->dump_string("group", group_snap.group_name);
+ f->dump_string("group snap", group_snap.group_snap_name);
+ } else if (get_trash_res == 0) {
+ f->dump_string("original_name", trash_original_name);
+ } else if (get_mirror_res == 0) {
+ f->dump_string("state", mirror_snap_state);
+ f->open_array_section("mirror_peer_uuids");
+ for (auto &uuid : mirror_snap.mirror_peer_uuids) {
+ f->dump_string("peer_uuid", uuid);
+ }
+ f->close_section();
+ f->dump_bool("complete", mirror_snap.complete);
+ if (mirror_snap.state == RBD_SNAP_MIRROR_STATE_NON_PRIMARY ||
+ mirror_snap.state == RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED) {
+ f->dump_string("primary_mirror_uuid",
+ mirror_snap.primary_mirror_uuid);
+ f->dump_unsigned("primary_snap_id",
+ mirror_snap.primary_snap_id);
+ f->dump_unsigned("last_copied_object_number",
+ mirror_snap.last_copied_object_number);
+ }
+ }
+ f->close_section();
+ }
+ f->close_section();
+ } else {
+ protected_str = snap_protected ? "yes" : "";
+ t << s->id << s->name << stringify(byte_u_t(s->size)) << protected_str << tt_str;
+
+ if (all_snaps) {
+ std::ostringstream oss;
+ oss << snap_namespace_name;
+
+ if (get_group_res == 0) {
+ std::string pool_name = pool_map[group_snap.group_pool];
+ oss << " (" << pool_name << "/"
+ << group_snap.group_name << "@"
+ << group_snap.group_snap_name << ")";
+ } else if (get_trash_res == 0) {
+ oss << " (" << trash_original_name << ")";
+ } else if (get_mirror_res == 0) {
+ oss << " (" << mirror_snap_state << " "
+ << "peer_uuids:[" << mirror_snap.mirror_peer_uuids << "]";
+ if (mirror_snap.state == RBD_SNAP_MIRROR_STATE_NON_PRIMARY ||
+ mirror_snap.state == RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED) {
+ oss << " " << mirror_snap.primary_mirror_uuid << ":"
+ << mirror_snap.primary_snap_id << " ";
+ if (!mirror_snap.complete) {
+ if (info.num_objs > 0) {
+ auto progress = std::min<uint64_t>(
+ 100, 100 * mirror_snap.last_copied_object_number /
+ info.num_objs);
+ oss << progress << "% ";
+ } else {
+ oss << "not ";
+ }
+ }
+ oss << "copied";
+ }
+ oss << ")";
+ }
+
+ t << oss.str();
+ }
+ t << TextTable::endrow;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else if (snaps.size()) {
+ std::cout << t;
+ }
+
+ return 0;
+}
+
+int do_add_snap(librbd::Image& image, const char *snapname,
+ uint32_t flags, bool no_progress)
+{
+ utils::ProgressContext pc("Creating snap", no_progress);
+
+ int r = image.snap_create2(snapname, flags, pc);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+
+ pc.finish();
+ return 0;
+}
+
+int do_remove_snap(librbd::Image& image, const char *snapname, bool force,
+ bool no_progress)
+{
+ uint32_t flags = force? RBD_SNAP_REMOVE_FORCE : 0;
+ int r = 0;
+ utils::ProgressContext pc("Removing snap", no_progress);
+
+ r = image.snap_remove2(snapname, flags, pc);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+
+ pc.finish();
+ return 0;
+}
+
+int do_rollback_snap(librbd::Image& image, const char *snapname,
+ bool no_progress)
+{
+ utils::ProgressContext pc("Rolling back to snapshot", no_progress);
+ int r = image.snap_rollback_with_progress(snapname, pc);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+int do_purge_snaps(librbd::Image& image, bool no_progress)
+{
+ utils::ProgressContext pc("Removing all snapshots", no_progress);
+ std::vector<librbd::snap_info_t> snaps;
+ bool is_protected = false;
+ int r = image.snap_list(snaps);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ } else if (0 == snaps.size()) {
+ return 0;
+ } else {
+ std::list<std::string> protect;
+ snaps.erase(remove_if(snaps.begin(),
+ snaps.end(),
+ boost::bind(utils::is_not_user_snap_namespace, &image, _1)),
+ snaps.end());
+ for (auto it = snaps.begin(); it != snaps.end();) {
+ r = image.snap_is_protected(it->name.c_str(), &is_protected);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ } else if (is_protected == true) {
+ protect.push_back(it->name.c_str());
+ snaps.erase(it);
+ } else {
+ ++it;
+ }
+ }
+
+ if (!protect.empty()) {
+ std::cout << "rbd: error removing snapshot(s) '" << protect << "', which "
+ << (1 == protect.size() ? "is" : "are")
+ << " protected - these must be unprotected with "
+ << "`rbd snap unprotect`."
+ << std::endl;
+ }
+ for (size_t i = 0; i < snaps.size(); ++i) {
+ r = image.snap_remove(snaps[i].name.c_str());
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+ pc.update_progress(i + 1, snaps.size() + protect.size());
+ }
+
+ if (!protect.empty()) {
+ pc.fail();
+ } else if (snaps.size() > 0) {
+ pc.finish();
+ }
+
+ return 0;
+ }
+}
+
+int do_protect_snap(librbd::Image& image, const char *snapname)
+{
+ int r = image.snap_protect(snapname);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int do_unprotect_snap(librbd::Image& image, const char *snapname)
+{
+ int r = image.snap_unprotect(snapname);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int do_set_limit(librbd::Image& image, uint64_t limit)
+{
+ return image.snap_set_limit(limit);
+}
+
+void get_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_id_option(options);
+ at::add_format_options(options);
+
+ std::string name = ALL_NAME + ",a";
+
+ options->add_options()
+ (name.c_str(), po::bool_switch(), "list snapshots from all namespaces");
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ std::string image_id;
+
+ if (vm.count(at::IMAGE_ID)) {
+ image_id = vm[at::IMAGE_ID].as<std::string>();
+ }
+
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, image_id.empty(),
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!image_id.empty() && !image_name.empty()) {
+ std::cerr << "rbd: trying to access image using both name and id. "
+ << std::endl;
+ return -EINVAL;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name,
+ image_id, "", true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ bool all_snaps = vm[ALL_NAME].as<bool>();
+ r = do_list_snaps(image, formatter.get(), all_snaps, rados);
+ if (r < 0) {
+ std::cerr << "rbd: failed to list snapshots: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_create_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_snap_create_options(options);
+ at::add_no_progress_option(options);
+}
+
+int execute_create(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED,
+ utils::SPEC_VALIDATION_SNAP);
+ if (r < 0) {
+ return r;
+ }
+
+ uint32_t flags;
+ r = utils::get_snap_create_flags(vm, &flags);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_add_snap(image, snap_name.c_str(), flags,
+ vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: failed to create snapshot: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_id_option(options);
+ at::add_snap_id_option(options);
+ at::add_no_progress_option(options);
+
+ options->add_options()
+ ("force", po::bool_switch(), "flatten children and unprotect snapshot if needed.");
+}
+
+int execute_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ std::string image_id;
+ uint64_t snap_id = CEPH_NOSNAP;
+ bool force = vm["force"].as<bool>();
+ bool no_progress = vm[at::NO_PROGRESS].as<bool>();
+
+ if (vm.count(at::IMAGE_ID)) {
+ image_id = vm[at::IMAGE_ID].as<std::string>();
+ }
+ if (vm.count(at::SNAPSHOT_ID)) {
+ snap_id = vm[at::SNAPSHOT_ID].as<uint64_t>();
+ }
+
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, image_id.empty(),
+ (snap_id == CEPH_NOSNAP ? utils::SNAPSHOT_PRESENCE_REQUIRED :
+ utils::SNAPSHOT_PRESENCE_PERMITTED),
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!image_id.empty() && !image_name.empty()) {
+ std::cerr << "rbd: trying to access image using both name and id."
+ << std::endl;
+ return -EINVAL;
+ } else if (!snap_name.empty() && snap_id != CEPH_NOSNAP) {
+ std::cerr << "rbd: trying to access snapshot using both name and id."
+ << std::endl;
+ return -EINVAL;
+ } else if ((force || no_progress) && snap_id != CEPH_NOSNAP) {
+ std::cerr << "rbd: force and no-progress options not permitted when "
+ << "removing by id." << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ io_ctx.set_pool_full_try();
+ if (image_id.empty()) {
+ r = utils::open_image(io_ctx, image_name, false, &image);
+ } else {
+ r = utils::open_image_by_id(io_ctx, image_id, false, &image);
+ }
+ if (r < 0) {
+ return r;
+ }
+
+ if (!snap_name.empty()) {
+ r = do_remove_snap(image, snap_name.c_str(), force, no_progress);
+ } else {
+ r = image.snap_remove_by_id(snap_id);
+ }
+
+ if (r < 0) {
+ if (r == -EBUSY) {
+ std::cerr << "rbd: snapshot "
+ << (snap_name.empty() ? std::string("id ") + stringify(snap_id) :
+ std::string("'") + snap_name + "'")
+ << " is protected from removal." << std::endl;
+ } else {
+ std::cerr << "rbd: failed to remove snapshot: " << cpp_strerror(r)
+ << std::endl;
+ }
+ return r;
+ }
+ return 0;
+}
+
+void get_purge_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_id_option(options);
+ at::add_no_progress_option(options);
+}
+
+int execute_purge(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ std::string image_id;
+
+ if (vm.count(at::IMAGE_ID)) {
+ image_id = vm[at::IMAGE_ID].as<std::string>();
+ }
+
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, image_id.empty(),
+ utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!image_id.empty() && !image_name.empty()) {
+ std::cerr << "rbd: trying to access image using both name and id. "
+ << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ io_ctx.set_pool_full_try();
+ if (image_id.empty()) {
+ r = utils::open_image(io_ctx, image_name, false, &image);
+ } else {
+ r = utils::open_image_by_id(io_ctx, image_id, false, &image);
+ }
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_purge_snaps(image, vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ if (r != -EBUSY) {
+ std::cerr << "rbd: removing snaps failed: " << cpp_strerror(r)
+ << std::endl;
+ }
+ return r;
+ }
+ return 0;
+}
+
+void get_rollback_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_no_progress_option(options);
+}
+
+int execute_rollback(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_rollback_snap(image, snap_name.c_str(),
+ vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: rollback failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_protect_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+}
+
+int execute_protect(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ bool is_protected = false;
+ r = image.snap_is_protected(snap_name.c_str(), &is_protected);
+ if (r < 0) {
+ std::cerr << "rbd: protecting snap failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ } else if (is_protected) {
+ std::cerr << "rbd: snap is already protected" << std::endl;
+ return -EBUSY;
+ }
+
+ r = do_protect_snap(image, snap_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: protecting snap failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_unprotect_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_id_option(options);
+}
+
+int execute_unprotect(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ std::string image_id;
+
+ if (vm.count(at::IMAGE_ID)) {
+ image_id = vm[at::IMAGE_ID].as<std::string>();
+ }
+
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, image_id.empty(),
+ utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!image_id.empty() && !image_name.empty()) {
+ std::cerr << "rbd: trying to access image using both name and id. "
+ << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ io_ctx.set_pool_full_try();
+ if (image_id.empty()) {
+ r = utils::open_image(io_ctx, image_name, false, &image);
+ } else {
+ r = utils::open_image_by_id(io_ctx, image_id, false, &image);
+ }
+ if (r < 0) {
+ return r;
+ }
+
+ bool is_protected = false;
+ r = image.snap_is_protected(snap_name.c_str(), &is_protected);
+ if (r < 0) {
+ std::cerr << "rbd: unprotecting snap failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ } else if (!is_protected) {
+ std::cerr << "rbd: snap is already unprotected" << std::endl;
+ return -EINVAL;
+ }
+
+ r = do_unprotect_snap(image, snap_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: unprotecting snap failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_set_limit_arguments(po::options_description *pos,
+ po::options_description *opt) {
+ at::add_image_spec_options(pos, opt, at::ARGUMENT_MODIFIER_NONE);
+ at::add_limit_option(opt);
+}
+
+int execute_set_limit(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ uint64_t limit;
+
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ if (vm.count(at::LIMIT)) {
+ limit = vm[at::LIMIT].as<uint64_t>();
+ } else {
+ std::cerr << "rbd: must specify --limit <num>" << std::endl;
+ return -ERANGE;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_set_limit(image, limit);
+ if (r < 0) {
+ std::cerr << "rbd: setting snapshot limit failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_clear_limit_arguments(po::options_description *pos,
+ po::options_description *opt) {
+ at::add_image_spec_options(pos, opt, at::ARGUMENT_MODIFIER_NONE);
+}
+
+int execute_clear_limit(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_set_limit(image, UINT64_MAX);
+ if (r < 0) {
+ std::cerr << "rbd: clearing snapshot limit failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_rename_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE);
+ at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
+}
+
+int execute_rename(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string src_snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &src_snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return -r;
+ }
+
+ std::string dest_pool_name(pool_name);
+ std::string dest_namespace_name(namespace_name);
+ std::string dest_image_name;
+ std::string dest_snap_name;
+ r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dest_pool_name,
+ &dest_namespace_name, &dest_image_name, &dest_snap_name, true,
+ utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_SNAP);
+ if (r < 0) {
+ return -r;
+ }
+
+ if (pool_name != dest_pool_name) {
+ std::cerr << "rbd: source and destination pool must be the same"
+ << std::endl;
+ return -EINVAL;
+ } else if (namespace_name != dest_namespace_name) {
+ std::cerr << "rbd: source and destination namespace must be the same"
+ << std::endl;
+ return -EINVAL;
+ } else if (image_name != dest_image_name) {
+ std::cerr << "rbd: source and destination image name must be the same"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = image.snap_rename(src_snap_name.c_str(), dest_snap_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: renaming snap failed: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action_list(
+ {"snap", "list"}, {"snap", "ls"}, "Dump list of image snapshots.", "",
+ &get_list_arguments, &execute_list);
+Shell::Action action_create(
+ {"snap", "create"}, {"snap", "add"}, "Create a snapshot.", "",
+ &get_create_arguments, &execute_create);
+Shell::Action action_remove(
+ {"snap", "remove"}, {"snap", "rm"}, "Delete a snapshot.", "",
+ &get_remove_arguments, &execute_remove);
+Shell::Action action_purge(
+ {"snap", "purge"}, {}, "Delete all unprotected snapshots.", "",
+ &get_purge_arguments, &execute_purge);
+Shell::Action action_rollback(
+ {"snap", "rollback"}, {"snap", "revert"}, "Rollback image to snapshot.", "",
+ &get_rollback_arguments, &execute_rollback);
+Shell::Action action_protect(
+ {"snap", "protect"}, {}, "Prevent a snapshot from being deleted.", "",
+ &get_protect_arguments, &execute_protect);
+Shell::Action action_unprotect(
+ {"snap", "unprotect"}, {}, "Allow a snapshot to be deleted.", "",
+ &get_unprotect_arguments, &execute_unprotect);
+Shell::Action action_set_limit(
+ {"snap", "limit", "set"}, {}, "Limit the number of snapshots.", "",
+ &get_set_limit_arguments, &execute_set_limit);
+Shell::Action action_clear_limit(
+ {"snap", "limit", "clear"}, {}, "Remove snapshot limit.", "",
+ &get_clear_limit_arguments, &execute_clear_limit);
+Shell::Action action_rename(
+ {"snap", "rename"}, {}, "Rename a snapshot.", "",
+ &get_rename_arguments, &execute_rename);
+
+} // namespace snap
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Sparsify.cc b/src/tools/rbd/action/Sparsify.cc
new file mode 100644
index 000000000..a345f920b
--- /dev/null
+++ b/src/tools/rbd/action/Sparsify.cc
@@ -0,0 +1,82 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace sparsify {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_sparsify(librbd::Image& image, size_t sparse_size,
+ bool no_progress)
+{
+ utils::ProgressContext pc("Image sparsify", no_progress);
+ int r = image.sparsify_with_progress(sparse_size, pc);
+ if (r < 0) {
+ pc.fail();
+ return r;
+ }
+ pc.finish();
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_no_progress_option(options);
+ at::add_sparse_size_option(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ false, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE;
+ if (vm.count(at::IMAGE_SPARSE_SIZE)) {
+ sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>();
+ }
+
+ r = do_sparsify(image, sparse_size, vm[at::NO_PROGRESS].as<bool>());
+ if (r < 0) {
+ std::cerr << "rbd: sparsify error: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"sparsify"}, {},
+ "Reclaim space for zeroed image extents.", "",
+ &get_arguments, &execute);
+
+} // namespace sparsify
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Status.cc b/src/tools/rbd/action/Status.cc
new file mode 100644
index 000000000..958a686c4
--- /dev/null
+++ b/src/tools/rbd/action/Status.cc
@@ -0,0 +1,365 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "json_spirit/json_spirit.h"
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/rbd_types.h"
+#include "include/stringify.h"
+#include "librbd/cache/Types.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace status {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int do_show_status(librados::IoCtx& io_ctx, const std::string &image_name,
+ librbd::Image &image, Formatter *f)
+{
+ int r;
+ std::list<librbd::image_watcher_t> watchers;
+
+ r = image.list_watchers(watchers);
+ if (r < 0)
+ return r;
+
+ uint64_t features;
+ r = image.features(&features);
+ if (r < 0) {
+ return r;
+ }
+
+ librbd::image_migration_status_t migration_status;
+ std::string source_spec;
+ std::string source_pool_name;
+ std::string dest_pool_name;
+ std::string migration_state;
+ if ((features & RBD_FEATURE_MIGRATING) != 0) {
+ r = librbd::RBD().migration_status(io_ctx, image_name.c_str(),
+ &migration_status,
+ sizeof(migration_status));
+ if (r < 0) {
+ std::cerr << "rbd: getting migration status failed: " << cpp_strerror(r)
+ << std::endl;
+ // not fatal
+ } else {
+ if (migration_status.source_pool_id >= 0) {
+ librados::IoCtx src_io_ctx;
+ r = librados::Rados(io_ctx).ioctx_create2(migration_status.source_pool_id, src_io_ctx);
+ if (r < 0) {
+ source_pool_name = stringify(migration_status.source_pool_id);
+ } else {
+ source_pool_name = src_io_ctx.get_pool_name();
+ }
+ } else {
+ r = image.get_migration_source_spec(&source_spec);
+ if (r < 0) {
+ std::cerr << "rbd: getting migration source spec failed: "
+ << cpp_strerror(r) << std::endl;
+ }
+ }
+
+ librados::IoCtx dst_io_ctx;
+ r = librados::Rados(io_ctx).ioctx_create2(migration_status.dest_pool_id, dst_io_ctx);
+ if (r < 0) {
+ dest_pool_name = stringify(migration_status.dest_pool_id);
+ } else {
+ dest_pool_name = dst_io_ctx.get_pool_name();
+ }
+
+ switch (migration_status.state) {
+ case RBD_IMAGE_MIGRATION_STATE_ERROR:
+ migration_state = "error";
+ break;
+ case RBD_IMAGE_MIGRATION_STATE_PREPARING:
+ migration_state = "preparing";
+ break;
+ case RBD_IMAGE_MIGRATION_STATE_PREPARED:
+ migration_state = "prepared";
+ break;
+ case RBD_IMAGE_MIGRATION_STATE_EXECUTING:
+ migration_state = "executing";
+ break;
+ case RBD_IMAGE_MIGRATION_STATE_EXECUTED:
+ migration_state = "executed";
+ break;
+ case RBD_IMAGE_MIGRATION_STATE_ABORTING:
+ migration_state = "aborting";
+ break;
+ default:
+ migration_state = "unknown";
+ }
+ }
+ }
+
+ struct {
+ // decoded
+ std::string host;
+ std::string path;
+ uint64_t size;
+ std::string mode;
+ std::string stats_timestamp;
+ bool present;
+ bool empty;
+ bool clean;
+ uint64_t allocated_bytes;
+ uint64_t cached_bytes;
+ uint64_t dirty_bytes;
+ uint64_t free_bytes;
+ uint64_t hits_full;
+ uint64_t hits_partial;
+ uint64_t misses;
+ uint64_t hit_bytes;
+ uint64_t miss_bytes;
+
+ // calculated
+ uint64_t total_read_ops;
+ uint64_t total_read_bytes;
+ int hits_full_percent;
+ int hits_partial_percent;
+ int hit_bytes_percent;
+ } cache_state;
+ std::string cache_str;
+ if (features & RBD_FEATURE_DIRTY_CACHE) {
+ r = image.metadata_get(librbd::cache::PERSISTENT_CACHE_STATE, &cache_str);
+ if (r < 0) {
+ std::cerr << "rbd: getting persistent cache state failed: " << cpp_strerror(r)
+ << std::endl;
+ // not fatal
+ }
+ json_spirit::mValue json_root;
+ if (!json_spirit::read(cache_str.c_str(), json_root)) {
+ std::cerr << "rbd: parsing persistent cache state failed" << std::endl;
+ cache_str.clear();
+ } else {
+ try {
+ auto& o = json_root.get_obj();
+ cache_state.host = o["host"].get_str();
+ cache_state.path = o["path"].get_str();
+ cache_state.size = o["size"].get_uint64();
+ cache_state.mode = o["mode"].get_str();
+ time_t stats_timestamp_sec = o["stats_timestamp"].get_uint64();
+ cache_state.stats_timestamp = ctime(&stats_timestamp_sec);
+ cache_state.stats_timestamp.pop_back();
+ cache_state.present = o["present"].get_bool();
+ cache_state.empty = o["empty"].get_bool();
+ cache_state.clean = o["clean"].get_bool();
+ cache_state.allocated_bytes = o["allocated_bytes"].get_uint64();
+ cache_state.cached_bytes = o["cached_bytes"].get_uint64();
+ cache_state.dirty_bytes = o["dirty_bytes"].get_uint64();
+ cache_state.free_bytes = o["free_bytes"].get_uint64();
+ cache_state.hits_full = o["hits_full"].get_uint64();
+ cache_state.hits_partial = o["hits_partial"].get_uint64();
+ cache_state.misses = o["misses"].get_uint64();
+ cache_state.hit_bytes = o["hit_bytes"].get_uint64();
+ cache_state.miss_bytes = o["miss_bytes"].get_uint64();
+ } catch (std::runtime_error &e) {
+ std::cerr << "rbd: parsing persistent cache state failed: " << e.what()
+ << std::endl;
+ cache_str.clear();
+ }
+ cache_state.total_read_ops = cache_state.hits_full +
+ cache_state.hits_partial + cache_state.misses;
+ cache_state.total_read_bytes = cache_state.hit_bytes +
+ cache_state.miss_bytes;
+ cache_state.hits_full_percent = utils::get_percentage(
+ cache_state.hits_full, cache_state.total_read_ops);
+ cache_state.hits_partial_percent = utils::get_percentage(
+ cache_state.hits_partial, cache_state.total_read_ops);
+ cache_state.hit_bytes_percent = utils::get_percentage(
+ cache_state.hit_bytes, cache_state.total_read_bytes);
+ }
+ }
+
+ if (f)
+ f->open_object_section("status");
+
+ if (f) {
+ f->open_array_section("watchers");
+ for (auto &watcher : watchers) {
+ f->open_object_section("watcher");
+ f->dump_string("address", watcher.addr);
+ f->dump_unsigned("client", watcher.id);
+ f->dump_unsigned("cookie", watcher.cookie);
+ f->close_section();
+ }
+ f->close_section(); // watchers
+ if (!migration_state.empty()) {
+ f->open_object_section("migration");
+ if (!source_spec.empty()) {
+ f->dump_string("source_spec", source_spec);
+ } else {
+ f->dump_string("source_pool_name", source_pool_name);
+ f->dump_string("source_pool_namespace",
+ migration_status.source_pool_namespace);
+ f->dump_string("source_image_name", migration_status.source_image_name);
+ f->dump_string("source_image_id", migration_status.source_image_id);
+ }
+ f->dump_string("dest_pool_name", dest_pool_name);
+ f->dump_string("dest_pool_namespace",
+ migration_status.dest_pool_namespace);
+ f->dump_string("dest_image_name", migration_status.dest_image_name);
+ f->dump_string("dest_image_id", migration_status.dest_image_id);
+ f->dump_string("state", migration_state);
+ f->dump_string("state_description", migration_status.state_description);
+ f->close_section(); // migration
+ }
+ if (!cache_str.empty()) {
+ f->open_object_section("persistent_cache");
+ f->dump_string("host", cache_state.host);
+ f->dump_string("path", cache_state.path);
+ f->dump_unsigned("size", cache_state.size);
+ f->dump_string("mode", cache_state.mode);
+ f->dump_string("stats_timestamp", cache_state.stats_timestamp);
+ f->dump_bool("present", cache_state.present);
+ f->dump_bool("empty", cache_state.empty);
+ f->dump_bool("clean", cache_state.clean);
+ f->dump_unsigned("allocated_bytes", cache_state.allocated_bytes);
+ f->dump_unsigned("cached_bytes", cache_state.cached_bytes);
+ f->dump_unsigned("dirty_bytes", cache_state.dirty_bytes);
+ f->dump_unsigned("free_bytes", cache_state.free_bytes);
+ f->dump_unsigned("hits_full", cache_state.hits_full);
+ f->dump_int("hits_full_percent", cache_state.hits_full_percent);
+ f->dump_unsigned("hits_partial", cache_state.hits_partial);
+ f->dump_int("hits_partial_percent", cache_state.hits_partial_percent);
+ f->dump_unsigned("misses", cache_state.misses);
+ f->dump_unsigned("hit_bytes", cache_state.hit_bytes);
+ f->dump_int("hit_bytes_percent", cache_state.hit_bytes_percent);
+ f->dump_unsigned("miss_bytes", cache_state.miss_bytes);
+ f->close_section(); // persistent_cache
+ }
+ } else {
+ if (watchers.size()) {
+ std::cout << "Watchers:" << std::endl;
+ for (auto &watcher : watchers) {
+ std::cout << "\twatcher=" << watcher.addr << " client." << watcher.id
+ << " cookie=" << watcher.cookie << std::endl;
+ }
+ } else {
+ std::cout << "Watchers: none" << std::endl;
+ }
+ if (!migration_state.empty()) {
+ if (!migration_status.source_pool_namespace.empty()) {
+ source_pool_name += ("/" + migration_status.source_pool_namespace);
+ }
+ if (!migration_status.dest_pool_namespace.empty()) {
+ dest_pool_name += ("/" + migration_status.dest_pool_namespace);
+ }
+
+ std::cout << "Migration:" << std::endl;
+ std::cout << "\tsource: ";
+ if (!source_spec.empty()) {
+ std::cout << source_spec;
+ } else {
+ std::cout << source_pool_name << "/"
+ << migration_status.source_image_name;
+ if (!migration_status.source_image_id.empty()) {
+ std::cout << " (" << migration_status.source_image_id << ")";
+ }
+ }
+ std::cout << std::endl;
+ std::cout << "\tdestination: " << dest_pool_name << "/"
+ << migration_status.dest_image_name << " ("
+ << migration_status.dest_image_id << ")" << std::endl;
+ std::cout << "\tstate: " << migration_state;
+ if (!migration_status.state_description.empty()) {
+ std::cout << " (" << migration_status.state_description << ")";
+ }
+ std::cout << std::endl;
+ }
+ if (!cache_str.empty()) {
+ std::cout << "Persistent cache state:" << std::endl;
+ std::cout << "\thost: " << cache_state.host << std::endl;
+ std::cout << "\tpath: " << cache_state.path << std::endl;
+ std::cout << "\tsize: " << byte_u_t(cache_state.size) << std::endl;
+ std::cout << "\tmode: " << cache_state.mode << std::endl;
+ std::cout << "\tstats_timestamp: " << cache_state.stats_timestamp
+ << std::endl;
+ std::cout << "\tpresent: " << (cache_state.present ? "true" : "false")
+ << "\tempty: " << (cache_state.empty ? "true" : "false")
+ << "\tclean: " << (cache_state.clean ? "true" : "false")
+ << std::endl;
+ std::cout << "\tallocated: " << byte_u_t(cache_state.allocated_bytes)
+ << std::endl;
+ std::cout << "\tcached: " << byte_u_t(cache_state.cached_bytes)
+ << std::endl;
+ std::cout << "\tdirty: " << byte_u_t(cache_state.dirty_bytes) << std::endl;
+ std::cout << "\tfree: " << byte_u_t(cache_state.free_bytes) << std::endl;
+ std::cout << "\thits_full: " << cache_state.hits_full << " / "
+ << cache_state.hits_full_percent << "%" << std::endl;
+ std::cout << "\thits_partial: " << cache_state.hits_partial << " / "
+ << cache_state.hits_partial_percent << "%" << std::endl;
+ std::cout << "\tmisses: " << cache_state.misses << std::endl;
+ std::cout << "\thit_bytes: " << byte_u_t(cache_state.hit_bytes) << " / "
+ << cache_state.hit_bytes_percent << "%" << std::endl;
+ std::cout << "\tmiss_bytes: " << byte_u_t(cache_state.miss_bytes)
+ << std::endl;
+ }
+ }
+
+ if (f) {
+ f->close_section(); // status
+ f->flush(std::cout);
+ }
+
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_format_options(options);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_show_status(io_ctx, image_name, image, formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: show status failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"status"}, {}, "Show the status of this image.", "", &get_arguments,
+ &execute);
+
+} // namespace status
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Trash.cc b/src/tools/rbd/action/Trash.cc
new file mode 100644
index 000000000..f1fd4df3c
--- /dev/null
+++ b/src/tools/rbd/action/Trash.cc
@@ -0,0 +1,543 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "common/Clock.h"
+#include <iostream>
+#include <sstream>
+#include <boost/program_options.hpp>
+#include <boost/bind/bind.hpp>
+
+namespace rbd {
+namespace action {
+namespace trash {
+using namespace boost::placeholders;
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+//Optional arguments used only by this set of commands (rbd trash *)
+static const std::string EXPIRES_AT("expires-at");
+static const std::string EXPIRED_BEFORE("expired-before");
+static const std::string THRESHOLD("threshold");
+
+static bool is_not_trash_user(const librbd::trash_image_info_t &trash_info) {
+ return trash_info.source != RBD_TRASH_IMAGE_SOURCE_USER &&
+ trash_info.source != RBD_TRASH_IMAGE_SOURCE_USER_PARENT;
+}
+
+void get_move_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+ options->add_options()
+ (EXPIRES_AT.c_str(), po::value<std::string>()->default_value("now"),
+ "set the expiration time of an image so it can be purged when it is stale");
+}
+
+int execute_move(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ utime_t now = ceph_clock_now();
+ utime_t exp_time = now;
+ std::string expires_at;
+ if (vm.find(EXPIRES_AT) != vm.end()) {
+ expires_at = vm[EXPIRES_AT].as<std::string>();
+ r = utime_t::invoke_date(expires_at, &exp_time);
+ if (r < 0) {
+ std::cerr << "rbd: error calling /bin/date: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ }
+
+ time_t dt = (exp_time - now).sec();
+ if(dt < 0) {
+ std::cerr << "rbd: cannot use a date in the past as an expiration date"
+ << std::endl;
+ return -EINVAL;
+ }
+
+ librbd::RBD rbd;
+ r = rbd.trash_move(io_ctx, image_name.c_str(), dt);
+ if (r < 0) {
+ std::cerr << "rbd: deferred delete error: " << cpp_strerror(r)
+ << std::endl;
+ }
+
+ if (expires_at != "now") {
+ std::cout << "rbd: image " << image_name << " will expire at " << exp_time << std::endl;
+ }
+ return r;
+}
+
+void get_remove_arguments(po::options_description *positional,
+ po::options_description *options) {
+ positional->add_options()
+ (at::IMAGE_ID.c_str(), "image id\n(example: [<pool-name>/[<namespace>/]]<image-id>)");
+ at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_id_option(options);
+
+ at::add_no_progress_option(options);
+ options->add_options()
+ ("force", po::bool_switch(), "force remove of non-expired delayed images");
+}
+
+int execute_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_id;
+ int r = utils::get_pool_image_id(vm, &arg_index, &pool_name, &namespace_name,
+ &image_id);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ io_ctx.set_pool_full_try();
+ librbd::RBD rbd;
+
+ utils::ProgressContext pc("Removing image", vm[at::NO_PROGRESS].as<bool>());
+ r = rbd.trash_remove_with_progress(io_ctx, image_id.c_str(),
+ vm["force"].as<bool>(), pc);
+ if (r < 0) {
+ if (r == -ENOTEMPTY) {
+ std::cerr << "rbd: image has snapshots - these must be deleted"
+ << " with 'rbd snap purge' before the image can be removed."
+ << std::endl;
+ } else if (r == -EUCLEAN) {
+ std::cerr << "rbd: error: image not fully moved to trash."
+ << std::endl;
+ } else if (r == -EBUSY) {
+ std::cerr << "rbd: error: image still has watchers"
+ << std::endl
+ << "This means the image is still open or the client using "
+ << "it crashed. Try again after closing/unmapping it or "
+ << "waiting 30s for the crashed client to timeout."
+ << std::endl;
+ } else if (r == -EMLINK) {
+ std::cerr << std::endl
+ << "Remove the image from the group and try again."
+ << std::endl;
+ } else if (r == -EPERM) {
+ std::cerr << std::endl
+ << "Deferment time has not expired, please use --force if you "
+ << "really want to remove the image"
+ << std::endl;
+ } else {
+ std::cerr << "rbd: remove error: " << cpp_strerror(r) << std::endl;
+ }
+ pc.fail();
+ return r;
+ }
+
+ pc.finish();
+
+ return r;
+}
+
+std::string delete_status(time_t deferment_end_time) {
+ time_t now = time(nullptr);
+
+ std::string time_str = ctime(&deferment_end_time);
+ time_str = time_str.substr(0, time_str.length() - 1);
+
+ std::stringstream ss;
+ if (now < deferment_end_time) {
+ ss << "protected until " << time_str;
+ } else {
+ ss << "expired at " << time_str;
+ }
+
+ return ss.str();
+}
+
+int do_list(librbd::RBD &rbd, librados::IoCtx& io_ctx, bool long_flag,
+ bool all_flag, Formatter *f) {
+ std::vector<librbd::trash_image_info_t> trash_entries;
+ int r = rbd.trash_list(io_ctx, trash_entries);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!all_flag) {
+ trash_entries.erase(remove_if(trash_entries.begin(),
+ trash_entries.end(),
+ boost::bind(is_not_trash_user, _1)),
+ trash_entries.end());
+ }
+
+ if (!long_flag) {
+ if (f) {
+ f->open_array_section("trash");
+ }
+ for (const auto& entry : trash_entries) {
+ if (f) {
+ f->open_object_section("image");
+ f->dump_string("id", entry.id);
+ f->dump_string("name", entry.name);
+ f->close_section();
+ } else {
+ std::cout << entry.id << " " << entry.name << std::endl;
+ }
+ }
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ }
+ return 0;
+ }
+
+ TextTable tbl;
+
+ if (f) {
+ f->open_array_section("trash");
+ } else {
+ tbl.define_column("ID", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("SOURCE", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("DELETED_AT", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("STATUS", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("PARENT", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ for (const auto& entry : trash_entries) {
+ librbd::Image im;
+
+ r = rbd.open_by_id_read_only(io_ctx, im, entry.id.c_str(), NULL);
+ // image might disappear between rbd.list() and rbd.open(); ignore
+ // that, warn about other possible errors (EPERM, say, for opening
+ // an old-format image, because you need execute permission for the
+ // class method)
+ if (r < 0) {
+ if (r != -ENOENT) {
+ std::cerr << "rbd: error opening " << entry.id << ": "
+ << cpp_strerror(r) << std::endl;
+ }
+ // in any event, continue to next image
+ continue;
+ }
+
+ std::string del_source;
+ switch (entry.source) {
+ case RBD_TRASH_IMAGE_SOURCE_USER:
+ del_source = "USER";
+ break;
+ case RBD_TRASH_IMAGE_SOURCE_MIRRORING:
+ del_source = "MIRRORING";
+ break;
+ case RBD_TRASH_IMAGE_SOURCE_MIGRATION:
+ del_source = "MIGRATION";
+ break;
+ case RBD_TRASH_IMAGE_SOURCE_REMOVING:
+ del_source = "REMOVING";
+ break;
+ case RBD_TRASH_IMAGE_SOURCE_USER_PARENT:
+ del_source = "USER_PARENT";
+ break;
+ }
+
+ std::string time_str = ctime(&entry.deletion_time);
+ time_str = time_str.substr(0, time_str.length() - 1);
+
+ bool has_parent = false;
+ std::string parent;
+ librbd::linked_image_spec_t parent_image;
+ librbd::snap_spec_t parent_snap;
+ r = im.get_parent(&parent_image, &parent_snap);
+ if (r == -ENOENT) {
+ r = 0;
+ } else if (r < 0) {
+ return r;
+ } else {
+ parent = parent_image.pool_name + "/";
+ if (!parent_image.pool_namespace.empty()) {
+ parent += parent_image.pool_namespace + "/";
+ }
+ parent += parent_image.image_name + "@" + parent_snap.name;
+ has_parent = true;
+ }
+
+ if (f) {
+ f->open_object_section("image");
+ f->dump_string("id", entry.id);
+ f->dump_string("name", entry.name);
+ f->dump_string("source", del_source);
+ f->dump_string("deleted_at", time_str);
+ f->dump_string("status",
+ delete_status(entry.deferment_end_time));
+ if (has_parent) {
+ f->open_object_section("parent");
+ f->dump_string("pool", parent_image.pool_name);
+ f->dump_string("pool_namespace", parent_image.pool_namespace);
+ f->dump_string("image", parent_image.image_name);
+ f->dump_string("snapshot", parent_snap.name);
+ f->close_section();
+ }
+ f->close_section();
+ } else {
+ tbl << entry.id
+ << entry.name
+ << del_source
+ << time_str
+ << delete_status(entry.deferment_end_time);
+ if (has_parent)
+ tbl << parent;
+ tbl << TextTable::endrow;
+ }
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else if (!trash_entries.empty()) {
+ std::cout << tbl;
+ }
+
+ return r < 0 ? r : 0;
+}
+
+void get_list_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+ options->add_options()
+ ("all,a", po::bool_switch(), "list images from all sources");
+ options->add_options()
+ ("long,l", po::bool_switch(), "long listing format");
+ at::add_format_options(options);
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::disable_cache();
+
+ librbd::RBD rbd;
+ r = do_list(rbd, io_ctx, vm["long"].as<bool>(), vm["all"].as<bool>(),
+ formatter.get());
+ if (r < 0) {
+ std::cerr << "rbd: trash list: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+void get_purge_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_pool_options(positional, options, true);
+ at::add_no_progress_option(options);
+
+ options->add_options()
+ (EXPIRED_BEFORE.c_str(), po::value<std::string>()->value_name("date"),
+ "purges images that expired before the given date");
+ options->add_options()
+ (THRESHOLD.c_str(), po::value<float>(),
+ "purges images until the current pool data usage is reduced to X%, "
+ "value range: 0.0-1.0");
+}
+
+int execute_purge(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::string pool_name;
+ std::string namespace_name;
+ size_t arg_index = 0;
+ int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
+ &namespace_name, &arg_index);
+ if (r < 0) {
+ return r;
+ }
+
+ utils::disable_cache();
+
+ librbd::RBD rbd;
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ io_ctx.set_pool_full_try();
+
+ float threshold = -1;
+ time_t expire_ts = 0;
+
+ if (vm.find(THRESHOLD) != vm.end()) {
+ threshold = vm[THRESHOLD].as<float>();
+ } else {
+ if (vm.find(EXPIRED_BEFORE) != vm.end()) {
+ utime_t new_time;
+ r = utime_t::invoke_date(vm[EXPIRED_BEFORE].as<std::string>(), &new_time);
+ if (r < 0) {
+ std::cerr << "rbd: error calling /bin/date: " << cpp_strerror(r)
+ << std::endl;
+ return r;
+ }
+ expire_ts = new_time.sec();
+ }
+ }
+
+ utils::ProgressContext pc("Removing images", vm[at::NO_PROGRESS].as<bool>());
+ r = rbd.trash_purge_with_progress(io_ctx, expire_ts, threshold, pc);
+ if (r < 0) {
+ pc.fail();
+ if (r == -ENOTEMPTY || r == -EBUSY || r == -EMLINK || r == -EUCLEAN) {
+ std::cerr << "rbd: some expired images could not be removed"
+ << std::endl
+ << "Ensure that they are closed/unmapped, do not have "
+ << "snapshots (including trashed snapshots with linked "
+ << "clones), are not in a group and were moved to the "
+ << "trash successfully."
+ << std::endl;
+ }
+ return r;
+ }
+
+ pc.finish();
+ return 0;
+}
+
+void get_restore_arguments(po::options_description *positional,
+ po::options_description *options) {
+ positional->add_options()
+ (at::IMAGE_ID.c_str(), "image id\n(example: [<pool-name>/]<image-id>)");
+ at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE);
+ at::add_image_id_option(options);
+ at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE, "");
+}
+
+int execute_restore(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_id;
+ int r = utils::get_pool_image_id(vm, &arg_index, &pool_name, &namespace_name,
+ &image_id);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+ if (r < 0) {
+ return r;
+ }
+
+ std::string name;
+ if (vm.find(at::IMAGE_NAME) != vm.end()) {
+ name = vm[at::IMAGE_NAME].as<std::string>();
+ }
+
+ librbd::RBD rbd;
+ r = rbd.trash_restore(io_ctx, image_id.c_str(), name.c_str());
+ if (r < 0) {
+ if (r == -ENOENT) {
+ std::cerr << "rbd: error: image does not exist in trash"
+ << std::endl;
+ } else if (r == -EEXIST) {
+ std::cerr << "rbd: error: an image with the same name already exists, "
+ << "try again with a different name"
+ << std::endl;
+ } else {
+ std::cerr << "rbd: restore error: " << cpp_strerror(r) << std::endl;
+ }
+ return r;
+ }
+
+ return r;
+}
+
+Shell::Action action_move(
+ {"trash", "move"}, {"trash", "mv"}, "Move an image to the trash.", "",
+ &get_move_arguments, &execute_move);
+
+Shell::Action action_remove(
+ {"trash", "remove"}, {"trash", "rm"}, "Remove an image from trash.", "",
+ &get_remove_arguments, &execute_remove);
+
+Shell::Action action_purge(
+ {"trash", "purge"}, {}, "Remove all expired images from trash.", "",
+ &get_purge_arguments, &execute_purge);
+
+Shell::Action action_list(
+ {"trash", "list"}, {"trash", "ls"}, "List trash images.", "",
+ &get_list_arguments, &execute_list);
+
+Shell::Action action_restore(
+ {"trash", "restore"}, {}, "Restore an image from trash.", "",
+ &get_restore_arguments, &execute_restore);
+
+} // namespace trash
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/TrashPurgeSchedule.cc b/src/tools/rbd/action/TrashPurgeSchedule.cc
new file mode 100644
index 000000000..5c133c295
--- /dev/null
+++ b/src/tools/rbd/action/TrashPurgeSchedule.cc
@@ -0,0 +1,355 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Schedule.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "common/ceph_context.h"
+#include "common/ceph_json.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "global/global_context.h"
+#include "include/stringify.h"
+
+#include <iostream>
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+#include <boost/program_options.hpp>
+
+#include "json_spirit/json_spirit.h"
+
+namespace rbd {
+namespace action {
+namespace trash_purge_schedule {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+namespace {
+
+class ScheduleStatus {
+public:
+ ScheduleStatus() {
+ }
+
+ int parse(const std::string &status) {
+ json_spirit::mValue json_root;
+ if(!json_spirit::read(status, json_root)) {
+ std::cerr << "rbd: invalid schedule status JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ try {
+ auto &s = json_root.get_obj();
+
+ if (s["scheduled"].type() != json_spirit::array_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "scheduled is not array" << std::endl;
+ return -EBADMSG;
+ }
+
+ for (auto &item_val : s["scheduled"].get_array()) {
+ if (item_val.type() != json_spirit::obj_type) {
+ std::cerr << "rbd: unexpected schedule status JSON received: "
+ << "schedule item is not object" << std::endl;
+ return -EBADMSG;
+ }
+
+ auto &item = item_val.get_obj();
+
+ if (item["pool_name"].type() != json_spirit::str_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "pool_name is not string" << std::endl;
+ return -EBADMSG;
+ }
+ auto pool_name = item["pool_name"].get_str();
+
+ if (item["namespace"].type() != json_spirit::str_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "namespace is not string" << std::endl;
+ return -EBADMSG;
+ }
+ auto namespace_name = item["namespace"].get_str();
+
+ if (item["schedule_time"].type() != json_spirit::str_type) {
+ std::cerr << "rbd: unexpected schedule JSON received: "
+ << "schedule_time is not string" << std::endl;
+ return -EBADMSG;
+ }
+ auto schedule_time = item["schedule_time"].get_str();
+
+ scheduled.insert({pool_name, namespace_name, schedule_time});
+ }
+
+ } catch (std::runtime_error &) {
+ std::cerr << "rbd: invalid schedule JSON received" << std::endl;
+ return -EBADMSG;
+ }
+
+ return 0;
+ }
+
+ void dump(Formatter *f) {
+ f->open_array_section("scheduled");
+ for (auto &item : scheduled) {
+ f->open_object_section("item");
+ f->dump_string("pool", item.pool_name);
+ f->dump_string("namespace", item.namespace_name);
+ f->dump_string("schedule_time", item.schedule_time);
+ f->close_section(); // item
+ }
+ f->close_section(); // scheduled
+ }
+
+ friend std::ostream& operator<<(std::ostream& os, ScheduleStatus &d);
+
+private:
+
+ struct Item {
+ std::string pool_name;
+ std::string namespace_name;
+ std::string schedule_time;
+
+ Item(const std::string &pool_name, const std::string &namespace_name,
+ const std::string &schedule_time)
+ : pool_name(pool_name), namespace_name(namespace_name),
+ schedule_time(schedule_time) {
+ }
+
+ bool operator<(const Item &rhs) const {
+ if (pool_name != rhs.pool_name) {
+ return pool_name < rhs.pool_name;
+ }
+ return namespace_name < rhs.namespace_name;
+ }
+ };
+
+ std::set<Item> scheduled;
+};
+
+std::ostream& operator<<(std::ostream& os, ScheduleStatus &s) {
+ TextTable tbl;
+ tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("NAMESPACE", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("SCHEDULE TIME", TextTable::LEFT, TextTable::LEFT);
+
+ for (auto &item : s.scheduled) {
+ tbl << item.pool_name << item.namespace_name << item.schedule_time
+ << TextTable::endrow;
+ }
+
+ os << tbl;
+ return os;
+}
+
+} // anonymous namespace
+
+void get_arguments_add(po::options_description *positional,
+ po::options_description *options) {
+ add_level_spec_options(options, false);
+ add_schedule_options(positional, true);
+}
+
+int execute_add(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::map<std::string, std::string> args;
+
+ int r = get_level_spec_args(vm, &args);
+ if (r < 0) {
+ return r;
+ }
+ r = get_schedule_args(vm, true, &args);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ normalize_level_spec_args(&args);
+ r = utils::mgr_command(rados, "rbd trash purge schedule add", args,
+ &std::cout, &std::cerr);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+void get_arguments_remove(po::options_description *positional,
+ po::options_description *options) {
+ add_level_spec_options(options, false);
+ add_schedule_options(positional, false);
+}
+
+int execute_remove(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::map<std::string, std::string> args;
+
+ int r = get_level_spec_args(vm, &args);
+ if (r < 0) {
+ return r;
+ }
+ r = get_schedule_args(vm, false, &args);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ normalize_level_spec_args(&args);
+ r = utils::mgr_command(rados, "rbd trash purge schedule remove", args,
+ &std::cout, &std::cerr);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+void get_arguments_list(po::options_description *positional,
+ po::options_description *options) {
+ add_level_spec_options(options, false);
+ options->add_options()
+ ("recursive,R", po::bool_switch(), "list all schedules");
+ at::add_format_options(options);
+}
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::map<std::string, std::string> args;
+
+ int r = get_level_spec_args(vm, &args);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ normalize_level_spec_args(&args);
+ std::stringstream out;
+ r = utils::mgr_command(rados, "rbd trash purge schedule list", args, &out,
+ &std::cerr);
+ if (r < 0) {
+ return r;
+ }
+
+ ScheduleList schedule_list(false);
+ r = schedule_list.parse(out.str());
+ if (r < 0) {
+ return r;
+ }
+
+ if (vm["recursive"].as<bool>()) {
+ if (formatter.get()) {
+ schedule_list.dump(formatter.get());
+ formatter->flush(std::cout);
+ } else {
+ std::cout << schedule_list;
+ }
+ } else {
+ auto schedule = schedule_list.find(args["level_spec"]);
+ if (schedule == nullptr) {
+ return -ENOENT;
+ }
+
+ if (formatter.get()) {
+ schedule->dump(formatter.get());
+ formatter->flush(std::cout);
+ } else {
+ std::cout << *schedule << std::endl;
+ }
+ }
+
+ return 0;
+}
+
+void get_arguments_status(po::options_description *positional,
+ po::options_description *options) {
+ add_level_spec_options(options, false);
+ at::add_format_options(options);
+}
+
+int execute_status(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ std::map<std::string, std::string> args;
+
+ int r = get_level_spec_args(vm, &args);
+ if (r < 0) {
+ return r;
+ }
+
+ at::Format::Formatter formatter;
+ r = utils::get_formatter(vm, &formatter);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ r = utils::init_rados(&rados);
+ if (r < 0) {
+ return r;
+ }
+
+ normalize_level_spec_args(&args);
+ std::stringstream out;
+ r = utils::mgr_command(rados, "rbd trash purge schedule status", args, &out,
+ &std::cerr);
+ ScheduleStatus schedule_status;
+ r = schedule_status.parse(out.str());
+ if (r < 0) {
+ return r;
+ }
+
+ if (formatter.get()) {
+ schedule_status.dump(formatter.get());
+ formatter->flush(std::cout);
+ } else {
+ std::cout << schedule_status;
+ }
+
+ return 0;
+}
+
+Shell::SwitchArguments switched_arguments({"recursive", "R"});
+
+Shell::Action add_action(
+ {"trash", "purge", "schedule", "add"}, {}, "Add trash purge schedule.", "",
+ &get_arguments_add, &execute_add);
+Shell::Action remove_action(
+ {"trash", "purge", "schedule", "remove"},
+ {"trash", "purge", "schedule", "rm"}, "Remove trash purge schedule.",
+ "", &get_arguments_remove, &execute_remove);
+Shell::Action list_action(
+ {"trash", "purge", "schedule", "list"},
+ {"trash", "purge", "schedule", "ls"}, "List trash purge schedule.",
+ "", &get_arguments_list, &execute_list);
+Shell::Action status_action(
+ {"trash", "purge", "schedule", "status"}, {},
+ "Show trash purge schedule status.", "", &get_arguments_status,
+ &execute_status);
+
+} // namespace trash_purge_schedule
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Watch.cc b/src/tools/rbd/action/Watch.cc
new file mode 100644
index 000000000..98697bc28
--- /dev/null
+++ b/src/tools/rbd/action/Watch.cc
@@ -0,0 +1,149 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/rbd_types.h"
+#include "librbd/WatchNotifyTypes.h"
+#include "common/errno.h"
+#include <iostream>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace watch {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+class RbdWatchCtx : public librados::WatchCtx2 {
+public:
+ RbdWatchCtx(librados::IoCtx& io_ctx, const char *image_name,
+ const std::string &header_oid)
+ : m_io_ctx(io_ctx), m_image_name(image_name), m_header_oid(header_oid)
+ {
+ }
+
+ ~RbdWatchCtx() override {}
+
+ void handle_notify(uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl) override {
+ using namespace librbd::watch_notify;
+ NotifyMessage notify_message;
+ if (bl.length() == 0) {
+ notify_message = NotifyMessage(new HeaderUpdatePayload());
+ } else {
+ try {
+ auto iter = bl.cbegin();
+ notify_message.decode(iter);
+ } catch (const buffer::error &err) {
+ std::cerr << "rbd: failed to decode image notification" << std::endl;
+ }
+ }
+
+ std::cout << m_image_name << " received notification: notify_id="
+ << notify_id << ", cookie=" << cookie << ", notifier_id="
+ << notifier_id << ", bl.length=" << bl.length() << ", notify_op="
+ << notify_message.get_notify_op() << std::endl;
+ bufferlist reply;
+ m_io_ctx.notify_ack(m_header_oid, notify_id, cookie, reply);
+ }
+
+ void handle_error(uint64_t cookie, int err) override {
+ std::cerr << m_image_name << " received error: cookie=" << cookie << ", "
+ << "err=" << cpp_strerror(err) << std::endl;
+ }
+private:
+ librados::IoCtx m_io_ctx;
+ const char *m_image_name;
+ std::string m_header_oid;
+};
+
+static int do_watch(librados::IoCtx& pp, librbd::Image &image,
+ const char *imgname)
+{
+ uint8_t old_format;
+ int r = image.old_format(&old_format);
+ if (r < 0) {
+ std::cerr << "failed to query format" << std::endl;
+ return r;
+ }
+
+ std::string header_oid;
+ if (old_format != 0) {
+ header_oid = std::string(imgname) + RBD_SUFFIX;
+ } else {
+ std::string id;
+ r = image.get_id(&id);
+ if (r < 0) {
+ return r;
+ }
+
+ header_oid = RBD_HEADER_PREFIX + id;
+ }
+
+ uint64_t cookie;
+ RbdWatchCtx ctx(pp, imgname, header_oid);
+ r = pp.watch2(header_oid, &cookie, &ctx);
+ if (r < 0) {
+ std::cerr << "rbd: watch failed" << std::endl;
+ return r;
+ }
+
+ std::cout << "press enter to exit..." << std::endl;
+ getchar();
+
+ r = pp.unwatch2(cookie);
+ if (r < 0) {
+ std::cerr << "rbd: unwatch failed" << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+void get_arguments(po::options_description *positional,
+ po::options_description *options) {
+ at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
+}
+
+int execute(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+ size_t arg_index = 0;
+ std::string pool_name;
+ std::string namespace_name;
+ std::string image_name;
+ std::string snap_name;
+ int r = utils::get_pool_image_snapshot_names(
+ vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
+ &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
+ utils::SPEC_VALIDATION_NONE);
+ if (r < 0) {
+ return r;
+ }
+
+ librados::Rados rados;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
+ true, &rados, &io_ctx, &image);
+ if (r < 0) {
+ return r;
+ }
+
+ r = do_watch(io_ctx, image, image_name.c_str());
+ if (r < 0) {
+ std::cerr << "rbd: watch failed: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ return 0;
+}
+
+Shell::Action action(
+ {"watch"}, {}, "Watch events on image.", "", &get_arguments, &execute);
+
+} // namespace watch
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/action/Wnbd.cc b/src/tools/rbd/action/Wnbd.cc
new file mode 100644
index 000000000..85d2c7057
--- /dev/null
+++ b/src/tools/rbd/action/Wnbd.cc
@@ -0,0 +1,172 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+#include "include/stringify.h"
+#include "common/SubProcess.h"
+#include <iostream>
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/program_options.hpp>
+
+namespace rbd {
+namespace action {
+namespace wnbd {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+#if defined(_WIN32)
+static int call_wnbd_cmd(const po::variables_map &vm,
+ const std::vector<std::string> &args,
+ const std::vector<std::string> &ceph_global_init_args) {
+ char exe_path[PATH_MAX];
+ ssize_t exe_path_bytes = get_self_exe_path(exe_path, PATH_MAX);
+
+ if (exe_path_bytes > 4) {
+ // Drop .exe suffix as we're going to add the "-wnbd" suffix.
+ exe_path[strlen(exe_path) - 4] = '\0';
+ exe_path_bytes -= 4;
+ }
+
+ if (exe_path_bytes < 0) {
+ strcpy(exe_path, "rbd-wnbd");
+ } else {
+ if (snprintf(exe_path + exe_path_bytes,
+ sizeof(exe_path) - exe_path_bytes,
+ "-wnbd") < 0) {
+ return -EOVERFLOW;
+ }
+ }
+
+ SubProcess process(exe_path, SubProcess::KEEP, SubProcess::KEEP, SubProcess::KEEP);
+
+ for (auto &arg : ceph_global_init_args) {
+ process.add_cmd_arg(arg.c_str());
+ }
+
+ for (auto &arg : args) {
+ process.add_cmd_arg(arg.c_str());
+ }
+
+ if (process.spawn()) {
+ std::cerr << "rbd: failed to run rbd-wnbd: " << process.err() << std::endl;
+ return -EINVAL;
+ }
+ int exit_code = process.join();
+ if (exit_code) {
+ std::cerr << "rbd: rbd-wnbd failed with error: " << process.err() << std::endl;
+ return exit_code;
+ }
+
+ return 0;
+}
+#endif
+
+int execute_list(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(_WIN32)
+ std::cerr << "rbd: wnbd is only supported on Windows" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::vector<std::string> args;
+
+ args.push_back("list");
+
+ if (vm.count("format")) {
+ args.push_back("--format");
+ args.push_back(vm["format"].as<at::Format>().value);
+ }
+ if (vm["pretty-format"].as<bool>()) {
+ args.push_back("--pretty-format");
+ }
+
+ return call_wnbd_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_map(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(_WIN32)
+ std::cerr << "rbd: wnbd is only supported on Windows" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::vector<std::string> args;
+
+ args.push_back("map");
+ std::string img;
+ int r = utils::get_image_or_snap_spec(vm, &img);
+ if (r < 0) {
+ return r;
+ }
+ args.push_back(img);
+
+ if (vm["read-only"].as<bool>()) {
+ args.push_back("--read-only");
+ }
+
+ if (vm["exclusive"].as<bool>()) {
+ args.push_back("--exclusive");
+ }
+
+ if (vm.count("options")) {
+ utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(),
+ &args);
+ }
+
+ return call_wnbd_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_unmap(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(_WIN32)
+ std::cerr << "rbd: wnbd is only supported on Windows" << std::endl;
+ return -EOPNOTSUPP;
+#else
+ std::string image_name;
+
+ int r = utils::get_image_or_snap_spec(vm, &image_name);
+ if (r < 0) {
+ return r;
+ }
+
+ std::vector<std::string> args;
+
+ args.push_back("unmap");
+ args.push_back(image_name);
+
+ if (vm.count("options")) {
+ utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(),
+ &args);
+ }
+
+ return call_wnbd_cmd(vm, args, ceph_global_init_args);
+#endif
+}
+
+int execute_attach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(_WIN32)
+ std::cerr << "rbd: wnbd is only supported on Windows" << std::endl;
+#else
+ std::cerr << "rbd: wnbd attach command not supported" << std::endl;
+#endif
+ return -EOPNOTSUPP;
+}
+
+int execute_detach(const po::variables_map &vm,
+ const std::vector<std::string> &ceph_global_init_args) {
+#if !defined(_WIN32)
+ std::cerr << "rbd: wnbd is only supported on Windows" << std::endl;
+#else
+ std::cerr << "rbd: wnbd detach command not supported" << std::endl;
+#endif
+ return -EOPNOTSUPP;
+}
+
+} // namespace wnbd
+} // namespace action
+} // namespace rbd
diff --git a/src/tools/rbd/rbd.cc b/src/tools/rbd/rbd.cc
new file mode 100644
index 000000000..a8c59d575
--- /dev/null
+++ b/src/tools/rbd/rbd.cc
@@ -0,0 +1,10 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/Shell.h"
+
+int main(int argc, const char **argv)
+{
+ rbd::Shell shell;
+ return shell.execute(argc, argv);
+}
diff --git a/src/tools/rbd_ggate/CMakeLists.txt b/src/tools/rbd_ggate/CMakeLists.txt
new file mode 100644
index 000000000..5c5572c48
--- /dev/null
+++ b/src/tools/rbd_ggate/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_executable(rbd-ggate
+ Driver.cc
+ Server.cc
+ Watcher.cc
+ debug.cc
+ ggate_drv.c
+ main.cc)
+target_link_libraries(rbd-ggate geom librbd librados global)
+install(TARGETS rbd-ggate DESTINATION bin)
diff --git a/src/tools/rbd_ggate/Driver.cc b/src/tools/rbd_ggate/Driver.cc
new file mode 100644
index 000000000..80acfe00c
--- /dev/null
+++ b/src/tools/rbd_ggate/Driver.cc
@@ -0,0 +1,165 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <stdlib.h>
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "Driver.h"
+#include "Request.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::ggate::Driver: " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace ggate {
+
+int Driver::load() {
+
+ return ggate_drv_load();
+}
+
+int Driver::kill(const std::string &devname) {
+
+ int r = ggate_drv_kill(devname.c_str());
+
+ return r;
+}
+
+int Driver::list(std::map<std::string, DevInfo> *devices) {
+ size_t size = 1024;
+ ggate_drv_info *devs = nullptr;
+ int r;
+
+ while (size <= 1024 * 1024) {
+ devs = static_cast<ggate_drv_info *>(
+ realloc(static_cast<void *>(devs), size * sizeof(*devs)));
+ r = ggate_drv_list(devs, &size);
+ if (r != -ERANGE) {
+ break;
+ }
+ }
+ if (r < 0) {
+ goto free;
+ }
+
+ devices->clear();
+ for (size_t i = 0; i < size; i++) {
+ auto &dev = devs[i];
+ (*devices)[dev.id] = {dev.name, dev.info};
+ }
+
+free:
+ free(devs);
+
+ return r;
+}
+
+Driver::Driver(const std::string &devname, size_t sectorsize, size_t mediasize,
+ bool readonly, const std::string &info)
+ : m_devname(devname), m_sectorsize(sectorsize), m_mediasize(mediasize),
+ m_readonly(readonly), m_info(info) {
+}
+
+int Driver::init() {
+ dout(20) << dendl;
+
+ char name[PATH_MAX];
+ size_t namelen;
+
+ if (m_devname.empty()) {
+ name[0] = '\0';
+ namelen = PATH_MAX;
+ } else {
+ namelen = m_devname.size();
+ if (namelen >= PATH_MAX) {
+ return -ENAMETOOLONG;
+ }
+ strncpy(name, m_devname.c_str(), namelen + 1);
+ }
+
+ int r = ggate_drv_create(name, namelen, m_sectorsize, m_mediasize, m_readonly,
+ m_info.c_str(), &m_drv);
+ if (r < 0) {
+ return r;
+ }
+
+ if (m_devname.empty()) {
+ m_devname = name;
+ }
+
+ return 0;
+}
+
+std::string Driver::get_devname() const {
+ dout(30) << m_devname << dendl;
+
+ return m_devname;
+}
+
+void Driver::shut_down() {
+ dout(20) << dendl;
+
+ ggate_drv_destroy(m_drv);
+}
+
+int Driver::resize(size_t newsize) {
+ dout(20) << "newsize=" << newsize << dendl;
+
+ int r = ggate_drv_resize(m_drv, newsize);
+ if (r < 0) {
+ return r;
+ }
+
+ m_mediasize = newsize;
+ return 0;
+}
+
+int Driver::recv(Request **req) {
+ dout(20) << dendl;
+
+ ggate_drv_req_t req_;
+
+ int r = ggate_drv_recv(m_drv, &req_);
+ if (r < 0) {
+ return r;
+ }
+
+ *req = new Request(req_);
+
+ dout(20) << "req=" << *req << dendl;
+
+ if (ggate_drv_req_cmd(req_) == GGATE_DRV_CMD_WRITE) {
+ bufferptr ptr(buffer::claim_malloc(
+ ggate_drv_req_length(req_),
+ static_cast<char *>(ggate_drv_req_release_buf(req_))));
+ (*req)->bl.push_back(ptr);
+ }
+
+ return 0;
+}
+
+int Driver::send(Request *req) {
+ dout(20) << "req=" << req << dendl;
+
+ if (ggate_drv_req_cmd(req->req) == GGATE_DRV_CMD_READ &&
+ ggate_drv_req_error(req->req) == 0) {
+ ceph_assert(req->bl.length() == ggate_drv_req_length(req->req));
+ // TODO: avoid copying?
+ req->bl.begin().copy(ggate_drv_req_length(req->req),
+ static_cast<char *>(ggate_drv_req_buf(req->req)));
+ dout(20) << "copied resulting " << req->bl.length() << " bytes to "
+ << ggate_drv_req_buf(req->req) << dendl;
+ }
+
+ int r = ggate_drv_send(m_drv, req->req);
+
+ delete req;
+ return r;
+}
+
+} // namespace ggate
+} // namespace rbd
diff --git a/src/tools/rbd_ggate/Driver.h b/src/tools/rbd_ggate/Driver.h
new file mode 100644
index 000000000..50be72b9c
--- /dev/null
+++ b/src/tools/rbd_ggate/Driver.h
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_DRIVER_H
+#define CEPH_RBD_GGATE_DRIVER_H
+
+#include <map>
+#include <string>
+
+#include "ggate_drv.h"
+
+namespace rbd {
+namespace ggate {
+
+struct Request;
+
+class Driver {
+public:
+ typedef std::pair<std::string, std::string> DevInfo;
+ static int load();
+ static int kill(const std::string &devname);
+ static int list(std::map<std::string, DevInfo> *devices);
+
+ Driver(const std::string &devname, size_t sectorsize, size_t mediasize,
+ bool readonly, const std::string &info);
+
+ int init();
+ void shut_down();
+
+ std::string get_devname() const;
+
+ int recv(Request **req);
+ int send(Request *req);
+
+ int resize(size_t newsize);
+
+private:
+ std::string m_devname;
+ size_t m_sectorsize;
+ size_t m_mediasize;
+ bool m_readonly;
+ std::string m_info;
+ ggate_drv_t m_drv = 0;
+};
+
+} // namespace ggate
+} // namespace rbd
+
+#endif // CEPH_RBD_GGATE_DRIVER_H
+
diff --git a/src/tools/rbd_ggate/Request.h b/src/tools/rbd_ggate/Request.h
new file mode 100644
index 000000000..66f219858
--- /dev/null
+++ b/src/tools/rbd_ggate/Request.h
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_REQUEST_H
+#define CEPH_RBD_GGATE_REQUEST_H
+
+#include "ggate_drv.h"
+
+namespace rbd {
+namespace ggate {
+
+struct Request {
+ enum Command {
+ Unknown = 0,
+ Write = 1,
+ Read = 2,
+ Flush = 3,
+ Discard = 4,
+ };
+
+ ggate_drv_req_t req;
+ bufferlist bl;
+
+ Request(ggate_drv_req_t req) : req(req) {
+ }
+
+ uint64_t get_id() {
+ return ggate_drv_req_id(req);
+ }
+
+ Command get_cmd() {
+ return static_cast<Command>(ggate_drv_req_cmd(req));
+ }
+
+ size_t get_length() {
+ return ggate_drv_req_length(req);
+ }
+
+ uint64_t get_offset() {
+ return ggate_drv_req_offset(req);
+ }
+
+ uint64_t get_error() {
+ return ggate_drv_req_error(req);
+ }
+
+ void set_error(int error) {
+ ggate_drv_req_set_error(req, error);
+ }
+};
+
+} // namespace ggate
+} // namespace rbd
+
+#endif // CEPH_RBD_GGATE_REQUEST_H
diff --git a/src/tools/rbd_ggate/Server.cc b/src/tools/rbd_ggate/Server.cc
new file mode 100644
index 000000000..2565ba10f
--- /dev/null
+++ b/src/tools/rbd_ggate/Server.cc
@@ -0,0 +1,262 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "Driver.h"
+#include "Server.h"
+#include "Request.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::ggate::Server: " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace ggate {
+
+Server::Server(Driver *drv, librbd::Image& image)
+ : m_drv(drv), m_image(image),
+ m_reader_thread(this, &Server::reader_entry),
+ m_writer_thread(this, &Server::writer_entry) {
+}
+
+void Server::run() {
+ dout(10) << dendl;
+
+ int r = start();
+ ceph_assert(r == 0);
+
+ dout(20) << "entering run loop" << dendl;
+
+ {
+ std::unique_lock locker{m_lock};
+ m_cond.wait(locker, [this] { return m_stopping;});
+ }
+
+ dout(20) << "exiting run loop" << dendl;
+
+ stop();
+}
+
+int Server::start() {
+ dout(10) << dendl;
+
+ m_reader_thread.create("rbd_reader");
+ m_writer_thread.create("rbd_writer");
+ return 0;
+}
+
+void Server::stop() {
+ dout(10) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_stopping);
+ }
+
+ m_reader_thread.join();
+ m_writer_thread.join();
+
+ wait_clean();
+}
+
+void Server::io_start(IOContext *ctx) {
+ dout(20) << ctx << dendl;
+
+ std::lock_guard locker{m_lock};
+ m_io_pending.push_back(&ctx->item);
+}
+
+void Server::io_finish(IOContext *ctx) {
+ dout(20) << ctx << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(ctx->item.is_on_list());
+
+ ctx->item.remove_myself();
+ m_io_finished.push_back(&ctx->item);
+ m_cond.notify_all();
+}
+
+Server::IOContext *Server::wait_io_finish() {
+ dout(20) << dendl;
+
+ std::unique_lock locker{m_lock};
+ m_cond.wait(locker, [this] { return !m_io_finished.empty() || m_stopping;});
+
+ if (m_io_finished.empty()) {
+ return nullptr;
+ }
+
+ IOContext *ret = m_io_finished.front();
+ m_io_finished.pop_front();
+
+ return ret;
+}
+
+void Server::wait_clean() {
+ dout(20) << dendl;
+
+ ceph_assert(!m_reader_thread.is_started());
+
+ std::unique_lock locker{m_lock};
+ m_cond.wait(locker, [this] { return m_io_pending.empty();});
+
+ while (!m_io_finished.empty()) {
+ std::unique_ptr<IOContext> free_ctx(m_io_finished.front());
+ m_io_finished.pop_front();
+ }
+}
+
+void Server::aio_callback(librbd::completion_t cb, void *arg) {
+ librbd::RBD::AioCompletion *aio_completion =
+ reinterpret_cast<librbd::RBD::AioCompletion*>(cb);
+
+ IOContext *ctx = reinterpret_cast<IOContext *>(arg);
+ int r = aio_completion->get_return_value();
+
+ ctx->server->handle_aio(ctx, r);
+ aio_completion->release();
+}
+
+void Server::handle_aio(IOContext *ctx, int r) {
+ dout(20) << ctx << ": r=" << r << dendl;
+
+ if (r == -EINVAL) {
+ // if shrinking an image, a pagecache writeback might reference
+ // extents outside of the range of the new image extents
+ dout(5) << "masking IO out-of-bounds error" << dendl;
+ ctx->req->bl.clear();
+ r = 0;
+ }
+
+ if (r < 0) {
+ ctx->req->set_error(-r);
+ } else if ((ctx->req->get_cmd() == Request::Read) &&
+ r != static_cast<int>(ctx->req->get_length())) {
+ int pad_byte_count = static_cast<int> (ctx->req->get_length()) - r;
+ ctx->req->bl.append_zero(pad_byte_count);
+ dout(20) << ctx << ": pad byte count: " << pad_byte_count << dendl;
+ ctx->req->set_error(0);
+ } else {
+ ctx->req->set_error(0);
+ }
+ io_finish(ctx);
+}
+
+void Server::reader_entry() {
+ dout(20) << dendl;
+
+ while (!m_stopping) {
+ std::unique_ptr<IOContext> ctx(new IOContext(this));
+
+ dout(20) << "waiting for ggate request" << dendl;
+
+ int r = m_drv->recv(&ctx->req);
+ if (r < 0) {
+ if (r != -ECANCELED) {
+ derr << "recv: " << cpp_strerror(r) << dendl;
+ }
+ std::lock_guard locker{m_lock};
+ m_stopping = true;
+ m_cond.notify_all();
+ return;
+ }
+
+ IOContext *pctx = ctx.release();
+
+ dout(20) << pctx << ": start: " << *pctx << dendl;
+
+ io_start(pctx);
+ librbd::RBD::AioCompletion *c =
+ new librbd::RBD::AioCompletion(pctx, aio_callback);
+ switch (pctx->req->get_cmd())
+ {
+ case rbd::ggate::Request::Write:
+ m_image.aio_write(pctx->req->get_offset(), pctx->req->get_length(),
+ pctx->req->bl, c);
+ break;
+ case rbd::ggate::Request::Read:
+ m_image.aio_read(pctx->req->get_offset(), pctx->req->get_length(),
+ pctx->req->bl, c);
+ break;
+ case rbd::ggate::Request::Flush:
+ m_image.aio_flush(c);
+ break;
+ case rbd::ggate::Request::Discard:
+ m_image.aio_discard(pctx->req->get_offset(), pctx->req->get_length(), c);
+ break;
+ default:
+ derr << pctx << ": invalid request command: " << pctx->req->get_cmd()
+ << dendl;
+ c->release();
+ std::lock_guard locker{m_lock};
+ m_stopping = true;
+ m_cond.notify_all();
+ return;
+ }
+ }
+ dout(20) << "terminated" << dendl;
+}
+
+void Server::writer_entry() {
+ dout(20) << dendl;
+
+ while (!m_stopping) {
+ dout(20) << "waiting for io request" << dendl;
+
+ std::unique_ptr<IOContext> ctx(wait_io_finish());
+ if (!ctx) {
+ dout(20) << "no io requests, terminating" << dendl;
+ return;
+ }
+
+ dout(20) << ctx.get() << ": got: " << *ctx << dendl;
+
+ int r = m_drv->send(ctx->req);
+ if (r < 0) {
+ derr << ctx.get() << ": send: " << cpp_strerror(r) << dendl;
+ std::lock_guard locker{m_lock};
+ m_stopping = true;
+ m_cond.notify_all();
+ return;
+ }
+ dout(20) << ctx.get() << " finish" << dendl;
+ }
+ dout(20) << "terminated" << dendl;
+}
+
+std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx) {
+
+ os << "[" << ctx.req->get_id();
+
+ switch (ctx.req->get_cmd())
+ {
+ case rbd::ggate::Request::Write:
+ os << " Write ";
+ break;
+ case rbd::ggate::Request::Read:
+ os << " Read ";
+ break;
+ case rbd::ggate::Request::Flush:
+ os << " Flush ";
+ break;
+ case rbd::ggate::Request::Discard:
+ os << " Discard ";
+ break;
+ default:
+ os << " Unknow(" << ctx.req->get_cmd() << ") ";
+ break;
+ }
+
+ os << ctx.req->get_offset() << "~" << ctx.req->get_length() << " "
+ << ctx.req->get_error() << "]";
+
+ return os;
+}
+
+} // namespace ggate
+} // namespace rbd
+
diff --git a/src/tools/rbd_ggate/Server.h b/src/tools/rbd_ggate/Server.h
new file mode 100644
index 000000000..bb31b89f7
--- /dev/null
+++ b/src/tools/rbd_ggate/Server.h
@@ -0,0 +1,88 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_SERVER_H
+#define CEPH_RBD_GGATE_SERVER_H
+
+#include "include/rbd/librbd.hpp"
+#include "include/xlist.h"
+#include "common/ceph_mutex.h"
+#include "common/Thread.h"
+
+namespace rbd {
+namespace ggate {
+
+class Driver;
+struct Request;
+
+class Server {
+public:
+ Server(Driver *drv, librbd::Image& image);
+
+ void run();
+
+private:
+ struct IOContext {
+ xlist<IOContext*>::item item;
+ Server *server;
+ Request *req = nullptr;
+
+ IOContext(Server *server) : item(this), server(server) {
+ }
+ };
+
+ class ThreadHelper : public Thread {
+ public:
+ typedef void (Server::*entry_func)();
+
+ ThreadHelper(Server *server, entry_func func)
+ : server(server), func(func) {
+ }
+
+ protected:
+ virtual void* entry() {
+ (server->*func)();
+ return nullptr;
+ }
+
+ private:
+ Server *server;
+ entry_func func;
+ };
+
+ friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx);
+
+ Driver *m_drv;
+ librbd::Image &m_image;
+
+ mutable ceph::mutex m_lock =
+ ceph::make_mutex("rbd::ggate::Server::m_lock");
+ ceph::condition_variable m_cond;
+ bool m_stopping = false;
+ ThreadHelper m_reader_thread, m_writer_thread;
+ xlist<IOContext*> m_io_pending;
+ xlist<IOContext*> m_io_finished;
+
+ static void aio_callback(librbd::completion_t cb, void *arg);
+
+ int start();
+ void stop();
+
+ void reader_entry();
+ void writer_entry();
+
+ void io_start(IOContext *ctx);
+ void io_finish(IOContext *ctx);
+
+ IOContext *wait_io_finish();
+ void wait_clean();
+
+ void handle_aio(IOContext *ctx, int r);
+};
+
+std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx);
+
+} // namespace ggate
+} // namespace rbd
+
+#endif // CEPH_RBD_GGATE_SERVER_H
diff --git a/src/tools/rbd_ggate/Watcher.cc b/src/tools/rbd_ggate/Watcher.cc
new file mode 100644
index 000000000..57b3f960e
--- /dev/null
+++ b/src/tools/rbd_ggate/Watcher.cc
@@ -0,0 +1,48 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "Driver.h"
+#include "Watcher.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::ggate::Watcher: " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace ggate {
+
+Watcher::Watcher(Driver *drv, librados::IoCtx &ioctx, librbd::Image &image,
+ size_t size)
+ : m_drv(drv), m_ioctx(ioctx), m_image(image), m_size(size) {
+}
+
+void Watcher::handle_notify() {
+ dout(20) << dendl;
+
+ librbd::image_info_t info;
+
+ if (m_image.stat(info, sizeof(info)) == 0) {
+ size_t new_size = info.size;
+
+ if (new_size != m_size) {
+ int r = m_drv->resize(new_size);
+ if (r < 0) {
+ derr << "resize failed: " << cpp_strerror(r) << dendl;
+ m_drv->shut_down();
+ }
+ r = m_image.invalidate_cache();
+ if (r < 0) {
+ derr << "invalidate rbd cache failed: " << cpp_strerror(r) << dendl;
+ m_drv->shut_down();
+ }
+ m_size = new_size;
+ }
+ }
+}
+
+} // namespace ggate
+} // namespace rbd
diff --git a/src/tools/rbd_ggate/Watcher.h b/src/tools/rbd_ggate/Watcher.h
new file mode 100644
index 000000000..8f524b43f
--- /dev/null
+++ b/src/tools/rbd_ggate/Watcher.h
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_WATCHER_H
+#define CEPH_RBD_GGATE_WATCHER_H
+
+#include "include/rbd/librbd.hpp"
+
+namespace rbd {
+namespace ggate {
+
+class Driver;
+
+class Watcher : public librbd::UpdateWatchCtx
+{
+public:
+ Watcher(Driver *m_drv, librados::IoCtx &ioctx, librbd::Image &image,
+ size_t size);
+
+ void handle_notify() override;
+
+private:
+ Driver *m_drv;
+ librados::IoCtx &m_ioctx;
+ librbd::Image &m_image;
+ size_t m_size;
+};
+
+
+} // namespace ggate
+} // namespace rbd
+
+#endif // CEPH_RBD_GGATE_WATCHER_H
+
diff --git a/src/tools/rbd_ggate/debug.cc b/src/tools/rbd_ggate/debug.cc
new file mode 100644
index 000000000..b675ba5b3
--- /dev/null
+++ b/src/tools/rbd_ggate/debug.cc
@@ -0,0 +1,55 @@
+#include "common/debug.h"
+#include "common/errno.h"
+#include "debug.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::ggate: "
+
+extern "C" void debugv(int level, const char *fmt, va_list ap) {
+ char *msg;
+ int saved_errno = errno;
+
+ if (g_ceph_context == nullptr) {
+ return;
+ }
+
+ vasprintf(&msg, fmt, ap);
+
+ dout(ceph::dout::need_dynamic(level)) << msg << dendl;
+
+ free(msg);
+ errno = saved_errno;
+}
+
+extern "C" void debug(int level, const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ debugv(level, fmt, ap);
+ va_end(ap);
+}
+
+extern "C" void errx(const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ debugv(-1, fmt, ap);
+ va_end(ap);
+}
+
+extern "C" void err(const char *fmt, ...) {
+ va_list ap;
+ char *msg;
+ int saved_errno = errno;
+
+ va_start(ap, fmt);
+ vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ errno = saved_errno;
+
+ errx("%s: %s", msg, cpp_strerror(errno).c_str());
+
+ free(msg);
+}
diff --git a/src/tools/rbd_ggate/debug.h b/src/tools/rbd_ggate/debug.h
new file mode 100644
index 000000000..da9b46a38
--- /dev/null
+++ b/src/tools/rbd_ggate/debug.h
@@ -0,0 +1,17 @@
+#ifndef CEPH_RBD_GGATE_DEBUG_H
+#define CEPH_RBD_GGATE_DEBUG_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void debug(int level, const char *fmt, ...) __printflike(2, 3);
+void debugv(int level, const char *fmt, va_list ap) __printflike(2, 0);
+void err(const char *fmt, ...) __printflike(1, 2);
+void errx(const char *fmt, ...) __printflike(1, 2);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // CEPH_RBD_GGATE_DEBUG_H
diff --git a/src/tools/rbd_ggate/ggate_drv.c b/src/tools/rbd_ggate/ggate_drv.c
new file mode 100644
index 000000000..11f6cf0a4
--- /dev/null
+++ b/src/tools/rbd_ggate/ggate_drv.c
@@ -0,0 +1,379 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/disk.h>
+#include <sys/linker.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <geom/gate/g_gate.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgeom.h>
+
+#include "debug.h"
+#include "ggate_drv.h"
+
+uint64_t ggate_drv_req_id(ggate_drv_req_t req) {
+ struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+
+ return ggio->gctl_seq;
+}
+
+int ggate_drv_req_cmd(ggate_drv_req_t req) {
+ struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+
+ switch (ggio->gctl_cmd) {
+ case BIO_WRITE:
+ return GGATE_DRV_CMD_WRITE;
+ case BIO_READ:
+ return GGATE_DRV_CMD_READ;
+ case BIO_FLUSH:
+ return GGATE_DRV_CMD_FLUSH;
+ case BIO_DELETE:
+ return GGATE_DRV_CMD_DISCARD;
+ default:
+ return GGATE_DRV_CMD_UNKNOWN;
+ }
+}
+
+uint64_t ggate_drv_req_offset(ggate_drv_req_t req) {
+ struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+
+ return ggio->gctl_offset;
+}
+
+size_t ggate_drv_req_length(ggate_drv_req_t req) {
+ struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+
+ return ggio->gctl_length;
+}
+
+void *ggate_drv_req_buf(ggate_drv_req_t req) {
+ struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+
+ return ggio->gctl_data;
+}
+
+int ggate_drv_req_error(ggate_drv_req_t req) {
+ struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+
+ return ggio->gctl_error;
+}
+
+void ggate_drv_req_set_error(ggate_drv_req_t req, int error) {
+ struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+
+ ggio->gctl_error = error;
+}
+
+void *ggate_drv_req_release_buf(ggate_drv_req_t req) {
+ struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+
+ void *data = ggio->gctl_data;
+ ggio->gctl_data = NULL;
+
+ return data;
+}
+
+struct ggate_drv {
+ int fd;
+ int unit;
+};
+
+int ggate_drv_load() {
+ if (modfind("g_gate") != -1) {
+ /* Present in kernel. */
+ return 0;
+ }
+
+ if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) {
+ if (errno != EEXIST) {
+ err("failed to load geom_gate module");
+ return -errno;
+ }
+ }
+ return 0;
+}
+
+int ggate_drv_create(char *name, size_t namelen, size_t sectorsize,
+ size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv_) {
+ struct ggate_drv *drv;
+ struct g_gate_ctl_create ggiocreate;
+
+ debug(20, "%s: name=%s, sectorsize=%zd, mediasize=%zd, readonly=%d, info=%s",
+ __func__, name, sectorsize, mediasize, (int)readonly, info);
+
+ if (*name != '\0') {
+ if (namelen > sizeof(ggiocreate.gctl_name) - 1) {
+ return -ENAMETOOLONG;
+ }
+ }
+
+ /*
+ * We communicate with ggate via /dev/ggctl. Open it.
+ */
+ int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR);
+ if (fd == -1) {
+ err("failed to open /dev/" G_GATE_CTL_NAME);
+ return -errno;
+ }
+
+ drv = calloc(1, sizeof(*drv));
+ if (drv == NULL) {
+ errno = -ENOMEM;
+ goto fail_close;
+ }
+
+ /*
+ * Create provider.
+ */
+ memset(&ggiocreate, 0, sizeof(ggiocreate));
+ ggiocreate.gctl_version = G_GATE_VERSION;
+ ggiocreate.gctl_mediasize = mediasize;
+ ggiocreate.gctl_sectorsize = sectorsize;
+ ggiocreate.gctl_flags = readonly ? G_GATE_FLAG_READONLY : 0;
+ ggiocreate.gctl_maxcount = 0;
+ ggiocreate.gctl_timeout = 0;
+ if (*name != '\0') {
+ ggiocreate.gctl_unit = G_GATE_NAME_GIVEN;
+ strlcpy(ggiocreate.gctl_name, name, sizeof(ggiocreate.gctl_name));
+ } else {
+ ggiocreate.gctl_unit = G_GATE_UNIT_AUTO;
+ }
+ strlcpy(ggiocreate.gctl_info, info, sizeof(ggiocreate.gctl_info));
+ if (ioctl(fd, G_GATE_CMD_CREATE, &ggiocreate) == -1) {
+ err("failed to create " G_GATE_PROVIDER_NAME " device");
+ goto fail;
+ }
+
+ debug(20, "%s: created, unit: %d, name: %s", __func__, ggiocreate.gctl_unit,
+ ggiocreate.gctl_name);
+
+ drv->fd = fd;
+ drv->unit = ggiocreate.gctl_unit;
+ *drv_ = drv;
+
+ if (*name == '\0') {
+ snprintf(name, namelen, "%s%d", G_GATE_PROVIDER_NAME, drv->unit);
+ }
+
+ return 0;
+
+fail:
+ free(drv);
+fail_close:
+ close(fd);
+ return -errno;
+}
+
+void ggate_drv_destroy(ggate_drv_t drv_) {
+ struct ggate_drv *drv = (struct ggate_drv *)drv_;
+ struct g_gate_ctl_destroy ggiodestroy;
+
+ debug(20, "%s %p", __func__, drv);
+
+ memset(&ggiodestroy, 0, sizeof(ggiodestroy));
+ ggiodestroy.gctl_version = G_GATE_VERSION;
+ ggiodestroy.gctl_unit = drv->unit;
+ ggiodestroy.gctl_force = 1;
+
+ // Remember errno.
+ int rerrno = errno;
+
+ int r = ioctl(drv->fd, G_GATE_CMD_DESTROY, &ggiodestroy);
+ if (r == -1) {
+ err("failed to destroy /dev/%s%d device", G_GATE_PROVIDER_NAME,
+ drv->unit);
+ }
+ // Restore errno.
+ errno = rerrno;
+
+ free(drv);
+}
+
+int ggate_drv_resize(ggate_drv_t drv_, size_t newsize) {
+ struct ggate_drv *drv = (struct ggate_drv *)drv_;
+
+ debug(20, "%s %p: newsize=%zd", __func__, drv, newsize);
+
+ struct g_gate_ctl_modify ggiomodify;
+
+ memset(&ggiomodify, 0, sizeof(ggiomodify));
+ ggiomodify.gctl_version = G_GATE_VERSION;
+ ggiomodify.gctl_unit = drv->unit;
+ ggiomodify.gctl_modify = GG_MODIFY_MEDIASIZE;
+ ggiomodify.gctl_mediasize = newsize;
+
+ int r = ioctl(drv->fd, G_GATE_CMD_MODIFY, &ggiomodify);
+ if (r == -1) {
+ r = -errno;
+ err("failed to resize /dev/%s%d device", G_GATE_PROVIDER_NAME, drv->unit);
+ }
+ return r;
+}
+
+int ggate_drv_kill(const char *devname) {
+ debug(20, "%s %s", __func__, devname);
+
+ int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR);
+ if (fd == -1) {
+ err("failed to open /dev/" G_GATE_CTL_NAME);
+ return -errno;
+ }
+
+ struct g_gate_ctl_destroy ggiodestroy;
+ memset(&ggiodestroy, 0, sizeof(ggiodestroy));
+ ggiodestroy.gctl_version = G_GATE_VERSION;
+ ggiodestroy.gctl_unit = G_GATE_NAME_GIVEN;
+ ggiodestroy.gctl_force = 1;
+
+ strlcpy(ggiodestroy.gctl_name, devname, sizeof(ggiodestroy.gctl_name));
+
+ int r = ioctl(fd, G_GATE_CMD_DESTROY, &ggiodestroy);
+ if (r == -1) {
+ r = -errno;
+ err("failed to destroy %s device", devname);
+ }
+
+ close(fd);
+ return r;
+}
+
+int ggate_drv_recv(ggate_drv_t drv_, ggate_drv_req_t *req) {
+ struct ggate_drv *drv = (struct ggate_drv *)drv_;
+ struct g_gate_ctl_io *ggio;
+ int error, r;
+
+ debug(20, "%s", __func__);
+
+ ggio = calloc(1, sizeof(*ggio));
+ if (ggio == NULL) {
+ return -ENOMEM;
+ }
+
+ ggio->gctl_version = G_GATE_VERSION;
+ ggio->gctl_unit = drv->unit;
+ ggio->gctl_data = malloc(MAXPHYS);
+ ggio->gctl_length = MAXPHYS;
+
+ debug(20, "%s: waiting for request from kernel", __func__);
+ if (ioctl(drv->fd, G_GATE_CMD_START, ggio) == -1) {
+ err("%s: G_GATE_CMD_START failed", __func__);
+ return -errno;
+ }
+
+ debug(20, "%s: got request from kernel: "
+ "unit=%d, seq=%ju, cmd=%u, offset=%ju, length=%ju, error=%d, data=%p",
+ __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd,
+ (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length,
+ ggio->gctl_error, ggio->gctl_data);
+
+ error = ggio->gctl_error;
+ switch (error) {
+ case 0:
+ break;
+ case ECANCELED:
+ debug(10, "%s: canceled: exit gracefully", __func__);
+ r = -error;
+ goto fail;
+ case ENOMEM:
+ /*
+ * Buffer too small? Impossible, we allocate MAXPHYS
+ * bytes - request can't be bigger than that.
+ */
+ /* FALLTHROUGH */
+ case ENXIO:
+ default:
+ errno = error;
+ err("%s: G_GATE_CMD_START failed", __func__);
+ r = -error;
+ goto fail;
+ }
+
+ *req = ggio;
+ return 0;
+
+fail:
+ free(ggio->gctl_data);
+ free(ggio);
+ return r;
+}
+
+int ggate_drv_send(ggate_drv_t drv_, ggate_drv_req_t req) {
+ struct ggate_drv *drv = (struct ggate_drv *)drv_;
+ struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+ int r = 0;
+
+ debug(20, "%s: send request to kernel: "
+ "unit=%d, seq=%ju, cmd=%u, offset=%ju, length=%ju, error=%d, data=%p",
+ __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd,
+ (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length,
+ ggio->gctl_error, ggio->gctl_data);
+
+ if (ioctl(drv->fd, G_GATE_CMD_DONE, ggio) == -1) {
+ err("%s: G_GATE_CMD_DONE failed", __func__);
+ r = -errno;
+ }
+
+ free(ggio->gctl_data);
+ free(ggio);
+ return r;
+}
+
+static const char * get_conf(struct ggeom *gp, const char *name) {
+ struct gconfig *conf;
+
+ LIST_FOREACH(conf, &gp->lg_config, lg_config) {
+ if (strcmp(conf->lg_name, name) == 0)
+ return (conf->lg_val);
+ }
+ return "";
+}
+
+int ggate_drv_list(struct ggate_drv_info *info, size_t *size) {
+ struct gmesh mesh;
+ struct gclass *class;
+ struct ggeom *gp;
+ int r;
+ size_t max_size;
+
+ r = geom_gettree(&mesh);
+ if (r != 0) {
+ return -errno;
+ }
+
+ max_size = *size;
+ *size = 0;
+
+ LIST_FOREACH(class, &mesh.lg_class, lg_class) {
+ if (strcmp(class->lg_name, G_GATE_CLASS_NAME) == 0) {
+ LIST_FOREACH(gp, &class->lg_geom, lg_geom) {
+ (*size)++;
+ }
+ if (*size > max_size) {
+ r = -ERANGE;
+ goto done;
+ }
+ LIST_FOREACH(gp, &class->lg_geom, lg_geom) {
+ strlcpy(info->id, get_conf(gp, "unit"), sizeof(info->id));
+ strlcpy(info->name, gp->lg_name, sizeof(info->name));
+ strlcpy(info->info, get_conf(gp, "info"), sizeof(info->info));
+ info++;
+ }
+ }
+ }
+
+done:
+ geom_deletetree(&mesh);
+ return r;
+}
diff --git a/src/tools/rbd_ggate/ggate_drv.h b/src/tools/rbd_ggate/ggate_drv.h
new file mode 100644
index 000000000..69268ebd4
--- /dev/null
+++ b/src/tools/rbd_ggate/ggate_drv.h
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_GGATE_DRV_H
+#define CEPH_RBD_GGATE_GGATE_DRV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/param.h>
+
+#include <stdbool.h>
+#include <stdint.h>
+
+typedef void *ggate_drv_t;
+typedef void *ggate_drv_req_t;
+
+/*
+ * GGATE driver commands. They are mapped to GgateReq::Command.
+ */
+enum {
+ GGATE_DRV_CMD_UNKNOWN = 0,
+ GGATE_DRV_CMD_WRITE = 1,
+ GGATE_DRV_CMD_READ = 2,
+ GGATE_DRV_CMD_FLUSH = 3,
+ GGATE_DRV_CMD_DISCARD = 4,
+};
+
+struct ggate_drv_info {
+ char id[16];
+ char name[NAME_MAX];
+ char info[2048]; /* G_GATE_INFOSIZE */
+};
+
+uint64_t ggate_drv_req_id(ggate_drv_req_t req);
+int ggate_drv_req_cmd(ggate_drv_req_t req);
+void *ggate_drv_req_buf(ggate_drv_req_t req);
+size_t ggate_drv_req_length(ggate_drv_req_t req);
+uint64_t ggate_drv_req_offset(ggate_drv_req_t req);
+int ggate_drv_req_error(ggate_drv_req_t req);
+
+void ggate_drv_req_set_error(ggate_drv_req_t req, int error);
+void *ggate_drv_req_release_buf(ggate_drv_req_t req);
+
+int ggate_drv_load();
+
+int ggate_drv_create(char *name, size_t namelen, size_t sectorsize,
+ size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv);
+void ggate_drv_destroy(ggate_drv_t drv);
+
+int ggate_drv_recv(ggate_drv_t drv, ggate_drv_req_t *req);
+int ggate_drv_send(ggate_drv_t drv, ggate_drv_req_t req);
+
+int ggate_drv_resize(ggate_drv_t drv, size_t newsize);
+
+int ggate_drv_kill(const char *devname);
+int ggate_drv_list(struct ggate_drv_info *info, size_t *size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // CEPH_RBD_GGATE_GGATE_DRV_H
diff --git a/src/tools/rbd_ggate/main.cc b/src/tools/rbd_ggate/main.cc
new file mode 100644
index 000000000..0942f5689
--- /dev/null
+++ b/src/tools/rbd_ggate/main.cc
@@ -0,0 +1,516 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/int_types.h"
+
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+
+#include <iostream>
+#include <memory>
+#include <boost/algorithm/string/predicate.hpp>
+#include <regex>
+
+#include "common/Formatter.h"
+#include "common/Preforker.h"
+#include "common/TextTable.h"
+#include "common/ceph_argparse.h"
+#include "common/config_proxy.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+#include "include/stringify.h"
+
+#include "Driver.h"
+#include "Server.h"
+#include "Watcher.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-ggate: " << __func__ << ": "
+
+static void usage() {
+ std::cout << "Usage: rbd-ggate [options] map <image-or-snap-spec> Map an image to ggate device\n"
+ << " unmap <device path> Unmap ggate device\n"
+ << " list List mapped ggate devices\n"
+ << "\n"
+ << "Map options:\n"
+ << " --device <device path> Specify ggate device path\n"
+ << " --read-only Map readonly\n"
+ << " --exclusive Forbid writes by other clients\n"
+ << "\n"
+ << "List options:\n"
+ << " --format plain|json|xml Output format (default: plain)\n"
+ << " --pretty-format Pretty formatting (json and xml)\n"
+ << std::endl;
+ generic_server_usage();
+}
+
+static std::string devpath, poolname, nsname, imgname, snapname;
+static bool readonly = false;
+static bool exclusive = false;
+
+static std::unique_ptr<rbd::ggate::Driver> drv;
+
+static void handle_signal(int signum)
+{
+ derr << "*** Got signal " << sig_str(signum) << " ***" << dendl;
+
+ ceph_assert(signum == SIGINT || signum == SIGTERM);
+ ceph_assert(drv);
+
+ drv->shut_down();
+}
+
+static int do_map(int argc, const char *argv[])
+{
+ int r;
+
+ librados::Rados rados;
+ librbd::RBD rbd;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+
+ librbd::image_info_t info;
+ std::string desc;
+
+ Preforker forker;
+
+ auto args = argv_to_vec(argc, argv);
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_DAEMON,
+ CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+ g_ceph_context->_conf.set_val_or_die("pid_file", "");
+
+ if (global_init_prefork(g_ceph_context) >= 0) {
+ std::string err;
+ r = forker.prefork(err);
+ if (r < 0) {
+ std::cerr << err << std::endl;
+ return r;
+ }
+ if (forker.is_parent()) {
+ if (forker.parent_wait(err) != 0) {
+ return -ENXIO;
+ }
+ return 0;
+ }
+ global_init_postfork_start(g_ceph_context);
+ }
+
+ common_init_finish(g_ceph_context);
+ global_init_chdir(g_ceph_context);
+
+ if (poolname.empty()) {
+ poolname = g_ceph_context->_conf.get_val<std::string>("rbd_default_pool");
+ }
+
+ std::string devname = boost::starts_with(devpath, "/dev/") ?
+ devpath.substr(5) : devpath;
+ std::unique_ptr<rbd::ggate::Watcher> watcher;
+ uint64_t handle;
+
+ r = rados.init_with_context(g_ceph_context);
+ if (r < 0) {
+ goto done;
+ }
+
+ r = rados.connect();
+ if (r < 0) {
+ std::cerr << "rbd-ggate: failed to connect to cluster: " << cpp_strerror(r)
+ << std::endl;
+ goto done;
+ }
+
+ r = rados.ioctx_create(poolname.c_str(), io_ctx);
+ if (r < 0) {
+ std::cerr << "rbd-ggate: failed to acces pool " << poolname << ": "
+ << cpp_strerror(r) << std::endl;
+ goto done;
+ }
+
+ io_ctx.set_namespace(nsname);
+
+ r = rbd.open(io_ctx, image, imgname.c_str());
+ if (r < 0) {
+ std::cerr << "rbd-ggate: failed to open image " << imgname << ": "
+ << cpp_strerror(r) << std::endl;
+ goto done;
+ }
+
+ if (exclusive) {
+ r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE);
+ if (r < 0) {
+ std::cerr << "rbd-ggate: failed to acquire exclusive lock: "
+ << cpp_strerror(r) << std::endl;
+ goto done;
+ }
+ }
+
+ desc = "RBD " + poolname + "/" + (nsname.empty() ? "" : nsname + "/") +
+ imgname;
+
+ if (!snapname.empty()) {
+ r = image.snap_set(snapname.c_str());
+ if (r < 0) {
+ std::cerr << "rbd-ggate: failed to set snapshot " << snapname << ": "
+ << cpp_strerror(r) << std::endl;
+ goto done;
+ }
+ readonly = true;
+ desc += "@" + snapname;
+ }
+
+ r = image.stat(info, sizeof(info));
+ if (r < 0) {
+ std::cerr << "rbd-ggate: image stat failed: " << cpp_strerror(r)
+ << std::endl;
+ goto done;
+ }
+
+ rbd::ggate::Driver::load();
+ drv.reset(new rbd::ggate::Driver(devname, 512, info.size, readonly, desc));
+ r = drv->init();
+ if (r < 0) {
+ r = -errno;
+ std::cerr << "rbd-ggate: failed to create ggate device: " << cpp_strerror(r)
+ << std::endl;
+ goto done;
+ }
+
+ watcher.reset(new rbd::ggate::Watcher(drv.get(), io_ctx, image, info.size));
+ r = image.update_watch(watcher.get(), &handle);
+ if (r < 0) {
+ std::cerr << "rbd-ggate: failed to set watcher: " << cpp_strerror(r)
+ << std::endl;
+ drv->shut_down();
+ goto done;
+ }
+
+ std::cout << "/dev/" << drv->get_devname() << std::endl;
+
+ if (g_conf()->daemonize) {
+ global_init_postfork_finish(g_ceph_context);
+ forker.daemonize();
+ }
+
+ init_async_signal_handler();
+ register_async_signal_handler(SIGHUP, sighup_handler);
+ register_async_signal_handler_oneshot(SIGINT, handle_signal);
+ register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+ rbd::ggate::Server(drv.get(), image).run();
+
+ unregister_async_signal_handler(SIGHUP, sighup_handler);
+ unregister_async_signal_handler(SIGINT, handle_signal);
+ unregister_async_signal_handler(SIGTERM, handle_signal);
+ shutdown_async_signal_handler();
+
+ r = image.update_unwatch(handle);
+ ceph_assert(r == 0);
+
+done:
+ image.close();
+ io_ctx.close();
+ rados.shutdown();
+
+ if (r < 0) {
+ std::cerr << "rbd-ggate: failed to map: " << cpp_strerror(r) << std::endl;
+ }
+
+ forker.exit(r < 0 ? EXIT_FAILURE : 0);
+ // Unreachable;
+ return r;
+}
+
+static int do_unmap()
+{
+ std::string devname = boost::starts_with(devpath, "/dev/") ?
+ devpath.substr(5) : devpath;
+
+ int r = rbd::ggate::Driver::kill(devname);
+ if (r < 0) {
+ cerr << "rbd-ggate: failed to destroy " << devname << ": "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return 0;
+}
+
+static int parse_imgpath(const std::string &imgpath, std::string *poolname,
+ std::string *nsname, std::string *imgname,
+ std::string *snapname) {
+ std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$");
+ std::smatch match;
+ if (!std::regex_match(imgpath, match, pattern)) {
+ std::cerr << "rbd-ggate: invalid spec '" << imgpath << "'" << std::endl;
+ return -EINVAL;
+ }
+
+ if (match[1].matched) {
+ *poolname = match[1];
+ }
+
+ if (match[2].matched) {
+ *nsname = match[2];
+ }
+
+ *imgname = match[3];
+
+ if (match[4].matched) {
+ *snapname = match[4];
+ }
+
+ return 0;
+}
+
+static bool find_mapped_dev_by_spec(const std::string &spec,
+ std::string *devname) {
+ std::string poolname, nsname, imgname, snapname;
+ int r = parse_imgpath(spec, &poolname, &nsname, &imgname, &snapname);
+ if (r < 0) {
+ return false;
+ }
+ if (poolname.empty()) {
+ // We could use rbd_default_pool config to set pool name but then
+ // we would need to initialize the global context. So right now it
+ // is mandatory for the user to specify a pool. Fortunately the
+ // preferred way for users to call rbd-ggate is via rbd, which
+ // cares to set the pool name.
+ return false;
+ }
+
+ std::map<std::string, rbd::ggate::Driver::DevInfo> devs;
+ r = rbd::ggate::Driver::list(&devs);
+ if (r < 0) {
+ return false;
+ }
+
+ for (auto &it : devs) {
+ auto &name = it.second.first;
+ auto &info = it.second.second;
+ if (!boost::starts_with(info, "RBD ")) {
+ continue;
+ }
+
+ std::string p, n, i, s;
+ parse_imgpath(info.substr(4), &p, &n, &i, &s);
+ if (p == poolname && n == nsname && i == imgname && s == snapname) {
+ *devname = name;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static int do_list(const std::string &format, bool pretty_format)
+{
+ rbd::ggate::Driver::load();
+
+ std::map<std::string, rbd::ggate::Driver::DevInfo> devs;
+ int r = rbd::ggate::Driver::list(&devs);
+ if (r < 0) {
+ return -r;
+ }
+
+ std::unique_ptr<ceph::Formatter> f;
+ TextTable tbl;
+
+ if (format == "json") {
+ f.reset(new JSONFormatter(pretty_format));
+ } else if (format == "xml") {
+ f.reset(new XMLFormatter(pretty_format));
+ } else if (!format.empty() && format != "plain") {
+ std::cerr << "rbd-ggate: invalid output format: " << format << std::endl;
+ return -EINVAL;
+ }
+
+ if (f) {
+ f->open_array_section("devices");
+ } else {
+ tbl.define_column("id", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("image", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("device", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ int count = 0;
+
+ for (auto &it : devs) {
+ auto &id = it.first;
+ auto &name = it.second.first;
+ auto &info = it.second.second;
+ if (!boost::starts_with(info, "RBD ")) {
+ continue;
+ }
+
+ std::string poolname;
+ std::string nsname;
+ std::string imgname;
+ std::string snapname(f ? "" : "-");
+ parse_imgpath(info.substr(4), &poolname, &nsname, &imgname, &snapname);
+
+ if (f) {
+ f->open_object_section("device");
+ f->dump_string("id", id);
+ f->dump_string("pool", poolname);
+ f->dump_string("namespace", nsname);
+ f->dump_string("image", imgname);
+ f->dump_string("snap", snapname);
+ f->dump_string("device", "/dev/" + name);
+ f->close_section();
+ } else {
+ tbl << id << poolname << nsname << imgname << snapname << "/dev/" + name
+ << TextTable::endrow;
+ }
+ count++;
+ }
+
+ if (f) {
+ f->close_section(); // devices
+ f->flush(std::cout);
+ } else if (count > 0) {
+ std::cout << tbl;
+ }
+
+ return 0;
+}
+
+int main(int argc, const char *argv[]) {
+ int r;
+ enum {
+ None,
+ Connect,
+ Disconnect,
+ List
+ } cmd = None;
+
+ auto args = argv_to_vec(argc, argv);
+ if (args.empty()) {
+ cerr << argv[0] << ": -h or --help for usage" << std::endl;
+ exit(1);
+ }
+ if (ceph_argparse_need_usage(args)) {
+ usage();
+ exit(0);
+ }
+ // filter out ceph config options
+ ConfigProxy{false}.parse_argv(args);
+
+ std::string format;
+ bool pretty_format = false;
+
+ for (auto i = args.begin(); i != args.end(); ) {
+ if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
+ usage();
+ return 0;
+ } else if (ceph_argparse_witharg(args, i, &devpath, "--device",
+ (char *)NULL)) {
+ } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) {
+ readonly = true;
+ } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) {
+ exclusive = true;
+ } else if (ceph_argparse_witharg(args, i, &format, "--format",
+ (char *)NULL)) {
+ } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) {
+ pretty_format = true;
+ } else {
+ ++i;
+ }
+ }
+
+ if (args.begin() != args.end()) {
+ if (strcmp(*args.begin(), "map") == 0) {
+ cmd = Connect;
+ } else if (strcmp(*args.begin(), "unmap") == 0) {
+ cmd = Disconnect;
+ } else if (strcmp(*args.begin(), "list") == 0) {
+ cmd = List;
+ } else {
+ cerr << "rbd-ggate: unknown command: " << *args.begin() << std::endl;
+ return EXIT_FAILURE;
+ }
+ args.erase(args.begin());
+ }
+
+ if (cmd == None) {
+ cerr << "rbd-ggate: must specify command" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ switch (cmd) {
+ case Connect:
+ if (args.begin() == args.end()) {
+ cerr << "rbd-ggate: must specify image-or-snap-spec" << std::endl;
+ return EXIT_FAILURE;
+ }
+ if (parse_imgpath(*args.begin(), &poolname, &nsname, &imgname,
+ &snapname) < 0) {
+ return EXIT_FAILURE;
+ }
+ args.erase(args.begin());
+ break;
+ case Disconnect:
+ if (args.begin() == args.end()) {
+ std::cerr << "rbd-ggate: must specify ggate device or image-or-snap-spec"
+ << std::endl;
+ return EXIT_FAILURE;
+ }
+ if (boost::starts_with(*args.begin(), "/dev/") ||
+ !find_mapped_dev_by_spec(*args.begin(), &devpath)) {
+ devpath = *args.begin();
+ }
+ args.erase(args.begin());
+ break;
+ default:
+ break;
+ }
+
+ if (args.begin() != args.end()) {
+ cerr << "rbd-ggate: unknown args: " << *args.begin() << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ switch (cmd) {
+ case Connect:
+ if (imgname.empty()) {
+ cerr << "rbd-ggate: image name was not specified" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ r = do_map(argc, argv);
+ if (r < 0)
+ return EXIT_FAILURE;
+ break;
+ case Disconnect:
+ r = do_unmap();
+ if (r < 0)
+ return EXIT_FAILURE;
+ break;
+ case List:
+ r = do_list(format, pretty_format);
+ if (r < 0)
+ return EXIT_FAILURE;
+ break;
+ default:
+ usage();
+ return EXIT_FAILURE;
+ }
+
+ return 0;
+}
diff --git a/src/tools/rbd_mirror/BaseRequest.h b/src/tools/rbd_mirror/BaseRequest.h
new file mode 100644
index 000000000..0da98651d
--- /dev/null
+++ b/src/tools/rbd_mirror/BaseRequest.h
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_BASE_REQUEST_H
+#define CEPH_RBD_MIRROR_BASE_REQUEST_H
+
+#include "include/Context.h"
+
+namespace rbd {
+namespace mirror {
+
+class BaseRequest {
+public:
+ BaseRequest(Context *on_finish) : m_on_finish(on_finish) {
+ }
+ virtual ~BaseRequest() {}
+
+ virtual void send() = 0;
+
+protected:
+ virtual void finish(int r) {
+ m_on_finish->complete(r);
+ delete this;
+ }
+
+private:
+ Context *m_on_finish;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_BASE_REQUEST_H
diff --git a/src/tools/rbd_mirror/CMakeLists.txt b/src/tools/rbd_mirror/CMakeLists.txt
new file mode 100644
index 000000000..43a6f03fe
--- /dev/null
+++ b/src/tools/rbd_mirror/CMakeLists.txt
@@ -0,0 +1,91 @@
+add_library(rbd_mirror_types STATIC
+ image_map/Types.cc
+ instance_watcher/Types.cc
+ leader_watcher/Types.cc)
+
+set(rbd_mirror_internal
+ ClusterWatcher.cc
+ ImageDeleter.cc
+ ImageMap.cc
+ ImageReplayer.cc
+ ImageSync.cc
+ InstanceReplayer.cc
+ InstanceWatcher.cc
+ Instances.cc
+ LeaderWatcher.cc
+ Mirror.cc
+ MirrorStatusUpdater.cc
+ MirrorStatusWatcher.cc
+ NamespaceReplayer.cc
+ PoolMetaCache.cc
+ PoolReplayer.cc
+ PoolWatcher.cc
+ RemotePoolPoller.cc
+ ServiceDaemon.cc
+ Threads.cc
+ Throttler.cc
+ Types.cc
+ image_deleter/SnapshotPurgeRequest.cc
+ image_deleter/TrashMoveRequest.cc
+ image_deleter/TrashRemoveRequest.cc
+ image_deleter/TrashWatcher.cc
+ image_map/LoadRequest.cc
+ image_map/Policy.cc
+ image_map/SimplePolicy.cc
+ image_map/StateTransition.cc
+ image_map/UpdateRequest.cc
+ image_replayer/BootstrapRequest.cc
+ image_replayer/CloseImageRequest.cc
+ image_replayer/CreateImageRequest.cc
+ image_replayer/GetMirrorImageIdRequest.cc
+ image_replayer/OpenImageRequest.cc
+ image_replayer/OpenLocalImageRequest.cc
+ image_replayer/PrepareLocalImageRequest.cc
+ image_replayer/PrepareRemoteImageRequest.cc
+ image_replayer/StateBuilder.cc
+ image_replayer/TimeRollingMean.cc
+ image_replayer/Utils.cc
+ image_replayer/journal/CreateLocalImageRequest.cc
+ image_replayer/journal/EventPreprocessor.cc
+ image_replayer/journal/PrepareReplayRequest.cc
+ image_replayer/journal/Replayer.cc
+ image_replayer/journal/ReplayStatusFormatter.cc
+ image_replayer/journal/StateBuilder.cc
+ image_replayer/journal/SyncPointHandler.cc
+ image_replayer/snapshot/ApplyImageStateRequest.cc
+ image_replayer/snapshot/CreateLocalImageRequest.cc
+ image_replayer/snapshot/PrepareReplayRequest.cc
+ image_replayer/snapshot/Replayer.cc
+ image_replayer/snapshot/StateBuilder.cc
+ image_replayer/snapshot/Utils.cc
+ image_sync/SyncPointCreateRequest.cc
+ image_sync/SyncPointPruneRequest.cc
+ image_sync/Utils.cc
+ pool_watcher/RefreshImagesRequest.cc
+ service_daemon/Types.cc)
+
+add_library(rbd_mirror_internal STATIC
+ ${rbd_mirror_internal}
+ $<TARGET_OBJECTS:common_prioritycache_obj>)
+
+add_executable(rbd-mirror
+ main.cc)
+target_link_libraries(rbd-mirror
+ rbd_mirror_internal
+ rbd_mirror_types
+ rbd_api
+ rbd_internal
+ rbd_types
+ journal
+ libneorados
+ librados
+ osdc
+ cls_rbd_client
+ cls_lock_client
+ cls_journal_client
+ global
+ heap_profiler
+ ${ALLOC_LIBS}
+ OpenSSL::SSL)
+install(TARGETS rbd-mirror
+ DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/src/tools/rbd_mirror/CancelableRequest.h b/src/tools/rbd_mirror/CancelableRequest.h
new file mode 100644
index 000000000..26e8dcb5b
--- /dev/null
+++ b/src/tools/rbd_mirror/CancelableRequest.h
@@ -0,0 +1,44 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H
+#define CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H
+
+#include "common/RefCountedObj.h"
+#include "include/Context.h"
+
+namespace rbd {
+namespace mirror {
+
+class CancelableRequest : public RefCountedObject {
+public:
+ CancelableRequest(const std::string& name, CephContext *cct,
+ Context *on_finish)
+ : RefCountedObject(cct), m_name(name), m_cct(cct),
+ m_on_finish(on_finish) {
+ }
+
+ virtual void send() = 0;
+ virtual void cancel() {}
+
+protected:
+ virtual void finish(int r) {
+ if (m_cct) {
+ lsubdout(m_cct, rbd_mirror, 20) << m_name << "::finish: r=" << r << dendl;
+ }
+ if (m_on_finish) {
+ m_on_finish->complete(r);
+ }
+ put();
+ }
+
+private:
+ const std::string m_name;
+ CephContext *m_cct;
+ Context *m_on_finish;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H
diff --git a/src/tools/rbd_mirror/ClusterWatcher.cc b/src/tools/rbd_mirror/ClusterWatcher.cc
new file mode 100644
index 000000000..8bafb336e
--- /dev/null
+++ b/src/tools/rbd_mirror/ClusterWatcher.cc
@@ -0,0 +1,252 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ClusterWatcher.h"
+#include "include/stringify.h"
+#include "common/ceph_json.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/internal.h"
+#include "librbd/api/Mirror.h"
+#include "tools/rbd_mirror/ServiceDaemon.h"
+#include "json_spirit/json_spirit.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ClusterWatcher:" << this << " " \
+ << __func__ << ": "
+
+using std::list;
+using std::map;
+using std::pair;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librados::Rados;
+using librados::IoCtx;
+
+namespace rbd {
+namespace mirror {
+
+ClusterWatcher::ClusterWatcher(RadosRef cluster, ceph::mutex &lock,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon)
+ : m_cluster(cluster), m_lock(lock), m_service_daemon(service_daemon)
+{
+}
+
+const ClusterWatcher::PoolPeers& ClusterWatcher::get_pool_peers() const
+{
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ return m_pool_peers;
+}
+
+std::string ClusterWatcher::get_site_name() const {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ return m_site_name;
+}
+
+void ClusterWatcher::refresh_pools()
+{
+ dout(20) << "enter" << dendl;
+
+ PoolPeers pool_peers;
+ read_pool_peers(&pool_peers);
+
+ std::string site_name;
+ int r = read_site_name(&site_name);
+
+ std::lock_guard l{m_lock};
+ m_pool_peers = pool_peers;
+
+ if (r >= 0) {
+ m_site_name = site_name;
+ }
+
+ // TODO: perhaps use a workqueue instead, once we get notifications
+ // about config changes for existing pools
+}
+
+void ClusterWatcher::read_pool_peers(PoolPeers *pool_peers)
+{
+ int r = m_cluster->wait_for_latest_osdmap();
+ if (r < 0) {
+ derr << "error waiting for OSD map: " << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ list<pair<int64_t, string> > pools;
+ r = m_cluster->pool_list2(pools);
+ if (r < 0) {
+ derr << "error listing pools: " << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ std::set<int64_t> service_pool_ids;
+ for (auto& kv : pools) {
+ int64_t pool_id = kv.first;
+ auto& pool_name = kv.second;
+ int64_t base_tier;
+ r = m_cluster->pool_get_base_tier(pool_id, &base_tier);
+ if (r == -ENOENT) {
+ dout(10) << "pool " << pool_name << " no longer exists" << dendl;
+ continue;
+ } else if (r < 0) {
+ derr << "Error retrieving base tier for pool " << pool_name << dendl;
+ continue;
+ }
+ if (pool_id != base_tier) {
+ // pool is a cache; skip it
+ continue;
+ }
+
+ IoCtx ioctx;
+ r = m_cluster->ioctx_create2(pool_id, ioctx);
+ if (r == -ENOENT) {
+ dout(10) << "pool " << pool_id << " no longer exists" << dendl;
+ continue;
+ } else if (r < 0) {
+ derr << "Error accessing pool " << pool_name << cpp_strerror(r) << dendl;
+ continue;
+ }
+
+ cls::rbd::MirrorMode mirror_mode_internal;
+ r = librbd::cls_client::mirror_mode_get(&ioctx, &mirror_mode_internal);
+ if (r == 0 && mirror_mode_internal == cls::rbd::MIRROR_MODE_DISABLED) {
+ dout(10) << "mirroring is disabled for pool " << pool_name << dendl;
+ continue;
+ }
+
+ service_pool_ids.insert(pool_id);
+ if (m_service_pools.find(pool_id) == m_service_pools.end()) {
+ m_service_pools[pool_id] = {};
+ m_service_daemon->add_pool(pool_id, pool_name);
+ }
+
+ if (r == -EPERM) {
+ dout(10) << "access denied querying pool " << pool_name << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_WARNING, "access denied");
+ continue;
+ } else if (r < 0) {
+ derr << "could not tell whether mirroring was enabled for " << pool_name
+ << " : " << cpp_strerror(r) << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_WARNING, "mirroring mode query failed");
+ continue;
+ }
+
+ vector<librbd::mirror_peer_site_t> configs;
+ r = librbd::api::Mirror<>::peer_site_list(ioctx, &configs);
+ if (r < 0) {
+ derr << "error reading mirroring config for pool " << pool_name
+ << cpp_strerror(r) << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_ERROR, "mirroring peer list failed");
+ continue;
+ }
+
+ std::vector<PeerSpec> peers;
+ peers.reserve(configs.size());
+ for (auto& peer : configs) {
+ if (peer.direction != RBD_MIRROR_PEER_DIRECTION_TX) {
+ peers.push_back(peer);
+ }
+ }
+
+ for (auto& peer : peers) {
+ r = resolve_peer_site_config_keys(pool_id, pool_name, &peer);
+ if (r < 0) {
+ break;
+ }
+ }
+
+ if (m_service_pools[pool_id] != service_daemon::CALLOUT_ID_NONE) {
+ m_service_daemon->remove_callout(pool_id, m_service_pools[pool_id]);
+ m_service_pools[pool_id] = service_daemon::CALLOUT_ID_NONE;
+ }
+
+ pool_peers->emplace(pool_id, Peers{peers.begin(), peers.end()});
+ }
+
+ for (auto it = m_service_pools.begin(); it != m_service_pools.end(); ) {
+ auto current_it(it++);
+ if (service_pool_ids.find(current_it->first) == service_pool_ids.end()) {
+ m_service_daemon->remove_pool(current_it->first);
+ m_service_pools.erase(current_it->first);
+ }
+ }
+}
+
+int ClusterWatcher::read_site_name(std::string* site_name) {
+ dout(10) << dendl;
+
+ librbd::RBD rbd;
+ return rbd.mirror_site_name_get(*m_cluster, site_name);
+}
+
+int ClusterWatcher::resolve_peer_site_config_keys(int64_t pool_id,
+ const std::string& pool_name,
+ PeerSpec* peer) {
+ dout(10) << "retrieving config-key: pool_id=" << pool_id << ", "
+ << "pool_name=" << pool_name << ", "
+ << "peer_uuid=" << peer->uuid << dendl;
+
+ std::string cmd =
+ "{"
+ "\"prefix\": \"config-key get\", "
+ "\"key\": \"" RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) +
+ "/" + peer->uuid + "\""
+ "}";
+
+ bufferlist in_bl;
+ bufferlist out_bl;
+ int r = m_cluster->mon_command(cmd, in_bl, &out_bl, nullptr);
+ if (r == -ENOENT || out_bl.length() == 0) {
+ return 0;
+ } else if (r < 0) {
+ derr << "error reading mirroring peer config for pool " << pool_name << ": "
+ << cpp_strerror(r) << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_WARNING,
+ "mirroring peer config-key query failed");
+ return r;
+ }
+
+ bool json_valid = false;
+ json_spirit::mValue json_root;
+ if(json_spirit::read(out_bl.to_str(), json_root)) {
+ try {
+ auto& json_obj = json_root.get_obj();
+ if (json_obj.count("mon_host")) {
+ peer->mon_host = json_obj["mon_host"].get_str();
+ }
+ if (json_obj.count("key")) {
+ peer->key = json_obj["key"].get_str();
+ }
+ json_valid = true;
+ } catch (std::runtime_error&) {
+ }
+ }
+
+ if (!json_valid) {
+ derr << "error parsing mirroring peer config for pool " << pool_name << ", "
+ << "peer " << peer->uuid << dendl;
+ m_service_pools[pool_id] = m_service_daemon->add_or_update_callout(
+ pool_id, m_service_pools[pool_id],
+ service_daemon::CALLOUT_LEVEL_WARNING,
+ "mirroring peer config-key decode failed");
+ }
+
+ return 0;
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/ClusterWatcher.h b/src/tools/rbd_mirror/ClusterWatcher.h
new file mode 100644
index 000000000..93356fec6
--- /dev/null
+++ b/src/tools/rbd_mirror/ClusterWatcher.h
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
+#define CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+
+#include "common/ceph_context.h"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <unordered_map>
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ServiceDaemon;
+
+/**
+ * Tracks mirroring configuration for pools in a single
+ * cluster.
+ */
+class ClusterWatcher {
+public:
+ struct PeerSpecCompare {
+ bool operator()(const PeerSpec& lhs, const PeerSpec& rhs) const {
+ return (lhs.uuid < rhs.uuid);
+ }
+ };
+ typedef std::set<PeerSpec, PeerSpecCompare> Peers;
+ typedef std::map<int64_t, Peers> PoolPeers;
+
+ ClusterWatcher(RadosRef cluster, ceph::mutex &lock,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon);
+ ~ClusterWatcher() = default;
+ ClusterWatcher(const ClusterWatcher&) = delete;
+ ClusterWatcher& operator=(const ClusterWatcher&) = delete;
+
+ // Caller controls frequency of calls
+ void refresh_pools();
+ const PoolPeers& get_pool_peers() const;
+ std::string get_site_name() const;
+
+private:
+ typedef std::unordered_map<int64_t, service_daemon::CalloutId> ServicePools;
+
+ RadosRef m_cluster;
+ ceph::mutex &m_lock;
+ ServiceDaemon<librbd::ImageCtx>* m_service_daemon;
+
+ ServicePools m_service_pools;
+ PoolPeers m_pool_peers;
+ std::string m_site_name;
+
+ void read_pool_peers(PoolPeers *pool_peers);
+
+ int read_site_name(std::string* site_name);
+
+ int resolve_peer_site_config_keys(
+ int64_t pool_id, const std::string& pool_name, PeerSpec* peer);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_CLUSTER_WATCHER_H
diff --git a/src/tools/rbd_mirror/ImageDeleter.cc b/src/tools/rbd_mirror/ImageDeleter.cc
new file mode 100644
index 000000000..ba137e6fd
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageDeleter.cc
@@ -0,0 +1,549 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "include/rados/librados.hpp"
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "global/global_context.h"
+#include "librbd/internal.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/asio/ContextWQ.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/Utils.h"
+#include "ImageDeleter.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/Throttler.h"
+#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/TrashWatcher.h"
+#include <map>
+#include <sstream>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+
+using std::string;
+using std::stringstream;
+using std::vector;
+using std::pair;
+using std::make_pair;
+
+using librados::IoCtx;
+using namespace librbd;
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_async_context_callback;
+
+namespace {
+
+class ImageDeleterAdminSocketCommand {
+public:
+ virtual ~ImageDeleterAdminSocketCommand() {}
+ virtual int call(Formatter *f) = 0;
+};
+
+template <typename I>
+class StatusCommand : public ImageDeleterAdminSocketCommand {
+public:
+ explicit StatusCommand(ImageDeleter<I> *image_del) : image_del(image_del) {}
+
+ int call(Formatter *f) override {
+ image_del->print_status(f);
+ return 0;
+ }
+
+private:
+ ImageDeleter<I> *image_del;
+};
+
+} // anonymous namespace
+
+template <typename I>
+class ImageDeleterAdminSocketHook : public AdminSocketHook {
+public:
+ ImageDeleterAdminSocketHook(CephContext *cct, const std::string& pool_name,
+ ImageDeleter<I> *image_del) :
+ admin_socket(cct->get_admin_socket()) {
+
+ std::string command;
+ int r;
+
+ command = "rbd mirror deletion status " + pool_name;
+ r = admin_socket->register_command(command, this,
+ "get status for image deleter");
+ if (r == 0) {
+ commands[command] = new StatusCommand<I>(image_del);
+ }
+
+ }
+
+ ~ImageDeleterAdminSocketHook() override {
+ (void)admin_socket->unregister_commands(this);
+ for (Commands::const_iterator i = commands.begin(); i != commands.end();
+ ++i) {
+ delete i->second;
+ }
+ }
+
+ int call(std::string_view command, const cmdmap_t& cmdmap,
+ const bufferlist&,
+ Formatter *f,
+ std::ostream& errss,
+ bufferlist& out) override {
+ Commands::const_iterator i = commands.find(command);
+ ceph_assert(i != commands.end());
+ return i->second->call(f);
+ }
+
+private:
+ typedef std::map<std::string, ImageDeleterAdminSocketCommand*,
+ std::less<>> Commands;
+ AdminSocket *admin_socket;
+ Commands commands;
+};
+
+template <typename I>
+ImageDeleter<I>::ImageDeleter(
+ librados::IoCtx& local_io_ctx, Threads<librbd::ImageCtx>* threads,
+ Throttler<librbd::ImageCtx>* image_deletion_throttler,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon)
+ : m_local_io_ctx(local_io_ctx), m_threads(threads),
+ m_image_deletion_throttler(image_deletion_throttler),
+ m_service_daemon(service_daemon), m_trash_listener(this),
+ m_lock(ceph::make_mutex(
+ librbd::util::unique_lock_name("rbd::mirror::ImageDeleter::m_lock",
+ this))) {
+}
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << " " \
+ << __func__ << ": "
+
+template <typename I>
+void ImageDeleter<I>::trash_move(librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id,
+ bool resync,
+ librbd::asio::ContextWQ* work_queue,
+ Context* on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << ", "
+ << "resync=" << resync << dendl;
+
+ auto req = rbd::mirror::image_deleter::TrashMoveRequest<>::create(
+ local_io_ctx, global_image_id, resync, work_queue, on_finish);
+ req->send();
+}
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << this << " " \
+ << __func__ << ": "
+
+template <typename I>
+void ImageDeleter<I>::init(Context* on_finish) {
+ dout(10) << dendl;
+
+ m_asok_hook = new ImageDeleterAdminSocketHook<I>(
+ g_ceph_context, m_local_io_ctx.get_pool_name(), this);
+
+ m_trash_watcher = image_deleter::TrashWatcher<I>::create(m_local_io_ctx,
+ m_threads,
+ m_trash_listener);
+ m_trash_watcher->init(on_finish);
+}
+
+template <typename I>
+void ImageDeleter<I>::shut_down(Context* on_finish) {
+ dout(10) << dendl;
+
+ delete m_asok_hook;
+ m_asok_hook = nullptr;
+
+ m_image_deletion_throttler->drain(m_local_io_ctx.get_namespace(),
+ -ESTALE);
+
+ shut_down_trash_watcher(on_finish);
+}
+
+template <typename I>
+void ImageDeleter<I>::shut_down_trash_watcher(Context* on_finish) {
+ dout(10) << dendl;
+ ceph_assert(m_trash_watcher);
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ delete m_trash_watcher;
+ m_trash_watcher = nullptr;
+
+ wait_for_ops(on_finish);
+ });
+ m_trash_watcher->shut_down(ctx);
+}
+
+template <typename I>
+void ImageDeleter<I>::wait_for_ops(Context* on_finish) {
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ m_running = false;
+ cancel_retry_timer();
+ }
+
+ auto ctx = new LambdaContext([this, on_finish](int) {
+ cancel_all_deletions(on_finish);
+ });
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void ImageDeleter<I>::cancel_all_deletions(Context* on_finish) {
+ m_image_deletion_throttler->drain(m_local_io_ctx.get_namespace(),
+ -ECANCELED);
+ {
+ std::lock_guard locker{m_lock};
+ // wake up any external state machines waiting on deletions
+ ceph_assert(m_in_flight_delete_queue.empty());
+ for (auto& queue : {&m_delete_queue, &m_retry_delete_queue}) {
+ for (auto& info : *queue) {
+ notify_on_delete(info->image_id, -ECANCELED);
+ }
+ queue->clear();
+ }
+ }
+ on_finish->complete(0);
+}
+
+template <typename I>
+void ImageDeleter<I>::wait_for_deletion(const std::string& image_id,
+ bool scheduled_only,
+ Context* on_finish) {
+ dout(5) << "image_id=" << image_id << dendl;
+
+ on_finish = new LambdaContext([this, on_finish](int r) {
+ m_threads->work_queue->queue(on_finish, r);
+ });
+
+ std::lock_guard locker{m_lock};
+ auto del_info = find_delete_info(image_id);
+ if (!del_info && scheduled_only) {
+ // image not scheduled for deletion
+ on_finish->complete(0);
+ return;
+ }
+
+ notify_on_delete(image_id, -ESTALE);
+ m_on_delete_contexts[image_id] = on_finish;
+}
+
+template <typename I>
+void ImageDeleter<I>::complete_active_delete(DeleteInfoRef* delete_info,
+ int r) {
+ dout(20) << "info=" << *delete_info << ", r=" << r << dendl;
+ std::lock_guard locker{m_lock};
+ notify_on_delete((*delete_info)->image_id, r);
+ delete_info->reset();
+}
+
+template <typename I>
+void ImageDeleter<I>::enqueue_failed_delete(DeleteInfoRef* delete_info,
+ int error_code,
+ double retry_delay) {
+ dout(20) << "info=" << *delete_info << ", r=" << error_code << dendl;
+ if (error_code == -EBLOCKLISTED) {
+ std::lock_guard locker{m_lock};
+ derr << "blocklisted while deleting local image" << dendl;
+ complete_active_delete(delete_info, error_code);
+ return;
+ }
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ auto& delete_info_ref = *delete_info;
+ notify_on_delete(delete_info_ref->image_id, error_code);
+ delete_info_ref->error_code = error_code;
+ ++delete_info_ref->retries;
+ delete_info_ref->retry_time = (clock_t::now() +
+ ceph::make_timespan(retry_delay));
+ m_retry_delete_queue.push_back(delete_info_ref);
+
+ schedule_retry_timer();
+}
+
+template <typename I>
+typename ImageDeleter<I>::DeleteInfoRef
+ImageDeleter<I>::find_delete_info(const std::string &image_id) {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ DeleteQueue delete_queues[] = {m_in_flight_delete_queue,
+ m_retry_delete_queue,
+ m_delete_queue};
+
+ DeleteInfo delete_info{image_id};
+ for (auto& queue : delete_queues) {
+ auto it = std::find_if(queue.begin(), queue.end(),
+ [&delete_info](const DeleteInfoRef& ref) {
+ return delete_info == *ref;
+ });
+ if (it != queue.end()) {
+ return *it;
+ }
+ }
+ return {};
+}
+
+template <typename I>
+void ImageDeleter<I>::print_status(Formatter *f) {
+ dout(20) << dendl;
+
+ f->open_object_section("image_deleter_status");
+ f->open_array_section("delete_images_queue");
+
+ std::lock_guard l{m_lock};
+ for (const auto& image : m_delete_queue) {
+ image->print_status(f);
+ }
+
+ f->close_section();
+ f->open_array_section("failed_deletes_queue");
+ for (const auto& image : m_retry_delete_queue) {
+ image->print_status(f, true);
+ }
+
+ f->close_section();
+ f->close_section();
+}
+
+template <typename I>
+vector<string> ImageDeleter<I>::get_delete_queue_items() {
+ vector<string> items;
+
+ std::lock_guard l{m_lock};
+ for (const auto& del_info : m_delete_queue) {
+ items.push_back(del_info->image_id);
+ }
+
+ return items;
+}
+
+template <typename I>
+vector<pair<string, int> > ImageDeleter<I>::get_failed_queue_items() {
+ vector<pair<string, int> > items;
+
+ std::lock_guard l{m_lock};
+ for (const auto& del_info : m_retry_delete_queue) {
+ items.push_back(make_pair(del_info->image_id,
+ del_info->error_code));
+ }
+
+ return items;
+}
+
+template <typename I>
+void ImageDeleter<I>::remove_images() {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ while (m_running && !m_delete_queue.empty()) {
+
+ DeleteInfoRef delete_info = m_delete_queue.front();
+ m_delete_queue.pop_front();
+
+ ceph_assert(delete_info);
+
+ auto on_start = create_async_context_callback(
+ m_threads->work_queue, new LambdaContext(
+ [this, delete_info](int r) {
+ if (r < 0) {
+ notify_on_delete(delete_info->image_id, r);
+ return;
+ }
+ remove_image(delete_info);
+ }));
+
+ m_image_deletion_throttler->start_op(m_local_io_ctx.get_namespace(),
+ delete_info->image_id, on_start);
+ }
+}
+
+template <typename I>
+void ImageDeleter<I>::remove_image(DeleteInfoRef delete_info) {
+ dout(10) << "info=" << *delete_info << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_in_flight_delete_queue.push_back(delete_info);
+ m_async_op_tracker.start_op();
+
+ auto ctx = new LambdaContext([this, delete_info](int r) {
+ handle_remove_image(delete_info, r);
+ m_async_op_tracker.finish_op();
+ });
+
+ auto req = image_deleter::TrashRemoveRequest<I>::create(
+ m_local_io_ctx, delete_info->image_id, &delete_info->error_result,
+ m_threads->work_queue, ctx);
+ req->send();
+}
+
+template <typename I>
+void ImageDeleter<I>::handle_remove_image(DeleteInfoRef delete_info,
+ int r) {
+ dout(10) << "info=" << *delete_info << ", r=" << r << dendl;
+
+ m_image_deletion_throttler->finish_op(m_local_io_ctx.get_namespace(),
+ delete_info->image_id);
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ auto it = std::find(m_in_flight_delete_queue.begin(),
+ m_in_flight_delete_queue.end(), delete_info);
+ ceph_assert(it != m_in_flight_delete_queue.end());
+ m_in_flight_delete_queue.erase(it);
+ }
+
+ if (r < 0) {
+ if (delete_info->error_result == image_deleter::ERROR_RESULT_COMPLETE) {
+ complete_active_delete(&delete_info, r);
+ } else if (delete_info->error_result ==
+ image_deleter::ERROR_RESULT_RETRY_IMMEDIATELY) {
+ enqueue_failed_delete(&delete_info, r, m_busy_interval);
+ } else {
+ auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+ double failed_interval = cct->_conf.get_val<double>(
+ "rbd_mirror_delete_retry_interval");
+ enqueue_failed_delete(&delete_info, r, failed_interval);
+ }
+ } else {
+ complete_active_delete(&delete_info, 0);
+ }
+
+ // process the next queued image to delete
+ remove_images();
+}
+
+template <typename I>
+void ImageDeleter<I>::schedule_retry_timer() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ if (!m_running || m_timer_ctx != nullptr || m_retry_delete_queue.empty()) {
+ return;
+ }
+
+ dout(10) << dendl;
+ auto &delete_info = m_retry_delete_queue.front();
+ m_timer_ctx = new LambdaContext([this](int r) {
+ handle_retry_timer();
+ });
+ m_threads->timer->add_event_at(delete_info->retry_time, m_timer_ctx);
+}
+
+template <typename I>
+void ImageDeleter<I>::cancel_retry_timer() {
+ dout(10) << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ if (m_timer_ctx != nullptr) {
+ bool canceled = m_threads->timer->cancel_event(m_timer_ctx);
+ m_timer_ctx = nullptr;
+ ceph_assert(canceled);
+ }
+}
+
+template <typename I>
+void ImageDeleter<I>::handle_retry_timer() {
+ dout(10) << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_timer_ctx != nullptr);
+ m_timer_ctx = nullptr;
+
+ ceph_assert(m_running);
+ ceph_assert(!m_retry_delete_queue.empty());
+
+ // move all ready-to-ready items back to main queue
+ auto now = clock_t::now();
+ while (!m_retry_delete_queue.empty()) {
+ auto &delete_info = m_retry_delete_queue.front();
+ if (delete_info->retry_time > now) {
+ break;
+ }
+
+ m_delete_queue.push_back(delete_info);
+ m_retry_delete_queue.pop_front();
+ }
+
+ // schedule wake up for any future retries
+ schedule_retry_timer();
+
+ // start (concurrent) removal of images
+ m_async_op_tracker.start_op();
+ auto ctx = new LambdaContext([this](int r) {
+ remove_images();
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void ImageDeleter<I>::handle_trash_image(const std::string& image_id,
+ const ImageDeleter<I>::clock_t::time_point& deferment_end_time) {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ auto del_info = find_delete_info(image_id);
+ if (del_info != nullptr) {
+ dout(20) << "image " << image_id << " "
+ << "was already scheduled for deletion" << dendl;
+ return;
+ }
+
+ dout(10) << "image_id=" << image_id << ", "
+ << "deferment_end_time=" << utime_t{deferment_end_time} << dendl;
+
+ del_info.reset(new DeleteInfo(image_id));
+ del_info->retry_time = deferment_end_time;
+ m_retry_delete_queue.push_back(del_info);
+
+ schedule_retry_timer();
+}
+
+template <typename I>
+void ImageDeleter<I>::notify_on_delete(const std::string& image_id,
+ int r) {
+ dout(10) << "image_id=" << image_id << ", r=" << r << dendl;
+ auto it = m_on_delete_contexts.find(image_id);
+ if (it == m_on_delete_contexts.end()) {
+ return;
+ }
+
+ it->second->complete(r);
+ m_on_delete_contexts.erase(it);
+}
+
+template <typename I>
+void ImageDeleter<I>::DeleteInfo::print_status(Formatter *f,
+ bool print_failure_info) {
+ f->open_object_section("delete_info");
+ f->dump_string("image_id", image_id);
+ if (print_failure_info) {
+ f->dump_string("error_code", cpp_strerror(error_code));
+ f->dump_int("retries", retries);
+ }
+ f->close_section();
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageDeleter<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageDeleter.h b/src/tools/rbd_mirror/ImageDeleter.h
new file mode 100644
index 000000000..5fe79496b
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageDeleter.h
@@ -0,0 +1,189 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_H
+
+#include "include/utime.h"
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+#include <atomic>
+#include <deque>
+#include <iosfwd>
+#include <map>
+#include <memory>
+#include <vector>
+
+class AdminSocketHook;
+class Context;
+namespace librbd {
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ServiceDaemon;
+template <typename> class Threads;
+template <typename> class Throttler;
+
+namespace image_deleter { template <typename> struct TrashWatcher; }
+
+/**
+ * Manage deletion of non-primary images.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageDeleter {
+public:
+ static ImageDeleter* create(
+ librados::IoCtx& local_io_ctx, Threads<librbd::ImageCtx>* threads,
+ Throttler<librbd::ImageCtx>* image_deletion_throttler,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon) {
+ return new ImageDeleter(local_io_ctx, threads, image_deletion_throttler,
+ service_daemon);
+ }
+
+ ImageDeleter(librados::IoCtx& local_io_ctx,
+ Threads<librbd::ImageCtx>* threads,
+ Throttler<librbd::ImageCtx>* image_deletion_throttler,
+ ServiceDaemon<librbd::ImageCtx>* service_daemon);
+
+ ImageDeleter(const ImageDeleter&) = delete;
+ ImageDeleter& operator=(const ImageDeleter&) = delete;
+
+ static void trash_move(librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id, bool resync,
+ librbd::asio::ContextWQ* work_queue,
+ Context* on_finish);
+
+ void init(Context* on_finish);
+ void shut_down(Context* on_finish);
+
+ void print_status(Formatter *f);
+
+ // for testing purposes
+ void wait_for_deletion(const std::string &image_id,
+ bool scheduled_only, Context* on_finish);
+
+ std::vector<std::string> get_delete_queue_items();
+ std::vector<std::pair<std::string, int> > get_failed_queue_items();
+
+ inline void set_busy_timer_interval(double interval) {
+ m_busy_interval = interval;
+ }
+
+private:
+ using clock_t = ceph::real_clock;
+ struct TrashListener : public image_deleter::TrashListener {
+ ImageDeleter *image_deleter;
+
+ TrashListener(ImageDeleter *image_deleter) : image_deleter(image_deleter) {
+ }
+
+ void handle_trash_image(const std::string& image_id,
+ const ceph::real_clock::time_point& deferment_end_time) override {
+ image_deleter->handle_trash_image(image_id, deferment_end_time);
+ }
+ };
+
+ struct DeleteInfo {
+ std::string image_id;
+
+ image_deleter::ErrorResult error_result = {};
+ int error_code = 0;
+ clock_t::time_point retry_time;
+ int retries = 0;
+
+ DeleteInfo(const std::string& image_id)
+ : image_id(image_id) {
+ }
+
+ inline bool operator==(const DeleteInfo& delete_info) const {
+ return (image_id == delete_info.image_id);
+ }
+
+ friend std::ostream& operator<<(std::ostream& os, DeleteInfo& delete_info) {
+ os << "[image_id=" << delete_info.image_id << "]";
+ return os;
+ }
+
+ void print_status(Formatter *f,
+ bool print_failure_info=false);
+ };
+ typedef std::shared_ptr<DeleteInfo> DeleteInfoRef;
+ typedef std::deque<DeleteInfoRef> DeleteQueue;
+ typedef std::map<std::string, Context*> OnDeleteContexts;
+
+ librados::IoCtx& m_local_io_ctx;
+ Threads<librbd::ImageCtx>* m_threads;
+ Throttler<librbd::ImageCtx>* m_image_deletion_throttler;
+ ServiceDaemon<librbd::ImageCtx>* m_service_daemon;
+
+ image_deleter::TrashWatcher<ImageCtxT>* m_trash_watcher = nullptr;
+ TrashListener m_trash_listener;
+
+ std::atomic<unsigned> m_running { 1 };
+
+ double m_busy_interval = 1;
+
+ AsyncOpTracker m_async_op_tracker;
+
+ ceph::mutex m_lock;
+ DeleteQueue m_delete_queue;
+ DeleteQueue m_retry_delete_queue;
+ DeleteQueue m_in_flight_delete_queue;
+
+ OnDeleteContexts m_on_delete_contexts;
+
+ AdminSocketHook *m_asok_hook = nullptr;
+
+ Context *m_timer_ctx = nullptr;
+
+ bool process_image_delete();
+
+ void complete_active_delete(DeleteInfoRef* delete_info, int r);
+ void enqueue_failed_delete(DeleteInfoRef* delete_info, int error_code,
+ double retry_delay);
+
+ DeleteInfoRef find_delete_info(const std::string &image_id);
+
+ void remove_images();
+ void remove_image(DeleteInfoRef delete_info);
+ void handle_remove_image(DeleteInfoRef delete_info, int r);
+
+ void schedule_retry_timer();
+ void cancel_retry_timer();
+ void handle_retry_timer();
+
+ void handle_trash_image(const std::string& image_id,
+ const clock_t::time_point& deferment_end_time);
+
+ void shut_down_trash_watcher(Context* on_finish);
+ void wait_for_ops(Context* on_finish);
+ void cancel_all_deletions(Context* on_finish);
+
+ void notify_on_delete(const std::string& image_id, int r);
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageDeleter<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_H
diff --git a/src/tools/rbd_mirror/ImageMap.cc b/src/tools/rbd_mirror/ImageMap.cc
new file mode 100644
index 000000000..bd005b466
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageMap.cc
@@ -0,0 +1,604 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/Threads.h"
+
+#include "ImageMap.h"
+#include "image_map/LoadRequest.h"
+#include "image_map/SimplePolicy.h"
+#include "image_map/UpdateRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageMap: " << this << " " \
+ << __func__ << ": "
+
+using namespace std;
+
+namespace rbd {
+namespace mirror {
+
+using ::operator<<;
+using image_map::Policy;
+
+using librbd::util::unique_lock_name;
+using librbd::util::create_async_context_callback;
+
+template <typename I>
+struct ImageMap<I>::C_NotifyInstance : public Context {
+ ImageMap* image_map;
+ std::string global_image_id;
+ bool acquire_release;
+
+ C_NotifyInstance(ImageMap* image_map, const std::string& global_image_id,
+ bool acquire_release)
+ : image_map(image_map), global_image_id(global_image_id),
+ acquire_release(acquire_release) {
+ image_map->start_async_op();
+ }
+
+ void finish(int r) override {
+ if (acquire_release) {
+ image_map->handle_peer_ack(global_image_id, r);
+ } else {
+ image_map->handle_peer_ack_remove(global_image_id, r);
+ }
+ image_map->finish_async_op();
+ }
+};
+
+template <typename I>
+ImageMap<I>::ImageMap(librados::IoCtx &ioctx, Threads<I> *threads,
+ const std::string& instance_id,
+ image_map::Listener &listener)
+ : m_ioctx(ioctx), m_threads(threads), m_instance_id(instance_id),
+ m_listener(listener),
+ m_lock(ceph::make_mutex(
+ unique_lock_name("rbd::mirror::ImageMap::m_lock", this))) {
+}
+
+template <typename I>
+ImageMap<I>::~ImageMap() {
+ ceph_assert(m_async_op_tracker.empty());
+ ceph_assert(m_timer_task == nullptr);
+ ceph_assert(m_rebalance_task == nullptr);
+}
+
+template <typename I>
+void ImageMap<I>::continue_action(const std::set<std::string> &global_image_ids,
+ int r) {
+ dout(20) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_shutting_down) {
+ return;
+ }
+
+ for (auto const &global_image_id : global_image_ids) {
+ bool schedule = m_policy->finish_action(global_image_id, r);
+ if (schedule) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::handle_update_request(
+ const Updates &updates,
+ const std::set<std::string> &remove_global_image_ids, int r) {
+ dout(20) << "r=" << r << dendl;
+
+ std::set<std::string> global_image_ids;
+
+ global_image_ids.insert(remove_global_image_ids.begin(),
+ remove_global_image_ids.end());
+ for (auto const &update : updates) {
+ global_image_ids.insert(update.global_image_id);
+ }
+
+ continue_action(global_image_ids, r);
+}
+
+template <typename I>
+void ImageMap<I>::update_image_mapping(Updates&& map_updates,
+ std::set<std::string>&& map_removals) {
+ if (map_updates.empty() && map_removals.empty()) {
+ return;
+ }
+
+ dout(5) << "updates=[" << map_updates << "], "
+ << "removes=[" << map_removals << "]" << dendl;
+
+ Context *on_finish = new LambdaContext(
+ [this, map_updates, map_removals](int r) {
+ handle_update_request(map_updates, map_removals, r);
+ finish_async_op();
+ });
+ on_finish = create_async_context_callback(m_threads->work_queue, on_finish);
+
+ // empty meta policy for now..
+ image_map::PolicyMetaNone policy_meta;
+
+ bufferlist bl;
+ encode(image_map::PolicyData(policy_meta), bl);
+
+ // prepare update map
+ std::map<std::string, cls::rbd::MirrorImageMap> update_mapping;
+ for (auto const &update : map_updates) {
+ update_mapping.emplace(
+ update.global_image_id, cls::rbd::MirrorImageMap(update.instance_id,
+ update.mapped_time, bl));
+ }
+
+ start_async_op();
+ image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create(
+ m_ioctx, std::move(update_mapping), std::move(map_removals), on_finish);
+ req->send();
+}
+
+template <typename I>
+void ImageMap<I>::process_updates() {
+ dout(20) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(m_timer_task == nullptr);
+
+ Updates map_updates;
+ std::set<std::string> map_removals;
+ Updates acquire_updates;
+ Updates release_updates;
+
+ // gather updates by advancing the state machine
+ m_lock.lock();
+ for (auto const &global_image_id : m_global_image_ids) {
+ image_map::ActionType action_type =
+ m_policy->start_action(global_image_id);
+ image_map::LookupInfo info = m_policy->lookup(global_image_id);
+
+ dout(15) << "global_image_id=" << global_image_id << ", "
+ << "action=" << action_type << ", "
+ << "instance=" << info.instance_id << dendl;
+ switch (action_type) {
+ case image_map::ACTION_TYPE_NONE:
+ continue;
+ case image_map::ACTION_TYPE_MAP_UPDATE:
+ ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+ map_updates.emplace_back(global_image_id, info.instance_id,
+ info.mapped_time);
+ break;
+ case image_map::ACTION_TYPE_MAP_REMOVE:
+ map_removals.emplace(global_image_id);
+ break;
+ case image_map::ACTION_TYPE_ACQUIRE:
+ ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+ acquire_updates.emplace_back(global_image_id, info.instance_id);
+ break;
+ case image_map::ACTION_TYPE_RELEASE:
+ ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+ release_updates.emplace_back(global_image_id, info.instance_id);
+ break;
+ }
+ }
+ m_global_image_ids.clear();
+ m_lock.unlock();
+
+ // notify listener (acquire, release) and update on-disk map. note
+ // that its safe to process this outside m_lock as we still hold
+ // timer lock.
+ notify_listener_acquire_release_images(acquire_updates, release_updates);
+ update_image_mapping(std::move(map_updates), std::move(map_removals));
+}
+
+template <typename I>
+void ImageMap<I>::schedule_update_task() {
+ std::lock_guard timer_lock{m_threads->timer_lock};
+ schedule_update_task(m_threads->timer_lock);
+}
+
+template <typename I>
+void ImageMap<I>::schedule_update_task(const ceph::mutex &timer_lock) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+
+ schedule_rebalance_task();
+
+ if (m_timer_task != nullptr) {
+ return;
+ }
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_global_image_ids.empty()) {
+ return;
+ }
+ }
+
+ m_timer_task = new LambdaContext([this](int r) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ m_timer_task = nullptr;
+
+ process_updates();
+ });
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ double after = cct->_conf.get_val<double>("rbd_mirror_image_policy_update_throttle_interval");
+
+ dout(20) << "scheduling image check update (" << m_timer_task << ")"
+ << " after " << after << " second(s)" << dendl;
+ m_threads->timer->add_event_after(after, m_timer_task);
+}
+
+template <typename I>
+void ImageMap<I>::rebalance() {
+ ceph_assert(m_rebalance_task == nullptr);
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_async_op_tracker.empty() && m_global_image_ids.empty()){
+ dout(20) << "starting rebalance" << dendl;
+
+ std::set<std::string> remap_global_image_ids;
+ m_policy->add_instances({}, &remap_global_image_ids);
+
+ for (auto const &global_image_id : remap_global_image_ids) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+
+ schedule_update_task(m_threads->timer_lock);
+}
+
+template <typename I>
+void ImageMap<I>::schedule_rebalance_task() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+
+ // fetch the updated value of idle timeout for (re)scheduling
+ double resched_after = cct->_conf.get_val<double>(
+ "rbd_mirror_image_policy_rebalance_timeout");
+ if (!resched_after) {
+ return;
+ }
+
+ // cancel existing rebalance task if any before scheduling
+ if (m_rebalance_task != nullptr) {
+ m_threads->timer->cancel_event(m_rebalance_task);
+ }
+
+ m_rebalance_task = new LambdaContext([this](int _) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ m_rebalance_task = nullptr;
+
+ rebalance();
+ });
+
+ dout(20) << "scheduling rebalance (" << m_rebalance_task << ")"
+ << " after " << resched_after << " second(s)" << dendl;
+ m_threads->timer->add_event_after(resched_after, m_rebalance_task);
+}
+
+template <typename I>
+void ImageMap<I>::schedule_action(const std::string &global_image_id) {
+ dout(20) << "global_image_id=" << global_image_id << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ m_global_image_ids.emplace(global_image_id);
+}
+
+template <typename I>
+void ImageMap<I>::notify_listener_acquire_release_images(
+ const Updates &acquire, const Updates &release) {
+ if (acquire.empty() && release.empty()) {
+ return;
+ }
+
+ dout(5) << "acquire=[" << acquire << "], "
+ << "release=[" << release << "]" << dendl;
+
+ for (auto const &update : acquire) {
+ m_listener.acquire_image(
+ update.global_image_id, update.instance_id,
+ create_async_context_callback(
+ m_threads->work_queue,
+ new C_NotifyInstance(this, update.global_image_id, true)));
+ }
+
+ for (auto const &update : release) {
+ m_listener.release_image(
+ update.global_image_id, update.instance_id,
+ create_async_context_callback(
+ m_threads->work_queue,
+ new C_NotifyInstance(this, update.global_image_id, true)));
+ }
+}
+
+template <typename I>
+void ImageMap<I>::notify_listener_remove_images(const std::string &peer_uuid,
+ const Updates &remove) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", "
+ << "remove=[" << remove << "]" << dendl;
+
+ for (auto const &update : remove) {
+ m_listener.remove_image(
+ peer_uuid, update.global_image_id, update.instance_id,
+ create_async_context_callback(
+ m_threads->work_queue,
+ new C_NotifyInstance(this, update.global_image_id, false)));
+ }
+}
+
+template <typename I>
+void ImageMap<I>::handle_load(const std::map<std::string,
+ cls::rbd::MirrorImageMap> &image_mapping) {
+ dout(20) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ m_policy->init(image_mapping);
+
+ for (auto& pair : image_mapping) {
+ schedule_action(pair.first);
+ }
+ }
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::handle_peer_ack_remove(const std::string &global_image_id,
+ int r) {
+ std::lock_guard locker{m_lock};
+ dout(5) << "global_image_id=" << global_image_id << dendl;
+
+ if (r < 0) {
+ derr << "failed to remove global_image_id=" << global_image_id << dendl;
+ }
+
+ auto peer_it = m_peer_map.find(global_image_id);
+ if (peer_it == m_peer_map.end()) {
+ return;
+ }
+
+ m_peer_map.erase(peer_it);
+}
+
+template <typename I>
+void ImageMap<I>::update_images_added(
+ const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", "
+ << "global_image_ids=[" << global_image_ids << "]" << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ for (auto const &global_image_id : global_image_ids) {
+ auto result = m_peer_map[global_image_id].insert(peer_uuid);
+ if (result.second && m_peer_map[global_image_id].size() == 1) {
+ if (m_policy->add_image(global_image_id)) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+}
+
+template <typename I>
+void ImageMap<I>::update_images_removed(
+ const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", "
+ << "global_image_ids=[" << global_image_ids << "]" << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Updates to_remove;
+ for (auto const &global_image_id : global_image_ids) {
+ image_map::LookupInfo info = m_policy->lookup(global_image_id);
+ bool image_mapped = (info.instance_id != image_map::UNMAPPED_INSTANCE_ID);
+
+ bool image_removed = image_mapped;
+ bool peer_removed = false;
+ auto peer_it = m_peer_map.find(global_image_id);
+ if (peer_it != m_peer_map.end()) {
+ auto& peer_set = peer_it->second;
+ peer_removed = peer_set.erase(peer_uuid);
+ image_removed = peer_removed && peer_set.empty();
+ }
+
+ if (image_mapped && peer_removed && !peer_uuid.empty()) {
+ // peer image has been deleted
+ to_remove.emplace_back(global_image_id, info.instance_id);
+ }
+
+ if (image_removed) {
+ // local and peer images have been deleted
+ if (m_policy->remove_image(global_image_id)) {
+ schedule_action(global_image_id);
+ }
+ }
+ }
+
+ if (!to_remove.empty()) {
+ // removal notification will be notified instantly. this is safe
+ // even after scheduling action for images as we still hold m_lock
+ notify_listener_remove_images(peer_uuid, to_remove);
+ }
+}
+
+template <typename I>
+void ImageMap<I>::update_instances_added(
+ const std::vector<std::string> &instance_ids) {
+ {
+ std::lock_guard locker{m_lock};
+ if (m_shutting_down) {
+ return;
+ }
+
+ std::vector<std::string> filtered_instance_ids;
+ filter_instance_ids(instance_ids, &filtered_instance_ids, false);
+ if (filtered_instance_ids.empty()) {
+ return;
+ }
+
+ dout(20) << "instance_ids=" << filtered_instance_ids << dendl;
+
+ std::set<std::string> remap_global_image_ids;
+ m_policy->add_instances(filtered_instance_ids, &remap_global_image_ids);
+
+ for (auto const &global_image_id : remap_global_image_ids) {
+ schedule_action(global_image_id);
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::update_instances_removed(
+ const std::vector<std::string> &instance_ids) {
+ {
+ std::lock_guard locker{m_lock};
+ if (m_shutting_down) {
+ return;
+ }
+
+ std::vector<std::string> filtered_instance_ids;
+ filter_instance_ids(instance_ids, &filtered_instance_ids, true);
+ if (filtered_instance_ids.empty()) {
+ return;
+ }
+
+ dout(20) << "instance_ids=" << filtered_instance_ids << dendl;
+
+ std::set<std::string> remap_global_image_ids;
+ m_policy->remove_instances(filtered_instance_ids, &remap_global_image_ids);
+
+ for (auto const &global_image_id : remap_global_image_ids) {
+ schedule_action(global_image_id);
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::update_images(const std::string &peer_uuid,
+ std::set<std::string> &&added_global_image_ids,
+ std::set<std::string> &&removed_global_image_ids) {
+ dout(5) << "peer_uuid=" << peer_uuid << ", " << "added_count="
+ << added_global_image_ids.size() << ", " << "removed_count="
+ << removed_global_image_ids.size() << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_shutting_down) {
+ return;
+ }
+
+ if (!removed_global_image_ids.empty()) {
+ update_images_removed(peer_uuid, removed_global_image_ids);
+ }
+ if (!added_global_image_ids.empty()) {
+ update_images_added(peer_uuid, added_global_image_ids);
+ }
+ }
+
+ schedule_update_task();
+}
+
+template <typename I>
+void ImageMap<I>::handle_peer_ack(const std::string &global_image_id, int r) {
+ dout (20) << "global_image_id=" << global_image_id << ", r=" << r
+ << dendl;
+
+ continue_action({global_image_id}, r);
+}
+
+template <typename I>
+void ImageMap<I>::init(Context *on_finish) {
+ dout(20) << dendl;
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type");
+
+ if (policy_type == "none" || policy_type == "simple") {
+ m_policy.reset(image_map::SimplePolicy::create(m_ioctx));
+ } else {
+ ceph_abort(); // not really needed as such, but catch it.
+ }
+
+ dout(20) << "mapping policy=" << policy_type << dendl;
+
+ start_async_op();
+ C_LoadMap *ctx = new C_LoadMap(this, on_finish);
+ image_map::LoadRequest<I> *req = image_map::LoadRequest<I>::create(
+ m_ioctx, &ctx->image_mapping, ctx);
+ req->send();
+}
+
+template <typename I>
+void ImageMap<I>::shut_down(Context *on_finish) {
+ dout(20) << dendl;
+
+ {
+ std::lock_guard timer_lock{m_threads->timer_lock};
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(!m_shutting_down);
+
+ m_shutting_down = true;
+ m_policy.reset();
+ }
+
+ if (m_timer_task != nullptr) {
+ m_threads->timer->cancel_event(m_timer_task);
+ m_timer_task = nullptr;
+ }
+ if (m_rebalance_task != nullptr) {
+ m_threads->timer->cancel_event(m_rebalance_task);
+ m_rebalance_task = nullptr;
+ }
+ }
+
+ wait_for_async_ops(on_finish);
+}
+
+template <typename I>
+void ImageMap<I>::filter_instance_ids(
+ const std::vector<std::string> &instance_ids,
+ std::vector<std::string> *filtered_instance_ids, bool removal) const {
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type");
+
+ if (policy_type != "none") {
+ *filtered_instance_ids = instance_ids;
+ return;
+ }
+
+ if (removal) {
+ // propagate removals for external instances
+ for (auto& instance_id : instance_ids) {
+ if (instance_id != m_instance_id) {
+ filtered_instance_ids->push_back(instance_id);
+ }
+ }
+ } else if (std::find(instance_ids.begin(), instance_ids.end(),
+ m_instance_id) != instance_ids.end()) {
+ // propagate addition only for local instance
+ filtered_instance_ids->push_back(m_instance_id);
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageMap<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageMap.h b/src/tools/rbd_mirror/ImageMap.h
new file mode 100644
index 000000000..9dd61ee0d
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageMap.h
@@ -0,0 +1,175 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_H
+
+#include <vector>
+
+#include "common/ceph_mutex.h"
+#include "include/Context.h"
+#include "common/AsyncOpTracker.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+
+#include "image_map/Policy.h"
+#include "image_map/Types.h"
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageMap {
+public:
+ static ImageMap *create(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads,
+ const std::string& instance_id,
+ image_map::Listener &listener) {
+ return new ImageMap(ioctx, threads, instance_id, listener);
+ }
+
+ ~ImageMap();
+
+ // init (load) the instance map from disk
+ void init(Context *on_finish);
+
+ // shut down map operations
+ void shut_down(Context *on_finish);
+
+ // update (add/remove) images
+ void update_images(const std::string &peer_uuid,
+ std::set<std::string> &&added_global_image_ids,
+ std::set<std::string> &&removed_global_image_ids);
+
+ // add/remove instances
+ void update_instances_added(const std::vector<std::string> &instances);
+ void update_instances_removed(const std::vector<std::string> &instances);
+
+private:
+ struct C_NotifyInstance;
+
+ ImageMap(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads,
+ const std::string& instance_id, image_map::Listener &listener);
+
+ struct Update {
+ std::string global_image_id;
+ std::string instance_id;
+ utime_t mapped_time;
+
+ Update(const std::string &global_image_id, const std::string &instance_id,
+ utime_t mapped_time)
+ : global_image_id(global_image_id),
+ instance_id(instance_id),
+ mapped_time(mapped_time) {
+ }
+ Update(const std::string &global_image_id, const std::string &instance_id)
+ : Update(global_image_id, instance_id, ceph_clock_now()) {
+ }
+
+ friend std::ostream& operator<<(std::ostream& os,
+ const Update& update) {
+ os << "{global_image_id=" << update.global_image_id << ", "
+ << "instance_id=" << update.instance_id << "}";
+ return os;
+ }
+
+ };
+ typedef std::list<Update> Updates;
+
+ // Lock ordering: m_threads->timer_lock, m_lock
+
+ librados::IoCtx &m_ioctx;
+ Threads<ImageCtxT> *m_threads;
+ std::string m_instance_id;
+ image_map::Listener &m_listener;
+
+ std::unique_ptr<image_map::Policy> m_policy; // our mapping policy
+
+ Context *m_timer_task = nullptr;
+ ceph::mutex m_lock;
+ bool m_shutting_down = false;
+ AsyncOpTracker m_async_op_tracker;
+
+ // global_image_id -> registered peers ("" == local, remote otherwise)
+ std::map<std::string, std::set<std::string> > m_peer_map;
+
+ std::set<std::string> m_global_image_ids;
+
+ Context *m_rebalance_task = nullptr;
+
+ struct C_LoadMap : Context {
+ ImageMap *image_map;
+ Context *on_finish;
+
+ std::map<std::string, cls::rbd::MirrorImageMap> image_mapping;
+
+ C_LoadMap(ImageMap *image_map, Context *on_finish)
+ : image_map(image_map),
+ on_finish(on_finish) {
+ }
+
+ void finish(int r) override {
+ if (r == 0) {
+ image_map->handle_load(image_mapping);
+ }
+
+ image_map->finish_async_op();
+ on_finish->complete(r);
+ }
+ };
+
+ // async op-tracker helper routines
+ void start_async_op() {
+ m_async_op_tracker.start_op();
+ }
+ void finish_async_op() {
+ m_async_op_tracker.finish_op();
+ }
+ void wait_for_async_ops(Context *on_finish) {
+ m_async_op_tracker.wait_for_ops(on_finish);
+ }
+
+ void handle_peer_ack(const std::string &global_image_id, int r);
+ void handle_peer_ack_remove(const std::string &global_image_id, int r);
+
+ void handle_load(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping);
+ void handle_update_request(const Updates &updates,
+ const std::set<std::string> &remove_global_image_ids, int r);
+
+ // continue (retry or resume depending on state machine) processing
+ // current action.
+ void continue_action(const std::set<std::string> &global_image_ids, int r);
+
+ // schedule an image for update
+ void schedule_action(const std::string &global_image_id);
+
+ void schedule_update_task();
+ void schedule_update_task(const ceph::mutex &timer_lock);
+ void process_updates();
+ void update_image_mapping(Updates&& map_updates,
+ std::set<std::string>&& map_removals);
+
+ void rebalance();
+ void schedule_rebalance_task();
+
+ void notify_listener_acquire_release_images(const Updates &acquire, const Updates &release);
+ void notify_listener_remove_images(const std::string &peer_uuid, const Updates &remove);
+
+ void update_images_added(const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids);
+ void update_images_removed(const std::string &peer_uuid,
+ const std::set<std::string> &global_image_ids);
+
+ void filter_instance_ids(const std::vector<std::string> &instance_ids,
+ std::vector<std::string> *filtered_instance_ids,
+ bool removal) const;
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_H
diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc
new file mode 100644
index 000000000..1e88c3262
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageReplayer.cc
@@ -0,0 +1,1201 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "include/stringify.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/Timer.h"
+#include "global/global_context.h"
+#include "journal/Journaler.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "ImageDeleter.h"
+#include "ImageReplayer.h"
+#include "MirrorStatusUpdater.h"
+#include "Threads.h"
+#include "tools/rbd_mirror/image_replayer/BootstrapRequest.h"
+#include "tools/rbd_mirror/image_replayer/ReplayerListener.h"
+#include "tools/rbd_mirror/image_replayer/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/journal/Replayer.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+#include <map>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::" << *this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+std::ostream &operator<<(std::ostream &os,
+ const typename ImageReplayer<I>::State &state);
+
+namespace {
+
+template <typename I>
+class ImageReplayerAdminSocketCommand {
+public:
+ ImageReplayerAdminSocketCommand(const std::string &desc,
+ ImageReplayer<I> *replayer)
+ : desc(desc), replayer(replayer) {
+ }
+ virtual ~ImageReplayerAdminSocketCommand() {}
+ virtual int call(Formatter *f) = 0;
+
+ std::string desc;
+ ImageReplayer<I> *replayer;
+ bool registered = false;
+};
+
+template <typename I>
+class StatusCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit StatusCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->replayer->print_status(f);
+ return 0;
+ }
+};
+
+template <typename I>
+class StartCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit StartCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->replayer->start(nullptr, true);
+ return 0;
+ }
+};
+
+template <typename I>
+class StopCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit StopCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->replayer->stop(nullptr, true);
+ return 0;
+ }
+};
+
+template <typename I>
+class RestartCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit RestartCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->replayer->restart();
+ return 0;
+ }
+};
+
+template <typename I>
+class FlushCommand : public ImageReplayerAdminSocketCommand<I> {
+public:
+ explicit FlushCommand(const std::string &desc, ImageReplayer<I> *replayer)
+ : ImageReplayerAdminSocketCommand<I>(desc, replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->replayer->flush();
+ return 0;
+ }
+};
+
+template <typename I>
+class ImageReplayerAdminSocketHook : public AdminSocketHook {
+public:
+ ImageReplayerAdminSocketHook(CephContext *cct, const std::string &name,
+ ImageReplayer<I> *replayer)
+ : admin_socket(cct->get_admin_socket()),
+ commands{{"rbd mirror flush " + name,
+ new FlushCommand<I>("flush rbd mirror " + name, replayer)},
+ {"rbd mirror restart " + name,
+ new RestartCommand<I>("restart rbd mirror " + name, replayer)},
+ {"rbd mirror start " + name,
+ new StartCommand<I>("start rbd mirror " + name, replayer)},
+ {"rbd mirror status " + name,
+ new StatusCommand<I>("get status for rbd mirror " + name, replayer)},
+ {"rbd mirror stop " + name,
+ new StopCommand<I>("stop rbd mirror " + name, replayer)}} {
+ }
+
+ int register_commands() {
+ for (auto &it : commands) {
+ int r = admin_socket->register_command(it.first, this,
+ it.second->desc);
+ if (r < 0) {
+ return r;
+ }
+ it.second->registered = true;
+ }
+ return 0;
+ }
+
+ ~ImageReplayerAdminSocketHook() override {
+ admin_socket->unregister_commands(this);
+ for (auto &it : commands) {
+ delete it.second;
+ }
+ commands.clear();
+ }
+
+ int call(std::string_view command, const cmdmap_t& cmdmap,
+ const bufferlist&,
+ Formatter *f,
+ std::ostream& errss,
+ bufferlist& out) override {
+ auto i = commands.find(command);
+ ceph_assert(i != commands.end());
+ return i->second->call(f);
+ }
+
+private:
+ typedef std::map<std::string, ImageReplayerAdminSocketCommand<I>*,
+ std::less<>> Commands;
+
+ AdminSocket *admin_socket;
+ Commands commands;
+};
+
+} // anonymous namespace
+
+template <typename I>
+void ImageReplayer<I>::BootstrapProgressContext::update_progress(
+ const std::string &description, bool flush)
+{
+ const std::string desc = "bootstrapping, " + description;
+ replayer->set_state_description(0, desc);
+ if (flush) {
+ replayer->update_mirror_image_status(false, boost::none);
+ }
+}
+
+template <typename I>
+struct ImageReplayer<I>::ReplayerListener
+ : public image_replayer::ReplayerListener {
+ ImageReplayer<I>* image_replayer;
+
+ ReplayerListener(ImageReplayer<I>* image_replayer)
+ : image_replayer(image_replayer) {
+ }
+
+ void handle_notification() override {
+ image_replayer->handle_replayer_notification();
+ }
+};
+
+template <typename I>
+ImageReplayer<I>::ImageReplayer(
+ librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid,
+ const std::string &global_image_id, Threads<I> *threads,
+ InstanceWatcher<I> *instance_watcher,
+ MirrorStatusUpdater<I>* local_status_updater,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache) :
+ m_local_io_ctx(local_io_ctx), m_local_mirror_uuid(local_mirror_uuid),
+ m_global_image_id(global_image_id), m_threads(threads),
+ m_instance_watcher(instance_watcher),
+ m_local_status_updater(local_status_updater),
+ m_cache_manager_handler(cache_manager_handler),
+ m_pool_meta_cache(pool_meta_cache),
+ m_local_image_name(global_image_id),
+ m_lock(ceph::make_mutex("rbd::mirror::ImageReplayer " +
+ stringify(local_io_ctx.get_id()) + " " + global_image_id)),
+ m_progress_cxt(this),
+ m_replayer_listener(new ReplayerListener(this))
+{
+ // Register asok commands using a temporary "remote_pool_name/global_image_id"
+ // name. When the image name becomes known on start the asok commands will be
+ // re-registered using "remote_pool_name/remote_image_name" name.
+
+ m_image_spec = image_replayer::util::compute_image_spec(
+ local_io_ctx, global_image_id);
+ register_admin_socket_hook();
+}
+
+template <typename I>
+ImageReplayer<I>::~ImageReplayer()
+{
+ unregister_admin_socket_hook();
+ ceph_assert(m_state_builder == nullptr);
+ ceph_assert(m_on_start_finish == nullptr);
+ ceph_assert(m_on_stop_contexts.empty());
+ ceph_assert(m_bootstrap_request == nullptr);
+ ceph_assert(m_update_status_task == nullptr);
+ delete m_replayer_listener;
+}
+
+template <typename I>
+image_replayer::HealthState ImageReplayer<I>::get_health_state() const {
+ std::lock_guard locker{m_lock};
+
+ if (!m_mirror_image_status_state) {
+ return image_replayer::HEALTH_STATE_OK;
+ } else if (*m_mirror_image_status_state ==
+ cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING ||
+ *m_mirror_image_status_state ==
+ cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN) {
+ return image_replayer::HEALTH_STATE_WARNING;
+ }
+ return image_replayer::HEALTH_STATE_ERROR;
+}
+
+template <typename I>
+void ImageReplayer<I>::add_peer(const Peer<I>& peer) {
+ dout(10) << "peer=" << peer << dendl;
+
+ std::lock_guard locker{m_lock};
+ auto it = m_peers.find(peer);
+ if (it == m_peers.end()) {
+ m_peers.insert(peer);
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::set_state_description(int r, const std::string &desc) {
+ dout(10) << "r=" << r << ", desc=" << desc << dendl;
+
+ std::lock_guard l{m_lock};
+ m_last_r = r;
+ m_state_desc = desc;
+}
+
+template <typename I>
+void ImageReplayer<I>::start(Context *on_finish, bool manual, bool restart)
+{
+ dout(10) << "on_finish=" << on_finish << dendl;
+
+ int r = 0;
+ {
+ std::lock_guard locker{m_lock};
+ if (!is_stopped_()) {
+ derr << "already running" << dendl;
+ r = -EINVAL;
+ } else if (m_manual_stop && !manual) {
+ dout(5) << "stopped manually, ignoring start without manual flag"
+ << dendl;
+ r = -EPERM;
+ } else if (restart && !m_restart_requested) {
+ dout(10) << "canceled restart" << dendl;
+ r = -ECANCELED;
+ } else {
+ m_state = STATE_STARTING;
+ m_last_r = 0;
+ m_state_desc.clear();
+ m_manual_stop = false;
+ m_delete_requested = false;
+ m_restart_requested = false;
+ m_status_removed = false;
+
+ if (on_finish != nullptr) {
+ ceph_assert(m_on_start_finish == nullptr);
+ m_on_start_finish = on_finish;
+ }
+ ceph_assert(m_on_stop_contexts.empty());
+ }
+ }
+
+ if (r < 0) {
+ if (on_finish) {
+ on_finish->complete(r);
+ }
+ return;
+ }
+
+ bootstrap();
+}
+
+template <typename I>
+void ImageReplayer<I>::bootstrap() {
+ dout(10) << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (m_peers.empty()) {
+ locker.unlock();
+
+ dout(5) << "no peer clusters" << dendl;
+ on_start_fail(-ENOENT, "no peer clusters");
+ return;
+ }
+
+ // TODO need to support multiple remote images
+ ceph_assert(!m_peers.empty());
+ m_remote_image_peer = *m_peers.begin();
+
+ ceph_assert(m_state_builder == nullptr);
+ auto ctx = create_context_callback<
+ ImageReplayer, &ImageReplayer<I>::handle_bootstrap>(this);
+ auto request = image_replayer::BootstrapRequest<I>::create(
+ m_threads, m_local_io_ctx, m_remote_image_peer.io_ctx, m_instance_watcher,
+ m_global_image_id, m_local_mirror_uuid,
+ m_remote_image_peer.remote_pool_meta, m_cache_manager_handler,
+ m_pool_meta_cache, &m_progress_cxt, &m_state_builder, &m_resync_requested,
+ ctx);
+
+ request->get();
+ m_bootstrap_request = request;
+
+ // proceed even if stop was requested to allow for m_delete_requested
+ // to get set; cancel() would prevent BootstrapRequest from going into
+ // image sync
+ if (m_stop_requested) {
+ request->cancel();
+ }
+ locker.unlock();
+
+ update_mirror_image_status(false, boost::none);
+ request->send();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_bootstrap(int r) {
+ dout(10) << "r=" << r << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ m_bootstrap_request->put();
+ m_bootstrap_request = nullptr;
+ }
+
+ // set m_delete_requested early to ensure that in case remote
+ // image no longer exists local image gets deleted even if start
+ // is interrupted
+ if (r == -ENOLINK) {
+ dout(5) << "remote image no longer exists" << dendl;
+ m_delete_requested = true;
+ }
+
+ if (on_start_interrupted()) {
+ return;
+ } else if (r == -ENOMSG) {
+ dout(5) << "local image is primary" << dendl;
+ on_start_fail(0, "local image is primary");
+ return;
+ } else if (r == -EREMOTEIO) {
+ dout(5) << "remote image is not primary" << dendl;
+ on_start_fail(-EREMOTEIO, "remote image is not primary");
+ return;
+ } else if (r == -EEXIST) {
+ on_start_fail(r, "split-brain detected");
+ return;
+ } else if (r == -ENOLINK) {
+ on_start_fail(0, "remote image no longer exists");
+ return;
+ } else if (r == -ERESTART) {
+ on_start_fail(r, "image in transient state, try again");
+ return;
+ } else if (r < 0) {
+ on_start_fail(r, "error bootstrapping replay");
+ return;
+ } else if (m_resync_requested) {
+ on_start_fail(0, "resync requested");
+ return;
+ }
+
+ start_replay();
+}
+
+template <typename I>
+void ImageReplayer<I>::start_replay() {
+ dout(10) << dendl;
+
+ std::unique_lock locker{m_lock};
+ ceph_assert(m_replayer == nullptr);
+ m_replayer = m_state_builder->create_replayer(m_threads, m_instance_watcher,
+ m_local_mirror_uuid,
+ m_pool_meta_cache,
+ m_replayer_listener);
+
+ auto ctx = create_context_callback<
+ ImageReplayer<I>, &ImageReplayer<I>::handle_start_replay>(this);
+ m_replayer->init(ctx);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_start_replay(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (on_start_interrupted()) {
+ return;
+ } else if (r < 0) {
+ std::string error_description = m_replayer->get_error_description();
+ if (r == -ENOTCONN && m_replayer->is_resync_requested()) {
+ std::unique_lock locker{m_lock};
+ m_resync_requested = true;
+ }
+
+ // shut down not required if init failed
+ m_replayer->destroy();
+ m_replayer = nullptr;
+
+ derr << "error starting replay: " << cpp_strerror(r) << dendl;
+ on_start_fail(r, error_description);
+ return;
+ }
+
+ Context *on_finish = nullptr;
+ {
+ std::unique_lock locker{m_lock};
+ ceph_assert(m_state == STATE_STARTING);
+ m_state = STATE_REPLAYING;
+ std::swap(m_on_start_finish, on_finish);
+
+ std::unique_lock timer_locker{m_threads->timer_lock};
+ schedule_update_mirror_image_replay_status();
+ }
+
+ update_mirror_image_status(true, boost::none);
+ if (on_replay_interrupted()) {
+ if (on_finish != nullptr) {
+ on_finish->complete(r);
+ }
+ return;
+ }
+
+ dout(10) << "start succeeded" << dendl;
+ if (on_finish != nullptr) {
+ dout(10) << "on finish complete, r=" << r << dendl;
+ on_finish->complete(r);
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::on_start_fail(int r, const std::string &desc)
+{
+ dout(10) << "r=" << r << ", desc=" << desc << dendl;
+ Context *ctx = new LambdaContext([this, r, desc](int _r) {
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_state == STATE_STARTING);
+ m_state = STATE_STOPPING;
+ if (r < 0 && r != -ECANCELED && r != -EREMOTEIO && r != -ENOENT) {
+ derr << "start failed: " << cpp_strerror(r) << dendl;
+ } else {
+ dout(10) << "start canceled" << dendl;
+ }
+ }
+
+ set_state_description(r, desc);
+ update_mirror_image_status(false, boost::none);
+ shut_down(r);
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+bool ImageReplayer<I>::on_start_interrupted() {
+ std::lock_guard locker{m_lock};
+ return on_start_interrupted(m_lock);
+}
+
+template <typename I>
+bool ImageReplayer<I>::on_start_interrupted(ceph::mutex& lock) {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_state == STATE_STARTING);
+ if (!m_stop_requested) {
+ return false;
+ }
+
+ on_start_fail(-ECANCELED, "");
+ return true;
+}
+
+template <typename I>
+void ImageReplayer<I>::stop(Context *on_finish, bool manual, bool restart)
+{
+ dout(10) << "on_finish=" << on_finish << ", manual=" << manual
+ << ", restart=" << restart << dendl;
+
+ image_replayer::BootstrapRequest<I> *bootstrap_request = nullptr;
+ bool shut_down_replay = false;
+ bool is_stopped = false;
+ {
+ std::lock_guard locker{m_lock};
+
+ if (!is_running_()) {
+ if (manual && !m_manual_stop) {
+ dout(10) << "marking manual" << dendl;
+ m_manual_stop = true;
+ }
+ if (!restart && m_restart_requested) {
+ dout(10) << "canceling restart" << dendl;
+ m_restart_requested = false;
+ }
+ if (is_stopped_()) {
+ dout(10) << "already stopped" << dendl;
+ is_stopped = true;
+ } else {
+ dout(10) << "joining in-flight stop" << dendl;
+ if (on_finish != nullptr) {
+ m_on_stop_contexts.push_back(on_finish);
+ }
+ }
+ } else {
+ if (m_state == STATE_STARTING) {
+ dout(10) << "canceling start" << dendl;
+ if (m_bootstrap_request != nullptr) {
+ bootstrap_request = m_bootstrap_request;
+ bootstrap_request->get();
+ }
+ } else {
+ dout(10) << "interrupting replay" << dendl;
+ shut_down_replay = true;
+ }
+
+ ceph_assert(m_on_stop_contexts.empty());
+ if (on_finish != nullptr) {
+ m_on_stop_contexts.push_back(on_finish);
+ }
+ m_stop_requested = true;
+ m_manual_stop = manual;
+ }
+ }
+
+ if (is_stopped) {
+ if (on_finish) {
+ on_finish->complete(-EINVAL);
+ }
+ return;
+ }
+
+ // avoid holding lock since bootstrap request will update status
+ if (bootstrap_request != nullptr) {
+ dout(10) << "canceling bootstrap" << dendl;
+ bootstrap_request->cancel();
+ bootstrap_request->put();
+ }
+
+ if (shut_down_replay) {
+ on_stop_journal_replay();
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::on_stop_journal_replay(int r, const std::string &desc)
+{
+ dout(10) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_state != STATE_REPLAYING) {
+ // might be invoked multiple times while stopping
+ return;
+ }
+
+ m_stop_requested = true;
+ m_state = STATE_STOPPING;
+ }
+
+ cancel_update_mirror_image_replay_status();
+ set_state_description(r, desc);
+ update_mirror_image_status(true, boost::none);
+ shut_down(0);
+}
+
+template <typename I>
+void ImageReplayer<I>::restart(Context *on_finish)
+{
+ {
+ std::lock_guard locker{m_lock};
+ m_restart_requested = true;
+ }
+
+ auto ctx = new LambdaContext(
+ [this, on_finish](int r) {
+ if (r < 0) {
+ // Try start anyway.
+ }
+ start(on_finish, true, true);
+ });
+ stop(ctx, false, true);
+}
+
+template <typename I>
+void ImageReplayer<I>::flush()
+{
+ C_SaferCond ctx;
+
+ {
+ std::unique_lock locker{m_lock};
+ if (m_state != STATE_REPLAYING) {
+ return;
+ }
+
+ dout(10) << dendl;
+ ceph_assert(m_replayer != nullptr);
+ m_replayer->flush(&ctx);
+ }
+
+ int r = ctx.wait();
+ if (r >= 0) {
+ update_mirror_image_status(false, boost::none);
+ }
+}
+
+template <typename I>
+bool ImageReplayer<I>::on_replay_interrupted()
+{
+ bool shut_down;
+ {
+ std::lock_guard locker{m_lock};
+ shut_down = m_stop_requested;
+ }
+
+ if (shut_down) {
+ on_stop_journal_replay();
+ }
+ return shut_down;
+}
+
+template <typename I>
+void ImageReplayer<I>::print_status(Formatter *f)
+{
+ dout(10) << dendl;
+
+ std::lock_guard l{m_lock};
+
+ f->open_object_section("image_replayer");
+ f->dump_string("name", m_image_spec);
+ f->dump_string("state", to_string(m_state));
+ f->close_section();
+}
+
+template <typename I>
+void ImageReplayer<I>::schedule_update_mirror_image_replay_status() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+ ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+ if (m_state != STATE_REPLAYING) {
+ return;
+ }
+
+ dout(10) << dendl;
+
+ // periodically update the replaying status even if nothing changes
+ // so that we can adjust our performance stats
+ ceph_assert(m_update_status_task == nullptr);
+ m_update_status_task = create_context_callback<
+ ImageReplayer<I>,
+ &ImageReplayer<I>::handle_update_mirror_image_replay_status>(this);
+ m_threads->timer->add_event_after(10, m_update_status_task);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_update_mirror_image_replay_status(int r) {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+
+ ceph_assert(m_update_status_task != nullptr);
+ m_update_status_task = nullptr;
+
+ auto ctx = new LambdaContext([this](int) {
+ update_mirror_image_status(false, boost::none);
+
+ std::unique_lock locker{m_lock};
+ std::unique_lock timer_locker{m_threads->timer_lock};
+
+ schedule_update_mirror_image_replay_status();
+ m_in_flight_op_tracker.finish_op();
+ });
+
+ m_in_flight_op_tracker.start_op();
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void ImageReplayer<I>::cancel_update_mirror_image_replay_status() {
+ std::unique_lock timer_locker{m_threads->timer_lock};
+ if (m_update_status_task != nullptr) {
+ dout(10) << dendl;
+
+ if (m_threads->timer->cancel_event(m_update_status_task)) {
+ m_update_status_task = nullptr;
+ }
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::update_mirror_image_status(
+ bool force, const OptionalState &opt_state) {
+ dout(15) << "force=" << force << ", "
+ << "state=" << opt_state << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ if (!force && !is_stopped_() && !is_running_()) {
+ dout(15) << "shut down in-progress: ignoring update" << dendl;
+ return;
+ }
+ }
+
+ m_in_flight_op_tracker.start_op();
+ auto ctx = new LambdaContext(
+ [this, force, opt_state](int r) {
+ set_mirror_image_status_update(force, opt_state);
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void ImageReplayer<I>::set_mirror_image_status_update(
+ bool force, const OptionalState &opt_state) {
+ dout(15) << "force=" << force << ", "
+ << "state=" << opt_state << dendl;
+
+ reregister_admin_socket_hook();
+
+ State state;
+ std::string state_desc;
+ int last_r;
+ bool stopping_replay;
+
+ auto mirror_image_status_state = boost::make_optional(
+ false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
+ image_replayer::BootstrapRequest<I>* bootstrap_request = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ state = m_state;
+ state_desc = m_state_desc;
+ mirror_image_status_state = m_mirror_image_status_state;
+ last_r = m_last_r;
+ stopping_replay = (m_replayer != nullptr);
+
+ if (m_bootstrap_request != nullptr) {
+ bootstrap_request = m_bootstrap_request;
+ bootstrap_request->get();
+ }
+ }
+
+ bool syncing = false;
+ if (bootstrap_request != nullptr) {
+ syncing = bootstrap_request->is_syncing();
+ bootstrap_request->put();
+ bootstrap_request = nullptr;
+ }
+
+ if (opt_state) {
+ state = *opt_state;
+ }
+
+ cls::rbd::MirrorImageSiteStatus status;
+ status.up = true;
+ switch (state) {
+ case STATE_STARTING:
+ if (syncing) {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING;
+ status.description = state_desc.empty() ? "syncing" : state_desc;
+ mirror_image_status_state = status.state;
+ } else {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY;
+ status.description = "starting replay";
+ }
+ break;
+ case STATE_REPLAYING:
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING;
+ {
+ std::string desc;
+ auto on_req_finish = new LambdaContext(
+ [this, force](int r) {
+ dout(15) << "replay status ready: r=" << r << dendl;
+ if (r >= 0) {
+ set_mirror_image_status_update(force, boost::none);
+ } else if (r == -EAGAIN) {
+ m_in_flight_op_tracker.finish_op();
+ }
+ });
+
+ ceph_assert(m_replayer != nullptr);
+ if (!m_replayer->get_replay_status(&desc, on_req_finish)) {
+ dout(15) << "waiting for replay status" << dendl;
+ return;
+ }
+
+ status.description = "replaying, " + desc;
+ mirror_image_status_state = boost::make_optional(
+ false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
+ }
+ break;
+ case STATE_STOPPING:
+ if (stopping_replay) {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY;
+ status.description = state_desc.empty() ? "stopping replay" : state_desc;
+ break;
+ }
+ // FALLTHROUGH
+ case STATE_STOPPED:
+ if (last_r == -EREMOTEIO) {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN;
+ status.description = state_desc;
+ mirror_image_status_state = status.state;
+ } else if (last_r < 0 && last_r != -ECANCELED) {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR;
+ status.description = state_desc;
+ mirror_image_status_state = status.state;
+ } else {
+ status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPED;
+ status.description = state_desc.empty() ? "stopped" : state_desc;
+ mirror_image_status_state = boost::none;
+ }
+ break;
+ default:
+ ceph_assert(!"invalid state");
+ }
+
+ {
+ std::lock_guard locker{m_lock};
+ m_mirror_image_status_state = mirror_image_status_state;
+ }
+
+ // prevent the status from ping-ponging when failed replays are restarted
+ if (mirror_image_status_state &&
+ *mirror_image_status_state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR) {
+ status.state = *mirror_image_status_state;
+ }
+
+ dout(15) << "status=" << status << dendl;
+ m_local_status_updater->set_mirror_image_status(m_global_image_id, status,
+ force);
+ if (m_remote_image_peer.mirror_status_updater != nullptr) {
+ m_remote_image_peer.mirror_status_updater->set_mirror_image_status(
+ m_global_image_id, status, force);
+ }
+
+ m_in_flight_op_tracker.finish_op();
+}
+
+template <typename I>
+void ImageReplayer<I>::shut_down(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_state == STATE_STOPPING);
+ }
+
+ if (!m_in_flight_op_tracker.empty()) {
+ dout(15) << "waiting for in-flight operations to complete" << dendl;
+ m_in_flight_op_tracker.wait_for_ops(new LambdaContext([this, r](int) {
+ shut_down(r);
+ }));
+ return;
+ }
+
+ // chain the shut down sequence (reverse order)
+ Context *ctx = new LambdaContext(
+ [this, r](int _r) {
+ update_mirror_image_status(true, STATE_STOPPED);
+ handle_shut_down(r);
+ });
+
+ // destruct the state builder
+ if (m_state_builder != nullptr) {
+ ctx = new LambdaContext([this, ctx](int r) {
+ m_state_builder->close(ctx);
+ });
+ }
+
+ // close the replayer
+ if (m_replayer != nullptr) {
+ ctx = new LambdaContext([this, ctx](int r) {
+ m_replayer->destroy();
+ m_replayer = nullptr;
+ ctx->complete(0);
+ });
+ ctx = new LambdaContext([this, ctx](int r) {
+ m_replayer->shut_down(ctx);
+ });
+ }
+
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_shut_down(int r) {
+ bool resync_requested = false;
+ bool delete_requested = false;
+ bool unregister_asok_hook = false;
+ {
+ std::lock_guard locker{m_lock};
+
+ if (m_delete_requested && m_state_builder != nullptr &&
+ !m_state_builder->local_image_id.empty()) {
+ ceph_assert(m_state_builder->remote_image_id.empty());
+ dout(0) << "remote image no longer exists: scheduling deletion" << dendl;
+ unregister_asok_hook = true;
+ std::swap(delete_requested, m_delete_requested);
+ m_delete_in_progress = true;
+ }
+
+ std::swap(resync_requested, m_resync_requested);
+ if (!delete_requested && !resync_requested && m_last_r == -ENOENT &&
+ ((m_state_builder == nullptr) ||
+ (m_state_builder->local_image_id.empty() &&
+ m_state_builder->remote_image_id.empty()))) {
+ dout(0) << "mirror image no longer exists" << dendl;
+ unregister_asok_hook = true;
+ m_finished = true;
+ }
+ }
+
+ if (unregister_asok_hook) {
+ unregister_admin_socket_hook();
+ }
+
+ if (delete_requested || resync_requested) {
+ dout(5) << "moving image to trash" << dendl;
+ auto ctx = new LambdaContext([this, r](int) {
+ handle_shut_down(r);
+ });
+ ImageDeleter<I>::trash_move(m_local_io_ctx, m_global_image_id,
+ resync_requested, m_threads->work_queue, ctx);
+ return;
+ }
+
+ if (!m_in_flight_op_tracker.empty()) {
+ dout(15) << "waiting for in-flight operations to complete" << dendl;
+ m_in_flight_op_tracker.wait_for_ops(new LambdaContext([this, r](int) {
+ handle_shut_down(r);
+ }));
+ return;
+ }
+
+ if (!m_status_removed) {
+ auto ctx = new LambdaContext([this, r](int) {
+ m_status_removed = true;
+ handle_shut_down(r);
+ });
+ remove_image_status(m_delete_in_progress, ctx);
+ return;
+ }
+
+ if (m_state_builder != nullptr) {
+ m_state_builder->destroy();
+ m_state_builder = nullptr;
+ }
+
+ dout(10) << "stop complete" << dendl;
+ Context *on_start = nullptr;
+ Contexts on_stop_contexts;
+ {
+ std::lock_guard locker{m_lock};
+ std::swap(on_start, m_on_start_finish);
+ on_stop_contexts = std::move(m_on_stop_contexts);
+ m_stop_requested = false;
+ ceph_assert(m_state == STATE_STOPPING);
+ m_state = STATE_STOPPED;
+ }
+
+ if (on_start != nullptr) {
+ dout(10) << "on start finish complete, r=" << r << dendl;
+ on_start->complete(r);
+ r = 0;
+ }
+ for (auto ctx : on_stop_contexts) {
+ dout(10) << "on stop finish " << ctx << " complete, r=" << r << dendl;
+ ctx->complete(r);
+ }
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_replayer_notification() {
+ dout(10) << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (m_state != STATE_REPLAYING) {
+ // might be attempting to shut down
+ return;
+ }
+
+ {
+ // detect a rename of the local image
+ ceph_assert(m_state_builder != nullptr &&
+ m_state_builder->local_image_ctx != nullptr);
+ std::shared_lock image_locker{m_state_builder->local_image_ctx->image_lock};
+ if (m_local_image_name != m_state_builder->local_image_ctx->name) {
+ // will re-register with new name after next status update
+ dout(10) << "image renamed" << dendl;
+ m_local_image_name = m_state_builder->local_image_ctx->name;
+ }
+ }
+
+ // replayer cannot be shut down while notification is in-flight
+ ceph_assert(m_replayer != nullptr);
+ locker.unlock();
+
+ if (m_replayer->is_resync_requested()) {
+ dout(10) << "resync requested" << dendl;
+ m_resync_requested = true;
+ on_stop_journal_replay(0, "resync requested");
+ return;
+ }
+
+ if (!m_replayer->is_replaying()) {
+ auto error_code = m_replayer->get_error_code();
+ auto error_description = m_replayer->get_error_description();
+ dout(10) << "replay interrupted: "
+ << "r=" << error_code << ", "
+ << "error=" << error_description << dendl;
+ on_stop_journal_replay(error_code, error_description);
+ return;
+ }
+
+ update_mirror_image_status(false, {});
+}
+
+template <typename I>
+std::string ImageReplayer<I>::to_string(const State state) {
+ switch (state) {
+ case ImageReplayer<I>::STATE_STARTING:
+ return "Starting";
+ case ImageReplayer<I>::STATE_REPLAYING:
+ return "Replaying";
+ case ImageReplayer<I>::STATE_STOPPING:
+ return "Stopping";
+ case ImageReplayer<I>::STATE_STOPPED:
+ return "Stopped";
+ default:
+ break;
+ }
+ return "Unknown(" + stringify(state) + ")";
+}
+
+template <typename I>
+void ImageReplayer<I>::register_admin_socket_hook() {
+ ImageReplayerAdminSocketHook<I> *asok_hook;
+ {
+ std::lock_guard locker{m_lock};
+ if (m_asok_hook != nullptr) {
+ return;
+ }
+
+ dout(15) << "registered asok hook: " << m_image_spec << dendl;
+ asok_hook = new ImageReplayerAdminSocketHook<I>(
+ g_ceph_context, m_image_spec, this);
+ int r = asok_hook->register_commands();
+ if (r == 0) {
+ m_asok_hook = asok_hook;
+ return;
+ }
+ derr << "error registering admin socket commands" << dendl;
+ }
+ delete asok_hook;
+}
+
+template <typename I>
+void ImageReplayer<I>::unregister_admin_socket_hook() {
+ dout(15) << dendl;
+
+ AdminSocketHook *asok_hook = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ std::swap(asok_hook, m_asok_hook);
+ }
+ delete asok_hook;
+}
+
+template <typename I>
+void ImageReplayer<I>::reregister_admin_socket_hook() {
+ std::unique_lock locker{m_lock};
+ if (m_state == STATE_STARTING && m_bootstrap_request != nullptr) {
+ m_local_image_name = m_bootstrap_request->get_local_image_name();
+ }
+
+ auto image_spec = image_replayer::util::compute_image_spec(
+ m_local_io_ctx, m_local_image_name);
+ if (m_asok_hook != nullptr && m_image_spec == image_spec) {
+ return;
+ }
+
+ dout(15) << "old_image_spec=" << m_image_spec << ", "
+ << "new_image_spec=" << image_spec << dendl;
+ m_image_spec = image_spec;
+
+ if (m_state == STATE_STOPPING || m_state == STATE_STOPPED) {
+ // no need to re-register if stopping
+ return;
+ }
+ locker.unlock();
+
+ unregister_admin_socket_hook();
+ register_admin_socket_hook();
+}
+
+template <typename I>
+void ImageReplayer<I>::remove_image_status(bool force, Context *on_finish)
+{
+ auto ctx = new LambdaContext([this, force, on_finish](int) {
+ remove_image_status_remote(force, on_finish);
+ });
+
+ if (m_local_status_updater->exists(m_global_image_id)) {
+ dout(15) << "removing local mirror image status" << dendl;
+ if (force) {
+ m_local_status_updater->remove_mirror_image_status(
+ m_global_image_id, true, ctx);
+ } else {
+ m_local_status_updater->remove_refresh_mirror_image_status(
+ m_global_image_id, ctx);
+ }
+ return;
+ }
+
+ ctx->complete(0);
+}
+
+template <typename I>
+void ImageReplayer<I>::remove_image_status_remote(bool force, Context *on_finish)
+{
+ if (m_remote_image_peer.mirror_status_updater != nullptr &&
+ m_remote_image_peer.mirror_status_updater->exists(m_global_image_id)) {
+ dout(15) << "removing remote mirror image status" << dendl;
+ if (force) {
+ m_remote_image_peer.mirror_status_updater->remove_mirror_image_status(
+ m_global_image_id, true, on_finish);
+ } else {
+ m_remote_image_peer.mirror_status_updater->remove_refresh_mirror_image_status(
+ m_global_image_id, on_finish);
+ }
+ return;
+ }
+ if (on_finish) {
+ on_finish->complete(0);
+ }
+}
+
+template <typename I>
+std::ostream &operator<<(std::ostream &os, const ImageReplayer<I> &replayer)
+{
+ os << "ImageReplayer: " << &replayer << " [" << replayer.get_local_pool_id()
+ << "/" << replayer.get_global_image_id() << "]";
+ return os;
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h
new file mode 100644
index 000000000..432fdf225
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageReplayer.h
@@ -0,0 +1,273 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
+
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "ProgressContext.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_replayer/Types.h"
+#include <boost/optional.hpp>
+#include <string>
+
+class AdminSocketHook;
+
+namespace journal { struct CacheManagerHandler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct InstanceWatcher;
+template <typename> struct MirrorStatusUpdater;
+struct PoolMetaCache;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+class Replayer;
+template <typename> class BootstrapRequest;
+template <typename> class StateBuilder;
+
+} // namespace image_replayer
+
+/**
+ * Replays changes from a remote cluster for a single image.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageReplayer {
+public:
+ static ImageReplayer *create(
+ librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid,
+ const std::string &global_image_id, Threads<ImageCtxT> *threads,
+ InstanceWatcher<ImageCtxT> *instance_watcher,
+ MirrorStatusUpdater<ImageCtxT>* local_status_updater,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache) {
+ return new ImageReplayer(local_io_ctx, local_mirror_uuid, global_image_id,
+ threads, instance_watcher, local_status_updater,
+ cache_manager_handler, pool_meta_cache);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ ImageReplayer(librados::IoCtx &local_io_ctx,
+ const std::string &local_mirror_uuid,
+ const std::string &global_image_id,
+ Threads<ImageCtxT> *threads,
+ InstanceWatcher<ImageCtxT> *instance_watcher,
+ MirrorStatusUpdater<ImageCtxT>* local_status_updater,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache);
+ virtual ~ImageReplayer();
+ ImageReplayer(const ImageReplayer&) = delete;
+ ImageReplayer& operator=(const ImageReplayer&) = delete;
+
+ bool is_stopped() { std::lock_guard l{m_lock}; return is_stopped_(); }
+ bool is_running() { std::lock_guard l{m_lock}; return is_running_(); }
+ bool is_replaying() { std::lock_guard l{m_lock}; return is_replaying_(); }
+
+ std::string get_name() { std::lock_guard l{m_lock}; return m_image_spec; };
+ void set_state_description(int r, const std::string &desc);
+
+ // TODO temporary until policy handles release of image replayers
+ inline bool is_finished() const {
+ std::lock_guard locker{m_lock};
+ return m_finished;
+ }
+ inline void set_finished(bool finished) {
+ std::lock_guard locker{m_lock};
+ m_finished = finished;
+ }
+
+ inline bool is_blocklisted() const {
+ std::lock_guard locker{m_lock};
+ return (m_last_r == -EBLOCKLISTED);
+ }
+
+ image_replayer::HealthState get_health_state() const;
+
+ void add_peer(const Peer<ImageCtxT>& peer);
+
+ inline int64_t get_local_pool_id() const {
+ return m_local_io_ctx.get_id();
+ }
+ inline const std::string& get_global_image_id() const {
+ return m_global_image_id;
+ }
+
+ void start(Context *on_finish, bool manual = false, bool restart = false);
+ void stop(Context *on_finish, bool manual = false, bool restart = false);
+ void restart(Context *on_finish = nullptr);
+ void flush();
+
+ void print_status(Formatter *f);
+
+protected:
+ /**
+ * @verbatim
+ * (error)
+ * <uninitialized> <------------------------------------ FAIL
+ * | ^
+ * v *
+ * <starting> *
+ * | *
+ * v (error) *
+ * BOOTSTRAP_IMAGE * * * * * * * * * * * * * * * * * * * *
+ * | *
+ * v (error) *
+ * START_REPLAY * * * * * * * * * * * * * * * * * * * * * *
+ * |
+ * v
+ * REPLAYING
+ * |
+ * v
+ * JOURNAL_REPLAY_SHUT_DOWN
+ * |
+ * v
+ * LOCAL_IMAGE_CLOSE
+ * |
+ * v
+ * <stopped>
+ *
+ * @endverbatim
+ */
+
+ void on_start_fail(int r, const std::string &desc);
+ bool on_start_interrupted();
+ bool on_start_interrupted(ceph::mutex& lock);
+
+ void on_stop_journal_replay(int r = 0, const std::string &desc = "");
+
+ bool on_replay_interrupted();
+
+private:
+ typedef std::set<Peer<ImageCtxT>> Peers;
+ typedef std::list<Context *> Contexts;
+
+ enum State {
+ STATE_UNKNOWN,
+ STATE_STARTING,
+ STATE_REPLAYING,
+ STATE_STOPPING,
+ STATE_STOPPED,
+ };
+
+ struct ReplayerListener;
+
+ typedef boost::optional<State> OptionalState;
+ typedef boost::optional<cls::rbd::MirrorImageStatusState>
+ OptionalMirrorImageStatusState;
+
+ class BootstrapProgressContext : public ProgressContext {
+ public:
+ BootstrapProgressContext(ImageReplayer<ImageCtxT> *replayer) :
+ replayer(replayer) {
+ }
+
+ void update_progress(const std::string &description,
+ bool flush = true) override;
+
+ private:
+ ImageReplayer<ImageCtxT> *replayer;
+ };
+
+ librados::IoCtx &m_local_io_ctx;
+ std::string m_local_mirror_uuid;
+ std::string m_global_image_id;
+ Threads<ImageCtxT> *m_threads;
+ InstanceWatcher<ImageCtxT> *m_instance_watcher;
+ MirrorStatusUpdater<ImageCtxT>* m_local_status_updater;
+ journal::CacheManagerHandler *m_cache_manager_handler;
+ PoolMetaCache* m_pool_meta_cache;
+
+ Peers m_peers;
+ Peer<ImageCtxT> m_remote_image_peer;
+
+ std::string m_local_image_name;
+ std::string m_image_spec;
+
+ mutable ceph::mutex m_lock;
+ State m_state = STATE_STOPPED;
+ std::string m_state_desc;
+
+ OptionalMirrorImageStatusState m_mirror_image_status_state =
+ boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
+ int m_last_r = 0;
+
+ BootstrapProgressContext m_progress_cxt;
+
+ bool m_finished = false;
+ bool m_delete_in_progress = false;
+ bool m_delete_requested = false;
+ bool m_resync_requested = false;
+ bool m_restart_requested = false;
+
+ bool m_status_removed = false;
+
+ image_replayer::StateBuilder<ImageCtxT>* m_state_builder = nullptr;
+ image_replayer::Replayer* m_replayer = nullptr;
+ ReplayerListener* m_replayer_listener = nullptr;
+
+ Context *m_on_start_finish = nullptr;
+ Contexts m_on_stop_contexts;
+ bool m_stop_requested = false;
+ bool m_manual_stop = false;
+
+ AdminSocketHook *m_asok_hook = nullptr;
+
+ image_replayer::BootstrapRequest<ImageCtxT> *m_bootstrap_request = nullptr;
+
+ AsyncOpTracker m_in_flight_op_tracker;
+
+ Context* m_update_status_task = nullptr;
+
+ static std::string to_string(const State state);
+
+ bool is_stopped_() const {
+ return m_state == STATE_STOPPED;
+ }
+ bool is_running_() const {
+ return !is_stopped_() && m_state != STATE_STOPPING && !m_stop_requested;
+ }
+ bool is_replaying_() const {
+ return (m_state == STATE_REPLAYING);
+ }
+
+ void schedule_update_mirror_image_replay_status();
+ void handle_update_mirror_image_replay_status(int r);
+ void cancel_update_mirror_image_replay_status();
+
+ void update_mirror_image_status(bool force, const OptionalState &state);
+ void set_mirror_image_status_update(bool force, const OptionalState &state);
+
+ void shut_down(int r);
+ void handle_shut_down(int r);
+
+ void bootstrap();
+ void handle_bootstrap(int r);
+
+ void start_replay();
+ void handle_start_replay(int r);
+
+ void handle_replayer_notification();
+
+ void register_admin_socket_hook();
+ void unregister_admin_socket_hook();
+ void reregister_admin_socket_hook();
+ void remove_image_status(bool force, Context *on_finish);
+ void remove_image_status_remote(bool force, Context *on_finish);
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageReplayer<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/ImageSync.cc b/src/tools/rbd_mirror/ImageSync.cc
new file mode 100644
index 000000000..43d0c6663
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSync.cc
@@ -0,0 +1,469 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ImageSync.h"
+#include "InstanceWatcher.h"
+#include "ProgressContext.h"
+#include "common/debug.h"
+#include "common/Timer.h"
+#include "common/errno.h"
+#include "librbd/DeepCopyRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include "librbd/internal.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/deep_copy/Handler.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_sync/SyncPointCreateRequest.h"
+#include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.h"
+#include "tools/rbd_mirror/image_sync/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageSync: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+
+using namespace image_sync;
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::unique_lock_name;
+
+template <typename I>
+class ImageSync<I>::ImageCopyProgressHandler
+ : public librbd::deep_copy::NoOpHandler {
+public:
+ ImageCopyProgressHandler(ImageSync *image_sync) : image_sync(image_sync) {
+ }
+
+ int update_progress(uint64_t object_no, uint64_t object_count) override {
+ image_sync->handle_copy_image_update_progress(object_no, object_count);
+ return 0;
+ }
+
+ ImageSync *image_sync;
+};
+
+template <typename I>
+ImageSync<I>::ImageSync(
+ Threads<I>* threads,
+ I *local_image_ctx,
+ I *remote_image_ctx,
+ const std::string &local_mirror_uuid,
+ image_sync::SyncPointHandler* sync_point_handler,
+ InstanceWatcher<I> *instance_watcher,
+ ProgressContext *progress_ctx,
+ Context *on_finish)
+ : CancelableRequest("rbd::mirror::ImageSync", local_image_ctx->cct,
+ on_finish),
+ m_threads(threads),
+ m_local_image_ctx(local_image_ctx),
+ m_remote_image_ctx(remote_image_ctx),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_sync_point_handler(sync_point_handler),
+ m_instance_watcher(instance_watcher),
+ m_progress_ctx(progress_ctx),
+ m_lock(ceph::make_mutex(unique_lock_name("ImageSync::m_lock", this))),
+ m_update_sync_point_interval(
+ m_local_image_ctx->cct->_conf.template get_val<double>(
+ "rbd_mirror_sync_point_update_age")) {
+}
+
+template <typename I>
+ImageSync<I>::~ImageSync() {
+ ceph_assert(m_image_copy_request == nullptr);
+ ceph_assert(m_image_copy_prog_handler == nullptr);
+ ceph_assert(m_update_sync_ctx == nullptr);
+}
+
+template <typename I>
+void ImageSync<I>::send() {
+ send_notify_sync_request();
+}
+
+template <typename I>
+void ImageSync<I>::cancel() {
+ std::lock_guard locker{m_lock};
+
+ dout(10) << dendl;
+
+ m_canceled = true;
+
+ if (m_instance_watcher->cancel_sync_request(m_local_image_ctx->id)) {
+ return;
+ }
+
+ if (m_image_copy_request != nullptr) {
+ m_image_copy_request->cancel();
+ }
+}
+
+template <typename I>
+void ImageSync<I>::send_notify_sync_request() {
+ update_progress("NOTIFY_SYNC_REQUEST");
+
+ dout(10) << dendl;
+
+ m_lock.lock();
+ if (m_canceled) {
+ m_lock.unlock();
+ CancelableRequest::finish(-ECANCELED);
+ return;
+ }
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_notify_sync_request>(this));
+ m_instance_watcher->notify_sync_request(m_local_image_ctx->id, ctx);
+ m_lock.unlock();
+}
+
+template <typename I>
+void ImageSync<I>::handle_notify_sync_request(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ m_lock.lock();
+ if (r == 0 && m_canceled) {
+ r = -ECANCELED;
+ }
+ m_lock.unlock();
+
+ if (r < 0) {
+ CancelableRequest::finish(r);
+ return;
+ }
+
+ send_prune_catch_up_sync_point();
+}
+
+template <typename I>
+void ImageSync<I>::send_prune_catch_up_sync_point() {
+ update_progress("PRUNE_CATCH_UP_SYNC_POINT");
+
+ if (m_sync_point_handler->get_sync_points().empty()) {
+ send_create_sync_point();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ // prune will remove sync points with missing snapshots and
+ // ensure we have a maximum of one sync point (in case we
+ // restarted)
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_prune_catch_up_sync_point>(this);
+ SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create(
+ m_remote_image_ctx, false, m_sync_point_handler, ctx);
+ request->send();
+}
+
+template <typename I>
+void ImageSync<I>::handle_prune_catch_up_sync_point(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to prune catch-up sync point: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_create_sync_point();
+}
+
+template <typename I>
+void ImageSync<I>::send_create_sync_point() {
+ update_progress("CREATE_SYNC_POINT");
+
+ // TODO: when support for disconnecting laggy clients is added,
+ // re-connect and create catch-up sync point
+ if (!m_sync_point_handler->get_sync_points().empty()) {
+ send_copy_image();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_create_sync_point>(this);
+ SyncPointCreateRequest<I> *request = SyncPointCreateRequest<I>::create(
+ m_remote_image_ctx, m_local_mirror_uuid, m_sync_point_handler, ctx);
+ request->send();
+}
+
+template <typename I>
+void ImageSync<I>::handle_create_sync_point(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to create sync point: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ send_copy_image();
+}
+
+template <typename I>
+void ImageSync<I>::send_copy_image() {
+ librados::snap_t snap_id_start = 0;
+ librados::snap_t snap_id_end;
+ librbd::deep_copy::ObjectNumber object_number;
+ int r = 0;
+
+ m_snap_seqs_copy = m_sync_point_handler->get_snap_seqs();
+ m_sync_points_copy = m_sync_point_handler->get_sync_points();
+ ceph_assert(!m_sync_points_copy.empty());
+ auto &sync_point = m_sync_points_copy.front();
+
+ {
+ std::shared_lock image_locker{m_remote_image_ctx->image_lock};
+ snap_id_end = m_remote_image_ctx->get_snap_id(
+ cls::rbd::UserSnapshotNamespace(), sync_point.snap_name);
+ if (snap_id_end == CEPH_NOSNAP) {
+ derr << ": failed to locate snapshot: " << sync_point.snap_name << dendl;
+ r = -ENOENT;
+ } else if (!sync_point.from_snap_name.empty()) {
+ snap_id_start = m_remote_image_ctx->get_snap_id(
+ cls::rbd::UserSnapshotNamespace(), sync_point.from_snap_name);
+ if (snap_id_start == CEPH_NOSNAP) {
+ derr << ": failed to locate from snapshot: "
+ << sync_point.from_snap_name << dendl;
+ r = -ENOENT;
+ }
+ }
+ object_number = sync_point.object_number;
+ }
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ m_lock.lock();
+ if (m_canceled) {
+ m_lock.unlock();
+ finish(-ECANCELED);
+ return;
+ }
+
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_copy_image>(this);
+ m_image_copy_prog_handler = new ImageCopyProgressHandler(this);
+ m_image_copy_request = librbd::DeepCopyRequest<I>::create(
+ m_remote_image_ctx, m_local_image_ctx, snap_id_start, snap_id_end,
+ 0, false, object_number, m_threads->work_queue, &m_snap_seqs_copy,
+ m_image_copy_prog_handler, ctx);
+ m_image_copy_request->get();
+ m_lock.unlock();
+
+ update_progress("COPY_IMAGE");
+
+ m_image_copy_request->send();
+}
+
+template <typename I>
+void ImageSync<I>::handle_copy_image(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ m_image_copy_request->put();
+ m_image_copy_request = nullptr;
+ delete m_image_copy_prog_handler;
+ m_image_copy_prog_handler = nullptr;
+ if (r == 0 && m_canceled) {
+ r = -ECANCELED;
+ }
+
+ if (m_update_sync_ctx != nullptr) {
+ m_threads->timer->cancel_event(m_update_sync_ctx);
+ m_update_sync_ctx = nullptr;
+ }
+
+ if (m_updating_sync_point) {
+ m_ret_val = r;
+ return;
+ }
+ }
+
+ if (r == -ECANCELED) {
+ dout(10) << ": image copy canceled" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << ": failed to copy image: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_flush_sync_point();
+}
+
+template <typename I>
+void ImageSync<I>::handle_copy_image_update_progress(uint64_t object_no,
+ uint64_t object_count) {
+ int percent = 100 * object_no / object_count;
+ update_progress("COPY_IMAGE " + stringify(percent) + "%");
+
+ std::lock_guard locker{m_lock};
+ m_image_copy_object_no = object_no;
+ m_image_copy_object_count = object_count;
+
+ if (m_update_sync_ctx == nullptr && !m_updating_sync_point) {
+ send_update_sync_point();
+ }
+}
+
+template <typename I>
+void ImageSync<I>::send_update_sync_point() {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ m_update_sync_ctx = nullptr;
+
+ if (m_canceled) {
+ return;
+ }
+
+ ceph_assert(!m_sync_points_copy.empty());
+ auto sync_point = &m_sync_points_copy.front();
+
+ if (sync_point->object_number &&
+ (m_image_copy_object_no - 1) == sync_point->object_number.get()) {
+ // update sync point did not progress since last sync
+ return;
+ }
+
+ m_updating_sync_point = true;
+
+ if (m_image_copy_object_no > 0) {
+ sync_point->object_number = m_image_copy_object_no - 1;
+ }
+
+ auto ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_update_sync_point>(this);
+ m_sync_point_handler->update_sync_points(m_snap_seqs_copy,
+ m_sync_points_copy, false, ctx);
+}
+
+template <typename I>
+void ImageSync<I>::handle_update_sync_point(int r) {
+ CephContext *cct = m_local_image_ctx->cct;
+ ldout(cct, 20) << ": r=" << r << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ m_updating_sync_point = false;
+
+ if (m_image_copy_request != nullptr) {
+ m_update_sync_ctx = new LambdaContext(
+ [this](int r) {
+ std::lock_guard locker{m_lock};
+ this->send_update_sync_point();
+ });
+ m_threads->timer->add_event_after(
+ m_update_sync_point_interval, m_update_sync_ctx);
+ return;
+ }
+ }
+
+ send_flush_sync_point();
+}
+
+template <typename I>
+void ImageSync<I>::send_flush_sync_point() {
+ if (m_ret_val < 0) {
+ finish(m_ret_val);
+ return;
+ }
+
+ update_progress("FLUSH_SYNC_POINT");
+
+ ceph_assert(!m_sync_points_copy.empty());
+ auto sync_point = &m_sync_points_copy.front();
+
+ if (m_image_copy_object_no > 0) {
+ sync_point->object_number = m_image_copy_object_no - 1;
+ } else {
+ sync_point->object_number = boost::none;
+ }
+
+ auto ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_flush_sync_point>(this);
+ m_sync_point_handler->update_sync_points(m_snap_seqs_copy,
+ m_sync_points_copy, false, ctx);
+}
+
+template <typename I>
+void ImageSync<I>::handle_flush_sync_point(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to update client data: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ send_prune_sync_points();
+}
+
+template <typename I>
+void ImageSync<I>::send_prune_sync_points() {
+ dout(10) << dendl;
+
+ update_progress("PRUNE_SYNC_POINTS");
+
+ Context *ctx = create_context_callback<
+ ImageSync<I>, &ImageSync<I>::handle_prune_sync_points>(this);
+ SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create(
+ m_remote_image_ctx, true, m_sync_point_handler, ctx);
+ request->send();
+}
+
+template <typename I>
+void ImageSync<I>::handle_prune_sync_points(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to prune sync point: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (!m_sync_point_handler->get_sync_points().empty()) {
+ send_copy_image();
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void ImageSync<I>::update_progress(const std::string &description) {
+ dout(20) << ": " << description << dendl;
+
+ if (m_progress_ctx) {
+ m_progress_ctx->update_progress("IMAGE_SYNC/" + description);
+ }
+}
+
+template <typename I>
+void ImageSync<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_instance_watcher->notify_sync_complete(m_local_image_ctx->id);
+ CancelableRequest::finish(r);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageSync<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageSync.h b/src/tools/rbd_mirror/ImageSync.h
new file mode 100644
index 000000000..b3389ce18
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSync.h
@@ -0,0 +1,151 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_H
+#define RBD_MIRROR_IMAGE_SYNC_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Types.h"
+#include "common/ceph_mutex.h"
+#include "tools/rbd_mirror/CancelableRequest.h"
+#include "tools/rbd_mirror/image_sync/Types.h"
+
+class Context;
+namespace journal { class Journaler; }
+namespace librbd { template <typename> class DeepCopyRequest; }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+template <typename> class InstanceWatcher;
+template <typename> class Threads;
+
+namespace image_sync { struct SyncPointHandler; }
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageSync : public CancelableRequest {
+public:
+ static ImageSync* create(
+ Threads<ImageCtxT>* threads,
+ ImageCtxT *local_image_ctx,
+ ImageCtxT *remote_image_ctx,
+ const std::string &local_mirror_uuid,
+ image_sync::SyncPointHandler* sync_point_handler,
+ InstanceWatcher<ImageCtxT> *instance_watcher,
+ ProgressContext *progress_ctx,
+ Context *on_finish) {
+ return new ImageSync(threads, local_image_ctx, remote_image_ctx,
+ local_mirror_uuid, sync_point_handler,
+ instance_watcher, progress_ctx, on_finish);
+ }
+
+ ImageSync(
+ Threads<ImageCtxT>* threads,
+ ImageCtxT *local_image_ctx,
+ ImageCtxT *remote_image_ctx,
+ const std::string &local_mirror_uuid,
+ image_sync::SyncPointHandler* sync_point_handler,
+ InstanceWatcher<ImageCtxT> *instance_watcher,
+ ProgressContext *progress_ctx,
+ Context *on_finish);
+ ~ImageSync() override;
+
+ void send() override;
+ void cancel() override;
+
+protected:
+ void finish(int r) override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * NOTIFY_SYNC_REQUEST
+ * |
+ * v
+ * PRUNE_CATCH_UP_SYNC_POINT
+ * |
+ * v
+ * CREATE_SYNC_POINT (skip if already exists and
+ * | not disconnected)
+ * v
+ * COPY_IMAGE . . . . . . . . . . . . . .
+ * | .
+ * v .
+ * FLUSH_SYNC_POINT .
+ * | . (image sync canceled)
+ * v .
+ * PRUNE_SYNC_POINTS .
+ * | .
+ * v .
+ * <finish> < . . . . . . . . . . . . . .
+ *
+ * @endverbatim
+ */
+
+ class ImageCopyProgressHandler;
+
+ Threads<ImageCtxT>* m_threads;
+ ImageCtxT *m_local_image_ctx;
+ ImageCtxT *m_remote_image_ctx;
+ std::string m_local_mirror_uuid;
+ image_sync::SyncPointHandler* m_sync_point_handler;
+ InstanceWatcher<ImageCtxT> *m_instance_watcher;
+ ProgressContext *m_progress_ctx;
+
+ ceph::mutex m_lock;
+ bool m_canceled = false;
+
+ librbd::DeepCopyRequest<ImageCtxT> *m_image_copy_request = nullptr;
+ ImageCopyProgressHandler *m_image_copy_prog_handler = nullptr;
+
+ bool m_updating_sync_point = false;
+ Context *m_update_sync_ctx = nullptr;
+ double m_update_sync_point_interval;
+ uint64_t m_image_copy_object_no = 0;
+ uint64_t m_image_copy_object_count = 0;
+
+ librbd::SnapSeqs m_snap_seqs_copy;
+ image_sync::SyncPoints m_sync_points_copy;
+
+ int m_ret_val = 0;
+
+ void send_notify_sync_request();
+ void handle_notify_sync_request(int r);
+
+ void send_prune_catch_up_sync_point();
+ void handle_prune_catch_up_sync_point(int r);
+
+ void send_create_sync_point();
+ void handle_create_sync_point(int r);
+
+ void send_update_max_object_count();
+ void handle_update_max_object_count(int r);
+
+ void send_copy_image();
+ void handle_copy_image(int r);
+ void handle_copy_image_update_progress(uint64_t object_no,
+ uint64_t object_count);
+ void send_update_sync_point();
+ void handle_update_sync_point(int r);
+
+ void send_flush_sync_point();
+ void handle_flush_sync_point(int r);
+
+ void send_prune_sync_points();
+ void handle_prune_sync_points(int r);
+
+ void update_progress(const std::string &description);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ImageSync<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_H
diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc
new file mode 100644
index 000000000..e625bf365
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceReplayer.cc
@@ -0,0 +1,543 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/stringify.h"
+#include "common/Cond.h"
+#include "common/Timer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "ImageReplayer.h"
+#include "InstanceReplayer.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::InstanceReplayer: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+const std::string SERVICE_DAEMON_ASSIGNED_COUNT_KEY("image_assigned_count");
+const std::string SERVICE_DAEMON_WARNING_COUNT_KEY("image_warning_count");
+const std::string SERVICE_DAEMON_ERROR_COUNT_KEY("image_error_count");
+
+} // anonymous namespace
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+template <typename I>
+InstanceReplayer<I>::InstanceReplayer(
+ librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid,
+ Threads<I> *threads, ServiceDaemon<I>* service_daemon,
+ MirrorStatusUpdater<I>* local_status_updater,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache)
+ : m_local_io_ctx(local_io_ctx), m_local_mirror_uuid(local_mirror_uuid),
+ m_threads(threads), m_service_daemon(service_daemon),
+ m_local_status_updater(local_status_updater),
+ m_cache_manager_handler(cache_manager_handler),
+ m_pool_meta_cache(pool_meta_cache),
+ m_lock(ceph::make_mutex("rbd::mirror::InstanceReplayer " +
+ stringify(local_io_ctx.get_id()))) {
+}
+
+template <typename I>
+InstanceReplayer<I>::~InstanceReplayer() {
+ ceph_assert(m_image_state_check_task == nullptr);
+ ceph_assert(m_async_op_tracker.empty());
+ ceph_assert(m_image_replayers.empty());
+}
+
+template <typename I>
+bool InstanceReplayer<I>::is_blocklisted() const {
+ std::lock_guard locker{m_lock};
+ return m_blocklisted;
+}
+
+template <typename I>
+int InstanceReplayer<I>::init() {
+ C_SaferCond init_ctx;
+ init(&init_ctx);
+ return init_ctx.wait();
+}
+
+template <typename I>
+void InstanceReplayer<I>::init(Context *on_finish) {
+ dout(10) << dendl;
+
+ Context *ctx = new LambdaContext(
+ [this, on_finish] (int r) {
+ {
+ std::lock_guard timer_locker{m_threads->timer_lock};
+ schedule_image_state_check_task();
+ }
+ on_finish->complete(0);
+ });
+
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::shut_down() {
+ C_SaferCond shut_down_ctx;
+ shut_down(&shut_down_ctx);
+ int r = shut_down_ctx.wait();
+ ceph_assert(r == 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::shut_down(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_shut_down == nullptr);
+ m_on_shut_down = on_finish;
+
+ Context *ctx = new LambdaContext(
+ [this] (int r) {
+ cancel_image_state_check_task();
+ wait_for_ops();
+ });
+
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::add_peer(const Peer<I>& peer) {
+ dout(10) << "peer=" << peer << dendl;
+
+ std::lock_guard locker{m_lock};
+ auto result = m_peers.insert(peer).second;
+ ceph_assert(result);
+}
+
+template <typename I>
+void InstanceReplayer<I>::release_all(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ C_Gather *gather_ctx = new C_Gather(g_ceph_context, on_finish);
+ for (auto it = m_image_replayers.begin(); it != m_image_replayers.end();
+ it = m_image_replayers.erase(it)) {
+ auto image_replayer = it->second;
+ auto ctx = gather_ctx->new_sub();
+ ctx = new LambdaContext(
+ [image_replayer, ctx] (int r) {
+ image_replayer->destroy();
+ ctx->complete(0);
+ });
+ stop_image_replayer(image_replayer, ctx);
+ }
+ gather_ctx->activate();
+}
+
+template <typename I>
+void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> *instance_watcher,
+ const std::string &global_image_id,
+ Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_shut_down == nullptr);
+
+ auto it = m_image_replayers.find(global_image_id);
+ if (it == m_image_replayers.end()) {
+ auto image_replayer = ImageReplayer<I>::create(
+ m_local_io_ctx, m_local_mirror_uuid, global_image_id,
+ m_threads, instance_watcher, m_local_status_updater,
+ m_cache_manager_handler, m_pool_meta_cache);
+
+ dout(10) << global_image_id << ": creating replayer " << image_replayer
+ << dendl;
+
+ it = m_image_replayers.insert(std::make_pair(global_image_id,
+ image_replayer)).first;
+
+ // TODO only a single peer is currently supported
+ ceph_assert(m_peers.size() == 1);
+ auto peer = *m_peers.begin();
+ image_replayer->add_peer(peer);
+ start_image_replayer(image_replayer);
+ } else {
+ // A duplicate acquire notification implies (1) connection hiccup or
+ // (2) new leader election. For the second case, restart the replayer to
+ // detect if the image has been deleted while the leader was offline
+ auto& image_replayer = it->second;
+ image_replayer->set_finished(false);
+ image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
+ }
+
+ m_threads->work_queue->queue(on_finish, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::release_image(const std::string &global_image_id,
+ Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_shut_down == nullptr);
+
+ auto it = m_image_replayers.find(global_image_id);
+ if (it == m_image_replayers.end()) {
+ dout(5) << global_image_id << ": not found" << dendl;
+ m_threads->work_queue->queue(on_finish, 0);
+ return;
+ }
+
+ auto image_replayer = it->second;
+ m_image_replayers.erase(it);
+
+ on_finish = new LambdaContext(
+ [image_replayer, on_finish] (int r) {
+ image_replayer->destroy();
+ on_finish->complete(0);
+ });
+ stop_image_replayer(image_replayer, on_finish);
+}
+
+template <typename I>
+void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id,
+ const std::string &peer_mirror_uuid,
+ Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << ", "
+ << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_shut_down == nullptr);
+
+ auto it = m_image_replayers.find(global_image_id);
+ if (it != m_image_replayers.end()) {
+ // TODO only a single peer is currently supported, therefore
+ // we can just interrupt the current image replayer and
+ // it will eventually detect that the peer image is missing and
+ // determine if a delete propagation is required.
+ auto image_replayer = it->second;
+ image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
+ }
+ m_threads->work_queue->queue(on_finish, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::print_status(Formatter *f) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ f->open_array_section("image_replayers");
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->print_status(f);
+ }
+ f->close_section();
+}
+
+template <typename I>
+void InstanceReplayer<I>::start()
+{
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_manual_stop = false;
+
+ auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
+ auto gather_ctx = new C_Gather(
+ cct, new C_TrackedOp(m_async_op_tracker, nullptr));
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->start(gather_ctx->new_sub(), true);
+ }
+
+ gather_ctx->activate();
+}
+
+template <typename I>
+void InstanceReplayer<I>::stop()
+{
+ stop(nullptr);
+}
+
+template <typename I>
+void InstanceReplayer<I>::stop(Context *on_finish)
+{
+ dout(10) << dendl;
+
+ if (on_finish == nullptr) {
+ on_finish = new C_TrackedOp(m_async_op_tracker, on_finish);
+ } else {
+ on_finish = new LambdaContext(
+ [this, on_finish] (int r) {
+ m_async_op_tracker.wait_for_ops(on_finish);
+ });
+ }
+
+ auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
+ auto gather_ctx = new C_Gather(cct, on_finish);
+ {
+ std::lock_guard locker{m_lock};
+
+ m_manual_stop = true;
+
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->stop(gather_ctx->new_sub(), true);
+ }
+ }
+
+ gather_ctx->activate();
+}
+
+template <typename I>
+void InstanceReplayer<I>::restart()
+{
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_manual_stop = false;
+
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr));
+ }
+}
+
+template <typename I>
+void InstanceReplayer<I>::flush()
+{
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ for (auto &kv : m_image_replayers) {
+ auto &image_replayer = kv.second;
+ image_replayer->flush();
+ }
+}
+
+template <typename I>
+void InstanceReplayer<I>::start_image_replayer(
+ ImageReplayer<I> *image_replayer) {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ std::string global_image_id = image_replayer->get_global_image_id();
+ if (!image_replayer->is_stopped()) {
+ return;
+ } else if (image_replayer->is_blocklisted()) {
+ derr << "global_image_id=" << global_image_id << ": blocklisted detected "
+ << "during image replay" << dendl;
+ m_blocklisted = true;
+ return;
+ } else if (image_replayer->is_finished()) {
+ // TODO temporary until policy integrated
+ dout(5) << "removing image replayer for global_image_id="
+ << global_image_id << dendl;
+ m_image_replayers.erase(image_replayer->get_global_image_id());
+ image_replayer->destroy();
+ return;
+ } else if (m_manual_stop) {
+ return;
+ }
+
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+ image_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr), false);
+}
+
+template <typename I>
+void InstanceReplayer<I>::queue_start_image_replayers() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ InstanceReplayer, &InstanceReplayer<I>::start_image_replayers>(this);
+ m_async_op_tracker.start_op();
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceReplayer<I>::start_image_replayers(int r) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ if (m_on_shut_down != nullptr) {
+ m_async_op_tracker.finish_op();
+ return;
+ }
+
+ uint64_t image_count = 0;
+ uint64_t warning_count = 0;
+ uint64_t error_count = 0;
+ for (auto it = m_image_replayers.begin();
+ it != m_image_replayers.end();) {
+ auto current_it(it);
+ ++it;
+
+ ++image_count;
+ auto health_state = current_it->second->get_health_state();
+ if (health_state == image_replayer::HEALTH_STATE_WARNING) {
+ ++warning_count;
+ } else if (health_state == image_replayer::HEALTH_STATE_ERROR) {
+ ++error_count;
+ }
+
+ start_image_replayer(current_it->second);
+ }
+
+ m_service_daemon->add_or_update_namespace_attribute(
+ m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+ SERVICE_DAEMON_ASSIGNED_COUNT_KEY, image_count);
+ m_service_daemon->add_or_update_namespace_attribute(
+ m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+ SERVICE_DAEMON_WARNING_COUNT_KEY, warning_count);
+ m_service_daemon->add_or_update_namespace_attribute(
+ m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+ SERVICE_DAEMON_ERROR_COUNT_KEY, error_count);
+
+ m_async_op_tracker.finish_op();
+}
+
+template <typename I>
+void InstanceReplayer<I>::stop_image_replayer(ImageReplayer<I> *image_replayer,
+ Context *on_finish) {
+ dout(10) << image_replayer << " global_image_id="
+ << image_replayer->get_global_image_id() << ", on_finish="
+ << on_finish << dendl;
+
+ if (image_replayer->is_stopped()) {
+ m_threads->work_queue->queue(on_finish, 0);
+ return;
+ }
+
+ m_async_op_tracker.start_op();
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, new LambdaContext(
+ [this, image_replayer, on_finish] (int r) {
+ stop_image_replayer(image_replayer, on_finish);
+ m_async_op_tracker.finish_op();
+ }));
+
+ if (image_replayer->is_running()) {
+ image_replayer->stop(ctx, false);
+ } else {
+ int after = 1;
+ dout(10) << "scheduling image replayer " << image_replayer << " stop after "
+ << after << " sec (task " << ctx << ")" << dendl;
+ ctx = new LambdaContext(
+ [this, after, ctx] (int r) {
+ std::lock_guard timer_locker{m_threads->timer_lock};
+ m_threads->timer->add_event_after(after, ctx);
+ });
+ m_threads->work_queue->queue(ctx, 0);
+ }
+}
+
+template <typename I>
+void InstanceReplayer<I>::wait_for_ops() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ InstanceReplayer, &InstanceReplayer<I>::handle_wait_for_ops>(this);
+
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void InstanceReplayer<I>::handle_wait_for_ops(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r == 0);
+
+ std::lock_guard locker{m_lock};
+ stop_image_replayers();
+}
+
+template <typename I>
+void InstanceReplayer<I>::stop_image_replayers() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<InstanceReplayer<I>,
+ &InstanceReplayer<I>::handle_stop_image_replayers>(this));
+
+ C_Gather *gather_ctx = new C_Gather(g_ceph_context, ctx);
+ for (auto &it : m_image_replayers) {
+ stop_image_replayer(it.second, gather_ctx->new_sub());
+ }
+ gather_ctx->activate();
+}
+
+template <typename I>
+void InstanceReplayer<I>::handle_stop_image_replayers(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r == 0);
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+
+ for (auto &it : m_image_replayers) {
+ ceph_assert(it.second->is_stopped());
+ it.second->destroy();
+ }
+ m_image_replayers.clear();
+
+ ceph_assert(m_on_shut_down != nullptr);
+ std::swap(on_finish, m_on_shut_down);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void InstanceReplayer<I>::cancel_image_state_check_task() {
+ std::lock_guard timer_locker{m_threads->timer_lock};
+
+ if (m_image_state_check_task == nullptr) {
+ return;
+ }
+
+ dout(10) << m_image_state_check_task << dendl;
+ bool canceled = m_threads->timer->cancel_event(m_image_state_check_task);
+ ceph_assert(canceled);
+ m_image_state_check_task = nullptr;
+}
+
+template <typename I>
+void InstanceReplayer<I>::schedule_image_state_check_task() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(m_image_state_check_task == nullptr);
+
+ m_image_state_check_task = new LambdaContext(
+ [this](int r) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ m_image_state_check_task = nullptr;
+ schedule_image_state_check_task();
+ queue_start_image_replayers();
+ });
+
+ auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
+ int after = cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_image_state_check_interval");
+
+ dout(10) << "scheduling image state check after " << after << " sec (task "
+ << m_image_state_check_task << ")" << dendl;
+ m_threads->timer->add_event_after(after, m_image_state_check_task);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/InstanceReplayer.h b/src/tools/rbd_mirror/InstanceReplayer.h
new file mode 100644
index 000000000..7a5c79723
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceReplayer.h
@@ -0,0 +1,138 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_INSTANCE_REPLAYER_H
+#define RBD_MIRROR_INSTANCE_REPLAYER_H
+
+#include <map>
+#include <sstream>
+
+#include "common/AsyncOpTracker.h"
+#include "common/Formatter.h"
+#include "common/ceph_mutex.h"
+#include "tools/rbd_mirror/Types.h"
+
+namespace journal { struct CacheManagerHandler; }
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ImageReplayer;
+template <typename> class InstanceWatcher;
+template <typename> class MirrorStatusUpdater;
+struct PoolMetaCache;
+template <typename> class ServiceDaemon;
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class InstanceReplayer {
+public:
+ static InstanceReplayer* create(
+ librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid,
+ Threads<ImageCtxT> *threads, ServiceDaemon<ImageCtxT> *service_daemon,
+ MirrorStatusUpdater<ImageCtxT>* local_status_updater,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache) {
+ return new InstanceReplayer(local_io_ctx, local_mirror_uuid, threads,
+ service_daemon, local_status_updater,
+ cache_manager_handler, pool_meta_cache);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ InstanceReplayer(librados::IoCtx &local_io_ctx,
+ const std::string &local_mirror_uuid,
+ Threads<ImageCtxT> *threads,
+ ServiceDaemon<ImageCtxT> *service_daemon,
+ MirrorStatusUpdater<ImageCtxT>* local_status_updater,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache);
+ ~InstanceReplayer();
+
+ bool is_blocklisted() const;
+
+ int init();
+ void shut_down();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+ void add_peer(const Peer<ImageCtxT>& peer);
+
+ void acquire_image(InstanceWatcher<ImageCtxT> *instance_watcher,
+ const std::string &global_image_id, Context *on_finish);
+ void release_image(const std::string &global_image_id, Context *on_finish);
+ void remove_peer_image(const std::string &global_image_id,
+ const std::string &peer_mirror_uuid,
+ Context *on_finish);
+
+ void release_all(Context *on_finish);
+
+ void print_status(Formatter *f);
+ void start();
+ void stop();
+ void restart();
+ void flush();
+
+ void stop(Context *on_finish);
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <-------------------\
+ * | (init) | (repeat for each
+ * v STOP_IMAGE_REPLAYER ---\ image replayer)
+ * SCHEDULE_IMAGE_STATE_CHECK_TASK ^ ^ |
+ * | | | |
+ * v (shut_down) | \---------/
+ * <initialized> -----------------> WAIT_FOR_OPS
+ *
+ * @endverbatim
+ */
+
+ typedef std::set<Peer<ImageCtxT>> Peers;
+
+ librados::IoCtx &m_local_io_ctx;
+ std::string m_local_mirror_uuid;
+ Threads<ImageCtxT> *m_threads;
+ ServiceDaemon<ImageCtxT> *m_service_daemon;
+ MirrorStatusUpdater<ImageCtxT>* m_local_status_updater;
+ journal::CacheManagerHandler *m_cache_manager_handler;
+ PoolMetaCache* m_pool_meta_cache;
+
+ mutable ceph::mutex m_lock;
+ AsyncOpTracker m_async_op_tracker;
+ std::map<std::string, ImageReplayer<ImageCtxT> *> m_image_replayers;
+ Peers m_peers;
+ Context *m_image_state_check_task = nullptr;
+ Context *m_on_shut_down = nullptr;
+ bool m_manual_stop = false;
+ bool m_blocklisted = false;
+
+ void wait_for_ops();
+ void handle_wait_for_ops(int r);
+
+ void start_image_replayer(ImageReplayer<ImageCtxT> *image_replayer);
+ void queue_start_image_replayers();
+ void start_image_replayers(int r);
+
+ void stop_image_replayer(ImageReplayer<ImageCtxT> *image_replayer,
+ Context *on_finish);
+
+ void stop_image_replayers();
+ void handle_stop_image_replayers(int r);
+
+ void schedule_image_state_check_task();
+ void cancel_image_state_check_task();
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_INSTANCE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/InstanceWatcher.cc b/src/tools/rbd_mirror/InstanceWatcher.cc
new file mode 100644
index 000000000..7b531064d
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceWatcher.cc
@@ -0,0 +1,1290 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "InstanceWatcher.h"
+#include "include/stringify.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ManagedLock.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "InstanceReplayer.h"
+#include "Throttler.h"
+#include "common/Cond.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: "
+
+namespace rbd {
+namespace mirror {
+
+using namespace instance_watcher;
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+using librbd::util::unique_lock_name;
+
+namespace {
+
+struct C_GetInstances : public Context {
+ std::vector<std::string> *instance_ids;
+ Context *on_finish;
+ bufferlist out_bl;
+
+ C_GetInstances(std::vector<std::string> *instance_ids, Context *on_finish)
+ : instance_ids(instance_ids), on_finish(on_finish) {
+ }
+
+ void finish(int r) override {
+ dout(10) << "C_GetInstances: " << this << " " << __func__ << ": r=" << r
+ << dendl;
+
+ if (r == 0) {
+ auto it = out_bl.cbegin();
+ r = librbd::cls_client::mirror_instances_list_finish(&it, instance_ids);
+ } else if (r == -ENOENT) {
+ r = 0;
+ }
+ on_finish->complete(r);
+ }
+};
+
+template <typename I>
+struct C_RemoveInstanceRequest : public Context {
+ InstanceWatcher<I> instance_watcher;
+ Context *on_finish;
+
+ C_RemoveInstanceRequest(librados::IoCtx &io_ctx,
+ librbd::AsioEngine& asio_engine,
+ const std::string &instance_id, Context *on_finish)
+ : instance_watcher(io_ctx, asio_engine, nullptr, nullptr, instance_id),
+ on_finish(on_finish) {
+ }
+
+ void send() {
+ dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << dendl;
+
+ instance_watcher.remove(this);
+ }
+
+ void finish(int r) override {
+ dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << ": r="
+ << r << dendl;
+ ceph_assert(r == 0);
+
+ on_finish->complete(r);
+ }
+};
+
+} // anonymous namespace
+
+template <typename I>
+struct InstanceWatcher<I>::C_NotifyInstanceRequest : public Context {
+ InstanceWatcher<I> *instance_watcher;
+ std::string instance_id;
+ uint64_t request_id;
+ bufferlist bl;
+ Context *on_finish;
+ bool send_to_leader;
+ std::unique_ptr<librbd::watcher::Notifier> notifier;
+ librbd::watcher::NotifyResponse response;
+ bool canceling = false;
+
+ C_NotifyInstanceRequest(InstanceWatcher<I> *instance_watcher,
+ const std::string &instance_id, uint64_t request_id,
+ bufferlist &&bl, Context *on_finish)
+ : instance_watcher(instance_watcher), instance_id(instance_id),
+ request_id(request_id), bl(bl), on_finish(on_finish),
+ send_to_leader(instance_id.empty()) {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": instance_watcher=" << instance_watcher << ", instance_id="
+ << instance_id << ", request_id=" << request_id << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock));
+
+ if (!send_to_leader) {
+ ceph_assert((!instance_id.empty()));
+ notifier.reset(new librbd::watcher::Notifier(
+ instance_watcher->m_work_queue,
+ instance_watcher->m_ioctx,
+ RBD_MIRROR_INSTANCE_PREFIX + instance_id));
+ }
+
+ instance_watcher->m_notify_op_tracker.start_op();
+ auto result = instance_watcher->m_notify_ops.insert(
+ std::make_pair(instance_id, this)).second;
+ ceph_assert(result);
+ }
+
+ void send() {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock));
+
+ if (canceling) {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": canceling" << dendl;
+ instance_watcher->m_work_queue->queue(this, -ECANCELED);
+ return;
+ }
+
+ if (send_to_leader) {
+ if (instance_watcher->m_leader_instance_id.empty()) {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": suspending" << dendl;
+ instance_watcher->suspend_notify_request(this);
+ return;
+ }
+
+ if (instance_watcher->m_leader_instance_id != instance_id) {
+ auto count = instance_watcher->m_notify_ops.erase(
+ std::make_pair(instance_id, this));
+ ceph_assert(count > 0);
+
+ instance_id = instance_watcher->m_leader_instance_id;
+
+ auto result = instance_watcher->m_notify_ops.insert(
+ std::make_pair(instance_id, this)).second;
+ ceph_assert(result);
+
+ notifier.reset(new librbd::watcher::Notifier(
+ instance_watcher->m_work_queue,
+ instance_watcher->m_ioctx,
+ RBD_MIRROR_INSTANCE_PREFIX + instance_id));
+ }
+ }
+
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": sending to " << instance_id << dendl;
+ notifier->notify(bl, &response, this);
+ }
+
+ void cancel() {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock));
+
+ canceling = true;
+ instance_watcher->unsuspend_notify_request(this);
+ }
+
+ void finish(int r) override {
+ dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << ": r="
+ << r << dendl;
+
+ if (r == 0 || r == -ETIMEDOUT) {
+ bool found = false;
+ for (auto &it : response.acks) {
+ auto &bl = it.second;
+ if (it.second.length() == 0) {
+ dout(5) << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": no payload in ack, ignoring" << dendl;
+ continue;
+ }
+ try {
+ auto iter = bl.cbegin();
+ NotifyAckPayload ack;
+ decode(ack, iter);
+ if (ack.instance_id != instance_watcher->get_instance_id()) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": ack instance_id (" << ack.instance_id << ") "
+ << "does not match, ignoring" << dendl;
+ continue;
+ }
+ if (ack.request_id != request_id) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": ack request_id (" << ack.request_id << ") "
+ << "does not match, ignoring" << dendl;
+ continue;
+ }
+ r = ack.ret_val;
+ found = true;
+ break;
+ } catch (const buffer::error &err) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": failed to decode ack: " << err.what() << dendl;
+ continue;
+ }
+ }
+
+ if (!found) {
+ if (r == -ETIMEDOUT) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": resending after timeout" << dendl;
+ std::lock_guard locker{instance_watcher->m_lock};
+ send();
+ return;
+ } else {
+ r = -EINVAL;
+ }
+ } else {
+ if (r == -ESTALE && send_to_leader) {
+ derr << "C_NotifyInstanceRequest: " << this << " " << __func__
+ << ": resending due to leader change" << dendl;
+ std::lock_guard locker{instance_watcher->m_lock};
+ send();
+ return;
+ }
+ }
+ }
+
+ on_finish->complete(r);
+
+ {
+ std::lock_guard locker{instance_watcher->m_lock};
+ auto result = instance_watcher->m_notify_ops.erase(
+ std::make_pair(instance_id, this));
+ ceph_assert(result > 0);
+ instance_watcher->m_notify_op_tracker.finish_op();
+ }
+
+ delete this;
+ }
+
+ void complete(int r) override {
+ finish(r);
+ }
+};
+
+template <typename I>
+struct InstanceWatcher<I>::C_SyncRequest : public Context {
+ InstanceWatcher<I> *instance_watcher;
+ std::string sync_id;
+ Context *on_start;
+ Context *on_complete = nullptr;
+ C_NotifyInstanceRequest *req = nullptr;
+
+ C_SyncRequest(InstanceWatcher<I> *instance_watcher,
+ const std::string &sync_id, Context *on_start)
+ : instance_watcher(instance_watcher), sync_id(sync_id),
+ on_start(on_start) {
+ dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": sync_id="
+ << sync_id << dendl;
+ }
+
+ void finish(int r) override {
+ dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": r="
+ << r << dendl;
+
+ if (on_start != nullptr) {
+ instance_watcher->handle_notify_sync_request(this, r);
+ } else {
+ instance_watcher->handle_notify_sync_complete(this, r);
+ delete this;
+ }
+ }
+
+ // called twice
+ void complete(int r) override {
+ finish(r);
+ }
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " \
+ << this << " " << __func__ << ": "
+template <typename I>
+void InstanceWatcher<I>::get_instances(librados::IoCtx &io_ctx,
+ std::vector<std::string> *instance_ids,
+ Context *on_finish) {
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_instances_list_start(&op);
+ C_GetInstances *ctx = new C_GetInstances(instance_ids, on_finish);
+ librados::AioCompletion *aio_comp = create_rados_callback(ctx);
+
+ int r = io_ctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &ctx->out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::remove_instance(librados::IoCtx &io_ctx,
+ librbd::AsioEngine& asio_engine,
+ const std::string &instance_id,
+ Context *on_finish) {
+ auto req = new C_RemoveInstanceRequest<I>(io_ctx, asio_engine, instance_id,
+ on_finish);
+ req->send();
+}
+
+template <typename I>
+InstanceWatcher<I> *InstanceWatcher<I>::create(
+ librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine,
+ InstanceReplayer<I> *instance_replayer,
+ Throttler<I> *image_sync_throttler) {
+ return new InstanceWatcher<I>(io_ctx, asio_engine, instance_replayer,
+ image_sync_throttler,
+ stringify(io_ctx.get_instance_id()));
+}
+
+template <typename I>
+InstanceWatcher<I>::InstanceWatcher(librados::IoCtx &io_ctx,
+ librbd::AsioEngine& asio_engine,
+ InstanceReplayer<I> *instance_replayer,
+ Throttler<I> *image_sync_throttler,
+ const std::string &instance_id)
+ : Watcher(io_ctx, asio_engine.get_work_queue(),
+ RBD_MIRROR_INSTANCE_PREFIX + instance_id),
+ m_instance_replayer(instance_replayer),
+ m_image_sync_throttler(image_sync_throttler), m_instance_id(instance_id),
+ m_lock(ceph::make_mutex(
+ unique_lock_name("rbd::mirror::InstanceWatcher::m_lock", this))),
+ m_instance_lock(librbd::ManagedLock<I>::create(
+ m_ioctx, asio_engine, m_oid, this, librbd::managed_lock::EXCLUSIVE, true,
+ m_cct->_conf.get_val<uint64_t>("rbd_blocklist_expire_seconds"))) {
+}
+
+template <typename I>
+InstanceWatcher<I>::~InstanceWatcher() {
+ ceph_assert(m_requests.empty());
+ ceph_assert(m_notify_ops.empty());
+ ceph_assert(m_notify_op_tracker.empty());
+ ceph_assert(m_suspended_ops.empty());
+ ceph_assert(m_inflight_sync_reqs.empty());
+ m_instance_lock->destroy();
+}
+
+template <typename I>
+int InstanceWatcher<I>::init() {
+ C_SaferCond init_ctx;
+ init(&init_ctx);
+ return init_ctx.wait();
+}
+
+template <typename I>
+void InstanceWatcher<I>::init(Context *on_finish) {
+ dout(10) << "instance_id=" << m_instance_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ register_instance();
+}
+
+template <typename I>
+void InstanceWatcher<I>::shut_down() {
+ C_SaferCond shut_down_ctx;
+ shut_down(&shut_down_ctx);
+ int r = shut_down_ctx.wait();
+ ceph_assert(r == 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::shut_down(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ release_lock();
+}
+
+template <typename I>
+void InstanceWatcher<I>::remove(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ get_instance_locker();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_image_acquire(
+ const std::string &instance_id, const std::string &global_image_id,
+ Context *on_notify_ack) {
+ dout(10) << "instance_id=" << instance_id << ", global_image_id="
+ << global_image_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_finish == nullptr);
+
+ uint64_t request_id = ++m_request_seq;
+ bufferlist bl;
+ encode(NotifyMessage{ImageAcquirePayload{request_id, global_image_id}}, bl);
+ auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+ std::move(bl), on_notify_ack);
+ req->send();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_image_release(
+ const std::string &instance_id, const std::string &global_image_id,
+ Context *on_notify_ack) {
+ dout(10) << "instance_id=" << instance_id << ", global_image_id="
+ << global_image_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_finish == nullptr);
+
+ uint64_t request_id = ++m_request_seq;
+ bufferlist bl;
+ encode(NotifyMessage{ImageReleasePayload{request_id, global_image_id}}, bl);
+ auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+ std::move(bl), on_notify_ack);
+ req->send();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_peer_image_removed(
+ const std::string &instance_id, const std::string &global_image_id,
+ const std::string &peer_mirror_uuid, Context *on_notify_ack) {
+ dout(10) << "instance_id=" << instance_id << ", "
+ << "global_image_id=" << global_image_id << ", "
+ << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == nullptr);
+
+ uint64_t request_id = ++m_request_seq;
+ bufferlist bl;
+ encode(NotifyMessage{PeerImageRemovedPayload{request_id, global_image_id,
+ peer_mirror_uuid}}, bl);
+ auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+ std::move(bl), on_notify_ack);
+ req->send();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_sync_request(const std::string &sync_id,
+ Context *on_sync_start) {
+ dout(10) << "sync_id=" << sync_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_inflight_sync_reqs.count(sync_id) == 0);
+
+ uint64_t request_id = ++m_request_seq;
+
+ bufferlist bl;
+ encode(NotifyMessage{SyncRequestPayload{request_id, sync_id}}, bl);
+
+ auto sync_ctx = new C_SyncRequest(this, sync_id, on_sync_start);
+ sync_ctx->req = new C_NotifyInstanceRequest(this, "", request_id,
+ std::move(bl), sync_ctx);
+
+ m_inflight_sync_reqs[sync_id] = sync_ctx;
+ sync_ctx->req->send();
+}
+
+template <typename I>
+bool InstanceWatcher<I>::cancel_sync_request(const std::string &sync_id) {
+ dout(10) << "sync_id=" << sync_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ auto it = m_inflight_sync_reqs.find(sync_id);
+ if (it == m_inflight_sync_reqs.end()) {
+ return false;
+ }
+
+ auto sync_ctx = it->second;
+
+ if (sync_ctx->on_start == nullptr) {
+ return false;
+ }
+
+ ceph_assert(sync_ctx->req != nullptr);
+ sync_ctx->req->cancel();
+ return true;
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_sync_start(const std::string &instance_id,
+ const std::string &sync_id) {
+ dout(10) << "sync_id=" << sync_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ uint64_t request_id = ++m_request_seq;
+
+ bufferlist bl;
+ encode(NotifyMessage{SyncStartPayload{request_id, sync_id}}, bl);
+
+ auto ctx = new LambdaContext(
+ [this, sync_id] (int r) {
+ dout(10) << "finish: sync_id=" << sync_id << ", r=" << r << dendl;
+ std::lock_guard locker{m_lock};
+ if (r != -ESTALE && is_leader()) {
+ m_image_sync_throttler->finish_op(m_ioctx.get_namespace(), sync_id);
+ }
+ });
+ auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+ std::move(bl), ctx);
+ req->send();
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_sync_complete(const std::string &sync_id) {
+ std::lock_guard locker{m_lock};
+ notify_sync_complete(m_lock, sync_id);
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_sync_complete(const ceph::mutex&,
+ const std::string &sync_id) {
+ dout(10) << "sync_id=" << sync_id << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ auto it = m_inflight_sync_reqs.find(sync_id);
+ ceph_assert(it != m_inflight_sync_reqs.end());
+
+ auto sync_ctx = it->second;
+ ceph_assert(sync_ctx->req == nullptr);
+
+ m_inflight_sync_reqs.erase(it);
+ m_work_queue->queue(sync_ctx, 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_notify_sync_request(C_SyncRequest *sync_ctx,
+ int r) {
+ dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl;
+
+ Context *on_start = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(sync_ctx->req != nullptr);
+ ceph_assert(sync_ctx->on_start != nullptr);
+
+ if (sync_ctx->req->canceling) {
+ r = -ECANCELED;
+ }
+
+ std::swap(sync_ctx->on_start, on_start);
+ sync_ctx->req = nullptr;
+
+ if (r == -ECANCELED) {
+ notify_sync_complete(m_lock, sync_ctx->sync_id);
+ }
+ }
+
+ on_start->complete(r == -ECANCELED ? r : 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_notify_sync_complete(C_SyncRequest *sync_ctx,
+ int r) {
+ dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl;
+
+ if (sync_ctx->on_complete != nullptr) {
+ sync_ctx->on_complete->complete(r);
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_acquire_leader() {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_leader_instance_id = m_instance_id;
+ unsuspend_notify_requests();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_release_leader() {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_leader_instance_id.clear();
+
+ m_image_sync_throttler->drain(m_ioctx.get_namespace(), -ESTALE);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_update_leader(
+ const std::string &leader_instance_id) {
+ dout(10) << "leader_instance_id=" << leader_instance_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_leader_instance_id = leader_instance_id;
+
+ if (!m_leader_instance_id.empty()) {
+ unsuspend_notify_requests();
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::cancel_notify_requests(
+ const std::string &instance_id) {
+ dout(10) << "instance_id=" << instance_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ for (auto op : m_notify_ops) {
+ if (op.first == instance_id && !op.second->send_to_leader) {
+ op.second->cancel();
+ }
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::register_instance() {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_instances_add(&op, m_instance_id);
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_instance>(this);
+
+ int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_register_instance(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+
+ if (r == 0) {
+ create_instance_object();
+ return;
+ }
+
+ derr << "error registering instance: " << cpp_strerror(r) << dendl;
+
+ std::swap(on_finish, m_on_finish);
+ }
+ on_finish->complete(r);
+}
+
+
+template <typename I>
+void InstanceWatcher<I>::create_instance_object() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ librados::ObjectWriteOperation op;
+ op.create(true);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ InstanceWatcher<I>,
+ &InstanceWatcher<I>::handle_create_instance_object>(this);
+ int r = m_ioctx.aio_operate(m_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_create_instance_object(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error creating " << m_oid << " object: " << cpp_strerror(r)
+ << dendl;
+
+ m_ret_val = r;
+ unregister_instance();
+ return;
+ }
+
+ register_watch();
+}
+
+template <typename I>
+void InstanceWatcher<I>::register_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_watch>(this));
+
+ librbd::Watcher::register_watch(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_register_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error registering instance watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+
+ m_ret_val = r;
+ remove_instance_object();
+ return;
+ }
+
+ acquire_lock();
+}
+
+template <typename I>
+void InstanceWatcher<I>::acquire_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_acquire_lock>(this));
+
+ m_instance_lock->acquire_lock(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_acquire_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+
+ derr << "error acquiring instance lock: " << cpp_strerror(r) << dendl;
+
+ m_ret_val = r;
+ unregister_watch();
+ return;
+ }
+
+ std::swap(on_finish, m_on_finish);
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void InstanceWatcher<I>::release_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_release_lock>(this));
+
+ m_instance_lock->shut_down(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_release_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error releasing instance lock: " << cpp_strerror(r) << dendl;
+ }
+
+ unregister_watch();
+}
+
+template <typename I>
+void InstanceWatcher<I>::unregister_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_watch>(this));
+
+ librbd::Watcher::unregister_watch(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_unregister_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error unregistering instance watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+ remove_instance_object();
+}
+
+template <typename I>
+void InstanceWatcher<I>::remove_instance_object() {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ op.remove();
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ InstanceWatcher<I>,
+ &InstanceWatcher<I>::handle_remove_instance_object>(this);
+ int r = m_ioctx.aio_operate(m_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_remove_instance_object(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ r = 0;
+ }
+
+ if (r < 0) {
+ derr << "error removing " << m_oid << " object: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+ unregister_instance();
+}
+
+template <typename I>
+void InstanceWatcher<I>::unregister_instance() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_instances_remove(&op, m_instance_id);
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_instance>(this);
+
+ int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_unregister_instance(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error unregistering instance: " << cpp_strerror(r) << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+ wait_for_notify_ops();
+}
+
+template <typename I>
+void InstanceWatcher<I>::wait_for_notify_ops() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ for (auto op : m_notify_ops) {
+ op.second->cancel();
+ }
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_wait_for_notify_ops>(this));
+
+ m_notify_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_wait_for_notify_ops(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r == 0);
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_notify_ops.empty());
+
+ std::swap(on_finish, m_on_finish);
+ r = m_ret_val;
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void InstanceWatcher<I>::get_instance_locker() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_get_instance_locker>(this));
+
+ m_instance_lock->get_locker(&m_instance_locker, ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_get_instance_locker(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ if (r != -ENOENT) {
+ derr << "error retrieving instance locker: " << cpp_strerror(r) << dendl;
+ }
+ remove_instance_object();
+ return;
+ }
+
+ break_instance_lock();
+}
+
+template <typename I>
+void InstanceWatcher<I>::break_instance_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ InstanceWatcher<I>, &InstanceWatcher<I>::handle_break_instance_lock>(this));
+
+ m_instance_lock->break_lock(m_instance_locker, true, ctx);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_break_instance_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ if (r != -ENOENT) {
+ derr << "error breaking instance lock: " << cpp_strerror(r) << dendl;
+ }
+ remove_instance_object();
+ return;
+ }
+
+ remove_instance_object();
+}
+
+template <typename I>
+void InstanceWatcher<I>::suspend_notify_request(C_NotifyInstanceRequest *req) {
+ dout(10) << req << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ auto result = m_suspended_ops.insert(req).second;
+ ceph_assert(result);
+}
+
+template <typename I>
+bool InstanceWatcher<I>::unsuspend_notify_request(
+ C_NotifyInstanceRequest *req) {
+ dout(10) << req << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ auto result = m_suspended_ops.erase(req);
+ if (result == 0) {
+ return false;
+ }
+
+ req->send();
+ return true;
+}
+
+template <typename I>
+void InstanceWatcher<I>::unsuspend_notify_requests() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ std::set<C_NotifyInstanceRequest *> suspended_ops;
+ std::swap(m_suspended_ops, suspended_ops);
+
+ for (auto op : suspended_ops) {
+ op->send();
+ }
+}
+
+template <typename I>
+Context *InstanceWatcher<I>::prepare_request(const std::string &instance_id,
+ uint64_t request_id,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id
+ << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ Context *ctx = nullptr;
+ Request request(instance_id, request_id);
+ auto it = m_requests.find(request);
+
+ if (it != m_requests.end()) {
+ dout(10) << "duplicate for in-progress request" << dendl;
+ delete it->on_notify_ack;
+ m_requests.erase(it);
+ } else {
+ ctx = create_async_context_callback(
+ m_work_queue, new LambdaContext(
+ [this, instance_id, request_id] (int r) {
+ complete_request(instance_id, request_id, r);
+ }));
+ }
+
+ request.on_notify_ack = on_notify_ack;
+ m_requests.insert(request);
+ return ctx;
+}
+
+template <typename I>
+void InstanceWatcher<I>::complete_request(const std::string &instance_id,
+ uint64_t request_id, int r) {
+ dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id
+ << dendl;
+
+ C_NotifyAck *on_notify_ack;
+ {
+ std::lock_guard locker{m_lock};
+ Request request(instance_id, request_id);
+ auto it = m_requests.find(request);
+ ceph_assert(it != m_requests.end());
+ on_notify_ack = it->on_notify_ack;
+ m_requests.erase(it);
+ }
+
+ encode(NotifyAckPayload(instance_id, request_id, r), on_notify_ack->out);
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) {
+ dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", "
+ << "notifier_id=" << notifier_id << dendl;
+
+ auto ctx = new C_NotifyAck(this, notify_id, handle);
+
+ NotifyMessage notify_message;
+ try {
+ auto iter = bl.cbegin();
+ decode(notify_message, iter);
+ } catch (const buffer::error &err) {
+ derr << "error decoding image notification: " << err.what() << dendl;
+ ctx->complete(0);
+ return;
+ }
+
+ apply_visitor(HandlePayloadVisitor(this, stringify(notifier_id), ctx),
+ notify_message.payload);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_image_acquire(
+ const std::string &global_image_id, Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+
+ auto ctx = new LambdaContext(
+ [this, global_image_id, on_finish] (int r) {
+ m_instance_replayer->acquire_image(this, global_image_id, on_finish);
+ m_notify_op_tracker.finish_op();
+ });
+
+ m_notify_op_tracker.start_op();
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_image_release(
+ const std::string &global_image_id, Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+
+ auto ctx = new LambdaContext(
+ [this, global_image_id, on_finish] (int r) {
+ m_instance_replayer->release_image(global_image_id, on_finish);
+ m_notify_op_tracker.finish_op();
+ });
+
+ m_notify_op_tracker.start_op();
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_peer_image_removed(
+ const std::string &global_image_id, const std::string &peer_mirror_uuid,
+ Context *on_finish) {
+ dout(10) << "global_image_id=" << global_image_id << ", "
+ << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+ auto ctx = new LambdaContext(
+ [this, peer_mirror_uuid, global_image_id, on_finish] (int r) {
+ m_instance_replayer->remove_peer_image(global_image_id,
+ peer_mirror_uuid, on_finish);
+ m_notify_op_tracker.finish_op();
+ });
+
+ m_notify_op_tracker.start_op();
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_sync_request(const std::string &instance_id,
+ const std::string &sync_id,
+ Context *on_finish) {
+ dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (!is_leader()) {
+ dout(10) << "sync request for non-leader" << dendl;
+ m_work_queue->queue(on_finish, -ESTALE);
+ return;
+ }
+
+ Context *on_start = create_async_context_callback(
+ m_work_queue, new LambdaContext(
+ [this, instance_id, sync_id, on_finish] (int r) {
+ dout(10) << "handle_sync_request: finish: instance_id=" << instance_id
+ << ", sync_id=" << sync_id << ", r=" << r << dendl;
+ if (r == 0) {
+ notify_sync_start(instance_id, sync_id);
+ }
+ if (r == -ENOENT) {
+ r = 0;
+ }
+ on_finish->complete(r);
+ }));
+ m_image_sync_throttler->start_op(m_ioctx.get_namespace(), sync_id, on_start);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_sync_start(const std::string &instance_id,
+ const std::string &sync_id,
+ Context *on_finish) {
+ dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ auto it = m_inflight_sync_reqs.find(sync_id);
+ if (it == m_inflight_sync_reqs.end()) {
+ dout(5) << "not found" << dendl;
+ m_work_queue->queue(on_finish, 0);
+ return;
+ }
+
+ auto sync_ctx = it->second;
+
+ if (sync_ctx->on_complete != nullptr) {
+ dout(5) << "duplicate request" << dendl;
+ m_work_queue->queue(sync_ctx->on_complete, -ESTALE);
+ }
+
+ sync_ctx->on_complete = on_finish;
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const ImageAcquirePayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "image_acquire: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish != nullptr) {
+ handle_image_acquire(payload.global_image_id, on_finish);
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const ImageReleasePayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "image_release: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish != nullptr) {
+ handle_image_release(payload.global_image_id, on_finish);
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const PeerImageRemovedPayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "remove_peer_image: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish != nullptr) {
+ handle_peer_image_removed(payload.global_image_id, payload.peer_mirror_uuid,
+ on_finish);
+ }
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const SyncRequestPayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "sync_request: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish == nullptr) {
+ return;
+ }
+
+ handle_sync_request(instance_id, payload.sync_id, on_finish);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const SyncStartPayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(10) << "sync_start: instance_id=" << instance_id << ", "
+ << "request_id=" << payload.request_id << dendl;
+
+ auto on_finish = prepare_request(instance_id, payload.request_id,
+ on_notify_ack);
+ if (on_finish == nullptr) {
+ return;
+ }
+
+ handle_sync_start(instance_id, payload.sync_id, on_finish);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+ const UnknownPayload &payload,
+ C_NotifyAck *on_notify_ack) {
+ dout(5) << "unknown: instance_id=" << instance_id << dendl;
+
+ on_notify_ack->complete(0);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::InstanceWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/InstanceWatcher.h b/src/tools/rbd_mirror/InstanceWatcher.h
new file mode 100644
index 000000000..08e40b40b
--- /dev/null
+++ b/src/tools/rbd_mirror/InstanceWatcher.h
@@ -0,0 +1,269 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCE_WATCHER_H
+#define CEPH_RBD_MIRROR_INSTANCE_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "common/AsyncOpTracker.h"
+#include "librbd/Watcher.h"
+#include "librbd/managed_lock/Types.h"
+#include "tools/rbd_mirror/instance_watcher/Types.h"
+
+namespace librbd {
+
+class AsioEngine;
+class ImageCtx;
+template <typename> class ManagedLock;
+
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class InstanceReplayer;
+template <typename> class Throttler;
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class InstanceWatcher : protected librbd::Watcher {
+ using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning
+public:
+ static void get_instances(librados::IoCtx &io_ctx,
+ std::vector<std::string> *instance_ids,
+ Context *on_finish);
+ static void remove_instance(librados::IoCtx &io_ctx,
+ librbd::AsioEngine& asio_engine,
+ const std::string &instance_id,
+ Context *on_finish);
+
+ static InstanceWatcher *create(
+ librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine,
+ InstanceReplayer<ImageCtxT> *instance_replayer,
+ Throttler<ImageCtxT> *image_sync_throttler);
+ void destroy() {
+ delete this;
+ }
+
+ InstanceWatcher(librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine,
+ InstanceReplayer<ImageCtxT> *instance_replayer,
+ Throttler<ImageCtxT> *image_sync_throttler,
+ const std::string &instance_id);
+ ~InstanceWatcher() override;
+
+ inline std::string &get_instance_id() {
+ return m_instance_id;
+ }
+
+ int init();
+ void shut_down();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+ void remove(Context *on_finish);
+
+ void notify_image_acquire(const std::string &instance_id,
+ const std::string &global_image_id,
+ Context *on_notify_ack);
+ void notify_image_release(const std::string &instance_id,
+ const std::string &global_image_id,
+ Context *on_notify_ack);
+ void notify_peer_image_removed(const std::string &instance_id,
+ const std::string &global_image_id,
+ const std::string &peer_mirror_uuid,
+ Context *on_notify_ack);
+
+ void notify_sync_request(const std::string &sync_id, Context *on_sync_start);
+ bool cancel_sync_request(const std::string &sync_id);
+ void notify_sync_complete(const std::string &sync_id);
+
+ void cancel_notify_requests(const std::string &instance_id);
+
+ void handle_acquire_leader();
+ void handle_release_leader();
+ void handle_update_leader(const std::string &leader_instance_id);
+
+private:
+ /**
+ * @verbatim
+ *
+ * BREAK_INSTANCE_LOCK -------\
+ * ^ |
+ * | (error) |
+ * GET_INSTANCE_LOCKER * * *>|
+ * ^ (remove) |
+ * | |
+ * <uninitialized> <----------------+---- WAIT_FOR_NOTIFY_OPS
+ * | (init) ^ | ^
+ * v (error) * | |
+ * REGISTER_INSTANCE * * * * * *|* *> UNREGISTER_INSTANCE
+ * | * | ^
+ * v (error) * v |
+ * CREATE_INSTANCE_OBJECT * * * * * *> REMOVE_INSTANCE_OBJECT
+ * | * ^
+ * v (error) * |
+ * REGISTER_WATCH * * * * * * * * * *> UNREGISTER_WATCH
+ * | * ^
+ * v (error) * |
+ * ACQUIRE_LOCK * * * * * * * * * * * RELEASE_LOCK
+ * | ^
+ * v (shut_down) |
+ * <watching> -------------------------------/
+ *
+ * @endverbatim
+ */
+
+ struct C_NotifyInstanceRequest;
+ struct C_SyncRequest;
+
+ typedef std::pair<std::string, std::string> Id;
+
+ struct HandlePayloadVisitor : public boost::static_visitor<void> {
+ InstanceWatcher *instance_watcher;
+ std::string instance_id;
+ C_NotifyAck *on_notify_ack;
+
+ HandlePayloadVisitor(InstanceWatcher *instance_watcher,
+ const std::string &instance_id,
+ C_NotifyAck *on_notify_ack)
+ : instance_watcher(instance_watcher), instance_id(instance_id),
+ on_notify_ack(on_notify_ack) {
+ }
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ instance_watcher->handle_payload(instance_id, payload, on_notify_ack);
+ }
+ };
+
+ struct Request {
+ std::string instance_id;
+ uint64_t request_id;
+ C_NotifyAck *on_notify_ack = nullptr;
+
+ Request(const std::string &instance_id, uint64_t request_id)
+ : instance_id(instance_id), request_id(request_id) {
+ }
+
+ inline bool operator<(const Request &rhs) const {
+ return instance_id < rhs.instance_id ||
+ (instance_id == rhs.instance_id && request_id < rhs.request_id);
+ }
+ };
+
+ Threads<ImageCtxT> *m_threads;
+ InstanceReplayer<ImageCtxT> *m_instance_replayer;
+ Throttler<ImageCtxT> *m_image_sync_throttler;
+ std::string m_instance_id;
+
+ mutable ceph::mutex m_lock;
+ librbd::ManagedLock<ImageCtxT> *m_instance_lock;
+ Context *m_on_finish = nullptr;
+ int m_ret_val = 0;
+ std::string m_leader_instance_id;
+ librbd::managed_lock::Locker m_instance_locker;
+ std::set<std::pair<std::string, C_NotifyInstanceRequest *>> m_notify_ops;
+ AsyncOpTracker m_notify_op_tracker;
+ uint64_t m_request_seq = 0;
+ std::set<Request> m_requests;
+ std::set<C_NotifyInstanceRequest *> m_suspended_ops;
+ std::map<std::string, C_SyncRequest *> m_inflight_sync_reqs;
+
+ inline bool is_leader() const {
+ return m_leader_instance_id == m_instance_id;
+ }
+
+ void register_instance();
+ void handle_register_instance(int r);
+
+ void create_instance_object();
+ void handle_create_instance_object(int r);
+
+ void register_watch();
+ void handle_register_watch(int r);
+
+ void acquire_lock();
+ void handle_acquire_lock(int r);
+
+ void release_lock();
+ void handle_release_lock(int r);
+
+ void unregister_watch();
+ void handle_unregister_watch(int r);
+
+ void remove_instance_object();
+ void handle_remove_instance_object(int r);
+
+ void unregister_instance();
+ void handle_unregister_instance(int r);
+
+ void wait_for_notify_ops();
+ void handle_wait_for_notify_ops(int r);
+
+ void get_instance_locker();
+ void handle_get_instance_locker(int r);
+
+ void break_instance_lock();
+ void handle_break_instance_lock(int r);
+
+ void suspend_notify_request(C_NotifyInstanceRequest *req);
+ bool unsuspend_notify_request(C_NotifyInstanceRequest *req);
+ void unsuspend_notify_requests();
+
+ void notify_sync_complete(const ceph::mutex& lock, const std::string &sync_id);
+ void handle_notify_sync_request(C_SyncRequest *sync_ctx, int r);
+ void handle_notify_sync_complete(C_SyncRequest *sync_ctx, int r);
+
+ void notify_sync_start(const std::string &instance_id,
+ const std::string &sync_id);
+
+ Context *prepare_request(const std::string &instance_id, uint64_t request_id,
+ C_NotifyAck *on_notify_ack);
+ void complete_request(const std::string &instance_id, uint64_t request_id,
+ int r);
+
+ void handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) override;
+
+ void handle_image_acquire(const std::string &global_image_id,
+ Context *on_finish);
+ void handle_image_release(const std::string &global_image_id,
+ Context *on_finish);
+ void handle_peer_image_removed(const std::string &global_image_id,
+ const std::string &peer_mirror_uuid,
+ Context *on_finish);
+
+ void handle_sync_request(const std::string &instance_id,
+ const std::string &sync_id, Context *on_finish);
+ void handle_sync_start(const std::string &instance_id,
+ const std::string &sync_id, Context *on_finish);
+
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::ImageAcquirePayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::ImageReleasePayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::PeerImageRemovedPayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::SyncRequestPayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::SyncStartPayload &payload,
+ C_NotifyAck *on_notify_ack);
+ void handle_payload(const std::string &instance_id,
+ const instance_watcher::UnknownPayload &payload,
+ C_NotifyAck *on_notify_ack);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCE_WATCHER_H
diff --git a/src/tools/rbd_mirror/Instances.cc b/src/tools/rbd_mirror/Instances.cc
new file mode 100644
index 000000000..ca291bb5f
--- /dev/null
+++ b/src/tools/rbd_mirror/Instances.cc
@@ -0,0 +1,356 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/stringify.h"
+#include "common/Timer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "InstanceWatcher.h"
+#include "Instances.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::Instances: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+Instances<I>::Instances(Threads<I> *threads, librados::IoCtx &ioctx,
+ const std::string& instance_id,
+ instances::Listener& listener) :
+ m_threads(threads), m_ioctx(ioctx), m_instance_id(instance_id),
+ m_listener(listener), m_cct(reinterpret_cast<CephContext *>(ioctx.cct())),
+ m_lock(ceph::make_mutex("rbd::mirror::Instances " + ioctx.get_pool_name())) {
+}
+
+template <typename I>
+Instances<I>::~Instances() {
+}
+
+template <typename I>
+void Instances<I>::init(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ get_instances();
+}
+
+template <typename I>
+void Instances<I>::shut_down(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ Context *ctx = new LambdaContext(
+ [this](int r) {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ cancel_remove_task();
+ wait_for_ops();
+ });
+
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void Instances<I>::unblock_listener() {
+ dout(5) << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_listener_blocked);
+ m_listener_blocked = false;
+
+ InstanceIds added_instance_ids;
+ for (auto& pair : m_instances) {
+ if (pair.second.state == INSTANCE_STATE_ADDING) {
+ added_instance_ids.push_back(pair.first);
+ }
+ }
+
+ if (!added_instance_ids.empty()) {
+ m_threads->work_queue->queue(
+ new C_NotifyInstancesAdded(this, added_instance_ids), 0);
+ }
+}
+
+template <typename I>
+void Instances<I>::acked(const InstanceIds& instance_ids) {
+ dout(10) << "instance_ids=" << instance_ids << dendl;
+
+ std::lock_guard locker{m_lock};
+ if (m_on_finish != nullptr) {
+ dout(5) << "received on shut down, ignoring" << dendl;
+ return;
+ }
+
+ Context *ctx = new C_HandleAcked(this, instance_ids);
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void Instances<I>::handle_acked(const InstanceIds& instance_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (m_on_finish != nullptr) {
+ dout(5) << "handled on shut down, ignoring" << dendl;
+ return;
+ }
+
+ InstanceIds added_instance_ids;
+ auto time = clock_t::now();
+ for (auto& instance_id : instance_ids) {
+ auto &instance = m_instances.insert(
+ std::make_pair(instance_id, Instance{})).first->second;
+ instance.acked_time = time;
+ if (instance.state == INSTANCE_STATE_ADDING) {
+ added_instance_ids.push_back(instance_id);
+ }
+ }
+
+ schedule_remove_task(time);
+ if (!m_listener_blocked && !added_instance_ids.empty()) {
+ m_threads->work_queue->queue(
+ new C_NotifyInstancesAdded(this, added_instance_ids), 0);
+ }
+}
+
+template <typename I>
+void Instances<I>::notify_instances_added(const InstanceIds& instance_ids) {
+ std::unique_lock locker{m_lock};
+ InstanceIds added_instance_ids;
+ for (auto& instance_id : instance_ids) {
+ auto it = m_instances.find(instance_id);
+ if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) {
+ added_instance_ids.push_back(instance_id);
+ }
+ }
+
+ if (added_instance_ids.empty()) {
+ return;
+ }
+
+ dout(5) << "instance_ids=" << added_instance_ids << dendl;
+ locker.unlock();
+ m_listener.handle_added(added_instance_ids);
+ locker.lock();
+
+ for (auto& instance_id : added_instance_ids) {
+ auto it = m_instances.find(instance_id);
+ if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) {
+ it->second.state = INSTANCE_STATE_IDLE;
+ }
+ }
+}
+
+template <typename I>
+void Instances<I>::notify_instances_removed(const InstanceIds& instance_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+ m_listener.handle_removed(instance_ids);
+
+ std::lock_guard locker{m_lock};
+ for (auto& instance_id : instance_ids) {
+ m_instances.erase(instance_id);
+ }
+}
+
+template <typename I>
+void Instances<I>::list(std::vector<std::string> *instance_ids) {
+ dout(20) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ for (auto it : m_instances) {
+ instance_ids->push_back(it.first);
+ }
+}
+
+
+template <typename I>
+void Instances<I>::get_instances() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_context_callback<
+ Instances, &Instances<I>::handle_get_instances>(this);
+
+ InstanceWatcher<I>::get_instances(m_ioctx, &m_instance_ids, ctx);
+}
+
+template <typename I>
+void Instances<I>::handle_get_instances(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ std::swap(on_finish, m_on_finish);
+ }
+
+ if (r < 0) {
+ derr << "error retrieving instances: " << cpp_strerror(r) << dendl;
+ } else {
+ handle_acked(m_instance_ids);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void Instances<I>::wait_for_ops() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ Instances, &Instances<I>::handle_wait_for_ops>(this));
+
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void Instances<I>::handle_wait_for_ops(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r == 0);
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ std::swap(on_finish, m_on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void Instances<I>::remove_instances(const Instances<I>::clock_t::time_point& time) {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ InstanceIds instance_ids;
+ for (auto& instance_pair : m_instances) {
+ if (instance_pair.first == m_instance_id) {
+ continue;
+ }
+ auto& instance = instance_pair.second;
+ if (instance.state != INSTANCE_STATE_REMOVING &&
+ instance.acked_time <= time) {
+ instance.state = INSTANCE_STATE_REMOVING;
+ instance_ids.push_back(instance_pair.first);
+ }
+ }
+ ceph_assert(!instance_ids.empty());
+
+ dout(10) << "instance_ids=" << instance_ids << dendl;
+ Context* ctx = new LambdaContext([this, instance_ids](int r) {
+ handle_remove_instances(r, instance_ids);
+ });
+ ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+ auto gather_ctx = new C_Gather(m_cct, ctx);
+ for (auto& instance_id : instance_ids) {
+ InstanceWatcher<I>::remove_instance(m_ioctx, *m_threads->asio_engine,
+ instance_id, gather_ctx->new_sub());
+ }
+
+ m_async_op_tracker.start_op();
+ gather_ctx->activate();
+}
+
+template <typename I>
+void Instances<I>::handle_remove_instances(
+ int r, const InstanceIds& instance_ids) {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ dout(10) << "r=" << r << ", instance_ids=" << instance_ids << dendl;
+ ceph_assert(r == 0);
+
+ // fire removed notification now that instances have been blocklisted
+ m_threads->work_queue->queue(
+ new C_NotifyInstancesRemoved(this, instance_ids), 0);
+
+ // reschedule the timer for the next batch
+ schedule_remove_task(clock_t::now());
+ m_async_op_tracker.finish_op();
+}
+
+template <typename I>
+void Instances<I>::cancel_remove_task() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ if (m_timer_task == nullptr) {
+ return;
+ }
+
+ dout(10) << dendl;
+
+ bool canceled = m_threads->timer->cancel_event(m_timer_task);
+ ceph_assert(canceled);
+ m_timer_task = nullptr;
+}
+
+template <typename I>
+void Instances<I>::schedule_remove_task(const Instances<I>::clock_t::time_point& time) {
+ cancel_remove_task();
+ if (m_on_finish != nullptr) {
+ dout(10) << "received on shut down, ignoring" << dendl;
+ return;
+ }
+
+ int after = m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_heartbeat_interval") *
+ (1 + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats") +
+ m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_acquire_attempts_before_break"));
+
+ bool schedule = false;
+ auto oldest_time = time;
+ for (auto& instance : m_instances) {
+ if (instance.first == m_instance_id) {
+ continue;
+ }
+ if (instance.second.state == INSTANCE_STATE_REMOVING) {
+ // removal is already in-flight
+ continue;
+ }
+
+ oldest_time = std::min(oldest_time, instance.second.acked_time);
+ schedule = true;
+ }
+
+ if (!schedule) {
+ return;
+ }
+
+ dout(10) << dendl;
+
+ // schedule a time to fire when the oldest instance should be removed
+ m_timer_task = new LambdaContext(
+ [this, oldest_time](int r) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ std::lock_guard locker{m_lock};
+ m_timer_task = nullptr;
+
+ remove_instances(oldest_time);
+ });
+
+ oldest_time += ceph::make_timespan(after);
+ m_threads->timer->add_event_at(oldest_time, m_timer_task);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::Instances<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/Instances.h b/src/tools/rbd_mirror/Instances.h
new file mode 100644
index 000000000..e6e104b73
--- /dev/null
+++ b/src/tools/rbd_mirror/Instances.h
@@ -0,0 +1,168 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCES_H
+#define CEPH_RBD_MIRROR_INSTANCES_H
+
+#include <map>
+#include <vector>
+
+#include "include/buffer_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "librbd/Watcher.h"
+#include "tools/rbd_mirror/instances/Types.h"
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Instances {
+public:
+ typedef std::vector<std::string> InstanceIds;
+
+ static Instances *create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &ioctx,
+ const std::string& instance_id,
+ instances::Listener& listener) {
+ return new Instances(threads, ioctx, instance_id, listener);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ Instances(Threads<ImageCtxT> *threads, librados::IoCtx &ioctx,
+ const std::string& instance_id, instances::Listener& listener);
+ virtual ~Instances();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+ void unblock_listener();
+
+ void acked(const InstanceIds& instance_ids);
+
+ void list(std::vector<std::string> *instance_ids);
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <---------------------\
+ * | (init) ^ |
+ * v (error) * |
+ * GET_INSTANCES * * * * * WAIT_FOR_OPS
+ * | ^
+ * v (shut_down) |
+ * <initialized> ------------------------/
+ * .
+ * . (remove_instance)
+ * v
+ * REMOVE_INSTANCE
+ *
+ * @endverbatim
+ */
+
+ enum InstanceState {
+ INSTANCE_STATE_ADDING,
+ INSTANCE_STATE_IDLE,
+ INSTANCE_STATE_REMOVING
+ };
+
+ using clock_t = ceph::real_clock;
+ struct Instance {
+ clock_t::time_point acked_time{};
+ InstanceState state = INSTANCE_STATE_ADDING;
+ };
+
+ struct C_NotifyBase : public Context {
+ Instances *instances;
+ InstanceIds instance_ids;
+
+ C_NotifyBase(Instances *instances, const InstanceIds& instance_ids)
+ : instances(instances), instance_ids(instance_ids) {
+ instances->m_async_op_tracker.start_op();
+ }
+
+ void finish(int r) override {
+ execute();
+ instances->m_async_op_tracker.finish_op();
+ }
+
+ virtual void execute() = 0;
+ };
+
+ struct C_HandleAcked : public C_NotifyBase {
+ C_HandleAcked(Instances *instances, const InstanceIds& instance_ids)
+ : C_NotifyBase(instances, instance_ids) {
+ }
+
+ void execute() override {
+ this->instances->handle_acked(this->instance_ids);
+ }
+ };
+
+ struct C_NotifyInstancesAdded : public C_NotifyBase {
+ C_NotifyInstancesAdded(Instances *instances,
+ const InstanceIds& instance_ids)
+ : C_NotifyBase(instances, instance_ids) {
+ }
+
+ void execute() override {
+ this->instances->notify_instances_added(this->instance_ids);
+ }
+ };
+
+ struct C_NotifyInstancesRemoved : public C_NotifyBase {
+ C_NotifyInstancesRemoved(Instances *instances,
+ const InstanceIds& instance_ids)
+ : C_NotifyBase(instances, instance_ids) {
+ }
+
+ void execute() override {
+ this->instances->notify_instances_removed(this->instance_ids);
+ }
+ };
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx &m_ioctx;
+ std::string m_instance_id;
+ instances::Listener& m_listener;
+ CephContext *m_cct;
+
+ ceph::mutex m_lock;
+ InstanceIds m_instance_ids;
+ std::map<std::string, Instance> m_instances;
+ Context *m_on_finish = nullptr;
+ AsyncOpTracker m_async_op_tracker;
+
+ Context *m_timer_task = nullptr;
+
+ bool m_listener_blocked = true;
+
+ void handle_acked(const InstanceIds& instance_ids);
+ void notify_instances_added(const InstanceIds& instance_ids);
+ void notify_instances_removed(const InstanceIds& instance_ids);
+
+ void get_instances();
+ void handle_get_instances(int r);
+
+ void wait_for_ops();
+ void handle_wait_for_ops(int r);
+
+ void remove_instances(const clock_t::time_point& time);
+ void handle_remove_instances(int r, const InstanceIds& instance_ids);
+
+ void cancel_remove_task();
+ void schedule_remove_task(const clock_t::time_point& time);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCES_H
diff --git a/src/tools/rbd_mirror/LeaderWatcher.cc b/src/tools/rbd_mirror/LeaderWatcher.cc
new file mode 100644
index 000000000..8f12af14c
--- /dev/null
+++ b/src/tools/rbd_mirror/LeaderWatcher.cc
@@ -0,0 +1,1069 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "LeaderWatcher.h"
+#include "common/Cond.h"
+#include "common/Timer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "include/stringify.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/watcher/Types.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::LeaderWatcher: " \
+ << this << " " << __func__ << ": "
+namespace rbd {
+namespace mirror {
+
+using namespace leader_watcher;
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+LeaderWatcher<I>::LeaderWatcher(Threads<I> *threads, librados::IoCtx &io_ctx,
+ leader_watcher::Listener *listener)
+ : Watcher(io_ctx, threads->work_queue, RBD_MIRROR_LEADER),
+ m_threads(threads), m_listener(listener), m_instances_listener(this),
+ m_lock(ceph::make_mutex("rbd::mirror::LeaderWatcher " +
+ io_ctx.get_pool_name())),
+ m_notifier_id(librados::Rados(io_ctx).get_instance_id()),
+ m_instance_id(stringify(m_notifier_id)),
+ m_leader_lock(new LeaderLock(m_ioctx, *m_threads->asio_engine, m_oid, this,
+ true, m_cct->_conf.get_val<uint64_t>(
+ "rbd_blocklist_expire_seconds"))) {
+}
+
+template <typename I>
+LeaderWatcher<I>::~LeaderWatcher() {
+ ceph_assert(m_instances == nullptr);
+ ceph_assert(m_timer_task == nullptr);
+
+ delete m_leader_lock;
+}
+
+template <typename I>
+std::string LeaderWatcher<I>::get_instance_id() {
+ return m_instance_id;
+}
+
+template <typename I>
+int LeaderWatcher<I>::init() {
+ C_SaferCond init_ctx;
+ init(&init_ctx);
+ return init_ctx.wait();
+}
+
+template <typename I>
+void LeaderWatcher<I>::init(Context *on_finish) {
+ dout(10) << "notifier_id=" << m_notifier_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ create_leader_object();
+}
+
+template <typename I>
+void LeaderWatcher<I>::create_leader_object() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ librados::ObjectWriteOperation op;
+ op.create(false);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_create_leader_object>(this);
+ int r = m_ioctx.aio_operate(m_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_create_leader_object(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+
+ if (r == 0) {
+ register_watch();
+ return;
+ }
+
+ derr << "error creating " << m_oid << " object: " << cpp_strerror(r)
+ << dendl;
+
+ std::swap(on_finish, m_on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::register_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_register_watch>(this));
+
+ librbd::Watcher::register_watch(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_register_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard timer_locker(m_threads->timer_lock);
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error registering leader watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+ } else {
+ schedule_acquire_leader_lock(0);
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(on_finish, m_on_finish);
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down() {
+ C_SaferCond shut_down_ctx;
+ shut_down(&shut_down_ctx);
+ int r = shut_down_ctx.wait();
+ ceph_assert(r == 0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ ceph_assert(m_on_shut_down_finish == nullptr);
+ m_on_shut_down_finish = on_finish;
+ cancel_timer_task();
+ shut_down_leader_lock();
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down_leader_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_shut_down_leader_lock>(this));
+
+ m_leader_lock->shut_down(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_shut_down_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error shutting down leader lock: " << cpp_strerror(r) << dendl;
+ }
+
+ unregister_watch();
+}
+
+template <typename I>
+void LeaderWatcher<I>::unregister_watch() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_unregister_watch>(this));
+
+ librbd::Watcher::unregister_watch(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_unregister_watch(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error unregistering leader watcher for " << m_oid << " object: "
+ << cpp_strerror(r) << dendl;
+ }
+ wait_for_tasks();
+}
+
+template <typename I>
+void LeaderWatcher<I>::wait_for_tasks() {
+ dout(10) << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ schedule_timer_task("wait for tasks", 0, false,
+ &LeaderWatcher<I>::handle_wait_for_tasks, true);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_wait_for_tasks() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_on_shut_down_finish != nullptr);
+
+ ceph_assert(!m_timer_op_tracker.empty());
+ m_timer_op_tracker.finish_op();
+
+ auto ctx = new LambdaContext([this](int r) {
+ Context *on_finish;
+ {
+ // ensure lock isn't held when completing shut down
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_shut_down_finish != nullptr);
+ on_finish = m_on_shut_down_finish;
+ }
+ on_finish->complete(0);
+ });
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_blocklisted() const {
+ std::lock_guard locker{m_lock};
+ return m_blocklisted;
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_leader() const {
+ std::lock_guard locker{m_lock};
+ return is_leader(m_lock);
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_leader(ceph::mutex &lock) const {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ bool leader = m_leader_lock->is_leader();
+ dout(10) << leader << dendl;
+ return leader;
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_releasing_leader() const {
+ std::lock_guard locker{m_lock};
+ return is_releasing_leader(m_lock);
+}
+
+template <typename I>
+bool LeaderWatcher<I>::is_releasing_leader(ceph::mutex &lock) const {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ bool releasing = m_leader_lock->is_releasing_leader();
+ dout(10) << releasing << dendl;
+ return releasing;
+}
+
+template <typename I>
+bool LeaderWatcher<I>::get_leader_instance_id(std::string *instance_id) const {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (is_leader(m_lock) || is_releasing_leader(m_lock)) {
+ *instance_id = m_instance_id;
+ return true;
+ }
+
+ if (!m_locker.cookie.empty()) {
+ *instance_id = stringify(m_locker.entity.num());
+ return true;
+ }
+
+ return false;
+}
+
+template <typename I>
+void LeaderWatcher<I>::release_leader() {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ if (!is_leader(m_lock)) {
+ return;
+ }
+
+ release_leader_lock();
+}
+
+template <typename I>
+void LeaderWatcher<I>::list_instances(std::vector<std::string> *instance_ids) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ instance_ids->clear();
+ if (m_instances != nullptr) {
+ m_instances->list(instance_ids);
+ }
+}
+
+template <typename I>
+void LeaderWatcher<I>::cancel_timer_task() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ if (m_timer_task == nullptr) {
+ return;
+ }
+
+ dout(10) << m_timer_task << dendl;
+ bool canceled = m_threads->timer->cancel_event(m_timer_task);
+ ceph_assert(canceled);
+ m_timer_task = nullptr;
+}
+
+template <typename I>
+void LeaderWatcher<I>::schedule_timer_task(const std::string &name,
+ int delay_factor, bool leader,
+ TimerCallback timer_callback,
+ bool shutting_down) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ if (!shutting_down && m_on_shut_down_finish != nullptr) {
+ return;
+ }
+
+ cancel_timer_task();
+
+ m_timer_task = new LambdaContext(
+ [this, leader, timer_callback](int r) {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ m_timer_task = nullptr;
+
+ if (m_timer_op_tracker.empty()) {
+ std::lock_guard locker{m_lock};
+ execute_timer_task(leader, timer_callback);
+ return;
+ }
+
+ // old timer task is still running -- do not start next
+ // task until the previous task completes
+ if (m_timer_gate == nullptr) {
+ m_timer_gate = new C_TimerGate(this);
+ m_timer_op_tracker.wait_for_ops(m_timer_gate);
+ }
+ m_timer_gate->leader = leader;
+ m_timer_gate->timer_callback = timer_callback;
+ });
+
+ int after = delay_factor * m_cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_leader_heartbeat_interval");
+
+ dout(10) << "scheduling " << name << " after " << after << " sec (task "
+ << m_timer_task << ")" << dendl;
+ m_threads->timer->add_event_after(after, m_timer_task);
+}
+
+template <typename I>
+void LeaderWatcher<I>::execute_timer_task(bool leader,
+ TimerCallback timer_callback) {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_timer_op_tracker.empty());
+
+ if (is_leader(m_lock) != leader) {
+ return;
+ }
+
+ m_timer_op_tracker.start_op();
+ (this->*timer_callback)();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_post_acquire_leader_lock(int r,
+ Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ if (r == -EAGAIN) {
+ dout(10) << "already locked" << dendl;
+ } else {
+ derr << "error acquiring leader lock: " << cpp_strerror(r) << dendl;
+ }
+ on_finish->complete(r);
+ return;
+ }
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ init_instances();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_pre_release_leader_lock(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ m_ret_val = 0;
+
+ notify_listener();
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_post_release_leader_lock(int r,
+ Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ on_finish->complete(r);
+ return;
+ }
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ notify_lock_released();
+}
+
+template <typename I>
+void LeaderWatcher<I>::break_leader_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_locker.cookie.empty()) {
+ get_locker();
+ return;
+ }
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_break_leader_lock>(this));
+
+ m_leader_lock->break_lock(m_locker, true, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_break_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "error breaking leader lock: " << cpp_strerror(r) << dendl;
+ schedule_acquire_leader_lock(1);
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ m_locker = {};
+ m_acquire_attempts = 0;
+ acquire_leader_lock();
+}
+
+template <typename I>
+void LeaderWatcher<I>::schedule_get_locker(bool reset_leader,
+ uint32_t delay_factor) {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ if (reset_leader) {
+ m_locker = {};
+ m_acquire_attempts = 0;
+ }
+
+ schedule_timer_task("get locker", delay_factor, false,
+ &LeaderWatcher<I>::get_locker, false);
+}
+
+template <typename I>
+void LeaderWatcher<I>::get_locker() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ C_GetLocker *get_locker_ctx = new C_GetLocker(this);
+ Context *ctx = create_async_context_callback(m_work_queue, get_locker_ctx);
+
+ m_leader_lock->get_locker(&get_locker_ctx->locker, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_get_locker(int r,
+ librbd::managed_lock::Locker& locker) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock l{m_threads->timer_lock, m_lock};
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (is_leader(m_lock)) {
+ m_locker = {};
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (r == -ENOENT) {
+ m_locker = {};
+ m_acquire_attempts = 0;
+ acquire_leader_lock();
+ return;
+ } else if (r < 0) {
+ derr << "error retrieving leader locker: " << cpp_strerror(r) << dendl;
+ schedule_get_locker(true, 1);
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ bool notify_listener = false;
+ if (m_locker != locker) {
+ m_locker = locker;
+ notify_listener = true;
+ if (m_acquire_attempts > 1) {
+ dout(10) << "new lock owner detected -- resetting heartbeat counter"
+ << dendl;
+ m_acquire_attempts = 0;
+ }
+ }
+
+ if (m_acquire_attempts >= m_cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_leader_max_acquire_attempts_before_break")) {
+ dout(0) << "breaking leader lock after " << m_acquire_attempts << " "
+ << "failed attempts to acquire" << dendl;
+ break_leader_lock();
+ return;
+ }
+
+ schedule_acquire_leader_lock(1);
+
+ if (!notify_listener) {
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ auto ctx = new LambdaContext(
+ [this](int r) {
+ std::string instance_id;
+ if (get_leader_instance_id(&instance_id)) {
+ m_listener->update_leader_handler(instance_id);
+ }
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ m_timer_op_tracker.finish_op();
+ });
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::schedule_acquire_leader_lock(uint32_t delay_factor) {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ schedule_timer_task("acquire leader lock",
+ delay_factor *
+ m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats"),
+ false, &LeaderWatcher<I>::acquire_leader_lock, false);
+}
+
+template <typename I>
+void LeaderWatcher<I>::acquire_leader_lock() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ ++m_acquire_attempts;
+ dout(10) << "acquire_attempts=" << m_acquire_attempts << dendl;
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_acquire_leader_lock>(this));
+ m_leader_lock->try_acquire_lock(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_acquire_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ if (r < 0) {
+ if (r == -EAGAIN) {
+ dout(10) << "already locked" << dendl;
+ } else {
+ derr << "error acquiring lock: " << cpp_strerror(r) << dendl;
+ }
+
+ get_locker();
+ return;
+ }
+
+ m_locker = {};
+ m_acquire_attempts = 0;
+
+ if (m_ret_val) {
+ dout(5) << "releasing due to error on notify" << dendl;
+ release_leader_lock();
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ notify_heartbeat();
+}
+
+template <typename I>
+void LeaderWatcher<I>::release_leader_lock() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_release_leader_lock>(this));
+
+ m_leader_lock->release_lock(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_release_leader_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ if (r < 0) {
+ derr << "error releasing lock: " << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ schedule_acquire_leader_lock(1);
+}
+
+template <typename I>
+void LeaderWatcher<I>::init_instances() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_instances == nullptr);
+
+ m_instances = Instances<I>::create(m_threads, m_ioctx, m_instance_id,
+ m_instances_listener);
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_instances>(this);
+
+ m_instances->init(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_init_instances(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ if (r < 0) {
+ std::lock_guard locker{m_lock};
+ derr << "error initializing instances: " << cpp_strerror(r) << dendl;
+ m_instances->destroy();
+ m_instances = nullptr;
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+ } else {
+ std::lock_guard locker{m_lock};
+ notify_listener();
+ return;
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::shut_down_instances() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_instances != nullptr);
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<LeaderWatcher<I>,
+ &LeaderWatcher<I>::handle_shut_down_instances>(this));
+
+ m_instances->shut_down(ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_shut_down_instances(int r) {
+ dout(10) << "r=" << r << dendl;
+ ceph_assert(r == 0);
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+
+ m_instances->destroy();
+ m_instances = nullptr;
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_listener() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_work_queue, create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_listener>(this));
+
+ if (is_leader(m_lock)) {
+ ctx = new LambdaContext(
+ [this, ctx](int r) {
+ m_listener->post_acquire_handler(ctx);
+ });
+ } else {
+ ctx = new LambdaContext(
+ [this, ctx](int r) {
+ m_listener->pre_release_handler(ctx);
+ });
+ }
+ m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_listener(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error notifying listener: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ }
+
+ if (is_leader(m_lock)) {
+ notify_lock_acquired();
+ } else {
+ shut_down_instances();
+ }
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_lock_acquired() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_acquired>(this);
+
+ bufferlist bl;
+ encode(NotifyMessage{LockAcquiredPayload{}}, bl);
+
+ send_notify(bl, nullptr, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_lock_acquired(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ if (r < 0 && r != -ETIMEDOUT) {
+ derr << "error notifying leader lock acquired: " << cpp_strerror(r)
+ << dendl;
+ m_ret_val = r;
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+
+ if (m_ret_val == 0) {
+ // listener should be ready for instance add/remove events now
+ m_instances->unblock_listener();
+ }
+ }
+ on_finish->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_lock_released() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_released>(this);
+
+ bufferlist bl;
+ encode(NotifyMessage{LockReleasedPayload{}}, bl);
+
+ send_notify(bl, nullptr, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_lock_released(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ if (r < 0 && r != -ETIMEDOUT) {
+ derr << "error notifying leader lock released: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ std::swap(m_on_finish, on_finish);
+ }
+ on_finish->complete(r);
+}
+
+template <typename I>
+void LeaderWatcher<I>::notify_heartbeat() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ if (!is_leader(m_lock)) {
+ dout(5) << "not leader, canceling" << dendl;
+ m_timer_op_tracker.finish_op();
+ return;
+ }
+
+ Context *ctx = create_context_callback<
+ LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_heartbeat>(this);
+
+ bufferlist bl;
+ encode(NotifyMessage{HeartbeatPayload{}}, bl);
+
+ m_heartbeat_response.acks.clear();
+ send_notify(bl, &m_heartbeat_response, ctx);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify_heartbeat(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ ceph_assert(!m_timer_op_tracker.empty());
+
+ m_timer_op_tracker.finish_op();
+ if (m_leader_lock->is_shutdown()) {
+ dout(10) << "canceling due to shutdown" << dendl;
+ return;
+ } else if (!is_leader(m_lock)) {
+ return;
+ }
+
+ if (r < 0 && r != -ETIMEDOUT) {
+ derr << "error notifying heartbeat: " << cpp_strerror(r)
+ << ", releasing leader" << dendl;
+ release_leader_lock();
+ return;
+ }
+
+ dout(10) << m_heartbeat_response.acks.size() << " acks received, "
+ << m_heartbeat_response.timeouts.size() << " timed out" << dendl;
+
+ std::vector<std::string> instance_ids;
+ for (auto &it: m_heartbeat_response.acks) {
+ uint64_t notifier_id = it.first.gid;
+ instance_ids.push_back(stringify(notifier_id));
+ }
+ if (!instance_ids.empty()) {
+ m_instances->acked(instance_ids);
+ }
+
+ schedule_timer_task("heartbeat", 1, true,
+ &LeaderWatcher<I>::notify_heartbeat, false);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_heartbeat(Context *on_notify_ack) {
+ dout(10) << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (is_leader(m_lock)) {
+ dout(5) << "got another leader heartbeat, ignoring" << dendl;
+ } else if (!m_locker.cookie.empty()) {
+ cancel_timer_task();
+ m_acquire_attempts = 0;
+ schedule_acquire_leader_lock(1);
+ }
+ }
+
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_lock_acquired(Context *on_notify_ack) {
+ dout(10) << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (is_leader(m_lock)) {
+ dout(5) << "got another leader lock_acquired, ignoring" << dendl;
+ } else {
+ cancel_timer_task();
+ schedule_get_locker(true, 0);
+ }
+ }
+
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_lock_released(Context *on_notify_ack) {
+ dout(10) << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (is_leader(m_lock)) {
+ dout(5) << "got another leader lock_released, ignoring" << dendl;
+ } else {
+ cancel_timer_task();
+ schedule_get_locker(true, 0);
+ }
+ }
+
+ on_notify_ack->complete(0);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) {
+ dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", "
+ << "notifier_id=" << notifier_id << dendl;
+
+ Context *ctx = new C_NotifyAck(this, notify_id, handle);
+
+ if (notifier_id == m_notifier_id) {
+ dout(10) << "our own notification, ignoring" << dendl;
+ ctx->complete(0);
+ return;
+ }
+
+ NotifyMessage notify_message;
+ try {
+ auto iter = bl.cbegin();
+ decode(notify_message, iter);
+ } catch (const buffer::error &err) {
+ derr << "error decoding image notification: " << err.what() << dendl;
+ ctx->complete(0);
+ return;
+ }
+
+ apply_visitor(HandlePayloadVisitor(this, ctx), notify_message.payload);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_rewatch_complete(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ if (r == -EBLOCKLISTED) {
+ dout(1) << "blocklisted detected" << dendl;
+ m_blocklisted = true;
+ return;
+ }
+
+ m_leader_lock->reacquire_lock(nullptr);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const HeartbeatPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "heartbeat" << dendl;
+
+ handle_heartbeat(on_notify_ack);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const LockAcquiredPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "lock_acquired" << dendl;
+
+ handle_lock_acquired(on_notify_ack);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const LockReleasedPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "lock_released" << dendl;
+
+ handle_lock_released(on_notify_ack);
+}
+
+template <typename I>
+void LeaderWatcher<I>::handle_payload(const UnknownPayload &payload,
+ Context *on_notify_ack) {
+ dout(10) << "unknown" << dendl;
+
+ on_notify_ack->complete(0);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::LeaderWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/LeaderWatcher.h b/src/tools/rbd_mirror/LeaderWatcher.h
new file mode 100644
index 000000000..58f23148f
--- /dev/null
+++ b/src/tools/rbd_mirror/LeaderWatcher.h
@@ -0,0 +1,313 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_LEADER_WATCHER_H
+#define CEPH_RBD_MIRROR_LEADER_WATCHER_H
+
+#include <list>
+#include <memory>
+#include <string>
+
+#include "common/AsyncOpTracker.h"
+#include "librbd/ManagedLock.h"
+#include "librbd/Watcher.h"
+#include "librbd/managed_lock/Types.h"
+#include "librbd/watcher/Types.h"
+#include "Instances.h"
+#include "tools/rbd_mirror/instances/Types.h"
+#include "tools/rbd_mirror/leader_watcher/Types.h"
+
+namespace librbd {
+class ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class LeaderWatcher : protected librbd::Watcher {
+ using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning
+public:
+ static LeaderWatcher* create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &io_ctx,
+ leader_watcher::Listener *listener) {
+ return new LeaderWatcher(threads, io_ctx, listener);
+ }
+
+ LeaderWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &io_ctx,
+ leader_watcher::Listener *listener);
+ ~LeaderWatcher() override;
+
+ int init();
+ void shut_down();
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+ bool is_blocklisted() const;
+ bool is_leader() const;
+ bool is_releasing_leader() const;
+ bool get_leader_instance_id(std::string *instance_id) const;
+ void release_leader();
+ void list_instances(std::vector<std::string> *instance_ids);
+
+ std::string get_instance_id();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <------------------------------ WAIT_FOR_TASKS
+ * | (init) ^ ^
+ * v * |
+ * CREATE_OBJECT * * * * * (error) UNREGISTER_WATCH
+ * | * ^
+ * v * |
+ * REGISTER_WATCH * * * * * SHUT_DOWN_LEADER_LOCK
+ * | ^
+ * | (no leader heartbeat and acquire failed) |
+ * | BREAK_LOCK <-------------------------------------\ |
+ * | | (no leader heartbeat) | | (shut down)
+ * | | /----------------------------------------\ | |
+ * | | | (lock_released received) | |
+ * | | | /-------------------------------------\ | |
+ * | | | | (lock_acquired or | | |
+ * | | | | heartbeat received) | | |
+ * | | | | (ENOENT) /-----------\ | | |
+ * | | | | * * * * * * * * * * | | | | |
+ * v v v v v (error) * v | | | |
+ * ACQUIRE_LEADER_LOCK * * * * *> GET_LOCKER ---> <secondary>
+ * | * ^
+ * ....|...................*.................... .....|.....................
+ * . v * . . | post_release .
+ * .INIT_INSTANCES * * * * * . .NOTIFY_LOCK_RELEASED .
+ * . | . .....^.....................
+ * . v . |
+ * .NOTIFY_LISTENER . RELEASE_LEADER_LOCK
+ * . | . ^
+ * . v . .....|.....................
+ * .NOTIFY_LOCK_ACQUIRED . . | .
+ * . | post_acquire . .SHUT_DOWN_INSTANCES .
+ * ....|........................................ . ^ .
+ * v . | .
+ * <leader> -----------------------------------> .NOTIFY_LISTENER .
+ * (shut_down, release_leader, . pre_release .
+ * notify error) ...........................
+ * @endverbatim
+ */
+
+ struct InstancesListener : public instances::Listener {
+ LeaderWatcher* leader_watcher;
+
+ InstancesListener(LeaderWatcher* leader_watcher)
+ : leader_watcher(leader_watcher) {
+ }
+
+ void handle_added(const InstanceIds& instance_ids) override {
+ leader_watcher->m_listener->handle_instances_added(instance_ids);
+ }
+
+ void handle_removed(const InstanceIds& instance_ids) override {
+ leader_watcher->m_listener->handle_instances_removed(instance_ids);
+ }
+ };
+
+ class LeaderLock : public librbd::ManagedLock<ImageCtxT> {
+ public:
+ typedef librbd::ManagedLock<ImageCtxT> Parent;
+
+ LeaderLock(librados::IoCtx& ioctx, librbd::AsioEngine& asio_engine,
+ const std::string& oid, LeaderWatcher *watcher,
+ bool blocklist_on_break_lock,
+ uint32_t blocklist_expire_seconds)
+ : Parent(ioctx, asio_engine, oid, watcher,
+ librbd::managed_lock::EXCLUSIVE, blocklist_on_break_lock,
+ blocklist_expire_seconds),
+ watcher(watcher) {
+ }
+
+ bool is_leader() const {
+ std::lock_guard locker{Parent::m_lock};
+ return Parent::is_state_post_acquiring() || Parent::is_state_locked();
+ }
+
+ bool is_releasing_leader() const {
+ std::lock_guard locker{Parent::m_lock};
+ return Parent::is_state_pre_releasing();
+ }
+
+ protected:
+ void post_acquire_lock_handler(int r, Context *on_finish) {
+ if (r == 0) {
+ // lock is owned at this point
+ std::lock_guard locker{Parent::m_lock};
+ Parent::set_state_post_acquiring();
+ }
+ watcher->handle_post_acquire_leader_lock(r, on_finish);
+ }
+ void pre_release_lock_handler(bool shutting_down,
+ Context *on_finish) {
+ watcher->handle_pre_release_leader_lock(on_finish);
+ }
+ void post_release_lock_handler(bool shutting_down, int r,
+ Context *on_finish) {
+ watcher->handle_post_release_leader_lock(r, on_finish);
+ }
+ private:
+ LeaderWatcher *watcher;
+ };
+
+ struct HandlePayloadVisitor : public boost::static_visitor<void> {
+ LeaderWatcher *leader_watcher;
+ Context *on_notify_ack;
+
+ HandlePayloadVisitor(LeaderWatcher *leader_watcher, Context *on_notify_ack)
+ : leader_watcher(leader_watcher), on_notify_ack(on_notify_ack) {
+ }
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ leader_watcher->handle_payload(payload, on_notify_ack);
+ }
+ };
+
+ struct C_GetLocker : public Context {
+ LeaderWatcher *leader_watcher;
+ librbd::managed_lock::Locker locker;
+
+ C_GetLocker(LeaderWatcher *leader_watcher)
+ : leader_watcher(leader_watcher) {
+ }
+
+ void finish(int r) override {
+ leader_watcher->handle_get_locker(r, locker);
+ }
+ };
+
+ typedef void (LeaderWatcher<ImageCtxT>::*TimerCallback)();
+
+ struct C_TimerGate : public Context {
+ LeaderWatcher *leader_watcher;
+
+ bool leader = false;
+ TimerCallback timer_callback = nullptr;
+
+ C_TimerGate(LeaderWatcher *leader_watcher)
+ : leader_watcher(leader_watcher) {
+ }
+
+ void finish(int r) override {
+ leader_watcher->m_timer_gate = nullptr;
+ leader_watcher->execute_timer_task(leader, timer_callback);
+ }
+ };
+
+ Threads<ImageCtxT> *m_threads;
+ leader_watcher::Listener *m_listener;
+
+ InstancesListener m_instances_listener;
+ mutable ceph::mutex m_lock;
+ uint64_t m_notifier_id;
+ std::string m_instance_id;
+ LeaderLock *m_leader_lock;
+ Context *m_on_finish = nullptr;
+ Context *m_on_shut_down_finish = nullptr;
+ uint64_t m_acquire_attempts = 0;
+ int m_ret_val = 0;
+ Instances<ImageCtxT> *m_instances = nullptr;
+ librbd::managed_lock::Locker m_locker;
+
+ bool m_blocklisted = false;
+
+ AsyncOpTracker m_timer_op_tracker;
+ Context *m_timer_task = nullptr;
+ C_TimerGate *m_timer_gate = nullptr;
+
+ librbd::watcher::NotifyResponse m_heartbeat_response;
+
+ bool is_leader(ceph::mutex &m_lock) const;
+ bool is_releasing_leader(ceph::mutex &m_lock) const;
+
+ void cancel_timer_task();
+ void schedule_timer_task(const std::string &name,
+ int delay_factor, bool leader,
+ TimerCallback callback, bool shutting_down);
+ void execute_timer_task(bool leader, TimerCallback timer_callback);
+
+ void create_leader_object();
+ void handle_create_leader_object(int r);
+
+ void register_watch();
+ void handle_register_watch(int r);
+
+ void shut_down_leader_lock();
+ void handle_shut_down_leader_lock(int r);
+
+ void unregister_watch();
+ void handle_unregister_watch(int r);
+
+ void wait_for_tasks();
+ void handle_wait_for_tasks();
+
+ void break_leader_lock();
+ void handle_break_leader_lock(int r);
+
+ void schedule_get_locker(bool reset_leader, uint32_t delay_factor);
+ void get_locker();
+ void handle_get_locker(int r, librbd::managed_lock::Locker& locker);
+
+ void schedule_acquire_leader_lock(uint32_t delay_factor);
+ void acquire_leader_lock();
+ void handle_acquire_leader_lock(int r);
+
+ void release_leader_lock();
+ void handle_release_leader_lock(int r);
+
+ void init_instances();
+ void handle_init_instances(int r);
+
+ void shut_down_instances();
+ void handle_shut_down_instances(int r);
+
+ void notify_listener();
+ void handle_notify_listener(int r);
+
+ void notify_lock_acquired();
+ void handle_notify_lock_acquired(int r);
+
+ void notify_lock_released();
+ void handle_notify_lock_released(int r);
+
+ void notify_heartbeat();
+ void handle_notify_heartbeat(int r);
+
+ void handle_post_acquire_leader_lock(int r, Context *on_finish);
+ void handle_pre_release_leader_lock(Context *on_finish);
+ void handle_post_release_leader_lock(int r, Context *on_finish);
+
+ void handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) override;
+
+ void handle_rewatch_complete(int r) override;
+
+ void handle_heartbeat(Context *on_ack);
+ void handle_lock_acquired(Context *on_ack);
+ void handle_lock_released(Context *on_ack);
+
+ void handle_payload(const leader_watcher::HeartbeatPayload &payload,
+ Context *on_notify_ack);
+ void handle_payload(const leader_watcher::LockAcquiredPayload &payload,
+ Context *on_notify_ack);
+ void handle_payload(const leader_watcher::LockReleasedPayload &payload,
+ Context *on_notify_ack);
+ void handle_payload(const leader_watcher::UnknownPayload &payload,
+ Context *on_notify_ack);
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_LEADER_WATCHER_H
diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc
new file mode 100644
index 000000000..e87009281
--- /dev/null
+++ b/src/tools/rbd_mirror/Mirror.cc
@@ -0,0 +1,763 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <signal.h>
+
+#include <boost/range/adaptor/map.hpp>
+
+#include "common/Formatter.h"
+#include "common/PriorityCache.h"
+#include "common/admin_socket.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Types.h"
+#include "librbd/ImageCtx.h"
+#include "perfglue/heap_profiler.h"
+#include "Mirror.h"
+#include "PoolMetaCache.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using librados::Rados;
+using librados::IoCtx;
+using librbd::mirror_peer_t;
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+class MirrorAdminSocketCommand {
+public:
+ virtual ~MirrorAdminSocketCommand() {}
+ virtual int call(Formatter *f) = 0;
+};
+
+class StatusCommand : public MirrorAdminSocketCommand {
+public:
+ explicit StatusCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->print_status(f);
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class StartCommand : public MirrorAdminSocketCommand {
+public:
+ explicit StartCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->start();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class StopCommand : public MirrorAdminSocketCommand {
+public:
+ explicit StopCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->stop();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class RestartCommand : public MirrorAdminSocketCommand {
+public:
+ explicit RestartCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->restart();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class FlushCommand : public MirrorAdminSocketCommand {
+public:
+ explicit FlushCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->flush();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+class LeaderReleaseCommand : public MirrorAdminSocketCommand {
+public:
+ explicit LeaderReleaseCommand(Mirror *mirror) : mirror(mirror) {}
+
+ int call(Formatter *f) override {
+ mirror->release_leader();
+ return 0;
+ }
+
+private:
+ Mirror *mirror;
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PriCache: " << this << " " \
+ << m_name << " " << __func__ << ": "
+
+struct PriCache : public PriorityCache::PriCache {
+ std::string m_name;
+ int64_t m_base_cache_max_size;
+ int64_t m_extra_cache_max_size;
+
+ PriorityCache::Priority m_base_cache_pri = PriorityCache::Priority::PRI10;
+ PriorityCache::Priority m_extra_cache_pri = PriorityCache::Priority::PRI10;
+ int64_t m_base_cache_bytes = 0;
+ int64_t m_extra_cache_bytes = 0;
+ int64_t m_committed_bytes = 0;
+ double m_cache_ratio = 0;
+
+ PriCache(const std::string &name, uint64_t min_size, uint64_t max_size)
+ : m_name(name), m_base_cache_max_size(min_size),
+ m_extra_cache_max_size(max_size - min_size) {
+ ceph_assert(max_size >= min_size);
+ }
+
+ void prioritize() {
+ if (m_base_cache_pri == PriorityCache::Priority::PRI0) {
+ return;
+ }
+ auto pri = static_cast<uint8_t>(m_base_cache_pri);
+ m_base_cache_pri = static_cast<PriorityCache::Priority>(--pri);
+
+ dout(30) << m_base_cache_pri << dendl;
+ }
+
+ int64_t request_cache_bytes(PriorityCache::Priority pri,
+ uint64_t total_cache) const override {
+ int64_t cache_bytes = 0;
+
+ if (pri == m_base_cache_pri) {
+ cache_bytes += m_base_cache_max_size;
+ }
+ if (pri == m_extra_cache_pri) {
+ cache_bytes += m_extra_cache_max_size;
+ }
+
+ dout(30) << cache_bytes << dendl;
+
+ return cache_bytes;
+ }
+
+ int64_t get_cache_bytes(PriorityCache::Priority pri) const override {
+ int64_t cache_bytes = 0;
+
+ if (pri == m_base_cache_pri) {
+ cache_bytes += m_base_cache_bytes;
+ }
+ if (pri == m_extra_cache_pri) {
+ cache_bytes += m_extra_cache_bytes;
+ }
+
+ dout(30) << "pri=" << pri << " " << cache_bytes << dendl;
+
+ return cache_bytes;
+ }
+
+ int64_t get_cache_bytes() const override {
+ auto cache_bytes = m_base_cache_bytes + m_extra_cache_bytes;
+
+ dout(30) << m_base_cache_bytes << "+" << m_extra_cache_bytes << "="
+ << cache_bytes << dendl;
+
+ return cache_bytes;
+ }
+
+ void set_cache_bytes(PriorityCache::Priority pri, int64_t bytes) override {
+ ceph_assert(bytes >= 0);
+ ceph_assert(pri == m_base_cache_pri || pri == m_extra_cache_pri ||
+ bytes == 0);
+
+ dout(30) << "pri=" << pri << " " << bytes << dendl;
+
+ if (pri == m_base_cache_pri) {
+ m_base_cache_bytes = std::min(m_base_cache_max_size, bytes);
+ bytes -= std::min(m_base_cache_bytes, bytes);
+ }
+
+ if (pri == m_extra_cache_pri) {
+ m_extra_cache_bytes = bytes;
+ }
+ }
+
+ void add_cache_bytes(PriorityCache::Priority pri, int64_t bytes) override {
+ ceph_assert(bytes >= 0);
+ ceph_assert(pri == m_base_cache_pri || pri == m_extra_cache_pri);
+
+ dout(30) << "pri=" << pri << " " << bytes << dendl;
+
+ if (pri == m_base_cache_pri) {
+ ceph_assert(m_base_cache_bytes <= m_base_cache_max_size);
+
+ auto chunk = std::min(m_base_cache_max_size - m_base_cache_bytes, bytes);
+ m_base_cache_bytes += chunk;
+ bytes -= chunk;
+ }
+
+ if (pri == m_extra_cache_pri) {
+ m_extra_cache_bytes += bytes;
+ }
+ }
+
+ int64_t commit_cache_size(uint64_t total_cache) override {
+ m_committed_bytes = p2roundup<int64_t>(get_cache_bytes(), 4096);
+
+ dout(30) << m_committed_bytes << dendl;
+
+ return m_committed_bytes;
+ }
+
+ int64_t get_committed_size() const override {
+ dout(30) << m_committed_bytes << dendl;
+
+ return m_committed_bytes;
+ }
+
+ double get_cache_ratio() const override {
+ dout(30) << m_cache_ratio << dendl;
+
+ return m_cache_ratio;
+ }
+
+ void set_cache_ratio(double ratio) override {
+ dout(30) << m_cache_ratio << dendl;
+
+ m_cache_ratio = ratio;
+ }
+
+ void shift_bins() override {
+ }
+
+ void import_bins(const std::vector<uint64_t> &intervals) override {
+ }
+
+ void set_bins(PriorityCache::Priority pri, uint64_t end_interval) override {
+ }
+
+ uint64_t get_bins(PriorityCache::Priority pri) const override {
+ return 0;
+ }
+
+ std::string get_cache_name() const override {
+ return m_name;
+ }
+};
+
+} // anonymous namespace
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::Mirror: " << this << " " \
+ << __func__ << ": "
+
+class MirrorAdminSocketHook : public AdminSocketHook {
+public:
+ MirrorAdminSocketHook(CephContext *cct, Mirror *mirror) :
+ admin_socket(cct->get_admin_socket()) {
+ std::string command;
+ int r;
+
+ command = "rbd mirror status";
+ r = admin_socket->register_command(command, this,
+ "get status for rbd mirror");
+ if (r == 0) {
+ commands[command] = new StatusCommand(mirror);
+ }
+
+ command = "rbd mirror start";
+ r = admin_socket->register_command(command, this,
+ "start rbd mirror");
+ if (r == 0) {
+ commands[command] = new StartCommand(mirror);
+ }
+
+ command = "rbd mirror stop";
+ r = admin_socket->register_command(command, this,
+ "stop rbd mirror");
+ if (r == 0) {
+ commands[command] = new StopCommand(mirror);
+ }
+
+ command = "rbd mirror restart";
+ r = admin_socket->register_command(command, this,
+ "restart rbd mirror");
+ if (r == 0) {
+ commands[command] = new RestartCommand(mirror);
+ }
+
+ command = "rbd mirror flush";
+ r = admin_socket->register_command(command, this,
+ "flush rbd mirror");
+ if (r == 0) {
+ commands[command] = new FlushCommand(mirror);
+ }
+
+ command = "rbd mirror leader release";
+ r = admin_socket->register_command(command, this,
+ "release rbd mirror leader");
+ if (r == 0) {
+ commands[command] = new LeaderReleaseCommand(mirror);
+ }
+ }
+
+ ~MirrorAdminSocketHook() override {
+ (void)admin_socket->unregister_commands(this);
+ for (Commands::const_iterator i = commands.begin(); i != commands.end();
+ ++i) {
+ delete i->second;
+ }
+ }
+
+ int call(std::string_view command, const cmdmap_t& cmdmap,
+ const bufferlist&,
+ Formatter *f,
+ std::ostream& errss,
+ bufferlist& out) override {
+ Commands::const_iterator i = commands.find(command);
+ ceph_assert(i != commands.end());
+ return i->second->call(f);
+ }
+
+private:
+ typedef std::map<std::string, MirrorAdminSocketCommand*, std::less<>> Commands;
+
+ AdminSocket *admin_socket;
+ Commands commands;
+};
+
+class CacheManagerHandler : public journal::CacheManagerHandler {
+public:
+ CacheManagerHandler(CephContext *cct)
+ : m_cct(cct) {
+
+ if (!m_cct->_conf.get_val<bool>("rbd_mirror_memory_autotune")) {
+ return;
+ }
+
+ uint64_t base = m_cct->_conf.get_val<Option::size_t>(
+ "rbd_mirror_memory_base");
+ double fragmentation = m_cct->_conf.get_val<double>(
+ "rbd_mirror_memory_expected_fragmentation");
+ uint64_t target = m_cct->_conf.get_val<Option::size_t>(
+ "rbd_mirror_memory_target");
+ uint64_t min = m_cct->_conf.get_val<Option::size_t>(
+ "rbd_mirror_memory_cache_min");
+ uint64_t max = min;
+
+ // When setting the maximum amount of memory to use for cache, first
+ // assume some base amount of memory for the daemon and then fudge in
+ // some overhead for fragmentation that scales with cache usage.
+ uint64_t ltarget = (1.0 - fragmentation) * target;
+ if (ltarget > base + min) {
+ max = ltarget - base;
+ }
+
+ m_next_balance = ceph_clock_now();
+ m_next_resize = ceph_clock_now();
+
+ m_cache_manager = std::make_unique<PriorityCache::Manager>(
+ m_cct, min, max, target, false);
+ }
+
+ ~CacheManagerHandler() {
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_caches.empty());
+ }
+
+ void register_cache(const std::string &cache_name,
+ uint64_t min_size, uint64_t max_size,
+ journal::CacheRebalanceHandler* handler) override {
+ if (!m_cache_manager) {
+ handler->handle_cache_rebalanced(max_size);
+ return;
+ }
+
+ dout(20) << cache_name << " min_size=" << min_size << " max_size="
+ << max_size << " handler=" << handler << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ auto p = m_caches.insert(
+ {cache_name, {cache_name, min_size, max_size, handler}});
+ ceph_assert(p.second == true);
+
+ m_cache_manager->insert(cache_name, p.first->second.pri_cache, false);
+ m_next_balance = ceph_clock_now();
+ }
+
+ void unregister_cache(const std::string &cache_name) override {
+ if (!m_cache_manager) {
+ return;
+ }
+
+ dout(20) << cache_name << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ auto it = m_caches.find(cache_name);
+ ceph_assert(it != m_caches.end());
+
+ m_cache_manager->erase(cache_name);
+ m_caches.erase(it);
+ m_next_balance = ceph_clock_now();
+ }
+
+ void run_cache_manager() {
+ if (!m_cache_manager) {
+ return;
+ }
+
+ std::lock_guard locker{m_lock};
+
+ // Before we trim, check and see if it's time to rebalance/resize.
+ auto autotune_interval = m_cct->_conf.get_val<double>(
+ "rbd_mirror_memory_cache_autotune_interval");
+ auto resize_interval = m_cct->_conf.get_val<double>(
+ "rbd_mirror_memory_cache_resize_interval");
+
+ utime_t now = ceph_clock_now();
+
+ if (autotune_interval > 0 && m_next_balance <= now) {
+ dout(20) << "balance" << dendl;
+ m_cache_manager->balance();
+
+ for (auto &it : m_caches) {
+ auto pri_cache = static_cast<PriCache *>(it.second.pri_cache.get());
+ auto new_cache_bytes = pri_cache->get_cache_bytes();
+ it.second.handler->handle_cache_rebalanced(new_cache_bytes);
+ pri_cache->prioritize();
+ }
+
+ m_next_balance = ceph_clock_now();
+ m_next_balance += autotune_interval;
+ }
+
+ if (resize_interval > 0 && m_next_resize < now) {
+ if (ceph_using_tcmalloc()) {
+ dout(20) << "tune memory" << dendl;
+ m_cache_manager->tune_memory();
+ }
+
+ m_next_resize = ceph_clock_now();
+ m_next_resize += resize_interval;
+ }
+ }
+
+private:
+ struct Cache {
+ std::shared_ptr<PriorityCache::PriCache> pri_cache;
+ journal::CacheRebalanceHandler *handler;
+
+ Cache(const std::string name, uint64_t min_size, uint64_t max_size,
+ journal::CacheRebalanceHandler *handler)
+ : pri_cache(new PriCache(name, min_size, max_size)), handler(handler) {
+ }
+ };
+
+ CephContext *m_cct;
+
+ mutable ceph::mutex m_lock =
+ ceph::make_mutex("rbd::mirror::CacheManagerHandler");
+ std::unique_ptr<PriorityCache::Manager> m_cache_manager;
+ std::map<std::string, Cache> m_caches;
+
+ utime_t m_next_balance;
+ utime_t m_next_resize;
+};
+
+Mirror::Mirror(CephContext *cct, const std::vector<const char*> &args) :
+ m_cct(cct),
+ m_args(args),
+ m_local(new librados::Rados()),
+ m_cache_manager_handler(new CacheManagerHandler(cct)),
+ m_pool_meta_cache(new PoolMetaCache(cct)),
+ m_asok_hook(new MirrorAdminSocketHook(cct, this)) {
+}
+
+Mirror::~Mirror()
+{
+ delete m_asok_hook;
+}
+
+void Mirror::handle_signal(int signum)
+{
+ dout(20) << signum << dendl;
+
+ std::lock_guard l{m_lock};
+
+ switch (signum) {
+ case SIGHUP:
+ for (auto &it : m_pool_replayers) {
+ it.second->reopen_logs();
+ }
+ g_ceph_context->reopen_logs();
+ break;
+
+ case SIGINT:
+ case SIGTERM:
+ m_stopping = true;
+ m_cond.notify_all();
+ break;
+
+ default:
+ ceph_abort_msgf("unexpected signal %d", signum);
+ }
+}
+
+int Mirror::init()
+{
+ int r = m_local->init_with_context(m_cct);
+ if (r < 0) {
+ derr << "could not initialize rados handle" << dendl;
+ return r;
+ }
+
+ r = m_local->connect();
+ if (r < 0) {
+ derr << "error connecting to local cluster" << dendl;
+ return r;
+ }
+
+ m_threads = &(m_cct->lookup_or_create_singleton_object<
+ Threads<librbd::ImageCtx>>("rbd_mirror::threads", false, m_local));
+ m_service_daemon.reset(new ServiceDaemon<>(m_cct, m_local, m_threads));
+
+ r = m_service_daemon->init();
+ if (r < 0) {
+ derr << "error registering service daemon: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ m_local_cluster_watcher.reset(new ClusterWatcher(m_local, m_lock,
+ m_service_daemon.get()));
+ return r;
+}
+
+void Mirror::run()
+{
+ dout(20) << "enter" << dendl;
+
+ using namespace std::chrono_literals;
+ utime_t next_refresh_pools = ceph_clock_now();
+
+ while (!m_stopping) {
+ utime_t now = ceph_clock_now();
+ bool refresh_pools = next_refresh_pools <= now;
+ if (refresh_pools) {
+ m_local_cluster_watcher->refresh_pools();
+ next_refresh_pools = ceph_clock_now();
+ next_refresh_pools += m_cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_pool_replayers_refresh_interval");
+ }
+ std::unique_lock l{m_lock};
+ if (!m_manual_stop) {
+ if (refresh_pools) {
+ update_pool_replayers(m_local_cluster_watcher->get_pool_peers(),
+ m_local_cluster_watcher->get_site_name());
+ }
+ m_cache_manager_handler->run_cache_manager();
+ }
+ m_cond.wait_for(l, 1s);
+ }
+
+ // stop all pool replayers in parallel
+ std::lock_guard locker{m_lock};
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->stop(false);
+ }
+ dout(20) << "return" << dendl;
+}
+
+void Mirror::print_status(Formatter *f)
+{
+ dout(20) << "enter" << dendl;
+
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ f->open_object_section("mirror_status");
+ f->open_array_section("pool_replayers");
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->print_status(f);
+ }
+ f->close_section();
+ f->close_section();
+}
+
+void Mirror::start()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = false;
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->start();
+ }
+}
+
+void Mirror::stop()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = true;
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->stop(true);
+ }
+}
+
+void Mirror::restart()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = false;
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->restart();
+ }
+}
+
+void Mirror::flush()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping || m_manual_stop) {
+ return;
+ }
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->flush();
+ }
+}
+
+void Mirror::release_leader()
+{
+ dout(20) << "enter" << dendl;
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ for (auto &pool_replayer : m_pool_replayers) {
+ pool_replayer.second->release_leader();
+ }
+}
+
+void Mirror::update_pool_replayers(const PoolPeers &pool_peers,
+ const std::string& site_name)
+{
+ dout(20) << "enter" << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ // remove stale pool replayers before creating new pool replayers
+ for (auto it = m_pool_replayers.begin(); it != m_pool_replayers.end();) {
+ auto &peer = it->first.second;
+ auto pool_peer_it = pool_peers.find(it->first.first);
+ if (pool_peer_it == pool_peers.end() ||
+ pool_peer_it->second.find(peer) == pool_peer_it->second.end()) {
+ dout(20) << "removing pool replayer for " << peer << dendl;
+ // TODO: make async
+ it->second->shut_down();
+ it = m_pool_replayers.erase(it);
+ } else {
+ ++it;
+ }
+ }
+
+ for (auto &kv : pool_peers) {
+ for (auto &peer : kv.second) {
+ PoolPeer pool_peer(kv.first, peer);
+
+ auto pool_replayers_it = m_pool_replayers.find(pool_peer);
+ if (pool_replayers_it != m_pool_replayers.end()) {
+ auto& pool_replayer = pool_replayers_it->second;
+ if (!m_site_name.empty() && !site_name.empty() &&
+ m_site_name != site_name) {
+ dout(0) << "restarting pool replayer for " << peer << " due to "
+ << "updated site name" << dendl;
+ // TODO: make async
+ pool_replayer->shut_down();
+ pool_replayer->init(site_name);
+ } else if (pool_replayer->is_blocklisted()) {
+ derr << "restarting blocklisted pool replayer for " << peer << dendl;
+ // TODO: make async
+ pool_replayer->shut_down();
+ pool_replayer->init(site_name);
+ } else if (!pool_replayer->is_running()) {
+ derr << "restarting failed pool replayer for " << peer << dendl;
+ // TODO: make async
+ pool_replayer->shut_down();
+ pool_replayer->init(site_name);
+ }
+ } else {
+ dout(20) << "starting pool replayer for " << peer << dendl;
+ unique_ptr<PoolReplayer<>> pool_replayer(
+ new PoolReplayer<>(m_threads, m_service_daemon.get(),
+ m_cache_manager_handler.get(),
+ m_pool_meta_cache.get(), kv.first, peer,
+ m_args));
+
+ // TODO: make async
+ pool_replayer->init(site_name);
+ m_pool_replayers.emplace(pool_peer, std::move(pool_replayer));
+ }
+ }
+
+ // TODO currently only support a single peer
+ }
+
+ m_site_name = site_name;
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/Mirror.h b/src/tools/rbd_mirror/Mirror.h
new file mode 100644
index 000000000..f92a63b68
--- /dev/null
+++ b/src/tools/rbd_mirror/Mirror.h
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_H
+#define CEPH_RBD_MIRROR_H
+
+#include "common/ceph_context.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+#include "include/utime.h"
+#include "ClusterWatcher.h"
+#include "PoolReplayer.h"
+#include "tools/rbd_mirror/Types.h"
+
+#include <set>
+#include <map>
+#include <memory>
+#include <atomic>
+
+namespace journal { class CacheManagerHandler; }
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct ServiceDaemon;
+template <typename> struct Threads;
+class CacheManagerHandler;
+class MirrorAdminSocketHook;
+class PoolMetaCache;
+
+/**
+ * Contains the main loop and overall state for rbd-mirror.
+ *
+ * Sets up mirroring, and coordinates between noticing config
+ * changes and applying them.
+ */
+class Mirror {
+public:
+ Mirror(CephContext *cct, const std::vector<const char*> &args);
+ Mirror(const Mirror&) = delete;
+ Mirror& operator=(const Mirror&) = delete;
+ ~Mirror();
+
+ int init();
+ void run();
+ void handle_signal(int signum);
+
+ void print_status(Formatter *f);
+ void start();
+ void stop();
+ void restart();
+ void flush();
+ void release_leader();
+
+private:
+ typedef ClusterWatcher::PoolPeers PoolPeers;
+ typedef std::pair<int64_t, PeerSpec> PoolPeer;
+
+ void update_pool_replayers(const PoolPeers &pool_peers,
+ const std::string& site_name);
+
+ void create_cache_manager();
+ void run_cache_manager(utime_t *next_run_interval);
+
+ CephContext *m_cct;
+ std::vector<const char*> m_args;
+ Threads<librbd::ImageCtx> *m_threads = nullptr;
+ ceph::mutex m_lock = ceph::make_mutex("rbd::mirror::Mirror");
+ ceph::condition_variable m_cond;
+ RadosRef m_local;
+ std::unique_ptr<ServiceDaemon<librbd::ImageCtx>> m_service_daemon;
+
+ // monitor local cluster for config changes in peers
+ std::unique_ptr<ClusterWatcher> m_local_cluster_watcher;
+ std::unique_ptr<CacheManagerHandler> m_cache_manager_handler;
+ std::unique_ptr<PoolMetaCache> m_pool_meta_cache;
+ std::map<PoolPeer, std::unique_ptr<PoolReplayer<>>> m_pool_replayers;
+ std::atomic<bool> m_stopping = { false };
+ bool m_manual_stop = false;
+ MirrorAdminSocketHook *m_asok_hook;
+ std::string m_site_name;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_H
diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.cc b/src/tools/rbd_mirror/MirrorStatusUpdater.cc
new file mode 100644
index 000000000..257cb1df2
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusUpdater.cc
@@ -0,0 +1,397 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/MirrorStatusUpdater.h"
+#include "include/Context.h"
+#include "include/stringify.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/MirrorStatusWatcher.h"
+#include "tools/rbd_mirror/Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::MirrorStatusUpdater " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+static const double UPDATE_INTERVAL_SECONDS = 30;
+static const uint32_t MAX_UPDATES_PER_OP = 100;
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+MirrorStatusUpdater<I>::MirrorStatusUpdater(
+ librados::IoCtx& io_ctx, Threads<I> *threads,
+ const std::string& local_mirror_uuid)
+ : m_io_ctx(io_ctx), m_threads(threads),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_lock(ceph::make_mutex("rbd::mirror::MirrorStatusUpdater " +
+ stringify(m_io_ctx.get_id()))) {
+ dout(10) << "local_mirror_uuid=" << local_mirror_uuid << ", "
+ << "pool_id=" << m_io_ctx.get_id() << dendl;
+}
+
+template <typename I>
+MirrorStatusUpdater<I>::~MirrorStatusUpdater() {
+ ceph_assert(!m_initialized);
+ delete m_mirror_status_watcher;
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::init(Context* on_finish) {
+ dout(10) << dendl;
+
+ ceph_assert(!m_initialized);
+ m_initialized = true;
+
+ {
+ std::lock_guard timer_locker{m_threads->timer_lock};
+ schedule_timer_task();
+ }
+
+ init_mirror_status_watcher(on_finish);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::init_mirror_status_watcher(Context* on_finish) {
+ dout(10) << dendl;
+
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_init_mirror_status_watcher(r, on_finish);
+ });
+ m_mirror_status_watcher = MirrorStatusWatcher<I>::create(
+ m_io_ctx, m_threads->work_queue);
+ m_mirror_status_watcher->init(ctx);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::handle_init_mirror_status_watcher(
+ int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to init mirror status watcher: " << cpp_strerror(r)
+ << dendl;
+
+ delete m_mirror_status_watcher;
+ m_mirror_status_watcher = nullptr;
+
+ on_finish = new LambdaContext([r, on_finish](int) {
+ on_finish->complete(r);
+ });
+ shut_down(on_finish);
+ return;
+ }
+
+ m_threads->work_queue->queue(on_finish, 0);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::shut_down(Context* on_finish) {
+ dout(10) << dendl;
+
+ {
+ std::lock_guard timer_locker{m_threads->timer_lock};
+ ceph_assert(m_timer_task != nullptr);
+ m_threads->timer->cancel_event(m_timer_task);
+ }
+
+ {
+ std::unique_lock locker(m_lock);
+ ceph_assert(m_initialized);
+ m_initialized = false;
+ }
+
+ shut_down_mirror_status_watcher(on_finish);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::shut_down_mirror_status_watcher(
+ Context* on_finish) {
+ if (m_mirror_status_watcher == nullptr) {
+ finalize_shutdown(0, on_finish);
+ return;
+ }
+
+ dout(10) << dendl;
+
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_shut_down_mirror_status_watcher(r, on_finish);
+ });
+ m_mirror_status_watcher->shut_down(ctx);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::handle_shut_down_mirror_status_watcher(
+ int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to shut down mirror status watcher: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ finalize_shutdown(r, on_finish);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::finalize_shutdown(int r, Context* on_finish) {
+ dout(10) << dendl;
+
+ {
+ std::unique_lock locker(m_lock);
+ if (m_update_in_progress) {
+ if (r < 0) {
+ on_finish = new LambdaContext([r, on_finish](int) {
+ on_finish->complete(r);
+ });
+ }
+
+ m_update_on_finish_ctxs.push_back(on_finish);
+ return;
+ }
+ }
+
+ m_threads->work_queue->queue(on_finish, r);
+}
+
+template <typename I>
+bool MirrorStatusUpdater<I>::exists(const std::string& global_image_id) {
+ dout(15) << "global_image_id=" << global_image_id << dendl;
+
+ std::unique_lock locker(m_lock);
+ return (m_global_image_status.count(global_image_id) > 0);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::set_mirror_image_status(
+ const std::string& global_image_id,
+ const cls::rbd::MirrorImageSiteStatus& mirror_image_site_status,
+ bool immediate_update) {
+ dout(15) << "global_image_id=" << global_image_id << ", "
+ << "mirror_image_site_status=" << mirror_image_site_status << dendl;
+
+ std::unique_lock locker(m_lock);
+
+ m_global_image_status[global_image_id] = mirror_image_site_status;
+ if (immediate_update) {
+ m_update_global_image_ids.insert(global_image_id);
+ queue_update_task(std::move(locker));
+ }
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::remove_refresh_mirror_image_status(
+ const std::string& global_image_id,
+ Context* on_finish) {
+ if (try_remove_mirror_image_status(global_image_id, false, false,
+ on_finish)) {
+ m_threads->work_queue->queue(on_finish, 0);
+ }
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::remove_mirror_image_status(
+ const std::string& global_image_id, bool immediate_update,
+ Context* on_finish) {
+ if (try_remove_mirror_image_status(global_image_id, true, immediate_update,
+ on_finish)) {
+ m_threads->work_queue->queue(on_finish, 0);
+ }
+}
+
+template <typename I>
+bool MirrorStatusUpdater<I>::try_remove_mirror_image_status(
+ const std::string& global_image_id, bool queue_update,
+ bool immediate_update, Context* on_finish) {
+ dout(15) << "global_image_id=" << global_image_id << ", "
+ << "queue_update=" << queue_update << ", "
+ << "immediate_update=" << immediate_update << dendl;
+
+ std::unique_lock locker(m_lock);
+ if ((m_update_in_flight &&
+ m_updating_global_image_ids.count(global_image_id) > 0) ||
+ ((m_update_in_progress || m_update_requested) &&
+ m_update_global_image_ids.count(global_image_id) > 0)) {
+ // if update is scheduled/in-progress, wait for it to complete
+ on_finish = new LambdaContext(
+ [this, global_image_id, queue_update, immediate_update,
+ on_finish](int r) {
+ if (try_remove_mirror_image_status(global_image_id, queue_update,
+ immediate_update, on_finish)) {
+ on_finish->complete(0);
+ }
+ });
+ m_update_on_finish_ctxs.push_back(on_finish);
+ return false;
+ }
+
+ m_global_image_status.erase(global_image_id);
+ if (queue_update) {
+ m_update_global_image_ids.insert(global_image_id);
+ if (immediate_update) {
+ queue_update_task(std::move(locker));
+ }
+ }
+
+ return true;
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::schedule_timer_task() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(m_timer_task == nullptr);
+ m_timer_task = create_context_callback<
+ MirrorStatusUpdater<I>,
+ &MirrorStatusUpdater<I>::handle_timer_task>(this);
+ m_threads->timer->add_event_after(UPDATE_INTERVAL_SECONDS, m_timer_task);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::handle_timer_task(int r) {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(m_timer_task != nullptr);
+ m_timer_task = nullptr;
+ schedule_timer_task();
+
+ std::unique_lock locker(m_lock);
+ for (auto& pair : m_global_image_status) {
+ m_update_global_image_ids.insert(pair.first);
+ }
+
+ queue_update_task(std::move(locker));
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::queue_update_task(
+ std::unique_lock<ceph::mutex>&& locker) {
+ if (!m_initialized) {
+ return;
+ }
+
+ if (m_update_in_progress) {
+ if (m_update_in_flight) {
+ dout(10) << "deferring update due to in-flight ops" << dendl;
+ m_update_requested = true;
+ }
+ return;
+ }
+
+ m_update_in_progress = true;
+ ceph_assert(!m_update_in_flight);
+ ceph_assert(!m_update_requested);
+ locker.unlock();
+
+ dout(10) << dendl;
+ auto ctx = create_context_callback<
+ MirrorStatusUpdater<I>,
+ &MirrorStatusUpdater<I>::update_task>(this);
+ m_threads->work_queue->queue(ctx);
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::update_task(int r) {
+ dout(10) << dendl;
+
+ std::unique_lock locker(m_lock);
+ ceph_assert(m_update_in_progress);
+ ceph_assert(!m_update_in_flight);
+ m_update_in_flight = true;
+
+ std::swap(m_updating_global_image_ids, m_update_global_image_ids);
+ auto updating_global_image_ids = m_updating_global_image_ids;
+ auto global_image_status = m_global_image_status;
+ locker.unlock();
+
+ Context* ctx = create_context_callback<
+ MirrorStatusUpdater<I>,
+ &MirrorStatusUpdater<I>::handle_update_task>(this);
+ if (updating_global_image_ids.empty()) {
+ ctx->complete(0);
+ return;
+ }
+
+ auto gather = new C_Gather(g_ceph_context, ctx);
+
+ auto it = updating_global_image_ids.begin();
+ while (it != updating_global_image_ids.end()) {
+ librados::ObjectWriteOperation op;
+ uint32_t op_count = 0;
+
+ while (it != updating_global_image_ids.end() &&
+ op_count < MAX_UPDATES_PER_OP) {
+ auto& global_image_id = *it;
+ ++it;
+
+ auto status_it = global_image_status.find(global_image_id);
+ if (status_it == global_image_status.end()) {
+ librbd::cls_client::mirror_image_status_remove(&op, global_image_id);
+ ++op_count;
+ continue;
+ }
+
+ status_it->second.mirror_uuid = m_local_mirror_uuid;
+ librbd::cls_client::mirror_image_status_set(&op, global_image_id,
+ status_it->second);
+ ++op_count;
+ }
+
+ auto aio_comp = create_rados_callback(gather->new_sub());
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+ }
+
+ gather->activate();
+}
+
+template <typename I>
+void MirrorStatusUpdater<I>::handle_update_task(int r) {
+ dout(10) << dendl;
+ if (r < 0) {
+ derr << "failed to update mirror image statuses: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ std::unique_lock locker(m_lock);
+
+ Contexts on_finish_ctxs;
+ std::swap(on_finish_ctxs, m_update_on_finish_ctxs);
+
+ ceph_assert(m_update_in_progress);
+ m_update_in_progress = false;
+
+ ceph_assert(m_update_in_flight);
+ m_update_in_flight = false;
+
+ m_updating_global_image_ids.clear();
+
+ if (m_update_requested) {
+ m_update_requested = false;
+ queue_update_task(std::move(locker));
+ } else {
+ locker.unlock();
+ }
+
+ for (auto on_finish : on_finish_ctxs) {
+ on_finish->complete(0);
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::MirrorStatusUpdater<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.h b/src/tools/rbd_mirror/MirrorStatusUpdater.h
new file mode 100644
index 000000000..783b818fc
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusUpdater.h
@@ -0,0 +1,119 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H
+#define CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H
+
+#include "include/rados/librados.hpp"
+#include "common/ceph_mutex.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+
+struct Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct MirrorStatusWatcher;
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class MirrorStatusUpdater {
+public:
+
+ static MirrorStatusUpdater* create(librados::IoCtx& io_ctx,
+ Threads<ImageCtxT> *threads,
+ const std::string& local_mirror_uuid) {
+ return new MirrorStatusUpdater(io_ctx, threads, local_mirror_uuid);
+ }
+
+ MirrorStatusUpdater(librados::IoCtx& io_ctx, Threads<ImageCtxT> *threads,
+ const std::string& local_mirror_uuid);
+ ~MirrorStatusUpdater();
+
+ void init(Context* on_finish);
+ void shut_down(Context* on_finish);
+
+ bool exists(const std::string& global_image_id);
+ void set_mirror_image_status(
+ const std::string& global_image_id,
+ const cls::rbd::MirrorImageSiteStatus& mirror_image_site_status,
+ bool immediate_update);
+ void remove_mirror_image_status(const std::string& global_image_id,
+ bool immediate_update, Context* on_finish);
+ void remove_refresh_mirror_image_status(const std::string& global_image_id,
+ Context* on_finish);
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <----------------------\
+ * | (init) ^ (error) |
+ * v * |
+ * INIT_STATUS_WATCHER * * * * * |
+ * | |
+ * | SHUT_DOWN_STATUS_WATCHER
+ * | ^
+ * | |
+ * | (shutdown) |
+ * <initialized> -------------------------/
+ *
+ * @endverbatim
+ */
+ typedef std::list<Context*> Contexts;
+ typedef std::set<std::string> GlobalImageIds;
+ typedef std::map<std::string, cls::rbd::MirrorImageSiteStatus>
+ GlobalImageStatus;
+
+ librados::IoCtx m_io_ctx;
+ Threads<ImageCtxT>* m_threads;
+ std::string m_local_mirror_uuid;
+
+ Context* m_timer_task = nullptr;
+
+ ceph::mutex m_lock;
+
+ bool m_initialized = false;
+
+ MirrorStatusWatcher<ImageCtxT>* m_mirror_status_watcher = nullptr;
+
+ GlobalImageIds m_update_global_image_ids;
+ GlobalImageStatus m_global_image_status;
+
+ bool m_update_in_progress = false;
+ bool m_update_in_flight = false;
+ bool m_update_requested = false;
+ Contexts m_update_on_finish_ctxs;
+ GlobalImageIds m_updating_global_image_ids;
+
+ bool try_remove_mirror_image_status(const std::string& global_image_id,
+ bool queue_update, bool immediate_update,
+ Context* on_finish);
+
+ void init_mirror_status_watcher(Context* on_finish);
+ void handle_init_mirror_status_watcher(int r, Context* on_finish);
+
+ void shut_down_mirror_status_watcher(Context* on_finish);
+ void handle_shut_down_mirror_status_watcher(int r, Context* on_finish);
+ void finalize_shutdown(int r, Context* on_finish);
+
+ void schedule_timer_task();
+ void handle_timer_task(int r);
+
+ void queue_update_task(std::unique_lock<ceph::mutex>&& locker);
+ void update_task(int r);
+ void handle_update_task(int r);
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::MirrorStatusUpdater<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H
diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.cc b/src/tools/rbd_mirror/MirrorStatusWatcher.cc
new file mode 100644
index 000000000..3e1564c5b
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusWatcher.cc
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "MirrorStatusWatcher.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::MirrorStatusWatcher: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+MirrorStatusWatcher<I>::MirrorStatusWatcher(librados::IoCtx &io_ctx,
+ librbd::asio::ContextWQ *work_queue)
+ : Watcher(io_ctx, work_queue, RBD_MIRRORING) {
+}
+
+template <typename I>
+MirrorStatusWatcher<I>::~MirrorStatusWatcher() {
+}
+
+template <typename I>
+void MirrorStatusWatcher<I>::init(Context *on_finish) {
+ dout(20) << dendl;
+
+ on_finish = new LambdaContext(
+ [this, on_finish] (int r) {
+ if (r < 0) {
+ derr << "error removing down statuses: " << cpp_strerror(r) << dendl;
+ on_finish->complete(r);
+ return;
+ }
+ register_watch(on_finish);
+ });
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_status_remove_down(&op);
+ librados::AioCompletion *aio_comp = create_rados_callback(on_finish);
+
+ int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void MirrorStatusWatcher<I>::shut_down(Context *on_finish) {
+ dout(20) << dendl;
+
+ unregister_watch(on_finish);
+}
+
+template <typename I>
+void MirrorStatusWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id,
+ bufferlist &bl) {
+ dout(20) << dendl;
+
+ bufferlist out;
+ acknowledge_notify(notify_id, handle, out);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::MirrorStatusWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.h b/src/tools/rbd_mirror/MirrorStatusWatcher.h
new file mode 100644
index 000000000..3335e9e63
--- /dev/null
+++ b/src/tools/rbd_mirror/MirrorStatusWatcher.h
@@ -0,0 +1,43 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H
+#define CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H
+
+#include "librbd/Watcher.h"
+
+namespace librbd {
+class ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class MirrorStatusWatcher : protected librbd::Watcher {
+public:
+ static MirrorStatusWatcher *create(librados::IoCtx &io_ctx,
+ librbd::asio::ContextWQ *work_queue) {
+ return new MirrorStatusWatcher(io_ctx, work_queue);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ MirrorStatusWatcher(librados::IoCtx &io_ctx,
+ librbd::asio::ContextWQ *work_queue);
+ ~MirrorStatusWatcher() override;
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+protected:
+ void handle_notify(uint64_t notify_id, uint64_t handle,
+ uint64_t notifier_id, bufferlist &bl) override;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H
diff --git a/src/tools/rbd_mirror/NamespaceReplayer.cc b/src/tools/rbd_mirror/NamespaceReplayer.cc
new file mode 100644
index 000000000..d305d8472
--- /dev/null
+++ b/src/tools/rbd_mirror/NamespaceReplayer.cc
@@ -0,0 +1,862 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "NamespaceReplayer.h"
+#include "common/Formatter.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Mirror.h"
+#include "librbd/asio/ContextWQ.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::NamespaceReplayer: " \
+ << this << " " << __func__ << ": "
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+namespace rbd {
+namespace mirror {
+
+using ::operator<<;
+
+namespace {
+
+const std::string SERVICE_DAEMON_LOCAL_COUNT_KEY("image_local_count");
+const std::string SERVICE_DAEMON_REMOTE_COUNT_KEY("image_remote_count");
+
+} // anonymous namespace
+
+template <typename I>
+NamespaceReplayer<I>::NamespaceReplayer(
+ const std::string &name,
+ librados::IoCtx &local_io_ctx, librados::IoCtx &remote_io_ctx,
+ const std::string &local_mirror_uuid,
+ const std::string& local_mirror_peer_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ Threads<I> *threads,
+ Throttler<I> *image_sync_throttler,
+ Throttler<I> *image_deletion_throttler,
+ ServiceDaemon<I> *service_daemon,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache) :
+ m_namespace_name(name),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_local_mirror_peer_uuid(local_mirror_peer_uuid),
+ m_remote_pool_meta(remote_pool_meta),
+ m_threads(threads), m_image_sync_throttler(image_sync_throttler),
+ m_image_deletion_throttler(image_deletion_throttler),
+ m_service_daemon(service_daemon),
+ m_cache_manager_handler(cache_manager_handler),
+ m_pool_meta_cache(pool_meta_cache),
+ m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+ "rbd::mirror::NamespaceReplayer " + name, this))),
+ m_local_pool_watcher_listener(this, true),
+ m_remote_pool_watcher_listener(this, false),
+ m_image_map_listener(this) {
+ dout(10) << name << dendl;
+
+ m_local_io_ctx.dup(local_io_ctx);
+ m_local_io_ctx.set_namespace(name);
+ m_remote_io_ctx.dup(remote_io_ctx);
+ m_remote_io_ctx.set_namespace(name);
+}
+
+template <typename I>
+bool NamespaceReplayer<I>::is_blocklisted() const {
+ std::lock_guard locker{m_lock};
+ return m_instance_replayer->is_blocklisted() ||
+ (m_local_pool_watcher &&
+ m_local_pool_watcher->is_blocklisted()) ||
+ (m_remote_pool_watcher &&
+ m_remote_pool_watcher->is_blocklisted());
+}
+
+template <typename I>
+void NamespaceReplayer<I>::init(Context *on_finish) {
+ dout(20) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ init_local_status_updater();
+}
+
+
+template <typename I>
+void NamespaceReplayer<I>::shut_down(Context *on_finish) {
+ dout(20) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ if (!m_image_map) {
+ stop_instance_replayer();
+ return;
+ }
+ }
+
+ auto ctx = new LambdaContext(
+ [this] (int r) {
+ std::lock_guard locker{m_lock};
+ stop_instance_replayer();
+ });
+ handle_release_leader(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::print_status(Formatter *f)
+{
+ dout(20) << dendl;
+
+ ceph_assert(f);
+
+ std::lock_guard locker{m_lock};
+
+ m_instance_replayer->print_status(f);
+
+ if (m_image_deleter) {
+ f->open_object_section("image_deleter");
+ m_image_deleter->print_status(f);
+ f->close_section();
+ }
+}
+
+template <typename I>
+void NamespaceReplayer<I>::start()
+{
+ dout(20) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_instance_replayer->start();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::stop()
+{
+ dout(20) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_instance_replayer->stop();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::restart()
+{
+ dout(20) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_instance_replayer->restart();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::flush()
+{
+ dout(20) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_instance_replayer->flush();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_update(const std::string &mirror_uuid,
+ ImageIds &&added_image_ids,
+ ImageIds &&removed_image_ids) {
+ std::lock_guard locker{m_lock};
+
+ if (!m_image_map) {
+ dout(20) << "not leader" << dendl;
+ return;
+ }
+
+ dout(10) << "mirror_uuid=" << mirror_uuid << ", "
+ << "added_count=" << added_image_ids.size() << ", "
+ << "removed_count=" << removed_image_ids.size() << dendl;
+
+ m_service_daemon->add_or_update_namespace_attribute(
+ m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+ SERVICE_DAEMON_LOCAL_COUNT_KEY, m_local_pool_watcher->get_image_count());
+ if (m_remote_pool_watcher) {
+ m_service_daemon->add_or_update_namespace_attribute(
+ m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(),
+ SERVICE_DAEMON_REMOTE_COUNT_KEY,
+ m_remote_pool_watcher->get_image_count());
+ }
+
+ std::set<std::string> added_global_image_ids;
+ for (auto& image_id : added_image_ids) {
+ added_global_image_ids.insert(image_id.global_id);
+ }
+
+ std::set<std::string> removed_global_image_ids;
+ for (auto& image_id : removed_image_ids) {
+ removed_global_image_ids.insert(image_id.global_id);
+ }
+
+ m_image_map->update_images(mirror_uuid,
+ std::move(added_global_image_ids),
+ std::move(removed_global_image_ids));
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_acquire_leader(Context *on_finish) {
+ dout(10) << dendl;
+
+ m_instance_watcher->handle_acquire_leader();
+
+ init_image_map(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_release_leader(Context *on_finish) {
+ dout(10) << dendl;
+
+ m_instance_watcher->handle_release_leader();
+ shut_down_image_deleter(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_update_leader(
+ const std::string &leader_instance_id) {
+ dout(10) << "leader_instance_id=" << leader_instance_id << dendl;
+
+ m_instance_watcher->handle_update_leader(leader_instance_id);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_instances_added(
+ const std::vector<std::string> &instance_ids) {
+ dout(10) << "instance_ids=" << instance_ids << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (!m_image_map) {
+ return;
+ }
+
+ m_image_map->update_instances_added(instance_ids);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_instances_removed(
+ const std::vector<std::string> &instance_ids) {
+ dout(10) << "instance_ids=" << instance_ids << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (!m_image_map) {
+ return;
+ }
+
+ m_image_map->update_instances_removed(instance_ids);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::init_local_status_updater() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_local_status_updater);
+
+ m_local_status_updater.reset(MirrorStatusUpdater<I>::create(
+ m_local_io_ctx, m_threads, ""));
+ auto ctx = create_context_callback<
+ NamespaceReplayer<I>,
+ &NamespaceReplayer<I>::handle_init_local_status_updater>(this);
+
+ m_local_status_updater->init(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_local_status_updater(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error initializing local mirror status updater: "
+ << cpp_strerror(r) << dendl;
+
+ m_local_status_updater.reset();
+ ceph_assert(m_on_finish != nullptr);
+ m_threads->work_queue->queue(m_on_finish, r);
+ m_on_finish = nullptr;
+ return;
+ }
+
+ init_remote_status_updater();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::init_remote_status_updater() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_remote_status_updater);
+
+ m_remote_status_updater.reset(MirrorStatusUpdater<I>::create(
+ m_remote_io_ctx, m_threads, m_local_mirror_uuid));
+ auto ctx = create_context_callback<
+ NamespaceReplayer<I>,
+ &NamespaceReplayer<I>::handle_init_remote_status_updater>(this);
+ m_remote_status_updater->init(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_remote_status_updater(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error initializing remote mirror status updater: "
+ << cpp_strerror(r) << dendl;
+
+ m_remote_status_updater.reset();
+ m_ret_val = r;
+ shut_down_local_status_updater();
+ return;
+ }
+
+ init_instance_replayer();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::init_instance_replayer() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_instance_replayer);
+
+ m_instance_replayer.reset(InstanceReplayer<I>::create(
+ m_local_io_ctx, m_local_mirror_uuid, m_threads, m_service_daemon,
+ m_local_status_updater.get(), m_cache_manager_handler,
+ m_pool_meta_cache));
+ auto ctx = create_context_callback<NamespaceReplayer<I>,
+ &NamespaceReplayer<I>::handle_init_instance_replayer>(this);
+
+ m_instance_replayer->init(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_instance_replayer(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error initializing instance replayer: " << cpp_strerror(r)
+ << dendl;
+
+ m_instance_replayer.reset();
+ m_ret_val = r;
+ shut_down_remote_status_updater();
+ return;
+ }
+
+ m_instance_replayer->add_peer({m_local_mirror_peer_uuid, m_remote_io_ctx,
+ m_remote_pool_meta,
+ m_remote_status_updater.get()});
+
+ init_instance_watcher();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::init_instance_watcher() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(!m_instance_watcher);
+
+ m_instance_watcher.reset(InstanceWatcher<I>::create(
+ m_local_io_ctx, *m_threads->asio_engine, m_instance_replayer.get(),
+ m_image_sync_throttler));
+ auto ctx = create_context_callback<NamespaceReplayer<I>,
+ &NamespaceReplayer<I>::handle_init_instance_watcher>(this);
+
+ m_instance_watcher->init(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_instance_watcher(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ if (r < 0) {
+ derr << "error initializing instance watcher: " << cpp_strerror(r)
+ << dendl;
+
+ m_instance_watcher.reset();
+ m_ret_val = r;
+ shut_down_instance_replayer();
+ return;
+ }
+
+ ceph_assert(m_on_finish != nullptr);
+ m_threads->work_queue->queue(m_on_finish);
+ m_on_finish = nullptr;
+}
+
+template <typename I>
+void NamespaceReplayer<I>::stop_instance_replayer() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<NamespaceReplayer<I>,
+ &NamespaceReplayer<I>::handle_stop_instance_replayer>(this));
+
+ m_instance_replayer->stop(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_stop_instance_replayer(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error stopping instance replayer: " << cpp_strerror(r) << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+
+ shut_down_instance_watcher();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::shut_down_instance_watcher() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_instance_watcher);
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<NamespaceReplayer<I>,
+ &NamespaceReplayer<I>::handle_shut_down_instance_watcher>(this));
+
+ m_instance_watcher->shut_down(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_instance_watcher(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error shutting instance watcher down: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+
+ m_instance_watcher.reset();
+
+ shut_down_instance_replayer();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::shut_down_instance_replayer() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_instance_replayer);
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<NamespaceReplayer<I>,
+ &NamespaceReplayer<I>::handle_shut_down_instance_replayer>(this));
+
+ m_instance_replayer->shut_down(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_instance_replayer(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error shutting instance replayer down: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+
+ m_instance_replayer.reset();
+
+ shut_down_remote_status_updater();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::shut_down_remote_status_updater() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_remote_status_updater);
+
+ auto ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ NamespaceReplayer<I>,
+ &NamespaceReplayer<I>::handle_shut_down_remote_status_updater>(this));
+ m_remote_status_updater->shut_down(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_remote_status_updater(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error shutting remote mirror status updater down: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+ m_remote_status_updater.reset();
+
+ shut_down_local_status_updater();
+}
+
+template <typename I>
+void NamespaceReplayer<I>::shut_down_local_status_updater() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ ceph_assert(m_local_status_updater);
+
+ auto ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ NamespaceReplayer<I>,
+ &NamespaceReplayer<I>::handle_shut_down_local_status_updater>(this));
+
+ m_local_status_updater->shut_down(ctx);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_local_status_updater(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error shutting local mirror status updater down: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+
+ m_local_status_updater.reset();
+
+ ceph_assert(!m_image_map);
+ ceph_assert(!m_image_deleter);
+ ceph_assert(!m_local_pool_watcher);
+ ceph_assert(!m_remote_pool_watcher);
+ ceph_assert(!m_instance_watcher);
+ ceph_assert(!m_instance_replayer);
+
+ ceph_assert(m_on_finish != nullptr);
+ m_threads->work_queue->queue(m_on_finish, m_ret_val);
+ m_on_finish = nullptr;
+ m_ret_val = 0;
+}
+
+template <typename I>
+void NamespaceReplayer<I>::init_image_map(Context *on_finish) {
+ dout(10) << dendl;
+
+ auto image_map = ImageMap<I>::create(m_local_io_ctx, m_threads,
+ m_instance_watcher->get_instance_id(),
+ m_image_map_listener);
+
+ auto ctx = new LambdaContext(
+ [this, image_map, on_finish](int r) {
+ handle_init_image_map(r, image_map, on_finish);
+ });
+ image_map->init(create_async_context_callback(
+ m_threads->work_queue, ctx));
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_image_map(int r, ImageMap<I> *image_map,
+ Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to init image map: " << cpp_strerror(r) << dendl;
+ on_finish = new LambdaContext([image_map, on_finish, r](int) {
+ delete image_map;
+ on_finish->complete(r);
+ });
+ image_map->shut_down(on_finish);
+ return;
+ }
+
+ ceph_assert(!m_image_map);
+ m_image_map.reset(image_map);
+
+ init_local_pool_watcher(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::init_local_pool_watcher(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(!m_local_pool_watcher);
+ m_local_pool_watcher.reset(PoolWatcher<I>::create(
+ m_threads, m_local_io_ctx, m_local_mirror_uuid,
+ m_local_pool_watcher_listener));
+
+ // ensure the initial set of local images is up-to-date
+ // after acquiring the leader role
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_init_local_pool_watcher(r, on_finish);
+ });
+ m_local_pool_watcher->init(create_async_context_callback(
+ m_threads->work_queue, ctx));
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_local_pool_watcher(
+ int r, Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to retrieve local images: " << cpp_strerror(r) << dendl;
+ on_finish = new LambdaContext([on_finish, r](int) {
+ on_finish->complete(r);
+ });
+ shut_down_pool_watchers(on_finish);
+ return;
+ }
+
+ init_remote_pool_watcher(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::init_remote_pool_watcher(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(!m_remote_pool_watcher);
+ m_remote_pool_watcher.reset(PoolWatcher<I>::create(
+ m_threads, m_remote_io_ctx, m_remote_pool_meta.mirror_uuid,
+ m_remote_pool_watcher_listener));
+
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_init_remote_pool_watcher(r, on_finish);
+ });
+ m_remote_pool_watcher->init(create_async_context_callback(
+ m_threads->work_queue, ctx));
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_remote_pool_watcher(
+ int r, Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+ if (r == -ENOENT) {
+ // Technically nothing to do since the other side doesn't
+ // have mirroring enabled. Eventually the remote pool watcher will
+ // detect images (if mirroring is enabled), so no point propagating
+ // an error which would just busy-spin the state machines.
+ dout(0) << "remote peer does not have mirroring configured" << dendl;
+ } else if (r < 0) {
+ derr << "failed to retrieve remote images: " << cpp_strerror(r) << dendl;
+ on_finish = new LambdaContext([on_finish, r](int) {
+ on_finish->complete(r);
+ });
+ shut_down_pool_watchers(on_finish);
+ return;
+ }
+
+ init_image_deleter(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::init_image_deleter(Context *on_finish) {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(!m_image_deleter);
+
+ on_finish = new LambdaContext([this, on_finish](int r) {
+ handle_init_image_deleter(r, on_finish);
+ });
+ m_image_deleter.reset(ImageDeleter<I>::create(m_local_io_ctx, m_threads,
+ m_image_deletion_throttler,
+ m_service_daemon));
+ m_image_deleter->init(create_async_context_callback(
+ m_threads->work_queue, on_finish));
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_init_image_deleter(
+ int r, Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to init image deleter: " << cpp_strerror(r) << dendl;
+ on_finish = new LambdaContext([on_finish, r](int) {
+ on_finish->complete(r);
+ });
+ shut_down_image_deleter(on_finish);
+ return;
+ }
+
+ on_finish->complete(0);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::shut_down_image_deleter(Context* on_finish) {
+ dout(10) << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ if (m_image_deleter) {
+ Context *ctx = new LambdaContext([this, on_finish](int r) {
+ handle_shut_down_image_deleter(r, on_finish);
+ });
+ ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+ m_image_deleter->shut_down(ctx);
+ return;
+ }
+ }
+ shut_down_pool_watchers(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_image_deleter(
+ int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_image_deleter);
+ m_image_deleter.reset();
+ }
+
+ shut_down_pool_watchers(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::shut_down_pool_watchers(Context *on_finish) {
+ dout(10) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ if (m_local_pool_watcher) {
+ Context *ctx = new LambdaContext([this, on_finish](int r) {
+ handle_shut_down_pool_watchers(r, on_finish);
+ });
+ ctx = create_async_context_callback(m_threads->work_queue, ctx);
+
+ auto gather_ctx = new C_Gather(g_ceph_context, ctx);
+ m_local_pool_watcher->shut_down(gather_ctx->new_sub());
+ if (m_remote_pool_watcher) {
+ m_remote_pool_watcher->shut_down(gather_ctx->new_sub());
+ }
+ gather_ctx->activate();
+ return;
+ }
+ }
+
+ on_finish->complete(0);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_pool_watchers(
+ int r, Context *on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_local_pool_watcher);
+ m_local_pool_watcher.reset();
+
+ if (m_remote_pool_watcher) {
+ m_remote_pool_watcher.reset();
+ }
+ }
+ shut_down_image_map(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::shut_down_image_map(Context *on_finish) {
+ dout(5) << dendl;
+
+ std::lock_guard locker{m_lock};
+ if (m_image_map) {
+ on_finish = new LambdaContext(
+ [this, on_finish](int r) {
+ handle_shut_down_image_map(r, on_finish);
+ });
+ m_image_map->shut_down(create_async_context_callback(
+ m_threads->work_queue, on_finish));
+ return;
+ }
+
+ m_threads->work_queue->queue(on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_shut_down_image_map(int r, Context *on_finish) {
+ dout(5) << "r=" << r << dendl;
+ if (r < 0 && r != -EBLOCKLISTED) {
+ derr << "failed to shut down image map: " << cpp_strerror(r) << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_image_map);
+ m_image_map.reset();
+
+ m_instance_replayer->release_all(create_async_context_callback(
+ m_threads->work_queue, on_finish));
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_acquire_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) {
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ m_instance_watcher->notify_image_acquire(instance_id, global_image_id,
+ on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_release_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) {
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ m_instance_watcher->notify_image_release(instance_id, global_image_id,
+ on_finish);
+}
+
+template <typename I>
+void NamespaceReplayer<I>::handle_remove_image(const std::string &mirror_uuid,
+ const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) {
+ ceph_assert(!mirror_uuid.empty());
+ dout(5) << "mirror_uuid=" << mirror_uuid << ", "
+ << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ m_instance_watcher->notify_peer_image_removed(instance_id, global_image_id,
+ mirror_uuid, on_finish);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::NamespaceReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/NamespaceReplayer.h b/src/tools/rbd_mirror/NamespaceReplayer.h
new file mode 100644
index 000000000..e304b8253
--- /dev/null
+++ b/src/tools/rbd_mirror/NamespaceReplayer.h
@@ -0,0 +1,308 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H
+#define CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H
+
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+
+#include "tools/rbd_mirror/ImageDeleter.h"
+#include "tools/rbd_mirror/ImageMap.h"
+#include "tools/rbd_mirror/InstanceReplayer.h"
+#include "tools/rbd_mirror/InstanceWatcher.h"
+#include "tools/rbd_mirror/MirrorStatusUpdater.h"
+#include "tools/rbd_mirror/PoolWatcher.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_map/Types.h"
+#include "tools/rbd_mirror/pool_watcher/Types.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+class AdminSocketHook;
+
+namespace journal { struct CacheManagerHandler; }
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+struct PoolMetaCache;
+template <typename> class ServiceDaemon;
+template <typename> class Throttler;
+template <typename> struct Threads;
+
+/**
+ * Controls mirroring for a single remote cluster.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class NamespaceReplayer {
+public:
+ static NamespaceReplayer *create(
+ const std::string &name,
+ librados::IoCtx &local_ioctx,
+ librados::IoCtx &remote_ioctx,
+ const std::string &local_mirror_uuid,
+ const std::string &local_mirror_peer_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ Threads<ImageCtxT> *threads,
+ Throttler<ImageCtxT> *image_sync_throttler,
+ Throttler<ImageCtxT> *image_deletion_throttler,
+ ServiceDaemon<ImageCtxT> *service_daemon,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache) {
+ return new NamespaceReplayer(name, local_ioctx, remote_ioctx,
+ local_mirror_uuid, local_mirror_peer_uuid,
+ remote_pool_meta, threads,
+ image_sync_throttler, image_deletion_throttler,
+ service_daemon, cache_manager_handler,
+ pool_meta_cache);
+ }
+
+ NamespaceReplayer(const std::string &name,
+ librados::IoCtx &local_ioctx,
+ librados::IoCtx &remote_ioctx,
+ const std::string &local_mirror_uuid,
+ const std::string& local_mirror_peer_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ Threads<ImageCtxT> *threads,
+ Throttler<ImageCtxT> *image_sync_throttler,
+ Throttler<ImageCtxT> *image_deletion_throttler,
+ ServiceDaemon<ImageCtxT> *service_daemon,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache);
+ NamespaceReplayer(const NamespaceReplayer&) = delete;
+ NamespaceReplayer& operator=(const NamespaceReplayer&) = delete;
+
+ bool is_blocklisted() const;
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+ void handle_acquire_leader(Context *on_finish);
+ void handle_release_leader(Context *on_finish);
+ void handle_update_leader(const std::string &leader_instance_id);
+ void handle_instances_added(const std::vector<std::string> &instance_ids);
+ void handle_instances_removed(const std::vector<std::string> &instance_ids);
+
+ void print_status(Formatter *f);
+ void start();
+ void stop();
+ void restart();
+ void flush();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <uninitialized> <------------------------------------\
+ * | (init) ^ (error) |
+ * v * |
+ * INIT_LOCAL_STATUS_UPDATER * * * * * * * * > SHUT_DOWN_LOCAL_STATUS_UPDATER
+ * | * (error) ^
+ * v * |
+ * INIT_REMOTE_STATUS_UPDATER * * * * * * * > SHUT_DOWN_REMOTE_STATUS_UPDATER
+ * | * (error) ^
+ * v * |
+ * INIT_INSTANCE_REPLAYER * * * * * * * * * > SHUT_DOWN_INSTANCE_REPLAYER
+ * | * ^
+ * v * |
+ * INIT_INSTANCE_WATCHER * * * * * * * * * * SHUT_DOWN_INSTANCE_WATCHER
+ * | (error) ^
+ * | |
+ * v STOP_INSTANCE_REPLAYER
+ * | ^
+ * | (shut down) |
+ * | /----------------------------------------------/
+ * v |
+ * <follower> <---------------------------\
+ * . |
+ * . |
+ * v (leader acquired) |
+ * INIT_IMAGE_MAP |
+ * | |
+ * v |
+ * INIT_LOCAL_POOL_WATCHER SHUT_DOWN_IMAGE_MAP
+ * | ^
+ * v |
+ * INIT_REMOTE_POOL_WATCHER SHUT_DOWN_POOL_WATCHERS
+ * | ^
+ * v |
+ * INIT_IMAGE_DELETER SHUT_DOWN_IMAGE_DELETER
+ * | ^
+ * v .
+ * <leader> <-----------\ .
+ * . | .
+ * . (image update) | .
+ * . . > NOTIFY_INSTANCE_WATCHER .
+ * . .
+ * . (leader lost / shut down) .
+ * . . . . . . . . . . . . . . . . . . .
+ *
+ * @endverbatim
+ */
+
+ struct PoolWatcherListener : public pool_watcher::Listener {
+ NamespaceReplayer *namespace_replayer;
+ bool local;
+
+ PoolWatcherListener(NamespaceReplayer *namespace_replayer, bool local)
+ : namespace_replayer(namespace_replayer), local(local) {
+ }
+
+ void handle_update(const std::string &mirror_uuid,
+ ImageIds &&added_image_ids,
+ ImageIds &&removed_image_ids) override {
+ namespace_replayer->handle_update((local ? "" : mirror_uuid),
+ std::move(added_image_ids),
+ std::move(removed_image_ids));
+ }
+ };
+
+ struct ImageMapListener : public image_map::Listener {
+ NamespaceReplayer *namespace_replayer;
+
+ ImageMapListener(NamespaceReplayer *namespace_replayer)
+ : namespace_replayer(namespace_replayer) {
+ }
+
+ void acquire_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) override {
+ namespace_replayer->handle_acquire_image(global_image_id, instance_id,
+ on_finish);
+ }
+
+ void release_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) override {
+ namespace_replayer->handle_release_image(global_image_id, instance_id,
+ on_finish);
+ }
+
+ void remove_image(const std::string &mirror_uuid,
+ const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) override {
+ namespace_replayer->handle_remove_image(mirror_uuid, global_image_id,
+ instance_id, on_finish);
+ }
+ };
+
+ void handle_update(const std::string &mirror_uuid,
+ ImageIds &&added_image_ids,
+ ImageIds &&removed_image_ids);
+
+ int init_rados(const std::string &cluster_name,
+ const std::string &client_name,
+ const std::string &mon_host,
+ const std::string &key,
+ const std::string &description, RadosRef *rados_ref,
+ bool strip_cluster_overrides);
+
+ void init_local_status_updater();
+ void handle_init_local_status_updater(int r);
+
+ void init_remote_status_updater();
+ void handle_init_remote_status_updater(int r);
+
+ void init_instance_replayer();
+ void handle_init_instance_replayer(int r);
+
+ void init_instance_watcher();
+ void handle_init_instance_watcher(int r);
+
+ void stop_instance_replayer();
+ void handle_stop_instance_replayer(int r);
+
+ void shut_down_instance_watcher();
+ void handle_shut_down_instance_watcher(int r);
+
+ void shut_down_instance_replayer();
+ void handle_shut_down_instance_replayer(int r);
+
+ void shut_down_remote_status_updater();
+ void handle_shut_down_remote_status_updater(int r);
+
+ void shut_down_local_status_updater();
+ void handle_shut_down_local_status_updater(int r);
+
+ void init_image_map(Context *on_finish);
+ void handle_init_image_map(int r, ImageMap<ImageCtxT> *image_map,
+ Context *on_finish);
+
+ void init_local_pool_watcher(Context *on_finish);
+ void handle_init_local_pool_watcher(int r, Context *on_finish);
+
+ void init_remote_pool_watcher(Context *on_finish);
+ void handle_init_remote_pool_watcher(int r, Context *on_finish);
+
+ void init_image_deleter(Context* on_finish);
+ void handle_init_image_deleter(int r, Context* on_finish);
+
+ void shut_down_image_deleter(Context* on_finish);
+ void handle_shut_down_image_deleter(int r, Context* on_finish);
+
+ void shut_down_pool_watchers(Context *on_finish);
+ void handle_shut_down_pool_watchers(int r, Context *on_finish);
+
+ void shut_down_image_map(Context *on_finish);
+ void handle_shut_down_image_map(int r, Context *on_finish);
+
+ void handle_acquire_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish);
+ void handle_release_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish);
+ void handle_remove_image(const std::string &mirror_uuid,
+ const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish);
+
+ std::string m_namespace_name;
+ librados::IoCtx m_local_io_ctx;
+ librados::IoCtx m_remote_io_ctx;
+ std::string m_local_mirror_uuid;
+ std::string m_local_mirror_peer_uuid;
+ RemotePoolMeta m_remote_pool_meta;
+ Threads<ImageCtxT> *m_threads;
+ Throttler<ImageCtxT> *m_image_sync_throttler;
+ Throttler<ImageCtxT> *m_image_deletion_throttler;
+ ServiceDaemon<ImageCtxT> *m_service_daemon;
+ journal::CacheManagerHandler *m_cache_manager_handler;
+ PoolMetaCache* m_pool_meta_cache;
+
+ mutable ceph::mutex m_lock;
+
+ int m_ret_val = 0;
+ Context *m_on_finish = nullptr;
+
+ std::unique_ptr<MirrorStatusUpdater<ImageCtxT>> m_local_status_updater;
+ std::unique_ptr<MirrorStatusUpdater<ImageCtxT>> m_remote_status_updater;
+
+ PoolWatcherListener m_local_pool_watcher_listener;
+ std::unique_ptr<PoolWatcher<ImageCtxT>> m_local_pool_watcher;
+
+ PoolWatcherListener m_remote_pool_watcher_listener;
+ std::unique_ptr<PoolWatcher<ImageCtxT>> m_remote_pool_watcher;
+
+ std::unique_ptr<InstanceReplayer<ImageCtxT>> m_instance_replayer;
+ std::unique_ptr<ImageDeleter<ImageCtxT>> m_image_deleter;
+
+ ImageMapListener m_image_map_listener;
+ std::unique_ptr<ImageMap<ImageCtxT>> m_image_map;
+
+ std::unique_ptr<InstanceWatcher<ImageCtxT>> m_instance_watcher;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::NamespaceReplayer<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/PoolMetaCache.cc b/src/tools/rbd_mirror/PoolMetaCache.cc
new file mode 100644
index 000000000..261802a55
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolMetaCache.cc
@@ -0,0 +1,83 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/dout.h"
+#include "tools/rbd_mirror/PoolMetaCache.h"
+#include <shared_mutex>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PoolMetaCache: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+int PoolMetaCache::get_local_pool_meta(
+ int64_t pool_id,
+ LocalPoolMeta* local_pool_meta) const {
+ dout(15) << "pool_id=" << pool_id << dendl;
+
+ std::shared_lock locker{m_lock};
+ auto it = m_local_pool_metas.find(pool_id);
+ if (it == m_local_pool_metas.end()) {
+ return -ENOENT;
+ }
+
+ *local_pool_meta = it->second;
+ return 0;
+}
+
+void PoolMetaCache::set_local_pool_meta(
+ int64_t pool_id,
+ const LocalPoolMeta& local_pool_meta) {
+ dout(15) << "pool_id=" << pool_id << ", "
+ << "local_pool_meta=" << local_pool_meta << dendl;
+
+ std::unique_lock locker(m_lock);
+ m_local_pool_metas[pool_id] = local_pool_meta;
+}
+
+void PoolMetaCache::remove_local_pool_meta(int64_t pool_id) {
+ dout(15) << "pool_id=" << pool_id << dendl;
+
+ std::unique_lock locker(m_lock);
+ m_local_pool_metas.erase(pool_id);
+}
+
+int PoolMetaCache::get_remote_pool_meta(
+ int64_t pool_id,
+ RemotePoolMeta* remote_pool_meta) const {
+ dout(15) << "pool_id=" << pool_id << dendl;
+
+ std::shared_lock locker{m_lock};
+ auto it = m_remote_pool_metas.find(pool_id);
+ if (it == m_remote_pool_metas.end()) {
+ return -ENOENT;
+ }
+
+ *remote_pool_meta = it->second;
+ return 0;
+}
+
+void PoolMetaCache::set_remote_pool_meta(
+ int64_t pool_id,
+ const RemotePoolMeta& remote_pool_meta) {
+ dout(15) << "pool_id=" << pool_id << ", "
+ << "remote_pool_meta=" << remote_pool_meta << dendl;
+
+ std::unique_lock locker(m_lock);
+ m_remote_pool_metas[pool_id] = remote_pool_meta;
+}
+
+void PoolMetaCache::remove_remote_pool_meta(int64_t pool_id) {
+ dout(15) << "pool_id=" << pool_id << dendl;
+
+ std::unique_lock locker(m_lock);
+ m_remote_pool_metas.erase(pool_id);
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/PoolMetaCache.h b/src/tools/rbd_mirror/PoolMetaCache.h
new file mode 100644
index 000000000..f0440120f
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolMetaCache.h
@@ -0,0 +1,47 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_META_CACHE_H
+#define CEPH_RBD_MIRROR_POOL_META_CACHE_H
+
+#include "include/int_types.h"
+#include "common/ceph_mutex.h"
+#include "tools/rbd_mirror/Types.h"
+#include <map>
+
+namespace rbd {
+namespace mirror {
+
+class PoolMetaCache {
+public:
+ PoolMetaCache(CephContext* cct)
+ : m_cct(cct) {
+ }
+ PoolMetaCache(const PoolMetaCache&) = delete;
+ PoolMetaCache& operator=(const PoolMetaCache&) = delete;
+
+ int get_local_pool_meta(int64_t pool_id,
+ LocalPoolMeta* local_pool_meta) const;
+ void set_local_pool_meta(int64_t pool_id,
+ const LocalPoolMeta& local_pool_meta);
+ void remove_local_pool_meta(int64_t pool_id);
+
+ int get_remote_pool_meta(int64_t pool_id,
+ RemotePoolMeta* remote_pool_meta) const;
+ void set_remote_pool_meta(int64_t pool_id,
+ const RemotePoolMeta& remote_pool_meta);
+ void remove_remote_pool_meta(int64_t pool_id);
+
+private:
+ CephContext* m_cct;
+
+ mutable ceph::shared_mutex m_lock =
+ ceph::make_shared_mutex("rbd::mirror::PoolMetaCache::m_lock");
+ std::map<int64_t, LocalPoolMeta> m_local_pool_metas;
+ std::map<int64_t, RemotePoolMeta> m_remote_pool_metas;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_POOL_META_CACHE_H
diff --git a/src/tools/rbd_mirror/PoolReplayer.cc b/src/tools/rbd_mirror/PoolReplayer.cc
new file mode 100644
index 000000000..8a04219da
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolReplayer.cc
@@ -0,0 +1,1110 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "PoolReplayer.h"
+#include "common/Cond.h"
+#include "common/Formatter.h"
+#include "common/admin_socket.h"
+#include "common/ceph_argparse.h"
+#include "common/code_environment.h"
+#include "common/common_init.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "global/global_context.h"
+#include "librbd/api/Config.h"
+#include "librbd/api/Namespace.h"
+#include "PoolMetaCache.h"
+#include "RemotePoolPoller.h"
+#include "ServiceDaemon.h"
+#include "Threads.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PoolReplayer: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+using ::operator<<;
+
+namespace {
+
+const std::string SERVICE_DAEMON_INSTANCE_ID_KEY("instance_id");
+const std::string SERVICE_DAEMON_LEADER_KEY("leader");
+
+const std::vector<std::string> UNIQUE_PEER_CONFIG_KEYS {
+ {"monmap", "mon_host", "mon_dns_srv_name", "key", "keyfile", "keyring"}};
+
+template <typename I>
+class PoolReplayerAdminSocketCommand {
+public:
+ PoolReplayerAdminSocketCommand(PoolReplayer<I> *pool_replayer)
+ : pool_replayer(pool_replayer) {
+ }
+ virtual ~PoolReplayerAdminSocketCommand() {}
+ virtual int call(Formatter *f) = 0;
+protected:
+ PoolReplayer<I> *pool_replayer;
+};
+
+template <typename I>
+class StatusCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit StatusCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->pool_replayer->print_status(f);
+ return 0;
+ }
+};
+
+template <typename I>
+class StartCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit StartCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->pool_replayer->start();
+ return 0;
+ }
+};
+
+template <typename I>
+class StopCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit StopCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->pool_replayer->stop(true);
+ return 0;
+ }
+};
+
+template <typename I>
+class RestartCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit RestartCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->pool_replayer->restart();
+ return 0;
+ }
+};
+
+template <typename I>
+class FlushCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit FlushCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->pool_replayer->flush();
+ return 0;
+ }
+};
+
+template <typename I>
+class LeaderReleaseCommand : public PoolReplayerAdminSocketCommand<I> {
+public:
+ explicit LeaderReleaseCommand(PoolReplayer<I> *pool_replayer)
+ : PoolReplayerAdminSocketCommand<I>(pool_replayer) {
+ }
+
+ int call(Formatter *f) override {
+ this->pool_replayer->release_leader();
+ return 0;
+ }
+};
+
+template <typename I>
+class PoolReplayerAdminSocketHook : public AdminSocketHook {
+public:
+ PoolReplayerAdminSocketHook(CephContext *cct, const std::string &name,
+ PoolReplayer<I> *pool_replayer)
+ : admin_socket(cct->get_admin_socket()) {
+ std::string command;
+ int r;
+
+ command = "rbd mirror status " + name;
+ r = admin_socket->register_command(command, this,
+ "get status for rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new StatusCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror start " + name;
+ r = admin_socket->register_command(command, this,
+ "start rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new StartCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror stop " + name;
+ r = admin_socket->register_command(command, this,
+ "stop rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new StopCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror restart " + name;
+ r = admin_socket->register_command(command, this,
+ "restart rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new RestartCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror flush " + name;
+ r = admin_socket->register_command(command, this,
+ "flush rbd mirror " + name);
+ if (r == 0) {
+ commands[command] = new FlushCommand<I>(pool_replayer);
+ }
+
+ command = "rbd mirror leader release " + name;
+ r = admin_socket->register_command(command, this,
+ "release rbd mirror leader " + name);
+ if (r == 0) {
+ commands[command] = new LeaderReleaseCommand<I>(pool_replayer);
+ }
+ }
+
+ ~PoolReplayerAdminSocketHook() override {
+ (void)admin_socket->unregister_commands(this);
+ for (auto i = commands.begin(); i != commands.end(); ++i) {
+ delete i->second;
+ }
+ }
+
+ int call(std::string_view command, const cmdmap_t& cmdmap,
+ const bufferlist&,
+ Formatter *f,
+ std::ostream& ss,
+ bufferlist& out) override {
+ auto i = commands.find(command);
+ ceph_assert(i != commands.end());
+ return i->second->call(f);
+ }
+
+private:
+ typedef std::map<std::string, PoolReplayerAdminSocketCommand<I>*,
+ std::less<>> Commands;
+
+ AdminSocket *admin_socket;
+ Commands commands;
+};
+
+} // anonymous namespace
+
+template <typename I>
+struct PoolReplayer<I>::RemotePoolPollerListener
+ : public remote_pool_poller::Listener {
+
+ PoolReplayer<I>* m_pool_replayer;
+
+ RemotePoolPollerListener(PoolReplayer<I>* pool_replayer)
+ : m_pool_replayer(pool_replayer) {
+ }
+
+ void handle_updated(const RemotePoolMeta& remote_pool_meta) override {
+ m_pool_replayer->handle_remote_pool_meta_updated(remote_pool_meta);
+ }
+};
+
+template <typename I>
+PoolReplayer<I>::PoolReplayer(
+ Threads<I> *threads, ServiceDaemon<I> *service_daemon,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache, int64_t local_pool_id,
+ const PeerSpec &peer, const std::vector<const char*> &args) :
+ m_threads(threads),
+ m_service_daemon(service_daemon),
+ m_cache_manager_handler(cache_manager_handler),
+ m_pool_meta_cache(pool_meta_cache),
+ m_local_pool_id(local_pool_id),
+ m_peer(peer),
+ m_args(args),
+ m_lock(ceph::make_mutex("rbd::mirror::PoolReplayer " + stringify(peer))),
+ m_pool_replayer_thread(this),
+ m_leader_listener(this) {
+}
+
+template <typename I>
+PoolReplayer<I>::~PoolReplayer()
+{
+ shut_down();
+
+ ceph_assert(m_asok_hook == nullptr);
+}
+
+template <typename I>
+bool PoolReplayer<I>::is_blocklisted() const {
+ std::lock_guard locker{m_lock};
+ return m_blocklisted;
+}
+
+template <typename I>
+bool PoolReplayer<I>::is_leader() const {
+ std::lock_guard locker{m_lock};
+ return m_leader_watcher && m_leader_watcher->is_leader();
+}
+
+template <typename I>
+bool PoolReplayer<I>::is_running() const {
+ return m_pool_replayer_thread.is_started() && !m_stopping;
+}
+
+template <typename I>
+void PoolReplayer<I>::init(const std::string& site_name) {
+ std::lock_guard locker{m_lock};
+
+ ceph_assert(!m_pool_replayer_thread.is_started());
+
+ // reset state
+ m_stopping = false;
+ m_blocklisted = false;
+ m_site_name = site_name;
+
+ dout(10) << "replaying for " << m_peer << dendl;
+ int r = init_rados(g_ceph_context->_conf->cluster,
+ g_ceph_context->_conf->name.to_str(),
+ "", "", "local cluster", &m_local_rados, false);
+ if (r < 0) {
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to connect to local cluster");
+ return;
+ }
+
+ r = init_rados(m_peer.cluster_name, m_peer.client_name,
+ m_peer.mon_host, m_peer.key,
+ std::string("remote peer ") + stringify(m_peer),
+ &m_remote_rados, true);
+ if (r < 0) {
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to connect to remote cluster");
+ return;
+ }
+
+ r = m_local_rados->ioctx_create2(m_local_pool_id, m_local_io_ctx);
+ if (r < 0) {
+ derr << "error accessing local pool " << m_local_pool_id << ": "
+ << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+ librbd::api::Config<I>::apply_pool_overrides(m_local_io_ctx, &cct->_conf);
+
+ r = librbd::cls_client::mirror_uuid_get(&m_local_io_ctx,
+ &m_local_mirror_uuid);
+ if (r < 0) {
+ derr << "failed to retrieve local mirror uuid from pool "
+ << m_local_io_ctx.get_pool_name() << ": " << cpp_strerror(r) << dendl;
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to query local mirror uuid");
+ return;
+ }
+
+ r = m_remote_rados->ioctx_create(m_local_io_ctx.get_pool_name().c_str(),
+ m_remote_io_ctx);
+ if (r < 0) {
+ derr << "error accessing remote pool " << m_local_io_ctx.get_pool_name()
+ << ": " << cpp_strerror(r) << dendl;
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_WARNING,
+ "unable to access remote pool");
+ return;
+ }
+
+ dout(10) << "connected to " << m_peer << dendl;
+
+ m_image_sync_throttler.reset(
+ Throttler<I>::create(cct, "rbd_mirror_concurrent_image_syncs"));
+
+ m_image_deletion_throttler.reset(
+ Throttler<I>::create(cct, "rbd_mirror_concurrent_image_deletions"));
+
+ m_remote_pool_poller_listener.reset(new RemotePoolPollerListener(this));
+ m_remote_pool_poller.reset(RemotePoolPoller<I>::create(
+ m_threads, m_remote_io_ctx, m_site_name, m_local_mirror_uuid,
+ *m_remote_pool_poller_listener));
+
+ C_SaferCond on_pool_poller_init;
+ m_remote_pool_poller->init(&on_pool_poller_init);
+ r = on_pool_poller_init.wait();
+ if (r < 0) {
+ derr << "failed to initialize remote pool poller: " << cpp_strerror(r)
+ << dendl;
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to initialize remote pool poller");
+ m_remote_pool_poller.reset();
+ return;
+ }
+ ceph_assert(!m_remote_pool_meta.mirror_uuid.empty());
+ m_pool_meta_cache->set_remote_pool_meta(
+ m_remote_io_ctx.get_id(), m_remote_pool_meta);
+ m_pool_meta_cache->set_local_pool_meta(
+ m_local_io_ctx.get_id(), {m_local_mirror_uuid});
+
+ m_default_namespace_replayer.reset(NamespaceReplayer<I>::create(
+ "", m_local_io_ctx, m_remote_io_ctx, m_local_mirror_uuid, m_peer.uuid,
+ m_remote_pool_meta, m_threads, m_image_sync_throttler.get(),
+ m_image_deletion_throttler.get(), m_service_daemon,
+ m_cache_manager_handler, m_pool_meta_cache));
+
+ C_SaferCond on_init;
+ m_default_namespace_replayer->init(&on_init);
+ r = on_init.wait();
+ if (r < 0) {
+ derr << "error initializing default namespace replayer: " << cpp_strerror(r)
+ << dendl;
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to initialize default namespace replayer");
+ m_default_namespace_replayer.reset();
+ return;
+ }
+
+ m_leader_watcher.reset(LeaderWatcher<I>::create(m_threads, m_local_io_ctx,
+ &m_leader_listener));
+ r = m_leader_watcher->init();
+ if (r < 0) {
+ derr << "error initializing leader watcher: " << cpp_strerror(r) << dendl;
+ m_callout_id = m_service_daemon->add_or_update_callout(
+ m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR,
+ "unable to initialize leader messenger object");
+ m_leader_watcher.reset();
+ return;
+ }
+
+ if (m_callout_id != service_daemon::CALLOUT_ID_NONE) {
+ m_service_daemon->remove_callout(m_local_pool_id, m_callout_id);
+ m_callout_id = service_daemon::CALLOUT_ID_NONE;
+ }
+
+ m_service_daemon->add_or_update_attribute(
+ m_local_io_ctx.get_id(), SERVICE_DAEMON_INSTANCE_ID_KEY,
+ stringify(m_local_io_ctx.get_instance_id()));
+
+ m_pool_replayer_thread.create("pool replayer");
+}
+
+template <typename I>
+void PoolReplayer<I>::shut_down() {
+ {
+ std::lock_guard l{m_lock};
+ m_stopping = true;
+ m_cond.notify_all();
+ }
+ if (m_pool_replayer_thread.is_started()) {
+ m_pool_replayer_thread.join();
+ }
+
+ if (m_leader_watcher) {
+ m_leader_watcher->shut_down();
+ }
+ m_leader_watcher.reset();
+
+ if (m_default_namespace_replayer) {
+ C_SaferCond on_shut_down;
+ m_default_namespace_replayer->shut_down(&on_shut_down);
+ on_shut_down.wait();
+ }
+ m_default_namespace_replayer.reset();
+
+ if (m_remote_pool_poller) {
+ C_SaferCond ctx;
+ m_remote_pool_poller->shut_down(&ctx);
+ ctx.wait();
+
+ m_pool_meta_cache->remove_remote_pool_meta(m_remote_io_ctx.get_id());
+ m_pool_meta_cache->remove_local_pool_meta(m_local_io_ctx.get_id());
+ }
+ m_remote_pool_poller.reset();
+ m_remote_pool_poller_listener.reset();
+
+ m_image_sync_throttler.reset();
+ m_image_deletion_throttler.reset();
+
+ m_local_rados.reset();
+ m_remote_rados.reset();
+}
+
+template <typename I>
+int PoolReplayer<I>::init_rados(const std::string &cluster_name,
+ const std::string &client_name,
+ const std::string &mon_host,
+ const std::string &key,
+ const std::string &description,
+ RadosRef *rados_ref,
+ bool strip_cluster_overrides) {
+ // NOTE: manually bootstrap a CephContext here instead of via
+ // the librados API to avoid mixing global singletons between
+ // the librados shared library and the daemon
+ // TODO: eliminate intermingling of global singletons within Ceph APIs
+ CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT);
+ if (client_name.empty() || !iparams.name.from_str(client_name)) {
+ derr << "error initializing cluster handle for " << description << dendl;
+ return -EINVAL;
+ }
+
+ CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY,
+ CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+ cct->_conf->cluster = cluster_name;
+
+ // librados::Rados::conf_read_file
+ int r = cct->_conf.parse_config_files(nullptr, nullptr, 0);
+ if (r < 0 && r != -ENOENT) {
+ // do not treat this as fatal, it might still be able to connect
+ derr << "could not read ceph conf for " << description << ": "
+ << cpp_strerror(r) << dendl;
+ }
+
+ // preserve cluster-specific config settings before applying environment/cli
+ // overrides
+ std::map<std::string, std::string> config_values;
+ if (strip_cluster_overrides) {
+ // remote peer connections shouldn't apply cluster-specific
+ // configuration settings
+ for (auto& key : UNIQUE_PEER_CONFIG_KEYS) {
+ config_values[key] = cct->_conf.get_val<std::string>(key);
+ }
+ }
+
+ cct->_conf.parse_env(cct->get_module_type());
+
+ // librados::Rados::conf_parse_env
+ std::vector<const char*> args;
+ r = cct->_conf.parse_argv(args);
+ if (r < 0) {
+ derr << "could not parse environment for " << description << ":"
+ << cpp_strerror(r) << dendl;
+ cct->put();
+ return r;
+ }
+ cct->_conf.parse_env(cct->get_module_type());
+
+ if (!m_args.empty()) {
+ // librados::Rados::conf_parse_argv
+ args = m_args;
+ r = cct->_conf.parse_argv(args);
+ if (r < 0) {
+ derr << "could not parse command line args for " << description << ": "
+ << cpp_strerror(r) << dendl;
+ cct->put();
+ return r;
+ }
+ }
+
+ if (strip_cluster_overrides) {
+ // remote peer connections shouldn't apply cluster-specific
+ // configuration settings
+ for (auto& pair : config_values) {
+ auto value = cct->_conf.get_val<std::string>(pair.first);
+ if (pair.second != value) {
+ dout(0) << "reverting global config option override: "
+ << pair.first << ": " << value << " -> " << pair.second
+ << dendl;
+ cct->_conf.set_val_or_die(pair.first, pair.second);
+ }
+ }
+ }
+
+ if (!g_ceph_context->_conf->admin_socket.empty()) {
+ cct->_conf.set_val_or_die("admin_socket",
+ "$run_dir/$name.$pid.$cluster.$cctid.asok");
+ }
+
+ if (!mon_host.empty()) {
+ r = cct->_conf.set_val("mon_host", mon_host);
+ if (r < 0) {
+ derr << "failed to set mon_host config for " << description << ": "
+ << cpp_strerror(r) << dendl;
+ cct->put();
+ return r;
+ }
+ }
+
+ if (!key.empty()) {
+ r = cct->_conf.set_val("key", key);
+ if (r < 0) {
+ derr << "failed to set key config for " << description << ": "
+ << cpp_strerror(r) << dendl;
+ cct->put();
+ return r;
+ }
+ }
+
+ // disable unnecessary librbd cache
+ cct->_conf.set_val_or_die("rbd_cache", "false");
+ cct->_conf.apply_changes(nullptr);
+ cct->_conf.complain_about_parse_error(cct);
+
+ rados_ref->reset(new librados::Rados());
+
+ r = (*rados_ref)->init_with_context(cct);
+ ceph_assert(r == 0);
+ cct->put();
+
+ r = (*rados_ref)->connect();
+ if (r < 0) {
+ derr << "error connecting to " << description << ": "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+void PoolReplayer<I>::run() {
+ dout(20) << dendl;
+
+ while (true) {
+ std::string asok_hook_name = m_local_io_ctx.get_pool_name() + " " +
+ m_peer.cluster_name;
+ if (m_asok_hook_name != asok_hook_name || m_asok_hook == nullptr) {
+ m_asok_hook_name = asok_hook_name;
+ delete m_asok_hook;
+
+ m_asok_hook = new PoolReplayerAdminSocketHook<I>(g_ceph_context,
+ m_asok_hook_name, this);
+ }
+
+ with_namespace_replayers([this]() { update_namespace_replayers(); });
+
+ std::unique_lock locker{m_lock};
+
+ if (m_leader_watcher->is_blocklisted() ||
+ m_default_namespace_replayer->is_blocklisted()) {
+ m_blocklisted = true;
+ m_stopping = true;
+ }
+
+ for (auto &it : m_namespace_replayers) {
+ if (it.second->is_blocklisted()) {
+ m_blocklisted = true;
+ m_stopping = true;
+ break;
+ }
+ }
+
+ if (m_stopping) {
+ break;
+ }
+
+ auto seconds = g_ceph_context->_conf.get_val<uint64_t>(
+ "rbd_mirror_pool_replayers_refresh_interval");
+ m_cond.wait_for(locker, ceph::make_timespan(seconds));
+ }
+
+ // shut down namespace replayers
+ with_namespace_replayers([this]() { update_namespace_replayers(); });
+
+ delete m_asok_hook;
+ m_asok_hook = nullptr;
+}
+
+template <typename I>
+void PoolReplayer<I>::update_namespace_replayers() {
+ dout(20) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ std::set<std::string> mirroring_namespaces;
+ if (!m_stopping) {
+ int r = list_mirroring_namespaces(&mirroring_namespaces);
+ if (r < 0) {
+ return;
+ }
+ }
+
+ auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+ C_SaferCond cond;
+ auto gather_ctx = new C_Gather(cct, &cond);
+ for (auto it = m_namespace_replayers.begin();
+ it != m_namespace_replayers.end(); ) {
+ auto iter = mirroring_namespaces.find(it->first);
+ if (iter == mirroring_namespaces.end()) {
+ auto namespace_replayer = it->second;
+ auto on_shut_down = new LambdaContext(
+ [namespace_replayer, ctx=gather_ctx->new_sub()](int r) {
+ delete namespace_replayer;
+ ctx->complete(r);
+ });
+ m_service_daemon->remove_namespace(m_local_pool_id, it->first);
+ namespace_replayer->shut_down(on_shut_down);
+ it = m_namespace_replayers.erase(it);
+ } else {
+ mirroring_namespaces.erase(iter);
+ it++;
+ }
+ }
+
+ for (auto &name : mirroring_namespaces) {
+ auto namespace_replayer = NamespaceReplayer<I>::create(
+ name, m_local_io_ctx, m_remote_io_ctx, m_local_mirror_uuid, m_peer.uuid,
+ m_remote_pool_meta, m_threads, m_image_sync_throttler.get(),
+ m_image_deletion_throttler.get(), m_service_daemon,
+ m_cache_manager_handler, m_pool_meta_cache);
+ auto on_init = new LambdaContext(
+ [this, namespace_replayer, name, &mirroring_namespaces,
+ ctx=gather_ctx->new_sub()](int r) {
+ std::lock_guard locker{m_lock};
+ if (r < 0) {
+ derr << "failed to initialize namespace replayer for namespace "
+ << name << ": " << cpp_strerror(r) << dendl;
+ delete namespace_replayer;
+ mirroring_namespaces.erase(name);
+ } else {
+ m_namespace_replayers[name] = namespace_replayer;
+ m_service_daemon->add_namespace(m_local_pool_id, name);
+ }
+ ctx->complete(r);
+ });
+ namespace_replayer->init(on_init);
+ }
+
+ gather_ctx->activate();
+
+ m_lock.unlock();
+ cond.wait();
+ m_lock.lock();
+
+ if (m_leader) {
+ C_SaferCond acquire_cond;
+ auto acquire_gather_ctx = new C_Gather(cct, &acquire_cond);
+
+ for (auto &name : mirroring_namespaces) {
+ namespace_replayer_acquire_leader(name, acquire_gather_ctx->new_sub());
+ }
+ acquire_gather_ctx->activate();
+
+ m_lock.unlock();
+ acquire_cond.wait();
+ m_lock.lock();
+
+ std::vector<std::string> instance_ids;
+ m_leader_watcher->list_instances(&instance_ids);
+
+ for (auto &name : mirroring_namespaces) {
+ auto it = m_namespace_replayers.find(name);
+ if (it == m_namespace_replayers.end()) {
+ // acuire leader for this namespace replayer failed
+ continue;
+ }
+ it->second->handle_instances_added(instance_ids);
+ }
+ } else {
+ std::string leader_instance_id;
+ if (m_leader_watcher->get_leader_instance_id(&leader_instance_id)) {
+ for (auto &name : mirroring_namespaces) {
+ m_namespace_replayers[name]->handle_update_leader(leader_instance_id);
+ }
+ }
+ }
+}
+
+template <typename I>
+int PoolReplayer<I>::list_mirroring_namespaces(
+ std::set<std::string> *namespaces) {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ std::vector<std::string> names;
+
+ int r = librbd::api::Namespace<I>::list(m_local_io_ctx, &names);
+ if (r < 0) {
+ derr << "failed to list namespaces: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ for (auto &name : names) {
+ cls::rbd::MirrorMode mirror_mode = cls::rbd::MIRROR_MODE_DISABLED;
+ int r = librbd::cls_client::mirror_mode_get(&m_local_io_ctx, &mirror_mode);
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to get namespace mirror mode: " << cpp_strerror(r)
+ << dendl;
+ if (m_namespace_replayers.count(name) == 0) {
+ continue;
+ }
+ } else if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) {
+ dout(10) << "mirroring is disabled for namespace " << name << dendl;
+ continue;
+ }
+
+ namespaces->insert(name);
+ }
+
+ return 0;
+}
+
+template <typename I>
+void PoolReplayer<I>::reopen_logs()
+{
+ std::lock_guard locker{m_lock};
+
+ if (m_local_rados) {
+ reinterpret_cast<CephContext *>(m_local_rados->cct())->reopen_logs();
+ }
+ if (m_remote_rados) {
+ reinterpret_cast<CephContext *>(m_remote_rados->cct())->reopen_logs();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::namespace_replayer_acquire_leader(const std::string &name,
+ Context *on_finish) {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ auto it = m_namespace_replayers.find(name);
+ ceph_assert(it != m_namespace_replayers.end());
+
+ on_finish = new LambdaContext(
+ [this, name, on_finish](int r) {
+ if (r < 0) {
+ derr << "failed to handle acquire leader for namespace: "
+ << name << ": " << cpp_strerror(r) << dendl;
+
+ // remove the namespace replayer -- update_namespace_replayers will
+ // retry to create it and acquire leader.
+
+ std::lock_guard locker{m_lock};
+
+ auto namespace_replayer = m_namespace_replayers[name];
+ m_namespace_replayers.erase(name);
+ auto on_shut_down = new LambdaContext(
+ [namespace_replayer, on_finish](int r) {
+ delete namespace_replayer;
+ on_finish->complete(r);
+ });
+ m_service_daemon->remove_namespace(m_local_pool_id, name);
+ namespace_replayer->shut_down(on_shut_down);
+ return;
+ }
+ on_finish->complete(0);
+ });
+
+ it->second->handle_acquire_leader(on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::print_status(Formatter *f) {
+ dout(20) << dendl;
+
+ assert(f);
+
+ std::lock_guard l{m_lock};
+
+ f->open_object_section("pool_replayer_status");
+ f->dump_stream("peer") << m_peer;
+ if (m_local_io_ctx.is_valid()) {
+ f->dump_string("pool", m_local_io_ctx.get_pool_name());
+ f->dump_stream("instance_id") << m_local_io_ctx.get_instance_id();
+ }
+
+ std::string state("running");
+ if (m_manual_stop) {
+ state = "stopped (manual)";
+ } else if (m_stopping) {
+ state = "stopped";
+ } else if (!is_running()) {
+ state = "error";
+ }
+ f->dump_string("state", state);
+
+ if (m_leader_watcher) {
+ std::string leader_instance_id;
+ m_leader_watcher->get_leader_instance_id(&leader_instance_id);
+ f->dump_string("leader_instance_id", leader_instance_id);
+
+ bool leader = m_leader_watcher->is_leader();
+ f->dump_bool("leader", leader);
+ if (leader) {
+ std::vector<std::string> instance_ids;
+ m_leader_watcher->list_instances(&instance_ids);
+ f->open_array_section("instances");
+ for (auto instance_id : instance_ids) {
+ f->dump_string("instance_id", instance_id);
+ }
+ f->close_section(); // instances
+ }
+ }
+
+ if (m_local_rados) {
+ auto cct = reinterpret_cast<CephContext *>(m_local_rados->cct());
+ f->dump_string("local_cluster_admin_socket",
+ cct->_conf.get_val<std::string>("admin_socket"));
+ }
+ if (m_remote_rados) {
+ auto cct = reinterpret_cast<CephContext *>(m_remote_rados->cct());
+ f->dump_string("remote_cluster_admin_socket",
+ cct->_conf.get_val<std::string>("admin_socket"));
+ }
+
+ if (m_image_sync_throttler) {
+ f->open_object_section("sync_throttler");
+ m_image_sync_throttler->print_status(f);
+ f->close_section(); // sync_throttler
+ }
+
+ if (m_image_deletion_throttler) {
+ f->open_object_section("deletion_throttler");
+ m_image_deletion_throttler->print_status(f);
+ f->close_section(); // deletion_throttler
+ }
+
+ if (m_default_namespace_replayer) {
+ m_default_namespace_replayer->print_status(f);
+ }
+
+ f->open_array_section("namespaces");
+ for (auto &it : m_namespace_replayers) {
+ f->open_object_section("namespace");
+ f->dump_string("name", it.first);
+ it.second->print_status(f);
+ f->close_section(); // namespace
+ }
+ f->close_section(); // namespaces
+
+ f->close_section(); // pool_replayer_status
+}
+
+template <typename I>
+void PoolReplayer<I>::start() {
+ dout(20) << dendl;
+
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = false;
+
+ if (m_default_namespace_replayer) {
+ m_default_namespace_replayer->start();
+ }
+ for (auto &it : m_namespace_replayers) {
+ it.second->start();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::stop(bool manual) {
+ dout(20) << "enter: manual=" << manual << dendl;
+
+ std::lock_guard l{m_lock};
+ if (!manual) {
+ m_stopping = true;
+ m_cond.notify_all();
+ return;
+ } else if (m_stopping) {
+ return;
+ }
+
+ m_manual_stop = true;
+
+ if (m_default_namespace_replayer) {
+ m_default_namespace_replayer->stop();
+ }
+ for (auto &it : m_namespace_replayers) {
+ it.second->stop();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::restart() {
+ dout(20) << dendl;
+
+ std::lock_guard l{m_lock};
+
+ if (m_stopping) {
+ return;
+ }
+
+ if (m_default_namespace_replayer) {
+ m_default_namespace_replayer->restart();
+ }
+ for (auto &it : m_namespace_replayers) {
+ it.second->restart();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::flush() {
+ dout(20) << dendl;
+
+ std::lock_guard l{m_lock};
+
+ if (m_stopping || m_manual_stop) {
+ return;
+ }
+
+ if (m_default_namespace_replayer) {
+ m_default_namespace_replayer->flush();
+ }
+ for (auto &it : m_namespace_replayers) {
+ it.second->flush();
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::release_leader() {
+ dout(20) << dendl;
+
+ std::lock_guard l{m_lock};
+
+ if (m_stopping || !m_leader_watcher) {
+ return;
+ }
+
+ m_leader_watcher->release_leader();
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_post_acquire_leader(Context *on_finish) {
+ dout(20) << dendl;
+
+ with_namespace_replayers(
+ [this](Context *on_finish) {
+ dout(10) << "handle_post_acquire_leader" << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ m_service_daemon->add_or_update_attribute(m_local_pool_id,
+ SERVICE_DAEMON_LEADER_KEY,
+ true);
+ auto ctx = new LambdaContext(
+ [this, on_finish](int r) {
+ if (r == 0) {
+ std::lock_guard locker{m_lock};
+ m_leader = true;
+ }
+ on_finish->complete(r);
+ });
+
+ auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+ auto gather_ctx = new C_Gather(cct, ctx);
+
+ m_default_namespace_replayer->handle_acquire_leader(
+ gather_ctx->new_sub());
+
+ for (auto &it : m_namespace_replayers) {
+ namespace_replayer_acquire_leader(it.first, gather_ctx->new_sub());
+ }
+
+ gather_ctx->activate();
+ }, on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_pre_release_leader(Context *on_finish) {
+ dout(20) << dendl;
+
+ with_namespace_replayers(
+ [this](Context *on_finish) {
+ dout(10) << "handle_pre_release_leader" << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+
+ m_leader = false;
+ m_service_daemon->remove_attribute(m_local_pool_id,
+ SERVICE_DAEMON_LEADER_KEY);
+
+ auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct());
+ auto gather_ctx = new C_Gather(cct, on_finish);
+
+ m_default_namespace_replayer->handle_release_leader(
+ gather_ctx->new_sub());
+
+ for (auto &it : m_namespace_replayers) {
+ it.second->handle_release_leader(gather_ctx->new_sub());
+ }
+
+ gather_ctx->activate();
+ }, on_finish);
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_update_leader(
+ const std::string &leader_instance_id) {
+ dout(10) << "leader_instance_id=" << leader_instance_id << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ m_default_namespace_replayer->handle_update_leader(leader_instance_id);
+
+ for (auto &it : m_namespace_replayers) {
+ it.second->handle_update_leader(leader_instance_id);
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_instances_added(
+ const std::vector<std::string> &instance_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ std::lock_guard locker{m_lock};
+ if (!m_leader_watcher->is_leader()) {
+ return;
+ }
+
+ m_default_namespace_replayer->handle_instances_added(instance_ids);
+
+ for (auto &it : m_namespace_replayers) {
+ it.second->handle_instances_added(instance_ids);
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_instances_removed(
+ const std::vector<std::string> &instance_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ std::lock_guard locker{m_lock};
+ if (!m_leader_watcher->is_leader()) {
+ return;
+ }
+
+ m_default_namespace_replayer->handle_instances_removed(instance_ids);
+
+ for (auto &it : m_namespace_replayers) {
+ it.second->handle_instances_removed(instance_ids);
+ }
+}
+
+template <typename I>
+void PoolReplayer<I>::handle_remote_pool_meta_updated(
+ const RemotePoolMeta& remote_pool_meta) {
+ dout(5) << "remote_pool_meta=" << remote_pool_meta << dendl;
+
+ if (!m_default_namespace_replayer) {
+ m_remote_pool_meta = remote_pool_meta;
+ return;
+ }
+
+ derr << "remote pool metadata updated unexpectedly" << dendl;
+ std::unique_lock locker{m_lock};
+ m_stopping = true;
+ m_cond.notify_all();
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::PoolReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/PoolReplayer.h b/src/tools/rbd_mirror/PoolReplayer.h
new file mode 100644
index 000000000..e0fd75377
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolReplayer.h
@@ -0,0 +1,288 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_REPLAYER_H
+#define CEPH_RBD_MIRROR_POOL_REPLAYER_H
+
+#include "common/Cond.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+
+#include "tools/rbd_mirror/LeaderWatcher.h"
+#include "tools/rbd_mirror/NamespaceReplayer.h"
+#include "tools/rbd_mirror/Throttler.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/leader_watcher/Types.h"
+#include "tools/rbd_mirror/service_daemon/Types.h"
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+class AdminSocketHook;
+
+namespace journal { struct CacheManagerHandler; }
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class RemotePoolPoller;
+namespace remote_pool_poller { struct Listener; }
+
+struct PoolMetaCache;
+template <typename> class ServiceDaemon;
+template <typename> struct Threads;
+
+
+/**
+ * Controls mirroring for a single remote cluster.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class PoolReplayer {
+public:
+ PoolReplayer(Threads<ImageCtxT> *threads,
+ ServiceDaemon<ImageCtxT> *service_daemon,
+ journal::CacheManagerHandler *cache_manager_handler,
+ PoolMetaCache* pool_meta_cache,
+ int64_t local_pool_id, const PeerSpec &peer,
+ const std::vector<const char*> &args);
+ ~PoolReplayer();
+ PoolReplayer(const PoolReplayer&) = delete;
+ PoolReplayer& operator=(const PoolReplayer&) = delete;
+
+ bool is_blocklisted() const;
+ bool is_leader() const;
+ bool is_running() const;
+
+ void init(const std::string& site_name);
+ void shut_down();
+
+ void run();
+
+ void print_status(Formatter *f);
+ void start();
+ void stop(bool manual);
+ void restart();
+ void flush();
+ void release_leader();
+ void reopen_logs();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * INIT
+ * |
+ * v
+ * <follower> <---------------------\
+ * . |
+ * . (leader acquired) |
+ * v |
+ * NOTIFY_NAMESPACE_WATCHERS NOTIFY_NAMESPACE_WATCHERS
+ * | ^
+ * v .
+ * <leader> .
+ * . .
+ * . (leader lost / shut down) .
+ * . . . . . . . . . . . . . . . .
+ *
+ * @endverbatim
+ */
+
+ struct RemotePoolPollerListener;
+
+ int init_rados(const std::string &cluster_name,
+ const std::string &client_name,
+ const std::string &mon_host,
+ const std::string &key,
+ const std::string &description, RadosRef *rados_ref,
+ bool strip_cluster_overrides);
+
+ void update_namespace_replayers();
+ int list_mirroring_namespaces(std::set<std::string> *namespaces);
+
+ void namespace_replayer_acquire_leader(const std::string &name,
+ Context *on_finish);
+
+ void handle_post_acquire_leader(Context *on_finish);
+ void handle_pre_release_leader(Context *on_finish);
+
+ void handle_update_leader(const std::string &leader_instance_id);
+
+ void handle_instances_added(const std::vector<std::string> &instance_ids);
+ void handle_instances_removed(const std::vector<std::string> &instance_ids);
+
+ // sync version, executed in the caller thread
+ template <typename L>
+ void with_namespace_replayers(L &&callback) {
+ std::lock_guard locker{m_lock};
+
+ if (m_namespace_replayers_locked) {
+ ceph_assert(m_on_namespace_replayers_unlocked == nullptr);
+ C_SaferCond cond;
+ m_on_namespace_replayers_unlocked = &cond;
+ m_lock.unlock();
+ cond.wait();
+ m_lock.lock();
+ } else {
+ m_namespace_replayers_locked = true;
+ }
+
+ ceph_assert(m_namespace_replayers_locked);
+ callback(); // may temporary release the lock
+ ceph_assert(m_namespace_replayers_locked);
+
+ if (m_on_namespace_replayers_unlocked == nullptr) {
+ m_namespace_replayers_locked = false;
+ return;
+ }
+
+ m_threads->work_queue->queue(m_on_namespace_replayers_unlocked);
+ m_on_namespace_replayers_unlocked = nullptr;
+ }
+
+ // async version
+ template <typename L>
+ void with_namespace_replayers(L &&callback, Context *on_finish) {
+ std::lock_guard locker{m_lock};
+
+ on_finish = librbd::util::create_async_context_callback(
+ m_threads->work_queue, new LambdaContext(
+ [this, on_finish](int r) {
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_namespace_replayers_locked);
+
+ m_namespace_replayers_locked = false;
+
+ if (m_on_namespace_replayers_unlocked != nullptr) {
+ m_namespace_replayers_locked = true;
+ m_threads->work_queue->queue(m_on_namespace_replayers_unlocked);
+ m_on_namespace_replayers_unlocked = nullptr;
+ }
+ }
+ on_finish->complete(r);
+ }));
+
+ auto on_lock = new LambdaContext(
+ [this, callback, on_finish](int) {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_namespace_replayers_locked);
+
+ callback(on_finish);
+ });
+
+ if (m_namespace_replayers_locked) {
+ ceph_assert(m_on_namespace_replayers_unlocked == nullptr);
+ m_on_namespace_replayers_unlocked = on_lock;
+ return;
+ }
+
+ m_namespace_replayers_locked = true;
+ m_threads->work_queue->queue(on_lock);
+ }
+
+ void handle_remote_pool_meta_updated(const RemotePoolMeta& remote_pool_meta);
+
+ Threads<ImageCtxT> *m_threads;
+ ServiceDaemon<ImageCtxT> *m_service_daemon;
+ journal::CacheManagerHandler *m_cache_manager_handler;
+ PoolMetaCache* m_pool_meta_cache;
+ int64_t m_local_pool_id = -1;
+ PeerSpec m_peer;
+ std::vector<const char*> m_args;
+
+ mutable ceph::mutex m_lock;
+ ceph::condition_variable m_cond;
+ std::string m_site_name;
+ bool m_stopping = false;
+ bool m_manual_stop = false;
+ bool m_blocklisted = false;
+
+ RadosRef m_local_rados;
+ RadosRef m_remote_rados;
+
+ librados::IoCtx m_local_io_ctx;
+ librados::IoCtx m_remote_io_ctx;
+
+ std::string m_local_mirror_uuid;
+
+ RemotePoolMeta m_remote_pool_meta;
+ std::unique_ptr<remote_pool_poller::Listener> m_remote_pool_poller_listener;
+ std::unique_ptr<RemotePoolPoller<ImageCtxT>> m_remote_pool_poller;
+
+ std::unique_ptr<NamespaceReplayer<ImageCtxT>> m_default_namespace_replayer;
+ std::map<std::string, NamespaceReplayer<ImageCtxT> *> m_namespace_replayers;
+
+ std::string m_asok_hook_name;
+ AdminSocketHook *m_asok_hook = nullptr;
+
+ service_daemon::CalloutId m_callout_id = service_daemon::CALLOUT_ID_NONE;
+
+ bool m_leader = false;
+ bool m_namespace_replayers_locked = false;
+ Context *m_on_namespace_replayers_unlocked = nullptr;
+
+ class PoolReplayerThread : public Thread {
+ PoolReplayer *m_pool_replayer;
+ public:
+ PoolReplayerThread(PoolReplayer *pool_replayer)
+ : m_pool_replayer(pool_replayer) {
+ }
+ void *entry() override {
+ m_pool_replayer->run();
+ return 0;
+ }
+ } m_pool_replayer_thread;
+
+ class LeaderListener : public leader_watcher::Listener {
+ public:
+ LeaderListener(PoolReplayer *pool_replayer)
+ : m_pool_replayer(pool_replayer) {
+ }
+
+ protected:
+ void post_acquire_handler(Context *on_finish) override {
+ m_pool_replayer->handle_post_acquire_leader(on_finish);
+ }
+
+ void pre_release_handler(Context *on_finish) override {
+ m_pool_replayer->handle_pre_release_leader(on_finish);
+ }
+
+ void update_leader_handler(
+ const std::string &leader_instance_id) override {
+ m_pool_replayer->handle_update_leader(leader_instance_id);
+ }
+
+ void handle_instances_added(const InstanceIds& instance_ids) override {
+ m_pool_replayer->handle_instances_added(instance_ids);
+ }
+
+ void handle_instances_removed(const InstanceIds& instance_ids) override {
+ m_pool_replayer->handle_instances_removed(instance_ids);
+ }
+
+ private:
+ PoolReplayer *m_pool_replayer;
+ } m_leader_listener;
+
+ std::unique_ptr<LeaderWatcher<ImageCtxT>> m_leader_watcher;
+ std::unique_ptr<Throttler<ImageCtxT>> m_image_sync_throttler;
+ std::unique_ptr<Throttler<ImageCtxT>> m_image_deletion_throttler;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::PoolReplayer<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_REPLAYER_H
diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc
new file mode 100644
index 000000000..bec931cf3
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolWatcher.cc
@@ -0,0 +1,473 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/PoolWatcher.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
+#include "librbd/MirroringWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/api/Image.h"
+#include "librbd/api/Mirror.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::PoolWatcher: " << this << " " \
+ << __func__ << ": "
+
+using std::list;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+namespace rbd {
+namespace mirror {
+
+template <typename I>
+class PoolWatcher<I>::MirroringWatcher : public librbd::MirroringWatcher<I> {
+public:
+ using ContextWQ = typename std::decay<
+ typename std::remove_pointer<
+ decltype(Threads<I>::work_queue)>::type>::type;
+
+ MirroringWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue,
+ PoolWatcher *pool_watcher)
+ : librbd::MirroringWatcher<I>(io_ctx, work_queue),
+ m_pool_watcher(pool_watcher) {
+ }
+
+ void handle_rewatch_complete(int r) override {
+ m_pool_watcher->handle_rewatch_complete(r);
+ }
+
+ void handle_mode_updated(cls::rbd::MirrorMode mirror_mode) override {
+ // invalidate all image state and refresh the pool contents
+ m_pool_watcher->schedule_refresh_images(5);
+ }
+
+ void handle_image_updated(cls::rbd::MirrorImageState state,
+ const std::string &image_id,
+ const std::string &global_image_id) override {
+ bool enabled = (state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED);
+ m_pool_watcher->handle_image_updated(image_id, global_image_id,
+ enabled);
+ }
+
+private:
+ PoolWatcher *m_pool_watcher;
+};
+
+template <typename I>
+PoolWatcher<I>::PoolWatcher(Threads<I> *threads,
+ librados::IoCtx &io_ctx,
+ const std::string& mirror_uuid,
+ pool_watcher::Listener &listener)
+ : m_threads(threads),
+ m_io_ctx(io_ctx),
+ m_mirror_uuid(mirror_uuid),
+ m_listener(listener),
+ m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+ "rbd::mirror::PoolWatcher", this))) {
+ m_mirroring_watcher = new MirroringWatcher(m_io_ctx,
+ m_threads->work_queue, this);
+}
+
+template <typename I>
+PoolWatcher<I>::~PoolWatcher() {
+ delete m_mirroring_watcher;
+}
+
+template <typename I>
+bool PoolWatcher<I>::is_blocklisted() const {
+ std::lock_guard locker{m_lock};
+ return m_blocklisted;
+}
+
+template <typename I>
+void PoolWatcher<I>::init(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ m_on_init_finish = on_finish;
+
+ ceph_assert(!m_refresh_in_progress);
+ m_refresh_in_progress = true;
+ }
+
+ // start async updates for mirror image directory
+ register_watcher();
+}
+
+template <typename I>
+void PoolWatcher<I>::shut_down(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ ceph_assert(!m_shutting_down);
+ m_shutting_down = true;
+ if (m_timer_ctx != nullptr) {
+ m_threads->timer->cancel_event(m_timer_ctx);
+ m_timer_ctx = nullptr;
+ }
+ }
+
+ // in-progress unregister tracked as async op
+ unregister_watcher();
+
+ m_async_op_tracker.wait_for_ops(on_finish);
+}
+
+template <typename I>
+void PoolWatcher<I>::register_watcher() {
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_image_ids_invalid);
+ ceph_assert(m_refresh_in_progress);
+ }
+
+ // if the watch registration is in-flight, let the watcher
+ // handle the transition -- only (re-)register if it's not registered
+ if (!m_mirroring_watcher->is_unregistered()) {
+ refresh_images();
+ return;
+ }
+
+ // first time registering or the watch failed
+ dout(5) << dendl;
+ m_async_op_tracker.start_op();
+
+ Context *ctx = create_context_callback<
+ PoolWatcher, &PoolWatcher<I>::handle_register_watcher>(this);
+ m_mirroring_watcher->register_watch(ctx);
+}
+
+template <typename I>
+void PoolWatcher<I>::handle_register_watcher(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_image_ids_invalid);
+ ceph_assert(m_refresh_in_progress);
+ if (r < 0) {
+ m_refresh_in_progress = false;
+ }
+ }
+
+ Context *on_init_finish = nullptr;
+ if (r >= 0) {
+ refresh_images();
+ } else if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted" << dendl;
+
+ std::lock_guard locker{m_lock};
+ m_blocklisted = true;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r == -ENOENT) {
+ dout(5) << "mirroring directory does not exist" << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ std::swap(on_init_finish, m_on_init_finish);
+ }
+
+ schedule_refresh_images(30);
+ } else {
+ derr << "unexpected error registering mirroring directory watch: "
+ << cpp_strerror(r) << dendl;
+ schedule_refresh_images(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void PoolWatcher<I>::unregister_watcher() {
+ dout(5) << dendl;
+
+ m_async_op_tracker.start_op();
+ Context *ctx = new LambdaContext([this](int r) {
+ dout(5) << "unregister_watcher: r=" << r << dendl;
+ if (r < 0) {
+ derr << "error unregistering watcher for "
+ << m_mirroring_watcher->get_oid() << " object: " << cpp_strerror(r)
+ << dendl;
+ }
+ m_async_op_tracker.finish_op();
+ });
+
+ m_mirroring_watcher->unregister_watch(ctx);
+}
+
+template <typename I>
+void PoolWatcher<I>::refresh_images() {
+ dout(5) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_image_ids_invalid);
+ ceph_assert(m_refresh_in_progress);
+
+ // clear all pending notification events since we need to perform
+ // a full image list refresh
+ m_pending_added_image_ids.clear();
+ m_pending_removed_image_ids.clear();
+ }
+
+ m_async_op_tracker.start_op();
+ m_refresh_image_ids.clear();
+ Context *ctx = create_context_callback<
+ PoolWatcher, &PoolWatcher<I>::handle_refresh_images>(this);
+ auto req = pool_watcher::RefreshImagesRequest<I>::create(m_io_ctx,
+ &m_refresh_image_ids,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void PoolWatcher<I>::handle_refresh_images(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ bool deferred_refresh = false;
+ bool retry_refresh = false;
+ Context *on_init_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_image_ids_invalid);
+ ceph_assert(m_refresh_in_progress);
+ m_refresh_in_progress = false;
+
+ if (r == -ENOENT) {
+ dout(5) << "mirroring directory not found" << dendl;
+ r = 0;
+ m_refresh_image_ids.clear();
+ }
+
+ if (m_deferred_refresh) {
+ // need to refresh -- skip the notification
+ deferred_refresh = true;
+ } else if (r >= 0) {
+ m_pending_image_ids = std::move(m_refresh_image_ids);
+ m_image_ids_invalid = false;
+ std::swap(on_init_finish, m_on_init_finish);
+
+ schedule_listener();
+ } else if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted during image refresh" << dendl;
+
+ m_blocklisted = true;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else {
+ retry_refresh = true;
+ }
+ }
+
+ if (deferred_refresh) {
+ dout(5) << "scheduling deferred refresh" << dendl;
+ schedule_refresh_images(0);
+ } else if (retry_refresh) {
+ derr << "failed to retrieve mirroring directory: " << cpp_strerror(r)
+ << dendl;
+ schedule_refresh_images(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void PoolWatcher<I>::schedule_refresh_images(double interval) {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (m_shutting_down || m_refresh_in_progress || m_timer_ctx != nullptr) {
+ if (m_refresh_in_progress && !m_deferred_refresh) {
+ dout(5) << "deferring refresh until in-flight refresh completes" << dendl;
+ m_deferred_refresh = true;
+ }
+ return;
+ }
+
+ m_image_ids_invalid = true;
+ m_timer_ctx = m_threads->timer->add_event_after(
+ interval,
+ new LambdaContext([this](int r) {
+ process_refresh_images();
+ }));
+}
+
+template <typename I>
+void PoolWatcher<I>::handle_rewatch_complete(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted" << dendl;
+
+ std::lock_guard locker{m_lock};
+ m_blocklisted = true;
+ return;
+ } else if (r == -ENOENT) {
+ dout(5) << "mirroring directory deleted" << dendl;
+ } else if (r < 0) {
+ derr << "unexpected error re-registering mirroring directory watch: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ schedule_refresh_images(5);
+}
+
+template <typename I>
+void PoolWatcher<I>::handle_image_updated(const std::string &id,
+ const std::string &global_image_id,
+ bool enabled) {
+ dout(10) << "image_id=" << id << ", "
+ << "global_image_id=" << global_image_id << ", "
+ << "enabled=" << enabled << dendl;
+
+ std::lock_guard locker{m_lock};
+ ImageId image_id(global_image_id, id);
+ m_pending_added_image_ids.erase(image_id);
+ m_pending_removed_image_ids.erase(image_id);
+
+ if (enabled) {
+ m_pending_added_image_ids.insert(image_id);
+ schedule_listener();
+ } else {
+ m_pending_removed_image_ids.insert(image_id);
+ schedule_listener();
+ }
+}
+
+template <typename I>
+void PoolWatcher<I>::process_refresh_images() {
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(m_timer_ctx != nullptr);
+ m_timer_ctx = nullptr;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(!m_refresh_in_progress);
+ m_refresh_in_progress = true;
+ m_deferred_refresh = false;
+ }
+
+ // execute outside of the timer's lock
+ m_async_op_tracker.start_op();
+ Context *ctx = new LambdaContext([this](int r) {
+ register_watcher();
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void PoolWatcher<I>::schedule_listener() {
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ m_pending_updates = true;
+ if (m_shutting_down || m_image_ids_invalid || m_notify_listener_in_progress) {
+ return;
+ }
+
+ dout(20) << dendl;
+
+ m_async_op_tracker.start_op();
+ Context *ctx = new LambdaContext([this](int r) {
+ notify_listener();
+ m_async_op_tracker.finish_op();
+ });
+
+ m_notify_listener_in_progress = true;
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void PoolWatcher<I>::notify_listener() {
+ dout(10) << dendl;
+
+ std::string mirror_uuid;
+ ImageIds added_image_ids;
+ ImageIds removed_image_ids;
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_notify_listener_in_progress);
+ }
+
+ if (!removed_image_ids.empty()) {
+ m_listener.handle_update(mirror_uuid, {}, std::move(removed_image_ids));
+ removed_image_ids.clear();
+ }
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_notify_listener_in_progress);
+
+ // if the watch failed while we didn't own the lock, we are going
+ // to need to perform a full refresh
+ if (m_image_ids_invalid) {
+ m_notify_listener_in_progress = false;
+ return;
+ }
+
+ // merge add/remove notifications into pending set (a given image
+ // can only be in one set or another)
+ for (auto &image_id : m_pending_removed_image_ids) {
+ dout(20) << "image_id=" << image_id << dendl;
+ m_pending_image_ids.erase(image_id);
+ }
+
+ for (auto &image_id : m_pending_added_image_ids) {
+ dout(20) << "image_id=" << image_id << dendl;
+ m_pending_image_ids.erase(image_id);
+ m_pending_image_ids.insert(image_id);
+ }
+ m_pending_added_image_ids.clear();
+
+ // compute added/removed images
+ for (auto &image_id : m_image_ids) {
+ auto it = m_pending_image_ids.find(image_id);
+ if (it == m_pending_image_ids.end() || it->id != image_id.id) {
+ removed_image_ids.insert(image_id);
+ }
+ }
+ for (auto &image_id : m_pending_image_ids) {
+ auto it = m_image_ids.find(image_id);
+ if (it == m_image_ids.end() || it->id != image_id.id) {
+ added_image_ids.insert(image_id);
+ }
+ }
+
+ m_pending_updates = false;
+ m_image_ids = m_pending_image_ids;
+ }
+
+ m_listener.handle_update(m_mirror_uuid, std::move(added_image_ids),
+ std::move(removed_image_ids));
+
+ {
+ std::lock_guard locker{m_lock};
+ m_notify_listener_in_progress = false;
+ if (m_pending_updates) {
+ schedule_listener();
+ }
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::PoolWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h
new file mode 100644
index 000000000..2905de15f
--- /dev/null
+++ b/src/tools/rbd_mirror/PoolWatcher.h
@@ -0,0 +1,161 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_H
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_context.h"
+#include "common/ceph_mutex.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include <boost/functional/hash.hpp>
+#include <boost/optional.hpp>
+#include "include/ceph_assert.h"
+#include "tools/rbd_mirror/pool_watcher/Types.h"
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+/**
+ * Keeps track of images that have mirroring enabled within all
+ * pools.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class PoolWatcher {
+public:
+ static PoolWatcher* create(Threads<ImageCtxT> *threads,
+ librados::IoCtx &io_ctx,
+ const std::string& mirror_uuid,
+ pool_watcher::Listener &listener) {
+ return new PoolWatcher(threads, io_ctx, mirror_uuid, listener);
+ }
+
+ PoolWatcher(Threads<ImageCtxT> *threads,
+ librados::IoCtx &io_ctx,
+ const std::string& mirror_uuid,
+ pool_watcher::Listener &listener);
+ ~PoolWatcher();
+ PoolWatcher(const PoolWatcher&) = delete;
+ PoolWatcher& operator=(const PoolWatcher&) = delete;
+
+ bool is_blocklisted() const;
+
+ void init(Context *on_finish = nullptr);
+ void shut_down(Context *on_finish);
+
+ inline uint64_t get_image_count() const {
+ std::lock_guard locker{m_lock};
+ return m_image_ids.size();
+ }
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * INIT
+ * |
+ * v
+ * REGISTER_WATCHER
+ * |
+ * |/--------------------------------\
+ * | |
+ * v |
+ * REFRESH_IMAGES |
+ * | |
+ * |/----------------------------\ |
+ * | | |
+ * v | |
+ * NOTIFY_LISTENER | |
+ * | | |
+ * v | |
+ * IDLE ---\ | |
+ * | | | |
+ * | |\---> IMAGE_UPDATED | |
+ * | | | | |
+ * | | v | |
+ * | | GET_IMAGE_NAME --/ |
+ * | | |
+ * | \----> WATCH_ERROR ---------/
+ * v
+ * SHUT_DOWN
+ * |
+ * v
+ * UNREGISTER_WATCHER
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ class MirroringWatcher;
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx m_io_ctx;
+ std::string m_mirror_uuid;
+ pool_watcher::Listener &m_listener;
+
+ ImageIds m_refresh_image_ids;
+ bufferlist m_out_bl;
+
+ mutable ceph::mutex m_lock;
+
+ Context *m_on_init_finish = nullptr;
+
+ ImageIds m_image_ids;
+
+ bool m_pending_updates = false;
+ bool m_notify_listener_in_progress = false;
+ ImageIds m_pending_image_ids;
+ ImageIds m_pending_added_image_ids;
+ ImageIds m_pending_removed_image_ids;
+
+ MirroringWatcher *m_mirroring_watcher;
+
+ Context *m_timer_ctx = nullptr;
+
+ AsyncOpTracker m_async_op_tracker;
+ bool m_blocklisted = false;
+ bool m_shutting_down = false;
+ bool m_image_ids_invalid = true;
+ bool m_refresh_in_progress = false;
+ bool m_deferred_refresh = false;
+
+ void register_watcher();
+ void handle_register_watcher(int r);
+ void unregister_watcher();
+
+ void refresh_images();
+ void handle_refresh_images(int r);
+
+ void schedule_refresh_images(double interval);
+ void process_refresh_images();
+
+ void handle_rewatch_complete(int r);
+ void handle_image_updated(const std::string &image_id,
+ const std::string &global_image_id,
+ bool enabled);
+
+ void schedule_listener();
+ void notify_listener();
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::PoolWatcher<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_H
diff --git a/src/tools/rbd_mirror/ProgressContext.h b/src/tools/rbd_mirror/ProgressContext.h
new file mode 100644
index 000000000..e4430ee6a
--- /dev/null
+++ b/src/tools/rbd_mirror/ProgressContext.h
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_PROGRESS_CONTEXT_H
+#define RBD_MIRROR_PROGRESS_CONTEXT_H
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext
+{
+public:
+ virtual ~ProgressContext() {}
+ virtual void update_progress(const std::string &description,
+ bool flush = true) = 0;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_PROGRESS_CONTEXT_H
diff --git a/src/tools/rbd_mirror/RemotePoolPoller.cc b/src/tools/rbd_mirror/RemotePoolPoller.cc
new file mode 100644
index 000000000..8bfb35d4a
--- /dev/null
+++ b/src/tools/rbd_mirror/RemotePoolPoller.cc
@@ -0,0 +1,267 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "RemotePoolPoller.h"
+#include "include/ceph_assert.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::RemotePoolPoller: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+static const double POLL_INTERVAL_SECONDS = 30;
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+RemotePoolPoller<I>::~RemotePoolPoller() {
+ ceph_assert(m_timer_task == nullptr);
+}
+
+template <typename I>
+void RemotePoolPoller<I>::init(Context* on_finish) {
+ dout(10) << dendl;
+
+ ceph_assert(m_state == STATE_INITIALIZING);
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+
+ get_mirror_uuid();
+}
+
+template <typename I>
+void RemotePoolPoller<I>::shut_down(Context* on_finish) {
+ dout(10) << dendl;
+
+ std::unique_lock locker(m_threads->timer_lock);
+ ceph_assert(m_state == STATE_POLLING);
+ m_state = STATE_SHUTTING_DOWN;
+
+ if (m_timer_task == nullptr) {
+ // currently executing a poll
+ ceph_assert(m_on_finish == nullptr);
+ m_on_finish = on_finish;
+ return;
+ }
+
+ m_threads->timer->cancel_event(m_timer_task);
+ m_timer_task = nullptr;
+ m_threads->work_queue->queue(on_finish, 0);
+}
+
+template <typename I>
+void RemotePoolPoller<I>::get_mirror_uuid() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_uuid_get_start(&op);
+
+ auto aio_comp = create_rados_callback<
+ RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_get_mirror_uuid>(this);
+ m_out_bl.clear();
+ int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void RemotePoolPoller<I>::handle_get_mirror_uuid(int r) {
+ dout(10) << "r=" << r << dendl;
+ std::string remote_mirror_uuid;
+ if (r >= 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_uuid_get_finish(&it, &remote_mirror_uuid);
+ if (r >= 0 && remote_mirror_uuid.empty()) {
+ r = -ENOENT;
+ }
+ }
+
+ if (r < 0) {
+ if (r == -ENOENT) {
+ dout(5) << "remote mirror uuid missing" << dendl;
+ } else {
+ derr << "failed to retrieve remote mirror uuid: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ m_remote_pool_meta.mirror_uuid = "";
+ }
+
+ // if we have the mirror uuid, we will poll until shut down
+ if (m_state == STATE_INITIALIZING) {
+ if (r < 0) {
+ schedule_task(r);
+ return;
+ }
+
+ m_state = STATE_POLLING;
+ }
+
+ dout(10) << "remote_mirror_uuid=" << remote_mirror_uuid << dendl;
+ if (m_remote_pool_meta.mirror_uuid != remote_mirror_uuid) {
+ m_remote_pool_meta.mirror_uuid = remote_mirror_uuid;
+ m_updated = true;
+ }
+
+ mirror_peer_ping();
+}
+
+template <typename I>
+void RemotePoolPoller<I>::mirror_peer_ping() {
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_peer_ping(&op, m_site_name, m_local_mirror_uuid);
+
+ auto aio_comp = create_rados_callback<
+ RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_mirror_peer_ping>(this);
+ int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void RemotePoolPoller<I>::handle_mirror_peer_ping(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EOPNOTSUPP) {
+ // older OSD that doesn't support snaphot-based mirroring, so no need
+ // to query remote peers
+ dout(10) << "remote peer does not support snapshot-based mirroring"
+ << dendl;
+ notify_listener();
+ return;
+ } else if (r < 0) {
+ // we can still see if we can perform a peer list and find outselves
+ derr << "failed to ping remote mirror peer: " << cpp_strerror(r) << dendl;
+ }
+
+ mirror_peer_list();
+}
+
+template <typename I>
+void RemotePoolPoller<I>::mirror_peer_list() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_peer_list_start(&op);
+
+ auto aio_comp = create_rados_callback<
+ RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_mirror_peer_list>(this);
+ m_out_bl.clear();
+ int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void RemotePoolPoller<I>::handle_mirror_peer_list(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::vector<cls::rbd::MirrorPeer> peers;
+ if (r == 0) {
+ auto iter = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_peer_list_finish(&iter, &peers);
+ }
+
+ if (r < 0) {
+ derr << "failed to retrieve mirror peers: " << cpp_strerror(r) << dendl;
+ }
+
+ cls::rbd::MirrorPeer* matched_peer = nullptr;
+ for (auto& peer : peers) {
+ if (peer.mirror_peer_direction == cls::rbd::MIRROR_PEER_DIRECTION_RX) {
+ continue;
+ }
+
+ if (peer.mirror_uuid == m_local_mirror_uuid) {
+ matched_peer = &peer;
+ break;
+ } else if (peer.site_name == m_site_name) {
+ // keep searching in case we hit an exact match by fsid
+ matched_peer = &peer;
+ }
+ }
+
+ // older OSDs don't support peer ping so we might fail to find a match,
+ // which will prevent snapshot mirroring from functioning
+ std::string remote_mirror_peer_uuid;
+ if (matched_peer != nullptr) {
+ remote_mirror_peer_uuid = matched_peer->uuid;
+ }
+
+ dout(10) << "remote_mirror_peer_uuid=" << remote_mirror_peer_uuid << dendl;
+ if (m_remote_pool_meta.mirror_peer_uuid != remote_mirror_peer_uuid) {
+ m_remote_pool_meta.mirror_peer_uuid = remote_mirror_peer_uuid;
+ m_updated = true;
+ }
+
+ notify_listener();
+}
+
+template <typename I>
+void RemotePoolPoller<I>::notify_listener() {
+ bool updated = false;
+ std::swap(updated, m_updated);
+ if (updated) {
+ dout(10) << dendl;
+ m_listener.handle_updated(m_remote_pool_meta);
+ }
+
+ schedule_task(0);
+}
+
+template <typename I>
+void RemotePoolPoller<I>::schedule_task(int r) {
+ std::unique_lock locker{m_threads->timer_lock};
+
+ if (m_state == STATE_POLLING) {
+ dout(10) << dendl;
+
+ ceph_assert(m_timer_task == nullptr);
+ m_timer_task = new LambdaContext([this](int) {
+ handle_task();
+ });
+
+ m_threads->timer->add_event_after(POLL_INTERVAL_SECONDS, m_timer_task);
+ }
+
+ // finish init or shut down callback
+ if (m_on_finish != nullptr) {
+ locker.unlock();
+ Context* on_finish = nullptr;
+ std::swap(on_finish, m_on_finish);
+ on_finish->complete(m_state == STATE_SHUTTING_DOWN ? 0 : r);
+ }
+}
+
+template <typename I>
+void RemotePoolPoller<I>::handle_task() {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+ m_timer_task = nullptr;
+
+ auto ctx = new LambdaContext([this](int) {
+ get_mirror_uuid();
+ });
+ m_threads->work_queue->queue(ctx);
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::RemotePoolPoller<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/RemotePoolPoller.h b/src/tools/rbd_mirror/RemotePoolPoller.h
new file mode 100644
index 000000000..19d803ca1
--- /dev/null
+++ b/src/tools/rbd_mirror/RemotePoolPoller.h
@@ -0,0 +1,133 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H
+#define CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H
+
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace remote_pool_poller {
+
+struct Listener {
+ virtual ~Listener() {}
+
+ virtual void handle_updated(const RemotePoolMeta& remote_pool_meta) = 0;
+};
+
+}; // namespace remote_pool_poller
+
+template <typename ImageCtxT>
+class RemotePoolPoller {
+public:
+ static RemotePoolPoller* create(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& remote_io_ctx,
+ const std::string& site_name,
+ const std::string& local_mirror_uuid,
+ remote_pool_poller::Listener& listener) {
+ return new RemotePoolPoller(threads, remote_io_ctx, site_name,
+ local_mirror_uuid, listener);
+ }
+
+ RemotePoolPoller(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& remote_io_ctx,
+ const std::string& site_name,
+ const std::string& local_mirror_uuid,
+ remote_pool_poller::Listener& listener)
+ : m_threads(threads),
+ m_remote_io_ctx(remote_io_ctx),
+ m_site_name(site_name),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_listener(listener) {
+ }
+ ~RemotePoolPoller();
+
+ void init(Context* on_finish);
+ void shut_down(Context* on_finish);
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * |/----------------------------\
+ * | |
+ * v |
+ * MIRROR_UUID_GET |
+ * | |
+ * v |
+ * MIRROR_PEER_PING |
+ * | |
+ * v |
+ * MIRROR_PEER_LIST |
+ * | |
+ * v |
+ * MIRROR_UUID_GET |
+ * | |
+ * v (skip if no changes) |
+ * NOTIFY_LISTENER |
+ * | |
+ * | (repeat periodically) |
+ * |\----------------------------/
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ enum State {
+ STATE_INITIALIZING,
+ STATE_POLLING,
+ STATE_SHUTTING_DOWN
+ };
+
+ Threads<ImageCtxT>* m_threads;
+ librados::IoCtx& m_remote_io_ctx;
+ std::string m_site_name;
+ std::string m_local_mirror_uuid;
+ remote_pool_poller::Listener& m_listener;
+
+ bufferlist m_out_bl;
+
+ RemotePoolMeta m_remote_pool_meta;
+ bool m_updated = false;
+
+ State m_state = STATE_INITIALIZING;
+ Context* m_timer_task = nullptr;
+ Context* m_on_finish = nullptr;
+
+ void get_mirror_uuid();
+ void handle_get_mirror_uuid(int r);
+
+ void mirror_peer_ping();
+ void handle_mirror_peer_ping(int r);
+
+ void mirror_peer_list();
+ void handle_mirror_peer_list(int r);
+
+ void notify_listener();
+
+ void schedule_task(int r);
+ void handle_task();
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::RemotePoolPoller<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H
diff --git a/src/tools/rbd_mirror/ServiceDaemon.cc b/src/tools/rbd_mirror/ServiceDaemon.cc
new file mode 100644
index 000000000..f3cabcc87
--- /dev/null
+++ b/src/tools/rbd_mirror/ServiceDaemon.cc
@@ -0,0 +1,327 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/ServiceDaemon.h"
+#include "include/Context.h"
+#include "include/stringify.h"
+#include "common/ceph_context.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/Timer.h"
+#include "tools/rbd_mirror/Threads.h"
+#include <sstream>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ServiceDaemon: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+namespace {
+
+const std::string RBD_MIRROR_AUTH_ID_PREFIX("rbd-mirror.");
+
+struct AttributeDumpVisitor : public boost::static_visitor<void> {
+ ceph::Formatter *f;
+ const std::string& name;
+
+ AttributeDumpVisitor(ceph::Formatter *f, const std::string& name)
+ : f(f), name(name) {
+ }
+
+ void operator()(bool val) const {
+ f->dump_bool(name.c_str(), val);
+ }
+ void operator()(uint64_t val) const {
+ f->dump_unsigned(name.c_str(), val);
+ }
+ void operator()(const std::string& val) const {
+ f->dump_string(name.c_str(), val);
+ }
+};
+
+} // anonymous namespace
+
+using namespace service_daemon;
+
+template <typename I>
+ServiceDaemon<I>::ServiceDaemon(CephContext *cct, RadosRef rados,
+ Threads<I>* threads)
+ : m_cct(cct), m_rados(rados), m_threads(threads) {
+ dout(20) << dendl;
+}
+
+template <typename I>
+ServiceDaemon<I>::~ServiceDaemon() {
+ dout(20) << dendl;
+ std::lock_guard timer_locker{m_threads->timer_lock};
+ if (m_timer_ctx != nullptr) {
+ m_threads->timer->cancel_event(m_timer_ctx);
+ update_status();
+ }
+}
+
+template <typename I>
+int ServiceDaemon<I>::init() {
+ dout(20) << dendl;
+
+ std::string id = m_cct->_conf->name.get_id();
+ if (id.find(RBD_MIRROR_AUTH_ID_PREFIX) == 0) {
+ id = id.substr(RBD_MIRROR_AUTH_ID_PREFIX.size());
+ }
+
+ std::string instance_id = stringify(m_rados->get_instance_id());
+ std::map<std::string, std::string> service_metadata = {
+ {"id", id}, {"instance_id", instance_id}};
+ int r = m_rados->service_daemon_register("rbd-mirror", instance_id,
+ service_metadata);
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+}
+
+template <typename I>
+void ServiceDaemon<I>::add_pool(int64_t pool_id, const std::string& pool_name) {
+ dout(20) << "pool_id=" << pool_id << ", pool_name=" << pool_name << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ m_pools.insert({pool_id, {pool_name}});
+ }
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::remove_pool(int64_t pool_id) {
+ dout(20) << "pool_id=" << pool_id << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ m_pools.erase(pool_id);
+ }
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::add_namespace(int64_t pool_id,
+ const std::string& namespace_name) {
+ dout(20) << "pool_id=" << pool_id << ", namespace=" << namespace_name
+ << dendl;
+
+ std::lock_guard locker{m_lock};
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+ pool_it->second.ns_attributes[namespace_name];
+
+ // don't schedule update status as the namespace attributes are empty yet
+}
+
+template <typename I>
+void ServiceDaemon<I>::remove_namespace(int64_t pool_id,
+ const std::string& namespace_name) {
+ dout(20) << "pool_id=" << pool_id << ", namespace=" << namespace_name
+ << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+ pool_it->second.ns_attributes.erase(namespace_name);
+ }
+ schedule_update_status();
+}
+
+template <typename I>
+uint64_t ServiceDaemon<I>::add_or_update_callout(int64_t pool_id,
+ uint64_t callout_id,
+ CalloutLevel callout_level,
+ const std::string& text) {
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "callout_id=" << callout_id << ", "
+ << "callout_level=" << callout_level << ", "
+ << "text=" << text << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return CALLOUT_ID_NONE;
+ }
+
+ if (callout_id == CALLOUT_ID_NONE) {
+ callout_id = ++m_callout_id;
+ }
+ pool_it->second.callouts[callout_id] = {callout_level, text};
+ }
+
+ schedule_update_status();
+ return callout_id;
+}
+
+template <typename I>
+void ServiceDaemon<I>::remove_callout(int64_t pool_id, uint64_t callout_id) {
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "callout_id=" << callout_id << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+ pool_it->second.callouts.erase(callout_id);
+ }
+
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::add_or_update_attribute(int64_t pool_id,
+ const std::string& key,
+ const AttributeValue& value) {
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "key=" << key << ", "
+ << "value=" << value << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+ pool_it->second.attributes[key] = value;
+ }
+
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::add_or_update_namespace_attribute(
+ int64_t pool_id, const std::string& namespace_name, const std::string& key,
+ const AttributeValue& value) {
+ if (namespace_name.empty()) {
+ add_or_update_attribute(pool_id, key, value);
+ return;
+ }
+
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "namespace=" << namespace_name << ", "
+ << "key=" << key << ", "
+ << "value=" << value << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+
+ auto ns_it = pool_it->second.ns_attributes.find(namespace_name);
+ if (ns_it == pool_it->second.ns_attributes.end()) {
+ return;
+ }
+
+ ns_it->second[key] = value;
+ }
+
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::remove_attribute(int64_t pool_id,
+ const std::string& key) {
+ dout(20) << "pool_id=" << pool_id << ", "
+ << "key=" << key << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ auto pool_it = m_pools.find(pool_id);
+ if (pool_it == m_pools.end()) {
+ return;
+ }
+ pool_it->second.attributes.erase(key);
+ }
+
+ schedule_update_status();
+}
+
+template <typename I>
+void ServiceDaemon<I>::schedule_update_status() {
+ std::lock_guard timer_locker{m_threads->timer_lock};
+ if (m_timer_ctx != nullptr) {
+ return;
+ }
+
+ m_timer_ctx = new LambdaContext([this](int) {
+ m_timer_ctx = nullptr;
+ update_status();
+ });
+ m_threads->timer->add_event_after(1, m_timer_ctx);
+}
+
+template <typename I>
+void ServiceDaemon<I>::update_status() {
+ dout(20) << dendl;
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+
+ ceph::JSONFormatter f;
+ {
+ std::lock_guard locker{m_lock};
+ f.open_object_section("pools");
+ for (auto& pool_pair : m_pools) {
+ f.open_object_section(stringify(pool_pair.first).c_str());
+ f.dump_string("name", pool_pair.second.name);
+ f.open_object_section("callouts");
+ for (auto& callout : pool_pair.second.callouts) {
+ f.open_object_section(stringify(callout.first).c_str());
+ f.dump_string("level", stringify(callout.second.level).c_str());
+ f.dump_string("text", callout.second.text.c_str());
+ f.close_section();
+ }
+ f.close_section(); // callouts
+
+ for (auto& attribute : pool_pair.second.attributes) {
+ AttributeDumpVisitor attribute_dump_visitor(&f, attribute.first);
+ boost::apply_visitor(attribute_dump_visitor, attribute.second);
+ }
+
+ if (!pool_pair.second.ns_attributes.empty()) {
+ f.open_object_section("namespaces");
+ for (auto& [ns, attributes] : pool_pair.second.ns_attributes) {
+ f.open_object_section(ns.c_str());
+ for (auto& [key, value] : attributes) {
+ AttributeDumpVisitor attribute_dump_visitor(&f, key);
+ boost::apply_visitor(attribute_dump_visitor, value);
+ }
+ f.close_section(); // namespace
+ }
+ f.close_section(); // namespaces
+ }
+ f.close_section(); // pool
+ }
+ f.close_section(); // pools
+ }
+
+ std::stringstream ss;
+ f.flush(ss);
+
+ int r = m_rados->service_daemon_update_status({{"json", ss.str()}});
+ if (r < 0) {
+ derr << "failed to update service daemon status: " << cpp_strerror(r)
+ << dendl;
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ServiceDaemon.h b/src/tools/rbd_mirror/ServiceDaemon.h
new file mode 100644
index 000000000..8b1e0f584
--- /dev/null
+++ b/src/tools/rbd_mirror/ServiceDaemon.h
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_H
+#define CEPH_RBD_MIRROR_SERVICE_DAEMON_H
+
+#include "common/ceph_mutex.h"
+#include "include/common_fwd.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <map>
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ServiceDaemon {
+public:
+ ServiceDaemon(CephContext *cct, RadosRef rados, Threads<ImageCtxT>* threads);
+ ~ServiceDaemon();
+
+ int init();
+
+ void add_pool(int64_t pool_id, const std::string& pool_name);
+ void remove_pool(int64_t pool_id);
+
+ void add_namespace(int64_t pool_id, const std::string& namespace_name);
+ void remove_namespace(int64_t pool_id, const std::string& namespace_name);
+
+ uint64_t add_or_update_callout(int64_t pool_id, uint64_t callout_id,
+ service_daemon::CalloutLevel callout_level,
+ const std::string& text);
+ void remove_callout(int64_t pool_id, uint64_t callout_id);
+
+ void add_or_update_attribute(int64_t pool_id, const std::string& key,
+ const service_daemon::AttributeValue& value);
+ void add_or_update_namespace_attribute(
+ int64_t pool_id, const std::string& namespace_name,
+ const std::string& key, const service_daemon::AttributeValue& value);
+ void remove_attribute(int64_t pool_id, const std::string& key);
+
+private:
+ struct Callout {
+ service_daemon::CalloutLevel level;
+ std::string text;
+
+ Callout() : level(service_daemon::CALLOUT_LEVEL_INFO) {
+ }
+ Callout(service_daemon::CalloutLevel level, const std::string& text)
+ : level(level), text(text) {
+ }
+ };
+ typedef std::map<uint64_t, Callout> Callouts;
+ typedef std::map<std::string, service_daemon::AttributeValue> Attributes;
+ typedef std::map<std::string, Attributes> NamespaceAttributes;
+
+ struct Pool {
+ std::string name;
+ Callouts callouts;
+ Attributes attributes;
+ NamespaceAttributes ns_attributes;
+
+ Pool(const std::string& name) : name(name) {
+ }
+ };
+
+ typedef std::map<int64_t, Pool> Pools;
+
+ CephContext *m_cct;
+ RadosRef m_rados;
+ Threads<ImageCtxT>* m_threads;
+
+ ceph::mutex m_lock = ceph::make_mutex("rbd::mirror::ServiceDaemon");
+ Pools m_pools;
+ uint64_t m_callout_id = service_daemon::CALLOUT_ID_NONE;
+
+ Context* m_timer_ctx = nullptr;
+
+ void schedule_update_status();
+ void update_status();
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_H
diff --git a/src/tools/rbd_mirror/Threads.cc b/src/tools/rbd_mirror/Threads.cc
new file mode 100644
index 000000000..b0c762641
--- /dev/null
+++ b/src/tools/rbd_mirror/Threads.cc
@@ -0,0 +1,38 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/Threads.h"
+#include "common/Timer.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/asio/ContextWQ.h"
+
+namespace rbd {
+namespace mirror {
+
+template <typename I>
+Threads<I>::Threads(std::shared_ptr<librados::Rados>& rados) {
+ auto cct = static_cast<CephContext*>(rados->cct());
+ asio_engine = new librbd::AsioEngine(rados);
+ work_queue = asio_engine->get_work_queue();
+
+ timer = new SafeTimer(cct, timer_lock, true);
+ timer->init();
+}
+
+template <typename I>
+Threads<I>::~Threads() {
+ {
+ std::lock_guard timer_locker{timer_lock};
+ timer->shutdown();
+ }
+ delete timer;
+
+ work_queue->drain();
+ delete asio_engine;
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::Threads<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/Threads.h b/src/tools/rbd_mirror/Threads.h
new file mode 100644
index 000000000..35c0b0f1c
--- /dev/null
+++ b/src/tools/rbd_mirror/Threads.h
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_THREADS_H
+#define CEPH_RBD_MIRROR_THREADS_H
+
+#include "include/common_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include <memory>
+
+class ThreadPool;
+
+namespace librbd {
+struct AsioEngine;
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Threads {
+public:
+ librbd::AsioEngine* asio_engine = nullptr;
+ librbd::asio::ContextWQ* work_queue = nullptr;
+
+ SafeTimer *timer = nullptr;
+ ceph::mutex timer_lock = ceph::make_mutex("Threads::timer_lock");
+
+ explicit Threads(std::shared_ptr<librados::Rados>& rados);
+ Threads(const Threads&) = delete;
+ Threads& operator=(const Threads&) = delete;
+
+ ~Threads();
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::Threads<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_THREADS_H
diff --git a/src/tools/rbd_mirror/Throttler.cc b/src/tools/rbd_mirror/Throttler.cc
new file mode 100644
index 000000000..07d6e397e
--- /dev/null
+++ b/src/tools/rbd_mirror/Throttler.cc
@@ -0,0 +1,240 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "Throttler.h"
+#include "common/Formatter.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::Throttler:: " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+
+template <typename I>
+Throttler<I>::Throttler(CephContext *cct, const std::string &config_key)
+ : m_cct(cct), m_config_key(config_key),
+ m_config_keys{m_config_key.c_str(), nullptr},
+ m_lock(ceph::make_mutex(
+ librbd::util::unique_lock_name("rbd::mirror::Throttler", this))),
+ m_max_concurrent_ops(cct->_conf.get_val<uint64_t>(m_config_key)) {
+ dout(20) << m_config_key << "=" << m_max_concurrent_ops << dendl;
+ m_cct->_conf.add_observer(this);
+}
+
+template <typename I>
+Throttler<I>::~Throttler() {
+ m_cct->_conf.remove_observer(this);
+
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_inflight_ops.empty());
+ ceph_assert(m_queue.empty());
+}
+
+template <typename I>
+void Throttler<I>::start_op(const std::string &ns,
+ const std::string &id_,
+ Context *on_start) {
+ Id id{ns, id_};
+
+ dout(20) << "id=" << id << dendl;
+
+ int r = 0;
+ {
+ std::lock_guard locker{m_lock};
+
+ if (m_inflight_ops.count(id) > 0) {
+ dout(20) << "duplicate for already started op " << id << dendl;
+ } else if (m_queued_ops.count(id) > 0) {
+ dout(20) << "duplicate for already queued op " << id << dendl;
+ std::swap(m_queued_ops[id], on_start);
+ r = -ENOENT;
+ } else if (m_max_concurrent_ops == 0 ||
+ m_inflight_ops.size() < m_max_concurrent_ops) {
+ ceph_assert(m_queue.empty());
+ m_inflight_ops.insert(id);
+ dout(20) << "ready to start op for " << id << " ["
+ << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]"
+ << dendl;
+ } else {
+ m_queue.push_back(id);
+ std::swap(m_queued_ops[id], on_start);
+ dout(20) << "op for " << id << " has been queued" << dendl;
+ }
+ }
+
+ if (on_start != nullptr) {
+ on_start->complete(r);
+ }
+}
+
+template <typename I>
+bool Throttler<I>::cancel_op(const std::string &ns,
+ const std::string &id_) {
+ Id id{ns, id_};
+
+ dout(20) << "id=" << id << dendl;
+
+ Context *on_start = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ auto it = m_queued_ops.find(id);
+ if (it != m_queued_ops.end()) {
+ dout(20) << "canceled queued op for " << id << dendl;
+ m_queue.remove(id);
+ on_start = it->second;
+ m_queued_ops.erase(it);
+ }
+ }
+
+ if (on_start == nullptr) {
+ return false;
+ }
+
+ on_start->complete(-ECANCELED);
+ return true;
+}
+
+template <typename I>
+void Throttler<I>::finish_op(const std::string &ns,
+ const std::string &id_) {
+ Id id{ns, id_};
+
+ dout(20) << "id=" << id << dendl;
+
+ if (cancel_op(ns, id_)) {
+ return;
+ }
+
+ Context *on_start = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+
+ m_inflight_ops.erase(id);
+
+ if (m_inflight_ops.size() < m_max_concurrent_ops && !m_queue.empty()) {
+ auto id = m_queue.front();
+ auto it = m_queued_ops.find(id);
+ ceph_assert(it != m_queued_ops.end());
+ m_inflight_ops.insert(id);
+ dout(20) << "ready to start op for " << id << " ["
+ << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]"
+ << dendl;
+ on_start = it->second;
+ m_queued_ops.erase(it);
+ m_queue.pop_front();
+ }
+ }
+
+ if (on_start != nullptr) {
+ on_start->complete(0);
+ }
+}
+
+template <typename I>
+void Throttler<I>::drain(const std::string &ns, int r) {
+ dout(20) << "ns=" << ns << dendl;
+
+ std::map<Id, Context *> queued_ops;
+ {
+ std::lock_guard locker{m_lock};
+ for (auto it = m_queued_ops.begin(); it != m_queued_ops.end(); ) {
+ if (it->first.first == ns) {
+ queued_ops[it->first] = it->second;
+ m_queue.remove(it->first);
+ it = m_queued_ops.erase(it);
+ } else {
+ it++;
+ }
+ }
+ for (auto it = m_inflight_ops.begin(); it != m_inflight_ops.end(); ) {
+ if (it->first == ns) {
+ dout(20) << "inflight_op " << *it << dendl;
+ it = m_inflight_ops.erase(it);
+ } else {
+ it++;
+ }
+ }
+ }
+
+ for (auto &it : queued_ops) {
+ dout(20) << "queued_op " << it.first << dendl;
+ it.second->complete(r);
+ }
+}
+
+template <typename I>
+void Throttler<I>::set_max_concurrent_ops(uint32_t max) {
+ dout(20) << "max=" << max << dendl;
+
+ std::list<Context *> ops;
+ {
+ std::lock_guard locker{m_lock};
+ m_max_concurrent_ops = max;
+
+ // Start waiting ops in the case of available free slots
+ while ((m_max_concurrent_ops == 0 ||
+ m_inflight_ops.size() < m_max_concurrent_ops) &&
+ !m_queue.empty()) {
+ auto id = m_queue.front();
+ m_inflight_ops.insert(id);
+ dout(20) << "ready to start op for " << id << " ["
+ << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]"
+ << dendl;
+ auto it = m_queued_ops.find(id);
+ ceph_assert(it != m_queued_ops.end());
+ ops.push_back(it->second);
+ m_queued_ops.erase(it);
+ m_queue.pop_front();
+ }
+ }
+
+ for (const auto& ctx : ops) {
+ ctx->complete(0);
+ }
+}
+
+template <typename I>
+void Throttler<I>::print_status(ceph::Formatter *f) {
+ dout(20) << dendl;
+
+ std::lock_guard locker{m_lock};
+
+ f->dump_int("max_parallel_requests", m_max_concurrent_ops);
+ f->dump_int("running_requests", m_inflight_ops.size());
+ f->dump_int("waiting_requests", m_queue.size());
+}
+
+template <typename I>
+const char** Throttler<I>::get_tracked_conf_keys() const {
+ return m_config_keys;
+}
+
+template <typename I>
+void Throttler<I>::handle_conf_change(const ConfigProxy& conf,
+ const std::set<std::string> &changed) {
+ if (changed.count(m_config_key)) {
+ set_max_concurrent_ops(conf.get_val<uint64_t>(m_config_key));
+ }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::Throttler<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/Throttler.h b/src/tools/rbd_mirror/Throttler.h
new file mode 100644
index 000000000..32080238a
--- /dev/null
+++ b/src/tools/rbd_mirror/Throttler.h
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_THROTTLER_H
+#define RBD_MIRROR_THROTTLER_H
+
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "common/ceph_mutex.h"
+#include "common/config_obs.h"
+#include "include/common_fwd.h"
+
+class Context;
+
+namespace ceph { class Formatter; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class Throttler : public md_config_obs_t {
+public:
+ static Throttler *create(
+ CephContext *cct,
+ const std::string &config_key) {
+ return new Throttler(cct, config_key);
+ }
+ void destroy() {
+ delete this;
+ }
+
+ Throttler(CephContext *cct,
+ const std::string &config_key);
+ ~Throttler() override;
+
+ void set_max_concurrent_ops(uint32_t max);
+ void start_op(const std::string &ns, const std::string &id,
+ Context *on_start);
+ bool cancel_op(const std::string &ns, const std::string &id);
+ void finish_op(const std::string &ns, const std::string &id);
+ void drain(const std::string &ns, int r);
+
+ void print_status(ceph::Formatter *f);
+
+private:
+ typedef std::pair<std::string, std::string> Id;
+
+ CephContext *m_cct;
+ const std::string m_config_key;
+ mutable const char* m_config_keys[2];
+
+ ceph::mutex m_lock;
+ uint32_t m_max_concurrent_ops;
+ std::list<Id> m_queue;
+ std::map<Id, Context *> m_queued_ops;
+ std::set<Id> m_inflight_ops;
+
+ const char **get_tracked_conf_keys() const override;
+ void handle_conf_change(const ConfigProxy& conf,
+ const std::set<std::string> &changed) override;
+};
+
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::Throttler<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_THROTTLER_H
diff --git a/src/tools/rbd_mirror/Types.cc b/src/tools/rbd_mirror/Types.cc
new file mode 100644
index 000000000..cd71c73b1
--- /dev/null
+++ b/src/tools/rbd_mirror/Types.cc
@@ -0,0 +1,32 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/Types.h"
+
+namespace rbd {
+namespace mirror {
+
+std::ostream &operator<<(std::ostream &os, const ImageId &image_id) {
+ return os << "global id=" << image_id.global_id << ", "
+ << "id=" << image_id.id;
+}
+
+std::ostream& operator<<(std::ostream& lhs,
+ const LocalPoolMeta& rhs) {
+ return lhs << "mirror_uuid=" << rhs.mirror_uuid;
+}
+
+std::ostream& operator<<(std::ostream& lhs,
+ const RemotePoolMeta& rhs) {
+ return lhs << "mirror_uuid=" << rhs.mirror_uuid << ", "
+ "mirror_peer_uuid=" << rhs.mirror_peer_uuid;
+}
+
+std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer) {
+ return lhs << "uuid: " << peer.uuid
+ << " cluster: " << peer.cluster_name
+ << " client: " << peer.client_name;
+}
+
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h
new file mode 100644
index 000000000..9bba58fb1
--- /dev/null
+++ b/src/tools/rbd_mirror/Types.h
@@ -0,0 +1,171 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_TYPES_H
+#define CEPH_RBD_MIRROR_TYPES_H
+
+#include <iostream>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct MirrorStatusUpdater;
+
+// Performance counters
+enum {
+ l_rbd_mirror_journal_first = 27000,
+ l_rbd_mirror_journal_entries,
+ l_rbd_mirror_journal_replay_bytes,
+ l_rbd_mirror_journal_replay_latency,
+ l_rbd_mirror_journal_last,
+ l_rbd_mirror_snapshot_first,
+ l_rbd_mirror_snapshot_snapshots,
+ l_rbd_mirror_snapshot_sync_time,
+ l_rbd_mirror_snapshot_sync_bytes,
+ // per-image only counters below
+ l_rbd_mirror_snapshot_remote_timestamp,
+ l_rbd_mirror_snapshot_local_timestamp,
+ l_rbd_mirror_snapshot_last_sync_time,
+ l_rbd_mirror_snapshot_last_sync_bytes,
+ l_rbd_mirror_snapshot_last,
+};
+
+typedef std::shared_ptr<librados::Rados> RadosRef;
+typedef std::shared_ptr<librados::IoCtx> IoCtxRef;
+typedef std::shared_ptr<librbd::Image> ImageRef;
+
+struct ImageId {
+ std::string global_id;
+ std::string id;
+
+ explicit ImageId(const std::string &global_id) : global_id(global_id) {
+ }
+ ImageId(const std::string &global_id, const std::string &id)
+ : global_id(global_id), id(id) {
+ }
+
+ inline bool operator==(const ImageId &rhs) const {
+ return (global_id == rhs.global_id && id == rhs.id);
+ }
+ inline bool operator<(const ImageId &rhs) const {
+ return global_id < rhs.global_id;
+ }
+};
+
+std::ostream &operator<<(std::ostream &, const ImageId &image_id);
+
+typedef std::set<ImageId> ImageIds;
+
+struct LocalPoolMeta {
+ LocalPoolMeta() {}
+ LocalPoolMeta(const std::string& mirror_uuid)
+ : mirror_uuid(mirror_uuid) {
+ }
+
+ std::string mirror_uuid;
+};
+
+std::ostream& operator<<(std::ostream& lhs,
+ const LocalPoolMeta& local_pool_meta);
+
+struct RemotePoolMeta {
+ RemotePoolMeta() {}
+ RemotePoolMeta(const std::string& mirror_uuid,
+ const std::string& mirror_peer_uuid)
+ : mirror_uuid(mirror_uuid),
+ mirror_peer_uuid(mirror_peer_uuid) {
+ }
+
+ std::string mirror_uuid;
+ std::string mirror_peer_uuid;
+};
+
+std::ostream& operator<<(std::ostream& lhs,
+ const RemotePoolMeta& remote_pool_meta);
+
+template <typename I>
+struct Peer {
+ std::string uuid;
+ mutable librados::IoCtx io_ctx;
+ RemotePoolMeta remote_pool_meta;
+ MirrorStatusUpdater<I>* mirror_status_updater = nullptr;
+
+ Peer() {
+ }
+ Peer(const std::string& uuid,
+ librados::IoCtx& io_ctx,
+ const RemotePoolMeta& remote_pool_meta,
+ MirrorStatusUpdater<I>* mirror_status_updater)
+ : io_ctx(io_ctx),
+ remote_pool_meta(remote_pool_meta),
+ mirror_status_updater(mirror_status_updater) {
+ }
+
+ inline bool operator<(const Peer &rhs) const {
+ return uuid < rhs.uuid;
+ }
+};
+
+template <typename I>
+std::ostream& operator<<(std::ostream& lhs, const Peer<I>& peer) {
+ return lhs << peer.remote_pool_meta;
+}
+
+struct PeerSpec {
+ PeerSpec() = default;
+ PeerSpec(const std::string &uuid, const std::string &cluster_name,
+ const std::string &client_name)
+ : uuid(uuid), cluster_name(cluster_name), client_name(client_name)
+ {
+ }
+ PeerSpec(const librbd::mirror_peer_site_t &peer) :
+ uuid(peer.uuid),
+ cluster_name(peer.site_name),
+ client_name(peer.client_name)
+ {
+ }
+
+ std::string uuid;
+ std::string cluster_name;
+ std::string client_name;
+
+ /// optional config properties
+ std::string mon_host;
+ std::string key;
+
+ bool operator==(const PeerSpec& rhs) const {
+ return (uuid == rhs.uuid &&
+ cluster_name == rhs.cluster_name &&
+ client_name == rhs.client_name &&
+ mon_host == rhs.mon_host &&
+ key == rhs.key);
+ }
+ bool operator<(const PeerSpec& rhs) const {
+ if (uuid != rhs.uuid) {
+ return uuid < rhs.uuid;
+ } else if (cluster_name != rhs.cluster_name) {
+ return cluster_name < rhs.cluster_name;
+ } else if (client_name != rhs.client_name) {
+ return client_name < rhs.client_name;
+ } else if (mon_host < rhs.mon_host) {
+ return mon_host < rhs.mon_host;
+ } else {
+ return key < rhs.key;
+ }
+ }
+};
+
+std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer);
+
+} // namespace mirror
+} // namespace rbd
+
+
+#endif // CEPH_RBD_MIRROR_TYPES_H
diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc
new file mode 100644
index 000000000..19a98804c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc
@@ -0,0 +1,299 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Policy.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::SnapshotPurgeRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+void SnapshotPurgeRequest<I>::send() {
+ open_image();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::open_image() {
+ dout(10) << dendl;
+ m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false);
+
+ // ensure non-primary images can be modified
+ m_image_ctx->read_only_mask &= ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+
+ {
+ std::unique_lock image_locker{m_image_ctx->image_lock};
+ m_image_ctx->set_journal_policy(new JournalPolicy());
+ }
+
+ Context *ctx = create_context_callback<
+ SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_open_image>(
+ this);
+ m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx);
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_open_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to open image '" << m_image_id << "': " << cpp_strerror(r)
+ << dendl;
+ m_image_ctx = nullptr;
+
+ finish(r);
+ return;
+ }
+
+ acquire_lock();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::acquire_lock() {
+ dout(10) << dendl;
+
+ m_image_ctx->owner_lock.lock_shared();
+ if (m_image_ctx->exclusive_lock == nullptr) {
+ m_image_ctx->owner_lock.unlock_shared();
+
+ start_snap_unprotect();
+ return;
+ }
+
+ m_image_ctx->exclusive_lock->acquire_lock(create_context_callback<
+ SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_acquire_lock>(
+ this));
+ m_image_ctx->owner_lock.unlock_shared();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_acquire_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ start_snap_unprotect();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::start_snap_unprotect() {
+ dout(10) << dendl;
+
+ {
+ std::shared_lock image_locker{m_image_ctx->image_lock};
+ m_snaps = m_image_ctx->snaps;
+ }
+ snap_unprotect();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::snap_unprotect() {
+ if (m_snaps.empty()) {
+ close_image();
+ return;
+ }
+
+ librados::snap_t snap_id = m_snaps.back();
+ m_image_ctx->image_lock.lock_shared();
+ int r = m_image_ctx->get_snap_namespace(snap_id, &m_snap_namespace);
+ if (r < 0) {
+ m_image_ctx->image_lock.unlock_shared();
+
+ derr << "failed to get snap namespace: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ r = m_image_ctx->get_snap_name(snap_id, &m_snap_name);
+ if (r < 0) {
+ m_image_ctx->image_lock.unlock_shared();
+
+ derr << "failed to get snap name: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ bool is_protected;
+ r = m_image_ctx->is_snap_protected(snap_id, &is_protected);
+ if (r < 0) {
+ m_image_ctx->image_lock.unlock_shared();
+
+ derr << "failed to get snap protection status: " << cpp_strerror(r)
+ << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+ m_image_ctx->image_lock.unlock_shared();
+
+ if (!is_protected) {
+ snap_remove();
+ return;
+ }
+
+ dout(10) << "snap_id=" << snap_id << ", "
+ << "snap_namespace=" << m_snap_namespace << ", "
+ << "snap_name=" << m_snap_name << dendl;
+
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ derr << "lost exclusive lock" << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ auto ctx = new LambdaContext([this, finish_op_ctx](int r) {
+ handle_snap_unprotect(r);
+ finish_op_ctx->complete(0);
+ });
+ std::shared_lock owner_locker{m_image_ctx->owner_lock};
+ m_image_ctx->operations->execute_snap_unprotect(
+ m_snap_namespace, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_snap_unprotect(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBUSY) {
+ dout(10) << "snapshot in-use" << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ } else if (r < 0) {
+ derr << "failed to unprotect snapshot: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ {
+ // avoid the need to refresh to delete the newly unprotected snapshot
+ std::shared_lock image_locker{m_image_ctx->image_lock};
+ librados::snap_t snap_id = m_snaps.back();
+ auto snap_info_it = m_image_ctx->snap_info.find(snap_id);
+ if (snap_info_it != m_image_ctx->snap_info.end()) {
+ snap_info_it->second.protection_status =
+ RBD_PROTECTION_STATUS_UNPROTECTED;
+ }
+ }
+
+ snap_remove();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::snap_remove() {
+ librados::snap_t snap_id = m_snaps.back();
+ dout(10) << "snap_id=" << snap_id << ", "
+ << "snap_namespace=" << m_snap_namespace << ", "
+ << "snap_name=" << m_snap_name << dendl;
+
+ int r;
+ auto finish_op_ctx = start_lock_op(&r);
+ if (finish_op_ctx == nullptr) {
+ derr << "lost exclusive lock" << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ auto ctx = new LambdaContext([this, finish_op_ctx](int r) {
+ handle_snap_remove(r);
+ finish_op_ctx->complete(0);
+ });
+ std::shared_lock owner_locker{m_image_ctx->owner_lock};
+ m_image_ctx->operations->execute_snap_remove(
+ m_snap_namespace, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_snap_remove(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBUSY) {
+ dout(10) << "snapshot in-use" << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ } else if (r < 0) {
+ derr << "failed to remove snapshot: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ m_snaps.pop_back();
+ snap_unprotect();
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::close_image() {
+ dout(10) << dendl;
+
+ m_image_ctx->state->close(create_context_callback<
+ SnapshotPurgeRequest<I>,
+ &SnapshotPurgeRequest<I>::handle_close_image>(this));
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::handle_close_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_image_ctx = nullptr;
+
+ if (r < 0) {
+ derr << "failed to close: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+ finish(0);
+}
+
+template <typename I>
+void SnapshotPurgeRequest<I>::finish(int r) {
+ if (m_ret_val < 0) {
+ r = m_ret_val;
+ }
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+template <typename I>
+Context *SnapshotPurgeRequest<I>::start_lock_op(int* r) {
+ std::shared_lock owner_locker{m_image_ctx->owner_lock};
+ if (m_image_ctx->exclusive_lock == nullptr) {
+ return new LambdaContext([](int r) {});
+ }
+ return m_image_ctx->exclusive_lock->start_op(r);
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h
new file mode 100644
index 000000000..70cae8518
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h
@@ -0,0 +1,105 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H
+
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include <string>
+#include <vector>
+
+class Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SnapshotPurgeRequest {
+public:
+ static SnapshotPurgeRequest* create(librados::IoCtx &io_ctx,
+ const std::string &image_id,
+ Context *on_finish) {
+ return new SnapshotPurgeRequest(io_ctx, image_id, on_finish);
+ }
+
+ SnapshotPurgeRequest(librados::IoCtx &io_ctx, const std::string &image_id,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_image_id(image_id), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /*
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * OPEN_IMAGE
+ * |
+ * v
+ * ACQUIRE_LOCK
+ * |
+ * | (repeat for each snapshot)
+ * |/------------------------\
+ * | |
+ * v (skip if not needed) |
+ * SNAP_UNPROTECT |
+ * | |
+ * v (skip if not needed) |
+ * SNAP_REMOVE -----------------/
+ * |
+ * v
+ * CLOSE_IMAGE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_image_id;
+ Context *m_on_finish;
+
+ ImageCtxT *m_image_ctx = nullptr;
+ int m_ret_val = 0;
+
+ std::vector<librados::snap_t> m_snaps;
+ cls::rbd::SnapshotNamespace m_snap_namespace;
+ std::string m_snap_name;
+
+ void open_image();
+ void handle_open_image(int r);
+
+ void acquire_lock();
+ void handle_acquire_lock(int r);
+
+ void start_snap_unprotect();
+ void snap_unprotect();
+ void handle_snap_unprotect(int r);
+
+ void snap_remove();
+ void handle_snap_remove(int r);
+
+ void close_image();
+ void handle_close_image(int r);
+
+ void finish(int r);
+
+ Context *start_lock_op(int* r);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H
+
diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc
new file mode 100644
index 000000000..e53923ef3
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc
@@ -0,0 +1,419 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/TrashWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/journal/ResetRequest.h"
+#include "librbd/mirror/ImageRemoveRequest.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include "librbd/trash/MoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashMoveRequest: " \
+ << this << " " << __func__ << ": "
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void TrashMoveRequest<I>::send() {
+ get_mirror_image_id();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::get_mirror_image_id() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashMoveRequest<I>,
+ &TrashMoveRequest<I>::handle_get_mirror_image_id>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_get_mirror_image_id(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_image_id_finish(&bl_it,
+ &m_image_id);
+ }
+ if (r == -ENOENT) {
+ dout(10) << "image " << m_global_image_id << " is not mirrored" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "error retrieving local id for image " << m_global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ get_mirror_info();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::get_mirror_info() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_get_mirror_info>(this);
+ auto req = librbd::mirror::GetInfoRequest<I>::create(
+ m_io_ctx, m_op_work_queue, m_image_id, &m_mirror_image, &m_promotion_state,
+ &m_primary_mirror_uuid, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_get_mirror_info(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(5) << "image " << m_global_image_id << " is not mirrored" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "error retrieving image primary info for image "
+ << m_global_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (m_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) {
+ dout(10) << "image " << m_global_image_id << " is local primary" << dendl;
+ finish(-EPERM);
+ return;
+ } else if (m_promotion_state == librbd::mirror::PROMOTION_STATE_ORPHAN &&
+ !m_resync) {
+ dout(10) << "image " << m_global_image_id << " is orphaned" << dendl;
+ finish(-EPERM);
+ return;
+ }
+
+ disable_mirror_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::disable_mirror_image() {
+ dout(10) << dendl;
+
+ m_mirror_image.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_set(&op, m_image_id, m_mirror_image);
+
+ auto aio_comp = create_rados_callback<
+ TrashMoveRequest<I>,
+ &TrashMoveRequest<I>::handle_disable_mirror_image>(this);
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_disable_mirror_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "local image is not mirrored, aborting deletion." << dendl;
+ finish(r);
+ return;
+ } else if (r == -EEXIST || r == -EINVAL) {
+ derr << "cannot disable mirroring for image " << m_global_image_id
+ << ": global_image_id has changed/reused: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "cannot disable mirroring for image " << m_global_image_id
+ << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ open_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::open_image() {
+ dout(10) << dendl;
+
+ m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false);
+
+ // ensure non-primary images can be modified
+ m_image_ctx->read_only_mask &= ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+
+ {
+ // don't attempt to open the journal
+ std::unique_lock image_locker{m_image_ctx->image_lock};
+ m_image_ctx->set_journal_policy(new JournalPolicy());
+ }
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_open_image>(this);
+ m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_open_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(5) << "mirror image does not exist, removing orphaned metadata" << dendl;
+ m_image_ctx = nullptr;
+ remove_mirror_image();
+ return;
+ }
+
+ if (r < 0) {
+ derr << "failed to open image: " << cpp_strerror(r) << dendl;
+ m_image_ctx = nullptr;
+ finish(r);
+ return;
+ }
+
+ if (m_image_ctx->old_format) {
+ derr << "cannot move v1 image to trash" << dendl;
+ m_ret_val = -EINVAL;
+ close_image();
+ return;
+ }
+
+ reset_journal();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::reset_journal() {
+ if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+ // snapshot-based mirroring doesn't require journal feature
+ acquire_lock();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ // TODO use Journal thread pool for journal ops until converted to ASIO
+ ContextWQ* context_wq;
+ librbd::Journal<>::get_work_queue(
+ reinterpret_cast<CephContext*>(m_io_ctx.cct()), &context_wq);
+
+ // ensure that if the image is recovered any peers will split-brain
+ auto ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_reset_journal>(this);
+ auto req = librbd::journal::ResetRequest<I>::create(
+ m_io_ctx, m_image_id, librbd::Journal<>::IMAGE_CLIENT_ID,
+ librbd::Journal<>::LOCAL_MIRROR_UUID, context_wq, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_reset_journal(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to reset journal: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ acquire_lock();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::acquire_lock() {
+ m_image_ctx->owner_lock.lock_shared();
+ if (m_image_ctx->exclusive_lock == nullptr) {
+ m_image_ctx->owner_lock.unlock_shared();
+
+ if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+ // snapshot-based mirroring doesn't require exclusive-lock
+ trash_move();
+ } else {
+ derr << "exclusive lock feature not enabled" << dendl;
+ m_ret_val = -EINVAL;
+ close_image();
+ }
+ return;
+ }
+
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_acquire_lock>(this);
+ m_image_ctx->exclusive_lock->block_requests(0);
+ m_image_ctx->exclusive_lock->acquire_lock(ctx);
+ m_image_ctx->owner_lock.unlock_shared();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_acquire_lock(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ trash_move();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::trash_move() {
+ dout(10) << dendl;
+
+ utime_t delete_time{ceph_clock_now()};
+ utime_t deferment_end_time{delete_time};
+ deferment_end_time +=
+ m_image_ctx->config.template get_val<uint64_t>("rbd_mirroring_delete_delay");
+
+ m_trash_image_spec = {
+ cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING, m_image_ctx->name, delete_time,
+ deferment_end_time};
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_trash_move>(this);
+ auto req = librbd::trash::MoveRequest<I>::create(
+ m_io_ctx, m_image_id, m_trash_image_spec, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_trash_move(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to move image to trash: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_image();
+ return;
+ }
+
+ m_moved_to_trash = true;
+ remove_mirror_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::remove_mirror_image() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ TrashMoveRequest<I>,
+ &TrashMoveRequest<I>::handle_remove_mirror_image>(this);
+ auto req = librbd::mirror::ImageRemoveRequest<I>::create(
+ m_io_ctx, m_global_image_id, m_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_remove_mirror_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "local image is not mirrored" << dendl;
+ } else if (r < 0) {
+ derr << "failed to remove mirror image state for " << m_global_image_id
+ << ": " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ }
+
+ close_image();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::close_image() {
+ dout(10) << dendl;
+
+ if (m_image_ctx == nullptr) {
+ handle_close_image(0);
+ return;
+ }
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_close_image>(this);
+ m_image_ctx->state->close(ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_close_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_image_ctx = nullptr;
+
+ if (r < 0) {
+ derr << "failed to close image: " << cpp_strerror(r) << dendl;
+ }
+
+ // don't send notification if we failed
+ if (!m_moved_to_trash) {
+ finish(0);
+ return;
+ }
+
+ notify_trash_add();
+}
+
+template <typename I>
+void TrashMoveRequest<I>::notify_trash_add() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_notify_trash_add>(this);
+ librbd::TrashWatcher<I>::notify_image_added(m_io_ctx, m_image_id,
+ m_trash_image_spec, ctx);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::handle_notify_trash_add(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void TrashMoveRequest<I>::finish(int r) {
+ if (m_ret_val < 0) {
+ r = m_ret_val;
+ }
+
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>;
+
diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h
new file mode 100644
index 000000000..5b3f02519
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h
@@ -0,0 +1,142 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/Types.h"
+#include <string>
+
+struct Context;
+namespace librbd {
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class TrashMoveRequest {
+public:
+ static TrashMoveRequest* create(librados::IoCtx& io_ctx,
+ const std::string& global_image_id,
+ bool resync,
+ librbd::asio::ContextWQ* op_work_queue,
+ Context* on_finish) {
+ return new TrashMoveRequest(io_ctx, global_image_id, resync, op_work_queue,
+ on_finish);
+ }
+
+ TrashMoveRequest(librados::IoCtx& io_ctx, const std::string& global_image_id,
+ bool resync, librbd::asio::ContextWQ* op_work_queue,
+ Context* on_finish)
+ : m_io_ctx(io_ctx), m_global_image_id(global_image_id), m_resync(resync),
+ m_op_work_queue(op_work_queue), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /*
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_MIRROR_IMAGE_ID
+ * |
+ * v
+ * GET_MIRROR_INFO
+ * |
+ * v
+ * DISABLE_MIRROR_IMAGE
+ * |
+ * v
+ * OPEN_IMAGE
+ * |
+ * v (skip if not needed)
+ * RESET_JOURNAL
+ * |
+ * v (skip if not needed)
+ * ACQUIRE_LOCK
+ * |
+ * v
+ * TRASH_MOVE
+ * |
+ * v
+ * REMOVE_MIRROR_IMAGE
+ * |
+ * v
+ * CLOSE_IMAGE
+ * |
+ * v
+ * NOTIFY_TRASH_ADD
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_global_image_id;
+ bool m_resync;
+ librbd::asio::ContextWQ *m_op_work_queue;
+ Context *m_on_finish;
+
+ ceph::bufferlist m_out_bl;
+ std::string m_image_id;
+ cls::rbd::MirrorImage m_mirror_image;
+ librbd::mirror::PromotionState m_promotion_state;
+ std::string m_primary_mirror_uuid;
+ cls::rbd::TrashImageSpec m_trash_image_spec;
+ ImageCtxT *m_image_ctx = nullptr;;
+ int m_ret_val = 0;
+ bool m_moved_to_trash = false;
+
+ void get_mirror_image_id();
+ void handle_get_mirror_image_id(int r);
+
+ void get_mirror_info();
+ void handle_get_mirror_info(int r);
+
+ void disable_mirror_image();
+ void handle_disable_mirror_image(int r);
+
+ void open_image();
+ void handle_open_image(int r);
+
+ void reset_journal();
+ void handle_reset_journal(int r);
+
+ void acquire_lock();
+ void handle_acquire_lock(int r);
+
+ void trash_move();
+ void handle_trash_move(int r);
+
+ void remove_mirror_image();
+ void handle_remove_mirror_image(int r);
+
+ void close_image();
+ void handle_close_image(int r);
+
+ void notify_trash_add();
+ void handle_notify_trash_add(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H
diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc
new file mode 100644
index 000000000..4d7c1c9df
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc
@@ -0,0 +1,265 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h"
+#include "include/ceph_assert.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/TrashWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/trash/RemoveRequest.h"
+#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashRemoveRequest: " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void TrashRemoveRequest<I>::send() {
+ *m_error_result = ERROR_RESULT_RETRY;
+
+ get_trash_image_spec();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::get_trash_image_spec() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::trash_get_start(&op, m_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_get_trash_image_spec>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_get_trash_image_spec(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::trash_get_finish(&bl_it, &m_trash_image_spec);
+ }
+
+ if (r == -ENOENT || (r >= 0 && m_trash_image_spec.source !=
+ cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING)) {
+ dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl;
+ finish(0);
+ return;
+ } else if (r < 0) {
+ derr << "error getting image id " << m_image_id << " info from trash: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ if (m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL &&
+ m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) {
+ dout(10) << "image " << m_image_id << " is not in an expected trash state: "
+ << m_trash_image_spec.state << dendl;
+ *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY;
+ finish(-EBUSY);
+ return;
+ }
+
+ set_trash_state();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::set_trash_state() {
+ if (m_trash_image_spec.state == cls::rbd::TRASH_IMAGE_STATE_REMOVING) {
+ get_snap_context();
+ return;
+ }
+
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::trash_state_set(&op, m_image_id,
+ cls::rbd::TRASH_IMAGE_STATE_REMOVING,
+ cls::rbd::TRASH_IMAGE_STATE_NORMAL);
+
+ auto aio_comp = create_rados_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_set_trash_state>(this);
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_set_trash_state(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl;
+ finish(0);
+ return;
+ } else if (r < 0 && r != -EOPNOTSUPP) {
+ derr << "error setting trash image state for image id " << m_image_id
+ << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ get_snap_context();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::get_snap_context() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::get_snapcontext_start(&op);
+
+ std::string header_oid = librbd::util::header_name(m_image_id);
+
+ auto aio_comp = create_rados_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_get_snap_context>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(header_oid, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_get_snap_context(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ::SnapContext snapc;
+ if (r == 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::get_snapcontext_finish(&bl_it, &snapc);
+ }
+ if (r < 0 && r != -ENOENT) {
+ derr << "error retrieving snapshot context for image "
+ << m_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_has_snapshots = (!snapc.empty());
+ purge_snapshots();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::purge_snapshots() {
+ if (!m_has_snapshots) {
+ remove_image();
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = create_context_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_purge_snapshots>(this);
+ auto req = SnapshotPurgeRequest<I>::create(m_io_ctx, m_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_purge_snapshots(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBUSY) {
+ dout(10) << "snapshots still in-use" << dendl;
+ *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to purge image snapshots: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ remove_image();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::remove_image() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_remove_image>(this);
+ auto req = librbd::trash::RemoveRequest<I>::create(
+ m_io_ctx, m_image_id, m_op_work_queue, true, m_progress_ctx,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_remove_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r == -ENOTEMPTY) {
+ // image must have clone v2 snapshot still associated to child
+ dout(10) << "snapshots still in-use" << dendl;
+ *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY;
+ finish(-EBUSY);
+ return;
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "error removing image " << m_image_id << " "
+ << "(" << m_image_id << ") from local pool: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ notify_trash_removed();
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::notify_trash_removed() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ TrashRemoveRequest<I>,
+ &TrashRemoveRequest<I>::handle_notify_trash_removed>(this);
+ librbd::TrashWatcher<I>::notify_image_removed(m_io_ctx, m_image_id, ctx);
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::handle_notify_trash_removed(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void TrashRemoveRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h
new file mode 100644
index 000000000..b99736b33
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h
@@ -0,0 +1,117 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
+
+#include "include/rados/librados.hpp"
+#include "include/buffer.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/internal.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+#include <string>
+#include <vector>
+
+class Context;
+class ContextWQ;
+namespace librbd {
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class TrashRemoveRequest {
+public:
+ static TrashRemoveRequest* create(librados::IoCtx &io_ctx,
+ const std::string &image_id,
+ ErrorResult *error_result,
+ librbd::asio::ContextWQ *op_work_queue,
+ Context *on_finish) {
+ return new TrashRemoveRequest(io_ctx, image_id, error_result, op_work_queue,
+ on_finish);
+ }
+
+ TrashRemoveRequest(librados::IoCtx &io_ctx, const std::string &image_id,
+ ErrorResult *error_result,
+ librbd::asio::ContextWQ *op_work_queue,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_image_id(image_id), m_error_result(error_result),
+ m_op_work_queue(op_work_queue), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /*
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_TRASH_IMAGE_SPEC
+ * |
+ * v
+ * SET_TRASH_STATE
+ * |
+ * v
+ * GET_SNAP_CONTEXT
+ * |
+ * v
+ * PURGE_SNAPSHOTS
+ * |
+ * v
+ * TRASH_REMOVE
+ * |
+ * v
+ * NOTIFY_TRASH_REMOVE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_image_id;
+ ErrorResult *m_error_result;
+ librbd::asio::ContextWQ *m_op_work_queue;
+ Context *m_on_finish;
+
+ ceph::bufferlist m_out_bl;
+ cls::rbd::TrashImageSpec m_trash_image_spec;
+ bool m_has_snapshots = false;
+ librbd::NoOpProgressContext m_progress_ctx;
+
+ void get_trash_image_spec();
+ void handle_get_trash_image_spec(int r);
+
+ void set_trash_state();
+ void handle_set_trash_state(int r);
+
+ void get_snap_context();
+ void handle_get_snap_context(int r);
+
+ void purge_snapshots();
+ void handle_purge_snapshots(int r);
+
+ void remove_image();
+ void handle_remove_image(int r);
+
+ void notify_trash_removed();
+ void handle_notify_trash_removed(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc
new file mode 100644
index 000000000..552d77e0e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc
@@ -0,0 +1,384 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_deleter/TrashWatcher.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_deleter/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashWatcher: " \
+ << this << " " << __func__ << ": "
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+namespace {
+
+const size_t MAX_RETURN = 1024;
+
+} // anonymous namespace
+
+template <typename I>
+TrashWatcher<I>::TrashWatcher(librados::IoCtx &io_ctx, Threads<I> *threads,
+ TrashListener& trash_listener)
+ : librbd::TrashWatcher<I>(io_ctx, threads->work_queue),
+ m_io_ctx(io_ctx), m_threads(threads), m_trash_listener(trash_listener),
+ m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+ "rbd::mirror::image_deleter::TrashWatcher", this))) {
+}
+
+template <typename I>
+void TrashWatcher<I>::init(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ m_on_init_finish = on_finish;
+
+ ceph_assert(!m_trash_list_in_progress);
+ m_trash_list_in_progress = true;
+ }
+
+ create_trash();
+}
+
+template <typename I>
+void TrashWatcher<I>::shut_down(Context *on_finish) {
+ dout(5) << dendl;
+
+ {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+
+ ceph_assert(!m_shutting_down);
+ m_shutting_down = true;
+ if (m_timer_ctx != nullptr) {
+ m_threads->timer->cancel_event(m_timer_ctx);
+ m_timer_ctx = nullptr;
+ }
+ }
+
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ unregister_watcher(on_finish);
+ });
+ m_async_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_image_added(const std::string &image_id,
+ const cls::rbd::TrashImageSpec& spec) {
+ dout(10) << "image_id=" << image_id << dendl;
+
+ std::lock_guard locker{m_lock};
+ add_image(image_id, spec);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_image_removed(const std::string &image_id) {
+ // ignore removals -- the image deleter will ignore -ENOENTs
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_rewatch_complete(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted" << dendl;
+ return;
+ } else if (r == -ENOENT) {
+ dout(5) << "trash directory deleted" << dendl;
+ } else if (r < 0) {
+ derr << "unexpected error re-registering trash directory watch: "
+ << cpp_strerror(r) << dendl;
+ }
+ schedule_trash_list(30);
+}
+
+template <typename I>
+void TrashWatcher<I>::create_trash() {
+ dout(20) << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ librados::ObjectWriteOperation op;
+ op.create(false);
+
+ m_async_op_tracker.start_op();
+ auto aio_comp = create_rados_callback<
+ TrashWatcher<I>, &TrashWatcher<I>::handle_create_trash>(this);
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_create_trash(int r) {
+ dout(20) << "r=" << r << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ Context* on_init_finish = nullptr;
+ if (r == -EBLOCKLISTED || r == -ENOENT) {
+ if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted" << dendl;
+ } else {
+ dout(0) << "detected pool no longer exists" << dendl;
+ }
+
+ std::lock_guard locker{m_lock};
+ std::swap(on_init_finish, m_on_init_finish);
+ m_trash_list_in_progress = false;
+ } else if (r < 0 && r != -EEXIST) {
+ derr << "failed to create trash object: " << cpp_strerror(r) << dendl;
+ {
+ std::lock_guard locker{m_lock};
+ m_trash_list_in_progress = false;
+ }
+
+ schedule_trash_list(30);
+ } else {
+ register_watcher();
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void TrashWatcher<I>::register_watcher() {
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ // if the watch registration is in-flight, let the watcher
+ // handle the transition -- only (re-)register if it's not registered
+ if (!this->is_unregistered()) {
+ trash_list(true);
+ return;
+ }
+
+ // first time registering or the watch failed
+ dout(5) << dendl;
+ m_async_op_tracker.start_op();
+
+ Context *ctx = create_context_callback<
+ TrashWatcher, &TrashWatcher<I>::handle_register_watcher>(this);
+ this->register_watch(ctx);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_register_watcher(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ if (r < 0) {
+ m_trash_list_in_progress = false;
+ }
+ }
+
+ Context *on_init_finish = nullptr;
+ if (r >= 0) {
+ trash_list(true);
+ } else if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted" << dendl;
+
+ std::lock_guard locker{m_lock};
+ std::swap(on_init_finish, m_on_init_finish);
+ } else {
+ derr << "unexpected error registering trash directory watch: "
+ << cpp_strerror(r) << dendl;
+ schedule_trash_list(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void TrashWatcher<I>::unregister_watcher(Context* on_finish) {
+ dout(5) << dendl;
+
+ m_async_op_tracker.start_op();
+ Context *ctx = new LambdaContext([this, on_finish](int r) {
+ handle_unregister_watcher(r, on_finish);
+ });
+ this->unregister_watch(ctx);
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_unregister_watcher(int r, Context* on_finish) {
+ dout(5) << "unregister_watcher: r=" << r << dendl;
+ if (r < 0) {
+ derr << "error unregistering watcher for trash directory: "
+ << cpp_strerror(r) << dendl;
+ }
+ m_async_op_tracker.finish_op();
+ on_finish->complete(0);
+}
+
+template <typename I>
+void TrashWatcher<I>::trash_list(bool initial_request) {
+ if (initial_request) {
+ m_async_op_tracker.start_op();
+ m_last_image_id = "";
+ }
+
+ dout(5) << "last_image_id=" << m_last_image_id << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ }
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::trash_list_start(&op, m_last_image_id, MAX_RETURN);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ TrashWatcher<I>, &TrashWatcher<I>::handle_trash_list>(this);
+ m_out_bl.clear();
+ int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void TrashWatcher<I>::handle_trash_list(int r) {
+ dout(5) << "r=" << r << dendl;
+
+ std::map<std::string, cls::rbd::TrashImageSpec> images;
+ if (r >= 0) {
+ auto bl_it = m_out_bl.cbegin();
+ r = librbd::cls_client::trash_list_finish(&bl_it, &images);
+ }
+
+ Context *on_init_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_trash_list_in_progress);
+ if (r >= 0) {
+ for (auto& image : images) {
+ add_image(image.first, image.second);
+ }
+ } else if (r == -ENOENT) {
+ r = 0;
+ }
+
+ if (r == -EBLOCKLISTED) {
+ dout(0) << "detected client is blocklisted during trash refresh" << dendl;
+ m_trash_list_in_progress = false;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r >= 0 && images.size() < MAX_RETURN) {
+ m_trash_list_in_progress = false;
+ std::swap(on_init_finish, m_on_init_finish);
+ } else if (r < 0) {
+ m_trash_list_in_progress = false;
+ }
+ }
+
+ if (r >= 0 && images.size() == MAX_RETURN) {
+ m_last_image_id = images.rbegin()->first;
+ trash_list(false);
+ return;
+ } else if (r < 0 && r != -EBLOCKLISTED) {
+ derr << "failed to retrieve trash directory: " << cpp_strerror(r) << dendl;
+ schedule_trash_list(10);
+ }
+
+ m_async_op_tracker.finish_op();
+ if (on_init_finish != nullptr) {
+ on_init_finish->complete(r);
+ }
+}
+
+template <typename I>
+void TrashWatcher<I>::schedule_trash_list(double interval) {
+ std::scoped_lock locker{m_threads->timer_lock, m_lock};
+ if (m_shutting_down || m_trash_list_in_progress || m_timer_ctx != nullptr) {
+ if (m_trash_list_in_progress && !m_deferred_trash_list) {
+ dout(5) << "deferring refresh until in-flight refresh completes" << dendl;
+ m_deferred_trash_list = true;
+ }
+ return;
+ }
+
+ dout(5) << dendl;
+ m_timer_ctx = m_threads->timer->add_event_after(
+ interval,
+ new LambdaContext([this](int r) {
+ process_trash_list();
+ }));
+}
+
+template <typename I>
+void TrashWatcher<I>::process_trash_list() {
+ dout(5) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
+ ceph_assert(m_timer_ctx != nullptr);
+ m_timer_ctx = nullptr;
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(!m_trash_list_in_progress);
+ m_trash_list_in_progress = true;
+ }
+
+ // execute outside of the timer's lock
+ m_async_op_tracker.start_op();
+ Context *ctx = new LambdaContext([this](int r) {
+ create_trash();
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void TrashWatcher<I>::add_image(const std::string& image_id,
+ const cls::rbd::TrashImageSpec& spec) {
+ if (spec.source != cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING) {
+ return;
+ }
+
+ ceph_assert(ceph_mutex_is_locked(m_lock));
+ auto& deferment_end_time = spec.deferment_end_time;
+ dout(10) << "image_id=" << image_id << ", "
+ << "deferment_end_time=" << deferment_end_time << dendl;
+
+ m_async_op_tracker.start_op();
+ auto ctx = new LambdaContext([this, image_id, deferment_end_time](int r) {
+ m_trash_listener.handle_trash_image(image_id,
+ deferment_end_time.to_real_time());
+ m_async_op_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+} // namespace image_deleter;
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.h b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h
new file mode 100644
index 000000000..e818a102c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h
@@ -0,0 +1,139 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H
+
+#include "include/rados/librados.hpp"
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "librbd/TrashWatcher.h"
+#include <set>
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace image_deleter {
+
+struct TrashListener;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class TrashWatcher : public librbd::TrashWatcher<ImageCtxT> {
+public:
+ static TrashWatcher* create(librados::IoCtx &io_ctx,
+ Threads<ImageCtxT> *threads,
+ TrashListener& trash_listener) {
+ return new TrashWatcher(io_ctx, threads, trash_listener);
+ }
+
+ TrashWatcher(librados::IoCtx &io_ctx, Threads<ImageCtxT> *threads,
+ TrashListener& trash_listener);
+ TrashWatcher(const TrashWatcher&) = delete;
+ TrashWatcher& operator=(const TrashWatcher&) = delete;
+
+ void init(Context *on_finish);
+ void shut_down(Context *on_finish);
+
+protected:
+ void handle_image_added(const std::string &image_id,
+ const cls::rbd::TrashImageSpec& spec) override;
+
+ void handle_image_removed(const std::string &image_id) override;
+
+ void handle_rewatch_complete(int r) override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * INIT
+ * |
+ * v
+ * CREATE_TRASH
+ * |
+ * v
+ * REGISTER_WATCHER
+ * |
+ * |/--------------------------------\
+ * | |
+ * |/---------\ |
+ * | | |
+ * v | (more images) |
+ * TRASH_LIST ---/ |
+ * | |
+ * |/----------------------------\ |
+ * | | |
+ * v | |
+ * <idle> --\ | |
+ * | | | |
+ * | |\---> IMAGE_ADDED -----/ |
+ * | | |
+ * | \----> WATCH_ERROR ---------/
+ * v
+ * SHUT_DOWN
+ * |
+ * v
+ * UNREGISTER_WATCHER
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx m_io_ctx;
+ Threads<ImageCtxT> *m_threads;
+ TrashListener& m_trash_listener;
+
+ std::string m_last_image_id;
+ bufferlist m_out_bl;
+
+ mutable ceph::mutex m_lock;
+
+ Context *m_on_init_finish = nullptr;
+ Context *m_timer_ctx = nullptr;
+
+ AsyncOpTracker m_async_op_tracker;
+ bool m_trash_list_in_progress = false;
+ bool m_deferred_trash_list = false;
+ bool m_shutting_down = false;
+
+ void register_watcher();
+ void handle_register_watcher(int r);
+
+ void create_trash();
+ void handle_create_trash(int r);
+
+ void unregister_watcher(Context* on_finish);
+ void handle_unregister_watcher(int r, Context* on_finish);
+
+ void trash_list(bool initial_request);
+ void handle_trash_list(int r);
+
+ void schedule_trash_list(double interval);
+ void process_trash_list();
+
+ void get_mirror_uuid();
+ void handle_get_mirror_uuid(int r);
+
+ void add_image(const std::string& image_id,
+ const cls::rbd::TrashImageSpec& spec);
+
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H
diff --git a/src/tools/rbd_mirror/image_deleter/Types.h b/src/tools/rbd_mirror/image_deleter/Types.h
new file mode 100644
index 000000000..1c70b7e14
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/Types.h
@@ -0,0 +1,54 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
+#define CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
+
+#include "include/Context.h"
+#include "librbd/journal/Policy.h"
+#include <string>
+
+struct utime_t;
+
+namespace rbd {
+namespace mirror {
+namespace image_deleter {
+
+enum ErrorResult {
+ ERROR_RESULT_COMPLETE,
+ ERROR_RESULT_RETRY,
+ ERROR_RESULT_RETRY_IMMEDIATELY
+};
+
+struct TrashListener {
+ TrashListener() {
+ }
+ TrashListener(const TrashListener&) = delete;
+ TrashListener& operator=(const TrashListener&) = delete;
+
+ virtual ~TrashListener() {
+ }
+
+ virtual void handle_trash_image(const std::string& image_id,
+ const ceph::real_clock::time_point& deferment_end_time) = 0;
+
+};
+
+struct JournalPolicy : public librbd::journal::Policy {
+ bool append_disabled() const override {
+ return true;
+ }
+ bool journal_disabled() const override {
+ return true;
+ }
+
+ void allocate_tag_on_lock(Context *on_finish) override {
+ on_finish->complete(0);
+ }
+};
+
+} // namespace image_deleter
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.cc b/src/tools/rbd_mirror/image_map/LoadRequest.cc
new file mode 100644
index 000000000..46564a160
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/LoadRequest.cc
@@ -0,0 +1,174 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+
+#include "UpdateRequest.h"
+#include "LoadRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::LoadRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+static const uint32_t MAX_RETURN = 1024;
+
+using librbd::util::create_rados_callback;
+using librbd::util::create_context_callback;
+
+template<typename I>
+LoadRequest<I>::LoadRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
+ Context *on_finish)
+ : m_ioctx(ioctx),
+ m_image_mapping(image_mapping),
+ m_on_finish(on_finish) {
+}
+
+template<typename I>
+void LoadRequest<I>::send() {
+ dout(20) << dendl;
+
+ image_map_list();
+}
+
+template<typename I>
+void LoadRequest<I>::image_map_list() {
+ dout(20) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_map_list_start(&op, m_start_after, MAX_RETURN);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ LoadRequest, &LoadRequest::handle_image_map_list>(this);
+
+ m_out_bl.clear();
+ int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template<typename I>
+void LoadRequest<I>::handle_image_map_list(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ std::map<std::string, cls::rbd::MirrorImageMap> image_mapping;
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_map_list_finish(&it, &image_mapping);
+ }
+
+ if (r < 0) {
+ derr << ": failed to get image map: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_image_mapping->insert(image_mapping.begin(), image_mapping.end());
+
+ if (image_mapping.size() == MAX_RETURN) {
+ m_start_after = image_mapping.rbegin()->first;
+ image_map_list();
+ return;
+ }
+
+ mirror_image_list();
+}
+
+template<typename I>
+void LoadRequest<I>::mirror_image_list() {
+ dout(20) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN);
+
+ m_out_bl.clear();
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ LoadRequest<I>,
+ &LoadRequest<I>::handle_mirror_image_list>(this);
+ int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template<typename I>
+void LoadRequest<I>::handle_mirror_image_list(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ std::map<std::string, std::string> ids;
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_list_finish(&it, &ids);
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ for (auto &id : ids) {
+ m_global_image_ids.emplace(id.second);
+ }
+
+ if (ids.size() == MAX_RETURN) {
+ m_start_after = ids.rbegin()->first;
+ mirror_image_list();
+ return;
+ }
+
+ cleanup_image_map();
+}
+
+template<typename I>
+void LoadRequest<I>::cleanup_image_map() {
+ dout(20) << dendl;
+
+ std::set<std::string> map_removals;
+
+ auto it = m_image_mapping->begin();
+ while (it != m_image_mapping->end()) {
+ if (m_global_image_ids.count(it->first) > 0) {
+ ++it;
+ continue;
+ }
+ map_removals.emplace(it->first);
+ it = m_image_mapping->erase(it);
+ }
+
+ if (map_removals.size() == 0) {
+ finish(0);
+ return;
+ }
+
+ auto ctx = create_context_callback<
+ LoadRequest<I>,
+ &LoadRequest<I>::finish>(this);
+ image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create(
+ m_ioctx, {}, std::move(map_removals), ctx);
+ req->send();
+}
+
+template<typename I>
+void LoadRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_map::LoadRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.h b/src/tools/rbd_mirror/image_map/LoadRequest.h
new file mode 100644
index 000000000..9b1be9685
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/LoadRequest.h
@@ -0,0 +1,77 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+
+class Context;
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+template<typename ImageCtxT = librbd::ImageCtx>
+class LoadRequest {
+public:
+ static LoadRequest *create(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
+ Context *on_finish) {
+ return new LoadRequest(ioctx, image_mapping, on_finish);
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * | . . . . . . . .
+ * v v . MAX_RETURN
+ * IMAGE_MAP_LIST. . . . . . .
+ * |
+ * v
+ * MIRROR_IMAGE_LIST
+ * |
+ * v
+ * CLEANUP_IMAGE_MAP
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ LoadRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
+ Context *on_finish);
+
+ librados::IoCtx &m_ioctx;
+ std::map<std::string, cls::rbd::MirrorImageMap> *m_image_mapping;
+ Context *m_on_finish;
+
+ std::set<std::string> m_global_image_ids;
+
+ bufferlist m_out_bl;
+ std::string m_start_after;
+
+ void image_map_list();
+ void handle_image_map_list(int r);
+
+ void mirror_image_list();
+ void handle_mirror_image_list(int r);
+
+ void cleanup_image_map();
+
+ void finish(int r);
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc
new file mode 100644
index 000000000..62fbd12dc
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Policy.cc
@@ -0,0 +1,407 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "Policy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << this \
+ << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+namespace {
+
+bool is_instance_action(ActionType action_type) {
+ switch (action_type) {
+ case ACTION_TYPE_ACQUIRE:
+ case ACTION_TYPE_RELEASE:
+ return true;
+ case ACTION_TYPE_NONE:
+ case ACTION_TYPE_MAP_UPDATE:
+ case ACTION_TYPE_MAP_REMOVE:
+ break;
+ }
+ return false;
+}
+
+} // anonymous namespace
+
+using ::operator<<;
+using librbd::util::unique_lock_name;
+
+Policy::Policy(librados::IoCtx &ioctx)
+ : m_ioctx(ioctx),
+ m_map_lock(ceph::make_shared_mutex(
+ unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", this))) {
+
+ // map should at least have once instance
+ std::string instance_id = stringify(ioctx.get_instance_id());
+ m_map.emplace(instance_id, std::set<std::string>{});
+}
+
+void Policy::init(
+ const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) {
+ dout(20) << dendl;
+
+ std::unique_lock map_lock{m_map_lock};
+ for (auto& it : image_mapping) {
+ ceph_assert(!it.second.instance_id.empty());
+ auto map_result = m_map[it.second.instance_id].emplace(it.first);
+ ceph_assert(map_result.second);
+
+ auto image_state_result = m_image_states.emplace(
+ it.first, ImageState{it.second.instance_id, it.second.mapped_time});
+ ceph_assert(image_state_result.second);
+
+ // ensure we (re)send image acquire actions to the instance
+ auto& image_state = image_state_result.first->second;
+ auto start_action = set_state(&image_state,
+ StateTransition::STATE_INITIALIZING, false);
+ ceph_assert(start_action);
+ }
+}
+
+LookupInfo Policy::lookup(const std::string &global_image_id) {
+ dout(20) << "global_image_id=" << global_image_id << dendl;
+
+ std::shared_lock map_lock{m_map_lock};
+ LookupInfo info;
+
+ auto it = m_image_states.find(global_image_id);
+ if (it != m_image_states.end()) {
+ info.instance_id = it->second.instance_id;
+ info.mapped_time = it->second.mapped_time;
+ }
+ return info;
+}
+
+bool Policy::add_image(const std::string &global_image_id) {
+ dout(5) << "global_image_id=" << global_image_id << dendl;
+
+ std::unique_lock map_lock{m_map_lock};
+ auto image_state_result = m_image_states.emplace(global_image_id,
+ ImageState{});
+ auto& image_state = image_state_result.first->second;
+ if (image_state.state == StateTransition::STATE_INITIALIZING) {
+ // avoid duplicate acquire notifications upon leader startup
+ return false;
+ }
+
+ return set_state(&image_state, StateTransition::STATE_ASSOCIATING, false);
+}
+
+bool Policy::remove_image(const std::string &global_image_id) {
+ dout(5) << "global_image_id=" << global_image_id << dendl;
+
+ std::unique_lock map_lock{m_map_lock};
+ auto it = m_image_states.find(global_image_id);
+ if (it == m_image_states.end()) {
+ return false;
+ }
+
+ auto& image_state = it->second;
+ return set_state(&image_state, StateTransition::STATE_DISSOCIATING, false);
+}
+
+void Policy::add_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids) {
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ std::unique_lock map_lock{m_map_lock};
+ for (auto& instance : instance_ids) {
+ ceph_assert(!instance.empty());
+ m_map.emplace(instance, std::set<std::string>{});
+ }
+
+ // post-failover, remove any dead instances and re-shuffle their images
+ if (m_initial_update) {
+ dout(5) << "initial instance update" << dendl;
+ m_initial_update = false;
+
+ std::set<std::string> alive_instances(instance_ids.begin(),
+ instance_ids.end());
+ InstanceIds dead_instances;
+ for (auto& map_pair : m_map) {
+ if (alive_instances.find(map_pair.first) == alive_instances.end()) {
+ dead_instances.push_back(map_pair.first);
+ }
+ }
+
+ if (!dead_instances.empty()) {
+ remove_instances(m_map_lock, dead_instances, global_image_ids);
+ }
+ }
+
+ GlobalImageIds shuffle_global_image_ids;
+ do_shuffle_add_instances(m_map, m_image_states.size(), &shuffle_global_image_ids);
+ dout(5) << "shuffling global_image_ids=[" << shuffle_global_image_ids
+ << "]" << dendl;
+ for (auto& global_image_id : shuffle_global_image_ids) {
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ if (set_state(&image_state, StateTransition::STATE_SHUFFLING, false)) {
+ global_image_ids->emplace(global_image_id);
+ }
+ }
+}
+
+void Policy::remove_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids) {
+ std::unique_lock map_lock{m_map_lock};
+ remove_instances(m_map_lock, instance_ids, global_image_ids);
+}
+
+void Policy::remove_instances(const ceph::shared_mutex& lock,
+ const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids) {
+ ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
+ dout(5) << "instance_ids=" << instance_ids << dendl;
+
+ for (auto& instance_id : instance_ids) {
+ auto map_it = m_map.find(instance_id);
+ if (map_it == m_map.end()) {
+ continue;
+ }
+
+ auto& instance_global_image_ids = map_it->second;
+ if (instance_global_image_ids.empty()) {
+ m_map.erase(map_it);
+ continue;
+ }
+
+ m_dead_instances.insert(instance_id);
+ dout(5) << "force shuffling: instance_id=" << instance_id << ", "
+ << "global_image_ids=[" << instance_global_image_ids << "]"<< dendl;
+ for (auto& global_image_id : instance_global_image_ids) {
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ if (is_state_scheduled(image_state,
+ StateTransition::STATE_DISSOCIATING)) {
+ // don't shuffle images that no longer exist
+ continue;
+ }
+
+ if (set_state(&image_state, StateTransition::STATE_SHUFFLING, true)) {
+ global_image_ids->emplace(global_image_id);
+ }
+ }
+ }
+}
+
+ActionType Policy::start_action(const std::string &global_image_id) {
+ std::unique_lock map_lock{m_map_lock};
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ auto& transition = image_state.transition;
+ ceph_assert(transition.action_type != ACTION_TYPE_NONE);
+
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "state=" << image_state.state << ", "
+ << "action_type=" << transition.action_type << dendl;
+ if (transition.start_policy_action) {
+ execute_policy_action(global_image_id, &image_state,
+ *transition.start_policy_action);
+ transition.start_policy_action = boost::none;
+ }
+ return transition.action_type;
+}
+
+bool Policy::finish_action(const std::string &global_image_id, int r) {
+ std::unique_lock map_lock{m_map_lock};
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+
+ auto& image_state = it->second;
+ auto& transition = image_state.transition;
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "state=" << image_state.state << ", "
+ << "action_type=" << transition.action_type << ", "
+ << "r=" << r << dendl;
+
+ // retry on failure unless it's an RPC message to an instance that is dead
+ if (r < 0 &&
+ (!is_instance_action(image_state.transition.action_type) ||
+ image_state.instance_id == UNMAPPED_INSTANCE_ID ||
+ m_dead_instances.find(image_state.instance_id) ==
+ m_dead_instances.end())) {
+ return true;
+ }
+
+ auto finish_policy_action = transition.finish_policy_action;
+ StateTransition::transit(image_state.state, &image_state.transition);
+ if (transition.finish_state) {
+ // in-progress state machine complete
+ ceph_assert(StateTransition::is_idle(*transition.finish_state));
+ image_state.state = *transition.finish_state;
+ image_state.transition = {};
+ }
+
+ if (StateTransition::is_idle(image_state.state) && image_state.next_state) {
+ // advance to pending state machine
+ bool start_action = set_state(&image_state, *image_state.next_state, false);
+ ceph_assert(start_action);
+ }
+
+ // image state may get purged in execute_policy_action()
+ bool pending_action = image_state.transition.action_type != ACTION_TYPE_NONE;
+ if (finish_policy_action) {
+ execute_policy_action(global_image_id, &image_state, *finish_policy_action);
+ }
+
+ return pending_action;
+}
+
+void Policy::execute_policy_action(
+ const std::string& global_image_id, ImageState* image_state,
+ StateTransition::PolicyAction policy_action) {
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "policy_action=" << policy_action << dendl;
+
+ switch (policy_action) {
+ case StateTransition::POLICY_ACTION_MAP:
+ map(global_image_id, image_state);
+ break;
+ case StateTransition::POLICY_ACTION_UNMAP:
+ unmap(global_image_id, image_state);
+ break;
+ case StateTransition::POLICY_ACTION_REMOVE:
+ if (image_state->state == StateTransition::STATE_UNASSOCIATED) {
+ ceph_assert(image_state->instance_id == UNMAPPED_INSTANCE_ID);
+ ceph_assert(!image_state->next_state);
+ m_image_states.erase(global_image_id);
+ }
+ break;
+ }
+}
+
+void Policy::map(const std::string& global_image_id, ImageState* image_state) {
+ ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
+
+ std::string instance_id = image_state->instance_id;
+ if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) {
+ return;
+ }
+ if (is_dead_instance(instance_id)) {
+ unmap(global_image_id, image_state);
+ }
+
+ instance_id = do_map(m_map, global_image_id);
+ ceph_assert(!instance_id.empty());
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ image_state->instance_id = instance_id;
+ image_state->mapped_time = ceph_clock_now();
+
+ auto ins = m_map[instance_id].emplace(global_image_id);
+ ceph_assert(ins.second);
+}
+
+void Policy::unmap(const std::string &global_image_id,
+ ImageState* image_state) {
+ ceph_assert(ceph_mutex_is_wlocked(m_map_lock));
+
+ std::string instance_id = image_state->instance_id;
+ if (instance_id == UNMAPPED_INSTANCE_ID) {
+ return;
+ }
+
+ dout(5) << "global_image_id=" << global_image_id << ", "
+ << "instance_id=" << instance_id << dendl;
+
+ ceph_assert(!instance_id.empty());
+ m_map[instance_id].erase(global_image_id);
+ image_state->instance_id = UNMAPPED_INSTANCE_ID;
+ image_state->mapped_time = {};
+
+ if (is_dead_instance(instance_id) && m_map[instance_id].empty()) {
+ dout(5) << "removing dead instance_id=" << instance_id << dendl;
+ m_map.erase(instance_id);
+ m_dead_instances.erase(instance_id);
+ }
+}
+
+bool Policy::is_image_shuffling(const std::string &global_image_id) {
+ ceph_assert(ceph_mutex_is_locked(m_map_lock));
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+ auto& image_state = it->second;
+
+ // avoid attempting to re-shuffle a pending shuffle
+ auto result = is_state_scheduled(image_state,
+ StateTransition::STATE_SHUFFLING);
+ dout(20) << "global_image_id=" << global_image_id << ", "
+ << "result=" << result << dendl;
+ return result;
+}
+
+bool Policy::can_shuffle_image(const std::string &global_image_id) {
+ ceph_assert(ceph_mutex_is_locked(m_map_lock));
+
+ CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
+ int migration_throttle = cct->_conf.get_val<uint64_t>(
+ "rbd_mirror_image_policy_migration_throttle");
+
+ auto it = m_image_states.find(global_image_id);
+ ceph_assert(it != m_image_states.end());
+ auto& image_state = it->second;
+
+ utime_t last_shuffled_time = image_state.mapped_time;
+
+ // idle images that haven't been recently remapped can shuffle
+ utime_t now = ceph_clock_now();
+ auto result = (StateTransition::is_idle(image_state.state) &&
+ ((migration_throttle <= 0) ||
+ (now - last_shuffled_time >= migration_throttle)));
+ dout(10) << "global_image_id=" << global_image_id << ", "
+ << "migration_throttle=" << migration_throttle << ", "
+ << "last_shuffled_time=" << last_shuffled_time << ", "
+ << "result=" << result << dendl;
+ return result;
+}
+
+bool Policy::set_state(ImageState* image_state, StateTransition::State state,
+ bool ignore_current_state) {
+ if (!ignore_current_state && image_state->state == state) {
+ image_state->next_state = boost::none;
+ return false;
+ } else if (StateTransition::is_idle(image_state->state)) {
+ image_state->state = state;
+ image_state->next_state = boost::none;
+
+ StateTransition::transit(image_state->state, &image_state->transition);
+ ceph_assert(image_state->transition.action_type != ACTION_TYPE_NONE);
+ ceph_assert(!image_state->transition.finish_state);
+ return true;
+ }
+
+ image_state->next_state = state;
+ return false;
+}
+
+bool Policy::is_state_scheduled(const ImageState& image_state,
+ StateTransition::State state) const {
+ return (image_state.state == state ||
+ (image_state.next_state && *image_state.next_state == state));
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/Policy.h b/src/tools/rbd_mirror/image_map/Policy.h
new file mode 100644
index 000000000..b256e2f1d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Policy.h
@@ -0,0 +1,122 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
+
+#include <map>
+#include <tuple>
+#include <boost/optional.hpp>
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/image_map/StateTransition.h"
+#include "tools/rbd_mirror/image_map/Types.h"
+
+class Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class Policy {
+public:
+ Policy(librados::IoCtx &ioctx);
+
+ virtual ~Policy() {
+ }
+
+ // init -- called during initialization
+ void init(
+ const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping);
+
+ // lookup an image from the map
+ LookupInfo lookup(const std::string &global_image_id);
+
+ // add, remove
+ bool add_image(const std::string &global_image_id);
+ bool remove_image(const std::string &global_image_id);
+
+ // shuffle images when instances are added/removed
+ void add_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids);
+ void remove_instances(const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids);
+
+ ActionType start_action(const std::string &global_image_id);
+ bool finish_action(const std::string &global_image_id, int r);
+
+protected:
+ typedef std::map<std::string, std::set<std::string> > InstanceToImageMap;
+
+ bool is_dead_instance(const std::string instance_id) {
+ ceph_assert(ceph_mutex_is_locked(m_map_lock));
+ return m_dead_instances.find(instance_id) != m_dead_instances.end();
+ }
+
+ bool is_image_shuffling(const std::string &global_image_id);
+ bool can_shuffle_image(const std::string &global_image_id);
+
+ // map an image (global image id) to an instance
+ virtual std::string do_map(const InstanceToImageMap& map,
+ const std::string &global_image_id) = 0;
+
+ // shuffle images when instances are added/removed
+ virtual void do_shuffle_add_instances(
+ const InstanceToImageMap& map, size_t image_count,
+ std::set<std::string> *remap_global_image_ids) = 0;
+
+private:
+ struct ImageState {
+ std::string instance_id = UNMAPPED_INSTANCE_ID;
+ utime_t mapped_time;
+
+ ImageState() {}
+ ImageState(const std::string& instance_id, const utime_t& mapped_time)
+ : instance_id(instance_id), mapped_time(mapped_time) {
+ }
+
+ // active state and action
+ StateTransition::State state = StateTransition::STATE_UNASSOCIATED;
+ StateTransition::Transition transition;
+
+ // next scheduled state
+ boost::optional<StateTransition::State> next_state = boost::none;
+ };
+
+ typedef std::map<std::string, ImageState> ImageStates;
+
+ librados::IoCtx &m_ioctx;
+
+ ceph::shared_mutex m_map_lock; // protects m_map
+ InstanceToImageMap m_map; // instance_id -> global_id map
+
+ ImageStates m_image_states;
+ std::set<std::string> m_dead_instances;
+
+ bool m_initial_update = true;
+
+ void remove_instances(const ceph::shared_mutex& lock,
+ const InstanceIds &instance_ids,
+ GlobalImageIds* global_image_ids);
+
+ bool set_state(ImageState* image_state, StateTransition::State state,
+ bool ignore_current_state);
+
+ void execute_policy_action(const std::string& global_image_id,
+ ImageState* image_state,
+ StateTransition::PolicyAction policy_action);
+
+ void map(const std::string& global_image_id, ImageState* image_state);
+ void unmap(const std::string &global_image_id, ImageState* image_state);
+
+ bool is_state_scheduled(const ImageState& image_state,
+ StateTransition::State state) const;
+
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H
diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.cc b/src/tools/rbd_mirror/image_map/SimplePolicy.cc
new file mode 100644
index 000000000..f26805819
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/SimplePolicy.cc
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "SimplePolicy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::SimplePolicy: " << this \
+ << " " << __func__ << ": "
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+SimplePolicy::SimplePolicy(librados::IoCtx &ioctx)
+ : Policy(ioctx) {
+}
+
+size_t SimplePolicy::calc_images_per_instance(const InstanceToImageMap& map,
+ size_t image_count) {
+ size_t nr_instances = 0;
+ for (auto const &it : map) {
+ if (!Policy::is_dead_instance(it.first)) {
+ ++nr_instances;
+ }
+ }
+ ceph_assert(nr_instances > 0);
+
+ size_t images_per_instance = image_count / nr_instances;
+ if (images_per_instance == 0) {
+ ++images_per_instance;
+ }
+
+ return images_per_instance;
+}
+
+void SimplePolicy::do_shuffle_add_instances(
+ const InstanceToImageMap& map, size_t image_count,
+ std::set<std::string> *remap_global_image_ids) {
+ uint64_t images_per_instance = calc_images_per_instance(map, image_count);
+ dout(5) << "images per instance=" << images_per_instance << dendl;
+
+ for (auto const &instance : map) {
+ if (instance.second.size() <= images_per_instance) {
+ continue;
+ }
+
+ auto it = instance.second.begin();
+ uint64_t cut_off = instance.second.size() - images_per_instance;
+
+ while (it != instance.second.end() && cut_off > 0) {
+ if (Policy::is_image_shuffling(*it)) {
+ --cut_off;
+ } else if (Policy::can_shuffle_image(*it)) {
+ --cut_off;
+ remap_global_image_ids->emplace(*it);
+ }
+
+ ++it;
+ }
+ }
+}
+
+std::string SimplePolicy::do_map(const InstanceToImageMap& map,
+ const std::string &global_image_id) {
+ auto min_it = map.end();
+ for (auto it = map.begin(); it != map.end(); ++it) {
+ ceph_assert(it->second.find(global_image_id) == it->second.end());
+ if (Policy::is_dead_instance(it->first)) {
+ continue;
+ } else if (min_it == map.end()) {
+ min_it = it;
+ } else if (it->second.size() < min_it->second.size()) {
+ min_it = it;
+ }
+ }
+
+ ceph_assert(min_it != map.end());
+ dout(20) << "global_image_id=" << global_image_id << " maps to instance_id="
+ << min_it->first << dendl;
+ return min_it->first;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.h b/src/tools/rbd_mirror/image_map/SimplePolicy.h
new file mode 100644
index 000000000..ad2071b2c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/SimplePolicy.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
+
+#include "Policy.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class SimplePolicy : public Policy {
+public:
+ static SimplePolicy *create(librados::IoCtx &ioctx) {
+ return new SimplePolicy(ioctx);
+ }
+
+protected:
+ SimplePolicy(librados::IoCtx &ioctx);
+
+ std::string do_map(const InstanceToImageMap& map,
+ const std::string &global_image_id) override;
+
+ void do_shuffle_add_instances(
+ const InstanceToImageMap& map, size_t image_count,
+ std::set<std::string> *remap_global_image_ids) override;
+
+private:
+ size_t calc_images_per_instance(const InstanceToImageMap& map,
+ size_t image_count);
+
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H
diff --git a/src/tools/rbd_mirror/image_map/StateTransition.cc b/src/tools/rbd_mirror/image_map/StateTransition.cc
new file mode 100644
index 000000000..ec5f07ff9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/StateTransition.cc
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <ostream>
+#include "include/ceph_assert.h"
+#include "StateTransition.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+std::ostream &operator<<(std::ostream &os,
+ const StateTransition::State &state) {
+ switch(state) {
+ case StateTransition::STATE_INITIALIZING:
+ os << "INITIALIZING";
+ break;
+ case StateTransition::STATE_ASSOCIATING:
+ os << "ASSOCIATING";
+ break;
+ case StateTransition::STATE_ASSOCIATED:
+ os << "ASSOCIATED";
+ break;
+ case StateTransition::STATE_SHUFFLING:
+ os << "SHUFFLING";
+ break;
+ case StateTransition::STATE_DISSOCIATING:
+ os << "DISSOCIATING";
+ break;
+ case StateTransition::STATE_UNASSOCIATED:
+ os << "UNASSOCIATED";
+ break;
+ }
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os,
+ const StateTransition::PolicyAction &policy_action) {
+ switch(policy_action) {
+ case StateTransition::POLICY_ACTION_MAP:
+ os << "MAP";
+ break;
+ case StateTransition::POLICY_ACTION_UNMAP:
+ os << "UNMAP";
+ break;
+ case StateTransition::POLICY_ACTION_REMOVE:
+ os << "REMOVE";
+ break;
+ }
+ return os;
+}
+
+const StateTransition::TransitionTable StateTransition::s_transition_table {
+ // state current_action Transition
+ // ---------------------------------------------------------------------------
+ {{STATE_INITIALIZING, ACTION_TYPE_NONE}, {ACTION_TYPE_ACQUIRE, {}, {},
+ {}}},
+ {{STATE_INITIALIZING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {},
+ {STATE_ASSOCIATED}}},
+
+ {{STATE_ASSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_MAP_UPDATE,
+ {POLICY_ACTION_MAP}, {}, {}}},
+ {{STATE_ASSOCIATING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {},
+ {}}},
+ {{STATE_ASSOCIATING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {},
+ {STATE_ASSOCIATED}}},
+
+ {{STATE_DISSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {},
+ {POLICY_ACTION_UNMAP}, {}}},
+ {{STATE_DISSOCIATING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_REMOVE, {},
+ {POLICY_ACTION_REMOVE}, {}}},
+ {{STATE_DISSOCIATING, ACTION_TYPE_MAP_REMOVE}, {ACTION_TYPE_NONE, {},
+ {}, {STATE_UNASSOCIATED}}},
+
+ {{STATE_SHUFFLING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {},
+ {POLICY_ACTION_UNMAP}, {}}},
+ {{STATE_SHUFFLING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_UPDATE,
+ {POLICY_ACTION_MAP}, {}, {}}},
+ {{STATE_SHUFFLING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {},
+ {}}},
+ {{STATE_SHUFFLING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {},
+ {STATE_ASSOCIATED}}}
+};
+
+void StateTransition::transit(State state, Transition* transition) {
+ auto it = s_transition_table.find({state, transition->action_type});
+ ceph_assert(it != s_transition_table.end());
+
+ *transition = it->second;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/StateTransition.h b/src/tools/rbd_mirror/image_map/StateTransition.h
new file mode 100644
index 000000000..02a5ce4e9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/StateTransition.h
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
+
+#include "tools/rbd_mirror/image_map/Types.h"
+#include <boost/optional.hpp>
+#include <map>
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+class StateTransition {
+public:
+ enum State {
+ STATE_UNASSOCIATED,
+ STATE_INITIALIZING,
+ STATE_ASSOCIATING,
+ STATE_ASSOCIATED,
+ STATE_SHUFFLING,
+ STATE_DISSOCIATING
+ };
+
+ enum PolicyAction {
+ POLICY_ACTION_MAP,
+ POLICY_ACTION_UNMAP,
+ POLICY_ACTION_REMOVE
+ };
+
+ struct Transition {
+ // image map action
+ ActionType action_type = ACTION_TYPE_NONE;
+
+ // policy internal action
+ boost::optional<PolicyAction> start_policy_action;
+ boost::optional<PolicyAction> finish_policy_action;
+
+ // state machine complete
+ boost::optional<State> finish_state;
+
+ Transition() {
+ }
+ Transition(ActionType action_type,
+ const boost::optional<PolicyAction>& start_policy_action,
+ const boost::optional<PolicyAction>& finish_policy_action,
+ const boost::optional<State>& finish_state)
+ : action_type(action_type), start_policy_action(start_policy_action),
+ finish_policy_action(finish_policy_action), finish_state(finish_state) {
+ }
+ };
+
+ static bool is_idle(State state) {
+ return (state == STATE_UNASSOCIATED || state == STATE_ASSOCIATED);
+ }
+
+ static void transit(State state, Transition* transition);
+
+private:
+ typedef std::pair<State, ActionType> TransitionKey;
+ typedef std::map<TransitionKey, Transition> TransitionTable;
+
+ // image transition table
+ static const TransitionTable s_transition_table;
+};
+
+std::ostream &operator<<(std::ostream &os, const StateTransition::State &state);
+std::ostream &operator<<(std::ostream &os,
+ const StateTransition::PolicyAction &policy_action);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
diff --git a/src/tools/rbd_mirror/image_map/Types.cc b/src/tools/rbd_mirror/image_map/Types.cc
new file mode 100644
index 000000000..47de9c3cf
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Types.cc
@@ -0,0 +1,138 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+#include <iostream>
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+const std::string UNMAPPED_INSTANCE_ID("");
+
+namespace {
+
+template <typename E>
+class GetTypeVisitor : public boost::static_visitor<E> {
+public:
+ template <typename T>
+ inline E operator()(const T&) const {
+ return T::TYPE;
+ }
+};
+
+class EncodeVisitor : public boost::static_visitor<void> {
+public:
+ explicit EncodeVisitor(bufferlist &bl) : m_bl(bl) {
+ }
+
+ template <typename T>
+ inline void operator()(const T& t) const {
+ using ceph::encode;
+ encode(static_cast<uint32_t>(T::TYPE), m_bl);
+ t.encode(m_bl);
+ }
+private:
+ bufferlist &m_bl;
+};
+
+class DecodeVisitor : public boost::static_visitor<void> {
+public:
+ DecodeVisitor(__u8 version, bufferlist::const_iterator &iter)
+ : m_version(version), m_iter(iter) {
+ }
+
+ template <typename T>
+ inline void operator()(T& t) const {
+ t.decode(m_version, m_iter);
+ }
+private:
+ __u8 m_version;
+ bufferlist::const_iterator &m_iter;
+};
+
+class DumpVisitor : public boost::static_visitor<void> {
+public:
+ explicit DumpVisitor(Formatter *formatter, const std::string &key)
+ : m_formatter(formatter), m_key(key) {}
+
+ template <typename T>
+ inline void operator()(const T& t) const {
+ auto type = T::TYPE;
+ m_formatter->dump_string(m_key.c_str(), stringify(type));
+ t.dump(m_formatter);
+ }
+private:
+ ceph::Formatter *m_formatter;
+ std::string m_key;
+};
+
+} // anonymous namespace
+
+PolicyMetaType PolicyData::get_policy_meta_type() const {
+ return boost::apply_visitor(GetTypeVisitor<PolicyMetaType>(), policy_meta);
+}
+
+void PolicyData::encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ boost::apply_visitor(EncodeVisitor(bl), policy_meta);
+ ENCODE_FINISH(bl);
+}
+
+void PolicyData::decode(bufferlist::const_iterator& it) {
+ DECODE_START(1, it);
+
+ uint32_t policy_meta_type;
+ decode(policy_meta_type, it);
+
+ switch (policy_meta_type) {
+ case POLICY_META_TYPE_NONE:
+ policy_meta = PolicyMetaNone();
+ break;
+ default:
+ policy_meta = PolicyMetaUnknown();
+ break;
+ }
+
+ boost::apply_visitor(DecodeVisitor(struct_v, it), policy_meta);
+ DECODE_FINISH(it);
+}
+
+void PolicyData::dump(Formatter *f) const {
+ boost::apply_visitor(DumpVisitor(f, "policy_meta_type"), policy_meta);
+}
+
+void PolicyData::generate_test_instances(std::list<PolicyData *> &o) {
+ o.push_back(new PolicyData(PolicyMetaNone()));
+}
+
+std::ostream &operator<<(std::ostream &os, const ActionType& action_type) {
+ switch (action_type) {
+ case ACTION_TYPE_NONE:
+ os << "NONE";
+ break;
+ case ACTION_TYPE_MAP_UPDATE:
+ os << "MAP_UPDATE";
+ break;
+ case ACTION_TYPE_MAP_REMOVE:
+ os << "MAP_REMOVE";
+ break;
+ case ACTION_TYPE_ACQUIRE:
+ os << "ACQUIRE";
+ break;
+ case ACTION_TYPE_RELEASE:
+ os << "RELEASE";
+ break;
+ default:
+ os << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")";
+ break;
+ }
+ return os;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_map/Types.h b/src/tools/rbd_mirror/image_map/Types.h
new file mode 100644
index 000000000..5a97430f3
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Types.h
@@ -0,0 +1,130 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H
+
+#include <iosfwd>
+#include <map>
+#include <set>
+#include <string>
+#include <boost/variant.hpp>
+
+#include "include/buffer.h"
+#include "include/encoding.h"
+#include "include/utime.h"
+#include "tools/rbd_mirror/Types.h"
+
+struct Context;
+
+namespace ceph {
+class Formatter;
+}
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+extern const std::string UNMAPPED_INSTANCE_ID;
+
+struct Listener {
+ virtual ~Listener() {
+ }
+
+ virtual void acquire_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) = 0;
+ virtual void release_image(const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) = 0;
+ virtual void remove_image(const std::string &mirror_uuid,
+ const std::string &global_image_id,
+ const std::string &instance_id,
+ Context* on_finish) = 0;
+};
+
+struct LookupInfo {
+ std::string instance_id = UNMAPPED_INSTANCE_ID;
+ utime_t mapped_time;
+};
+
+enum ActionType {
+ ACTION_TYPE_NONE,
+ ACTION_TYPE_MAP_UPDATE,
+ ACTION_TYPE_MAP_REMOVE,
+ ACTION_TYPE_ACQUIRE,
+ ACTION_TYPE_RELEASE
+};
+
+typedef std::vector<std::string> InstanceIds;
+typedef std::set<std::string> GlobalImageIds;
+typedef std::map<std::string, ActionType> ImageActionTypes;
+
+enum PolicyMetaType {
+ POLICY_META_TYPE_NONE = 0,
+};
+
+struct PolicyMetaNone {
+ static const PolicyMetaType TYPE = POLICY_META_TYPE_NONE;
+
+ PolicyMetaNone() {
+ }
+
+ void encode(bufferlist& bl) const {
+ }
+
+ void decode(__u8 version, bufferlist::const_iterator& it) {
+ }
+
+ void dump(Formatter *f) const {
+ }
+};
+
+struct PolicyMetaUnknown {
+ static const PolicyMetaType TYPE = static_cast<PolicyMetaType>(-1);
+
+ PolicyMetaUnknown() {
+ }
+
+ void encode(bufferlist& bl) const {
+ ceph_abort();
+ }
+
+ void decode(__u8 version, bufferlist::const_iterator& it) {
+ }
+
+ void dump(Formatter *f) const {
+ }
+};
+
+typedef boost::variant<PolicyMetaNone,
+ PolicyMetaUnknown> PolicyMeta;
+
+struct PolicyData {
+ PolicyData()
+ : policy_meta(PolicyMetaUnknown()) {
+ }
+ PolicyData(const PolicyMeta &policy_meta)
+ : policy_meta(policy_meta) {
+ }
+
+ PolicyMeta policy_meta;
+
+ PolicyMetaType get_policy_meta_type() const;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+
+ static void generate_test_instances(std::list<PolicyData *> &o);
+};
+
+WRITE_CLASS_ENCODER(PolicyData);
+
+std::ostream &operator<<(std::ostream &os, const ActionType &action_type);
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H
diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.cc b/src/tools/rbd_mirror/image_map/UpdateRequest.cc
new file mode 100644
index 000000000..799c5670f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/UpdateRequest.cc
@@ -0,0 +1,100 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+
+#include "librbd/Utils.h"
+#include "include/rbd_types.h"
+#include "cls/rbd/cls_rbd_client.h"
+
+#include "UpdateRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_map::UpdateRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+using librbd::util::create_rados_callback;
+
+static const uint32_t MAX_UPDATE = 256;
+
+template <typename I>
+UpdateRequest<I>::UpdateRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
+ std::set<std::string> &&remove_global_image_ids, Context *on_finish)
+ : m_ioctx(ioctx),
+ m_update_mapping(update_mapping),
+ m_remove_global_image_ids(remove_global_image_ids),
+ m_on_finish(on_finish) {
+}
+
+template <typename I>
+void UpdateRequest<I>::send() {
+ dout(20) << dendl;
+
+ update_image_map();
+}
+
+template <typename I>
+void UpdateRequest<I>::update_image_map() {
+ dout(20) << dendl;
+
+ if (m_update_mapping.empty() && m_remove_global_image_ids.empty()) {
+ finish(0);
+ return;
+ }
+
+ uint32_t nr_updates = 0;
+ librados::ObjectWriteOperation op;
+
+ auto it1 = m_update_mapping.begin();
+ while (it1 != m_update_mapping.end() && nr_updates++ < MAX_UPDATE) {
+ librbd::cls_client::mirror_image_map_update(&op, it1->first, it1->second);
+ it1 = m_update_mapping.erase(it1);
+ }
+
+ auto it2 = m_remove_global_image_ids.begin();
+ while (it2 != m_remove_global_image_ids.end() && nr_updates++ < MAX_UPDATE) {
+ librbd::cls_client::mirror_image_map_remove(&op, *it2);
+ it2 = m_remove_global_image_ids.erase(it2);
+ }
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ UpdateRequest, &UpdateRequest::handle_update_image_map>(this);
+ int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void UpdateRequest<I>::handle_update_image_map(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to update image map: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ update_image_map();
+}
+
+template <typename I>
+void UpdateRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_map::UpdateRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.h b/src/tools/rbd_mirror/image_map/UpdateRequest.h
new file mode 100644
index 000000000..841cc6f9b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/UpdateRequest.h
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H
+#define CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "include/rados/librados.hpp"
+
+class Context;
+
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_map {
+
+template<typename ImageCtxT = librbd::ImageCtx>
+class UpdateRequest {
+public:
+ // accepts an image map for updation and a collection of
+ // global image ids to purge.
+ static UpdateRequest *create(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
+ std::set<std::string> &&remove_global_image_ids, Context *on_finish) {
+ return new UpdateRequest(ioctx, std::move(update_mapping), std::move(remove_global_image_ids),
+ on_finish);
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * | . . . . . . . .
+ * v v . MAX_UPDATE
+ * UPDATE_IMAGE_MAP. . . . . . .
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ UpdateRequest(librados::IoCtx &ioctx,
+ std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping,
+ std::set<std::string> &&remove_global_image_ids, Context *on_finish);
+
+ librados::IoCtx &m_ioctx;
+ std::map<std::string, cls::rbd::MirrorImageMap> m_update_mapping;
+ std::set<std::string> m_remove_global_image_ids;
+ Context *m_on_finish;
+
+ void update_image_map();
+ void handle_update_image_map(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_map
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
new file mode 100644
index 000000000..bda5b5f9b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
@@ -0,0 +1,485 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "BootstrapRequest.h"
+#include "CreateImageRequest.h"
+#include "OpenImageRequest.h"
+#include "OpenLocalImageRequest.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "journal/Journaler.h"
+#include "journal/Settings.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/journal/Types.h"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include "tools/rbd_mirror/ImageSync.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "BootstrapRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+using librbd::util::unique_lock_name;
+
+template <typename I>
+BootstrapRequest<I>::BootstrapRequest(
+ Threads<I>* threads,
+ librados::IoCtx& local_io_ctx,
+ librados::IoCtx& remote_io_ctx,
+ InstanceWatcher<I>* instance_watcher,
+ const std::string& global_image_id,
+ const std::string& local_mirror_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ ::journal::CacheManagerHandler* cache_manager_handler,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ StateBuilder<I>** state_builder,
+ bool* do_resync,
+ Context* on_finish)
+ : CancelableRequest("rbd::mirror::image_replayer::BootstrapRequest",
+ reinterpret_cast<CephContext*>(local_io_ctx.cct()),
+ on_finish),
+ m_threads(threads),
+ m_local_io_ctx(local_io_ctx),
+ m_remote_io_ctx(remote_io_ctx),
+ m_instance_watcher(instance_watcher),
+ m_global_image_id(global_image_id),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_remote_pool_meta(remote_pool_meta),
+ m_cache_manager_handler(cache_manager_handler),
+ m_pool_meta_cache(pool_meta_cache),
+ m_progress_ctx(progress_ctx),
+ m_state_builder(state_builder),
+ m_do_resync(do_resync),
+ m_lock(ceph::make_mutex(unique_lock_name("BootstrapRequest::m_lock",
+ this))) {
+ dout(10) << dendl;
+}
+
+template <typename I>
+bool BootstrapRequest<I>::is_syncing() const {
+ std::lock_guard locker{m_lock};
+ return (m_image_sync != nullptr);
+}
+
+template <typename I>
+void BootstrapRequest<I>::send() {
+ *m_do_resync = false;
+
+ prepare_local_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::cancel() {
+ dout(10) << dendl;
+
+ std::lock_guard locker{m_lock};
+ m_canceled = true;
+
+ if (m_image_sync != nullptr) {
+ m_image_sync->cancel();
+ }
+}
+
+template <typename I>
+std::string BootstrapRequest<I>::get_local_image_name() const {
+ std::unique_lock locker{m_lock};
+ return m_local_image_name;
+}
+
+template <typename I>
+void BootstrapRequest<I>::prepare_local_image() {
+ dout(10) << dendl;
+ update_progress("PREPARE_LOCAL_IMAGE");
+
+ {
+ std::unique_lock locker{m_lock};
+ m_local_image_name = m_global_image_id;
+ }
+
+ ceph_assert(*m_state_builder == nullptr);
+ auto ctx = create_context_callback<
+ BootstrapRequest, &BootstrapRequest<I>::handle_prepare_local_image>(this);
+ auto req = image_replayer::PrepareLocalImageRequest<I>::create(
+ m_local_io_ctx, m_global_image_id, &m_prepare_local_image_name,
+ m_state_builder, m_threads->work_queue, ctx);
+ req->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_prepare_local_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(r < 0 || *m_state_builder != nullptr);
+ if (r == -ENOENT) {
+ dout(10) << "local image does not exist" << dendl;
+ } else if (r < 0) {
+ derr << "error preparing local image for replay: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ // image replayer will detect the name change (if any) at next
+ // status update
+ if (r >= 0 && !m_prepare_local_image_name.empty()) {
+ std::unique_lock locker{m_lock};
+ m_local_image_name = m_prepare_local_image_name;
+ }
+
+ prepare_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::prepare_remote_image() {
+ dout(10) << dendl;
+ update_progress("PREPARE_REMOTE_IMAGE");
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest, &BootstrapRequest<I>::handle_prepare_remote_image>(this);
+ auto req = image_replayer::PrepareRemoteImageRequest<I>::create(
+ m_threads, m_local_io_ctx, m_remote_io_ctx, m_global_image_id,
+ m_local_mirror_uuid, m_remote_pool_meta, m_cache_manager_handler,
+ m_state_builder, ctx);
+ req->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_prepare_remote_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ auto state_builder = *m_state_builder;
+ ceph_assert(state_builder == nullptr ||
+ !state_builder->remote_mirror_uuid.empty());
+
+ if (state_builder != nullptr && state_builder->is_local_primary()) {
+ dout(5) << "local image is primary" << dendl;
+ finish(-ENOMSG);
+ return;
+ } else if (r == -ENOENT || state_builder == nullptr) {
+ dout(10) << "remote image does not exist";
+ if (state_builder != nullptr) {
+ *_dout << ": "
+ << "local_image_id=" << state_builder->local_image_id << ", "
+ << "remote_image_id=" << state_builder->remote_image_id << ", "
+ << "is_linked=" << state_builder->is_linked();
+ }
+ *_dout << dendl;
+
+ // TODO need to support multiple remote images
+ if (state_builder != nullptr &&
+ state_builder->remote_image_id.empty() &&
+ (state_builder->local_image_id.empty() ||
+ state_builder->is_linked())) {
+ // both images doesn't exist or local image exists and is non-primary
+ // and linked to the missing remote image
+ finish(-ENOLINK);
+ } else {
+ finish(-ENOENT);
+ }
+ return;
+ } else if (r < 0) {
+ derr << "error preparing remote image for replay: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ if (!state_builder->is_remote_primary()) {
+ ceph_assert(!state_builder->remote_image_id.empty());
+ if (state_builder->local_image_id.empty()) {
+ dout(10) << "local image does not exist and remote image is not primary"
+ << dendl;
+ finish(-EREMOTEIO);
+ return;
+ } else if (!state_builder->is_linked()) {
+ dout(10) << "local image is unlinked and remote image is not primary"
+ << dendl;
+ finish(-EREMOTEIO);
+ return;
+ }
+ // if the local image is linked to the remote image, we ignore that
+ // the remote image is not primary so that we can replay demotion
+ }
+
+ open_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::open_remote_image() {
+ ceph_assert(*m_state_builder != nullptr);
+ auto remote_image_id = (*m_state_builder)->remote_image_id;
+ dout(15) << "remote_image_id=" << remote_image_id << dendl;
+
+ update_progress("OPEN_REMOTE_IMAGE");
+
+ auto ctx = create_context_callback<
+ BootstrapRequest<I>,
+ &BootstrapRequest<I>::handle_open_remote_image>(this);
+ ceph_assert(*m_state_builder != nullptr);
+ OpenImageRequest<I> *request = OpenImageRequest<I>::create(
+ m_remote_io_ctx, &(*m_state_builder)->remote_image_ctx, remote_image_id,
+ false, ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_open_remote_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ ceph_assert(*m_state_builder != nullptr);
+ if (r < 0) {
+ derr << "failed to open remote image: " << cpp_strerror(r) << dendl;
+ ceph_assert((*m_state_builder)->remote_image_ctx == nullptr);
+ finish(r);
+ return;
+ }
+
+ if ((*m_state_builder)->local_image_id.empty()) {
+ create_local_image();
+ return;
+ }
+
+ open_local_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::open_local_image() {
+ ceph_assert(*m_state_builder != nullptr);
+ auto local_image_id = (*m_state_builder)->local_image_id;
+
+ dout(15) << "local_image_id=" << local_image_id << dendl;
+
+ update_progress("OPEN_LOCAL_IMAGE");
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_local_image>(
+ this);
+ OpenLocalImageRequest<I> *request = OpenLocalImageRequest<I>::create(
+ m_local_io_ctx, &(*m_state_builder)->local_image_ctx, local_image_id,
+ m_threads->work_queue, ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_open_local_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ ceph_assert(*m_state_builder != nullptr);
+ auto local_image_ctx = (*m_state_builder)->local_image_ctx;
+ ceph_assert((r >= 0 && local_image_ctx != nullptr) ||
+ (r < 0 && local_image_ctx == nullptr));
+
+ if (r == -ENOENT) {
+ dout(10) << "local image missing" << dendl;
+ create_local_image();
+ return;
+ } else if (r == -EREMOTEIO) {
+ dout(10) << "local image is primary -- skipping image replay" << dendl;
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ } else if (r < 0) {
+ derr << "failed to open local image: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ }
+
+ prepare_replay();
+}
+
+template <typename I>
+void BootstrapRequest<I>::prepare_replay() {
+ dout(10) << dendl;
+ update_progress("PREPARE_REPLAY");
+
+ ceph_assert(*m_state_builder != nullptr);
+ auto ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_prepare_replay>(this);
+ auto request = (*m_state_builder)->create_prepare_replay_request(
+ m_local_mirror_uuid, m_progress_ctx, m_do_resync, &m_syncing, ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_prepare_replay(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to prepare local replay: " << cpp_strerror(r) << dendl;
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ } else if (*m_do_resync) {
+ dout(10) << "local image resync requested" << dendl;
+ close_remote_image();
+ return;
+ } else if ((*m_state_builder)->is_disconnected()) {
+ dout(10) << "client flagged disconnected -- skipping bootstrap" << dendl;
+ // The caller is expected to detect disconnect initializing remote journal.
+ m_ret_val = 0;
+ close_remote_image();
+ return;
+ } else if (m_syncing) {
+ dout(10) << "local image still syncing to remote image" << dendl;
+ image_sync();
+ return;
+ }
+
+ close_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::create_local_image() {
+ dout(10) << dendl;
+ update_progress("CREATE_LOCAL_IMAGE");
+
+ ceph_assert(*m_state_builder != nullptr);
+ auto ctx = create_context_callback<
+ BootstrapRequest<I>,
+ &BootstrapRequest<I>::handle_create_local_image>(this);
+ auto request = (*m_state_builder)->create_local_image_request(
+ m_threads, m_local_io_ctx, m_global_image_id, m_pool_meta_cache,
+ m_progress_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_create_local_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ if (r == -ENOENT) {
+ dout(10) << "parent image does not exist" << dendl;
+ } else {
+ derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+ }
+ m_ret_val = r;
+ close_remote_image();
+ return;
+ }
+
+ open_local_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::image_sync() {
+ std::unique_lock locker{m_lock};
+ if (m_canceled) {
+ locker.unlock();
+
+ m_ret_val = -ECANCELED;
+ dout(10) << "request canceled" << dendl;
+ close_remote_image();
+ return;
+ }
+
+ dout(15) << dendl;
+ ceph_assert(m_image_sync == nullptr);
+
+ auto state_builder = *m_state_builder;
+ auto sync_point_handler = state_builder->create_sync_point_handler();
+
+ Context *ctx = create_context_callback<
+ BootstrapRequest<I>, &BootstrapRequest<I>::handle_image_sync>(this);
+ m_image_sync = ImageSync<I>::create(
+ m_threads, state_builder->local_image_ctx, state_builder->remote_image_ctx,
+ m_local_mirror_uuid, sync_point_handler, m_instance_watcher,
+ m_progress_ctx, ctx);
+ m_image_sync->get();
+ locker.unlock();
+
+ update_progress("IMAGE_SYNC");
+ m_image_sync->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_image_sync(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ {
+ std::lock_guard locker{m_lock};
+ m_image_sync->put();
+ m_image_sync = nullptr;
+
+ (*m_state_builder)->destroy_sync_point_handler();
+ }
+
+ if (r < 0) {
+ if (r == -ECANCELED) {
+ dout(10) << "request canceled" << dendl;
+ } else {
+ derr << "failed to sync remote image: " << cpp_strerror(r) << dendl;
+ }
+ m_ret_val = r;
+ }
+
+ close_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::close_remote_image() {
+ if ((*m_state_builder)->replay_requires_remote_image()) {
+ finish(m_ret_val);
+ return;
+ }
+
+ dout(15) << dendl;
+
+ update_progress("CLOSE_REMOTE_IMAGE");
+
+ auto ctx = create_context_callback<
+ BootstrapRequest<I>,
+ &BootstrapRequest<I>::handle_close_remote_image>(this);
+ ceph_assert(*m_state_builder != nullptr);
+ (*m_state_builder)->close_remote_image(ctx);
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_close_remote_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error encountered closing remote image: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ finish(m_ret_val);
+}
+
+template <typename I>
+void BootstrapRequest<I>::update_progress(const std::string &description) {
+ dout(15) << description << dendl;
+
+ if (m_progress_ctx) {
+ m_progress_ctx->update_progress(description);
+ }
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
new file mode 100644
index 000000000..f5bb8dd8a
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
@@ -0,0 +1,181 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/Types.h"
+#include "tools/rbd_mirror/CancelableRequest.h"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+class Context;
+
+namespace journal { class CacheManagerHandler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+
+template <typename> class ImageSync;
+template <typename> class InstanceWatcher;
+struct PoolMetaCache;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class BootstrapRequest : public CancelableRequest {
+public:
+ typedef rbd::mirror::ProgressContext ProgressContext;
+
+ static BootstrapRequest* create(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ librados::IoCtx& remote_io_ctx,
+ InstanceWatcher<ImageCtxT>* instance_watcher,
+ const std::string& global_image_id,
+ const std::string& local_mirror_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ ::journal::CacheManagerHandler* cache_manager_handler,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>** state_builder,
+ bool* do_resync,
+ Context* on_finish) {
+ return new BootstrapRequest(
+ threads, local_io_ctx, remote_io_ctx, instance_watcher, global_image_id,
+ local_mirror_uuid, remote_pool_meta, cache_manager_handler,
+ pool_meta_cache, progress_ctx, state_builder, do_resync, on_finish);
+ }
+
+ BootstrapRequest(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ librados::IoCtx& remote_io_ctx,
+ InstanceWatcher<ImageCtxT>* instance_watcher,
+ const std::string& global_image_id,
+ const std::string& local_mirror_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ ::journal::CacheManagerHandler* cache_manager_handler,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>** state_builder,
+ bool* do_resync,
+ Context* on_finish);
+
+ bool is_syncing() const;
+
+ void send() override;
+ void cancel() override;
+
+ std::string get_local_image_name() const;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v (error)
+ * PREPARE_LOCAL_IMAGE * * * * * * * * * * * * * * * * * *
+ * | *
+ * v (error) *
+ * PREPARE_REMOTE_IMAGE * * * * * * * * * * * * * * * * * *
+ * | *
+ * v (error) *
+ * OPEN_REMOTE_IMAGE * * * * * * * * * * * * * * * * * * *
+ * | *
+ * | *
+ * \----> CREATE_LOCAL_IMAGE * * * * * * * * * * * * *
+ * | | ^ * *
+ * | | . * *
+ * | v . (image DNE) * *
+ * \----> OPEN_LOCAL_IMAGE * * * * * * * * * * * * * *
+ * | * *
+ * | * *
+ * v * *
+ * PREPARE_REPLAY * * * * * * * * * * * * * * *
+ * | * *
+ * | * *
+ * v (skip if not needed) * *
+ * IMAGE_SYNC * * * * * * * * * * * * * * * * *
+ * | * *
+ * | * *
+ * /---------/ * *
+ * | * *
+ * v * *
+ * CLOSE_REMOTE_IMAGE < * * * * * * * * * * * * * * * * *
+ * | *
+ * v *
+ * <finish> < * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ * @endverbatim
+ */
+ Threads<ImageCtxT>* m_threads;
+ librados::IoCtx &m_local_io_ctx;
+ librados::IoCtx &m_remote_io_ctx;
+ InstanceWatcher<ImageCtxT> *m_instance_watcher;
+ std::string m_global_image_id;
+ std::string m_local_mirror_uuid;
+ RemotePoolMeta m_remote_pool_meta;
+ ::journal::CacheManagerHandler *m_cache_manager_handler;
+ PoolMetaCache* m_pool_meta_cache;
+ ProgressContext *m_progress_ctx;
+ StateBuilder<ImageCtxT>** m_state_builder;
+ bool *m_do_resync;
+
+ mutable ceph::mutex m_lock;
+ bool m_canceled = false;
+
+ int m_ret_val = 0;
+
+ std::string m_local_image_name;
+ std::string m_prepare_local_image_name;
+
+ bool m_syncing = false;
+ ImageSync<ImageCtxT> *m_image_sync = nullptr;
+
+ void prepare_local_image();
+ void handle_prepare_local_image(int r);
+
+ void prepare_remote_image();
+ void handle_prepare_remote_image(int r);
+
+ void open_remote_image();
+ void handle_open_remote_image(int r);
+
+ void open_local_image();
+ void handle_open_local_image(int r);
+
+ void create_local_image();
+ void handle_create_local_image(int r);
+
+ void prepare_replay();
+ void handle_prepare_replay(int r);
+
+ void image_sync();
+ void handle_image_sync(int r);
+
+ void close_remote_image();
+ void handle_close_remote_image(int r);
+
+ void update_progress(const std::string &description);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc
new file mode 100644
index 000000000..872c8baa9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CloseImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::CloseImageRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+CloseImageRequest<I>::CloseImageRequest(I **image_ctx, Context *on_finish)
+ : m_image_ctx(image_ctx), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void CloseImageRequest<I>::send() {
+ close_image();
+}
+
+template <typename I>
+void CloseImageRequest<I>::close_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ CloseImageRequest<I>, &CloseImageRequest<I>::handle_close_image>(this);
+ (*m_image_ctx)->state->close(ctx);
+}
+
+template <typename I>
+void CloseImageRequest<I>::handle_close_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": error encountered while closing image: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ *m_image_ctx = nullptr;
+
+ m_on_finish->complete(0);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;
+
diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h
new file mode 100644
index 000000000..02481369d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class CloseImageRequest {
+public:
+ static CloseImageRequest* create(ImageCtxT **image_ctx, Context *on_finish) {
+ return new CloseImageRequest(image_ctx, on_finish);
+ }
+
+ CloseImageRequest(ImageCtxT **image_ctx, Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * CLOSE_IMAGE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ ImageCtxT **m_image_ctx;
+ Context *m_on_finish;
+
+ void close_image();
+ void handle_close_image(int r);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc
new file mode 100644
index 000000000..641bb03e8
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc
@@ -0,0 +1,451 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CreateImageRequest.h"
+#include "CloseImageRequest.h"
+#include "OpenImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/internal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/image/CreateRequest.h"
+#include "librbd/image/CloneRequest.h"
+#include "tools/rbd_mirror/PoolMetaCache.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_sync/Utils.h"
+#include <boost/algorithm/string/predicate.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::CreateImageRequest: " \
+ << this << " " << __func__ << ": "
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename I>
+CreateImageRequest<I>::CreateImageRequest(
+ Threads<I>* threads,
+ librados::IoCtx &local_io_ctx,
+ const std::string &global_image_id,
+ const std::string &remote_mirror_uuid,
+ const std::string &local_image_name,
+ const std::string &local_image_id,
+ I *remote_image_ctx,
+ PoolMetaCache* pool_meta_cache,
+ cls::rbd::MirrorImageMode mirror_image_mode,
+ Context *on_finish)
+ : m_threads(threads), m_local_io_ctx(local_io_ctx),
+ m_global_image_id(global_image_id),
+ m_remote_mirror_uuid(remote_mirror_uuid),
+ m_local_image_name(local_image_name), m_local_image_id(local_image_id),
+ m_remote_image_ctx(remote_image_ctx),
+ m_pool_meta_cache(pool_meta_cache),
+ m_mirror_image_mode(mirror_image_mode), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void CreateImageRequest<I>::send() {
+ int r = validate_parent();
+ if (r < 0) {
+ error(r);
+ return;
+ }
+
+ if (m_remote_parent_spec.pool_id == -1) {
+ create_image();
+ } else {
+ get_parent_global_image_id();
+ }
+}
+
+template <typename I>
+void CreateImageRequest<I>::create_image() {
+ dout(10) << dendl;
+
+ using klass = CreateImageRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_create_image>(this);
+
+ std::shared_lock image_locker{m_remote_image_ctx->image_lock};
+
+ auto& config{
+ reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf};
+
+ librbd::ImageOptions image_options;
+ populate_image_options(&image_options);
+
+ auto req = librbd::image::CreateRequest<I>::create(
+ config, m_local_io_ctx, m_local_image_name, m_local_image_id,
+ m_remote_image_ctx->size, image_options, 0U, m_mirror_image_mode,
+ m_global_image_id, m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_create_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r == -EBADF) {
+ dout(5) << "image id " << m_local_image_id << " already in-use" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void CreateImageRequest<I>::get_parent_global_image_id() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_start(&op,
+ m_remote_parent_spec.image_id);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_get_parent_global_image_id>(this);
+ m_out_bl.clear();
+ int r = m_remote_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_get_parent_global_image_id(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r == 0) {
+ cls::rbd::MirrorImage mirror_image;
+ auto iter = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image);
+ if (r == 0) {
+ m_parent_global_image_id = mirror_image.global_image_id;
+ dout(15) << "parent_global_image_id=" << m_parent_global_image_id
+ << dendl;
+ }
+ }
+
+ if (r == -ENOENT) {
+ dout(10) << "parent image " << m_remote_parent_spec.image_id
+ << " not mirrored" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to retrieve global image id for parent image "
+ << m_remote_parent_spec.image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ get_local_parent_image_id();
+}
+
+template <typename I>
+void CreateImageRequest<I>::get_local_parent_image_id() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_image_id_start(
+ &op, m_parent_global_image_id);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_get_local_parent_image_id>(this);
+ m_out_bl.clear();
+ int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
+ &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_get_local_parent_image_id(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto iter = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_image_id_finish(
+ &iter, &m_local_parent_spec.image_id);
+ }
+
+ if (r == -ENOENT) {
+ dout(10) << "parent image " << m_parent_global_image_id << " not "
+ << "registered locally" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to retrieve local image id for parent image "
+ << m_parent_global_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ open_remote_parent_image();
+}
+
+template <typename I>
+void CreateImageRequest<I>::open_remote_parent_image() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_open_remote_parent_image>(this);
+ OpenImageRequest<I> *request = OpenImageRequest<I>::create(
+ m_remote_parent_io_ctx, &m_remote_parent_image_ctx,
+ m_remote_parent_spec.image_id, true, ctx);
+ request->send();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_open_remote_parent_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to open remote parent image " << m_parent_pool_name << "/"
+ << m_remote_parent_spec.image_id << dendl;
+ finish(r);
+ return;
+ }
+
+ clone_image();
+}
+
+template <typename I>
+void CreateImageRequest<I>::clone_image() {
+ dout(10) << dendl;
+
+ LocalPoolMeta local_parent_pool_meta;
+ int r = m_pool_meta_cache->get_local_pool_meta(
+ m_local_parent_io_ctx.get_id(), &local_parent_pool_meta);
+ if (r < 0) {
+ derr << "failed to retrieve local parent mirror uuid for pool "
+ << m_local_parent_io_ctx.get_id() << dendl;
+ m_ret_val = r;
+ close_remote_parent_image();
+ return;
+ }
+
+ // ensure no image sync snapshots for the local cluster exist in the
+ // remote image
+ bool found_parent_snap = false;
+ bool found_image_sync_snap = false;
+ std::string snap_name;
+ cls::rbd::SnapshotNamespace snap_namespace;
+ {
+ auto snap_prefix = image_sync::util::get_snapshot_name_prefix(
+ local_parent_pool_meta.mirror_uuid);
+
+ std::shared_lock remote_image_locker(m_remote_parent_image_ctx->image_lock);
+ for (auto snap_info : m_remote_parent_image_ctx->snap_info) {
+ if (snap_info.first == m_remote_parent_spec.snap_id) {
+ found_parent_snap = true;
+ snap_name = snap_info.second.name;
+ snap_namespace = snap_info.second.snap_namespace;
+ } else if (boost::starts_with(snap_info.second.name, snap_prefix)) {
+ found_image_sync_snap = true;
+ }
+ }
+ }
+
+ if (!found_parent_snap) {
+ dout(15) << "remote parent image snapshot not found" << dendl;
+ m_ret_val = -ENOENT;
+ close_remote_parent_image();
+ return;
+ } else if (found_image_sync_snap) {
+ dout(15) << "parent image not synced to local cluster" << dendl;
+ m_ret_val = -ENOENT;
+ close_remote_parent_image();
+ return;
+ }
+
+ librbd::ImageOptions opts;
+ populate_image_options(&opts);
+
+ auto& config{
+ reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf};
+
+ using klass = CreateImageRequest<I>;
+ Context *ctx = create_context_callback<
+ klass, &klass::handle_clone_image>(this);
+
+ librbd::image::CloneRequest<I> *req = librbd::image::CloneRequest<I>::create(
+ config, m_local_parent_io_ctx, m_local_parent_spec.image_id, snap_name,
+ snap_namespace, CEPH_NOSNAP, m_local_io_ctx, m_local_image_name,
+ m_local_image_id, opts, m_mirror_image_mode, m_global_image_id,
+ m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue, ctx);
+ req->send();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_clone_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r == -EBADF) {
+ dout(5) << "image id " << m_local_image_id << " already in-use" << dendl;
+ m_ret_val = r;
+ } else if (r < 0) {
+ derr << "failed to clone image " << m_parent_pool_name << "/"
+ << m_remote_parent_spec.image_id << " to "
+ << m_local_image_name << dendl;
+ m_ret_val = r;
+ }
+
+ close_remote_parent_image();
+}
+
+template <typename I>
+void CreateImageRequest<I>::close_remote_parent_image() {
+ dout(10) << dendl;
+ Context *ctx = create_context_callback<
+ CreateImageRequest<I>,
+ &CreateImageRequest<I>::handle_close_remote_parent_image>(this);
+ CloseImageRequest<I> *request = CloseImageRequest<I>::create(
+ &m_remote_parent_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void CreateImageRequest<I>::handle_close_remote_parent_image(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "error encountered closing remote parent image: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ finish(m_ret_val);
+}
+
+template <typename I>
+void CreateImageRequest<I>::error(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_threads->work_queue->queue(create_context_callback<
+ CreateImageRequest<I>, &CreateImageRequest<I>::finish>(this), r);
+}
+
+template <typename I>
+void CreateImageRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+ m_on_finish->complete(r);
+ delete this;
+}
+
+template <typename I>
+int CreateImageRequest<I>::validate_parent() {
+ std::shared_lock owner_locker{m_remote_image_ctx->owner_lock};
+ std::shared_lock image_locker{m_remote_image_ctx->image_lock};
+
+ m_remote_parent_spec = m_remote_image_ctx->parent_md.spec;
+
+ // scan all remote snapshots for a linked parent
+ for (auto &snap_info_pair : m_remote_image_ctx->snap_info) {
+ auto &parent_spec = snap_info_pair.second.parent.spec;
+ if (parent_spec.pool_id == -1) {
+ continue;
+ } else if (m_remote_parent_spec.pool_id == -1) {
+ m_remote_parent_spec = parent_spec;
+ continue;
+ }
+
+ if (m_remote_parent_spec != parent_spec) {
+ derr << "remote image parent spec mismatch" << dendl;
+ return -EINVAL;
+ }
+ }
+
+ if (m_remote_parent_spec.pool_id == -1) {
+ return 0;
+ }
+
+ // map remote parent pool to local parent pool
+ int r = librbd::util::create_ioctx(
+ m_remote_image_ctx->md_ctx, "remote parent pool",
+ m_remote_parent_spec.pool_id, m_remote_parent_spec.pool_namespace,
+ &m_remote_parent_io_ctx);
+ if (r < 0) {
+ derr << "failed to open remote parent pool " << m_remote_parent_spec.pool_id
+ << ": " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ m_parent_pool_name = m_remote_parent_io_ctx.get_pool_name();
+
+ librados::Rados local_rados(m_local_io_ctx);
+ r = local_rados.ioctx_create(m_parent_pool_name.c_str(),
+ m_local_parent_io_ctx);
+ if (r < 0) {
+ derr << "failed to open local parent pool " << m_parent_pool_name << ": "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ m_local_parent_io_ctx.set_namespace(m_remote_parent_io_ctx.get_namespace());
+
+ return 0;
+}
+
+template <typename I>
+void CreateImageRequest<I>::populate_image_options(
+ librbd::ImageOptions* image_options) {
+ image_options->set(RBD_IMAGE_OPTION_FEATURES,
+ m_remote_image_ctx->features);
+ image_options->set(RBD_IMAGE_OPTION_ORDER, m_remote_image_ctx->order);
+ image_options->set(RBD_IMAGE_OPTION_STRIPE_UNIT,
+ m_remote_image_ctx->stripe_unit);
+ image_options->set(RBD_IMAGE_OPTION_STRIPE_COUNT,
+ m_remote_image_ctx->stripe_count);
+
+ // Determine the data pool for the local image as follows:
+ // 1. If the local pool has a default data pool, use it.
+ // 2. If the remote image has a data pool different from its metadata pool and
+ // a pool with the same name exists locally, use it.
+ // 3. Don't set the data pool explicitly.
+ std::string data_pool;
+ librados::Rados local_rados(m_local_io_ctx);
+ auto default_data_pool = g_ceph_context->_conf.get_val<std::string>("rbd_default_data_pool");
+ auto remote_md_pool = m_remote_image_ctx->md_ctx.get_pool_name();
+ auto remote_data_pool = m_remote_image_ctx->data_ctx.get_pool_name();
+
+ if (default_data_pool != "") {
+ data_pool = default_data_pool;
+ } else if (remote_data_pool != remote_md_pool) {
+ if (local_rados.pool_lookup(remote_data_pool.c_str()) >= 0) {
+ data_pool = remote_data_pool;
+ }
+ }
+
+ if (data_pool != "") {
+ image_options->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool);
+ }
+
+ if (m_remote_parent_spec.pool_id != -1) {
+ uint64_t clone_format = 1;
+ if (m_remote_image_ctx->test_op_features(
+ RBD_OPERATION_FEATURE_CLONE_CHILD)) {
+ clone_format = 2;
+ }
+ image_options->set(RBD_IMAGE_OPTION_CLONE_FORMAT, clone_format);
+ }
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h
new file mode 100644
index 000000000..2ff7794e8
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h
@@ -0,0 +1,144 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/types.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/Types.h"
+#include <string>
+
+class Context;
+namespace librbd { class ImageCtx; }
+namespace librbd { class ImageOptions; }
+
+namespace rbd {
+namespace mirror {
+
+class PoolMetaCache;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class CreateImageRequest {
+public:
+ static CreateImageRequest *create(
+ Threads<ImageCtxT> *threads,
+ librados::IoCtx &local_io_ctx,
+ const std::string &global_image_id,
+ const std::string &remote_mirror_uuid,
+ const std::string &local_image_name,
+ const std::string &local_image_id,
+ ImageCtxT *remote_image_ctx,
+ PoolMetaCache* pool_meta_cache,
+ cls::rbd::MirrorImageMode mirror_image_mode,
+ Context *on_finish) {
+ return new CreateImageRequest(threads, local_io_ctx, global_image_id,
+ remote_mirror_uuid, local_image_name,
+ local_image_id, remote_image_ctx,
+ pool_meta_cache, mirror_image_mode,
+ on_finish);
+ }
+
+ CreateImageRequest(
+ Threads<ImageCtxT> *threads, librados::IoCtx &local_io_ctx,
+ const std::string &global_image_id,
+ const std::string &remote_mirror_uuid,
+ const std::string &local_image_name,
+ const std::string &local_image_id,
+ ImageCtxT *remote_image_ctx,
+ PoolMetaCache* pool_meta_cache,
+ cls::rbd::MirrorImageMode mirror_image_mode,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start> * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * | *
+ * | (non-clone) *
+ * |\------------> CREATE_IMAGE ---------------------\ * (error)
+ * | | *
+ * | (clone) | *
+ * \-------------> GET_PARENT_GLOBAL_IMAGE_ID * * * | * * * *
+ * | | * *
+ * v | *
+ * GET_LOCAL_PARENT_IMAGE_ID * * * * | * * * *
+ * | | * *
+ * v | *
+ * OPEN_REMOTE_PARENT * * * * * * * | * * * *
+ * | | * *
+ * v | *
+ * CLONE_IMAGE | *
+ * | | *
+ * v | *
+ * CLOSE_REMOTE_PARENT | *
+ * | v *
+ * \------------------------> <finish> < * *
+ * @endverbatim
+ */
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx &m_local_io_ctx;
+ std::string m_global_image_id;
+ std::string m_remote_mirror_uuid;
+ std::string m_local_image_name;
+ std::string m_local_image_id;
+ ImageCtxT *m_remote_image_ctx;
+ PoolMetaCache* m_pool_meta_cache;
+ cls::rbd::MirrorImageMode m_mirror_image_mode;
+ Context *m_on_finish;
+
+ librados::IoCtx m_remote_parent_io_ctx;
+ ImageCtxT *m_remote_parent_image_ctx = nullptr;
+ cls::rbd::ParentImageSpec m_remote_parent_spec;
+
+ librados::IoCtx m_local_parent_io_ctx;
+ cls::rbd::ParentImageSpec m_local_parent_spec;
+
+ bufferlist m_out_bl;
+ std::string m_parent_global_image_id;
+ std::string m_parent_pool_name;
+ int m_ret_val = 0;
+
+ void create_image();
+ void handle_create_image(int r);
+
+ void get_parent_global_image_id();
+ void handle_get_parent_global_image_id(int r);
+
+ void get_local_parent_image_id();
+ void handle_get_local_parent_image_id(int r);
+
+ void open_remote_parent_image();
+ void handle_open_remote_parent_image(int r);
+
+ void clone_image();
+ void handle_clone_image(int r);
+
+ void close_remote_parent_image();
+ void handle_close_remote_parent_image(int r);
+
+ void error(int r);
+ void finish(int r);
+
+ int validate_parent();
+
+ void populate_image_options(librbd::ImageOptions* image_options);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc
new file mode 100644
index 000000000..74e975373
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc
@@ -0,0 +1,85 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "GetMirrorImageIdRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::send() {
+ dout(20) << dendl;
+ get_image_id();
+}
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::get_image_id() {
+ dout(20) << dendl;
+
+ // attempt to cross-reference a image id by the global image id
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id);
+
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ GetMirrorImageIdRequest<I>,
+ &GetMirrorImageIdRequest<I>::handle_get_image_id>(
+ this);
+ int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::handle_get_image_id(int r) {
+ if (r == 0) {
+ auto iter = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_get_image_id_finish(
+ &iter, m_image_id);
+ }
+
+ dout(20) << "r=" << r << ", "
+ << "image_id=" << *m_image_id << dendl;
+
+ if (r < 0) {
+ if (r == -ENOENT) {
+ dout(10) << "global image " << m_global_image_id << " not registered"
+ << dendl;
+ } else {
+ derr << "failed to retrieve image id: " << cpp_strerror(r) << dendl;
+ }
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::finish(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h
new file mode 100644
index 000000000..b26645138
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados_fwd.hpp"
+#include <string>
+
+namespace librbd { struct ImageCtx; }
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class GetMirrorImageIdRequest {
+public:
+ static GetMirrorImageIdRequest *create(librados::IoCtx &io_ctx,
+ const std::string &global_image_id,
+ std::string *image_id,
+ Context *on_finish) {
+ return new GetMirrorImageIdRequest(io_ctx, global_image_id, image_id,
+ on_finish);
+ }
+
+ GetMirrorImageIdRequest(librados::IoCtx &io_ctx,
+ const std::string &global_image_id,
+ std::string *image_id,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_global_image_id(global_image_id),
+ m_image_id(image_id), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_IMAGE_ID
+ * |
+ * v
+ * <finish>
+
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_global_image_id;
+ std::string *m_image_id;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+
+ void get_image_id();
+ void handle_get_image_id(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc
new file mode 100644
index 000000000..e6ab382be
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc
@@ -0,0 +1,79 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "OpenImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenImageRequest: " \
+ << this << " " << __func__ << " "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+OpenImageRequest<I>::OpenImageRequest(librados::IoCtx &io_ctx, I **image_ctx,
+ const std::string &image_id,
+ bool read_only, Context *on_finish)
+ : m_io_ctx(io_ctx), m_image_ctx(image_ctx), m_image_id(image_id),
+ m_read_only(read_only), m_on_finish(on_finish) {
+}
+
+template <typename I>
+void OpenImageRequest<I>::send() {
+ send_open_image();
+}
+
+template <typename I>
+void OpenImageRequest<I>::send_open_image() {
+ dout(20) << dendl;
+
+ *m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, m_read_only);
+
+ if (!m_read_only) {
+ // ensure non-primary images can be modified
+ (*m_image_ctx)->read_only_mask = ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+ }
+
+ Context *ctx = create_context_callback<
+ OpenImageRequest<I>, &OpenImageRequest<I>::handle_open_image>(
+ this);
+ (*m_image_ctx)->state->open(0, ctx);
+}
+
+template <typename I>
+void OpenImageRequest<I>::handle_open_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to open image '" << m_image_id << "': "
+ << cpp_strerror(r) << dendl;
+ *m_image_ctx = nullptr;
+ }
+
+ finish(r);
+}
+
+template <typename I>
+void OpenImageRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h
new file mode 100644
index 000000000..01ab31171
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h
@@ -0,0 +1,71 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class OpenImageRequest {
+public:
+ static OpenImageRequest* create(librados::IoCtx &io_ctx,
+ ImageCtxT **image_ctx,
+ const std::string &image_id,
+ bool read_only, Context *on_finish) {
+ return new OpenImageRequest(io_ctx, image_ctx, image_id, read_only,
+ on_finish);
+ }
+
+ OpenImageRequest(librados::IoCtx &io_ctx, ImageCtxT **image_ctx,
+ const std::string &image_id, bool read_only,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * OPEN_IMAGE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ librados::IoCtx &m_io_ctx;
+ ImageCtxT **m_image_ctx;
+ std::string m_image_id;
+ bool m_read_only;
+ Context *m_on_finish;
+
+ void send_open_image();
+ void handle_open_image(int r);
+
+ void send_close_image(int r);
+ void handle_close_image(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc
new file mode 100644
index 000000000..7f8d9608e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc
@@ -0,0 +1,292 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "CloseImageRequest.h"
+#include "OpenLocalImageRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/exclusive_lock/Policy.h"
+#include "librbd/journal/Policy.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenLocalImageRequest: " \
+ << this << " " << __func__ << " "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+namespace {
+
+template <typename I>
+struct MirrorExclusiveLockPolicy : public librbd::exclusive_lock::Policy {
+ I *image_ctx;
+
+ MirrorExclusiveLockPolicy(I *image_ctx) : image_ctx(image_ctx) {
+ }
+
+ bool may_auto_request_lock() override {
+ return false;
+ }
+
+ int lock_requested(bool force) override {
+ int r = -EROFS;
+ {
+ std::shared_lock owner_locker{image_ctx->owner_lock};
+ std::shared_lock image_locker{image_ctx->image_lock};
+ if (image_ctx->journal == nullptr || image_ctx->journal->is_tag_owner()) {
+ r = 0;
+ }
+ }
+
+ if (r == 0) {
+ // if the local image journal has been closed or if it was (force)
+ // promoted allow the lock to be released to another client
+ image_ctx->exclusive_lock->release_lock(nullptr);
+ }
+ return r;
+ }
+
+ bool accept_blocked_request(
+ librbd::exclusive_lock::OperationRequestType request_type) override {
+ switch (request_type) {
+ case librbd::exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE:
+ case librbd::exclusive_lock::OPERATION_REQUEST_TYPE_FORCE_PROMOTION:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
+struct MirrorJournalPolicy : public librbd::journal::Policy {
+ librbd::asio::ContextWQ *work_queue;
+
+ MirrorJournalPolicy(librbd::asio::ContextWQ *work_queue)
+ : work_queue(work_queue) {
+ }
+
+ bool append_disabled() const override {
+ // avoid recording any events to the local journal
+ return true;
+ }
+ bool journal_disabled() const override {
+ return false;
+ }
+
+ void allocate_tag_on_lock(Context *on_finish) override {
+ // rbd-mirror will manually create tags by copying them from the peer
+ work_queue->queue(on_finish, 0);
+ }
+};
+
+} // anonymous namespace
+
+template <typename I>
+OpenLocalImageRequest<I>::OpenLocalImageRequest(
+ librados::IoCtx &local_io_ctx,
+ I **local_image_ctx,
+ const std::string &local_image_id,
+ librbd::asio::ContextWQ *work_queue,
+ Context *on_finish)
+ : m_local_io_ctx(local_io_ctx), m_local_image_ctx(local_image_ctx),
+ m_local_image_id(local_image_id), m_work_queue(work_queue),
+ m_on_finish(on_finish) {
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send() {
+ send_open_image();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send_open_image() {
+ dout(20) << dendl;
+
+ *m_local_image_ctx = I::create("", m_local_image_id, nullptr,
+ m_local_io_ctx, false);
+
+ // ensure non-primary images can be modified
+ (*m_local_image_ctx)->read_only_mask =
+ ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY;
+
+ {
+ std::scoped_lock locker{(*m_local_image_ctx)->owner_lock,
+ (*m_local_image_ctx)->image_lock};
+ (*m_local_image_ctx)->set_exclusive_lock_policy(
+ new MirrorExclusiveLockPolicy<I>(*m_local_image_ctx));
+ (*m_local_image_ctx)->set_journal_policy(
+ new MirrorJournalPolicy(m_work_queue));
+ }
+
+ Context *ctx = create_context_callback<
+ OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_open_image>(
+ this);
+ (*m_local_image_ctx)->state->open(0, ctx);
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::handle_open_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ if (r == -ENOENT) {
+ dout(10) << ": local image does not exist" << dendl;
+ } else {
+ derr << ": failed to open image '" << m_local_image_id << "': "
+ << cpp_strerror(r) << dendl;
+ }
+ *m_local_image_ctx = nullptr;
+ finish(r);
+ return;
+ }
+
+ send_get_mirror_info();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send_get_mirror_info() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ OpenLocalImageRequest<I>,
+ &OpenLocalImageRequest<I>::handle_get_mirror_info>(
+ this);
+ auto request = librbd::mirror::GetInfoRequest<I>::create(
+ **m_local_image_ctx, &m_mirror_image, &m_promotion_state,
+ &m_primary_mirror_uuid, ctx);
+ request->send();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::handle_get_mirror_info(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(5) << ": local image is not mirrored" << dendl;
+ send_close_image(r);
+ return;
+ } else if (r < 0) {
+ derr << ": error querying local image primary status: " << cpp_strerror(r)
+ << dendl;
+ send_close_image(r);
+ return;
+ }
+
+ if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) {
+ dout(5) << ": local image mirroring is being disabled" << dendl;
+ send_close_image(-ENOENT);
+ return;
+ }
+
+ // if the local image owns the tag -- don't steal the lock since
+ // we aren't going to mirror peer data into this image anyway
+ if (m_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) {
+ dout(10) << ": local image is primary -- skipping image replay" << dendl;
+ send_close_image(-EREMOTEIO);
+ return;
+ }
+
+ send_lock_image();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send_lock_image() {
+ std::shared_lock owner_locker{(*m_local_image_ctx)->owner_lock};
+ if ((*m_local_image_ctx)->exclusive_lock == nullptr) {
+ owner_locker.unlock();
+ if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) {
+ finish(0);
+ } else {
+ derr << ": image does not support exclusive lock" << dendl;
+ send_close_image(-EINVAL);
+ }
+ return;
+ }
+
+ dout(20) << dendl;
+
+ // disallow any proxied maintenance operations before grabbing lock
+ (*m_local_image_ctx)->exclusive_lock->block_requests(-EROFS);
+
+ Context *ctx = create_context_callback<
+ OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_lock_image>(
+ this);
+
+ (*m_local_image_ctx)->exclusive_lock->acquire_lock(ctx);
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::handle_lock_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to lock image '" << m_local_image_id << "': "
+ << cpp_strerror(r) << dendl;
+ send_close_image(r);
+ return;
+ }
+
+ {
+ std::shared_lock owner_locker{(*m_local_image_ctx)->owner_lock};
+ if ((*m_local_image_ctx)->exclusive_lock == nullptr ||
+ !(*m_local_image_ctx)->exclusive_lock->is_lock_owner()) {
+ derr << ": image is not locked" << dendl;
+ send_close_image(-EBUSY);
+ return;
+ }
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::send_close_image(int r) {
+ dout(20) << dendl;
+
+ if (m_ret_val == 0 && r < 0) {
+ m_ret_val = r;
+ }
+
+ Context *ctx = create_context_callback<
+ OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_close_image>(
+ this);
+ CloseImageRequest<I> *request = CloseImageRequest<I>::create(
+ m_local_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::handle_close_image(int r) {
+ dout(20) << dendl;
+
+ ceph_assert(r == 0);
+ finish(m_ret_val);
+}
+
+template <typename I>
+void OpenLocalImageRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h
new file mode 100644
index 000000000..9a642bc39
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h
@@ -0,0 +1,97 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H
+
+#include "include/int_types.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/mirror/Types.h"
+#include <string>
+
+class Context;
+namespace librbd {
+class ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class OpenLocalImageRequest {
+public:
+ static OpenLocalImageRequest* create(librados::IoCtx &local_io_ctx,
+ ImageCtxT **local_image_ctx,
+ const std::string &local_image_id,
+ librbd::asio::ContextWQ *work_queue,
+ Context *on_finish) {
+ return new OpenLocalImageRequest(local_io_ctx, local_image_ctx,
+ local_image_id, work_queue, on_finish);
+ }
+
+ OpenLocalImageRequest(librados::IoCtx &local_io_ctx,
+ ImageCtxT **local_image_ctx,
+ const std::string &local_image_id,
+ librbd::asio::ContextWQ *work_queue,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * OPEN_IMAGE * * * * * * * *
+ * | *
+ * v *
+ * GET_MIRROR_INFO * * * * *
+ * | *
+ * v (skip if primary) v
+ * LOCK_IMAGE * * * > CLOSE_IMAGE
+ * | |
+ * v |
+ * <finish> <---------------/
+ *
+ * @endverbatim
+ */
+ librados::IoCtx &m_local_io_ctx;
+ ImageCtxT **m_local_image_ctx;
+ std::string m_local_image_id;
+ librbd::asio::ContextWQ *m_work_queue;
+ Context *m_on_finish;
+
+ cls::rbd::MirrorImage m_mirror_image;
+ librbd::mirror::PromotionState m_promotion_state =
+ librbd::mirror::PROMOTION_STATE_NON_PRIMARY;
+ std::string m_primary_mirror_uuid;
+ int m_ret_val = 0;
+
+ void send_open_image();
+ void handle_open_image(int r);
+
+ void send_get_mirror_info();
+ void handle_get_mirror_info(int r);
+
+ void send_lock_image();
+ void handle_lock_image(int r);
+
+ void send_close_image(int r);
+ void handle_close_image(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc
new file mode 100644
index 000000000..b1fef7254
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc
@@ -0,0 +1,197 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include "tools/rbd_mirror/ImageDeleter.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+#include <type_traits>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "PrepareLocalImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void PrepareLocalImageRequest<I>::send() {
+ dout(10) << dendl;
+ get_local_image_id();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::get_local_image_id() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ PrepareLocalImageRequest<I>,
+ &PrepareLocalImageRequest<I>::handle_get_local_image_id>(this);
+ auto req = GetMirrorImageIdRequest<I>::create(m_io_ctx, m_global_image_id,
+ &m_local_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_local_image_id(int r) {
+ dout(10) << "r=" << r << ", "
+ << "local_image_id=" << m_local_image_id << dendl;
+
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ get_local_image_name();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::get_local_image_name() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::dir_get_name_start(&op, m_local_image_id);
+
+ m_out_bl.clear();
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ PrepareLocalImageRequest<I>,
+ &PrepareLocalImageRequest<I>::handle_get_local_image_name>(this);
+ int r = m_io_ctx.aio_operate(RBD_DIRECTORY, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_local_image_name(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::dir_get_name_finish(&it, m_local_image_name);
+ }
+
+ if (r == -ENOENT) {
+ // proceed we should have a mirror image record if we got this far
+ dout(10) << "image does not exist for local image id " << m_local_image_id
+ << dendl;
+ *m_local_image_name = "";
+ } else if (r < 0) {
+ derr << "failed to retrieve image name: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ get_mirror_info();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::get_mirror_info() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ PrepareLocalImageRequest<I>,
+ &PrepareLocalImageRequest<I>::handle_get_mirror_info>(this);
+ auto req = librbd::mirror::GetInfoRequest<I>::create(
+ m_io_ctx, m_work_queue, m_local_image_id, &m_mirror_image,
+ &m_promotion_state, &m_primary_mirror_uuid, ctx);
+ req->send();
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_get_mirror_info(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve local mirror image info: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_CREATING) {
+ dout(5) << "local image is still in creating state, issuing a removal"
+ << dendl;
+ move_to_trash();
+ return;
+ } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) {
+ dout(5) << "local image mirroring is in disabling state" << dendl;
+ finish(-ERESTART);
+ return;
+ }
+
+ switch (m_mirror_image.mode) {
+ case cls::rbd::MIRROR_IMAGE_MODE_JOURNAL:
+ // journal-based local image exists
+ {
+ auto state_builder = journal::StateBuilder<I>::create(m_global_image_id);
+ state_builder->local_primary_mirror_uuid = m_primary_mirror_uuid;
+ *m_state_builder = state_builder;
+ }
+ break;
+ case cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT:
+ // snapshot-based local image exists
+ *m_state_builder = snapshot::StateBuilder<I>::create(m_global_image_id);
+ break;
+ default:
+ derr << "unsupported mirror image mode " << m_mirror_image.mode << " "
+ << "for image " << m_global_image_id << dendl;
+ finish(-EOPNOTSUPP);
+ break;
+ }
+
+ dout(10) << "local_image_id=" << m_local_image_id << ", "
+ << "local_promotion_state=" << m_promotion_state << ", "
+ << "local_primary_mirror_uuid=" << m_primary_mirror_uuid << dendl;
+ (*m_state_builder)->local_image_id = m_local_image_id;
+ (*m_state_builder)->local_promotion_state = m_promotion_state;
+ finish(0);
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::move_to_trash() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ PrepareLocalImageRequest<I>,
+ &PrepareLocalImageRequest<I>::handle_move_to_trash>(this);
+ ImageDeleter<I>::trash_move(m_io_ctx, m_global_image_id,
+ false, m_work_queue, ctx);
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::handle_move_to_trash(int r) {
+ dout(10) << ": r=" << r << dendl;
+
+ finish(-ENOENT);
+}
+
+template <typename I>
+void PrepareLocalImageRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h
new file mode 100644
index 000000000..6372169ff
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h
@@ -0,0 +1,115 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados_fwd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/Types.h"
+#include <string>
+
+struct Context;
+
+namespace librbd {
+struct ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class PrepareLocalImageRequest {
+public:
+ static PrepareLocalImageRequest *create(
+ librados::IoCtx &io_ctx,
+ const std::string &global_image_id,
+ std::string *local_image_name,
+ StateBuilder<ImageCtxT>** state_builder,
+ librbd::asio::ContextWQ *work_queue,
+ Context *on_finish) {
+ return new PrepareLocalImageRequest(io_ctx, global_image_id,
+ local_image_name, state_builder,
+ work_queue, on_finish);
+ }
+
+ PrepareLocalImageRequest(
+ librados::IoCtx &io_ctx,
+ const std::string &global_image_id,
+ std::string *local_image_name,
+ StateBuilder<ImageCtxT>** state_builder,
+ librbd::asio::ContextWQ *work_queue,
+ Context *on_finish)
+ : m_io_ctx(io_ctx), m_global_image_id(global_image_id),
+ m_local_image_name(local_image_name), m_state_builder(state_builder),
+ m_work_queue(work_queue), m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_LOCAL_IMAGE_ID
+ * |
+ * v
+ * GET_LOCAL_IMAGE_NAME
+ * |
+ * v
+ * GET_MIRROR_INFO
+ * |
+ * | (if the image mirror state is CREATING)
+ * v
+ * TRASH_MOVE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_io_ctx;
+ std::string m_global_image_id;
+ std::string *m_local_image_name;
+ StateBuilder<ImageCtxT>** m_state_builder;
+ librbd::asio::ContextWQ *m_work_queue;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+ std::string m_local_image_id;
+ cls::rbd::MirrorImage m_mirror_image;
+ librbd::mirror::PromotionState m_promotion_state;
+ std::string m_primary_mirror_uuid;
+
+ void get_local_image_id();
+ void handle_get_local_image_id(int r);
+
+ void get_local_image_name();
+ void handle_get_local_image_name(int r);
+
+ void get_mirror_info();
+ void handle_get_mirror_info(int r);
+
+ void move_to_trash();
+ void handle_move_to_trash(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc
new file mode 100644
index 000000000..45a44a300
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc
@@ -0,0 +1,283 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "journal/Settings.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/mirror/GetInfoRequest.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "PrepareRemoteImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::send() {
+ if (*m_state_builder != nullptr) {
+ (*m_state_builder)->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid;
+ auto state_builder = dynamic_cast<snapshot::StateBuilder<I>*>(*m_state_builder);
+ if (state_builder) {
+ state_builder->remote_mirror_peer_uuid = m_remote_pool_meta.mirror_peer_uuid;
+ }
+ }
+
+ get_remote_image_id();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_remote_image_id() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_get_remote_image_id>(this);
+ auto req = GetMirrorImageIdRequest<I>::create(m_remote_io_ctx,
+ m_global_image_id,
+ &m_remote_image_id, ctx);
+ req->send();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_remote_image_id(int r) {
+ dout(10) << "r=" << r << ", "
+ << "remote_image_id=" << m_remote_image_id << dendl;
+
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ get_mirror_info();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_mirror_info() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_get_mirror_info>(this);
+ auto req = librbd::mirror::GetInfoRequest<I>::create(
+ m_remote_io_ctx, m_threads->work_queue, m_remote_image_id,
+ &m_mirror_image, &m_promotion_state, &m_primary_mirror_uuid,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_mirror_info(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -ENOENT) {
+ dout(10) << "image " << m_global_image_id << " not mirrored" << dendl;
+ finish(r);
+ return;
+ } else if (r < 0) {
+ derr << "failed to retrieve mirror image details for image "
+ << m_global_image_id << ": " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ auto state_builder = *m_state_builder;
+ if (state_builder != nullptr &&
+ state_builder->get_mirror_image_mode() != m_mirror_image.mode) {
+ derr << "local and remote mirror image using different mirroring modes "
+ << "for image " << m_global_image_id << ": split-brain" << dendl;
+ finish(-EEXIST);
+ return;
+ } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) {
+ dout(5) << "remote image mirroring is being disabled" << dendl;
+ finish(-ENOENT);
+ return;
+ }
+
+ switch (m_mirror_image.mode) {
+ case cls::rbd::MIRROR_IMAGE_MODE_JOURNAL:
+ get_client();
+ break;
+ case cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT:
+ finalize_snapshot_state_builder();
+ finish(0);
+ break;
+ default:
+ derr << "unsupported mirror image mode " << m_mirror_image.mode << " "
+ << "for image " << m_global_image_id << dendl;
+ finish(-EOPNOTSUPP);
+ break;
+ }
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_client() {
+ dout(10) << dendl;
+
+ auto cct = static_cast<CephContext *>(m_local_io_ctx.cct());
+ ::journal::Settings journal_settings;
+ journal_settings.commit_interval = cct->_conf.get_val<double>(
+ "rbd_mirror_journal_commit_age");
+
+ // TODO use Journal thread pool for journal ops until converted to ASIO
+ ContextWQ* context_wq;
+ librbd::Journal<>::get_work_queue(cct, &context_wq);
+
+ ceph_assert(m_remote_journaler == nullptr);
+ m_remote_journaler = new Journaler(context_wq, m_threads->timer,
+ &m_threads->timer_lock, m_remote_io_ctx,
+ m_remote_image_id, m_local_mirror_uuid,
+ journal_settings, m_cache_manager_handler);
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_get_client>(this));
+ m_remote_journaler->get_client(m_local_mirror_uuid, &m_client, ctx);
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_client(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ MirrorPeerClientMeta client_meta;
+ if (r == -ENOENT) {
+ dout(10) << "client not registered" << dendl;
+ register_client();
+ } else if (r < 0) {
+ derr << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+ finish(r);
+ } else if (!util::decode_client_meta(m_client, &client_meta)) {
+ // require operator intervention since the data is corrupt
+ finish(-EBADMSG);
+ } else {
+ // skip registration if it already exists
+ finalize_journal_state_builder(m_client.state, client_meta);
+ finish(0);
+ }
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::register_client() {
+ dout(10) << dendl;
+
+ auto state_builder = *m_state_builder;
+ librbd::journal::MirrorPeerClientMeta client_meta{
+ (state_builder == nullptr ? "" : state_builder->local_image_id)};
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+
+ librbd::journal::ClientData client_data{client_meta};
+ bufferlist client_data_bl;
+ encode(client_data, client_data_bl);
+
+ Context *ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ PrepareRemoteImageRequest<I>,
+ &PrepareRemoteImageRequest<I>::handle_register_client>(this));
+ m_remote_journaler->register_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_register_client(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to register with remote journal: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ auto state_builder = *m_state_builder;
+ librbd::journal::MirrorPeerClientMeta client_meta{
+ (state_builder == nullptr ? "" : state_builder->local_image_id)};
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+ finalize_journal_state_builder(cls::journal::CLIENT_STATE_CONNECTED,
+ client_meta);
+ finish(0);
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::finalize_journal_state_builder(
+ cls::journal::ClientState client_state,
+ const MirrorPeerClientMeta& client_meta) {
+ journal::StateBuilder<I>* state_builder = nullptr;
+ if (*m_state_builder != nullptr) {
+ // already verified that it's a matching builder in
+ // 'handle_get_mirror_info'
+ state_builder = dynamic_cast<journal::StateBuilder<I>*>(*m_state_builder);
+ ceph_assert(state_builder != nullptr);
+ } else {
+ state_builder = journal::StateBuilder<I>::create(m_global_image_id);
+ *m_state_builder = state_builder;
+ }
+
+ state_builder->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid;
+ state_builder->remote_image_id = m_remote_image_id;
+ state_builder->remote_promotion_state = m_promotion_state;
+ state_builder->remote_journaler = m_remote_journaler;
+ state_builder->remote_client_state = client_state;
+ state_builder->remote_client_meta = client_meta;
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::finalize_snapshot_state_builder() {
+ snapshot::StateBuilder<I>* state_builder = nullptr;
+ if (*m_state_builder != nullptr) {
+ state_builder = dynamic_cast<snapshot::StateBuilder<I>*>(*m_state_builder);
+ ceph_assert(state_builder != nullptr);
+ } else {
+ state_builder = snapshot::StateBuilder<I>::create(m_global_image_id);
+ *m_state_builder = state_builder;
+ }
+
+ dout(10) << "remote_mirror_uuid=" << m_remote_pool_meta.mirror_uuid << ", "
+ << "remote_mirror_peer_uuid="
+ << m_remote_pool_meta.mirror_peer_uuid << ", "
+ << "remote_image_id=" << m_remote_image_id << ", "
+ << "remote_promotion_state=" << m_promotion_state << dendl;
+ state_builder->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid;
+ state_builder->remote_mirror_peer_uuid = m_remote_pool_meta.mirror_peer_uuid;
+ state_builder->remote_image_id = m_remote_image_id;
+ state_builder->remote_promotion_state = m_promotion_state;
+}
+
+template <typename I>
+void PrepareRemoteImageRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ delete m_remote_journaler;
+ m_remote_journaler = nullptr;
+ }
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h
new file mode 100644
index 000000000..483cfc001
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h
@@ -0,0 +1,153 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
+
+#include "include/buffer_fwd.h"
+#include "include/rados/librados_fwd.hpp"
+#include "cls/journal/cls_journal_types.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include "librbd/mirror/Types.h"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+namespace journal { class Journaler; }
+namespace journal { struct CacheManagerHandler; }
+namespace librbd { struct ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class PrepareRemoteImageRequest {
+public:
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+ typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta;
+
+ static PrepareRemoteImageRequest *create(
+ Threads<ImageCtxT> *threads,
+ librados::IoCtx &local_io_ctx,
+ librados::IoCtx &remote_io_ctx,
+ const std::string &global_image_id,
+ const std::string &local_mirror_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ ::journal::CacheManagerHandler *cache_manager_handler,
+ StateBuilder<ImageCtxT>** state_builder,
+ Context *on_finish) {
+ return new PrepareRemoteImageRequest(threads, local_io_ctx, remote_io_ctx,
+ global_image_id, local_mirror_uuid,
+ remote_pool_meta,
+ cache_manager_handler, state_builder,
+ on_finish);
+ }
+
+ PrepareRemoteImageRequest(
+ Threads<ImageCtxT> *threads,
+ librados::IoCtx &local_io_ctx,
+ librados::IoCtx &remote_io_ctx,
+ const std::string &global_image_id,
+ const std::string &local_mirror_uuid,
+ const RemotePoolMeta& remote_pool_meta,
+ ::journal::CacheManagerHandler *cache_manager_handler,
+ StateBuilder<ImageCtxT>** state_builder,
+ Context *on_finish)
+ : m_threads(threads),
+ m_local_io_ctx(local_io_ctx),
+ m_remote_io_ctx(remote_io_ctx),
+ m_global_image_id(global_image_id),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_remote_pool_meta(remote_pool_meta),
+ m_cache_manager_handler(cache_manager_handler),
+ m_state_builder(state_builder),
+ m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * GET_REMOTE_IMAGE_ID
+ * |
+ * v
+ * GET_REMOTE_MIRROR_INFO
+ * |
+ * | (journal)
+ * \-----------> GET_CLIENT
+ * | |
+ * | v (skip if not needed)
+ * | REGISTER_CLIENT
+ * | |
+ * | |
+ * |/----------------/
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ Threads<ImageCtxT> *m_threads;
+ librados::IoCtx &m_local_io_ctx;
+ librados::IoCtx &m_remote_io_ctx;
+ std::string m_global_image_id;
+ std::string m_local_mirror_uuid;
+ RemotePoolMeta m_remote_pool_meta;
+ ::journal::CacheManagerHandler *m_cache_manager_handler;
+ StateBuilder<ImageCtxT>** m_state_builder;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+ std::string m_remote_image_id;
+ cls::rbd::MirrorImage m_mirror_image;
+ librbd::mirror::PromotionState m_promotion_state =
+ librbd::mirror::PROMOTION_STATE_UNKNOWN;
+ std::string m_primary_mirror_uuid;
+
+ // journal-based mirroring
+ Journaler *m_remote_journaler = nullptr;
+ cls::journal::Client m_client;
+
+ void get_remote_image_id();
+ void handle_get_remote_image_id(int r);
+
+ void get_mirror_info();
+ void handle_get_mirror_info(int r);
+
+ void get_client();
+ void handle_get_client(int r);
+
+ void register_client();
+ void handle_register_client(int r);
+
+ void finalize_journal_state_builder(cls::journal::ClientState client_state,
+ const MirrorPeerClientMeta& client_meta);
+ void finalize_snapshot_state_builder();
+
+ void finish(int r);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/Replayer.h b/src/tools/rbd_mirror/image_replayer/Replayer.h
new file mode 100644
index 000000000..f3bfa4da0
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Replayer.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H
+
+#include <string>
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+struct Replayer {
+ virtual ~Replayer() {}
+
+ virtual void destroy() = 0;
+
+ virtual void init(Context* on_finish) = 0;
+ virtual void shut_down(Context* on_finish) = 0;
+
+ virtual void flush(Context* on_finish) = 0;
+
+ virtual bool get_replay_status(std::string* description,
+ Context* on_finish) = 0;
+
+ virtual bool is_replaying() const = 0;
+ virtual bool is_resync_requested() const = 0;
+
+ virtual int get_error_code() const = 0;
+ virtual std::string get_error_description() const = 0;
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H
diff --git a/src/tools/rbd_mirror/image_replayer/ReplayerListener.h b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h
new file mode 100644
index 000000000..f17f401b1
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+struct ReplayerListener {
+ virtual ~ReplayerListener() {}
+
+ virtual void handle_notification() = 0;
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H
diff --git a/src/tools/rbd_mirror/image_replayer/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/StateBuilder.cc
new file mode 100644
index 000000000..55fb3509d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/StateBuilder.cc
@@ -0,0 +1,138 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "StateBuilder.h"
+#include "include/ceph_assert.h"
+#include "include/Context.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
+#include "tools/rbd_mirror/image_sync/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+ << "StateBuilder: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename I>
+StateBuilder<I>::StateBuilder(const std::string& global_image_id)
+ : global_image_id(global_image_id) {
+ dout(10) << "global_image_id=" << global_image_id << dendl;
+}
+
+template <typename I>
+StateBuilder<I>::~StateBuilder() {
+ ceph_assert(local_image_ctx == nullptr);
+ ceph_assert(remote_image_ctx == nullptr);
+ ceph_assert(m_sync_point_handler == nullptr);
+}
+
+template <typename I>
+bool StateBuilder<I>::is_local_primary() const {
+ if (local_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) {
+ ceph_assert(!local_image_id.empty());
+ return true;
+ }
+ return false;
+}
+
+template <typename I>
+bool StateBuilder<I>::is_remote_primary() const {
+ if (remote_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) {
+ ceph_assert(!remote_image_id.empty());
+ return true;
+ }
+ return false;
+}
+
+template <typename I>
+bool StateBuilder<I>::is_linked() const {
+ if (local_promotion_state == librbd::mirror::PROMOTION_STATE_NON_PRIMARY) {
+ ceph_assert(!local_image_id.empty());
+ return is_linked_impl();
+ }
+ return false;
+}
+
+template <typename I>
+void StateBuilder<I>::close_local_image(Context* on_finish) {
+ if (local_image_ctx == nullptr) {
+ on_finish->complete(0);
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_close_local_image(r, on_finish);
+ });
+ auto request = image_replayer::CloseImageRequest<I>::create(
+ &local_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void StateBuilder<I>::handle_close_local_image(int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(local_image_ctx == nullptr);
+ if (r < 0) {
+ derr << "failed to close local image for image " << global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void StateBuilder<I>::close_remote_image(Context* on_finish) {
+ if (remote_image_ctx == nullptr) {
+ on_finish->complete(0);
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_close_remote_image(r, on_finish);
+ });
+ auto request = image_replayer::CloseImageRequest<I>::create(
+ &remote_image_ctx, ctx);
+ request->send();
+}
+
+template <typename I>
+void StateBuilder<I>::handle_close_remote_image(int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(remote_image_ctx == nullptr);
+ if (r < 0) {
+ derr << "failed to close remote image for image " << global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ }
+
+ on_finish->complete(r);
+}
+
+template <typename I>
+void StateBuilder<I>::destroy_sync_point_handler() {
+ if (m_sync_point_handler == nullptr) {
+ return;
+ }
+
+ dout(15) << dendl;
+ m_sync_point_handler->destroy();
+ m_sync_point_handler = nullptr;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::StateBuilder<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/StateBuilder.h
new file mode 100644
index 000000000..51cf8668c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/StateBuilder.h
@@ -0,0 +1,114 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/Types.h"
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+struct BaseRequest;
+template <typename> class InstanceWatcher;
+struct PoolMetaCache;
+struct ProgressContext;
+template <typename> class Threads;
+
+namespace image_sync { struct SyncPointHandler; }
+
+namespace image_replayer {
+
+struct Replayer;
+struct ReplayerListener;
+
+template <typename ImageCtxT>
+class StateBuilder {
+public:
+ StateBuilder(const StateBuilder&) = delete;
+ StateBuilder& operator=(const StateBuilder&) = delete;
+
+ virtual ~StateBuilder();
+
+ virtual void destroy() {
+ delete this;
+ }
+
+ virtual void close(Context* on_finish) = 0;
+
+ virtual bool is_disconnected() const = 0;
+
+ bool is_local_primary() const;
+ bool is_remote_primary() const;
+ bool is_linked() const;
+
+ virtual cls::rbd::MirrorImageMode get_mirror_image_mode() const = 0;
+
+ virtual image_sync::SyncPointHandler* create_sync_point_handler() = 0;
+ void destroy_sync_point_handler();
+
+ virtual bool replay_requires_remote_image() const = 0;
+
+ void close_remote_image(Context* on_finish);
+
+ virtual BaseRequest* create_local_image_request(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ Context* on_finish) = 0;
+
+ virtual BaseRequest* create_prepare_replay_request(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) = 0;
+
+ virtual Replayer* create_replayer(
+ Threads<ImageCtxT>* threads,
+ InstanceWatcher<ImageCtxT>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ ReplayerListener* replayer_listener) = 0;
+
+ std::string global_image_id;
+
+ std::string local_image_id;
+ librbd::mirror::PromotionState local_promotion_state =
+ librbd::mirror::PROMOTION_STATE_UNKNOWN;
+ ImageCtxT* local_image_ctx = nullptr;
+
+ std::string remote_mirror_uuid;
+ std::string remote_image_id;
+ librbd::mirror::PromotionState remote_promotion_state =
+ librbd::mirror::PROMOTION_STATE_UNKNOWN;
+ ImageCtxT* remote_image_ctx = nullptr;
+
+protected:
+ image_sync::SyncPointHandler* m_sync_point_handler = nullptr;
+
+ StateBuilder(const std::string& global_image_id);
+
+ void close_local_image(Context* on_finish);
+
+private:
+ virtual bool is_linked_impl() const = 0;
+
+ void handle_close_local_image(int r, Context* on_finish);
+ void handle_close_remote_image(int r, Context* on_finish);
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::StateBuilder<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H
diff --git a/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc
new file mode 100644
index 000000000..5d9c9aca1
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h"
+#include "common/Clock.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+void TimeRollingMean::operator()(uint32_t value) {
+ auto time = ceph_clock_now();
+ if (m_last_time.is_zero()) {
+ m_last_time = time;
+ } else if (m_last_time.sec() < time.sec()) {
+ auto sec = m_last_time.sec();
+ while (sec++ < time.sec()) {
+ m_rolling_mean(m_sum);
+ m_sum = 0;
+ }
+
+ m_last_time = time;
+ }
+
+ m_sum += value;
+}
+
+double TimeRollingMean::get_average() const {
+ return boost::accumulators::rolling_mean(m_rolling_mean);
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h
new file mode 100644
index 000000000..139ef893f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h
@@ -0,0 +1,40 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H
+#define RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H
+
+#include "include/utime.h"
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/accumulators/statistics/rolling_mean.hpp>
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+class TimeRollingMean {
+public:
+
+ void operator()(uint32_t value);
+
+ double get_average() const;
+
+private:
+ typedef boost::accumulators::accumulator_set<
+ uint64_t, boost::accumulators::stats<
+ boost::accumulators::tag::rolling_mean>> RollingMean;
+
+ utime_t m_last_time;
+ uint64_t m_sum = 0;
+
+ RollingMean m_rolling_mean{
+ boost::accumulators::tag::rolling_window::window_size = 30};
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H
diff --git a/src/tools/rbd_mirror/image_replayer/Types.h b/src/tools/rbd_mirror/image_replayer/Types.h
new file mode 100644
index 000000000..6ab988a76
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Types.h
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+enum HealthState {
+ HEALTH_STATE_OK,
+ HEALTH_STATE_WARNING,
+ HEALTH_STATE_ERROR
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H
diff --git a/src/tools/rbd_mirror/image_replayer/Utils.cc b/src/tools/rbd_mirror/image_replayer/Utils.cc
new file mode 100644
index 000000000..55162a4e4
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Utils.cc
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "include/rados/librados.hpp"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::util::" \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace util {
+
+std::string compute_image_spec(librados::IoCtx& io_ctx,
+ const std::string& image_name) {
+ std::string name = io_ctx.get_namespace();
+ if (!name.empty()) {
+ name += "/";
+ }
+
+ return io_ctx.get_pool_name() + "/" + name + image_name;
+}
+
+bool decode_client_meta(const cls::journal::Client& client,
+ librbd::journal::MirrorPeerClientMeta* client_meta) {
+ dout(15) << dendl;
+
+ librbd::journal::ClientData client_data;
+ auto it = client.data.cbegin();
+ try {
+ decode(client_data, it);
+ } catch (const buffer::error &err) {
+ derr << "failed to decode client meta data: " << err.what() << dendl;
+ return false;
+ }
+
+ auto local_client_meta = boost::get<librbd::journal::MirrorPeerClientMeta>(
+ &client_data.client_meta);
+ if (local_client_meta == nullptr) {
+ derr << "unknown peer registration" << dendl;
+ return false;
+ }
+
+ *client_meta = *local_client_meta;
+ dout(15) << "client found: client_meta=" << *client_meta << dendl;
+ return true;
+}
+
+} // namespace util
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
diff --git a/src/tools/rbd_mirror/image_replayer/Utils.h b/src/tools/rbd_mirror/image_replayer/Utils.h
new file mode 100644
index 000000000..6c5352cd1
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/Utils.h
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_UTILS_H
+#define RBD_MIRROR_IMAGE_REPLAYER_UTILS_H
+
+#include "include/rados/librados_fwd.hpp"
+#include <string>
+
+namespace cls { namespace journal { struct Client; } }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace util {
+
+std::string compute_image_spec(librados::IoCtx& io_ctx,
+ const std::string& image_name);
+
+bool decode_client_meta(const cls::journal::Client& client,
+ librbd::journal::MirrorPeerClientMeta* client_meta);
+
+} // namespace util
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_UTILS_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc
new file mode 100644
index 000000000..087cf4f5f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc
@@ -0,0 +1,162 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CreateLocalImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Types.h"
+#include "tools/rbd_mirror/PoolMetaCache.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/CreateImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "CreateLocalImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+template <typename I>
+void CreateLocalImageRequest<I>::send() {
+ unregister_client();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::unregister_client() {
+ dout(10) << dendl;
+ update_progress("UNREGISTER_CLIENT");
+
+ auto ctx = create_context_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_unregister_client>(this);
+ m_state_builder->remote_journaler->unregister_client(ctx);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_unregister_client(int r) {
+ dout(10) << "r=" << r << dendl;
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to unregister with remote journal: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ m_state_builder->local_image_id = "";
+ m_state_builder->remote_client_meta = {};
+ register_client();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::register_client() {
+ ceph_assert(m_state_builder->local_image_id.empty());
+ m_state_builder->local_image_id =
+ librbd::util::generate_image_id<I>(m_local_io_ctx);
+ dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl;
+ update_progress("REGISTER_CLIENT");
+
+ librbd::journal::MirrorPeerClientMeta client_meta{
+ m_state_builder->local_image_id};
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
+
+ librbd::journal::ClientData client_data{client_meta};
+ bufferlist client_data_bl;
+ encode(client_data, client_data_bl);
+
+ auto ctx = create_context_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_register_client>(this);
+ m_state_builder->remote_journaler->register_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_register_client(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to register with remote journal: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ m_state_builder->remote_client_state = cls::journal::CLIENT_STATE_CONNECTED;
+ m_state_builder->remote_client_meta = {m_state_builder->local_image_id};
+ m_state_builder->remote_client_meta.state =
+ librbd::journal::MIRROR_PEER_STATE_SYNCING;
+
+ create_local_image();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::create_local_image() {
+ dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl;
+ update_progress("CREATE_LOCAL_IMAGE");
+
+ m_remote_image_ctx->image_lock.lock_shared();
+ std::string image_name = m_remote_image_ctx->name;
+ m_remote_image_ctx->image_lock.unlock_shared();
+
+ auto ctx = create_context_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_create_local_image>(this);
+ auto request = CreateImageRequest<I>::create(
+ m_threads, m_local_io_ctx, m_global_image_id,
+ m_state_builder->remote_mirror_uuid, image_name,
+ m_state_builder->local_image_id, m_remote_image_ctx,
+ m_pool_meta_cache, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, ctx);
+ request->send();
+}
+template <typename I>
+void CreateLocalImageRequest<I>::handle_create_local_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBADF) {
+ dout(5) << "image id " << m_state_builder->local_image_id << " "
+ << "already in-use" << dendl;
+ unregister_client();
+ return;
+ } else if (r < 0) {
+ if (r == -ENOENT) {
+ dout(10) << "parent image does not exist" << dendl;
+ } else {
+ derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+ }
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::update_progress(
+ const std::string& description) {
+ dout(15) << description << dendl;
+ if (m_progress_ctx != nullptr) {
+ m_progress_ctx->update_progress(description);
+ }
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::CreateLocalImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h
new file mode 100644
index 000000000..fc776ecc3
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h
@@ -0,0 +1,116 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include <string>
+
+struct Context;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class PoolMetaCache;
+class ProgressContext;
+template <typename> struct Threads;
+
+namespace image_replayer {
+namespace journal {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class CreateLocalImageRequest : public BaseRequest {
+public:
+ typedef rbd::mirror::ProgressContext ProgressContext;
+
+ static CreateLocalImageRequest* create(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ ImageCtxT* remote_image_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ Context* on_finish) {
+ return new CreateLocalImageRequest(threads, local_io_ctx, remote_image_ctx,
+ global_image_id, pool_meta_cache,
+ progress_ctx, state_builder, on_finish);
+ }
+
+ CreateLocalImageRequest(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ ImageCtxT* remote_image_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ Context* on_finish)
+ : BaseRequest(on_finish),
+ m_threads(threads),
+ m_local_io_ctx(local_io_ctx),
+ m_remote_image_ctx(remote_image_ctx),
+ m_global_image_id(global_image_id),
+ m_pool_meta_cache(pool_meta_cache),
+ m_progress_ctx(progress_ctx),
+ m_state_builder(state_builder) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * UNREGISTER_CLIENT < * * * * * * * *
+ * | *
+ * v *
+ * REGISTER_CLIENT *
+ * | *
+ * v (id exists) *
+ * CREATE_LOCAL_IMAGE * * * * * * * * *
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ Threads<ImageCtxT>* m_threads;
+ librados::IoCtx& m_local_io_ctx;
+ ImageCtxT* m_remote_image_ctx;
+ std::string m_global_image_id;
+ PoolMetaCache* m_pool_meta_cache;
+ ProgressContext* m_progress_ctx;
+ StateBuilder<ImageCtxT>* m_state_builder;
+
+ void unregister_client();
+ void handle_unregister_client(int r);
+
+ void register_client();
+ void handle_register_client(int r);
+
+ void create_local_image();
+ void handle_create_local_image(int r);
+
+ void update_progress(const std::string& description);
+
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::CreateLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc
new file mode 100644
index 000000000..f5d49048e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc
@@ -0,0 +1,206 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "EventPreprocessor.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/journal/Types.h"
+#include <boost/variant.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "EventPreprocessor: " << this << " " << __func__ \
+ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+EventPreprocessor<I>::EventPreprocessor(I &local_image_ctx,
+ Journaler &remote_journaler,
+ const std::string &local_mirror_uuid,
+ MirrorPeerClientMeta *client_meta,
+ librbd::asio::ContextWQ *work_queue)
+ : m_local_image_ctx(local_image_ctx), m_remote_journaler(remote_journaler),
+ m_local_mirror_uuid(local_mirror_uuid), m_client_meta(client_meta),
+ m_work_queue(work_queue) {
+}
+
+template <typename I>
+EventPreprocessor<I>::~EventPreprocessor() {
+ ceph_assert(!m_in_progress);
+}
+
+template <typename I>
+bool EventPreprocessor<I>::is_required(const EventEntry &event_entry) {
+ SnapSeqs snap_seqs(m_client_meta->snap_seqs);
+ return (prune_snap_map(&snap_seqs) ||
+ event_entry.get_event_type() ==
+ librbd::journal::EVENT_TYPE_SNAP_RENAME);
+}
+
+template <typename I>
+void EventPreprocessor<I>::preprocess(EventEntry *event_entry,
+ Context *on_finish) {
+ ceph_assert(!m_in_progress);
+ m_in_progress = true;
+ m_event_entry = event_entry;
+ m_on_finish = on_finish;
+
+ refresh_image();
+}
+
+template <typename I>
+void EventPreprocessor<I>::refresh_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ EventPreprocessor<I>, &EventPreprocessor<I>::handle_refresh_image>(this);
+ m_local_image_ctx.state->refresh(ctx);
+}
+
+template <typename I>
+void EventPreprocessor<I>::handle_refresh_image(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "error encountered during image refresh: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ preprocess_event();
+}
+
+template <typename I>
+void EventPreprocessor<I>::preprocess_event() {
+ dout(20) << dendl;
+
+ m_snap_seqs = m_client_meta->snap_seqs;
+ m_snap_seqs_updated = prune_snap_map(&m_snap_seqs);
+
+ int r = boost::apply_visitor(PreprocessEventVisitor(this),
+ m_event_entry->event);
+ if (r < 0) {
+ finish(r);
+ return;
+ }
+
+ update_client();
+}
+
+template <typename I>
+int EventPreprocessor<I>::preprocess_snap_rename(
+ librbd::journal::SnapRenameEvent &event) {
+ dout(20) << "remote_snap_id=" << event.snap_id << ", "
+ << "src_snap_name=" << event.src_snap_name << ", "
+ << "dest_snap_name=" << event.dst_snap_name << dendl;
+
+ auto snap_seq_it = m_snap_seqs.find(event.snap_id);
+ if (snap_seq_it != m_snap_seqs.end()) {
+ dout(20) << "remapping remote snap id " << snap_seq_it->first << " "
+ << "to local snap id " << snap_seq_it->second << dendl;
+ event.snap_id = snap_seq_it->second;
+ return 0;
+ }
+
+ auto snap_id_it = m_local_image_ctx.snap_ids.find({cls::rbd::UserSnapshotNamespace(),
+ event.src_snap_name});
+ if (snap_id_it == m_local_image_ctx.snap_ids.end()) {
+ dout(20) << "cannot map remote snapshot '" << event.src_snap_name << "' "
+ << "to local snapshot" << dendl;
+ event.snap_id = CEPH_NOSNAP;
+ return -ENOENT;
+ }
+
+ dout(20) << "mapping remote snap id " << event.snap_id << " "
+ << "to local snap id " << snap_id_it->second << dendl;
+ m_snap_seqs_updated = true;
+ m_snap_seqs[event.snap_id] = snap_id_it->second;
+ event.snap_id = snap_id_it->second;
+ return 0;
+}
+
+template <typename I>
+void EventPreprocessor<I>::update_client() {
+ if (!m_snap_seqs_updated) {
+ finish(0);
+ return;
+ }
+
+ dout(20) << dendl;
+ librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta);
+ client_meta.snap_seqs = m_snap_seqs;
+
+ librbd::journal::ClientData client_data(client_meta);
+ bufferlist data_bl;
+ encode(client_data, data_bl);
+
+ Context *ctx = create_context_callback<
+ EventPreprocessor<I>, &EventPreprocessor<I>::handle_update_client>(
+ this);
+ m_remote_journaler.update_client(data_bl, ctx);
+}
+
+template <typename I>
+void EventPreprocessor<I>::handle_update_client(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update mirror peer journal client: "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_client_meta->snap_seqs = m_snap_seqs;
+ finish(0);
+}
+
+template <typename I>
+bool EventPreprocessor<I>::prune_snap_map(SnapSeqs *snap_seqs) {
+ bool pruned = false;
+
+ std::shared_lock image_locker{m_local_image_ctx.image_lock};
+ for (auto it = snap_seqs->begin(); it != snap_seqs->end(); ) {
+ auto current_it(it++);
+ if (m_local_image_ctx.snap_info.count(current_it->second) == 0) {
+ snap_seqs->erase(current_it);
+ pruned = true;
+ }
+ }
+ return pruned;
+}
+
+template <typename I>
+void EventPreprocessor<I>::finish(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ Context *on_finish = m_on_finish;
+ m_on_finish = nullptr;
+ m_event_entry = nullptr;
+ m_in_progress = false;
+ m_snap_seqs_updated = false;
+ m_work_queue->queue(on_finish, r);
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::EventPreprocessor<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h
new file mode 100644
index 000000000..12f70eb93
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h
@@ -0,0 +1,127 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
+#define RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
+
+#include "include/int_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <map>
+#include <string>
+#include <boost/variant/static_visitor.hpp>
+
+struct Context;
+namespace journal { class Journaler; }
+namespace librbd {
+class ImageCtx;
+namespace asio { struct ContextWQ; }
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class EventPreprocessor {
+public:
+ using Journaler = typename librbd::journal::TypeTraits<ImageCtxT>::Journaler;
+ using EventEntry = librbd::journal::EventEntry;
+ using MirrorPeerClientMeta = librbd::journal::MirrorPeerClientMeta;
+
+ static EventPreprocessor *create(ImageCtxT &local_image_ctx,
+ Journaler &remote_journaler,
+ const std::string &local_mirror_uuid,
+ MirrorPeerClientMeta *client_meta,
+ librbd::asio::ContextWQ *work_queue) {
+ return new EventPreprocessor(local_image_ctx, remote_journaler,
+ local_mirror_uuid, client_meta, work_queue);
+ }
+
+ static void destroy(EventPreprocessor* processor) {
+ delete processor;
+ }
+
+ EventPreprocessor(ImageCtxT &local_image_ctx, Journaler &remote_journaler,
+ const std::string &local_mirror_uuid,
+ MirrorPeerClientMeta *client_meta,
+ librbd::asio::ContextWQ *work_queue);
+ ~EventPreprocessor();
+
+ bool is_required(const EventEntry &event_entry);
+ void preprocess(EventEntry *event_entry, Context *on_finish);
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v (skip if not required)
+ * REFRESH_IMAGE
+ * |
+ * v (skip if not required)
+ * PREPROCESS_EVENT
+ * |
+ * v (skip if not required)
+ * UPDATE_CLIENT
+ *
+ * @endverbatim
+ */
+
+ typedef std::map<uint64_t, uint64_t> SnapSeqs;
+
+ class PreprocessEventVisitor : public boost::static_visitor<int> {
+ public:
+ EventPreprocessor *event_preprocessor;
+
+ PreprocessEventVisitor(EventPreprocessor *event_preprocessor)
+ : event_preprocessor(event_preprocessor) {
+ }
+
+ template <typename T>
+ inline int operator()(T&) const {
+ return 0;
+ }
+ inline int operator()(librbd::journal::SnapRenameEvent &event) const {
+ return event_preprocessor->preprocess_snap_rename(event);
+ }
+ };
+
+ ImageCtxT &m_local_image_ctx;
+ Journaler &m_remote_journaler;
+ std::string m_local_mirror_uuid;
+ MirrorPeerClientMeta *m_client_meta;
+ librbd::asio::ContextWQ *m_work_queue;
+
+ bool m_in_progress = false;
+ EventEntry *m_event_entry = nullptr;
+ Context *m_on_finish = nullptr;
+
+ SnapSeqs m_snap_seqs;
+ bool m_snap_seqs_updated = false;
+
+ bool prune_snap_map(SnapSeqs *snap_seqs);
+
+ void refresh_image();
+ void handle_refresh_image(int r);
+
+ void preprocess_event();
+ int preprocess_snap_rename(librbd::journal::SnapRenameEvent &event);
+
+ void update_client();
+ void handle_update_client(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::EventPreprocessor<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.cc b/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.cc
new file mode 100644
index 000000000..c8a96a4ad
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.cc
@@ -0,0 +1,316 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "PrepareReplayRequest.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "PrepareReplayRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+void PrepareReplayRequest<I>::send() {
+ *m_resync_requested = false;
+ *m_syncing = false;
+
+ if (m_state_builder->local_image_id !=
+ m_state_builder->remote_client_meta.image_id) {
+ // somehow our local image has a different image id than the image id
+ // registered in the remote image
+ derr << "split-brain detected: local_image_id="
+ << m_state_builder->local_image_id << ", "
+ << "registered local_image_id="
+ << m_state_builder->remote_client_meta.image_id << dendl;
+ finish(-EEXIST);
+ return;
+ }
+
+ std::shared_lock image_locker(m_state_builder->local_image_ctx->image_lock);
+ if (m_state_builder->local_image_ctx->journal == nullptr) {
+ image_locker.unlock();
+
+ derr << "local image does not support journaling" << dendl;
+ finish(-EINVAL);
+ return;
+ }
+
+ int r = m_state_builder->local_image_ctx->journal->is_resync_requested(
+ m_resync_requested);
+ if (r < 0) {
+ image_locker.unlock();
+
+ derr << "failed to check if a resync was requested" << dendl;
+ finish(r);
+ return;
+ }
+
+ m_local_tag_tid = m_state_builder->local_image_ctx->journal->get_tag_tid();
+ m_local_tag_data = m_state_builder->local_image_ctx->journal->get_tag_data();
+ dout(10) << "local tag=" << m_local_tag_tid << ", "
+ << "local tag data=" << m_local_tag_data << dendl;
+ image_locker.unlock();
+
+ if (*m_resync_requested) {
+ finish(0);
+ return;
+ } else if (m_state_builder->remote_client_meta.state ==
+ librbd::journal::MIRROR_PEER_STATE_SYNCING &&
+ m_local_tag_data.mirror_uuid ==
+ m_state_builder->remote_mirror_uuid) {
+ // if the initial sync hasn't completed, we cannot replay
+ *m_syncing = true;
+ finish(0);
+ return;
+ }
+
+ update_client_state();
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::update_client_state() {
+ if (m_state_builder->remote_client_meta.state !=
+ librbd::journal::MIRROR_PEER_STATE_SYNCING ||
+ m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
+ get_remote_tag_class();
+ return;
+ }
+
+ // our local image is not primary, is flagged as syncing on the remote side,
+ // but is no longer tied to the remote -- this implies we were forced
+ // promoted and then demoted at some point
+ dout(15) << dendl;
+ update_progress("UPDATE_CLIENT_STATE");
+
+ auto client_meta = m_state_builder->remote_client_meta;
+ client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+
+ librbd::journal::ClientData client_data(client_meta);
+ bufferlist data_bl;
+ encode(client_data, data_bl);
+
+ auto ctx = create_context_callback<
+ PrepareReplayRequest<I>,
+ &PrepareReplayRequest<I>::handle_update_client_state>(this);
+ m_state_builder->remote_journaler->update_client(data_bl, ctx);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::handle_update_client_state(int r) {
+ dout(15) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "failed to update client: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_state_builder->remote_client_meta.state =
+ librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+ get_remote_tag_class();
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::get_remote_tag_class() {
+ dout(10) << dendl;
+ update_progress("GET_REMOTE_TAG_CLASS");
+
+ auto ctx = create_context_callback<
+ PrepareReplayRequest<I>,
+ &PrepareReplayRequest<I>::handle_get_remote_tag_class>(this);
+ m_state_builder->remote_journaler->get_client(
+ librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::handle_get_remote_tag_class(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ librbd::journal::ClientData client_data;
+ auto it = m_client.data.cbegin();
+ try {
+ decode(client_data, it);
+ } catch (const buffer::error &err) {
+ derr << "failed to decode remote client meta data: " << err.what()
+ << dendl;
+ finish(-EBADMSG);
+ return;
+ }
+
+ librbd::journal::ImageClientMeta *client_meta =
+ boost::get<librbd::journal::ImageClientMeta>(&client_data.client_meta);
+ if (client_meta == nullptr) {
+ derr << "unknown remote client registration" << dendl;
+ finish(-EINVAL);
+ return;
+ }
+
+ m_remote_tag_class = client_meta->tag_class;
+ dout(10) << "remote tag class=" << m_remote_tag_class << dendl;
+
+ get_remote_tags();
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::get_remote_tags() {
+ dout(10) << dendl;
+ update_progress("GET_REMOTE_TAGS");
+
+ auto ctx = create_context_callback<
+ PrepareReplayRequest<I>,
+ &PrepareReplayRequest<I>::handle_get_remote_tags>(this);
+ m_state_builder->remote_journaler->get_tags(m_remote_tag_class,
+ &m_remote_tags, ctx);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::handle_get_remote_tags(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ // At this point, the local image was existing, non-primary, and replaying;
+ // and the remote image is primary. Attempt to link the local image's most
+ // recent tag to the remote image's tag chain.
+ bool remote_tag_data_valid = false;
+ librbd::journal::TagData remote_tag_data;
+ boost::optional<uint64_t> remote_orphan_tag_tid =
+ boost::make_optional<uint64_t>(false, 0U);
+ bool reconnect_orphan = false;
+
+ // decode the remote tags
+ for (auto &remote_tag : m_remote_tags) {
+ if (m_local_tag_data.predecessor.commit_valid &&
+ m_local_tag_data.predecessor.mirror_uuid ==
+ m_state_builder->remote_mirror_uuid &&
+ m_local_tag_data.predecessor.tag_tid > remote_tag.tid) {
+ dout(10) << "skipping processed predecessor remote tag "
+ << remote_tag.tid << dendl;
+ continue;
+ }
+
+ try {
+ auto it = remote_tag.data.cbegin();
+ decode(remote_tag_data, it);
+ remote_tag_data_valid = true;
+ } catch (const buffer::error &err) {
+ derr << "failed to decode remote tag " << remote_tag.tid << ": "
+ << err.what() << dendl;
+ finish(-EBADMSG);
+ return;
+ }
+
+ dout(10) << "decoded remote tag " << remote_tag.tid << ": "
+ << remote_tag_data << dendl;
+
+ if (!m_local_tag_data.predecessor.commit_valid) {
+ // newly synced local image (no predecessor) replays from the first tag
+ if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ dout(10) << "skipping non-primary remote tag" << dendl;
+ continue;
+ }
+
+ dout(10) << "using initial primary remote tag" << dendl;
+ break;
+ }
+
+ if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
+ // demotion last available local epoch
+
+ if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid &&
+ remote_tag_data.predecessor.commit_valid &&
+ remote_tag_data.predecessor.tag_tid ==
+ m_local_tag_data.predecessor.tag_tid) {
+ // demotion matches remote epoch
+
+ if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid &&
+ m_local_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ // local demoted and remote has matching event
+ dout(10) << "found matching local demotion tag" << dendl;
+ remote_orphan_tag_tid = remote_tag.tid;
+ continue;
+ }
+
+ if (m_local_tag_data.predecessor.mirror_uuid ==
+ m_state_builder->remote_mirror_uuid &&
+ remote_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ // remote demoted and local has matching event
+ dout(10) << "found matching remote demotion tag" << dendl;
+ remote_orphan_tag_tid = remote_tag.tid;
+ continue;
+ }
+ }
+
+ if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID &&
+ remote_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::ORPHAN_MIRROR_UUID &&
+ remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid &&
+ remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) {
+ // remote promotion tag chained to remote/local demotion tag
+ dout(10) << "found chained remote promotion tag" << dendl;
+ reconnect_orphan = true;
+ break;
+ }
+
+ // promotion must follow demotion
+ remote_orphan_tag_tid = boost::none;
+ }
+ }
+
+ if (remote_tag_data_valid &&
+ m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
+ dout(10) << "local image is in clean replay state" << dendl;
+ } else if (reconnect_orphan) {
+ dout(10) << "remote image was demoted/promoted" << dendl;
+ } else {
+ derr << "split-brain detected -- skipping image replay" << dendl;
+ finish(-EEXIST);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::update_progress(const std::string &description) {
+ dout(10) << description << dendl;
+
+ if (m_progress_ctx != nullptr) {
+ m_progress_ctx->update_progress(description);
+ }
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::PrepareReplayRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h b/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h
new file mode 100644
index 000000000..2b6fb659b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h
@@ -0,0 +1,115 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
+
+#include "include/int_types.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/mirror/Types.h"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include <list>
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+
+namespace image_replayer {
+namespace journal {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class PrepareReplayRequest : public BaseRequest {
+public:
+ static PrepareReplayRequest* create(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) {
+ return new PrepareReplayRequest(
+ local_mirror_uuid, progress_ctx, state_builder, resync_requested,
+ syncing, on_finish);
+ }
+
+ PrepareReplayRequest(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish)
+ : BaseRequest(on_finish),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_progress_ctx(progress_ctx),
+ m_state_builder(state_builder),
+ m_resync_requested(resync_requested),
+ m_syncing(syncing) {
+ }
+
+ void send() override;
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * UPDATE_CLIENT_STATE
+ * |
+ * v
+ * GET_REMOTE_TAG_CLASS
+ * |
+ * v
+ * GET_REMOTE_TAGS
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+ typedef std::list<cls::journal::Tag> Tags;
+
+ std::string m_local_mirror_uuid;
+ ProgressContext* m_progress_ctx;
+ StateBuilder<ImageCtxT>* m_state_builder;
+ bool* m_resync_requested;
+ bool* m_syncing;
+
+ uint64_t m_local_tag_tid = 0;
+ librbd::journal::TagData m_local_tag_data;
+
+ uint64_t m_remote_tag_class = 0;
+ Tags m_remote_tags;
+ cls::journal::Client m_client;
+
+ void update_client_state();
+ void handle_update_client_state(int r);
+
+ void get_remote_tag_class();
+ void handle_get_remote_tag_class(int r);
+
+ void get_remote_tags();
+ void handle_get_remote_tags(int r);
+
+ void update_progress(const std::string& description);
+
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::PrepareReplayRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc
new file mode 100644
index 000000000..eb99d5add
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.cc
@@ -0,0 +1,284 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ReplayStatusFormatter.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "json_spirit/json_spirit.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "ReplayStatusFormatter: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+using librbd::util::unique_lock_name;
+
+namespace {
+
+double round_to_two_places(double value) {
+ return abs(round(value * 100) / 100);
+}
+
+json_spirit::mObject to_json_object(
+ const cls::journal::ObjectPosition& position) {
+ json_spirit::mObject object;
+ if (position != cls::journal::ObjectPosition{}) {
+ object["object_number"] = position.object_number;
+ object["tag_tid"] = position.tag_tid;
+ object["entry_tid"] = position.entry_tid;
+ }
+ return object;
+}
+
+} // anonymous namespace
+
+template <typename I>
+ReplayStatusFormatter<I>::ReplayStatusFormatter(Journaler *journaler,
+ const std::string &mirror_uuid)
+ : m_journaler(journaler),
+ m_mirror_uuid(mirror_uuid),
+ m_lock(ceph::make_mutex(unique_lock_name("ReplayStatusFormatter::m_lock", this))) {
+}
+
+template <typename I>
+void ReplayStatusFormatter<I>::handle_entry_processed(uint32_t bytes) {
+ dout(20) << dendl;
+
+ m_bytes_per_second(bytes);
+ m_entries_per_second(1);
+}
+
+template <typename I>
+bool ReplayStatusFormatter<I>::get_or_send_update(std::string *description,
+ Context *on_finish) {
+ dout(20) << dendl;
+
+ bool in_progress = false;
+ {
+ std::lock_guard locker{m_lock};
+ if (m_on_finish) {
+ in_progress = true;
+ } else {
+ m_on_finish = on_finish;
+ }
+ }
+
+ if (in_progress) {
+ dout(10) << "previous request is still in progress, ignoring" << dendl;
+ on_finish->complete(-EAGAIN);
+ return false;
+ }
+
+ m_master_position = cls::journal::ObjectPosition();
+ m_mirror_position = cls::journal::ObjectPosition();
+
+ cls::journal::Client master_client, mirror_client;
+ int r;
+
+ r = m_journaler->get_cached_client(librbd::Journal<>::IMAGE_CLIENT_ID,
+ &master_client);
+ if (r < 0) {
+ derr << "error retrieving registered master client: "
+ << cpp_strerror(r) << dendl;
+ } else {
+ r = m_journaler->get_cached_client(m_mirror_uuid, &mirror_client);
+ if (r < 0) {
+ derr << "error retrieving registered mirror client: "
+ << cpp_strerror(r) << dendl;
+ }
+ }
+
+ if (!master_client.commit_position.object_positions.empty()) {
+ m_master_position =
+ *(master_client.commit_position.object_positions.begin());
+ }
+
+ if (!mirror_client.commit_position.object_positions.empty()) {
+ m_mirror_position =
+ *(mirror_client.commit_position.object_positions.begin());
+ }
+
+ if (!calculate_behind_master_or_send_update()) {
+ dout(20) << "need to update tag cache" << dendl;
+ return false;
+ }
+
+ format(description);
+
+ {
+ std::lock_guard locker{m_lock};
+ ceph_assert(m_on_finish == on_finish);
+ m_on_finish = nullptr;
+ }
+
+ on_finish->complete(-EEXIST);
+ return true;
+}
+
+template <typename I>
+bool ReplayStatusFormatter<I>::calculate_behind_master_or_send_update() {
+ dout(20) << "m_master_position=" << m_master_position
+ << ", m_mirror_position=" << m_mirror_position << dendl;
+
+ m_entries_behind_master = 0;
+
+ if (m_master_position == cls::journal::ObjectPosition() ||
+ m_master_position.tag_tid < m_mirror_position.tag_tid) {
+ return true;
+ }
+
+ cls::journal::ObjectPosition master = m_master_position;
+ uint64_t mirror_tag_tid = m_mirror_position.tag_tid;
+
+ while (master.tag_tid > mirror_tag_tid) {
+ auto tag_it = m_tag_cache.find(master.tag_tid);
+ if (tag_it == m_tag_cache.end()) {
+ send_update_tag_cache(master.tag_tid, mirror_tag_tid);
+ return false;
+ }
+ librbd::journal::TagData &tag_data = tag_it->second;
+ m_entries_behind_master += master.entry_tid;
+ master = {0, tag_data.predecessor.tag_tid, tag_data.predecessor.entry_tid};
+ }
+ if (master.tag_tid == mirror_tag_tid &&
+ master.entry_tid > m_mirror_position.entry_tid) {
+ m_entries_behind_master += master.entry_tid - m_mirror_position.entry_tid;
+ }
+
+ dout(20) << "clearing tags not needed any more (below mirror position)"
+ << dendl;
+
+ uint64_t tag_tid = mirror_tag_tid;
+ size_t old_size = m_tag_cache.size();
+ while (tag_tid != 0) {
+ auto tag_it = m_tag_cache.find(tag_tid);
+ if (tag_it == m_tag_cache.end()) {
+ break;
+ }
+ librbd::journal::TagData &tag_data = tag_it->second;
+
+ dout(20) << "erasing tag " << tag_data << "for tag_tid " << tag_tid
+ << dendl;
+
+ tag_tid = tag_data.predecessor.tag_tid;
+ m_tag_cache.erase(tag_it);
+ }
+
+ dout(20) << old_size - m_tag_cache.size() << " entries cleared" << dendl;
+
+ return true;
+}
+
+template <typename I>
+void ReplayStatusFormatter<I>::send_update_tag_cache(uint64_t master_tag_tid,
+ uint64_t mirror_tag_tid) {
+ if (master_tag_tid <= mirror_tag_tid ||
+ m_tag_cache.find(master_tag_tid) != m_tag_cache.end()) {
+ Context *on_finish = nullptr;
+ {
+ std::lock_guard locker{m_lock};
+ std::swap(m_on_finish, on_finish);
+ }
+
+ ceph_assert(on_finish);
+ on_finish->complete(0);
+ return;
+ }
+
+ dout(20) << "master_tag_tid=" << master_tag_tid << ", mirror_tag_tid="
+ << mirror_tag_tid << dendl;
+
+ auto ctx = new LambdaContext(
+ [this, master_tag_tid, mirror_tag_tid](int r) {
+ handle_update_tag_cache(master_tag_tid, mirror_tag_tid, r);
+ });
+ m_journaler->get_tag(master_tag_tid, &m_tag, ctx);
+}
+
+template <typename I>
+void ReplayStatusFormatter<I>::handle_update_tag_cache(uint64_t master_tag_tid,
+ uint64_t mirror_tag_tid,
+ int r) {
+ librbd::journal::TagData tag_data;
+
+ if (r < 0) {
+ derr << "error retrieving tag " << master_tag_tid << ": " << cpp_strerror(r)
+ << dendl;
+ } else {
+ dout(20) << "retrieved tag " << master_tag_tid << ": " << m_tag << dendl;
+
+ auto it = m_tag.data.cbegin();
+ try {
+ decode(tag_data, it);
+ } catch (const buffer::error &err) {
+ derr << "error decoding tag " << master_tag_tid << ": " << err.what()
+ << dendl;
+ }
+ }
+
+ if (tag_data.predecessor.mirror_uuid !=
+ librbd::Journal<>::LOCAL_MIRROR_UUID &&
+ tag_data.predecessor.mirror_uuid !=
+ librbd::Journal<>::ORPHAN_MIRROR_UUID) {
+ dout(20) << "hit remote image non-primary epoch" << dendl;
+ tag_data.predecessor = {};
+ }
+
+ dout(20) << "decoded tag " << master_tag_tid << ": " << tag_data << dendl;
+
+ m_tag_cache[master_tag_tid] = tag_data;
+ send_update_tag_cache(tag_data.predecessor.tag_tid, mirror_tag_tid);
+}
+
+template <typename I>
+void ReplayStatusFormatter<I>::format(std::string *description) {
+ dout(20) << "m_master_position=" << m_master_position
+ << ", m_mirror_position=" << m_mirror_position
+ << ", m_entries_behind_master=" << m_entries_behind_master << dendl;
+
+ json_spirit::mObject root_obj;
+ root_obj["primary_position"] = to_json_object(m_master_position);
+ root_obj["non_primary_position"] = to_json_object(m_mirror_position);
+ root_obj["entries_behind_primary"] = (
+ m_entries_behind_master > 0 ? m_entries_behind_master : 0);
+
+ m_bytes_per_second(0);
+ root_obj["bytes_per_second"] = round_to_two_places(
+ m_bytes_per_second.get_average());
+
+ m_entries_per_second(0);
+ auto entries_per_second = m_entries_per_second.get_average();
+ root_obj["entries_per_second"] = round_to_two_places(entries_per_second);
+
+ if (m_entries_behind_master > 0 && entries_per_second > 0) {
+ std::uint64_t seconds_until_synced = round_to_two_places(
+ m_entries_behind_master / entries_per_second);
+ if (seconds_until_synced >= std::numeric_limits<uint64_t>::max()) {
+ seconds_until_synced = std::numeric_limits<uint64_t>::max();
+ }
+
+ root_obj["seconds_until_synced"] = seconds_until_synced;
+ }
+
+ *description = json_spirit::write(
+ root_obj, json_spirit::remove_trailing_zeros);
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::ReplayStatusFormatter<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h
new file mode 100644
index 000000000..5dbbfe10d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h
@@ -0,0 +1,70 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H
+
+#include "include/Context.h"
+#include "common/ceph_mutex.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h"
+
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class ReplayStatusFormatter {
+public:
+ typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler;
+
+ static ReplayStatusFormatter* create(Journaler *journaler,
+ const std::string &mirror_uuid) {
+ return new ReplayStatusFormatter(journaler, mirror_uuid);
+ }
+
+ static void destroy(ReplayStatusFormatter* formatter) {
+ delete formatter;
+ }
+
+ ReplayStatusFormatter(Journaler *journaler, const std::string &mirror_uuid);
+
+ void handle_entry_processed(uint32_t bytes);
+
+ bool get_or_send_update(std::string *description, Context *on_finish);
+
+private:
+ Journaler *m_journaler;
+ std::string m_mirror_uuid;
+ ceph::mutex m_lock;
+ Context *m_on_finish = nullptr;
+ cls::journal::ObjectPosition m_master_position;
+ cls::journal::ObjectPosition m_mirror_position;
+ int64_t m_entries_behind_master = 0;
+ cls::journal::Tag m_tag;
+ std::map<uint64_t, librbd::journal::TagData> m_tag_cache;
+
+ TimeRollingMean m_bytes_per_second;
+ TimeRollingMean m_entries_per_second;
+
+ bool calculate_behind_master_or_send_update();
+ void send_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid);
+ void handle_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid,
+ int r);
+ void format(std::string *description);
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::ReplayStatusFormatter<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc
new file mode 100644
index 000000000..20560038c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc
@@ -0,0 +1,1317 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Replayer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/perf_counters.h"
+#include "common/perf_counters_key.h"
+#include "common/Timer.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/journal/Replay.h"
+#include "journal/Journaler.h"
+#include "journal/JournalMetadataListener.h"
+#include "journal/ReplayHandler.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/ReplayerListener.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h"
+#include "tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h"
+#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "Replayer: " << this << " " << __func__ << ": "
+
+extern PerfCounters *g_journal_perf_counters;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+namespace {
+
+uint32_t calculate_replay_delay(const utime_t &event_time,
+ int mirroring_replay_delay) {
+ if (mirroring_replay_delay <= 0) {
+ return 0;
+ }
+
+ utime_t now = ceph_clock_now();
+ if (event_time + mirroring_replay_delay <= now) {
+ return 0;
+ }
+
+ // ensure it is rounded up when converting to integer
+ return (event_time + mirroring_replay_delay - now) + 1;
+}
+
+} // anonymous namespace
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+template <typename I>
+struct Replayer<I>::C_ReplayCommitted : public Context {
+ Replayer* replayer;
+ ReplayEntry replay_entry;
+ uint64_t replay_bytes;
+ utime_t replay_start_time;
+
+ C_ReplayCommitted(Replayer* replayer, ReplayEntry &&replay_entry,
+ uint64_t replay_bytes, const utime_t &replay_start_time)
+ : replayer(replayer), replay_entry(std::move(replay_entry)),
+ replay_bytes(replay_bytes), replay_start_time(replay_start_time) {
+ }
+
+ void finish(int r) override {
+ replayer->handle_process_entry_safe(replay_entry, replay_bytes,
+ replay_start_time, r);
+ }
+};
+
+template <typename I>
+struct Replayer<I>::RemoteJournalerListener
+ : public ::journal::JournalMetadataListener {
+ Replayer* replayer;
+
+ RemoteJournalerListener(Replayer* replayer) : replayer(replayer) {}
+
+ void handle_update(::journal::JournalMetadata*) override {
+ auto ctx = new C_TrackedOp(
+ replayer->m_in_flight_op_tracker,
+ new LambdaContext([this](int r) {
+ replayer->handle_remote_journal_metadata_updated();
+ }));
+ replayer->m_threads->work_queue->queue(ctx, 0);
+ }
+};
+
+template <typename I>
+struct Replayer<I>::RemoteReplayHandler : public ::journal::ReplayHandler {
+ Replayer* replayer;
+
+ RemoteReplayHandler(Replayer* replayer) : replayer(replayer) {}
+ ~RemoteReplayHandler() override {};
+
+ void handle_entries_available() override {
+ replayer->handle_replay_ready();
+ }
+
+ void handle_complete(int r) override {
+ std::string error;
+ if (r == -ENOMEM) {
+ error = "not enough memory in autotune cache";
+ } else if (r < 0) {
+ error = "replay completed with error: " + cpp_strerror(r);
+ }
+ replayer->handle_replay_complete(r, error);
+ }
+};
+
+template <typename I>
+struct Replayer<I>::LocalJournalListener
+ : public librbd::journal::Listener {
+ Replayer* replayer;
+
+ LocalJournalListener(Replayer* replayer) : replayer(replayer) {
+ }
+
+ void handle_close() override {
+ replayer->handle_replay_complete(0, "");
+ }
+
+ void handle_promoted() override {
+ replayer->handle_replay_complete(0, "force promoted");
+ }
+
+ void handle_resync() override {
+ replayer->handle_resync_image();
+ }
+};
+
+template <typename I>
+Replayer<I>::Replayer(
+ Threads<I>* threads,
+ const std::string& local_mirror_uuid,
+ StateBuilder<I>* state_builder,
+ ReplayerListener* replayer_listener)
+ : m_threads(threads),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_state_builder(state_builder),
+ m_replayer_listener(replayer_listener),
+ m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+ "rbd::mirror::image_replayer::journal::Replayer", this))) {
+ dout(10) << dendl;
+}
+
+template <typename I>
+Replayer<I>::~Replayer() {
+ dout(10) << dendl;
+
+ {
+ std::unique_lock locker{m_lock};
+ unregister_perf_counters();
+ }
+
+ ceph_assert(m_remote_listener == nullptr);
+ ceph_assert(m_local_journal_listener == nullptr);
+ ceph_assert(m_local_journal_replay == nullptr);
+ ceph_assert(m_remote_replay_handler == nullptr);
+ ceph_assert(m_event_preprocessor == nullptr);
+ ceph_assert(m_replay_status_formatter == nullptr);
+ ceph_assert(m_delayed_preprocess_task == nullptr);
+ ceph_assert(m_flush_local_replay_task == nullptr);
+ ceph_assert(m_state_builder->local_image_ctx == nullptr);
+}
+
+template <typename I>
+void Replayer<I>::init(Context* on_finish) {
+ dout(10) << dendl;
+
+ {
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ std::shared_lock image_locker{local_image_ctx->image_lock};
+ m_image_spec = util::compute_image_spec(local_image_ctx->md_ctx,
+ local_image_ctx->name);
+ }
+
+ {
+ std::unique_lock locker{m_lock};
+ register_perf_counters();
+ }
+
+ ceph_assert(m_on_init_shutdown == nullptr);
+ m_on_init_shutdown = on_finish;
+
+ init_remote_journaler();
+}
+
+template <typename I>
+void Replayer<I>::shut_down(Context* on_finish) {
+ dout(10) << dendl;
+
+ std::unique_lock locker{m_lock};
+ ceph_assert(m_on_init_shutdown == nullptr);
+ m_on_init_shutdown = on_finish;
+
+ if (m_state == STATE_INIT) {
+ // raced with the last piece of the init state machine
+ return;
+ } else if (m_state == STATE_REPLAYING) {
+ m_state = STATE_COMPLETE;
+ }
+
+ // if shutting down due to an error notification, we don't
+ // need to propagate the same error again
+ m_error_code = 0;
+ m_error_description = "";
+
+ cancel_delayed_preprocess_task();
+ cancel_flush_local_replay_task();
+ wait_for_flush();
+}
+
+template <typename I>
+void Replayer<I>::flush(Context* on_finish) {
+ dout(10) << dendl;
+
+ flush_local_replay(new C_TrackedOp(m_in_flight_op_tracker, on_finish));
+}
+
+template <typename I>
+bool Replayer<I>::get_replay_status(std::string* description,
+ Context* on_finish) {
+ dout(10) << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (m_replay_status_formatter == nullptr) {
+ derr << "replay not running" << dendl;
+ locker.unlock();
+
+ on_finish->complete(-EAGAIN);
+ return false;
+ }
+
+ on_finish = new C_TrackedOp(m_in_flight_op_tracker, on_finish);
+ return m_replay_status_formatter->get_or_send_update(description,
+ on_finish);
+}
+
+template <typename I>
+void Replayer<I>::init_remote_journaler() {
+ dout(10) << dendl;
+
+ Context *ctx = create_context_callback<
+ Replayer, &Replayer<I>::handle_init_remote_journaler>(this);
+ m_state_builder->remote_journaler->init(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_init_remote_journaler(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (r < 0) {
+ derr << "failed to initialize remote journal: " << cpp_strerror(r) << dendl;
+ handle_replay_complete(locker, r, "error initializing remote journal");
+ close_local_image();
+ return;
+ }
+
+ // listen for metadata updates to check for disconnect events
+ ceph_assert(m_remote_listener == nullptr);
+ m_remote_listener = new RemoteJournalerListener(this);
+ m_state_builder->remote_journaler->add_listener(m_remote_listener);
+
+ cls::journal::Client remote_client;
+ r = m_state_builder->remote_journaler->get_cached_client(m_local_mirror_uuid,
+ &remote_client);
+ if (r < 0) {
+ derr << "error retrieving remote journal client: " << cpp_strerror(r)
+ << dendl;
+ handle_replay_complete(locker, r, "error retrieving remote journal client");
+ close_local_image();
+ return;
+ }
+
+ std::string error;
+ r = validate_remote_client_state(remote_client,
+ &m_state_builder->remote_client_meta,
+ &m_resync_requested, &error);
+ if (r < 0) {
+ handle_replay_complete(locker, r, error);
+ close_local_image();
+ return;
+ }
+
+ start_external_replay(locker);
+}
+
+template <typename I>
+void Replayer<I>::start_external_replay(std::unique_lock<ceph::mutex>& locker) {
+ dout(10) << dendl;
+
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ std::shared_lock local_image_locker{local_image_ctx->image_lock};
+
+ ceph_assert(m_local_journal == nullptr);
+ m_local_journal = local_image_ctx->journal;
+ if (m_local_journal == nullptr) {
+ local_image_locker.unlock();
+
+ derr << "local image journal closed" << dendl;
+ handle_replay_complete(locker, -EINVAL, "error accessing local journal");
+ close_local_image();
+ return;
+ }
+
+ // safe to hold pointer to journal after external playback starts
+ Context *start_ctx = create_context_callback<
+ Replayer, &Replayer<I>::handle_start_external_replay>(this);
+ m_local_journal->start_external_replay(&m_local_journal_replay, start_ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_start_external_replay(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (r < 0) {
+ ceph_assert(m_local_journal_replay == nullptr);
+ derr << "error starting external replay on local image "
+ << m_state_builder->local_image_ctx->id << ": "
+ << cpp_strerror(r) << dendl;
+
+ handle_replay_complete(locker, r, "error starting replay on local image");
+ close_local_image();
+ return;
+ }
+
+ if (!notify_init_complete(locker)) {
+ return;
+ }
+
+ m_state = STATE_REPLAYING;
+
+ // check for resync/promotion state after adding listener
+ if (!add_local_journal_listener(locker)) {
+ return;
+ }
+
+ // start remote journal replay
+ m_event_preprocessor = EventPreprocessor<I>::create(
+ *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler,
+ m_local_mirror_uuid, &m_state_builder->remote_client_meta,
+ m_threads->work_queue);
+ m_replay_status_formatter = ReplayStatusFormatter<I>::create(
+ m_state_builder->remote_journaler, m_local_mirror_uuid);
+
+ auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
+ double poll_seconds = cct->_conf.get_val<double>(
+ "rbd_mirror_journal_poll_age");
+ m_remote_replay_handler = new RemoteReplayHandler(this);
+ m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler,
+ poll_seconds);
+
+ notify_status_updated();
+}
+
+template <typename I>
+bool Replayer<I>::add_local_journal_listener(
+ std::unique_lock<ceph::mutex>& locker) {
+ dout(10) << dendl;
+
+ // listen for promotion and resync requests against local journal
+ ceph_assert(m_local_journal_listener == nullptr);
+ m_local_journal_listener = new LocalJournalListener(this);
+ m_local_journal->add_listener(m_local_journal_listener);
+
+ // verify that the local image wasn't force-promoted and that a resync hasn't
+ // been requested now that we are listening for events
+ if (m_local_journal->is_tag_owner()) {
+ dout(10) << "local image force-promoted" << dendl;
+ handle_replay_complete(locker, 0, "force promoted");
+ return false;
+ }
+
+ bool resync_requested = false;
+ int r = m_local_journal->is_resync_requested(&resync_requested);
+ if (r < 0) {
+ dout(10) << "failed to determine resync state: " << cpp_strerror(r)
+ << dendl;
+ handle_replay_complete(locker, r, "error parsing resync state");
+ return false;
+ } else if (resync_requested) {
+ dout(10) << "local image resync requested" << dendl;
+ handle_replay_complete(locker, 0, "resync requested");
+ return false;
+ }
+
+ return true;
+}
+
+template <typename I>
+bool Replayer<I>::notify_init_complete(std::unique_lock<ceph::mutex>& locker) {
+ dout(10) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+ ceph_assert(m_state == STATE_INIT);
+
+ // notify that init has completed
+ Context *on_finish = nullptr;
+ std::swap(m_on_init_shutdown, on_finish);
+
+ locker.unlock();
+ on_finish->complete(0);
+ locker.lock();
+
+ if (m_on_init_shutdown != nullptr) {
+ // shut down requested after we notified init complete but before we
+ // grabbed the lock
+ close_local_image();
+ return false;
+ }
+
+ return true;
+}
+
+template <typename I>
+void Replayer<I>::wait_for_flush() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ // ensure that we don't have two concurrent local journal replay shut downs
+ dout(10) << dendl;
+ auto ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_wait_for_flush>(this));
+ m_flush_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_wait_for_flush(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ shut_down_local_journal_replay();
+}
+
+template <typename I>
+void Replayer<I>::shut_down_local_journal_replay() {
+ std::unique_lock locker{m_lock};
+
+ if (m_local_journal_replay == nullptr) {
+ wait_for_event_replay();
+ return;
+ }
+
+ // It's required to stop the local journal replay state machine prior to
+ // waiting for the events to complete. This is to ensure that IO is properly
+ // flushed (it might be batched), wait for any running ops to complete, and
+ // to cancel any ops waiting for their associated OnFinish events.
+ dout(10) << dendl;
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_shut_down_local_journal_replay>(this);
+ m_local_journal_replay->shut_down(true, ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_shut_down_local_journal_replay(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (r < 0) {
+ derr << "error shutting down journal replay: " << cpp_strerror(r) << dendl;
+ handle_replay_error(r, "failed to shut down local journal replay");
+ }
+
+ wait_for_event_replay();
+}
+
+template <typename I>
+void Replayer<I>::wait_for_event_replay() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ dout(10) << dendl;
+ auto ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_wait_for_event_replay>(this));
+ m_event_replay_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_wait_for_event_replay(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::unique_lock locker{m_lock};
+ close_local_image();
+}
+
+template <typename I>
+void Replayer<I>::close_local_image() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+ if (m_state_builder->local_image_ctx == nullptr) {
+ stop_remote_journaler_replay();
+ return;
+ }
+
+ dout(10) << dendl;
+ if (m_local_journal_listener != nullptr) {
+ // blocks if listener notification is in-progress
+ m_local_journal->remove_listener(m_local_journal_listener);
+ delete m_local_journal_listener;
+ m_local_journal_listener = nullptr;
+ }
+
+ if (m_local_journal_replay != nullptr) {
+ m_local_journal->stop_external_replay();
+ m_local_journal_replay = nullptr;
+ }
+
+ if (m_event_preprocessor != nullptr) {
+ image_replayer::journal::EventPreprocessor<I>::destroy(
+ m_event_preprocessor);
+ m_event_preprocessor = nullptr;
+ }
+
+ m_local_journal.reset();
+
+ // NOTE: it's important to ensure that the local image is fully
+ // closed before attempting to close the remote journal in
+ // case the remote cluster is unreachable
+ ceph_assert(m_state_builder->local_image_ctx != nullptr);
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_close_local_image>(this);
+ auto request = image_replayer::CloseImageRequest<I>::create(
+ &m_state_builder->local_image_ctx, ctx);
+ request->send();
+}
+
+
+template <typename I>
+void Replayer<I>::handle_close_local_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (r < 0) {
+ derr << "error closing local iamge: " << cpp_strerror(r) << dendl;
+ handle_replay_error(r, "failed to close local image");
+ }
+
+ ceph_assert(m_state_builder->local_image_ctx == nullptr);
+ stop_remote_journaler_replay();
+}
+
+template <typename I>
+void Replayer<I>::stop_remote_journaler_replay() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ if (m_state_builder->remote_journaler == nullptr) {
+ wait_for_in_flight_ops();
+ return;
+ } else if (m_remote_replay_handler == nullptr) {
+ wait_for_in_flight_ops();
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_stop_remote_journaler_replay>(this));
+ m_state_builder->remote_journaler->stop_replay(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_stop_remote_journaler_replay(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (r < 0) {
+ derr << "failed to stop remote journaler replay : " << cpp_strerror(r)
+ << dendl;
+ handle_replay_error(r, "failed to stop remote journaler replay");
+ }
+
+ delete m_remote_replay_handler;
+ m_remote_replay_handler = nullptr;
+
+ wait_for_in_flight_ops();
+}
+
+template <typename I>
+void Replayer<I>::wait_for_in_flight_ops() {
+ dout(10) << dendl;
+ if (m_remote_listener != nullptr) {
+ m_state_builder->remote_journaler->remove_listener(m_remote_listener);
+ delete m_remote_listener;
+ m_remote_listener = nullptr;
+ }
+
+ auto ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_wait_for_in_flight_ops>(this));
+ m_in_flight_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_wait_for_in_flight_ops(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ ReplayStatusFormatter<I>::destroy(m_replay_status_formatter);
+ m_replay_status_formatter = nullptr;
+
+ Context* on_init_shutdown = nullptr;
+ {
+ std::unique_lock locker{m_lock};
+ ceph_assert(m_on_init_shutdown != nullptr);
+ std::swap(m_on_init_shutdown, on_init_shutdown);
+ m_state = STATE_COMPLETE;
+ }
+ on_init_shutdown->complete(m_error_code);
+}
+
+template <typename I>
+void Replayer<I>::handle_remote_journal_metadata_updated() {
+ dout(20) << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (m_state != STATE_REPLAYING) {
+ return;
+ }
+
+ cls::journal::Client remote_client;
+ int r = m_state_builder->remote_journaler->get_cached_client(
+ m_local_mirror_uuid, &remote_client);
+ if (r < 0) {
+ derr << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+ return;
+ }
+
+ librbd::journal::MirrorPeerClientMeta remote_client_meta;
+ std::string error;
+ r = validate_remote_client_state(remote_client, &remote_client_meta,
+ &m_resync_requested, &error);
+ if (r < 0) {
+ dout(0) << "client flagged disconnected, stopping image replay" << dendl;
+ handle_replay_complete(locker, r, error);
+ }
+}
+
+template <typename I>
+void Replayer<I>::schedule_flush_local_replay_task() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ std::unique_lock timer_locker{m_threads->timer_lock};
+ if (m_state != STATE_REPLAYING || m_flush_local_replay_task != nullptr) {
+ return;
+ }
+
+ dout(15) << dendl;
+ m_flush_local_replay_task = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_flush_local_replay_task>(this));
+ m_threads->timer->add_event_after(30, m_flush_local_replay_task);
+}
+
+template <typename I>
+void Replayer<I>::cancel_flush_local_replay_task() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ std::unique_lock timer_locker{m_threads->timer_lock};
+ if (m_flush_local_replay_task != nullptr) {
+ dout(10) << dendl;
+ m_threads->timer->cancel_event(m_flush_local_replay_task);
+ m_flush_local_replay_task = nullptr;
+ }
+}
+
+template <typename I>
+void Replayer<I>::handle_flush_local_replay_task(int) {
+ dout(15) << dendl;
+
+ m_in_flight_op_tracker.start_op();
+ auto on_finish = new LambdaContext([this](int) {
+ std::unique_lock locker{m_lock};
+
+ {
+ std::unique_lock timer_locker{m_threads->timer_lock};
+ m_flush_local_replay_task = nullptr;
+ }
+
+ notify_status_updated();
+ m_in_flight_op_tracker.finish_op();
+ });
+ flush_local_replay(on_finish);
+}
+
+template <typename I>
+void Replayer<I>::flush_local_replay(Context* on_flush) {
+ std::unique_lock locker{m_lock};
+ if (m_state != STATE_REPLAYING) {
+ locker.unlock();
+ on_flush->complete(0);
+ return;
+ } else if (m_local_journal_replay == nullptr) {
+ // raced w/ a tag creation stop/start, which implies that
+ // the replay is flushed
+ locker.unlock();
+ flush_commit_position(on_flush);
+ return;
+ }
+
+ dout(15) << dendl;
+ auto ctx = new LambdaContext(
+ [this, on_flush](int r) {
+ handle_flush_local_replay(on_flush, r);
+ });
+ m_local_journal_replay->flush(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_flush_local_replay(Context* on_flush, int r) {
+ dout(15) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "error flushing local replay: " << cpp_strerror(r) << dendl;
+ on_flush->complete(r);
+ return;
+ }
+
+ flush_commit_position(on_flush);
+}
+
+template <typename I>
+void Replayer<I>::flush_commit_position(Context* on_flush) {
+ std::unique_lock locker{m_lock};
+ if (m_state != STATE_REPLAYING) {
+ locker.unlock();
+ on_flush->complete(0);
+ return;
+ }
+
+ dout(15) << dendl;
+ auto ctx = new LambdaContext(
+ [this, on_flush](int r) {
+ handle_flush_commit_position(on_flush, r);
+ });
+ m_state_builder->remote_journaler->flush_commit_position(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_flush_commit_position(Context* on_flush, int r) {
+ dout(15) << "r=" << r << dendl;
+ if (r < 0) {
+ derr << "error flushing remote journal commit position: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ on_flush->complete(r);
+}
+
+template <typename I>
+void Replayer<I>::handle_replay_error(int r, const std::string &error) {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ if (m_error_code == 0) {
+ m_error_code = r;
+ m_error_description = error;
+ }
+}
+
+template <typename I>
+bool Replayer<I>::is_replay_complete() const {
+ std::unique_lock locker{m_lock};
+ return is_replay_complete(locker);
+}
+
+template <typename I>
+bool Replayer<I>::is_replay_complete(
+ const std::unique_lock<ceph::mutex>&) const {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+ return (m_state == STATE_COMPLETE);
+}
+
+template <typename I>
+void Replayer<I>::handle_replay_complete(int r, const std::string &error) {
+ std::unique_lock locker{m_lock};
+ handle_replay_complete(locker, r, error);
+}
+
+template <typename I>
+void Replayer<I>::handle_replay_complete(
+ const std::unique_lock<ceph::mutex>&, int r, const std::string &error) {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ dout(10) << "r=" << r << ", error=" << error << dendl;
+ if (r < 0) {
+ derr << "replay encountered an error: " << cpp_strerror(r) << dendl;
+ handle_replay_error(r, error);
+ }
+
+ if (m_state != STATE_REPLAYING) {
+ return;
+ }
+
+ m_state = STATE_COMPLETE;
+ notify_status_updated();
+}
+
+template <typename I>
+void Replayer<I>::handle_replay_ready() {
+ std::unique_lock locker{m_lock};
+ handle_replay_ready(locker);
+}
+
+template <typename I>
+void Replayer<I>::handle_replay_ready(
+ std::unique_lock<ceph::mutex>& locker) {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ dout(20) << dendl;
+ if (is_replay_complete(locker)) {
+ return;
+ }
+
+ if (!m_state_builder->remote_journaler->try_pop_front(&m_replay_entry,
+ &m_replay_tag_tid)) {
+ dout(20) << "no entries ready for replay" << dendl;
+ return;
+ }
+
+ // can safely drop lock once the entry is tracked
+ m_event_replay_tracker.start_op();
+ locker.unlock();
+
+ dout(20) << "entry tid=" << m_replay_entry.get_commit_tid()
+ << "tag_tid=" << m_replay_tag_tid << dendl;
+ if (!m_replay_tag_valid || m_replay_tag.tid != m_replay_tag_tid) {
+ // must allocate a new local journal tag prior to processing
+ replay_flush();
+ return;
+ }
+
+ preprocess_entry();
+}
+
+template <typename I>
+void Replayer<I>::replay_flush() {
+ dout(10) << dendl;
+ m_flush_tracker.start_op();
+
+ // shut down the replay to flush all IO and ops and create a new
+ // replayer to handle the new tag epoch
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_replay_flush_shut_down>(this);
+ ceph_assert(m_local_journal_replay != nullptr);
+ m_local_journal_replay->shut_down(false, ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_replay_flush_shut_down(int r) {
+ std::unique_lock locker{m_lock};
+ dout(10) << "r=" << r << dendl;
+
+ ceph_assert(m_local_journal != nullptr);
+ ceph_assert(m_local_journal_listener != nullptr);
+
+ // blocks if listener notification is in-progress
+ m_local_journal->remove_listener(m_local_journal_listener);
+ delete m_local_journal_listener;
+ m_local_journal_listener = nullptr;
+
+ m_local_journal->stop_external_replay();
+ m_local_journal_replay = nullptr;
+ m_local_journal.reset();
+
+ if (r < 0) {
+ locker.unlock();
+
+ handle_replay_flush(r);
+ return;
+ }
+
+ // journal might have been closed now that we stopped external replay
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ std::shared_lock local_image_locker{local_image_ctx->image_lock};
+ m_local_journal = local_image_ctx->journal;
+ if (m_local_journal == nullptr) {
+ local_image_locker.unlock();
+ locker.unlock();
+
+ derr << "local image journal closed" << dendl;
+ handle_replay_flush(-EINVAL);
+ return;
+ }
+
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_replay_flush>(this);
+ m_local_journal->start_external_replay(&m_local_journal_replay, ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_replay_flush(int r) {
+ std::unique_lock locker{m_lock};
+ dout(10) << "r=" << r << dendl;
+ m_flush_tracker.finish_op();
+
+ if (r < 0) {
+ derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl;
+ handle_replay_complete(locker, r, "replay flush encountered an error");
+ m_event_replay_tracker.finish_op();
+ return;
+ } else if (is_replay_complete(locker)) {
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
+ // check for resync/promotion state after adding listener
+ if (!add_local_journal_listener(locker)) {
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+ locker.unlock();
+
+ get_remote_tag();
+}
+
+template <typename I>
+void Replayer<I>::get_remote_tag() {
+ dout(15) << "tag_tid: " << m_replay_tag_tid << dendl;
+
+ Context *ctx = create_context_callback<
+ Replayer, &Replayer<I>::handle_get_remote_tag>(this);
+ m_state_builder->remote_journaler->get_tag(m_replay_tag_tid, &m_replay_tag,
+ ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_get_remote_tag(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r == 0) {
+ try {
+ auto it = m_replay_tag.data.cbegin();
+ decode(m_replay_tag_data, it);
+ } catch (const buffer::error &err) {
+ r = -EBADMSG;
+ }
+ }
+
+ if (r < 0) {
+ derr << "failed to retrieve remote tag " << m_replay_tag_tid << ": "
+ << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to retrieve remote tag");
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
+ m_replay_tag_valid = true;
+ dout(15) << "decoded remote tag " << m_replay_tag_tid << ": "
+ << m_replay_tag_data << dendl;
+
+ allocate_local_tag();
+}
+
+template <typename I>
+void Replayer<I>::allocate_local_tag() {
+ dout(15) << dendl;
+
+ std::string mirror_uuid = m_replay_tag_data.mirror_uuid;
+ if (mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ mirror_uuid = m_state_builder->remote_mirror_uuid;
+ } else if (mirror_uuid == m_local_mirror_uuid) {
+ mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID;
+ } else if (mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
+ // handle possible edge condition where daemon can failover and
+ // the local image has already been promoted/demoted
+ auto local_tag_data = m_local_journal->get_tag_data();
+ if (local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID &&
+ (local_tag_data.predecessor.commit_valid &&
+ local_tag_data.predecessor.mirror_uuid ==
+ librbd::Journal<>::LOCAL_MIRROR_UUID)) {
+ dout(15) << "skipping stale demotion event" << dendl;
+ handle_process_entry_safe(m_replay_entry, m_replay_bytes,
+ m_replay_start_time, 0);
+ handle_replay_ready();
+ return;
+ } else {
+ dout(5) << "encountered image demotion: stopping" << dendl;
+ handle_replay_complete(0, "");
+ }
+ }
+
+ librbd::journal::TagPredecessor predecessor(m_replay_tag_data.predecessor);
+ if (predecessor.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) {
+ predecessor.mirror_uuid = m_state_builder->remote_mirror_uuid;
+ } else if (predecessor.mirror_uuid == m_local_mirror_uuid) {
+ predecessor.mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID;
+ }
+
+ dout(15) << "mirror_uuid=" << mirror_uuid << ", "
+ << "predecessor=" << predecessor << ", "
+ << "replay_tag_tid=" << m_replay_tag_tid << dendl;
+ Context *ctx = create_context_callback<
+ Replayer, &Replayer<I>::handle_allocate_local_tag>(this);
+ m_local_journal->allocate_tag(mirror_uuid, predecessor, ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_allocate_local_tag(int r) {
+ dout(15) << "r=" << r << ", "
+ << "tag_tid=" << m_local_journal->get_tag_tid() << dendl;
+ if (r < 0) {
+ derr << "failed to allocate journal tag: " << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to allocate journal tag");
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
+ preprocess_entry();
+}
+
+template <typename I>
+void Replayer<I>::preprocess_entry() {
+ dout(20) << "preprocessing entry tid=" << m_replay_entry.get_commit_tid()
+ << dendl;
+
+ bufferlist data = m_replay_entry.get_data();
+ auto it = data.cbegin();
+ int r = m_local_journal_replay->decode(&it, &m_event_entry);
+ if (r < 0) {
+ derr << "failed to decode journal event" << dendl;
+ handle_replay_complete(r, "failed to decode journal event");
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
+ m_replay_bytes = data.length();
+ uint32_t delay = calculate_replay_delay(
+ m_event_entry.timestamp,
+ m_state_builder->local_image_ctx->mirroring_replay_delay);
+ if (delay == 0) {
+ handle_preprocess_entry_ready(0);
+ return;
+ }
+
+ std::unique_lock locker{m_lock};
+ if (is_replay_complete(locker)) {
+ // don't schedule a delayed replay task if a shut-down is in-progress
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
+ dout(20) << "delaying replay by " << delay << " sec" << dendl;
+ std::unique_lock timer_locker{m_threads->timer_lock};
+ ceph_assert(m_delayed_preprocess_task == nullptr);
+ m_delayed_preprocess_task = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_delayed_preprocess_task>(this);
+ m_threads->timer->add_event_after(delay, m_delayed_preprocess_task);
+}
+
+template <typename I>
+void Replayer<I>::handle_delayed_preprocess_task(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
+ m_delayed_preprocess_task = nullptr;
+
+ m_threads->work_queue->queue(create_context_callback<
+ Replayer, &Replayer<I>::handle_preprocess_entry_ready>(this), 0);
+}
+
+template <typename I>
+void Replayer<I>::handle_preprocess_entry_ready(int r) {
+ dout(20) << "r=" << r << dendl;
+ ceph_assert(r == 0);
+
+ m_replay_start_time = ceph_clock_now();
+ if (!m_event_preprocessor->is_required(m_event_entry)) {
+ process_entry();
+ return;
+ }
+
+ Context *ctx = create_context_callback<
+ Replayer, &Replayer<I>::handle_preprocess_entry_safe>(this);
+ m_event_preprocessor->preprocess(&m_event_entry, ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_preprocess_entry_safe(int r) {
+ dout(20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ if (r == -ECANCELED) {
+ handle_replay_complete(0, "lost exclusive lock");
+ } else {
+ derr << "failed to preprocess journal event" << dendl;
+ handle_replay_complete(r, "failed to preprocess journal event");
+ }
+
+ m_event_replay_tracker.finish_op();
+ return;
+ }
+
+ process_entry();
+}
+
+template <typename I>
+void Replayer<I>::process_entry() {
+ dout(20) << "processing entry tid=" << m_replay_entry.get_commit_tid()
+ << dendl;
+
+ Context *on_ready = create_context_callback<
+ Replayer, &Replayer<I>::handle_process_entry_ready>(this);
+ Context *on_commit = new C_ReplayCommitted(this, std::move(m_replay_entry),
+ m_replay_bytes,
+ m_replay_start_time);
+
+ m_local_journal_replay->process(m_event_entry, on_ready, on_commit);
+}
+
+template <typename I>
+void Replayer<I>::handle_process_entry_ready(int r) {
+ std::unique_lock locker{m_lock};
+
+ dout(20) << dendl;
+ ceph_assert(r == 0);
+
+ bool update_status = false;
+ {
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ std::shared_lock image_locker{local_image_ctx->image_lock};
+ auto image_spec = util::compute_image_spec(local_image_ctx->md_ctx,
+ local_image_ctx->name);
+ if (m_image_spec != image_spec) {
+ m_image_spec = image_spec;
+ update_status = true;
+ }
+ }
+
+ m_replay_status_formatter->handle_entry_processed(m_replay_bytes);
+
+ if (update_status) {
+ unregister_perf_counters();
+ register_perf_counters();
+ notify_status_updated();
+ }
+
+ // attempt to process the next event
+ handle_replay_ready(locker);
+}
+
+template <typename I>
+void Replayer<I>::handle_process_entry_safe(
+ const ReplayEntry &replay_entry, uint64_t replay_bytes,
+ const utime_t &replay_start_time, int r) {
+ dout(20) << "commit_tid=" << replay_entry.get_commit_tid() << ", r=" << r
+ << dendl;
+
+ if (r < 0) {
+ derr << "failed to commit journal event: " << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to commit journal event");
+ } else {
+ ceph_assert(m_state_builder->remote_journaler != nullptr);
+ m_state_builder->remote_journaler->committed(replay_entry);
+ }
+
+ auto latency = ceph_clock_now() - replay_start_time;
+ if (g_journal_perf_counters) {
+ g_journal_perf_counters->inc(l_rbd_mirror_journal_entries);
+ g_journal_perf_counters->inc(l_rbd_mirror_journal_replay_bytes,
+ replay_bytes);
+ g_journal_perf_counters->tinc(l_rbd_mirror_journal_replay_latency,
+ latency);
+ }
+
+ auto ctx = new LambdaContext(
+ [this, replay_bytes, latency](int r) {
+ std::unique_lock locker{m_lock};
+ schedule_flush_local_replay_task();
+
+ if (m_perf_counters) {
+ m_perf_counters->inc(l_rbd_mirror_journal_entries);
+ m_perf_counters->inc(l_rbd_mirror_journal_replay_bytes, replay_bytes);
+ m_perf_counters->tinc(l_rbd_mirror_journal_replay_latency, latency);
+ }
+
+ m_event_replay_tracker.finish_op();
+ });
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void Replayer<I>::handle_resync_image() {
+ dout(10) << dendl;
+
+ std::unique_lock locker{m_lock};
+ m_resync_requested = true;
+ handle_replay_complete(locker, 0, "resync requested");
+}
+
+template <typename I>
+void Replayer<I>::notify_status_updated() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ dout(10) << dendl;
+
+ auto ctx = new C_TrackedOp(m_in_flight_op_tracker, new LambdaContext(
+ [this](int) {
+ m_replayer_listener->handle_notification();
+ }));
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void Replayer<I>::cancel_delayed_preprocess_task() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ bool canceled_delayed_preprocess_task = false;
+ {
+ std::unique_lock timer_locker{m_threads->timer_lock};
+ if (m_delayed_preprocess_task != nullptr) {
+ dout(10) << dendl;
+ canceled_delayed_preprocess_task = m_threads->timer->cancel_event(
+ m_delayed_preprocess_task);
+ ceph_assert(canceled_delayed_preprocess_task);
+ m_delayed_preprocess_task = nullptr;
+ }
+ }
+
+ if (canceled_delayed_preprocess_task) {
+ // wake up sleeping replay
+ m_event_replay_tracker.finish_op();
+ }
+}
+
+template <typename I>
+int Replayer<I>::validate_remote_client_state(
+ const cls::journal::Client& remote_client,
+ librbd::journal::MirrorPeerClientMeta* remote_client_meta,
+ bool* resync_requested, std::string* error) {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ if (!util::decode_client_meta(remote_client, remote_client_meta)) {
+ // require operator intervention since the data is corrupt
+ *error = "error retrieving remote journal client";
+ return -EBADMSG;
+ }
+
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ dout(5) << "image_id=" << local_image_ctx->id << ", "
+ << "remote_client_meta.image_id="
+ << remote_client_meta->image_id << ", "
+ << "remote_client.state=" << remote_client.state << dendl;
+ if (remote_client_meta->image_id == local_image_ctx->id &&
+ remote_client.state != cls::journal::CLIENT_STATE_CONNECTED) {
+ dout(5) << "client flagged disconnected, stopping image replay" << dendl;
+ if (local_image_ctx->config.template get_val<bool>(
+ "rbd_mirroring_resync_after_disconnect")) {
+ dout(10) << "disconnected: automatic resync" << dendl;
+ *resync_requested = true;
+ *error = "disconnected: automatic resync";
+ return -ENOTCONN;
+ } else {
+ dout(10) << "disconnected" << dendl;
+ *error = "disconnected";
+ return -ENOTCONN;
+ }
+ }
+
+ return 0;
+}
+
+template <typename I>
+void Replayer<I>::register_perf_counters() {
+ dout(5) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+ ceph_assert(m_perf_counters == nullptr);
+
+ auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
+ auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
+
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ std::string labels = ceph::perf_counters::key_create(
+ "rbd_mirror_journal_image",
+ {{"pool", local_image_ctx->md_ctx.get_pool_name()},
+ {"namespace", local_image_ctx->md_ctx.get_namespace()},
+ {"image", local_image_ctx->name}});
+
+ PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_journal_first,
+ l_rbd_mirror_journal_last);
+ plb.add_u64_counter(l_rbd_mirror_journal_entries, "entries",
+ "Number of entries replayed", nullptr, prio);
+ plb.add_u64_counter(l_rbd_mirror_journal_replay_bytes, "replay_bytes",
+ "Total bytes replayed", nullptr, prio,
+ unit_t(UNIT_BYTES));
+ plb.add_time_avg(l_rbd_mirror_journal_replay_latency, "replay_latency",
+ "Replay latency", nullptr, prio);
+ m_perf_counters = plb.create_perf_counters();
+ g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
+}
+
+template <typename I>
+void Replayer<I>::unregister_perf_counters() {
+ dout(5) << dendl;
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ PerfCounters *perf_counters = nullptr;
+ std::swap(perf_counters, m_perf_counters);
+
+ if (perf_counters != nullptr) {
+ g_ceph_context->get_perfcounters_collection()->remove(perf_counters);
+ delete perf_counters;
+ }
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::Replayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.h b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h
new file mode 100644
index 000000000..6b1f36d9c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h
@@ -0,0 +1,323 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H
+
+#include "tools/rbd_mirror/image_replayer/Replayer.h"
+#include "include/utime.h"
+#include "common/AsyncOpTracker.h"
+#include "common/ceph_mutex.h"
+#include "common/RefCountedObj.h"
+#include "cls/journal/cls_journal_types.h"
+#include "journal/ReplayEntry.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <string>
+#include <type_traits>
+
+namespace journal { class Journaler; }
+namespace librbd {
+
+struct ImageCtx;
+namespace journal { template <typename I> class Replay; }
+
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+struct ReplayerListener;
+
+namespace journal {
+
+template <typename> class EventPreprocessor;
+template <typename> class ReplayStatusFormatter;
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class Replayer : public image_replayer::Replayer {
+public:
+ typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler;
+
+ static Replayer* create(
+ Threads<ImageCtxT>* threads,
+ const std::string& local_mirror_uuid,
+ StateBuilder<ImageCtxT>* state_builder,
+ ReplayerListener* replayer_listener) {
+ return new Replayer(threads, local_mirror_uuid, state_builder,
+ replayer_listener);
+ }
+
+ Replayer(
+ Threads<ImageCtxT>* threads,
+ const std::string& local_mirror_uuid,
+ StateBuilder<ImageCtxT>* state_builder,
+ ReplayerListener* replayer_listener);
+ ~Replayer();
+
+ void destroy() override {
+ delete this;
+ }
+
+ void init(Context* on_finish) override;
+ void shut_down(Context* on_finish) override;
+
+ void flush(Context* on_finish) override;
+
+ bool get_replay_status(std::string* description, Context* on_finish) override;
+
+ bool is_replaying() const override {
+ std::unique_lock locker{m_lock};
+ return (m_state == STATE_REPLAYING);
+ }
+
+ bool is_resync_requested() const override {
+ std::unique_lock locker(m_lock);
+ return m_resync_requested;
+ }
+
+ int get_error_code() const override {
+ std::unique_lock locker(m_lock);
+ return m_error_code;
+ }
+
+ std::string get_error_description() const override {
+ std::unique_lock locker(m_lock);
+ return m_error_description;
+ }
+
+ std::string get_image_spec() const {
+ std::unique_lock locker(m_lock);
+ return m_image_spec;
+ }
+
+private:
+ /**
+ * @verbatim
+ *
+ * <init>
+ * |
+ * v (error)
+ * INIT_REMOTE_JOURNALER * * * * * * * * * * * * * * * * * * *
+ * | *
+ * v (error) *
+ * START_EXTERNAL_REPLAY * * * * * * * * * * * * * * * * * * *
+ * | *
+ * | /--------------------------------------------\ *
+ * | | | *
+ * v v (asok flush) | *
+ * REPLAYING -------------> LOCAL_REPLAY_FLUSH | *
+ * | \ | | *
+ * | | v | *
+ * | | FLUSH_COMMIT_POSITION | *
+ * | | | | *
+ * | | \--------------------/| *
+ * | | | *
+ * | | (entries available) | *
+ * | \-----------> REPLAY_READY | *
+ * | | | *
+ * | | (skip if not | *
+ * | v needed) (error) *
+ * | REPLAY_FLUSH * * * * * * * * * *
+ * | | | * *
+ * | | (skip if not | * *
+ * | v needed) (error) * *
+ * | GET_REMOTE_TAG * * * * * * * * *
+ * | | | * *
+ * | | (skip if not | * *
+ * | v needed) (error) * *
+ * | ALLOCATE_LOCAL_TAG * * * * * * *
+ * | | | * *
+ * | v (error) * *
+ * | PREPROCESS_ENTRY * * * * * * * *
+ * | | | * *
+ * | v (error) * *
+ * | PROCESS_ENTRY * * * * * * * * * *
+ * | | | * *
+ * | \---------------------/ * *
+ * v (shutdown) * *
+ * REPLAY_COMPLETE < * * * * * * * * * * * * * * * * * * * *
+ * | *
+ * v *
+ * WAIT_FOR_FLUSH *
+ * | *
+ * v *
+ * SHUT_DOWN_LOCAL_JOURNAL_REPLAY *
+ * | *
+ * v *
+ * WAIT_FOR_REPLAY *
+ * | *
+ * v *
+ * CLOSE_LOCAL_IMAGE < * * * * * * * * * * * * * * * * * * * *
+ * |
+ * v (skip if not started)
+ * STOP_REMOTE_JOURNALER_REPLAY
+ * |
+ * v
+ * WAIT_FOR_IN_FLIGHT_OPS
+ * |
+ * v
+ * <shutdown>
+ *
+ * @endverbatim
+ */
+
+ typedef typename librbd::journal::TypeTraits<ImageCtxT>::ReplayEntry ReplayEntry;
+
+ enum State {
+ STATE_INIT,
+ STATE_REPLAYING,
+ STATE_COMPLETE
+ };
+
+ struct C_ReplayCommitted;
+ struct RemoteJournalerListener;
+ struct RemoteReplayHandler;
+ struct LocalJournalListener;
+
+ Threads<ImageCtxT>* m_threads;
+ std::string m_local_mirror_uuid;
+ StateBuilder<ImageCtxT>* m_state_builder;
+ ReplayerListener* m_replayer_listener;
+
+ mutable ceph::mutex m_lock;
+
+ std::string m_image_spec;
+ Context* m_on_init_shutdown = nullptr;
+
+ State m_state = STATE_INIT;
+ int m_error_code = 0;
+ std::string m_error_description;
+ bool m_resync_requested = false;
+
+ ceph::ref_t<typename std::remove_pointer<decltype(ImageCtxT::journal)>::type>
+ m_local_journal;
+ RemoteJournalerListener* m_remote_listener = nullptr;
+
+ librbd::journal::Replay<ImageCtxT>* m_local_journal_replay = nullptr;
+ EventPreprocessor<ImageCtxT>* m_event_preprocessor = nullptr;
+ ReplayStatusFormatter<ImageCtxT>* m_replay_status_formatter = nullptr;
+ RemoteReplayHandler* m_remote_replay_handler = nullptr;
+ LocalJournalListener* m_local_journal_listener = nullptr;
+
+ PerfCounters *m_perf_counters = nullptr;
+
+ ReplayEntry m_replay_entry;
+ uint64_t m_replay_bytes = 0;
+ utime_t m_replay_start_time;
+ bool m_replay_tag_valid = false;
+ uint64_t m_replay_tag_tid = 0;
+ cls::journal::Tag m_replay_tag;
+ librbd::journal::TagData m_replay_tag_data;
+ librbd::journal::EventEntry m_event_entry;
+
+ AsyncOpTracker m_flush_tracker;
+
+ AsyncOpTracker m_event_replay_tracker;
+ Context *m_delayed_preprocess_task = nullptr;
+
+ AsyncOpTracker m_in_flight_op_tracker;
+ Context *m_flush_local_replay_task = nullptr;
+
+ void handle_remote_journal_metadata_updated();
+
+ void schedule_flush_local_replay_task();
+ void cancel_flush_local_replay_task();
+ void handle_flush_local_replay_task(int r);
+
+ void flush_local_replay(Context* on_flush);
+ void handle_flush_local_replay(Context* on_flush, int r);
+
+ void flush_commit_position(Context* on_flush);
+ void handle_flush_commit_position(Context* on_flush, int r);
+
+ void init_remote_journaler();
+ void handle_init_remote_journaler(int r);
+
+ void start_external_replay(std::unique_lock<ceph::mutex>& locker);
+ void handle_start_external_replay(int r);
+
+ bool add_local_journal_listener(std::unique_lock<ceph::mutex>& locker);
+
+ bool notify_init_complete(std::unique_lock<ceph::mutex>& locker);
+
+ void wait_for_flush();
+ void handle_wait_for_flush(int r);
+
+ void shut_down_local_journal_replay();
+ void handle_shut_down_local_journal_replay(int r);
+
+ void wait_for_event_replay();
+ void handle_wait_for_event_replay(int r);
+
+ void close_local_image();
+ void handle_close_local_image(int r);
+
+ void stop_remote_journaler_replay();
+ void handle_stop_remote_journaler_replay(int r);
+
+ void wait_for_in_flight_ops();
+ void handle_wait_for_in_flight_ops(int r);
+
+ void replay_flush();
+ void handle_replay_flush_shut_down(int r);
+ void handle_replay_flush(int r);
+
+ void get_remote_tag();
+ void handle_get_remote_tag(int r);
+
+ void allocate_local_tag();
+ void handle_allocate_local_tag(int r);
+
+ void handle_replay_error(int r, const std::string &error);
+
+ bool is_replay_complete() const;
+ bool is_replay_complete(const std::unique_lock<ceph::mutex>& locker) const;
+
+ void handle_replay_complete(int r, const std::string &error_desc);
+ void handle_replay_complete(const std::unique_lock<ceph::mutex>&,
+ int r, const std::string &error_desc);
+ void handle_replay_ready();
+ void handle_replay_ready(std::unique_lock<ceph::mutex>& locker);
+
+ void preprocess_entry();
+ void handle_delayed_preprocess_task(int r);
+ void handle_preprocess_entry_ready(int r);
+ void handle_preprocess_entry_safe(int r);
+
+ void process_entry();
+ void handle_process_entry_ready(int r);
+ void handle_process_entry_safe(const ReplayEntry& replay_entry,
+ uint64_t relay_bytes,
+ const utime_t &replay_start_time, int r);
+
+ void handle_resync_image();
+
+ void notify_status_updated();
+
+ void cancel_delayed_preprocess_task();
+
+ int validate_remote_client_state(
+ const cls::journal::Client& remote_client,
+ librbd::journal::MirrorPeerClientMeta* remote_client_meta,
+ bool* resync_requested, std::string* error);
+
+ void register_perf_counters();
+ void unregister_perf_counters();
+
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::Replayer<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc
new file mode 100644
index 000000000..5f1fb0e2f
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc
@@ -0,0 +1,149 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "StateBuilder.h"
+#include "include/ceph_assert.h"
+#include "include/Context.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h"
+#include "tools/rbd_mirror/image_replayer/journal/Replayer.h"
+#include "tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "StateBuilder: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename I>
+StateBuilder<I>::StateBuilder(const std::string& global_image_id)
+ : image_replayer::StateBuilder<I>(global_image_id) {
+}
+
+template <typename I>
+StateBuilder<I>::~StateBuilder() {
+ ceph_assert(remote_journaler == nullptr);
+}
+
+template <typename I>
+void StateBuilder<I>::close(Context* on_finish) {
+ dout(10) << dendl;
+
+ // close the remote journaler after closing the local image
+ // in case we have lost contact w/ the remote cluster and
+ // will block
+ on_finish = new LambdaContext([this, on_finish](int) {
+ shut_down_remote_journaler(on_finish);
+ });
+ on_finish = new LambdaContext([this, on_finish](int) {
+ this->close_local_image(on_finish);
+ });
+ this->close_remote_image(on_finish);
+}
+
+template <typename I>
+bool StateBuilder<I>::is_disconnected() const {
+ return (remote_client_state == cls::journal::CLIENT_STATE_DISCONNECTED);
+}
+
+template <typename I>
+bool StateBuilder<I>::is_linked_impl() const {
+ ceph_assert(!this->remote_mirror_uuid.empty());
+ return (local_primary_mirror_uuid == this->remote_mirror_uuid);
+}
+
+template <typename I>
+cls::rbd::MirrorImageMode StateBuilder<I>::get_mirror_image_mode() const {
+ return cls::rbd::MIRROR_IMAGE_MODE_JOURNAL;
+}
+
+template <typename I>
+image_sync::SyncPointHandler* StateBuilder<I>::create_sync_point_handler() {
+ dout(10) << dendl;
+
+ this->m_sync_point_handler = SyncPointHandler<I>::create(this);
+ return this->m_sync_point_handler;
+}
+
+template <typename I>
+BaseRequest* StateBuilder<I>::create_local_image_request(
+ Threads<I>* threads,
+ librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ Context* on_finish) {
+ return CreateLocalImageRequest<I>::create(
+ threads, local_io_ctx, this->remote_image_ctx, this->global_image_id,
+ pool_meta_cache, progress_ctx, this, on_finish);
+}
+
+template <typename I>
+BaseRequest* StateBuilder<I>::create_prepare_replay_request(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) {
+ return PrepareReplayRequest<I>::create(
+ local_mirror_uuid, progress_ctx, this, resync_requested, syncing,
+ on_finish);
+}
+
+template <typename I>
+image_replayer::Replayer* StateBuilder<I>::create_replayer(
+ Threads<I>* threads,
+ InstanceWatcher<I>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ ReplayerListener* replayer_listener) {
+ return Replayer<I>::create(
+ threads, local_mirror_uuid, this, replayer_listener);
+}
+
+template <typename I>
+void StateBuilder<I>::shut_down_remote_journaler(Context* on_finish) {
+ if (remote_journaler == nullptr) {
+ on_finish->complete(0);
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_shut_down_remote_journaler(r, on_finish);
+ });
+ remote_journaler->shut_down(ctx);
+}
+
+template <typename I>
+void StateBuilder<I>::handle_shut_down_remote_journaler(int r,
+ Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to shut down remote journaler: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ delete remote_journaler;
+ remote_journaler = nullptr;
+ on_finish->complete(r);
+}
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::StateBuilder<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h
new file mode 100644
index 000000000..790d1390b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H
+
+#include "tools/rbd_mirror/image_replayer/StateBuilder.h"
+#include "cls/journal/cls_journal_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <string>
+
+struct Context;
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename> class SyncPointHandler;
+
+template <typename ImageCtxT>
+class StateBuilder : public image_replayer::StateBuilder<ImageCtxT> {
+public:
+ typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+ typedef typename TypeTraits::Journaler Journaler;
+
+ static StateBuilder* create(const std::string& global_image_id) {
+ return new StateBuilder(global_image_id);
+ }
+
+ StateBuilder(const std::string& global_image_id);
+ ~StateBuilder() override;
+
+ void close(Context* on_finish) override;
+
+ bool is_disconnected() const override;
+
+ cls::rbd::MirrorImageMode get_mirror_image_mode() const override;
+
+ image_sync::SyncPointHandler* create_sync_point_handler() override;
+
+ bool replay_requires_remote_image() const override {
+ return false;
+ }
+
+ BaseRequest* create_local_image_request(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ Context* on_finish) override;
+
+ BaseRequest* create_prepare_replay_request(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) override;
+
+ image_replayer::Replayer* create_replayer(
+ Threads<ImageCtxT>* threads,
+ InstanceWatcher<ImageCtxT>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ ReplayerListener* replayer_listener) override;
+
+ std::string local_primary_mirror_uuid;
+
+ Journaler* remote_journaler = nullptr;
+ cls::journal::ClientState remote_client_state =
+ cls::journal::CLIENT_STATE_CONNECTED;
+ librbd::journal::MirrorPeerClientMeta remote_client_meta;
+
+ SyncPointHandler<ImageCtxT>* sync_point_handler = nullptr;
+
+private:
+ bool is_linked_impl() const override;
+
+ void shut_down_remote_journaler(Context* on_finish);
+ void handle_shut_down_remote_journaler(int r, Context* on_finish);
+};
+
+} // namespace journal
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::StateBuilder<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H
diff --git a/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc
new file mode 100644
index 000000000..66d13e555
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc
@@ -0,0 +1,109 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SyncPointHandler.h"
+#include "StateBuilder.h"
+#include "include/ceph_assert.h"
+#include "include/Context.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
+ << "SyncPointHandler: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename I>
+SyncPointHandler<I>::SyncPointHandler(StateBuilder<I>* state_builder)
+ : m_state_builder(state_builder),
+ m_client_meta_copy(state_builder->remote_client_meta) {
+}
+
+template <typename I>
+typename SyncPointHandler<I>::SyncPoints
+SyncPointHandler<I>::get_sync_points() const {
+ SyncPoints sync_points;
+ for (auto& sync_point : m_client_meta_copy.sync_points) {
+ sync_points.emplace_back(
+ sync_point.snap_namespace,
+ sync_point.snap_name,
+ sync_point.from_snap_name,
+ sync_point.object_number);
+ }
+ return sync_points;
+}
+
+template <typename I>
+librbd::SnapSeqs SyncPointHandler<I>::get_snap_seqs() const {
+ return m_client_meta_copy.snap_seqs;
+}
+
+template <typename I>
+void SyncPointHandler<I>::update_sync_points(
+ const librbd::SnapSeqs& snap_seqs, const SyncPoints& sync_points,
+ bool sync_complete, Context* on_finish) {
+ dout(10) << dendl;
+
+ if (sync_complete && sync_points.empty()) {
+ m_client_meta_copy.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
+ }
+
+ m_client_meta_copy.snap_seqs = snap_seqs;
+ m_client_meta_copy.sync_points.clear();
+ for (auto& sync_point : sync_points) {
+ m_client_meta_copy.sync_points.emplace_back(
+ sync_point.snap_namespace,
+ sync_point.snap_name,
+ sync_point.from_snap_name,
+ sync_point.object_number);
+
+ if (sync_point.object_number) {
+ m_client_meta_copy.sync_object_count = std::max(
+ m_client_meta_copy.sync_object_count, *sync_point.object_number + 1);
+ }
+ }
+
+ dout(20) << "client_meta=" << m_client_meta_copy << dendl;
+ bufferlist client_data_bl;
+ librbd::journal::ClientData client_data{m_client_meta_copy};
+ encode(client_data, client_data_bl);
+
+ auto ctx = new LambdaContext([this, on_finish](int r) {
+ handle_update_sync_points(r, on_finish);
+ });
+ m_state_builder->remote_journaler->update_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void SyncPointHandler<I>::handle_update_sync_points(int r, Context* on_finish) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r >= 0) {
+ m_state_builder->remote_client_meta.snap_seqs =
+ m_client_meta_copy.snap_seqs;
+ m_state_builder->remote_client_meta.sync_points =
+ m_client_meta_copy.sync_points;
+ } else {
+ derr << "failed to update remote journal client meta for image "
+ << m_state_builder->global_image_id << ": " << cpp_strerror(r)
+ << dendl;
+ }
+
+ on_finish->complete(r);
+}
+
+} // namespace journal
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::journal::SyncPointHandler<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h
new file mode 100644
index 000000000..b4f492c19
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H
+
+#include "tools/rbd_mirror/image_sync/Types.h"
+#include "librbd/journal/Types.h"
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace journal {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class SyncPointHandler : public image_sync::SyncPointHandler {
+public:
+ using SyncPoint = image_sync::SyncPoint;
+ using SyncPoints = image_sync::SyncPoints;
+
+ static SyncPointHandler* create(StateBuilder<ImageCtxT>* state_builder) {
+ return new SyncPointHandler(state_builder);
+ }
+ SyncPointHandler(StateBuilder<ImageCtxT>* state_builder);
+
+ SyncPoints get_sync_points() const override;
+ librbd::SnapSeqs get_snap_seqs() const override;
+
+ void update_sync_points(const librbd::SnapSeqs& snap_seqs,
+ const SyncPoints& sync_points,
+ bool sync_complete,
+ Context* on_finish) override;
+
+private:
+ StateBuilder<ImageCtxT>* m_state_builder;
+
+ librbd::journal::MirrorPeerClientMeta m_client_meta_copy;
+
+ void handle_update_sync_points(int r, Context* on_finish);
+
+};
+
+} // namespace journal
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::journal::SyncPointHandler<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc
new file mode 100644
index 000000000..75881307c
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc
@@ -0,0 +1,658 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ApplyImageStateRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/image/GetMetadataRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/Utils.h"
+#include <boost/algorithm/string/predicate.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "ApplyImageStateRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+ApplyImageStateRequest<I>::ApplyImageStateRequest(
+ const std::string& local_mirror_uuid,
+ const std::string& remote_mirror_uuid,
+ I* local_image_ctx,
+ I* remote_image_ctx,
+ librbd::mirror::snapshot::ImageState image_state,
+ Context* on_finish)
+ : m_local_mirror_uuid(local_mirror_uuid),
+ m_remote_mirror_uuid(remote_mirror_uuid),
+ m_local_image_ctx(local_image_ctx),
+ m_remote_image_ctx(remote_image_ctx),
+ m_image_state(image_state),
+ m_on_finish(on_finish) {
+ dout(15) << "image_state=" << m_image_state << dendl;
+
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+ m_features = m_local_image_ctx->features & ~RBD_FEATURES_IMPLICIT_ENABLE;
+ compute_local_to_remote_snap_ids();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::send() {
+ rename_image();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::rename_image() {
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+ if (m_local_image_ctx->name == m_image_state.name) {
+ image_locker.unlock();
+ owner_locker.unlock();
+
+ update_features();
+ return;
+ }
+ image_locker.unlock();
+
+ dout(15) << "local_image_name=" << m_local_image_ctx->name << ", "
+ << "remote_image_name=" << m_image_state.name << dendl;
+
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_rename_image>(this);
+ m_local_image_ctx->operations->execute_rename(m_image_state.name, ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_rename_image(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to rename image to '" << m_image_state.name << "': "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ update_features();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::update_features() {
+ uint64_t feature_updates = 0UL;
+ bool enabled = false;
+
+ auto image_state_features =
+ m_image_state.features & ~RBD_FEATURES_IMPLICIT_ENABLE;
+ feature_updates = (m_features & ~image_state_features);
+ if (feature_updates == 0UL) {
+ feature_updates = (image_state_features & ~m_features);
+ enabled = (feature_updates != 0UL);
+ }
+
+ if (feature_updates == 0UL) {
+ get_image_meta();
+ return;
+ }
+
+ dout(15) << "image_features=" << m_features << ", "
+ << "state_features=" << image_state_features << ", "
+ << "feature_updates=" << feature_updates << ", "
+ << "enabled=" << enabled << dendl;
+
+ if (enabled) {
+ m_features |= feature_updates;
+ } else {
+ m_features &= ~feature_updates;
+ }
+
+ std::shared_lock owner_lock{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_update_features>(this);
+ m_local_image_ctx->operations->execute_update_features(
+ feature_updates, enabled, ctx, 0U);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_update_features(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update image features: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ update_features();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::get_image_meta() {
+ dout(15) << dendl;
+
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_get_image_meta>(this);
+ auto req = librbd::image::GetMetadataRequest<I>::create(
+ m_local_image_ctx->md_ctx, m_local_image_ctx->header_oid, true, "", "", 0U,
+ &m_metadata, ctx);
+ req->send();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_get_image_meta(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to fetch local image metadata: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ update_image_meta();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::update_image_meta() {
+ std::set<std::string> keys_to_remove;
+ for (const auto& [key, value] : m_metadata) {
+ if (m_image_state.metadata.count(key) == 0) {
+ dout(15) << "removing image-meta key '" << key << "'" << dendl;
+ keys_to_remove.insert(key);
+ }
+ }
+
+ std::map<std::string, bufferlist> metadata_to_update;
+ for (const auto& [key, value] : m_image_state.metadata) {
+ auto it = m_metadata.find(key);
+ if (it == m_metadata.end() || !it->second.contents_equal(value)) {
+ dout(15) << "updating image-meta key '" << key << "'" << dendl;
+ metadata_to_update.insert({key, value});
+ }
+ }
+
+ if (keys_to_remove.empty() && metadata_to_update.empty()) {
+ unprotect_snapshot();
+ return;
+ }
+
+ dout(15) << dendl;
+
+ librados::ObjectWriteOperation op;
+ for (const auto& key : keys_to_remove) {
+ librbd::cls_client::metadata_remove(&op, key);
+ }
+ if (!metadata_to_update.empty()) {
+ librbd::cls_client::metadata_set(&op, metadata_to_update);
+ }
+
+ auto aio_comp = create_rados_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_update_image_meta>(this);
+ int r = m_local_image_ctx->md_ctx.aio_operate(m_local_image_ctx->header_oid, aio_comp,
+ &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_update_image_meta(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update image metadata: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_metadata.clear();
+
+ m_prev_snap_id = CEPH_NOSNAP;
+ unprotect_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::unprotect_snapshot() {
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+
+ auto snap_it = m_local_image_ctx->snap_info.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id);
+ }
+
+ for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ const auto& snap_info = snap_it->second;
+
+ auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>(
+ &snap_info.snap_namespace);
+ if (user_ns == nullptr) {
+ dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl;
+ continue;
+ }
+
+ if (snap_info.protection_status == RBD_PROTECTION_STATUS_UNPROTECTED) {
+ dout(20) << "snapshot " << snap_id << " is already unprotected" << dendl;
+ continue;
+ }
+
+ auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id);
+ if (snap_id_map_it == m_local_to_remote_snap_ids.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image"
+ << dendl;
+ break;
+ }
+
+ auto remote_snap_id = snap_id_map_it->second;
+ auto snap_state_it = m_image_state.snapshots.find(remote_snap_id);
+ if (snap_state_it == m_image_state.snapshots.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image "
+ << "state" << dendl;
+ break;
+ }
+
+ const auto& snap_state = snap_state_it->second;
+ if (snap_state.protection_status == RBD_PROTECTION_STATUS_UNPROTECTED) {
+ dout(15) << "snapshot " << snap_id << " is unprotected in remote image"
+ << dendl;
+ break;
+ }
+ }
+
+ if (snap_it == m_local_image_ctx->snap_info.end()) {
+ image_locker.unlock();
+
+ // no local snapshots to unprotect
+ m_prev_snap_id = CEPH_NOSNAP;
+ remove_snapshot();
+ return;
+ }
+
+ m_prev_snap_id = snap_it->first;
+ m_snap_name = snap_it->second.name;
+ image_locker.unlock();
+
+ dout(15) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_unprotect_snapshot>(this);
+ m_local_image_ctx->operations->execute_snap_unprotect(
+ cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_unprotect_snapshot(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to unprotect snapshot " << m_snap_name << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ unprotect_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::remove_snapshot() {
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+
+ auto snap_it = m_local_image_ctx->snap_info.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id);
+ }
+
+ for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ const auto& snap_info = snap_it->second;
+
+ auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>(
+ &snap_info.snap_namespace);
+ if (user_ns == nullptr) {
+ dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl;
+ continue;
+ }
+
+ auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id);
+ if (snap_id_map_it == m_local_to_remote_snap_ids.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image"
+ << dendl;
+ break;
+ }
+
+ auto remote_snap_id = snap_id_map_it->second;
+ auto snap_state_it = m_image_state.snapshots.find(remote_snap_id);
+ if (snap_state_it == m_image_state.snapshots.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image "
+ << "state" << dendl;
+ break;
+ }
+ }
+
+ if (snap_it == m_local_image_ctx->snap_info.end()) {
+ image_locker.unlock();
+
+ // no local snapshots to remove
+ m_prev_snap_id = CEPH_NOSNAP;
+ protect_snapshot();
+ return;
+ }
+
+ m_prev_snap_id = snap_it->first;
+ m_snap_name = snap_it->second.name;
+ image_locker.unlock();
+
+ dout(15) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_remove_snapshot>(this);
+ m_local_image_ctx->operations->execute_snap_remove(
+ cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_remove_snapshot(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to remove snapshot " << m_snap_name << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ remove_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::protect_snapshot() {
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+
+ auto snap_it = m_local_image_ctx->snap_info.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id);
+ }
+
+ for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ const auto& snap_info = snap_it->second;
+
+ auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>(
+ &snap_info.snap_namespace);
+ if (user_ns == nullptr) {
+ dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl;
+ continue;
+ }
+
+ if (snap_info.protection_status == RBD_PROTECTION_STATUS_PROTECTED) {
+ dout(20) << "snapshot " << snap_id << " is already protected" << dendl;
+ continue;
+ }
+
+ auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id);
+ if (snap_id_map_it == m_local_to_remote_snap_ids.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image"
+ << dendl;
+ continue;
+ }
+
+ auto remote_snap_id = snap_id_map_it->second;
+ auto snap_state_it = m_image_state.snapshots.find(remote_snap_id);
+ if (snap_state_it == m_image_state.snapshots.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image "
+ << "state" << dendl;
+ continue;
+ }
+
+ const auto& snap_state = snap_state_it->second;
+ if (snap_state.protection_status == RBD_PROTECTION_STATUS_PROTECTED) {
+ dout(15) << "snapshot " << snap_id << " is protected in remote image"
+ << dendl;
+ break;
+ }
+ }
+
+ if (snap_it == m_local_image_ctx->snap_info.end()) {
+ image_locker.unlock();
+
+ // no local snapshots to protect
+ m_prev_snap_id = CEPH_NOSNAP;
+ rename_snapshot();
+ return;
+ }
+
+ m_prev_snap_id = snap_it->first;
+ m_snap_name = snap_it->second.name;
+ image_locker.unlock();
+
+ dout(15) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_protect_snapshot>(this);
+ m_local_image_ctx->operations->execute_snap_protect(
+ cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_protect_snapshot(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to protect snapshot " << m_snap_name << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ protect_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::rename_snapshot() {
+ std::shared_lock image_locker{m_local_image_ctx->image_lock};
+
+ auto snap_it = m_local_image_ctx->snap_info.begin();
+ if (m_prev_snap_id != CEPH_NOSNAP) {
+ snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id);
+ }
+
+ for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ const auto& snap_info = snap_it->second;
+
+ auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>(
+ &snap_info.snap_namespace);
+ if (user_ns == nullptr) {
+ dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl;
+ continue;
+ }
+
+ auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id);
+ if (snap_id_map_it == m_local_to_remote_snap_ids.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image"
+ << dendl;
+ continue;
+ }
+
+ auto remote_snap_id = snap_id_map_it->second;
+ auto snap_state_it = m_image_state.snapshots.find(remote_snap_id);
+ if (snap_state_it == m_image_state.snapshots.end()) {
+ dout(15) << "snapshot " << snap_id << " does not exist in remote image "
+ << "state" << dendl;
+ continue;
+ }
+
+ const auto& snap_state = snap_state_it->second;
+ if (snap_info.name != snap_state.name) {
+ dout(15) << "snapshot " << snap_id << " has been renamed from '"
+ << snap_info.name << "' to '" << snap_state.name << "'"
+ << dendl;
+ m_snap_name = snap_state.name;
+ break;
+ }
+ }
+
+ if (snap_it == m_local_image_ctx->snap_info.end()) {
+ image_locker.unlock();
+
+ // no local snapshots to protect
+ m_prev_snap_id = CEPH_NOSNAP;
+ set_snapshot_limit();
+ return;
+ }
+
+ m_prev_snap_id = snap_it->first;
+ image_locker.unlock();
+
+ dout(15) << "snap_name=" << m_snap_name << ", "
+ << "snap_id=" << m_prev_snap_id << dendl;
+
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_rename_snapshot>(this);
+ m_local_image_ctx->operations->execute_snap_rename(
+ m_prev_snap_id, m_snap_name.c_str(), ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_rename_snapshot(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to protect snapshot " << m_snap_name << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ rename_snapshot();
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::set_snapshot_limit() {
+ dout(15) << "snap_limit=" << m_image_state.snap_limit << dendl;
+
+ // no need to even check the current limit -- just set it
+ std::shared_lock owner_locker{m_local_image_ctx->owner_lock};
+ auto ctx = create_context_callback<
+ ApplyImageStateRequest<I>,
+ &ApplyImageStateRequest<I>::handle_set_snapshot_limit>(this);
+ m_local_image_ctx->operations->execute_snap_set_limit(
+ m_image_state.snap_limit, ctx);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::handle_set_snapshot_limit(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update snapshot limit: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ finish(r);
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::finish(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+template <typename I>
+uint64_t ApplyImageStateRequest<I>::compute_remote_snap_id(
+ uint64_t local_snap_id) {
+ ceph_assert(ceph_mutex_is_locked(m_local_image_ctx->image_lock));
+ ceph_assert(ceph_mutex_is_locked(m_remote_image_ctx->image_lock));
+
+ // Search our local non-primary snapshots for a mapping to the remote
+ // snapshot. The non-primary mirror snapshot with the mappings will always
+ // come at or after the snapshot we are searching against
+ auto remote_snap_id = util::compute_remote_snap_id(
+ m_local_image_ctx->image_lock, m_local_image_ctx->snap_info,
+ local_snap_id, m_remote_mirror_uuid);
+ if (remote_snap_id != CEPH_NOSNAP) {
+ return remote_snap_id;
+ }
+
+ // if we failed to find a match to a remote snapshot in our local non-primary
+ // snapshots, check the remote image for non-primary snapshot mappings back
+ // to our snapshot
+ for (auto snap_it = m_remote_image_ctx->snap_info.begin();
+ snap_it != m_remote_image_ctx->snap_info.end(); ++snap_it) {
+ auto snap_id = snap_it->first;
+ auto mirror_ns = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
+ &snap_it->second.snap_namespace);
+ if (mirror_ns == nullptr || !mirror_ns->is_non_primary()) {
+ continue;
+ }
+
+ if (mirror_ns->primary_mirror_uuid != m_local_mirror_uuid) {
+ dout(20) << "remote snapshot " << snap_id << " not tied to local"
+ << dendl;
+ continue;
+ } else if (mirror_ns->primary_snap_id == local_snap_id) {
+ dout(15) << "local snapshot " << local_snap_id << " maps to "
+ << "remote snapshot " << snap_id << dendl;
+ return snap_id;
+ }
+
+ const auto& snap_seqs = mirror_ns->snap_seqs;
+ for (auto [local_snap_id_seq, remote_snap_id_seq] : snap_seqs) {
+ if (local_snap_id_seq == local_snap_id) {
+ dout(15) << "local snapshot " << local_snap_id << " maps to "
+ << "remote snapshot " << remote_snap_id_seq << dendl;
+ return remote_snap_id_seq;
+ }
+ }
+ }
+
+ return CEPH_NOSNAP;
+}
+
+template <typename I>
+void ApplyImageStateRequest<I>::compute_local_to_remote_snap_ids() {
+ ceph_assert(ceph_mutex_is_locked(m_local_image_ctx->image_lock));
+ std::shared_lock remote_image_locker{m_remote_image_ctx->image_lock};
+
+ for (const auto& [snap_id, snap_info] : m_local_image_ctx->snap_info) {
+ m_local_to_remote_snap_ids[snap_id] = compute_remote_snap_id(snap_id);
+ }
+
+ dout(15) << "local_to_remote_snap_ids=" << m_local_to_remote_snap_ids
+ << dendl;
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::ApplyImageStateRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h
new file mode 100644
index 000000000..0e2d09ddf
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h
@@ -0,0 +1,155 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H
+
+#include "common/ceph_mutex.h"
+#include "librbd/mirror/snapshot/Types.h"
+#include <map>
+#include <string>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+template <typename> class EventPreprocessor;
+template <typename> class ReplayStatusFormatter;
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class ApplyImageStateRequest {
+public:
+ static ApplyImageStateRequest* create(
+ const std::string& local_mirror_uuid,
+ const std::string& remote_mirror_uuid,
+ ImageCtxT* local_image_ctx,
+ ImageCtxT* remote_image_ctx,
+ librbd::mirror::snapshot::ImageState image_state,
+ Context* on_finish) {
+ return new ApplyImageStateRequest(local_mirror_uuid, remote_mirror_uuid,
+ local_image_ctx, remote_image_ctx,
+ image_state, on_finish);
+ }
+
+ ApplyImageStateRequest(
+ const std::string& local_mirror_uuid,
+ const std::string& remote_mirror_uuid,
+ ImageCtxT* local_image_ctx,
+ ImageCtxT* remote_image_ctx,
+ librbd::mirror::snapshot::ImageState image_state,
+ Context* on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * RENAME_IMAGE
+ * |
+ * | /---------\
+ * | | |
+ * v v |
+ * UPDATE_FEATURES -----/
+ * |
+ * v
+ * GET_IMAGE_META
+ * |
+ * | /---------\
+ * | | |
+ * v v |
+ * UPDATE_IMAGE_META ---/
+ * |
+ * | /---------\
+ * | | |
+ * v v |
+ * UNPROTECT_SNAPSHOT |
+ * | |
+ * v |
+ * REMOVE_SNAPSHOT |
+ * | |
+ * v |
+ * PROTECT_SNAPSHOT |
+ * | |
+ * v |
+ * RENAME_SNAPSHOT -----/
+ * |
+ * v
+ * SET_SNAPSHOT_LIMIT
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ std::string m_local_mirror_uuid;
+ std::string m_remote_mirror_uuid;
+ ImageCtxT* m_local_image_ctx;
+ ImageCtxT* m_remote_image_ctx;
+ librbd::mirror::snapshot::ImageState m_image_state;
+ Context* m_on_finish;
+
+ std::map<uint64_t, uint64_t> m_local_to_remote_snap_ids;
+
+ uint64_t m_features = 0;
+
+ std::map<std::string, bufferlist> m_metadata;
+
+ uint64_t m_prev_snap_id = 0;
+ std::string m_snap_name;
+
+ void rename_image();
+ void handle_rename_image(int r);
+
+ void update_features();
+ void handle_update_features(int r);
+
+ void get_image_meta();
+ void handle_get_image_meta(int r);
+
+ void update_image_meta();
+ void handle_update_image_meta(int r);
+
+ void unprotect_snapshot();
+ void handle_unprotect_snapshot(int r);
+
+ void remove_snapshot();
+ void handle_remove_snapshot(int r);
+
+ void protect_snapshot();
+ void handle_protect_snapshot(int r);
+
+ void rename_snapshot();
+ void handle_rename_snapshot(int r);
+
+ void set_snapshot_limit();
+ void handle_set_snapshot_limit(int r);
+
+ void finish(int r);
+
+ uint64_t compute_remote_snap_id(uint64_t snap_id);
+ void compute_local_to_remote_snap_ids();
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::ApplyImageStateRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc
new file mode 100644
index 000000000..c923395c9
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc
@@ -0,0 +1,204 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "CreateLocalImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/image_replayer/CreateImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "CreateLocalImageRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void CreateLocalImageRequest<I>::send() {
+ disable_mirror_image();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::disable_mirror_image() {
+ if (m_state_builder->local_image_id.empty()) {
+ add_mirror_image();
+ return;
+ }
+
+ dout(10) << dendl;
+ update_progress("DISABLE_MIRROR_IMAGE");
+
+ // need to send 'disabling' since the cls methods will fail if we aren't
+ // in that state
+ cls::rbd::MirrorImage mirror_image{
+ cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, m_global_image_id,
+ cls::rbd::MIRROR_IMAGE_STATE_DISABLING};
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_set(&op, m_state_builder->local_image_id,
+ mirror_image);
+
+ auto aio_comp = create_rados_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_disable_mirror_image>(this);
+ int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_disable_mirror_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to disable mirror image " << m_global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ remove_mirror_image();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::remove_mirror_image() {
+ dout(10) << dendl;
+ update_progress("REMOVE_MIRROR_IMAGE");
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_remove(&op, m_state_builder->local_image_id);
+
+ auto aio_comp = create_rados_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_remove_mirror_image>(this);
+ int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_remove_mirror_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to remove mirror image " << m_global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ m_state_builder->local_image_id = "";
+ add_mirror_image();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::add_mirror_image() {
+ ceph_assert(m_state_builder->local_image_id.empty());
+ m_state_builder->local_image_id =
+ librbd::util::generate_image_id<I>(m_local_io_ctx);
+
+ dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl;
+ update_progress("ADD_MIRROR_IMAGE");
+
+ // use 'creating' to track a partially constructed image. it will
+ // be switched to 'enabled' once the image is fully created
+ cls::rbd::MirrorImage mirror_image{
+ cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, m_global_image_id,
+ cls::rbd::MIRROR_IMAGE_STATE_CREATING};
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_set(&op, m_state_builder->local_image_id,
+ mirror_image);
+
+ auto aio_comp = create_rados_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_add_mirror_image>(this);
+ int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::handle_add_mirror_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to register mirror image " << m_global_image_id << ": "
+ << cpp_strerror(r) << dendl;
+ this->finish(r);
+ return;
+ }
+
+ create_local_image();
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::create_local_image() {
+ dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl;
+ update_progress("CREATE_LOCAL_IMAGE");
+
+ m_remote_image_ctx->image_lock.lock_shared();
+ std::string image_name = m_remote_image_ctx->name;
+ m_remote_image_ctx->image_lock.unlock_shared();
+
+ auto ctx = create_context_callback<
+ CreateLocalImageRequest<I>,
+ &CreateLocalImageRequest<I>::handle_create_local_image>(this);
+ auto request = CreateImageRequest<I>::create(
+ m_threads, m_local_io_ctx, m_global_image_id,
+ m_state_builder->remote_mirror_uuid, image_name,
+ m_state_builder->local_image_id, m_remote_image_ctx,
+ m_pool_meta_cache, cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, ctx);
+ request->send();
+}
+template <typename I>
+void CreateLocalImageRequest<I>::handle_create_local_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r == -EBADF) {
+ dout(5) << "image id " << m_state_builder->local_image_id << " "
+ << "already in-use" << dendl;
+ disable_mirror_image();
+ return;
+ } else if (r < 0) {
+ if (r == -ENOENT) {
+ dout(10) << "parent image does not exist" << dendl;
+ } else {
+ derr << "failed to create local image: " << cpp_strerror(r) << dendl;
+ }
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void CreateLocalImageRequest<I>::update_progress(
+ const std::string& description) {
+ dout(15) << description << dendl;
+ if (m_progress_ctx != nullptr) {
+ m_progress_ctx->update_progress(description);
+ }
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::CreateLocalImageRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h
new file mode 100644
index 000000000..3345154b4
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h
@@ -0,0 +1,121 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H
+
+#include "include/rados/librados_fwd.hpp"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include <string>
+
+struct Context;
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class PoolMetaCache;
+class ProgressContext;
+template <typename> struct Threads;
+
+namespace image_replayer {
+namespace snapshot {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class CreateLocalImageRequest : public BaseRequest {
+public:
+ typedef rbd::mirror::ProgressContext ProgressContext;
+
+ static CreateLocalImageRequest* create(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ ImageCtxT* remote_image_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ Context* on_finish) {
+ return new CreateLocalImageRequest(threads, local_io_ctx, remote_image_ctx,
+ global_image_id, pool_meta_cache,
+ progress_ctx, state_builder, on_finish);
+ }
+
+ CreateLocalImageRequest(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ ImageCtxT* remote_image_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ Context* on_finish)
+ : BaseRequest(on_finish),
+ m_threads(threads),
+ m_local_io_ctx(local_io_ctx),
+ m_remote_image_ctx(remote_image_ctx),
+ m_global_image_id(global_image_id),
+ m_pool_meta_cache(pool_meta_cache),
+ m_progress_ctx(progress_ctx),
+ m_state_builder(state_builder) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * DISABLE_MIRROR_IMAGE < * * * * * *
+ * | *
+ * v *
+ * REMOVE_MIRROR_IMAGE *
+ * | *
+ * v *
+ * ADD_MIRROR_IMAGE *
+ * | *
+ * v (id exists) *
+ * CREATE_LOCAL_IMAGE * * * * * * * *
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ Threads<ImageCtxT>* m_threads;
+ librados::IoCtx& m_local_io_ctx;
+ ImageCtxT* m_remote_image_ctx;
+ std::string m_global_image_id;
+ PoolMetaCache* m_pool_meta_cache;
+ ProgressContext* m_progress_ctx;
+ StateBuilder<ImageCtxT>* m_state_builder;
+
+ void disable_mirror_image();
+ void handle_disable_mirror_image(int r);
+
+ void remove_mirror_image();
+ void handle_remove_mirror_image(int r);
+
+ void add_mirror_image();
+ void handle_add_mirror_image(int r);
+
+ void create_local_image();
+ void handle_create_local_image(int r);
+
+ void update_progress(const std::string& description);
+
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::CreateLocalImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc
new file mode 100644
index 000000000..575eb8534
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc
@@ -0,0 +1,70 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "PrepareReplayRequest.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/mirror/snapshot/ImageMeta.h"
+#include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "PrepareReplayRequest: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+void PrepareReplayRequest<I>::send() {
+ *m_resync_requested = false;
+ *m_syncing = false;
+
+ load_local_image_meta();
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::load_local_image_meta() {
+ dout(15) << dendl;
+
+ ceph_assert(m_state_builder->local_image_meta == nullptr);
+ m_state_builder->local_image_meta =
+ librbd::mirror::snapshot::ImageMeta<I>::create(
+ m_state_builder->local_image_ctx, m_local_mirror_uuid);
+
+ auto ctx = create_context_callback<
+ PrepareReplayRequest<I>,
+ &PrepareReplayRequest<I>::handle_load_local_image_meta>(this);
+ m_state_builder->local_image_meta->load(ctx);
+}
+
+template <typename I>
+void PrepareReplayRequest<I>::handle_load_local_image_meta(int r) {
+ dout(15) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to load local image-meta: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ *m_resync_requested = m_state_builder->local_image_meta->resync_requested;
+ finish(0);
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::PrepareReplayRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h
new file mode 100644
index 000000000..4e9246acd
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h
@@ -0,0 +1,92 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/mirror/Types.h"
+#include "tools/rbd_mirror/BaseRequest.h"
+#include <list>
+#include <string>
+
+struct Context;
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+
+class ProgressContext;
+
+namespace image_replayer {
+namespace snapshot {
+
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class PrepareReplayRequest : public BaseRequest {
+public:
+ static PrepareReplayRequest* create(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) {
+ return new PrepareReplayRequest(
+ local_mirror_uuid, progress_ctx, state_builder, resync_requested,
+ syncing, on_finish);
+ }
+
+ PrepareReplayRequest(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ StateBuilder<ImageCtxT>* state_builder,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish)
+ : BaseRequest(on_finish),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_progress_ctx(progress_ctx),
+ m_state_builder(state_builder),
+ m_resync_requested(resync_requested),
+ m_syncing(syncing) {
+ }
+
+ void send() override;
+
+private:
+ // TODO
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * LOAD_LOCAL_IMAGE_META
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ std::string m_local_mirror_uuid;
+ ProgressContext* m_progress_ctx;
+ StateBuilder<ImageCtxT>* m_state_builder;
+ bool* m_resync_requested;
+ bool* m_syncing;
+
+ void load_local_image_meta();
+ void handle_load_local_image_meta(int r);
+
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::PrepareReplayRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
new file mode 100644
index 000000000..67eaa9777
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
@@ -0,0 +1,1633 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Replayer.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/perf_counters.h"
+#include "common/perf_counters_key.h"
+#include "include/stringify.h"
+#include "common/Timer.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "json_spirit/json_spirit.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/deep_copy/Handler.h"
+#include "librbd/deep_copy/ImageCopyRequest.h"
+#include "librbd/deep_copy/SnapshotCopyRequest.h"
+#include "librbd/mirror/ImageStateUpdateRequest.h"
+#include "librbd/mirror/snapshot/CreateNonPrimaryRequest.h"
+#include "librbd/mirror/snapshot/GetImageStateRequest.h"
+#include "librbd/mirror/snapshot/ImageMeta.h"
+#include "librbd/mirror/snapshot/UnlinkPeerRequest.h"
+#include "tools/rbd_mirror/InstanceWatcher.h"
+#include "tools/rbd_mirror/PoolMetaCache.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/Types.h"
+#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/ReplayerListener.h"
+#include "tools/rbd_mirror/image_replayer/Utils.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/Utils.h"
+#include <set>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "Replayer: " << this << " " << __func__ << ": "
+
+extern PerfCounters *g_snapshot_perf_counters;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+namespace {
+
+double round_to_two_places(double value) {
+ return abs(round(value * 100) / 100);
+}
+
+template<typename I>
+std::pair<uint64_t, librbd::SnapInfo*> get_newest_mirror_snapshot(
+ I* image_ctx) {
+ for (auto snap_info_it = image_ctx->snap_info.rbegin();
+ snap_info_it != image_ctx->snap_info.rend(); ++snap_info_it) {
+ const auto& snap_ns = snap_info_it->second.snap_namespace;
+ auto mirror_ns = std::get_if<
+ cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
+ if (mirror_ns == nullptr || !mirror_ns->complete) {
+ continue;
+ }
+
+ return {snap_info_it->first, &snap_info_it->second};
+ }
+
+ return {CEPH_NOSNAP, nullptr};
+}
+
+} // anonymous namespace
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+
+template <typename I>
+struct Replayer<I>::C_UpdateWatchCtx : public librbd::UpdateWatchCtx {
+ Replayer<I>* replayer;
+
+ C_UpdateWatchCtx(Replayer<I>* replayer) : replayer(replayer) {
+ }
+
+ void handle_notify() override {
+ replayer->handle_image_update_notify();
+ }
+};
+
+template <typename I>
+struct Replayer<I>::DeepCopyHandler : public librbd::deep_copy::Handler {
+ Replayer *replayer;
+
+ DeepCopyHandler(Replayer* replayer) : replayer(replayer) {
+ }
+
+ void handle_read(uint64_t bytes_read) override {
+ replayer->handle_copy_image_read(bytes_read);
+ }
+
+ int update_progress(uint64_t object_number, uint64_t object_count) override {
+ replayer->handle_copy_image_progress(object_number, object_count);
+ return 0;
+ }
+};
+
+template <typename I>
+Replayer<I>::Replayer(
+ Threads<I>* threads,
+ InstanceWatcher<I>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ StateBuilder<I>* state_builder,
+ ReplayerListener* replayer_listener)
+ : m_threads(threads),
+ m_instance_watcher(instance_watcher),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_pool_meta_cache(pool_meta_cache),
+ m_state_builder(state_builder),
+ m_replayer_listener(replayer_listener),
+ m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
+ "rbd::mirror::image_replayer::snapshot::Replayer", this))) {
+ dout(10) << dendl;
+}
+
+template <typename I>
+Replayer<I>::~Replayer() {
+ dout(10) << dendl;
+
+ {
+ std::unique_lock locker{m_lock};
+ unregister_perf_counters();
+ }
+
+ ceph_assert(m_state == STATE_COMPLETE);
+ ceph_assert(m_update_watch_ctx == nullptr);
+ ceph_assert(m_deep_copy_handler == nullptr);
+}
+
+template <typename I>
+void Replayer<I>::init(Context* on_finish) {
+ dout(10) << dendl;
+
+ ceph_assert(m_state == STATE_INIT);
+
+ RemotePoolMeta remote_pool_meta;
+ int r = m_pool_meta_cache->get_remote_pool_meta(
+ m_state_builder->remote_image_ctx->md_ctx.get_id(), &remote_pool_meta);
+ if (r < 0 || remote_pool_meta.mirror_peer_uuid.empty()) {
+ derr << "failed to retrieve mirror peer uuid from remote pool" << dendl;
+ m_state = STATE_COMPLETE;
+ m_threads->work_queue->queue(on_finish, r);
+ return;
+ }
+
+ m_remote_mirror_peer_uuid = remote_pool_meta.mirror_peer_uuid;
+ dout(10) << "remote_mirror_peer_uuid=" << m_remote_mirror_peer_uuid << dendl;
+
+ {
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ std::shared_lock image_locker{local_image_ctx->image_lock};
+ m_image_spec = image_replayer::util::compute_image_spec(
+ local_image_ctx->md_ctx, local_image_ctx->name);
+ }
+
+ {
+ std::unique_lock locker{m_lock};
+ register_perf_counters();
+ }
+
+ ceph_assert(m_on_init_shutdown == nullptr);
+ m_on_init_shutdown = on_finish;
+
+ register_local_update_watcher();
+}
+
+template <typename I>
+void Replayer<I>::shut_down(Context* on_finish) {
+ dout(10) << dendl;
+
+ std::unique_lock locker{m_lock};
+ ceph_assert(m_on_init_shutdown == nullptr);
+ m_on_init_shutdown = on_finish;
+ m_error_code = 0;
+ m_error_description = "";
+
+ ceph_assert(m_state != STATE_INIT);
+ auto state = STATE_COMPLETE;
+ std::swap(m_state, state);
+
+ if (state == STATE_REPLAYING) {
+ // if a sync request was pending, request a cancelation
+ m_instance_watcher->cancel_sync_request(
+ m_state_builder->local_image_ctx->id);
+
+ // TODO interrupt snapshot copy and image copy state machines even if remote
+ // cluster is unreachable
+ dout(10) << "shut down pending on completion of snapshot replay" << dendl;
+ return;
+ }
+ locker.unlock();
+
+ unregister_remote_update_watcher();
+}
+
+template <typename I>
+void Replayer<I>::flush(Context* on_finish) {
+ dout(10) << dendl;
+
+ // TODO
+ m_threads->work_queue->queue(on_finish, 0);
+}
+
+template <typename I>
+bool Replayer<I>::get_replay_status(std::string* description,
+ Context* on_finish) {
+ dout(10) << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) {
+ locker.unlock();
+
+ derr << "replay not running" << dendl;
+ on_finish->complete(-EAGAIN);
+ return false;
+ }
+
+ std::shared_lock local_image_locker{
+ m_state_builder->local_image_ctx->image_lock};
+ auto [local_snap_id, local_snap_info] = get_newest_mirror_snapshot(
+ m_state_builder->local_image_ctx);
+
+ std::shared_lock remote_image_locker{
+ m_state_builder->remote_image_ctx->image_lock};
+ auto [remote_snap_id, remote_snap_info] = get_newest_mirror_snapshot(
+ m_state_builder->remote_image_ctx);
+
+ if (remote_snap_info == nullptr) {
+ remote_image_locker.unlock();
+ local_image_locker.unlock();
+ locker.unlock();
+
+ derr << "remote image does not contain mirror snapshots" << dendl;
+ on_finish->complete(-EAGAIN);
+ return false;
+ }
+
+ std::string replay_state = "idle";
+ if (m_remote_snap_id_end != CEPH_NOSNAP) {
+ replay_state = "syncing";
+ }
+
+ json_spirit::mObject root_obj;
+ root_obj["replay_state"] = replay_state;
+ root_obj["remote_snapshot_timestamp"] = remote_snap_info->timestamp.sec();
+ if (m_perf_counters) {
+ m_perf_counters->tset(l_rbd_mirror_snapshot_remote_timestamp,
+ remote_snap_info->timestamp);
+ }
+
+ auto matching_remote_snap_id = util::compute_remote_snap_id(
+ m_state_builder->local_image_ctx->image_lock,
+ m_state_builder->local_image_ctx->snap_info,
+ local_snap_id, m_state_builder->remote_mirror_uuid);
+ auto matching_remote_snap_it =
+ m_state_builder->remote_image_ctx->snap_info.find(matching_remote_snap_id);
+ if (matching_remote_snap_id != CEPH_NOSNAP &&
+ matching_remote_snap_it !=
+ m_state_builder->remote_image_ctx->snap_info.end()) {
+ // use the timestamp from the matching remote image since
+ // the local snapshot would just be the time the snapshot was
+ // synced and not the consistency point in time.
+ root_obj["local_snapshot_timestamp"] =
+ matching_remote_snap_it->second.timestamp.sec();
+ if (m_perf_counters) {
+ m_perf_counters->tset(l_rbd_mirror_snapshot_local_timestamp,
+ matching_remote_snap_it->second.timestamp);
+ }
+ }
+
+ matching_remote_snap_it = m_state_builder->remote_image_ctx->snap_info.find(
+ m_remote_snap_id_end);
+ if (m_remote_snap_id_end != CEPH_NOSNAP &&
+ matching_remote_snap_it !=
+ m_state_builder->remote_image_ctx->snap_info.end()) {
+ root_obj["syncing_snapshot_timestamp"] = remote_snap_info->timestamp.sec();
+
+ if (m_local_object_count > 0) {
+ root_obj["syncing_percent"] =
+ 100 * m_local_mirror_snap_ns.last_copied_object_number /
+ m_local_object_count;
+ } else {
+ // Set syncing_percent to 0 if m_local_object_count has
+ // not yet been set (last_copied_object_number may be > 0
+ // if the sync is being resumed).
+ root_obj["syncing_percent"] = 0;
+ }
+ }
+
+ m_bytes_per_second(0);
+ auto bytes_per_second = m_bytes_per_second.get_average();
+ root_obj["bytes_per_second"] = round_to_two_places(bytes_per_second);
+
+ auto bytes_per_snapshot = boost::accumulators::rolling_mean(
+ m_bytes_per_snapshot);
+ root_obj["bytes_per_snapshot"] = round_to_two_places(bytes_per_snapshot);
+
+ root_obj["last_snapshot_sync_seconds"] = m_last_snapshot_sync_seconds;
+ root_obj["last_snapshot_bytes"] = m_last_snapshot_bytes;
+
+ auto pending_bytes = bytes_per_snapshot * m_pending_snapshots;
+ if (bytes_per_second > 0 && m_pending_snapshots > 0) {
+ std::uint64_t seconds_until_synced = round_to_two_places(
+ pending_bytes / bytes_per_second);
+ if (seconds_until_synced >= std::numeric_limits<uint64_t>::max()) {
+ seconds_until_synced = std::numeric_limits<uint64_t>::max();
+ }
+
+ root_obj["seconds_until_synced"] = seconds_until_synced;
+ }
+
+ *description = json_spirit::write(
+ root_obj, json_spirit::remove_trailing_zeros);
+
+ local_image_locker.unlock();
+ remote_image_locker.unlock();
+ locker.unlock();
+ on_finish->complete(-EEXIST);
+ return true;
+}
+
+template <typename I>
+void Replayer<I>::load_local_image_meta() {
+ dout(10) << dendl;
+
+ {
+ // reset state in case new snapshot is added while we are scanning
+ std::unique_lock locker{m_lock};
+ m_image_updated = false;
+ }
+
+ bool update_status = false;
+ {
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ std::shared_lock image_locker{local_image_ctx->image_lock};
+ auto image_spec = image_replayer::util::compute_image_spec(
+ local_image_ctx->md_ctx, local_image_ctx->name);
+ if (m_image_spec != image_spec) {
+ m_image_spec = image_spec;
+ update_status = true;
+ }
+ }
+ if (update_status) {
+ std::unique_lock locker{m_lock};
+ unregister_perf_counters();
+ register_perf_counters();
+ notify_status_updated();
+ }
+
+ ceph_assert(m_state_builder->local_image_meta != nullptr);
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_load_local_image_meta>(this);
+ m_state_builder->local_image_meta->load(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_load_local_image_meta(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to load local image-meta: " << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to load local image-meta");
+ return;
+ }
+
+ if (r >= 0 && m_state_builder->local_image_meta->resync_requested) {
+ m_resync_requested = true;
+
+ dout(10) << "local image resync requested" << dendl;
+ handle_replay_complete(0, "resync requested");
+ return;
+ }
+
+ refresh_local_image();
+}
+
+template <typename I>
+void Replayer<I>::refresh_local_image() {
+ if (!m_state_builder->local_image_ctx->state->is_refresh_required()) {
+ refresh_remote_image();
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_refresh_local_image>(this);
+ m_state_builder->local_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_refresh_local_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to refresh local image: " << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to refresh local image");
+ return;
+ }
+
+ refresh_remote_image();
+}
+
+template <typename I>
+void Replayer<I>::refresh_remote_image() {
+ if (!m_state_builder->remote_image_ctx->state->is_refresh_required()) {
+ std::unique_lock locker{m_lock};
+ scan_local_mirror_snapshots(&locker);
+ return;
+ }
+
+ dout(10) << dendl;
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_refresh_remote_image>(this);
+ m_state_builder->remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_refresh_remote_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to refresh remote image: " << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to refresh remote image");
+ return;
+ }
+
+ std::unique_lock locker{m_lock};
+ scan_local_mirror_snapshots(&locker);
+}
+
+template <typename I>
+void Replayer<I>::scan_local_mirror_snapshots(
+ std::unique_lock<ceph::mutex>* locker) {
+ if (is_replay_interrupted(locker)) {
+ return;
+ }
+
+ dout(10) << dendl;
+
+ m_local_snap_id_start = 0;
+ m_local_snap_id_end = CEPH_NOSNAP;
+ m_local_mirror_snap_ns = {};
+ m_local_object_count = 0;
+
+ m_remote_snap_id_start = 0;
+ m_remote_snap_id_end = CEPH_NOSNAP;
+ m_remote_mirror_snap_ns = {};
+
+ std::set<uint64_t> prune_snap_ids;
+
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ std::shared_lock image_locker{local_image_ctx->image_lock};
+ for (auto snap_info_it = local_image_ctx->snap_info.begin();
+ snap_info_it != local_image_ctx->snap_info.end(); ++snap_info_it) {
+ const auto& snap_ns = snap_info_it->second.snap_namespace;
+ auto mirror_ns = std::get_if<
+ cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
+ if (mirror_ns == nullptr) {
+ continue;
+ }
+
+ dout(15) << "local mirror snapshot: id=" << snap_info_it->first << ", "
+ << "mirror_ns=" << *mirror_ns << dendl;
+ m_local_mirror_snap_ns = *mirror_ns;
+
+ auto local_snap_id = snap_info_it->first;
+ if (mirror_ns->is_non_primary()) {
+ if (mirror_ns->complete) {
+ // if remote has new snapshots, we would sync from here
+ m_local_snap_id_start = local_snap_id;
+ ceph_assert(m_local_snap_id_end == CEPH_NOSNAP);
+
+ if (mirror_ns->mirror_peer_uuids.empty()) {
+ // no other peer will attempt to sync to this snapshot so store as
+ // a candidate for removal
+ prune_snap_ids.insert(local_snap_id);
+ }
+ } else if (mirror_ns->last_copied_object_number == 0 &&
+ m_local_snap_id_start > 0) {
+ // snapshot might be missing image state, object-map, etc, so just
+ // delete and re-create it if we haven't started copying data
+ // objects. Also only prune this snapshot since we will need the
+ // previous mirror snapshot for syncing. Special case exception for
+ // the first non-primary snapshot since we know its snapshot is
+ // well-formed because otherwise the mirror-image-state would have
+ // forced an image deletion.
+ prune_snap_ids.clear();
+ prune_snap_ids.insert(local_snap_id);
+ break;
+ } else {
+ // start snap will be last complete mirror snapshot or initial
+ // image revision
+ m_local_snap_id_end = local_snap_id;
+ break;
+ }
+ } else if (mirror_ns->is_primary()) {
+ if (mirror_ns->complete) {
+ m_local_snap_id_start = local_snap_id;
+ ceph_assert(m_local_snap_id_end == CEPH_NOSNAP);
+ } else {
+ derr << "incomplete local primary snapshot" << dendl;
+ handle_replay_complete(locker, -EINVAL,
+ "incomplete local primary snapshot");
+ return;
+ }
+ } else {
+ derr << "unknown local mirror snapshot state" << dendl;
+ handle_replay_complete(locker, -EINVAL,
+ "invalid local mirror snapshot state");
+ return;
+ }
+ }
+ image_locker.unlock();
+
+ if (m_local_snap_id_start > 0) {
+ // remove candidate that is required for delta snapshot sync
+ prune_snap_ids.erase(m_local_snap_id_start);
+ }
+ if (!prune_snap_ids.empty()) {
+ locker->unlock();
+
+ auto prune_snap_id = *prune_snap_ids.begin();
+ dout(5) << "pruning unused non-primary snapshot " << prune_snap_id << dendl;
+ prune_non_primary_snapshot(prune_snap_id);
+ return;
+ }
+
+ if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) {
+ if (m_local_mirror_snap_ns.is_non_primary() &&
+ m_local_mirror_snap_ns.primary_mirror_uuid !=
+ m_state_builder->remote_mirror_uuid) {
+ if (m_local_mirror_snap_ns.is_orphan()) {
+ dout(5) << "local image being force promoted" << dendl;
+ handle_replay_complete(locker, 0, "orphan (force promoting)");
+ return;
+ }
+ // TODO support multiple peers
+ derr << "local image linked to unknown peer: "
+ << m_local_mirror_snap_ns.primary_mirror_uuid << dendl;
+ handle_replay_complete(locker, -EEXIST,
+ "local image linked to unknown peer");
+ return;
+ } else if (m_local_mirror_snap_ns.state ==
+ cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
+ dout(5) << "local image promoted" << dendl;
+ handle_replay_complete(locker, 0, "force promoted");
+ return;
+ }
+
+ dout(10) << "found local mirror snapshot: "
+ << "local_snap_id_start=" << m_local_snap_id_start << ", "
+ << "local_snap_id_end=" << m_local_snap_id_end << ", "
+ << "local_snap_ns=" << m_local_mirror_snap_ns << dendl;
+ if (!m_local_mirror_snap_ns.is_primary() &&
+ m_local_mirror_snap_ns.complete) {
+ // our remote sync should start after this completed snapshot
+ m_remote_snap_id_start = m_local_mirror_snap_ns.primary_snap_id;
+ }
+ }
+
+ // we don't have any mirror snapshots or only completed non-primary
+ // mirror snapshots
+ scan_remote_mirror_snapshots(locker);
+}
+
+template <typename I>
+void Replayer<I>::scan_remote_mirror_snapshots(
+ std::unique_lock<ceph::mutex>* locker) {
+ dout(10) << dendl;
+
+ m_pending_snapshots = 0;
+
+ std::set<uint64_t> unlink_snap_ids;
+ bool split_brain = false;
+ bool remote_demoted = false;
+ auto remote_image_ctx = m_state_builder->remote_image_ctx;
+ std::shared_lock image_locker{remote_image_ctx->image_lock};
+ for (auto snap_info_it = remote_image_ctx->snap_info.begin();
+ snap_info_it != remote_image_ctx->snap_info.end(); ++snap_info_it) {
+ const auto& snap_ns = snap_info_it->second.snap_namespace;
+ auto mirror_ns = std::get_if<
+ cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
+ if (mirror_ns == nullptr) {
+ continue;
+ }
+
+ dout(15) << "remote mirror snapshot: id=" << snap_info_it->first << ", "
+ << "mirror_ns=" << *mirror_ns << dendl;
+ remote_demoted = mirror_ns->is_demoted();
+ if (!mirror_ns->is_primary() && !mirror_ns->is_non_primary()) {
+ derr << "unknown remote mirror snapshot state" << dendl;
+ handle_replay_complete(locker, -EINVAL,
+ "invalid remote mirror snapshot state");
+ return;
+ } else if (mirror_ns->mirror_peer_uuids.count(m_remote_mirror_peer_uuid) ==
+ 0) {
+ dout(15) << "skipping remote snapshot due to missing mirror peer"
+ << dendl;
+ continue;
+ }
+
+ auto remote_snap_id = snap_info_it->first;
+ if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) {
+ // we have a local mirror snapshot
+ if (m_local_mirror_snap_ns.is_non_primary()) {
+ // previously validated that it was linked to remote
+ ceph_assert(m_local_mirror_snap_ns.primary_mirror_uuid ==
+ m_state_builder->remote_mirror_uuid);
+
+ if (m_remote_snap_id_end == CEPH_NOSNAP) {
+ // haven't found the end snap so treat this as a candidate for unlink
+ unlink_snap_ids.insert(remote_snap_id);
+ }
+ if (m_local_mirror_snap_ns.complete &&
+ m_local_mirror_snap_ns.primary_snap_id >= remote_snap_id) {
+ // skip past completed remote snapshot
+ m_remote_snap_id_start = remote_snap_id;
+ m_remote_mirror_snap_ns = *mirror_ns;
+ dout(15) << "skipping synced remote snapshot " << remote_snap_id
+ << dendl;
+ continue;
+ } else if (!m_local_mirror_snap_ns.complete &&
+ m_local_mirror_snap_ns.primary_snap_id > remote_snap_id) {
+ // skip until we get to the in-progress remote snapshot
+ dout(15) << "skipping synced remote snapshot " << remote_snap_id
+ << " while search for in-progress sync" << dendl;
+ m_remote_snap_id_start = remote_snap_id;
+ m_remote_mirror_snap_ns = *mirror_ns;
+ continue;
+ }
+ } else if (m_local_mirror_snap_ns.state ==
+ cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) {
+ // find the matching demotion snapshot in remote image
+ ceph_assert(m_local_snap_id_start > 0);
+ if (mirror_ns->state ==
+ cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED &&
+ mirror_ns->primary_mirror_uuid == m_local_mirror_uuid &&
+ mirror_ns->primary_snap_id == m_local_snap_id_start) {
+ dout(10) << "located matching demotion snapshot: "
+ << "remote_snap_id=" << remote_snap_id << ", "
+ << "local_snap_id=" << m_local_snap_id_start << dendl;
+ m_remote_snap_id_start = remote_snap_id;
+ split_brain = false;
+ continue;
+ } else if (m_remote_snap_id_start == 0) {
+ // still looking for our matching demotion snapshot
+ dout(15) << "skipping remote snapshot " << remote_snap_id << " "
+ << "while searching for demotion" << dendl;
+ split_brain = true;
+ continue;
+ }
+ } else {
+ // should not have been able to reach this
+ ceph_assert(false);
+ }
+ } else if (!mirror_ns->is_primary()) {
+ dout(15) << "skipping non-primary remote snapshot" << dendl;
+ continue;
+ }
+
+ // found candidate snapshot to sync
+ ++m_pending_snapshots;
+ if (m_remote_snap_id_end != CEPH_NOSNAP) {
+ continue;
+ }
+
+ // first primary snapshot where were are listed as a peer
+ m_remote_snap_id_end = remote_snap_id;
+ m_remote_mirror_snap_ns = *mirror_ns;
+ }
+
+ if (m_remote_snap_id_start != 0 &&
+ remote_image_ctx->snap_info.count(m_remote_snap_id_start) == 0) {
+ // the remote start snapshot was deleted out from under us
+ derr << "failed to locate remote start snapshot: "
+ << "snap_id=" << m_remote_snap_id_start << dendl;
+ split_brain = true;
+ }
+
+ image_locker.unlock();
+
+ if (!split_brain) {
+ unlink_snap_ids.erase(m_remote_snap_id_start);
+ unlink_snap_ids.erase(m_remote_snap_id_end);
+ if (!unlink_snap_ids.empty()) {
+ locker->unlock();
+
+ // retry the unlinking process for a remote snapshot that we do not
+ // need anymore
+ auto remote_snap_id = *unlink_snap_ids.begin();
+ dout(10) << "unlinking from remote snapshot " << remote_snap_id << dendl;
+ unlink_peer(remote_snap_id);
+ return;
+ }
+
+ if (m_remote_snap_id_end != CEPH_NOSNAP) {
+ dout(10) << "found remote mirror snapshot: "
+ << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+ << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
+ << "remote_snap_ns=" << m_remote_mirror_snap_ns << dendl;
+ if (m_remote_mirror_snap_ns.complete) {
+ locker->unlock();
+
+ if (m_local_snap_id_end != CEPH_NOSNAP &&
+ !m_local_mirror_snap_ns.complete) {
+ // attempt to resume image-sync
+ dout(10) << "local image contains in-progress mirror snapshot"
+ << dendl;
+ get_local_image_state();
+ } else {
+ copy_snapshots();
+ }
+ return;
+ } else {
+ // might have raced with the creation of a remote mirror snapshot
+ // so we will need to refresh and rescan once it completes
+ dout(15) << "remote mirror snapshot not complete" << dendl;
+ }
+ }
+ }
+
+ if (m_image_updated) {
+ // received update notification while scanning image, restart ...
+ m_image_updated = false;
+ locker->unlock();
+
+ dout(10) << "restarting snapshot scan due to remote update notification"
+ << dendl;
+ load_local_image_meta();
+ return;
+ }
+
+ if (is_replay_interrupted(locker)) {
+ return;
+ } else if (split_brain) {
+ derr << "split-brain detected: failed to find matching non-primary "
+ << "snapshot in remote image: "
+ << "local_snap_id_start=" << m_local_snap_id_start << ", "
+ << "local_snap_ns=" << m_local_mirror_snap_ns << dendl;
+ handle_replay_complete(locker, -EEXIST, "split-brain");
+ return;
+ } else if (remote_demoted) {
+ dout(10) << "remote image demoted" << dendl;
+ handle_replay_complete(locker, -EREMOTEIO, "remote image demoted");
+ return;
+ }
+
+ dout(10) << "all remote snapshots synced: idling waiting for new snapshot"
+ << dendl;
+ ceph_assert(m_state == STATE_REPLAYING);
+ m_state = STATE_IDLE;
+
+ notify_status_updated();
+}
+
+template <typename I>
+void Replayer<I>::prune_non_primary_snapshot(uint64_t snap_id) {
+ dout(10) << "snap_id=" << snap_id << dendl;
+
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ bool snap_valid = false;
+ cls::rbd::SnapshotNamespace snap_namespace;
+ std::string snap_name;
+
+ {
+ std::shared_lock image_locker{local_image_ctx->image_lock};
+ auto snap_info = local_image_ctx->get_snap_info(snap_id);
+ if (snap_info != nullptr) {
+ snap_valid = true;
+ snap_namespace = snap_info->snap_namespace;
+ snap_name = snap_info->name;
+
+ ceph_assert(std::holds_alternative<cls::rbd::MirrorSnapshotNamespace>(
+ snap_namespace));
+ }
+ }
+
+ if (!snap_valid) {
+ load_local_image_meta();
+ return;
+ }
+
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_prune_non_primary_snapshot>(this);
+ local_image_ctx->operations->snap_remove(snap_namespace, snap_name, ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_prune_non_primary_snapshot(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to prune non-primary snapshot: " << cpp_strerror(r)
+ << dendl;
+ handle_replay_complete(r, "failed to prune non-primary snapshot");
+ return;
+ }
+
+ if (is_replay_interrupted()) {
+ return;
+ }
+
+ load_local_image_meta();
+}
+
+template <typename I>
+void Replayer<I>::copy_snapshots() {
+ dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+ << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
+ << "local_snap_id_start=" << m_local_snap_id_start << dendl;
+
+ ceph_assert(m_remote_snap_id_start != CEPH_NOSNAP);
+ ceph_assert(m_remote_snap_id_end > 0 &&
+ m_remote_snap_id_end != CEPH_NOSNAP);
+ ceph_assert(m_local_snap_id_start != CEPH_NOSNAP);
+
+ m_local_mirror_snap_ns = {};
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_copy_snapshots>(this);
+ auto req = librbd::deep_copy::SnapshotCopyRequest<I>::create(
+ m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx,
+ m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start,
+ false, m_threads->work_queue, &m_local_mirror_snap_ns.snap_seqs,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void Replayer<I>::handle_copy_snapshots(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to copy snapshots from remote to local image: "
+ << cpp_strerror(r) << dendl;
+ handle_replay_complete(
+ r, "failed to copy snapshots from remote to local image");
+ return;
+ }
+
+ dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+ << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
+ << "local_snap_id_start=" << m_local_snap_id_start << ", "
+ << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
+ get_remote_image_state();
+}
+
+template <typename I>
+void Replayer<I>::get_remote_image_state() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_get_remote_image_state>(this);
+ auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create(
+ m_state_builder->remote_image_ctx, m_remote_snap_id_end,
+ &m_image_state, ctx);
+ req->send();
+}
+
+template <typename I>
+void Replayer<I>::handle_get_remote_image_state(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve remote snapshot image state: "
+ << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to retrieve remote snapshot image state");
+ return;
+ }
+
+ create_non_primary_snapshot();
+}
+
+template <typename I>
+void Replayer<I>::get_local_image_state() {
+ dout(10) << dendl;
+
+ ceph_assert(m_local_snap_id_end != CEPH_NOSNAP);
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_get_local_image_state>(this);
+ auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create(
+ m_state_builder->local_image_ctx, m_local_snap_id_end,
+ &m_image_state, ctx);
+ req->send();
+}
+
+template <typename I>
+void Replayer<I>::handle_get_local_image_state(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to retrieve local snapshot image state: "
+ << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to retrieve local snapshot image state");
+ return;
+ }
+
+ request_sync();
+}
+
+template <typename I>
+void Replayer<I>::create_non_primary_snapshot() {
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+
+ if (m_local_snap_id_start > 0) {
+ std::shared_lock local_image_locker{local_image_ctx->image_lock};
+
+ auto local_snap_info_it = local_image_ctx->snap_info.find(
+ m_local_snap_id_start);
+ if (local_snap_info_it == local_image_ctx->snap_info.end()) {
+ local_image_locker.unlock();
+
+ derr << "failed to locate local snapshot " << m_local_snap_id_start
+ << dendl;
+ handle_replay_complete(-ENOENT, "failed to locate local start snapshot");
+ return;
+ }
+
+ auto mirror_ns = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
+ &local_snap_info_it->second.snap_namespace);
+ ceph_assert(mirror_ns != nullptr);
+
+ auto remote_image_ctx = m_state_builder->remote_image_ctx;
+ std::shared_lock remote_image_locker{remote_image_ctx->image_lock};
+
+ // (re)build a full mapping from remote to local snap ids for all user
+ // snapshots to support applying image state in the future
+ for (auto& [remote_snap_id, remote_snap_info] :
+ remote_image_ctx->snap_info) {
+ if (remote_snap_id >= m_remote_snap_id_end) {
+ break;
+ }
+
+ // we can ignore all non-user snapshots since image state only includes
+ // user snapshots
+ if (!std::holds_alternative<cls::rbd::UserSnapshotNamespace>(
+ remote_snap_info.snap_namespace)) {
+ continue;
+ }
+
+ uint64_t local_snap_id = CEPH_NOSNAP;
+ if (mirror_ns->is_demoted() && !m_remote_mirror_snap_ns.is_demoted()) {
+ // if we are creating a non-primary snapshot following a demotion,
+ // re-build the full snapshot sequence since we don't have a valid
+ // snapshot mapping
+ auto local_snap_id_it = local_image_ctx->snap_ids.find(
+ {remote_snap_info.snap_namespace, remote_snap_info.name});
+ if (local_snap_id_it != local_image_ctx->snap_ids.end()) {
+ local_snap_id = local_snap_id_it->second;
+ }
+ } else {
+ auto snap_seq_it = mirror_ns->snap_seqs.find(remote_snap_id);
+ if (snap_seq_it != mirror_ns->snap_seqs.end()) {
+ local_snap_id = snap_seq_it->second;
+ }
+ }
+
+ if (m_local_mirror_snap_ns.snap_seqs.count(remote_snap_id) == 0 &&
+ local_snap_id != CEPH_NOSNAP) {
+ dout(15) << "mapping remote snapshot " << remote_snap_id << " to "
+ << "local snapshot " << local_snap_id << dendl;
+ m_local_mirror_snap_ns.snap_seqs[remote_snap_id] = local_snap_id;
+ }
+ }
+ }
+
+ dout(10) << "demoted=" << m_remote_mirror_snap_ns.is_demoted() << ", "
+ << "primary_mirror_uuid="
+ << m_state_builder->remote_mirror_uuid << ", "
+ << "primary_snap_id=" << m_remote_snap_id_end << ", "
+ << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
+
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_create_non_primary_snapshot>(this);
+ auto req = librbd::mirror::snapshot::CreateNonPrimaryRequest<I>::create(
+ local_image_ctx, m_remote_mirror_snap_ns.is_demoted(),
+ m_state_builder->remote_mirror_uuid, m_remote_snap_id_end,
+ m_local_mirror_snap_ns.snap_seqs, m_image_state, &m_local_snap_id_end, ctx);
+ req->send();
+}
+
+template <typename I>
+void Replayer<I>::handle_create_non_primary_snapshot(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to create local mirror snapshot: " << cpp_strerror(r)
+ << dendl;
+ handle_replay_complete(r, "failed to create local mirror snapshot");
+ return;
+ }
+
+ dout(15) << "local_snap_id_end=" << m_local_snap_id_end << dendl;
+
+ update_mirror_image_state();
+}
+
+template <typename I>
+void Replayer<I>::update_mirror_image_state() {
+ if (m_local_snap_id_start > 0) {
+ request_sync();
+ return;
+ }
+
+ // a newly created non-primary image has a local mirror state of CREATING
+ // until this point so that we could avoid preserving the image until
+ // the first non-primary snapshot linked the two images together.
+ dout(10) << dendl;
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_update_mirror_image_state>(this);
+ auto req = librbd::mirror::ImageStateUpdateRequest<I>::create(
+ m_state_builder->local_image_ctx->md_ctx,
+ m_state_builder->local_image_ctx->id,
+ cls::rbd::MIRROR_IMAGE_STATE_ENABLED, {}, ctx);
+ req->send();
+}
+
+template <typename I>
+void Replayer<I>::handle_update_mirror_image_state(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update local mirror image state: " << cpp_strerror(r)
+ << dendl;
+ handle_replay_complete(r, "failed to update local mirror image state");
+ return;
+ }
+
+ request_sync();
+}
+
+template <typename I>
+void Replayer<I>::request_sync() {
+ if (m_remote_mirror_snap_ns.clean_since_snap_id == m_remote_snap_id_start) {
+ dout(10) << "skipping unnecessary image copy: "
+ << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+ << "remote_mirror_snap_ns=" << m_remote_mirror_snap_ns << dendl;
+ apply_image_state();
+ return;
+ }
+
+ dout(10) << dendl;
+ std::unique_lock locker{m_lock};
+ if (is_replay_interrupted(&locker)) {
+ return;
+ }
+
+ auto ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_request_sync>(this));
+ m_instance_watcher->notify_sync_request(m_state_builder->local_image_ctx->id,
+ ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_request_sync(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (is_replay_interrupted(&locker)) {
+ return;
+ } else if (r == -ECANCELED) {
+ dout(5) << "image-sync canceled" << dendl;
+ handle_replay_complete(&locker, r, "image-sync canceled");
+ return;
+ } else if (r < 0) {
+ derr << "failed to request image-sync: " << cpp_strerror(r) << dendl;
+ handle_replay_complete(&locker, r, "failed to request image-sync");
+ return;
+ }
+
+ m_sync_in_progress = true;
+ locker.unlock();
+
+ copy_image();
+}
+
+template <typename I>
+void Replayer<I>::copy_image() {
+ dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
+ << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
+ << "local_snap_id_start=" << m_local_snap_id_start << ", "
+ << "last_copied_object_number="
+ << m_local_mirror_snap_ns.last_copied_object_number << ", "
+ << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
+
+ m_snapshot_bytes = 0;
+ m_snapshot_replay_start = ceph_clock_now();
+ m_deep_copy_handler = new DeepCopyHandler(this);
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_copy_image>(this);
+ auto req = librbd::deep_copy::ImageCopyRequest<I>::create(
+ m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx,
+ m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start, false,
+ (m_local_mirror_snap_ns.last_copied_object_number > 0 ?
+ librbd::deep_copy::ObjectNumber{
+ m_local_mirror_snap_ns.last_copied_object_number} :
+ librbd::deep_copy::ObjectNumber{}),
+ m_local_mirror_snap_ns.snap_seqs, m_deep_copy_handler, ctx);
+ req->send();
+}
+
+template <typename I>
+void Replayer<I>::handle_copy_image(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ delete m_deep_copy_handler;
+ m_deep_copy_handler = nullptr;
+
+ if (r < 0) {
+ derr << "failed to copy remote image to local image: " << cpp_strerror(r)
+ << dendl;
+ handle_replay_complete(r, "failed to copy remote image");
+ return;
+ }
+
+ {
+ std::unique_lock locker{m_lock};
+ m_last_snapshot_bytes = m_snapshot_bytes;
+ m_bytes_per_snapshot(m_snapshot_bytes);
+ utime_t duration = ceph_clock_now() - m_snapshot_replay_start;
+ m_last_snapshot_sync_seconds = duration.sec();
+
+ if (g_snapshot_perf_counters) {
+ g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes,
+ m_snapshot_bytes);
+ g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_snapshots);
+ g_snapshot_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time,
+ duration);
+ }
+ if (m_perf_counters) {
+ m_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes, m_snapshot_bytes);
+ m_perf_counters->inc(l_rbd_mirror_snapshot_snapshots);
+ m_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time, duration);
+ m_perf_counters->tset(l_rbd_mirror_snapshot_last_sync_time, duration);
+ m_perf_counters->set(l_rbd_mirror_snapshot_last_sync_bytes,
+ m_snapshot_bytes);
+ }
+ }
+
+ apply_image_state();
+}
+
+template <typename I>
+void Replayer<I>::handle_copy_image_progress(uint64_t object_number,
+ uint64_t object_count) {
+ dout(10) << "object_number=" << object_number << ", "
+ << "object_count=" << object_count << dendl;
+
+ std::unique_lock locker{m_lock};
+ m_local_mirror_snap_ns.last_copied_object_number = std::min(
+ object_number, object_count);
+ m_local_object_count = object_count;
+
+ update_non_primary_snapshot(false);
+}
+
+template <typename I>
+void Replayer<I>::handle_copy_image_read(uint64_t bytes_read) {
+ dout(20) << "bytes_read=" << bytes_read << dendl;
+
+ std::unique_lock locker{m_lock};
+ m_bytes_per_second(bytes_read);
+ m_snapshot_bytes += bytes_read;
+}
+
+template <typename I>
+void Replayer<I>::apply_image_state() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_apply_image_state>(this);
+ auto req = ApplyImageStateRequest<I>::create(
+ m_local_mirror_uuid,
+ m_state_builder->remote_mirror_uuid,
+ m_state_builder->local_image_ctx,
+ m_state_builder->remote_image_ctx,
+ m_image_state, ctx);
+ req->send();
+}
+
+template <typename I>
+void Replayer<I>::handle_apply_image_state(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to apply remote image state to local image: "
+ << cpp_strerror(r) << dendl;
+ handle_replay_complete(r, "failed to apply remote image state");
+ return;
+ }
+
+ std::unique_lock locker{m_lock};
+ update_non_primary_snapshot(true);
+}
+
+template <typename I>
+void Replayer<I>::update_non_primary_snapshot(bool complete) {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+ if (!complete) {
+ // disallow two in-flight updates if this isn't the completion of the sync
+ if (m_updating_sync_point) {
+ return;
+ }
+ m_updating_sync_point = true;
+ } else {
+ m_local_mirror_snap_ns.complete = true;
+ }
+
+ dout(10) << dendl;
+
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::mirror_image_snapshot_set_copy_progress(
+ &op, m_local_snap_id_end, m_local_mirror_snap_ns.complete,
+ m_local_mirror_snap_ns.last_copied_object_number);
+
+ auto ctx = new C_TrackedOp(
+ m_in_flight_op_tracker, new LambdaContext([this, complete](int r) {
+ handle_update_non_primary_snapshot(complete, r);
+ }));
+ auto aio_comp = create_rados_callback(ctx);
+ int r = m_state_builder->local_image_ctx->md_ctx.aio_operate(
+ m_state_builder->local_image_ctx->header_oid, aio_comp, &op);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void Replayer<I>::handle_update_non_primary_snapshot(bool complete, int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to update local snapshot progress: " << cpp_strerror(r)
+ << dendl;
+ if (complete) {
+ // only fail if this was the final update
+ handle_replay_complete(r, "failed to update local snapshot progress");
+ return;
+ }
+ }
+
+ if (!complete) {
+ // periodic sync-point update -- do not advance state machine
+ std::unique_lock locker{m_lock};
+
+ ceph_assert(m_updating_sync_point);
+ m_updating_sync_point = false;
+ return;
+ }
+
+ notify_image_update();
+}
+
+template <typename I>
+void Replayer<I>::notify_image_update() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_notify_image_update>(this);
+ m_state_builder->local_image_ctx->notify_update(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_notify_image_update(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to notify local image update: " << cpp_strerror(r) << dendl;
+ }
+
+ unlink_peer(m_remote_snap_id_start);
+}
+
+template <typename I>
+void Replayer<I>::unlink_peer(uint64_t remote_snap_id) {
+ if (remote_snap_id == 0) {
+ finish_sync();
+ return;
+ }
+
+ // local snapshot fully synced -- we no longer depend on the sync
+ // start snapshot in the remote image
+ dout(10) << "remote_snap_id=" << remote_snap_id << dendl;
+
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_unlink_peer>(this);
+ auto req = librbd::mirror::snapshot::UnlinkPeerRequest<I>::create(
+ m_state_builder->remote_image_ctx, remote_snap_id,
+ m_remote_mirror_peer_uuid, false, ctx);
+ req->send();
+}
+
+template <typename I>
+void Replayer<I>::handle_unlink_peer(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to unlink local peer from remote image: " << cpp_strerror(r)
+ << dendl;
+ handle_replay_complete(r, "failed to unlink local peer from remote image");
+ return;
+ }
+
+ finish_sync();
+}
+
+template <typename I>
+void Replayer<I>::finish_sync() {
+ dout(10) << dendl;
+
+ {
+ std::unique_lock locker{m_lock};
+ notify_status_updated();
+
+ if (m_sync_in_progress) {
+ m_sync_in_progress = false;
+ m_instance_watcher->notify_sync_complete(
+ m_state_builder->local_image_ctx->id);
+ }
+ }
+
+ if (is_replay_interrupted()) {
+ return;
+ }
+
+ load_local_image_meta();
+}
+
+template <typename I>
+void Replayer<I>::register_local_update_watcher() {
+ dout(10) << dendl;
+
+ m_update_watch_ctx = new C_UpdateWatchCtx(this);
+
+ int r = m_state_builder->local_image_ctx->state->register_update_watcher(
+ m_update_watch_ctx, &m_local_update_watcher_handle);
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_register_local_update_watcher>(this);
+ m_threads->work_queue->queue(ctx, r);
+}
+
+template <typename I>
+void Replayer<I>::handle_register_local_update_watcher(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to register local update watcher: " << cpp_strerror(r)
+ << dendl;
+ handle_replay_complete(r, "failed to register local image update watcher");
+ m_state = STATE_COMPLETE;
+
+ delete m_update_watch_ctx;
+ m_update_watch_ctx = nullptr;
+
+ Context* on_init = nullptr;
+ std::swap(on_init, m_on_init_shutdown);
+ on_init->complete(r);
+ return;
+ }
+
+ register_remote_update_watcher();
+}
+
+template <typename I>
+void Replayer<I>::register_remote_update_watcher() {
+ dout(10) << dendl;
+
+ int r = m_state_builder->remote_image_ctx->state->register_update_watcher(
+ m_update_watch_ctx, &m_remote_update_watcher_handle);
+ auto ctx = create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_register_remote_update_watcher>(this);
+ m_threads->work_queue->queue(ctx, r);
+}
+
+template <typename I>
+void Replayer<I>::handle_register_remote_update_watcher(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to register remote update watcher: " << cpp_strerror(r)
+ << dendl;
+ handle_replay_complete(r, "failed to register remote image update watcher");
+ m_state = STATE_COMPLETE;
+
+ unregister_local_update_watcher();
+ return;
+ }
+
+ m_state = STATE_REPLAYING;
+
+ Context* on_init = nullptr;
+ std::swap(on_init, m_on_init_shutdown);
+ on_init->complete(0);
+
+ // delay initial snapshot scan until after we have alerted
+ // image replayer that we have initialized in case an error
+ // occurs
+ {
+ std::unique_lock locker{m_lock};
+ notify_status_updated();
+ }
+
+ load_local_image_meta();
+}
+
+template <typename I>
+void Replayer<I>::unregister_remote_update_watcher() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ Replayer<I>,
+ &Replayer<I>::handle_unregister_remote_update_watcher>(this);
+ m_state_builder->remote_image_ctx->state->unregister_update_watcher(
+ m_remote_update_watcher_handle, ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_unregister_remote_update_watcher(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to unregister remote update watcher: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ unregister_local_update_watcher();
+}
+
+template <typename I>
+void Replayer<I>::unregister_local_update_watcher() {
+ dout(10) << dendl;
+
+ auto ctx = create_context_callback<
+ Replayer<I>,
+ &Replayer<I>::handle_unregister_local_update_watcher>(this);
+ m_state_builder->local_image_ctx->state->unregister_update_watcher(
+ m_local_update_watcher_handle, ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_unregister_local_update_watcher(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ if (r < 0) {
+ derr << "failed to unregister local update watcher: " << cpp_strerror(r)
+ << dendl;
+ }
+
+ delete m_update_watch_ctx;
+ m_update_watch_ctx = nullptr;
+
+ wait_for_in_flight_ops();
+}
+
+template <typename I>
+void Replayer<I>::wait_for_in_flight_ops() {
+ dout(10) << dendl;
+
+ auto ctx = create_async_context_callback(
+ m_threads->work_queue, create_context_callback<
+ Replayer<I>, &Replayer<I>::handle_wait_for_in_flight_ops>(this));
+ m_in_flight_op_tracker.wait_for_ops(ctx);
+}
+
+template <typename I>
+void Replayer<I>::handle_wait_for_in_flight_ops(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ Context* on_shutdown = nullptr;
+ {
+ std::unique_lock locker{m_lock};
+ ceph_assert(m_on_init_shutdown != nullptr);
+ std::swap(on_shutdown, m_on_init_shutdown);
+ }
+ on_shutdown->complete(m_error_code);
+}
+
+template <typename I>
+void Replayer<I>::handle_image_update_notify() {
+ dout(10) << dendl;
+
+ std::unique_lock locker{m_lock};
+ if (m_state == STATE_REPLAYING) {
+ dout(15) << "flagging snapshot rescan required" << dendl;
+ m_image_updated = true;
+ } else if (m_state == STATE_IDLE) {
+ m_state = STATE_REPLAYING;
+ locker.unlock();
+
+ dout(15) << "restarting idle replayer" << dendl;
+ load_local_image_meta();
+ }
+}
+
+template <typename I>
+void Replayer<I>::handle_replay_complete(int r,
+ const std::string& description) {
+ std::unique_lock locker{m_lock};
+ handle_replay_complete(&locker, r, description);
+}
+
+template <typename I>
+void Replayer<I>::handle_replay_complete(std::unique_lock<ceph::mutex>* locker,
+ int r,
+ const std::string& description) {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ if (m_sync_in_progress) {
+ m_sync_in_progress = false;
+ m_instance_watcher->notify_sync_complete(
+ m_state_builder->local_image_ctx->id);
+ }
+
+ // don't set error code and description if resuming a pending
+ // shutdown
+ if (is_replay_interrupted(locker)) {
+ return;
+ }
+
+ if (m_error_code == 0) {
+ m_error_code = r;
+ m_error_description = description;
+ }
+
+ if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) {
+ return;
+ }
+
+ m_state = STATE_COMPLETE;
+ notify_status_updated();
+}
+
+template <typename I>
+void Replayer<I>::notify_status_updated() {
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ dout(10) << dendl;
+ auto ctx = new C_TrackedOp(m_in_flight_op_tracker, new LambdaContext(
+ [this](int) {
+ m_replayer_listener->handle_notification();
+ }));
+ m_threads->work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+bool Replayer<I>::is_replay_interrupted() {
+ std::unique_lock locker{m_lock};
+ return is_replay_interrupted(&locker);
+}
+
+template <typename I>
+bool Replayer<I>::is_replay_interrupted(std::unique_lock<ceph::mutex>* locker) {
+ if (m_state == STATE_COMPLETE) {
+ locker->unlock();
+
+ dout(10) << "resuming pending shut down" << dendl;
+ unregister_remote_update_watcher();
+ return true;
+ }
+ return false;
+}
+
+template <typename I>
+void Replayer<I>::register_perf_counters() {
+ dout(5) << dendl;
+
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+ ceph_assert(m_perf_counters == nullptr);
+
+ auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
+ auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
+
+ auto local_image_ctx = m_state_builder->local_image_ctx;
+ std::string labels = ceph::perf_counters::key_create(
+ "rbd_mirror_snapshot_image",
+ {{"pool", local_image_ctx->md_ctx.get_pool_name()},
+ {"namespace", local_image_ctx->md_ctx.get_namespace()},
+ {"image", local_image_ctx->name}});
+
+ PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_snapshot_first,
+ l_rbd_mirror_snapshot_last);
+ plb.add_u64_counter(l_rbd_mirror_snapshot_snapshots, "snapshots",
+ "Number of snapshots synced", nullptr, prio);
+ plb.add_time_avg(l_rbd_mirror_snapshot_sync_time, "sync_time",
+ "Average sync time", nullptr, prio);
+ plb.add_u64_counter(l_rbd_mirror_snapshot_sync_bytes, "sync_bytes",
+ "Total bytes synced", nullptr, prio, unit_t(UNIT_BYTES));
+ plb.add_time(l_rbd_mirror_snapshot_remote_timestamp, "remote_timestamp",
+ "Timestamp of the remote snapshot", nullptr, prio);
+ plb.add_time(l_rbd_mirror_snapshot_local_timestamp, "local_timestamp",
+ "Timestamp of the local snapshot", nullptr, prio);
+ plb.add_time(l_rbd_mirror_snapshot_last_sync_time, "last_sync_time",
+ "Time taken to sync the last snapshot", nullptr, prio);
+ plb.add_u64(l_rbd_mirror_snapshot_last_sync_bytes, "last_sync_bytes",
+ "Bytes synced for the last snapshot", nullptr, prio,
+ unit_t(UNIT_BYTES));
+
+ m_perf_counters = plb.create_perf_counters();
+ g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
+}
+
+template <typename I>
+void Replayer<I>::unregister_perf_counters() {
+ dout(5) << dendl;
+ ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+
+ PerfCounters *perf_counters = nullptr;
+ std::swap(perf_counters, m_perf_counters);
+
+ if (perf_counters != nullptr) {
+ g_ceph_context->get_perfcounters_collection()->remove(perf_counters);
+ delete perf_counters;
+ }
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::Replayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h
new file mode 100644
index 000000000..17d45f6bc
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h
@@ -0,0 +1,349 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H
+#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H
+
+#include "tools/rbd_mirror/image_replayer/Replayer.h"
+#include "common/ceph_mutex.h"
+#include "common/AsyncOpTracker.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/mirror/snapshot/Types.h"
+#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h"
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/accumulators/statistics/rolling_mean.hpp>
+#include <string>
+#include <type_traits>
+
+namespace librbd {
+
+struct ImageCtx;
+namespace snapshot { template <typename I> class Replay; }
+
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+template <typename> struct InstanceWatcher;
+class PoolMetaCache;
+template <typename> struct Threads;
+
+namespace image_replayer {
+
+struct ReplayerListener;
+
+namespace snapshot {
+
+template <typename> class EventPreprocessor;
+template <typename> class ReplayStatusFormatter;
+template <typename> class StateBuilder;
+
+template <typename ImageCtxT>
+class Replayer : public image_replayer::Replayer {
+public:
+ static Replayer* create(
+ Threads<ImageCtxT>* threads,
+ InstanceWatcher<ImageCtxT>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ StateBuilder<ImageCtxT>* state_builder,
+ ReplayerListener* replayer_listener) {
+ return new Replayer(threads, instance_watcher, local_mirror_uuid,
+ pool_meta_cache, state_builder, replayer_listener);
+ }
+
+ Replayer(
+ Threads<ImageCtxT>* threads,
+ InstanceWatcher<ImageCtxT>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ StateBuilder<ImageCtxT>* state_builder,
+ ReplayerListener* replayer_listener);
+ ~Replayer();
+
+ void destroy() override {
+ delete this;
+ }
+
+ void init(Context* on_finish) override;
+ void shut_down(Context* on_finish) override;
+
+ void flush(Context* on_finish) override;
+
+ bool get_replay_status(std::string* description, Context* on_finish) override;
+
+ bool is_replaying() const override {
+ std::unique_lock locker{m_lock};
+ return (m_state == STATE_REPLAYING || m_state == STATE_IDLE);
+ }
+
+ bool is_resync_requested() const override {
+ std::unique_lock locker{m_lock};
+ return m_resync_requested;
+ }
+
+ int get_error_code() const override {
+ std::unique_lock locker(m_lock);
+ return m_error_code;
+ }
+
+ std::string get_error_description() const override {
+ std::unique_lock locker(m_lock);
+ return m_error_description;
+ }
+
+ std::string get_image_spec() const {
+ std::unique_lock locker(m_lock);
+ return m_image_spec;
+ }
+
+private:
+ /**
+ * @verbatim
+ *
+ * <init>
+ * |
+ * v
+ * REGISTER_LOCAL_UPDATE_WATCHER
+ * |
+ * v
+ * REGISTER_REMOTE_UPDATE_WATCHER
+ * |
+ * v
+ * LOAD_LOCAL_IMAGE_META <----------------------------\
+ * | |
+ * v (skip if not needed) |
+ * REFRESH_LOCAL_IMAGE |
+ * | |
+ * v (skip if not needed) |
+ * REFRESH_REMOTE_IMAGE |
+ * | |
+ * | (unused non-primary snapshot) |
+ * |\--------------> PRUNE_NON_PRIMARY_SNAPSHOT---/|
+ * | |
+ * | (interrupted sync) |
+ * |\--------------> GET_LOCAL_IMAGE_STATE ------\ |
+ * | | |
+ * | (new snapshot) | |
+ * |\--------------> COPY_SNAPSHOTS | |
+ * | | | |
+ * | v | |
+ * | GET_REMOTE_IMAGE_STATE | |
+ * | | | |
+ * | v | |
+ * | CREATE_NON_PRIMARY_SNAPSHOT | |
+ * | | | |
+ * | v (skip if not needed)| |
+ * | UPDATE_MIRROR_IMAGE_STATE | |
+ * | | | |
+ * | |/--------------------/ |
+ * | | |
+ * | v |
+ * | REQUEST_SYNC |
+ * | | |
+ * | v |
+ * | COPY_IMAGE |
+ * | | |
+ * | v |
+ * | APPLY_IMAGE_STATE |
+ * | | |
+ * | v |
+ * | UPDATE_NON_PRIMARY_SNAPSHOT |
+ * | | |
+ * | v |
+ * | NOTIFY_IMAGE_UPDATE |
+ * | | |
+ * | (interrupted unlink) v |
+ * |\--------------> UNLINK_PEER |
+ * | | |
+ * | v |
+ * | NOTIFY_LISTENER |
+ * | | |
+ * | \----------------------/|
+ * | |
+ * | (remote demoted) |
+ * \---------------> NOTIFY_LISTENER |
+ * | | |
+ * |/--------------------/ |
+ * | |
+ * | (update notification) |
+ * <idle> --------------------------------------------/
+ * |
+ * v
+ * <shut down>
+ * |
+ * v
+ * UNREGISTER_REMOTE_UPDATE_WATCHER
+ * |
+ * v
+ * UNREGISTER_LOCAL_UPDATE_WATCHER
+ * |
+ * v
+ * WAIT_FOR_IN_FLIGHT_OPS
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ enum State {
+ STATE_INIT,
+ STATE_REPLAYING,
+ STATE_IDLE,
+ STATE_COMPLETE
+ };
+
+ struct C_UpdateWatchCtx;
+ struct DeepCopyHandler;
+
+ Threads<ImageCtxT>* m_threads;
+ InstanceWatcher<ImageCtxT>* m_instance_watcher;
+ std::string m_local_mirror_uuid;
+ PoolMetaCache* m_pool_meta_cache;
+ StateBuilder<ImageCtxT>* m_state_builder;
+ ReplayerListener* m_replayer_listener;
+
+ mutable ceph::mutex m_lock;
+
+ State m_state = STATE_INIT;
+
+ std::string m_image_spec;
+ Context* m_on_init_shutdown = nullptr;
+
+ bool m_resync_requested = false;
+ int m_error_code = 0;
+ std::string m_error_description;
+
+ C_UpdateWatchCtx* m_update_watch_ctx = nullptr;
+ uint64_t m_local_update_watcher_handle = 0;
+ uint64_t m_remote_update_watcher_handle = 0;
+ bool m_image_updated = false;
+
+ AsyncOpTracker m_in_flight_op_tracker;
+
+ uint64_t m_local_snap_id_start = 0;
+ uint64_t m_local_snap_id_end = CEPH_NOSNAP;
+ cls::rbd::MirrorSnapshotNamespace m_local_mirror_snap_ns;
+ uint64_t m_local_object_count = 0;
+
+ std::string m_remote_mirror_peer_uuid;
+ uint64_t m_remote_snap_id_start = 0;
+ uint64_t m_remote_snap_id_end = CEPH_NOSNAP;
+ cls::rbd::MirrorSnapshotNamespace m_remote_mirror_snap_ns;
+
+ librbd::mirror::snapshot::ImageState m_image_state;
+ DeepCopyHandler* m_deep_copy_handler = nullptr;
+
+ TimeRollingMean m_bytes_per_second;
+ uint64_t m_last_snapshot_sync_seconds = 0;
+
+ uint64_t m_snapshot_bytes = 0;
+ uint64_t m_last_snapshot_bytes = 0;
+
+ boost::accumulators::accumulator_set<
+ uint64_t, boost::accumulators::stats<
+ boost::accumulators::tag::rolling_mean>> m_bytes_per_snapshot{
+ boost::accumulators::tag::rolling_window::window_size = 2};
+ utime_t m_snapshot_replay_start;
+
+ uint32_t m_pending_snapshots = 0;
+
+ bool m_remote_image_updated = false;
+ bool m_updating_sync_point = false;
+ bool m_sync_in_progress = false;
+
+ PerfCounters *m_perf_counters = nullptr;
+
+ void load_local_image_meta();
+ void handle_load_local_image_meta(int r);
+
+ void refresh_local_image();
+ void handle_refresh_local_image(int r);
+
+ void refresh_remote_image();
+ void handle_refresh_remote_image(int r);
+
+ void scan_local_mirror_snapshots(std::unique_lock<ceph::mutex>* locker);
+ void scan_remote_mirror_snapshots(std::unique_lock<ceph::mutex>* locker);
+
+ void prune_non_primary_snapshot(uint64_t snap_id);
+ void handle_prune_non_primary_snapshot(int r);
+
+ void copy_snapshots();
+ void handle_copy_snapshots(int r);
+
+ void get_remote_image_state();
+ void handle_get_remote_image_state(int r);
+
+ void get_local_image_state();
+ void handle_get_local_image_state(int r);
+
+ void create_non_primary_snapshot();
+ void handle_create_non_primary_snapshot(int r);
+
+ void update_mirror_image_state();
+ void handle_update_mirror_image_state(int r);
+
+ void request_sync();
+ void handle_request_sync(int r);
+
+ void copy_image();
+ void handle_copy_image(int r);
+ void handle_copy_image_progress(uint64_t object_number,
+ uint64_t object_count);
+ void handle_copy_image_read(uint64_t bytes_read);
+
+ void apply_image_state();
+ void handle_apply_image_state(int r);
+
+ void update_non_primary_snapshot(bool complete);
+ void handle_update_non_primary_snapshot(bool complete, int r);
+
+ void notify_image_update();
+ void handle_notify_image_update(int r);
+
+ void unlink_peer(uint64_t remote_snap_id);
+ void handle_unlink_peer(int r);
+
+ void finish_sync();
+
+ void register_local_update_watcher();
+ void handle_register_local_update_watcher(int r);
+
+ void register_remote_update_watcher();
+ void handle_register_remote_update_watcher(int r);
+
+ void unregister_remote_update_watcher();
+ void handle_unregister_remote_update_watcher(int r);
+
+ void unregister_local_update_watcher();
+ void handle_unregister_local_update_watcher(int r);
+
+ void wait_for_in_flight_ops();
+ void handle_wait_for_in_flight_ops(int r);
+
+ void handle_image_update_notify();
+
+ void handle_replay_complete(int r, const std::string& description);
+ void handle_replay_complete(std::unique_lock<ceph::mutex>* locker,
+ int r, const std::string& description);
+ void notify_status_updated();
+
+ bool is_replay_interrupted();
+ bool is_replay_interrupted(std::unique_lock<ceph::mutex>* lock);
+
+ void register_perf_counters();
+ void unregister_perf_counters();
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::Replayer<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc
new file mode 100644
index 000000000..ca3e6918b
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc
@@ -0,0 +1,120 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "StateBuilder.h"
+#include "include/ceph_assert.h"
+#include "include/Context.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/mirror/snapshot/ImageMeta.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h"
+#include "tools/rbd_mirror/image_replayer/snapshot/Replayer.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
+ << "StateBuilder: " << this << " " \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+template <typename I>
+StateBuilder<I>::StateBuilder(const std::string& global_image_id)
+ : image_replayer::StateBuilder<I>(global_image_id) {
+}
+
+template <typename I>
+StateBuilder<I>::~StateBuilder() {
+ ceph_assert(local_image_meta == nullptr);
+}
+
+template <typename I>
+void StateBuilder<I>::close(Context* on_finish) {
+ dout(10) << dendl;
+
+ delete local_image_meta;
+ local_image_meta = nullptr;
+
+ // close the remote image after closing the local
+ // image in case the remote cluster is unreachable and
+ // we cannot close it.
+ on_finish = new LambdaContext([this, on_finish](int) {
+ this->close_remote_image(on_finish);
+ });
+ this->close_local_image(on_finish);
+}
+
+template <typename I>
+bool StateBuilder<I>::is_disconnected() const {
+ return false;
+}
+
+template <typename I>
+bool StateBuilder<I>::is_linked_impl() const {
+ // the remote has to have us registered as a peer
+ return !remote_mirror_peer_uuid.empty();
+}
+
+template <typename I>
+cls::rbd::MirrorImageMode StateBuilder<I>::get_mirror_image_mode() const {
+ return cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT;
+}
+
+template <typename I>
+image_sync::SyncPointHandler* StateBuilder<I>::create_sync_point_handler() {
+ dout(10) << dendl;
+
+ // TODO
+ ceph_assert(false);
+ return nullptr;
+}
+
+template <typename I>
+BaseRequest* StateBuilder<I>::create_local_image_request(
+ Threads<I>* threads,
+ librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ Context* on_finish) {
+ return CreateLocalImageRequest<I>::create(
+ threads, local_io_ctx, this->remote_image_ctx, global_image_id,
+ pool_meta_cache, progress_ctx, this, on_finish);
+}
+
+template <typename I>
+BaseRequest* StateBuilder<I>::create_prepare_replay_request(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) {
+ return PrepareReplayRequest<I>::create(
+ local_mirror_uuid, progress_ctx, this, resync_requested, syncing,
+ on_finish);
+}
+
+template <typename I>
+image_replayer::Replayer* StateBuilder<I>::create_replayer(
+ Threads<I>* threads,
+ InstanceWatcher<I>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ ReplayerListener* replayer_listener) {
+ return Replayer<I>::create(
+ threads, instance_watcher, local_mirror_uuid, pool_meta_cache, this,
+ replayer_listener);
+}
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::snapshot::StateBuilder<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h
new file mode 100644
index 000000000..a4ab82982
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h
@@ -0,0 +1,93 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H
+#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H
+
+#include "tools/rbd_mirror/image_replayer/StateBuilder.h"
+#include <string>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace mirror {
+namespace snapshot {
+
+template <typename> class ImageMeta;
+
+} // namespace snapshot
+} // namespace mirror
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+
+template <typename> class SyncPointHandler;
+
+template <typename ImageCtxT>
+class StateBuilder : public image_replayer::StateBuilder<ImageCtxT> {
+public:
+ static StateBuilder* create(const std::string& global_image_id) {
+ return new StateBuilder(global_image_id);
+ }
+
+ StateBuilder(const std::string& global_image_id);
+ ~StateBuilder() override;
+
+ void close(Context* on_finish) override;
+
+ bool is_disconnected() const override;
+
+ cls::rbd::MirrorImageMode get_mirror_image_mode() const override;
+
+ image_sync::SyncPointHandler* create_sync_point_handler() override;
+
+ bool replay_requires_remote_image() const override {
+ return true;
+ }
+
+ BaseRequest* create_local_image_request(
+ Threads<ImageCtxT>* threads,
+ librados::IoCtx& local_io_ctx,
+ const std::string& global_image_id,
+ PoolMetaCache* pool_meta_cache,
+ ProgressContext* progress_ctx,
+ Context* on_finish) override;
+
+ BaseRequest* create_prepare_replay_request(
+ const std::string& local_mirror_uuid,
+ ProgressContext* progress_ctx,
+ bool* resync_requested,
+ bool* syncing,
+ Context* on_finish) override;
+
+ image_replayer::Replayer* create_replayer(
+ Threads<ImageCtxT>* threads,
+ InstanceWatcher<ImageCtxT>* instance_watcher,
+ const std::string& local_mirror_uuid,
+ PoolMetaCache* pool_meta_cache,
+ ReplayerListener* replayer_listener) override;
+
+ SyncPointHandler<ImageCtxT>* sync_point_handler = nullptr;
+
+ std::string remote_mirror_peer_uuid;
+
+ librbd::mirror::snapshot::ImageMeta<ImageCtxT>* local_image_meta = nullptr;
+
+private:
+ bool is_linked_impl() const override;
+};
+
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::snapshot::StateBuilder<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc
new file mode 100644
index 000000000..6df95d300
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Utils.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::util::" \
+ << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+namespace util {
+
+uint64_t compute_remote_snap_id(
+ const ceph::shared_mutex& local_image_lock,
+ const std::map<librados::snap_t, librbd::SnapInfo>& local_snap_infos,
+ uint64_t local_snap_id, const std::string& remote_mirror_uuid) {
+ ceph_assert(ceph_mutex_is_locked(local_image_lock));
+
+ // Search our local non-primary snapshots for a mapping to the remote
+ // snapshot. The non-primary mirror snapshot with the mappings will always
+ // come at or after the snapshot we are searching against
+ for (auto snap_it = local_snap_infos.lower_bound(local_snap_id);
+ snap_it != local_snap_infos.end(); ++snap_it) {
+ auto mirror_ns = std::get_if<cls::rbd::MirrorSnapshotNamespace>(
+ &snap_it->second.snap_namespace);
+ if (mirror_ns == nullptr || !mirror_ns->is_non_primary()) {
+ continue;
+ }
+
+ if (mirror_ns->primary_mirror_uuid != remote_mirror_uuid) {
+ dout(20) << "local snapshot " << snap_it->first << " not tied to remote"
+ << dendl;
+ continue;
+ } else if (local_snap_id == snap_it->first) {
+ dout(15) << "local snapshot " << local_snap_id << " maps to "
+ << "remote snapshot " << mirror_ns->primary_snap_id << dendl;
+ return mirror_ns->primary_snap_id;
+ }
+
+ const auto& snap_seqs = mirror_ns->snap_seqs;
+ for (auto [remote_snap_id_seq, local_snap_id_seq] : snap_seqs) {
+ if (local_snap_id_seq == local_snap_id) {
+ dout(15) << "local snapshot " << local_snap_id << " maps to "
+ << "remote snapshot " << remote_snap_id_seq << dendl;
+ return remote_snap_id_seq;
+ }
+ }
+ }
+
+ return CEPH_NOSNAP;
+}
+
+} // namespace util
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h
new file mode 100644
index 000000000..8efc58685
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h
@@ -0,0 +1,30 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H
+#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H
+
+#include "include/int_types.h"
+#include "include/rados/librados.hpp"
+#include "common/ceph_mutex.h"
+#include "librbd/Types.h"
+#include <map>
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+namespace snapshot {
+namespace util {
+
+uint64_t compute_remote_snap_id(
+ const ceph::shared_mutex& local_image_lock,
+ const std::map<librados::snap_t, librbd::SnapInfo>& local_snap_infos,
+ uint64_t local_snap_id, const std::string& remote_mirror_uuid);
+
+} // namespace util
+} // namespace snapshot
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc
new file mode 100644
index 000000000..1bd5d77f0
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc
@@ -0,0 +1,172 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SyncPointCreateRequest.h"
+#include "include/uuid.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/image_sync/Types.h"
+#include "tools/rbd_mirror/image_sync/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointCreateRequest: " \
+ << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+SyncPointCreateRequest<I>::SyncPointCreateRequest(
+ I *remote_image_ctx,
+ const std::string &local_mirror_uuid,
+ SyncPointHandler* sync_point_handler,
+ Context *on_finish)
+ : m_remote_image_ctx(remote_image_ctx),
+ m_local_mirror_uuid(local_mirror_uuid),
+ m_sync_point_handler(sync_point_handler),
+ m_on_finish(on_finish) {
+ m_sync_points_copy = m_sync_point_handler->get_sync_points();
+ ceph_assert(m_sync_points_copy.size() < 2);
+
+ // initialize the updated client meta with the new sync point
+ m_sync_points_copy.emplace_back();
+ if (m_sync_points_copy.size() > 1) {
+ m_sync_points_copy.back().from_snap_name =
+ m_sync_points_copy.front().snap_name;
+ }
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send() {
+ send_update_sync_points();
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send_update_sync_points() {
+ uuid_d uuid_gen;
+ uuid_gen.generate_random();
+
+ auto& sync_point = m_sync_points_copy.back();
+ sync_point.snap_name = util::get_snapshot_name_prefix(m_local_mirror_uuid) +
+ uuid_gen.to_string();
+
+ auto ctx = create_context_callback<
+ SyncPointCreateRequest<I>,
+ &SyncPointCreateRequest<I>::handle_update_sync_points>(this);
+ m_sync_point_handler->update_sync_points(
+ m_sync_point_handler->get_snap_seqs(), m_sync_points_copy, false, ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_update_sync_points(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to update client data: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ send_refresh_image();
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send_refresh_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_refresh_image>(
+ this);
+ m_remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_refresh_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_create_snap();
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send_create_snap() {
+ dout(20) << dendl;
+
+ auto& sync_point = m_sync_points_copy.back();
+
+ Context *ctx = create_context_callback<
+ SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_create_snap>(
+ this);
+ m_remote_image_ctx->operations->snap_create(
+ cls::rbd::UserSnapshotNamespace(), sync_point.snap_name.c_str(),
+ librbd::SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE, m_prog_ctx, ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_create_snap(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r == -EEXIST) {
+ send_update_sync_points();
+ return;
+ } else if (r < 0) {
+ derr << ": failed to create snapshot: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_final_refresh_image();
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::send_final_refresh_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ SyncPointCreateRequest<I>,
+ &SyncPointCreateRequest<I>::handle_final_refresh_image>(this);
+ m_remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::handle_final_refresh_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to refresh image for snapshot: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void SyncPointCreateRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h
new file mode 100644
index 000000000..9b52b8374
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h
@@ -0,0 +1,93 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H
+#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H
+
+#include "librbd/internal.h"
+#include "Types.h"
+#include <string>
+
+class Context;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SyncPointCreateRequest {
+public:
+ static SyncPointCreateRequest* create(
+ ImageCtxT *remote_image_ctx,
+ const std::string &local_mirror_uuid,
+ SyncPointHandler* sync_point_handler,
+ Context *on_finish) {
+ return new SyncPointCreateRequest(remote_image_ctx, local_mirror_uuid,
+ sync_point_handler, on_finish);
+ }
+
+ SyncPointCreateRequest(
+ ImageCtxT *remote_image_ctx,
+ const std::string &local_mirror_uuid,
+ SyncPointHandler* sync_point_handler,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * UPDATE_SYNC_POINTS < . .
+ * | .
+ * v .
+ * REFRESH_IMAGE .
+ * | . (repeat on EEXIST)
+ * v .
+ * CREATE_SNAP . . . . . .
+ * |
+ * v
+ * REFRESH_IMAGE
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT *m_remote_image_ctx;
+ std::string m_local_mirror_uuid;
+ SyncPointHandler* m_sync_point_handler;
+ Context *m_on_finish;
+
+ SyncPoints m_sync_points_copy;
+ librbd::NoOpProgressContext m_prog_ctx;
+
+ void send_update_sync_points();
+ void handle_update_sync_points(int r);
+
+ void send_refresh_image();
+ void handle_refresh_image(int r);
+
+ void send_create_snap();
+ void handle_create_snap(int r);
+
+ void send_final_refresh_image();
+ void handle_final_refresh_image(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc
new file mode 100644
index 000000000..d1cd32b39
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc
@@ -0,0 +1,213 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "SyncPointPruneRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Operations.h"
+#include "librbd/Utils.h"
+#include <set>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointPruneRequest: " \
+ << this << " " << __func__
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+SyncPointPruneRequest<I>::SyncPointPruneRequest(
+ I *remote_image_ctx,
+ bool sync_complete,
+ SyncPointHandler* sync_point_handler,
+ Context *on_finish)
+ : m_remote_image_ctx(remote_image_ctx),
+ m_sync_complete(sync_complete),
+ m_sync_point_handler(sync_point_handler),
+ m_on_finish(on_finish) {
+ m_sync_points_copy = m_sync_point_handler->get_sync_points();
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::send() {
+ if (m_sync_points_copy.empty()) {
+ send_remove_snap();
+ return;
+ }
+
+ if (m_sync_complete) {
+ // if sync is complete, we can remove the master sync point
+ auto it = m_sync_points_copy.begin();
+ auto& sync_point = *it;
+
+ ++it;
+ if (it == m_sync_points_copy.end() ||
+ it->from_snap_name != sync_point.snap_name) {
+ m_snap_names.push_back(sync_point.snap_name);
+ }
+
+ if (!sync_point.from_snap_name.empty()) {
+ m_snap_names.push_back(sync_point.from_snap_name);
+ }
+ } else {
+ // if we have more than one sync point or invalid sync points,
+ // trim them off
+ std::shared_lock image_locker{m_remote_image_ctx->image_lock};
+ std::set<std::string> snap_names;
+ for (auto it = m_sync_points_copy.rbegin();
+ it != m_sync_points_copy.rend(); ++it) {
+ auto& sync_point = *it;
+ if (&sync_point == &m_sync_points_copy.front()) {
+ if (m_remote_image_ctx->get_snap_id(
+ cls::rbd::UserSnapshotNamespace(), sync_point.snap_name) ==
+ CEPH_NOSNAP) {
+ derr << ": failed to locate sync point snapshot: "
+ << sync_point.snap_name << dendl;
+ } else if (!sync_point.from_snap_name.empty()) {
+ derr << ": unexpected from_snap_name in primary sync point: "
+ << sync_point.from_snap_name << dendl;
+ } else {
+ // first sync point is OK -- keep it
+ break;
+ }
+ m_invalid_master_sync_point = true;
+ }
+
+ if (snap_names.count(sync_point.snap_name) == 0) {
+ snap_names.insert(sync_point.snap_name);
+ m_snap_names.push_back(sync_point.snap_name);
+ }
+
+ auto& front_sync_point = m_sync_points_copy.front();
+ if (!sync_point.from_snap_name.empty() &&
+ snap_names.count(sync_point.from_snap_name) == 0 &&
+ sync_point.from_snap_name != front_sync_point.snap_name) {
+ snap_names.insert(sync_point.from_snap_name);
+ m_snap_names.push_back(sync_point.from_snap_name);
+ }
+ }
+ }
+
+ send_remove_snap();
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::send_remove_snap() {
+ if (m_snap_names.empty()) {
+ send_refresh_image();
+ return;
+ }
+
+ const std::string &snap_name = m_snap_names.front();
+
+ dout(20) << ": snap_name=" << snap_name << dendl;
+
+ Context *ctx = create_context_callback<
+ SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_remove_snap>(
+ this);
+ m_remote_image_ctx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(),
+ snap_name.c_str(),
+ ctx);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::handle_remove_snap(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ ceph_assert(!m_snap_names.empty());
+ std::string snap_name = m_snap_names.front();
+ m_snap_names.pop_front();
+
+ if (r == -ENOENT) {
+ r = 0;
+ }
+ if (r < 0) {
+ derr << ": failed to remove snapshot '" << snap_name << "': "
+ << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_remove_snap();
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::send_refresh_image() {
+ dout(20) << dendl;
+
+ Context *ctx = create_context_callback<
+ SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_refresh_image>(
+ this);
+ m_remote_image_ctx->state->refresh(ctx);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::handle_refresh_image(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ send_update_sync_points();
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::send_update_sync_points() {
+ dout(20) << dendl;
+
+ if (m_sync_complete) {
+ m_sync_points_copy.pop_front();
+ } else {
+ while (m_sync_points_copy.size() > 1) {
+ m_sync_points_copy.pop_back();
+ }
+ if (m_invalid_master_sync_point) {
+ // all subsequent sync points would have been pruned
+ m_sync_points_copy.clear();
+ }
+ }
+
+ auto ctx = create_context_callback<
+ SyncPointPruneRequest<I>,
+ &SyncPointPruneRequest<I>::handle_update_sync_points>(this);
+ m_sync_point_handler->update_sync_points(
+ m_sync_point_handler->get_snap_seqs(), m_sync_points_copy,
+ m_sync_complete, ctx);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::handle_update_sync_points(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ if (r < 0) {
+ derr << ": failed to update client data: " << cpp_strerror(r)
+ << dendl;
+ finish(r);
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void SyncPointPruneRequest<I>::finish(int r) {
+ dout(20) << ": r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
new file mode 100644
index 000000000..08bf840b1
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
@@ -0,0 +1,91 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H
+#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H
+
+#include "tools/rbd_mirror/image_sync/Types.h"
+#include <list>
+#include <string>
+
+class Context;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class SyncPointPruneRequest {
+public:
+ static SyncPointPruneRequest* create(
+ ImageCtxT *remote_image_ctx,
+ bool sync_complete,
+ SyncPointHandler* sync_point_handler,
+ Context *on_finish) {
+ return new SyncPointPruneRequest(remote_image_ctx, sync_complete,
+ sync_point_handler, on_finish);
+ }
+
+ SyncPointPruneRequest(
+ ImageCtxT *remote_image_ctx,
+ bool sync_complete,
+ SyncPointHandler* sync_point_handler,
+ Context *on_finish);
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * | . . . . .
+ * | . .
+ * v v . (repeat if from snap
+ * REMOVE_SNAP . . . unused by other sync)
+ * |
+ * v
+ * REFRESH_IMAGE
+ * |
+ * v
+ * UPDATE_CLIENT
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ ImageCtxT *m_remote_image_ctx;
+ bool m_sync_complete;
+ SyncPointHandler* m_sync_point_handler;
+ Context *m_on_finish;
+
+ SyncPoints m_sync_points_copy;
+ std::list<std::string> m_snap_names;
+
+ bool m_invalid_master_sync_point = false;
+
+ void send_remove_snap();
+ void handle_remove_snap(int r);
+
+ void send_refresh_image();
+ void handle_refresh_image(int r);
+
+ void send_update_sync_points();
+ void handle_update_sync_points(int r);
+
+ void finish(int r);
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_sync/Types.h b/src/tools/rbd_mirror/image_sync/Types.h
new file mode 100644
index 000000000..d748dc93e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/Types.h
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_SYNC_TYPES_H
+#define RBD_MIRROR_IMAGE_SYNC_TYPES_H
+
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/Types.h"
+#include <list>
+#include <string>
+#include <boost/optional.hpp>
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+
+struct SyncPoint {
+ typedef boost::optional<uint64_t> ObjectNumber;
+
+ SyncPoint() {
+ }
+ SyncPoint(const cls::rbd::SnapshotNamespace& snap_namespace,
+ const std::string& snap_name,
+ const std::string& from_snap_name,
+ const ObjectNumber& object_number)
+ : snap_namespace(snap_namespace), snap_name(snap_name),
+ from_snap_name(from_snap_name), object_number(object_number) {
+ }
+
+ cls::rbd::SnapshotNamespace snap_namespace =
+ {cls::rbd::UserSnapshotNamespace{}};
+ std::string snap_name;
+ std::string from_snap_name;
+ ObjectNumber object_number = boost::none;
+
+ bool operator==(const SyncPoint& rhs) const {
+ return (snap_namespace == rhs.snap_namespace &&
+ snap_name == rhs.snap_name &&
+ from_snap_name == rhs.from_snap_name &&
+ object_number == rhs.object_number);
+ }
+};
+
+typedef std::list<SyncPoint> SyncPoints;
+
+struct SyncPointHandler {
+public:
+ SyncPointHandler(const SyncPointHandler&) = delete;
+ SyncPointHandler& operator=(const SyncPointHandler&) = delete;
+
+ virtual ~SyncPointHandler() {}
+ virtual void destroy() {
+ delete this;
+ }
+
+ virtual SyncPoints get_sync_points() const = 0;
+ virtual librbd::SnapSeqs get_snap_seqs() const = 0;
+
+ virtual void update_sync_points(const librbd::SnapSeqs& snap_seq,
+ const SyncPoints& sync_points,
+ bool sync_complete,
+ Context* on_finish) = 0;
+
+protected:
+ SyncPointHandler() {}
+};
+
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
+
+#endif // RBD_MIRROR_IMAGE_SYNC_TYPES_H
diff --git a/src/tools/rbd_mirror/image_sync/Utils.cc b/src/tools/rbd_mirror/image_sync/Utils.cc
new file mode 100644
index 000000000..6a3eae72d
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/Utils.cc
@@ -0,0 +1,24 @@
+// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Utils.h"
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+namespace util {
+
+namespace {
+
+static const std::string SNAP_NAME_PREFIX(".rbd-mirror");
+
+} // anonymous namespace
+
+std::string get_snapshot_name_prefix(const std::string& local_mirror_uuid) {
+ return SNAP_NAME_PREFIX + "." + local_mirror_uuid + ".";
+}
+
+} // namespace util
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/image_sync/Utils.h b/src/tools/rbd_mirror/image_sync/Utils.h
new file mode 100644
index 000000000..139699daa
--- /dev/null
+++ b/src/tools/rbd_mirror/image_sync/Utils.h
@@ -0,0 +1,16 @@
+// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string>
+
+namespace rbd {
+namespace mirror {
+namespace image_sync {
+namespace util {
+
+std::string get_snapshot_name_prefix(const std::string& local_mirror_uuid);
+
+} // namespace util
+} // namespace image_sync
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/instance_watcher/Types.cc b/src/tools/rbd_mirror/instance_watcher/Types.cc
new file mode 100644
index 000000000..0e9922733
--- /dev/null
+++ b/src/tools/rbd_mirror/instance_watcher/Types.cc
@@ -0,0 +1,245 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+
+namespace rbd {
+namespace mirror {
+namespace instance_watcher {
+
+namespace {
+
+class EncodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ using ceph::encode;
+ encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl);
+ payload.encode(m_bl);
+ }
+
+private:
+ bufferlist &m_bl;
+};
+
+class DecodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter)
+ : m_version(version), m_iter(iter) {}
+
+ template <typename Payload>
+ inline void operator()(Payload &payload) const {
+ payload.decode(m_version, m_iter);
+ }
+
+private:
+ __u8 m_version;
+ bufferlist::const_iterator &m_iter;
+};
+
+class DumpPayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ NotifyOp notify_op = Payload::NOTIFY_OP;
+ m_formatter->dump_string("notify_op", stringify(notify_op));
+ payload.dump(m_formatter);
+ }
+
+private:
+ ceph::Formatter *m_formatter;
+};
+
+} // anonymous namespace
+
+void PayloadBase::encode(bufferlist &bl) const {
+ using ceph::encode;
+ encode(request_id, bl);
+}
+
+void PayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ decode(request_id, iter);
+}
+
+void PayloadBase::dump(Formatter *f) const {
+ f->dump_unsigned("request_id", request_id);
+}
+
+void ImagePayloadBase::encode(bufferlist &bl) const {
+ using ceph::encode;
+ PayloadBase::encode(bl);
+ encode(global_image_id, bl);
+}
+
+void ImagePayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ PayloadBase::decode(version, iter);
+ decode(global_image_id, iter);
+}
+
+void ImagePayloadBase::dump(Formatter *f) const {
+ PayloadBase::dump(f);
+ f->dump_string("global_image_id", global_image_id);
+}
+
+void PeerImageRemovedPayload::encode(bufferlist &bl) const {
+ using ceph::encode;
+ PayloadBase::encode(bl);
+ encode(global_image_id, bl);
+ encode(peer_mirror_uuid, bl);
+}
+
+void PeerImageRemovedPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ PayloadBase::decode(version, iter);
+ decode(global_image_id, iter);
+ decode(peer_mirror_uuid, iter);
+}
+
+void PeerImageRemovedPayload::dump(Formatter *f) const {
+ PayloadBase::dump(f);
+ f->dump_string("global_image_id", global_image_id);
+ f->dump_string("peer_mirror_uuid", peer_mirror_uuid);
+}
+
+void SyncPayloadBase::encode(bufferlist &bl) const {
+ using ceph::encode;
+ PayloadBase::encode(bl);
+ encode(sync_id, bl);
+}
+
+void SyncPayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ PayloadBase::decode(version, iter);
+ decode(sync_id, iter);
+}
+
+void SyncPayloadBase::dump(Formatter *f) const {
+ PayloadBase::dump(f);
+ f->dump_string("sync_id", sync_id);
+}
+
+void UnknownPayload::encode(bufferlist &bl) const {
+ ceph_abort();
+}
+
+void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void UnknownPayload::dump(Formatter *f) const {
+}
+
+void NotifyMessage::encode(bufferlist& bl) const {
+ ENCODE_START(2, 2, bl);
+ boost::apply_visitor(EncodePayloadVisitor(bl), payload);
+ ENCODE_FINISH(bl);
+}
+
+void NotifyMessage::decode(bufferlist::const_iterator& iter) {
+ DECODE_START(2, iter);
+
+ uint32_t notify_op;
+ decode(notify_op, iter);
+
+ // select the correct payload variant based upon the encoded op
+ switch (notify_op) {
+ case NOTIFY_OP_IMAGE_ACQUIRE:
+ payload = ImageAcquirePayload();
+ break;
+ case NOTIFY_OP_IMAGE_RELEASE:
+ payload = ImageReleasePayload();
+ break;
+ case NOTIFY_OP_PEER_IMAGE_REMOVED:
+ payload = PeerImageRemovedPayload();
+ break;
+ case NOTIFY_OP_SYNC_REQUEST:
+ payload = SyncRequestPayload();
+ break;
+ case NOTIFY_OP_SYNC_START:
+ payload = SyncStartPayload();
+ break;
+ default:
+ payload = UnknownPayload();
+ break;
+ }
+
+ apply_visitor(DecodePayloadVisitor(struct_v, iter), payload);
+ DECODE_FINISH(iter);
+}
+
+void NotifyMessage::dump(Formatter *f) const {
+ apply_visitor(DumpPayloadVisitor(f), payload);
+}
+
+void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
+ o.push_back(new NotifyMessage(ImageAcquirePayload()));
+ o.push_back(new NotifyMessage(ImageAcquirePayload(1, "gid")));
+
+ o.push_back(new NotifyMessage(ImageReleasePayload()));
+ o.push_back(new NotifyMessage(ImageReleasePayload(1, "gid")));
+
+ o.push_back(new NotifyMessage(PeerImageRemovedPayload()));
+ o.push_back(new NotifyMessage(PeerImageRemovedPayload(1, "gid", "uuid")));
+
+ o.push_back(new NotifyMessage(SyncRequestPayload()));
+ o.push_back(new NotifyMessage(SyncRequestPayload(1, "sync_id")));
+
+ o.push_back(new NotifyMessage(SyncStartPayload()));
+ o.push_back(new NotifyMessage(SyncStartPayload(1, "sync_id")));
+}
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op) {
+ switch (op) {
+ case NOTIFY_OP_IMAGE_ACQUIRE:
+ out << "ImageAcquire";
+ break;
+ case NOTIFY_OP_IMAGE_RELEASE:
+ out << "ImageRelease";
+ break;
+ case NOTIFY_OP_PEER_IMAGE_REMOVED:
+ out << "PeerImageRemoved";
+ break;
+ case NOTIFY_OP_SYNC_REQUEST:
+ out << "SyncRequest";
+ break;
+ case NOTIFY_OP_SYNC_START:
+ out << "SyncStart";
+ break;
+ default:
+ out << "Unknown (" << static_cast<uint32_t>(op) << ")";
+ break;
+ }
+ return out;
+}
+
+void NotifyAckPayload::encode(bufferlist &bl) const {
+ using ceph::encode;
+ encode(instance_id, bl);
+ encode(request_id, bl);
+ encode(ret_val, bl);
+}
+
+void NotifyAckPayload::decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ decode(instance_id, iter);
+ decode(request_id, iter);
+ decode(ret_val, iter);
+}
+
+void NotifyAckPayload::dump(Formatter *f) const {
+ f->dump_string("instance_id", instance_id);
+ f->dump_unsigned("request_id", request_id);
+ f->dump_int("request_id", ret_val);
+}
+
+} // namespace instance_watcher
+} // namespace mirror
+} // namespace rbd
diff --git a/src/tools/rbd_mirror/instance_watcher/Types.h b/src/tools/rbd_mirror/instance_watcher/Types.h
new file mode 100644
index 000000000..b0b7b7791
--- /dev/null
+++ b/src/tools/rbd_mirror/instance_watcher/Types.h
@@ -0,0 +1,197 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
+#define RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
+
+#include <string>
+#include <set>
+#include <boost/variant.hpp>
+
+#include "include/buffer_fwd.h"
+#include "include/encoding.h"
+#include "include/int_types.h"
+
+namespace ceph { class Formatter; }
+
+namespace rbd {
+namespace mirror {
+namespace instance_watcher {
+
+enum NotifyOp {
+ NOTIFY_OP_IMAGE_ACQUIRE = 0,
+ NOTIFY_OP_IMAGE_RELEASE = 1,
+ NOTIFY_OP_PEER_IMAGE_REMOVED = 2,
+ NOTIFY_OP_SYNC_REQUEST = 3,
+ NOTIFY_OP_SYNC_START = 4
+};
+
+struct PayloadBase {
+ uint64_t request_id;
+
+ PayloadBase() : request_id(0) {
+ }
+
+ PayloadBase(uint64_t request_id) : request_id(request_id) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct ImagePayloadBase : public PayloadBase {
+ std::string global_image_id;
+
+ ImagePayloadBase() : PayloadBase() {
+ }
+
+ ImagePayloadBase(uint64_t request_id, const std::string &global_image_id)
+ : PayloadBase(request_id), global_image_id(global_image_id) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct ImageAcquirePayload : public ImagePayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_ACQUIRE;
+
+ ImageAcquirePayload() {
+ }
+ ImageAcquirePayload(uint64_t request_id, const std::string &global_image_id)
+ : ImagePayloadBase(request_id, global_image_id) {
+ }
+};
+
+struct ImageReleasePayload : public ImagePayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_RELEASE;
+
+ ImageReleasePayload() {
+ }
+ ImageReleasePayload(uint64_t request_id, const std::string &global_image_id)
+ : ImagePayloadBase(request_id, global_image_id) {
+ }
+};
+
+struct PeerImageRemovedPayload : public PayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_PEER_IMAGE_REMOVED;
+
+ std::string global_image_id;
+ std::string peer_mirror_uuid;
+
+ PeerImageRemovedPayload() {
+ }
+ PeerImageRemovedPayload(uint64_t request_id,
+ const std::string& global_image_id,
+ const std::string& peer_mirror_uuid)
+ : PayloadBase(request_id),
+ global_image_id(global_image_id), peer_mirror_uuid(peer_mirror_uuid) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct SyncPayloadBase : public PayloadBase {
+ std::string sync_id;
+
+ SyncPayloadBase() : PayloadBase() {
+ }
+
+ SyncPayloadBase(uint64_t request_id, const std::string &sync_id)
+ : PayloadBase(request_id), sync_id(sync_id) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct SyncRequestPayload : public SyncPayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_REQUEST;
+
+ SyncRequestPayload() : SyncPayloadBase() {
+ }
+
+ SyncRequestPayload(uint64_t request_id, const std::string &sync_id)
+ : SyncPayloadBase(request_id, sync_id) {
+ }
+};
+
+struct SyncStartPayload : public SyncPayloadBase {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_START;
+
+ SyncStartPayload() : SyncPayloadBase() {
+ }
+
+ SyncStartPayload(uint64_t request_id, const std::string &sync_id)
+ : SyncPayloadBase(request_id, sync_id) {
+ }
+};
+
+struct UnknownPayload {
+ static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1);
+
+ UnknownPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+typedef boost::variant<ImageAcquirePayload,
+ ImageReleasePayload,
+ PeerImageRemovedPayload,
+ SyncRequestPayload,
+ SyncStartPayload,
+ UnknownPayload> Payload;
+
+struct NotifyMessage {
+ NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) {
+ }
+
+ Payload payload;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+
+ static void generate_test_instances(std::list<NotifyMessage *> &o);
+};
+
+WRITE_CLASS_ENCODER(NotifyMessage);
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op);
+
+struct NotifyAckPayload {
+ std::string instance_id;
+ uint64_t request_id;
+ int ret_val;
+
+ NotifyAckPayload() : request_id(0), ret_val(0) {
+ }
+
+ NotifyAckPayload(const std::string &instance_id, uint64_t request_id,
+ int ret_val)
+ : instance_id(instance_id), request_id(request_id), ret_val(ret_val) {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+};
+
+WRITE_CLASS_ENCODER(NotifyAckPayload);
+
+} // namespace instance_watcher
+} // namespace mirror
+} // namespace librbd
+
+using rbd::mirror::instance_watcher::encode;
+using rbd::mirror::instance_watcher::decode;
+
+#endif // RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/instances/Types.h b/src/tools/rbd_mirror/instances/Types.h
new file mode 100644
index 000000000..8b0a68fc3
--- /dev/null
+++ b/src/tools/rbd_mirror/instances/Types.h
@@ -0,0 +1,28 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_INSTANCES_TYPES_H
+#define CEPH_RBD_MIRROR_INSTANCES_TYPES_H
+
+#include <string>
+#include <vector>
+
+namespace rbd {
+namespace mirror {
+namespace instances {
+
+struct Listener {
+ typedef std::vector<std::string> InstanceIds;
+
+ virtual ~Listener() {
+ }
+
+ virtual void handle_added(const InstanceIds& instance_ids) = 0;
+ virtual void handle_removed(const InstanceIds& instance_ids) = 0;
+};
+
+} // namespace instances
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_INSTANCES_TYPES_H
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.cc b/src/tools/rbd_mirror/leader_watcher/Types.cc
new file mode 100644
index 000000000..d2fb7908f
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.cc
@@ -0,0 +1,161 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Types.h"
+#include "include/ceph_assert.h"
+#include "include/stringify.h"
+#include "common/Formatter.h"
+
+namespace rbd {
+namespace mirror {
+namespace leader_watcher {
+
+namespace {
+
+class EncodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ using ceph::encode;
+ encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl);
+ payload.encode(m_bl);
+ }
+
+private:
+ bufferlist &m_bl;
+};
+
+class DecodePayloadVisitor : public boost::static_visitor<void> {
+public:
+ DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter)
+ : m_version(version), m_iter(iter) {}
+
+ template <typename Payload>
+ inline void operator()(Payload &payload) const {
+ payload.decode(m_version, m_iter);
+ }
+
+private:
+ __u8 m_version;
+ bufferlist::const_iterator &m_iter;
+};
+
+class DumpPayloadVisitor : public boost::static_visitor<void> {
+public:
+ explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {}
+
+ template <typename Payload>
+ inline void operator()(const Payload &payload) const {
+ NotifyOp notify_op = Payload::NOTIFY_OP;
+ m_formatter->dump_string("notify_op", stringify(notify_op));
+ payload.dump(m_formatter);
+ }
+
+private:
+ ceph::Formatter *m_formatter;
+};
+
+} // anonymous namespace
+
+void HeartbeatPayload::encode(bufferlist &bl) const {
+}
+
+void HeartbeatPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void HeartbeatPayload::dump(Formatter *f) const {
+}
+
+void LockAcquiredPayload::encode(bufferlist &bl) const {
+}
+
+void LockAcquiredPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void LockAcquiredPayload::dump(Formatter *f) const {
+}
+
+void LockReleasedPayload::encode(bufferlist &bl) const {
+}
+
+void LockReleasedPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void LockReleasedPayload::dump(Formatter *f) const {
+}
+
+void UnknownPayload::encode(bufferlist &bl) const {
+ ceph_abort();
+}
+
+void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
+}
+
+void UnknownPayload::dump(Formatter *f) const {
+}
+
+void NotifyMessage::encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ boost::apply_visitor(EncodePayloadVisitor(bl), payload);
+ ENCODE_FINISH(bl);
+}
+
+void NotifyMessage::decode(bufferlist::const_iterator& iter) {
+ DECODE_START(1, iter);
+
+ uint32_t notify_op;
+ decode(notify_op, iter);
+
+ // select the correct payload variant based upon the encoded op
+ switch (notify_op) {
+ case NOTIFY_OP_HEARTBEAT:
+ payload = HeartbeatPayload();
+ break;
+ case NOTIFY_OP_LOCK_ACQUIRED:
+ payload = LockAcquiredPayload();
+ break;
+ case NOTIFY_OP_LOCK_RELEASED:
+ payload = LockReleasedPayload();
+ break;
+ default:
+ payload = UnknownPayload();
+ break;
+ }
+
+ apply_visitor(DecodePayloadVisitor(struct_v, iter), payload);
+ DECODE_FINISH(iter);
+}
+
+void NotifyMessage::dump(Formatter *f) const {
+ apply_visitor(DumpPayloadVisitor(f), payload);
+}
+
+void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
+ o.push_back(new NotifyMessage(HeartbeatPayload()));
+ o.push_back(new NotifyMessage(LockAcquiredPayload()));
+ o.push_back(new NotifyMessage(LockReleasedPayload()));
+}
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op) {
+ switch (op) {
+ case NOTIFY_OP_HEARTBEAT:
+ out << "Heartbeat";
+ break;
+ case NOTIFY_OP_LOCK_ACQUIRED:
+ out << "LockAcquired";
+ break;
+ case NOTIFY_OP_LOCK_RELEASED:
+ out << "LockReleased";
+ break;
+ default:
+ out << "Unknown (" << static_cast<uint32_t>(op) << ")";
+ break;
+ }
+ return out;
+}
+
+} // namespace leader_watcher
+} // namespace mirror
+} // namespace librbd
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.h b/src/tools/rbd_mirror/leader_watcher/Types.h
new file mode 100644
index 000000000..1278e54b7
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.h
@@ -0,0 +1,117 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_LEADER_WATCHER_TYPES_H
+#define RBD_MIRROR_LEADER_WATCHER_TYPES_H
+
+#include "include/int_types.h"
+#include "include/buffer_fwd.h"
+#include "include/encoding.h"
+#include <string>
+#include <vector>
+#include <boost/variant.hpp>
+
+struct Context;
+
+namespace ceph { class Formatter; }
+
+namespace rbd {
+namespace mirror {
+namespace leader_watcher {
+
+struct Listener {
+ typedef std::vector<std::string> InstanceIds;
+
+ virtual ~Listener() {
+ }
+
+ virtual void post_acquire_handler(Context *on_finish) = 0;
+ virtual void pre_release_handler(Context *on_finish) = 0;
+
+ virtual void update_leader_handler(
+ const std::string &leader_instance_id) = 0;
+
+ virtual void handle_instances_added(const InstanceIds& instance_ids) = 0;
+ virtual void handle_instances_removed(const InstanceIds& instance_ids) = 0;
+};
+
+enum NotifyOp {
+ NOTIFY_OP_HEARTBEAT = 0,
+ NOTIFY_OP_LOCK_ACQUIRED = 1,
+ NOTIFY_OP_LOCK_RELEASED = 2,
+};
+
+struct HeartbeatPayload {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_HEARTBEAT;
+
+ HeartbeatPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct LockAcquiredPayload {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_ACQUIRED;
+
+ LockAcquiredPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct LockReleasedPayload {
+ static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_RELEASED;
+
+ LockReleasedPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+struct UnknownPayload {
+ static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1);
+
+ UnknownPayload() {
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(__u8 version, bufferlist::const_iterator &iter);
+ void dump(Formatter *f) const;
+};
+
+typedef boost::variant<HeartbeatPayload,
+ LockAcquiredPayload,
+ LockReleasedPayload,
+ UnknownPayload> Payload;
+
+struct NotifyMessage {
+ NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) {
+ }
+
+ Payload payload;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& it);
+ void dump(Formatter *f) const;
+
+ static void generate_test_instances(std::list<NotifyMessage *> &o);
+};
+
+WRITE_CLASS_ENCODER(NotifyMessage);
+
+std::ostream &operator<<(std::ostream &out, const NotifyOp &op);
+
+} // namespace leader_watcher
+} // namespace mirror
+} // namespace librbd
+
+using rbd::mirror::leader_watcher::encode;
+using rbd::mirror::leader_watcher::decode;
+
+#endif // RBD_MIRROR_LEADER_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/main.cc b/src/tools/rbd_mirror/main.cc
new file mode 100644
index 000000000..85e95e6b6
--- /dev/null
+++ b/src/tools/rbd_mirror/main.cc
@@ -0,0 +1,124 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/perf_counters.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+#include "Mirror.h"
+#include "Types.h"
+
+#include <vector>
+
+rbd::mirror::Mirror *mirror = nullptr;
+PerfCounters *g_journal_perf_counters = nullptr;
+PerfCounters *g_snapshot_perf_counters = nullptr;
+
+void usage() {
+ std::cout << "usage: rbd-mirror [options...]" << std::endl;
+ std::cout << "options:\n";
+ std::cout << " -m monaddress[:port] connect to specified monitor\n";
+ std::cout << " --keyring=<path> path to keyring for local cluster\n";
+ std::cout << " --log-file=<logfile> file to log debug output\n";
+ std::cout << " --debug-rbd-mirror=<log-level>/<memory-level> set rbd-mirror debug level\n";
+ generic_server_usage();
+}
+
+static void handle_signal(int signum)
+{
+ if (mirror)
+ mirror->handle_signal(signum);
+}
+
+int main(int argc, const char **argv)
+{
+ auto args = argv_to_vec(argc, argv);
+ if (args.empty()) {
+ std::cerr << argv[0] << ": -h or --help for usage" << std::endl;
+ exit(1);
+ }
+ if (ceph_argparse_need_usage(args)) {
+ usage();
+ exit(0);
+ }
+
+ auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_DAEMON,
+ CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+
+ if (g_conf()->daemonize) {
+ global_init_daemonize(g_ceph_context);
+ }
+
+ common_init_finish(g_ceph_context);
+
+ init_async_signal_handler();
+ register_async_signal_handler(SIGHUP, handle_signal);
+ register_async_signal_handler_oneshot(SIGINT, handle_signal);
+ register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+ auto cmd_args = argv_to_vec(argc, argv);
+
+ // disable unnecessary librbd cache
+ g_ceph_context->_conf.set_val_or_die("rbd_cache", "false");
+
+ auto prio =
+ g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio");
+ {
+ PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_journal",
+ rbd::mirror::l_rbd_mirror_journal_first,
+ rbd::mirror::l_rbd_mirror_journal_last);
+ plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_entries, "entries",
+ "Number of entries replayed", nullptr, prio);
+ plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_replay_bytes,
+ "replay_bytes", "Total bytes replayed", nullptr, prio,
+ unit_t(UNIT_BYTES));
+ plb.add_time_avg(rbd::mirror::l_rbd_mirror_journal_replay_latency,
+ "replay_latency", "Replay latency", nullptr, prio);
+ g_journal_perf_counters = plb.create_perf_counters();
+ }
+ {
+ PerfCountersBuilder plb(
+ g_ceph_context, "rbd_mirror_snapshot",
+ rbd::mirror::l_rbd_mirror_snapshot_first,
+ rbd::mirror::l_rbd_mirror_snapshot_remote_timestamp);
+ plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_snapshots,
+ "snapshots", "Number of snapshots synced", nullptr,
+ prio);
+ plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_sync_time, "sync_time",
+ "Average sync time", nullptr, prio);
+ plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_sync_bytes,
+ "sync_bytes", "Total bytes synced", nullptr, prio,
+ unit_t(UNIT_BYTES));
+ g_snapshot_perf_counters = plb.create_perf_counters();
+ }
+ g_ceph_context->get_perfcounters_collection()->add(g_journal_perf_counters);
+ g_ceph_context->get_perfcounters_collection()->add(g_snapshot_perf_counters);
+
+ mirror = new rbd::mirror::Mirror(g_ceph_context, cmd_args);
+ int r = mirror->init();
+ if (r < 0) {
+ std::cerr << "failed to initialize: " << cpp_strerror(r) << std::endl;
+ goto cleanup;
+ }
+
+ mirror->run();
+
+ cleanup:
+ unregister_async_signal_handler(SIGHUP, handle_signal);
+ unregister_async_signal_handler(SIGINT, handle_signal);
+ unregister_async_signal_handler(SIGTERM, handle_signal);
+ shutdown_async_signal_handler();
+
+ g_ceph_context->get_perfcounters_collection()->remove(g_journal_perf_counters);
+ g_ceph_context->get_perfcounters_collection()->remove(g_snapshot_perf_counters);
+
+ delete mirror;
+ delete g_journal_perf_counters;
+ delete g_snapshot_perf_counters;
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc
new file mode 100644
index 000000000..a1d9c1b54
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "cls/rbd/cls_rbd_client.h"
+#include "librbd/Utils.h"
+#include <map>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::pool_watcher::RefreshImagesRequest " \
+ << this << " " << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+static const uint32_t MAX_RETURN = 1024;
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void RefreshImagesRequest<I>::send() {
+ m_image_ids->clear();
+ mirror_image_list();
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::mirror_image_list() {
+ dout(10) << dendl;
+
+ librados::ObjectReadOperation op;
+ librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN);
+
+ m_out_bl.clear();
+ librados::AioCompletion *aio_comp = create_rados_callback<
+ RefreshImagesRequest<I>,
+ &RefreshImagesRequest<I>::handle_mirror_image_list>(this);
+ int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+ ceph_assert(r == 0);
+ aio_comp->release();
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::handle_mirror_image_list(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ std::map<std::string, std::string> ids;
+ if (r == 0) {
+ auto it = m_out_bl.cbegin();
+ r = librbd::cls_client::mirror_image_list_finish(&it, &ids);
+ }
+
+ if (r < 0 && r != -ENOENT) {
+ derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl;
+ finish(r);
+ return;
+ }
+
+ // store as global -> local image ids
+ for (auto &id : ids) {
+ m_image_ids->emplace(id.second, id.first);
+ }
+
+ if (ids.size() == MAX_RETURN) {
+ m_start_after = ids.rbegin()->first;
+ mirror_image_list();
+ return;
+ }
+
+ finish(0);
+}
+
+template <typename I>
+void RefreshImagesRequest<I>::finish(int r) {
+ dout(10) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h
new file mode 100644
index 000000000..8bfeabe29
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
+
+#include "include/buffer.h"
+#include "include/rados/librados.hpp"
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+struct Context;
+
+namespace librbd { struct ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class RefreshImagesRequest {
+public:
+ static RefreshImagesRequest *create(librados::IoCtx &remote_io_ctx,
+ ImageIds *image_ids, Context *on_finish) {
+ return new RefreshImagesRequest(remote_io_ctx, image_ids, on_finish);
+ }
+
+ RefreshImagesRequest(librados::IoCtx &remote_io_ctx, ImageIds *image_ids,
+ Context *on_finish)
+ : m_remote_io_ctx(remote_io_ctx), m_image_ids(image_ids),
+ m_on_finish(on_finish) {
+ }
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * | /-------------\
+ * | | |
+ * v v | (more images)
+ * MIRROR_IMAGE_LIST ---/
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ librados::IoCtx &m_remote_io_ctx;
+ ImageIds *m_image_ids;
+ Context *m_on_finish;
+
+ bufferlist m_out_bl;
+ std::string m_start_after;
+
+ void mirror_image_list();
+ void handle_mirror_image_list(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>;
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H
diff --git a/src/tools/rbd_mirror/pool_watcher/Types.h b/src/tools/rbd_mirror/pool_watcher/Types.h
new file mode 100644
index 000000000..52dfc342d
--- /dev/null
+++ b/src/tools/rbd_mirror/pool_watcher/Types.h
@@ -0,0 +1,27 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
+#define CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
+
+#include "tools/rbd_mirror/Types.h"
+#include <string>
+
+namespace rbd {
+namespace mirror {
+namespace pool_watcher {
+
+struct Listener {
+ virtual ~Listener() {
+ }
+
+ virtual void handle_update(const std::string &mirror_uuid,
+ ImageIds &&added_image_ids,
+ ImageIds &&removed_image_ids) = 0;
+};
+
+} // namespace pool_watcher
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/service_daemon/Types.cc b/src/tools/rbd_mirror/service_daemon/Types.cc
new file mode 100644
index 000000000..7dc6537c5
--- /dev/null
+++ b/src/tools/rbd_mirror/service_daemon/Types.cc
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/service_daemon/Types.h"
+#include <iostream>
+
+namespace rbd {
+namespace mirror {
+namespace service_daemon {
+
+std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level) {
+ switch (callout_level) {
+ case CALLOUT_LEVEL_INFO:
+ os << "info";
+ break;
+ case CALLOUT_LEVEL_WARNING:
+ os << "warning";
+ break;
+ case CALLOUT_LEVEL_ERROR:
+ os << "error";
+ break;
+ }
+ return os;
+}
+
+} // namespace service_daemon
+} // namespace mirror
+} // namespace rbd
+
diff --git a/src/tools/rbd_mirror/service_daemon/Types.h b/src/tools/rbd_mirror/service_daemon/Types.h
new file mode 100644
index 000000000..3aab72016
--- /dev/null
+++ b/src/tools/rbd_mirror/service_daemon/Types.h
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
+#define CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
+
+#include "include/int_types.h"
+#include <iosfwd>
+#include <string>
+#include <boost/variant.hpp>
+
+namespace rbd {
+namespace mirror {
+namespace service_daemon {
+
+typedef uint64_t CalloutId;
+const uint64_t CALLOUT_ID_NONE {0};
+
+enum CalloutLevel {
+ CALLOUT_LEVEL_INFO,
+ CALLOUT_LEVEL_WARNING,
+ CALLOUT_LEVEL_ERROR
+};
+
+std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level);
+
+typedef boost::variant<bool, uint64_t, std::string> AttributeValue;
+
+} // namespace service_daemon
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H
diff --git a/src/tools/rbd_nbd/CMakeLists.txt b/src/tools/rbd_nbd/CMakeLists.txt
new file mode 100644
index 000000000..da758f514
--- /dev/null
+++ b/src/tools/rbd_nbd/CMakeLists.txt
@@ -0,0 +1,4 @@
+find_package(nl REQUIRED genl)
+add_executable(rbd-nbd rbd-nbd.cc)
+target_link_libraries(rbd-nbd librbd librados global nl::genl)
+install(TARGETS rbd-nbd DESTINATION bin)
diff --git a/src/tools/rbd_nbd/nbd-netlink.h b/src/tools/rbd_nbd/nbd-netlink.h
new file mode 100644
index 000000000..2d0b90964
--- /dev/null
+++ b/src/tools/rbd_nbd/nbd-netlink.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2017 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#ifndef _UAPILINUX_NBD_NETLINK_H
+#define _UAPILINUX_NBD_NETLINK_H
+
+#define NBD_GENL_FAMILY_NAME "nbd"
+#define NBD_GENL_VERSION 0x1
+#define NBD_GENL_MCAST_GROUP_NAME "nbd_mc_group"
+
+/* Configuration policy attributes, used for CONNECT */
+enum {
+ NBD_ATTR_UNSPEC,
+ NBD_ATTR_INDEX,
+ NBD_ATTR_SIZE_BYTES,
+ NBD_ATTR_BLOCK_SIZE_BYTES,
+ NBD_ATTR_TIMEOUT,
+ NBD_ATTR_SERVER_FLAGS,
+ NBD_ATTR_CLIENT_FLAGS,
+ NBD_ATTR_SOCKETS,
+ NBD_ATTR_DEAD_CONN_TIMEOUT,
+ NBD_ATTR_DEVICE_LIST,
+ NBD_ATTR_BACKEND_IDENTIFIER,
+ __NBD_ATTR_MAX,
+};
+#define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1)
+
+/*
+ * This is the format for multiple devices with NBD_ATTR_DEVICE_LIST
+ *
+ * [NBD_ATTR_DEVICE_LIST]
+ * [NBD_DEVICE_ITEM]
+ * [NBD_DEVICE_INDEX]
+ * [NBD_DEVICE_CONNECTED]
+ */
+enum {
+ NBD_DEVICE_ITEM_UNSPEC,
+ NBD_DEVICE_ITEM,
+ __NBD_DEVICE_ITEM_MAX,
+};
+#define NBD_DEVICE_ITEM_MAX (__NBD_DEVICE_ITEM_MAX - 1)
+
+enum {
+ NBD_DEVICE_UNSPEC,
+ NBD_DEVICE_INDEX,
+ NBD_DEVICE_CONNECTED,
+ __NBD_DEVICE_MAX,
+};
+#define NBD_DEVICE_ATTR_MAX (__NBD_DEVICE_MAX - 1)
+
+/*
+ * This is the format for multiple sockets with NBD_ATTR_SOCKETS
+ *
+ * [NBD_ATTR_SOCKETS]
+ * [NBD_SOCK_ITEM]
+ * [NBD_SOCK_FD]
+ * [NBD_SOCK_ITEM]
+ * [NBD_SOCK_FD]
+ */
+enum {
+ NBD_SOCK_ITEM_UNSPEC,
+ NBD_SOCK_ITEM,
+ __NBD_SOCK_ITEM_MAX,
+};
+#define NBD_SOCK_ITEM_MAX (__NBD_SOCK_ITEM_MAX - 1)
+
+enum {
+ NBD_SOCK_UNSPEC,
+ NBD_SOCK_FD,
+ __NBD_SOCK_MAX,
+};
+#define NBD_SOCK_MAX (__NBD_SOCK_MAX - 1)
+
+enum {
+ NBD_CMD_UNSPEC,
+ NBD_CMD_CONNECT,
+ NBD_CMD_DISCONNECT,
+ NBD_CMD_RECONFIGURE,
+ NBD_CMD_LINK_DEAD,
+ NBD_CMD_STATUS,
+ __NBD_CMD_MAX,
+};
+#define NBD_CMD_MAX (__NBD_CMD_MAX - 1)
+
+#endif /* _UAPILINUX_NBD_NETLINK_H */
diff --git a/src/tools/rbd_nbd/rbd-nbd.cc b/src/tools/rbd_nbd/rbd-nbd.cc
new file mode 100644
index 000000000..e348bd8fe
--- /dev/null
+++ b/src/tools/rbd_nbd/rbd-nbd.cc
@@ -0,0 +1,2441 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * rbd-nbd - RBD in userspace
+ *
+ * Copyright (C) 2015 - 2016 Kylin Corporation
+ *
+ * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com>
+ * Li Wang <li.wang@kylin-cloud.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+*/
+
+#include "acconfig.h"
+#include "include/int_types.h"
+#include "include/scope_guard.h"
+
+#include <boost/endian/conversion.hpp>
+
+#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/nbd.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+
+#include "nbd-netlink.h"
+#include <libnl3/netlink/genl/genl.h>
+#include <libnl3/netlink/genl/ctrl.h>
+#include <libnl3/netlink/genl/mngt.h>
+
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <regex>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include "common/Formatter.h"
+#include "common/Preforker.h"
+#include "common/SubProcess.h"
+#include "common/TextTable.h"
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/event_socket.h"
+#include "common/module.h"
+#include "common/safe_io.h"
+#include "common/version.h"
+
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+#include "include/stringify.h"
+#include "include/xlist.h"
+
+#include "mon/MonClient.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-nbd: "
+
+using namespace std;
+namespace fs = std::filesystem;
+
+using boost::endian::big_to_native;
+using boost::endian::native_to_big;
+
+enum Command {
+ None,
+ Map,
+ Unmap,
+ Attach,
+ Detach,
+ List
+};
+
+struct Config {
+ int nbds_max = 0;
+ int max_part = 255;
+ int io_timeout = -1;
+ int reattach_timeout = 30;
+
+ bool exclusive = false;
+ bool notrim = false;
+ bool quiesce = false;
+ bool readonly = false;
+ bool set_max_part = false;
+ bool try_netlink = false;
+ bool show_cookie = false;
+
+ std::string poolname;
+ std::string nsname;
+ std::string imgname;
+ std::string snapname;
+ std::string devpath;
+ std::string quiesce_hook = CMAKE_INSTALL_LIBEXECDIR "/rbd-nbd/rbd-nbd_quiesce";
+
+ std::string format;
+ bool pretty_format = false;
+
+ std::vector<librbd::encryption_format_t> encryption_formats;
+ std::vector<std::string> encryption_passphrase_files;
+
+ Command command = None;
+ int pid = 0;
+ std::string cookie;
+ uint64_t snapid = CEPH_NOSNAP;
+
+ std::string image_spec() const {
+ std::string spec = poolname + "/";
+
+ if (!nsname.empty()) {
+ spec += nsname + "/";
+ }
+ spec += imgname;
+
+ if (!snapname.empty()) {
+ spec += "@" + snapname;
+ }
+
+ return spec;
+ }
+};
+
+static void usage()
+{
+ std::cout << "Usage: rbd-nbd [options] map <image-or-snap-spec> Map image to nbd device\n"
+ << " detach <device|image-or-snap-spec> Detach image from nbd device\n"
+ << " [options] attach <image-or-snap-spec> Attach image to nbd device\n"
+ << " unmap <device|image-or-snap-spec> Unmap nbd device\n"
+ << " [options] list-mapped List mapped nbd devices\n"
+ << "Map and attach options:\n"
+ << " --device <device path> Specify nbd device path (/dev/nbd{num})\n"
+ << " --encryption-format luks|luks1|luks2\n"
+ << " Image encryption format (default: luks)\n"
+ << " --encryption-passphrase-file Path of file containing passphrase for unlocking image encryption\n"
+ << " --exclusive Forbid writes by other clients\n"
+ << " --notrim Turn off trim/discard\n"
+ << " --io-timeout <sec> Set nbd IO timeout\n"
+ << " --max_part <limit> Override for module param max_part\n"
+ << " --nbds_max <limit> Override for module param nbds_max\n"
+ << " --quiesce Use quiesce callbacks\n"
+ << " --quiesce-hook <path> Specify quiesce hook path\n"
+ << " (default: " << Config().quiesce_hook << ")\n"
+ << " --read-only Map read-only\n"
+ << " --reattach-timeout <sec> Set nbd re-attach timeout\n"
+ << " (default: " << Config().reattach_timeout << ")\n"
+ << " --try-netlink Use the nbd netlink interface\n"
+ << " --show-cookie Show device cookie\n"
+ << " --cookie Specify device cookie\n"
+ << " --snap-id <snap-id> Specify snapshot by ID instead of by name\n"
+ << "\n"
+ << "Unmap and detach options:\n"
+ << " --device <device path> Specify nbd device path (/dev/nbd{num})\n"
+ << " --snap-id <snap-id> Specify snapshot by ID instead of by name\n"
+ << "\n"
+ << "List options:\n"
+ << " --format plain|json|xml Output format (default: plain)\n"
+ << " --pretty-format Pretty formatting (json and xml)\n"
+ << std::endl;
+ generic_server_usage();
+}
+
+static int nbd = -1;
+static int nbd_index = -1;
+static EventSocket terminate_event_sock;
+
+#define RBD_NBD_BLKSIZE 512UL
+
+#define HELP_INFO 1
+#define VERSION_INFO 2
+
+static int parse_args(vector<const char*>& args, std::ostream *err_msg,
+ Config *cfg);
+static int netlink_disconnect(int index);
+static int netlink_resize(int nbd_index, uint64_t size);
+
+static int run_quiesce_hook(const std::string &quiesce_hook,
+ const std::string &devpath,
+ const std::string &command);
+
+static std::string get_cookie(const std::string &devpath);
+
+class NBDServer
+{
+public:
+ uint64_t quiesce_watch_handle = 0;
+
+private:
+ int fd;
+ librbd::Image &image;
+ Config *cfg;
+
+public:
+ NBDServer(int fd, librbd::Image& image, Config *cfg)
+ : fd(fd)
+ , image(image)
+ , cfg(cfg)
+ , reader_thread(*this, &NBDServer::reader_entry)
+ , writer_thread(*this, &NBDServer::writer_entry)
+ , quiesce_thread(*this, &NBDServer::quiesce_entry)
+ {
+ std::vector<librbd::config_option_t> options;
+ image.config_list(&options);
+ for (auto &option : options) {
+ if ((option.name == std::string("rbd_cache") ||
+ option.name == std::string("rbd_cache_writethrough_until_flush")) &&
+ option.value == "false") {
+ allow_internal_flush = true;
+ break;
+ }
+ }
+ }
+
+ Config *get_cfg() const {
+ return cfg;
+ }
+
+private:
+ int terminate_event_fd = -1;
+ ceph::mutex disconnect_lock =
+ ceph::make_mutex("NBDServer::DisconnectLocker");
+ ceph::condition_variable disconnect_cond;
+ std::atomic<bool> terminated = { false };
+ std::atomic<bool> allow_internal_flush = { false };
+
+ struct IOContext
+ {
+ xlist<IOContext*>::item item;
+ NBDServer *server = nullptr;
+ struct nbd_request request;
+ struct nbd_reply reply;
+ bufferlist data;
+ int command = 0;
+
+ IOContext()
+ : item(this)
+ {}
+ };
+
+ friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx);
+
+ ceph::mutex lock = ceph::make_mutex("NBDServer::Locker");
+ ceph::condition_variable cond;
+ xlist<IOContext*> io_pending;
+ xlist<IOContext*> io_finished;
+
+ void io_start(IOContext *ctx)
+ {
+ std::lock_guard l{lock};
+ io_pending.push_back(&ctx->item);
+ }
+
+ void io_finish(IOContext *ctx)
+ {
+ std::lock_guard l{lock};
+ ceph_assert(ctx->item.is_on_list());
+ ctx->item.remove_myself();
+ io_finished.push_back(&ctx->item);
+ cond.notify_all();
+ }
+
+ IOContext *wait_io_finish()
+ {
+ std::unique_lock l{lock};
+ cond.wait(l, [this] {
+ return !io_finished.empty() ||
+ (io_pending.empty() && terminated);
+ });
+
+ if (io_finished.empty())
+ return NULL;
+
+ IOContext *ret = io_finished.front();
+ io_finished.pop_front();
+
+ return ret;
+ }
+
+ void wait_clean()
+ {
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return io_pending.empty(); });
+
+ while(!io_finished.empty()) {
+ std::unique_ptr<IOContext> free_ctx(io_finished.front());
+ io_finished.pop_front();
+ }
+ }
+
+ void assert_clean()
+ {
+ std::unique_lock l{lock};
+
+ ceph_assert(!reader_thread.is_started());
+ ceph_assert(!writer_thread.is_started());
+ ceph_assert(io_pending.empty());
+ ceph_assert(io_finished.empty());
+ }
+
+ static void aio_callback(librbd::completion_t cb, void *arg)
+ {
+ librbd::RBD::AioCompletion *aio_completion =
+ reinterpret_cast<librbd::RBD::AioCompletion*>(cb);
+
+ IOContext *ctx = reinterpret_cast<IOContext *>(arg);
+ int ret = aio_completion->get_return_value();
+
+ dout(20) << __func__ << ": " << *ctx << dendl;
+
+ if (ret == -EINVAL) {
+ // if shrinking an image, a pagecache writeback might reference
+ // extents outside of the range of the new image extents
+ dout(0) << __func__ << ": masking IO out-of-bounds error" << dendl;
+ ctx->data.clear();
+ ret = 0;
+ }
+
+ if (ret < 0) {
+ ctx->reply.error = native_to_big<uint32_t>(-ret);
+ } else if ((ctx->command == NBD_CMD_READ) &&
+ ret < static_cast<int>(ctx->request.len)) {
+ int pad_byte_count = static_cast<int> (ctx->request.len) - ret;
+ ctx->data.append_zero(pad_byte_count);
+ dout(20) << __func__ << ": " << *ctx << ": Pad byte count: "
+ << pad_byte_count << dendl;
+ ctx->reply.error = native_to_big<uint32_t>(0);
+ } else {
+ ctx->reply.error = native_to_big<uint32_t>(0);
+ }
+ ctx->server->io_finish(ctx);
+
+ aio_completion->release();
+ }
+
+ void reader_entry()
+ {
+ struct pollfd poll_fds[2];
+ memset(poll_fds, 0, sizeof(struct pollfd) * 2);
+ poll_fds[0].fd = fd;
+ poll_fds[0].events = POLLIN;
+ poll_fds[1].fd = terminate_event_fd;
+ poll_fds[1].events = POLLIN;
+
+ while (true) {
+ std::unique_ptr<IOContext> ctx(new IOContext());
+ ctx->server = this;
+
+ dout(20) << __func__ << ": waiting for nbd request" << dendl;
+
+ int r = poll(poll_fds, 2, -1);
+ if (r == -1) {
+ if (errno == EINTR) {
+ continue;
+ }
+ r = -errno;
+ derr << "failed to poll nbd: " << cpp_strerror(r) << dendl;
+ goto error;
+ }
+
+ if ((poll_fds[1].revents & POLLIN) != 0) {
+ dout(0) << __func__ << ": terminate received" << dendl;
+ goto signal;
+ }
+
+ if ((poll_fds[0].revents & POLLIN) == 0) {
+ dout(20) << __func__ << ": nothing to read" << dendl;
+ continue;
+ }
+
+ r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request));
+ if (r < 0) {
+ derr << "failed to read nbd request header: " << cpp_strerror(r)
+ << dendl;
+ goto error;
+ }
+
+ if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) {
+ derr << "invalid nbd request header" << dendl;
+ goto signal;
+ }
+
+ ctx->request.from = big_to_native(ctx->request.from);
+ ctx->request.type = big_to_native(ctx->request.type);
+ ctx->request.len = big_to_native(ctx->request.len);
+
+ ctx->reply.magic = native_to_big<uint32_t>(NBD_REPLY_MAGIC);
+ memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle));
+
+ ctx->command = ctx->request.type & 0x0000ffff;
+
+ dout(20) << *ctx << ": start" << dendl;
+
+ switch (ctx->command)
+ {
+ case NBD_CMD_DISC:
+ // NBD_DO_IT will return when pipe is closed
+ dout(0) << "disconnect request received" << dendl;
+ goto signal;
+ case NBD_CMD_WRITE:
+ bufferptr ptr(ctx->request.len);
+ r = safe_read_exact(fd, ptr.c_str(), ctx->request.len);
+ if (r < 0) {
+ derr << *ctx << ": failed to read nbd request data: "
+ << cpp_strerror(r) << dendl;
+ goto error;
+ }
+ ctx->data.push_back(ptr);
+ break;
+ }
+
+ IOContext *pctx = ctx.release();
+ io_start(pctx);
+ librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback);
+ switch (pctx->command)
+ {
+ case NBD_CMD_WRITE:
+ image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c);
+ break;
+ case NBD_CMD_READ:
+ image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c);
+ break;
+ case NBD_CMD_FLUSH:
+ image.aio_flush(c);
+ allow_internal_flush = true;
+ break;
+ case NBD_CMD_TRIM:
+ image.aio_discard(pctx->request.from, pctx->request.len, c);
+ break;
+ default:
+ derr << *pctx << ": invalid request command" << dendl;
+ c->release();
+ goto signal;
+ }
+ }
+error:
+ {
+ int r = netlink_disconnect(nbd_index);
+ if (r == 1) {
+ ioctl(nbd, NBD_DISCONNECT);
+ }
+ }
+signal:
+ std::lock_guard l{lock};
+ terminated = true;
+ cond.notify_all();
+
+ std::lock_guard disconnect_l{disconnect_lock};
+ disconnect_cond.notify_all();
+
+ dout(20) << __func__ << ": terminated" << dendl;
+ }
+
+ void writer_entry()
+ {
+ while (true) {
+ dout(20) << __func__ << ": waiting for io request" << dendl;
+ std::unique_ptr<IOContext> ctx(wait_io_finish());
+ if (!ctx) {
+ dout(20) << __func__ << ": no io requests, terminating" << dendl;
+ goto done;
+ }
+
+ dout(20) << __func__ << ": got: " << *ctx << dendl;
+
+ int r = safe_write(fd, &ctx->reply, sizeof(struct nbd_reply));
+ if (r < 0) {
+ derr << *ctx << ": failed to write reply header: " << cpp_strerror(r)
+ << dendl;
+ goto error;
+ }
+ if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) {
+ r = ctx->data.write_fd(fd);
+ if (r < 0) {
+ derr << *ctx << ": failed to write replay data: " << cpp_strerror(r)
+ << dendl;
+ goto error;
+ }
+ }
+ dout(20) << *ctx << ": finish" << dendl;
+ }
+ error:
+ wait_clean();
+ done:
+ ::shutdown(fd, SHUT_RDWR);
+
+ dout(20) << __func__ << ": terminated" << dendl;
+ }
+
+ bool wait_quiesce() {
+ dout(20) << __func__ << dendl;
+
+ std::unique_lock locker{lock};
+ cond.wait(locker, [this] { return quiesce || terminated; });
+
+ if (terminated) {
+ return false;
+ }
+
+ dout(20) << __func__ << ": got quiesce request" << dendl;
+ return true;
+ }
+
+ void wait_unquiesce(std::unique_lock<ceph::mutex> &locker) {
+ dout(20) << __func__ << dendl;
+
+ cond.wait(locker, [this] { return !quiesce || terminated; });
+
+ dout(20) << __func__ << ": got unquiesce request" << dendl;
+ }
+
+ void wait_inflight_io() {
+ if (!allow_internal_flush) {
+ return;
+ }
+
+ uint64_t features = 0;
+ image.features(&features);
+ if ((features & RBD_FEATURE_EXCLUSIVE_LOCK) != 0) {
+ bool is_owner = false;
+ image.is_exclusive_lock_owner(&is_owner);
+ if (!is_owner) {
+ return;
+ }
+ }
+
+ dout(20) << __func__ << dendl;
+
+ int r = image.flush();
+ if (r < 0) {
+ derr << "flush failed: " << cpp_strerror(r) << dendl;
+ }
+ }
+
+ void quiesce_entry()
+ {
+ ceph_assert(cfg->quiesce);
+
+ while (wait_quiesce()) {
+
+ int r = run_quiesce_hook(cfg->quiesce_hook, cfg->devpath, "quiesce");
+
+ wait_inflight_io();
+
+ {
+ std::unique_lock locker{lock};
+ ceph_assert(quiesce == true);
+
+ image.quiesce_complete(quiesce_watch_handle, r);
+
+ if (r < 0) {
+ quiesce = false;
+ continue;
+ }
+
+ wait_unquiesce(locker);
+ }
+
+ run_quiesce_hook(cfg->quiesce_hook, cfg->devpath, "unquiesce");
+ }
+
+ dout(20) << __func__ << ": terminated" << dendl;
+ }
+
+ class ThreadHelper : public Thread
+ {
+ public:
+ typedef void (NBDServer::*entry_func)();
+ private:
+ NBDServer &server;
+ entry_func func;
+ public:
+ ThreadHelper(NBDServer &_server, entry_func _func)
+ :server(_server)
+ ,func(_func)
+ {}
+ protected:
+ void* entry() override
+ {
+ (server.*func)();
+ return NULL;
+ }
+ } reader_thread, writer_thread, quiesce_thread;
+
+ bool started = false;
+ bool quiesce = false;
+
+public:
+ void start()
+ {
+ if (!started) {
+ dout(10) << __func__ << ": starting" << dendl;
+
+ started = true;
+
+ terminate_event_fd = eventfd(0, EFD_NONBLOCK);
+ ceph_assert(terminate_event_fd > 0);
+ int r = terminate_event_sock.init(terminate_event_fd,
+ EVENT_SOCKET_TYPE_EVENTFD);
+ ceph_assert(r >= 0);
+
+ reader_thread.create("rbd_reader");
+ writer_thread.create("rbd_writer");
+ if (cfg->quiesce) {
+ quiesce_thread.create("rbd_quiesce");
+ }
+ }
+ }
+
+ void wait_for_disconnect()
+ {
+ if (!started)
+ return;
+
+ std::unique_lock l{disconnect_lock};
+ disconnect_cond.wait(l);
+ }
+
+ void notify_quiesce() {
+ dout(10) << __func__ << dendl;
+
+ ceph_assert(cfg->quiesce);
+
+ std::unique_lock locker{lock};
+ ceph_assert(quiesce == false);
+ quiesce = true;
+ cond.notify_all();
+ }
+
+ void notify_unquiesce() {
+ dout(10) << __func__ << dendl;
+
+ ceph_assert(cfg->quiesce);
+
+ std::unique_lock locker{lock};
+ ceph_assert(quiesce == true);
+ quiesce = false;
+ cond.notify_all();
+ }
+
+ ~NBDServer()
+ {
+ if (started) {
+ dout(10) << __func__ << ": terminating" << dendl;
+
+ terminate_event_sock.notify();
+
+ reader_thread.join();
+ writer_thread.join();
+ if (cfg->quiesce) {
+ quiesce_thread.join();
+ }
+
+ assert_clean();
+
+ close(terminate_event_fd);
+ started = false;
+ }
+ }
+};
+
+std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) {
+
+ os << "[" << std::hex << big_to_native(*((uint64_t *)ctx.request.handle));
+
+ switch (ctx.command)
+ {
+ case NBD_CMD_WRITE:
+ os << " WRITE ";
+ break;
+ case NBD_CMD_READ:
+ os << " READ ";
+ break;
+ case NBD_CMD_FLUSH:
+ os << " FLUSH ";
+ break;
+ case NBD_CMD_TRIM:
+ os << " TRIM ";
+ break;
+ case NBD_CMD_DISC:
+ os << " DISC ";
+ break;
+ default:
+ os << " UNKNOWN(" << ctx.command << ") ";
+ break;
+ }
+
+ os << ctx.request.from << "~" << ctx.request.len << " "
+ << std::dec << big_to_native(ctx.reply.error) << "]";
+
+ return os;
+}
+
+class NBDQuiesceWatchCtx : public librbd::QuiesceWatchCtx
+{
+public:
+ NBDQuiesceWatchCtx(NBDServer *server) : server(server) {
+ }
+
+ void handle_quiesce() override {
+ server->notify_quiesce();
+ }
+
+ void handle_unquiesce() override {
+ server->notify_unquiesce();
+ }
+
+private:
+ NBDServer *server;
+};
+
+class NBDWatchCtx : public librbd::UpdateWatchCtx
+{
+private:
+ int fd;
+ int nbd_index;
+ bool use_netlink;
+ librados::IoCtx &io_ctx;
+ librbd::Image &image;
+ uint64_t size;
+ std::thread handle_notify_thread;
+ ceph::condition_variable cond;
+ ceph::mutex lock = ceph::make_mutex("NBDWatchCtx::Locker");
+ bool notify = false;
+ bool terminated = false;
+
+ bool wait_notify() {
+ dout(10) << __func__ << dendl;
+
+ std::unique_lock locker{lock};
+ cond.wait(locker, [this] { return notify || terminated; });
+
+ if (terminated) {
+ return false;
+ }
+
+ dout(10) << __func__ << ": got notify request" << dendl;
+ notify = false;
+ return true;
+ }
+
+ void handle_notify_entry() {
+ dout(10) << __func__ << dendl;
+
+ while (wait_notify()) {
+ uint64_t new_size;
+ int ret = image.size(&new_size);
+ if (ret < 0) {
+ derr << "getting image size failed: " << cpp_strerror(ret) << dendl;
+ continue;
+ }
+ if (new_size == size) {
+ continue;
+ }
+ dout(5) << "resize detected" << dendl;
+ if (ioctl(fd, BLKFLSBUF, NULL) < 0) {
+ derr << "invalidate page cache failed: " << cpp_strerror(errno)
+ << dendl;
+ }
+ if (use_netlink) {
+ ret = netlink_resize(nbd_index, new_size);
+ } else {
+ ret = ioctl(fd, NBD_SET_SIZE, new_size);
+ if (ret < 0) {
+ derr << "resize failed: " << cpp_strerror(errno) << dendl;
+ }
+ }
+ if (!ret) {
+ size = new_size;
+ }
+ if (ioctl(fd, BLKRRPART, NULL) < 0) {
+ derr << "rescan of partition table failed: " << cpp_strerror(errno)
+ << dendl;
+ }
+ if (image.invalidate_cache() < 0) {
+ derr << "invalidate rbd cache failed" << dendl;
+ }
+ }
+ }
+
+public:
+ NBDWatchCtx(int _fd,
+ int _nbd_index,
+ bool _use_netlink,
+ librados::IoCtx &_io_ctx,
+ librbd::Image &_image,
+ unsigned long _size)
+ : fd(_fd)
+ , nbd_index(_nbd_index)
+ , use_netlink(_use_netlink)
+ , io_ctx(_io_ctx)
+ , image(_image)
+ , size(_size)
+ {
+ handle_notify_thread = make_named_thread("rbd_handle_notify",
+ &NBDWatchCtx::handle_notify_entry,
+ this);
+ }
+
+ ~NBDWatchCtx() override
+ {
+ dout(10) << __func__ << ": terminating" << dendl;
+ std::unique_lock locker{lock};
+ terminated = true;
+ cond.notify_all();
+ locker.unlock();
+
+ handle_notify_thread.join();
+ dout(10) << __func__ << ": finish" << dendl;
+ }
+
+ void handle_notify() override
+ {
+ dout(10) << __func__ << dendl;
+
+ std::unique_lock locker{lock};
+ notify = true;
+ cond.notify_all();
+ }
+};
+
+class NBDListIterator {
+public:
+ bool get(Config *cfg) {
+ while (true) {
+ std::string nbd_path = "/sys/block/nbd" + stringify(m_index);
+ if(access(nbd_path.c_str(), F_OK) != 0) {
+ return false;
+ }
+
+ *cfg = Config();
+ cfg->devpath = "/dev/nbd" + stringify(m_index++);
+
+ int pid;
+ std::ifstream ifs;
+ ifs.open(nbd_path + "/pid", std::ifstream::in);
+ if (!ifs.is_open()) {
+ continue;
+ }
+ ifs >> pid;
+ ifs.close();
+
+ // If the rbd-nbd is re-attached the pid may store garbage
+ // here. We are sure this is the case when it is negative or
+ // zero. Then we just try to find the attached process scanning
+ // /proc fs. If it is positive we check the process with this
+ // pid first and if it is not rbd-nbd fallback to searching the
+ // attached process.
+ do {
+ if (pid <= 0) {
+ pid = find_attached(cfg->devpath);
+ if (pid <= 0) {
+ break;
+ }
+ }
+
+ if (get_mapped_info(pid, cfg) >= 0) {
+ return true;
+ }
+ pid = -1;
+ } while (true);
+ }
+ }
+
+private:
+ int m_index = 0;
+ std::map<int, Config> m_mapped_info_cache;
+
+ int get_mapped_info(int pid, Config *cfg) {
+ ceph_assert(!cfg->devpath.empty());
+
+ auto it = m_mapped_info_cache.find(pid);
+ if (it != m_mapped_info_cache.end()) {
+ if (it->second.devpath != cfg->devpath) {
+ return -EINVAL;
+ }
+ *cfg = it->second;
+ return 0;
+ }
+
+ m_mapped_info_cache[pid] = {};
+
+ int r;
+ std::string path = "/proc/" + stringify(pid) + "/comm";
+ std::ifstream ifs;
+ std::string comm;
+ ifs.open(path.c_str(), std::ifstream::in);
+ if (!ifs.is_open())
+ return -1;
+ ifs >> comm;
+ if (comm != "rbd-nbd") {
+ return -EINVAL;
+ }
+ ifs.close();
+
+ path = "/proc/" + stringify(pid) + "/cmdline";
+ std::string cmdline;
+ std::vector<const char*> args;
+
+ ifs.open(path.c_str(), std::ifstream::in);
+ if (!ifs.is_open())
+ return -1;
+ ifs >> cmdline;
+
+ if (cmdline.empty()) {
+ return -EINVAL;
+ }
+
+ for (unsigned i = 0; i < cmdline.size(); i++) {
+ char *arg = &cmdline[i];
+ if (i == 0) {
+ if (strcmp(basename(arg) , "rbd-nbd") != 0) {
+ return -EINVAL;
+ }
+ } else {
+ args.push_back(arg);
+ }
+
+ while (cmdline[i] != '\0') {
+ i++;
+ }
+ }
+
+ std::ostringstream err_msg;
+ Config c;
+ r = parse_args(args, &err_msg, &c);
+ if (r < 0) {
+ return r;
+ }
+
+ if (c.command != Map && c.command != Attach) {
+ return -ENOENT;
+ }
+
+ c.pid = pid;
+ m_mapped_info_cache.erase(pid);
+ if (!c.devpath.empty()) {
+ m_mapped_info_cache[pid] = c;
+ if (c.devpath != cfg->devpath) {
+ return -ENOENT;
+ }
+ } else {
+ c.devpath = cfg->devpath;
+ }
+
+ c.cookie = get_cookie(cfg->devpath);
+ *cfg = c;
+ return 0;
+ }
+
+ int find_attached(const std::string &devpath) {
+ for (auto &entry : fs::directory_iterator("/proc")) {
+ if (!fs::is_directory(entry.status())) {
+ continue;
+ }
+
+ int pid;
+ try {
+ pid = boost::lexical_cast<uint64_t>(entry.path().filename().c_str());
+ } catch (boost::bad_lexical_cast&) {
+ continue;
+ }
+
+ Config cfg;
+ cfg.devpath = devpath;
+ if (get_mapped_info(pid, &cfg) >=0 && cfg.command == Attach) {
+ return cfg.pid;
+ }
+ }
+
+ return -1;
+ }
+};
+
+struct EncryptionOptions {
+ std::vector<librbd::encryption_spec_t> specs;
+
+ ~EncryptionOptions() {
+ for (auto& spec : specs) {
+ switch (spec.format) {
+ case RBD_ENCRYPTION_FORMAT_LUKS: {
+ auto opts =
+ static_cast<librbd::encryption_luks_format_options_t*>(spec.opts);
+ ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(),
+ opts->passphrase.size());
+ delete opts;
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS1: {
+ auto opts =
+ static_cast<librbd::encryption_luks1_format_options_t*>(spec.opts);
+ ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(),
+ opts->passphrase.size());
+ delete opts;
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS2: {
+ auto opts =
+ static_cast<librbd::encryption_luks2_format_options_t*>(spec.opts);
+ ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(),
+ opts->passphrase.size());
+ delete opts;
+ break;
+ }
+ default:
+ ceph_abort();
+ }
+ }
+ }
+};
+
+static std::string get_cookie(const std::string &devpath)
+{
+ std::string cookie;
+ std::ifstream ifs;
+ std::string path = "/sys/block/" + devpath.substr(sizeof("/dev/") - 1) + "/backend";
+
+ ifs.open(path, std::ifstream::in);
+ if (ifs.is_open()) {
+ std::getline(ifs, cookie);
+ ifs.close();
+ }
+ return cookie;
+}
+
+static int load_module(Config *cfg)
+{
+ ostringstream param;
+ int ret;
+
+ if (cfg->nbds_max)
+ param << "nbds_max=" << cfg->nbds_max;
+
+ if (cfg->max_part)
+ param << " max_part=" << cfg->max_part;
+
+ if (!access("/sys/module/nbd", F_OK)) {
+ if (cfg->nbds_max || cfg->set_max_part)
+ cerr << "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded"
+ << std::endl;
+ return 0;
+ }
+
+ ret = module_load("nbd", param.str().c_str());
+ if (ret < 0)
+ cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-ret)
+ << std::endl;
+
+ return ret;
+}
+
+static int check_device_size(int nbd_index, unsigned long expected_size)
+{
+ // There are bugs with some older kernel versions that result in an
+ // overflow for large image sizes. This check is to ensure we are
+ // not affected.
+
+ unsigned long size = 0;
+ std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size";
+ std::ifstream ifs;
+ ifs.open(path.c_str(), std::ifstream::in);
+ if (!ifs.is_open()) {
+ cerr << "rbd-nbd: failed to open " << path << std::endl;
+ return -EINVAL;
+ }
+ ifs >> size;
+ size *= RBD_NBD_BLKSIZE;
+
+ if (size == 0) {
+ // Newer kernel versions will report real size only after nbd
+ // connect. Assume this is the case and return success.
+ return 0;
+ }
+
+ if (size != expected_size) {
+ cerr << "rbd-nbd: kernel reported invalid device size (" << size
+ << ", expected " << expected_size << ")" << std::endl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int parse_nbd_index(const std::string& devpath)
+{
+ int index, ret;
+
+ ret = sscanf(devpath.c_str(), "/dev/nbd%d", &index);
+ if (ret <= 0) {
+ // mean an early matching failure. But some cases need a negative value.
+ if (ret == 0)
+ ret = -EINVAL;
+ cerr << "rbd-nbd: invalid device path: " << devpath
+ << " (expected /dev/nbd{num})" << std::endl;
+ return ret;
+ }
+
+ return index;
+}
+
+static int try_ioctl_setup(Config *cfg, int fd, uint64_t size,
+ uint64_t blksize, uint64_t flags)
+{
+ int index = 0, r;
+
+ if (cfg->devpath.empty()) {
+ char dev[64];
+ const char *path = "/sys/module/nbd/parameters/nbds_max";
+ int nbds_max = -1;
+ if (access(path, F_OK) == 0) {
+ std::ifstream ifs;
+ ifs.open(path, std::ifstream::in);
+ if (ifs.is_open()) {
+ ifs >> nbds_max;
+ ifs.close();
+ }
+ }
+
+ while (true) {
+ snprintf(dev, sizeof(dev), "/dev/nbd%d", index);
+
+ nbd = open(dev, O_RDWR);
+ if (nbd < 0) {
+ if (nbd == -EPERM && nbds_max != -1 && index < (nbds_max-1)) {
+ ++index;
+ continue;
+ }
+ r = nbd;
+ cerr << "rbd-nbd: failed to find unused device" << std::endl;
+ goto done;
+ }
+
+ r = ioctl(nbd, NBD_SET_SOCK, fd);
+ if (r < 0) {
+ close(nbd);
+ ++index;
+ continue;
+ }
+
+ cfg->devpath = dev;
+ break;
+ }
+ } else {
+ r = parse_nbd_index(cfg->devpath);
+ if (r < 0)
+ goto done;
+ index = r;
+
+ nbd = open(cfg->devpath.c_str(), O_RDWR);
+ if (nbd < 0) {
+ r = nbd;
+ cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
+ goto done;
+ }
+
+ r = ioctl(nbd, NBD_SET_SOCK, fd);
+ if (r < 0) {
+ r = -errno;
+ cerr << "rbd-nbd: the device " << cfg->devpath << " is busy" << std::endl;
+ close(nbd);
+ goto done;
+ }
+ }
+
+ r = ioctl(nbd, NBD_SET_BLKSIZE, blksize);
+ if (r < 0) {
+ r = -errno;
+ cerr << "rbd-nbd: NBD_SET_BLKSIZE failed" << std::endl;
+ goto close_nbd;
+ }
+
+ r = ioctl(nbd, NBD_SET_SIZE, size);
+ if (r < 0) {
+ cerr << "rbd-nbd: NBD_SET_SIZE failed" << std::endl;
+ r = -errno;
+ goto close_nbd;
+ }
+
+ ioctl(nbd, NBD_SET_FLAGS, flags);
+
+ if (cfg->io_timeout >= 0) {
+ r = ioctl(nbd, NBD_SET_TIMEOUT, (unsigned long)cfg->io_timeout);
+ if (r < 0) {
+ r = -errno;
+ cerr << "rbd-nbd: failed to set IO timeout: " << cpp_strerror(r)
+ << std::endl;
+ goto close_nbd;
+ }
+ }
+
+ dout(10) << "ioctl setup complete for " << cfg->devpath << dendl;
+ nbd_index = index;
+ return 0;
+
+close_nbd:
+ if (r < 0) {
+ ioctl(nbd, NBD_CLEAR_SOCK);
+ cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl;
+ }
+ close(nbd);
+done:
+ return r;
+}
+
+static void netlink_cleanup(struct nl_sock *sock)
+{
+ if (!sock)
+ return;
+
+ nl_close(sock);
+ nl_socket_free(sock);
+}
+
+static struct nl_sock *netlink_init(int *id)
+{
+ struct nl_sock *sock;
+ int ret;
+
+ sock = nl_socket_alloc();
+ if (!sock) {
+ cerr << "rbd-nbd: Could not allocate netlink socket." << std::endl;
+ return NULL;
+ }
+
+ ret = genl_connect(sock);
+ if (ret < 0) {
+ cerr << "rbd-nbd: Could not connect netlink socket. Error " << ret
+ << std::endl;
+ goto free_sock;
+ }
+
+ *id = genl_ctrl_resolve(sock, "nbd");
+ if (*id < 0)
+ // nbd netlink interface not supported.
+ goto close_sock;
+
+ return sock;
+
+close_sock:
+ nl_close(sock);
+free_sock:
+ nl_socket_free(sock);
+ return NULL;
+}
+
+static int netlink_disconnect(int index)
+{
+ struct nl_sock *sock;
+ struct nl_msg *msg;
+ int ret, nl_id;
+
+ sock = netlink_init(&nl_id);
+ if (!sock)
+ // Try ioctl
+ return 1;
+
+ nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);
+
+ msg = nlmsg_alloc();
+ if (!msg) {
+ cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
+ goto free_sock;
+ }
+
+ if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
+ NBD_CMD_DISCONNECT, 0)) {
+ cerr << "rbd-nbd: Could not setup message." << std::endl;
+ goto nla_put_failure;
+ }
+
+ NLA_PUT_U32(msg, NBD_ATTR_INDEX, index);
+
+ ret = nl_send_sync(sock, msg);
+ netlink_cleanup(sock);
+ if (ret < 0) {
+ cerr << "rbd-nbd: netlink disconnect failed: " << nl_geterror(-ret)
+ << std::endl;
+ return -EIO;
+ }
+
+ return 0;
+
+nla_put_failure:
+ nlmsg_free(msg);
+free_sock:
+ netlink_cleanup(sock);
+ return -EIO;
+}
+
+static int netlink_disconnect_by_path(const std::string& devpath)
+{
+ int index;
+
+ index = parse_nbd_index(devpath);
+ if (index < 0)
+ return index;
+
+ return netlink_disconnect(index);
+}
+
+static int netlink_resize(int nbd_index, uint64_t size)
+{
+ struct nl_sock *sock;
+ struct nl_msg *msg;
+ int nl_id, ret;
+
+ sock = netlink_init(&nl_id);
+ if (!sock) {
+ cerr << "rbd-nbd: Netlink interface not supported." << std::endl;
+ return 1;
+ }
+
+ nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);
+
+ msg = nlmsg_alloc();
+ if (!msg) {
+ cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
+ goto free_sock;
+ }
+
+ if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
+ NBD_CMD_RECONFIGURE, 0)) {
+ cerr << "rbd-nbd: Could not setup message." << std::endl;
+ goto free_msg;
+ }
+
+ NLA_PUT_U32(msg, NBD_ATTR_INDEX, nbd_index);
+ NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size);
+
+ ret = nl_send_sync(sock, msg);
+ if (ret < 0) {
+ cerr << "rbd-nbd: netlink resize failed: " << nl_geterror(ret) << std::endl;
+ goto free_sock;
+ }
+
+ netlink_cleanup(sock);
+ dout(10) << "netlink resize complete for nbd" << nbd_index << dendl;
+ return 0;
+
+nla_put_failure:
+free_msg:
+ nlmsg_free(msg);
+free_sock:
+ netlink_cleanup(sock);
+ return -EIO;
+}
+
+static int netlink_connect_cb(struct nl_msg *msg, void *arg)
+{
+ struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlmsg_hdr(msg));
+ Config *cfg = (Config *)arg;
+ struct nlattr *msg_attr[NBD_ATTR_MAX + 1];
+ uint32_t index;
+ int ret;
+
+ ret = nla_parse(msg_attr, NBD_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+ if (ret) {
+ cerr << "rbd-nbd: Unsupported netlink reply" << std::endl;
+ return -NLE_MSGTYPE_NOSUPPORT;
+ }
+
+ if (!msg_attr[NBD_ATTR_INDEX]) {
+ cerr << "rbd-nbd: netlink connect reply missing device index." << std::endl;
+ return -NLE_MSGTYPE_NOSUPPORT;
+ }
+
+ index = nla_get_u32(msg_attr[NBD_ATTR_INDEX]);
+ cfg->devpath = "/dev/nbd" + stringify(index);
+ nbd_index = index;
+
+ return NL_OK;
+}
+
+static int netlink_connect(Config *cfg, struct nl_sock *sock, int nl_id, int fd,
+ uint64_t size, uint64_t flags, bool reconnect)
+{
+ struct nlattr *sock_attr;
+ struct nlattr *sock_opt;
+ struct nl_msg *msg;
+ int ret;
+
+ if (reconnect) {
+ dout(10) << "netlink try reconnect for " << cfg->devpath << dendl;
+
+ nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);
+ } else {
+ nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, netlink_connect_cb,
+ cfg);
+ }
+
+ msg = nlmsg_alloc();
+ if (!msg) {
+ cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
+ return -ENOMEM;
+ }
+
+ if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
+ reconnect ? NBD_CMD_RECONFIGURE : NBD_CMD_CONNECT, 0)) {
+ cerr << "rbd-nbd: Could not setup message." << std::endl;
+ goto free_msg;
+ }
+
+ if (!cfg->devpath.empty()) {
+ ret = parse_nbd_index(cfg->devpath);
+ if (ret < 0)
+ goto free_msg;
+
+ NLA_PUT_U32(msg, NBD_ATTR_INDEX, ret);
+ if (reconnect) {
+ nbd_index = ret;
+ }
+ }
+
+ if (cfg->io_timeout >= 0)
+ NLA_PUT_U64(msg, NBD_ATTR_TIMEOUT, cfg->io_timeout);
+
+ NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size);
+ NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, RBD_NBD_BLKSIZE);
+ NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags);
+ NLA_PUT_U64(msg, NBD_ATTR_DEAD_CONN_TIMEOUT, cfg->reattach_timeout);
+ if (!cfg->cookie.empty())
+ NLA_PUT_STRING(msg, NBD_ATTR_BACKEND_IDENTIFIER, cfg->cookie.c_str());
+
+ sock_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS);
+ if (!sock_attr) {
+ cerr << "rbd-nbd: Could not init sockets in netlink message." << std::endl;
+ goto free_msg;
+ }
+
+ sock_opt = nla_nest_start(msg, NBD_SOCK_ITEM);
+ if (!sock_opt) {
+ cerr << "rbd-nbd: Could not init sock in netlink message." << std::endl;
+ goto free_msg;
+ }
+
+ NLA_PUT_U32(msg, NBD_SOCK_FD, fd);
+ nla_nest_end(msg, sock_opt);
+ nla_nest_end(msg, sock_attr);
+
+ ret = nl_send_sync(sock, msg);
+ if (ret < 0) {
+ cerr << "rbd-nbd: netlink connect failed: " << nl_geterror(ret)
+ << std::endl;
+ return -EIO;
+ }
+
+ dout(10) << "netlink connect complete for " << cfg->devpath << dendl;
+ return 0;
+
+nla_put_failure:
+free_msg:
+ nlmsg_free(msg);
+ return -EIO;
+}
+
+static int try_netlink_setup(Config *cfg, int fd, uint64_t size, uint64_t flags,
+ bool reconnect)
+{
+ struct nl_sock *sock;
+ int nl_id, ret;
+
+ sock = netlink_init(&nl_id);
+ if (!sock) {
+ cerr << "rbd-nbd: Netlink interface not supported. Using ioctl interface."
+ << std::endl;
+ return 1;
+ }
+
+ dout(10) << "netlink interface supported." << dendl;
+
+ ret = netlink_connect(cfg, sock, nl_id, fd, size, flags, reconnect);
+ netlink_cleanup(sock);
+
+ if (ret != 0)
+ return ret;
+
+ nbd = open(cfg->devpath.c_str(), O_RDWR);
+ if (nbd < 0) {
+ cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
+ return nbd;
+ }
+
+ return 0;
+}
+
+static int run_quiesce_hook(const std::string &quiesce_hook,
+ const std::string &devpath,
+ const std::string &command) {
+ dout(10) << __func__ << ": " << quiesce_hook << " " << devpath << " "
+ << command << dendl;
+
+ SubProcess hook(quiesce_hook.c_str(), SubProcess::CLOSE, SubProcess::PIPE,
+ SubProcess::PIPE);
+ hook.add_cmd_args(devpath.c_str(), command.c_str(), NULL);
+ bufferlist err;
+ int r = hook.spawn();
+ if (r < 0) {
+ err.append("subprocess spawn failed");
+ } else {
+ err.read_fd(hook.get_stderr(), 16384);
+ r = hook.join();
+ if (r > 0) {
+ r = -r;
+ }
+ }
+ if (r < 0) {
+ derr << __func__ << ": " << quiesce_hook << " " << devpath << " "
+ << command << " failed: " << err.to_str() << dendl;
+ } else {
+ dout(10) << " succeeded: " << err.to_str() << dendl;
+ }
+
+ return r;
+}
+
+static void handle_signal(int signum)
+{
+ ceph_assert(signum == SIGINT || signum == SIGTERM);
+ derr << "*** Got signal " << sig_str(signum) << " ***" << dendl;
+
+ dout(20) << __func__ << ": " << "notifying terminate" << dendl;
+
+ ceph_assert(terminate_event_sock.is_valid());
+ terminate_event_sock.notify();
+}
+
+static NBDServer *start_server(int fd, librbd::Image& image, Config *cfg)
+{
+ NBDServer *server;
+
+ server = new NBDServer(fd, image, cfg);
+ server->start();
+
+ init_async_signal_handler();
+ register_async_signal_handler(SIGHUP, sighup_handler);
+ register_async_signal_handler_oneshot(SIGINT, handle_signal);
+ register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+ return server;
+}
+
+static void run_server(Preforker& forker, NBDServer *server, bool netlink_used)
+{
+ if (g_conf()->daemonize) {
+ global_init_postfork_finish(g_ceph_context);
+ forker.daemonize();
+ }
+
+ if (netlink_used)
+ server->wait_for_disconnect();
+ else
+ ioctl(nbd, NBD_DO_IT);
+
+ unregister_async_signal_handler(SIGHUP, sighup_handler);
+ unregister_async_signal_handler(SIGINT, handle_signal);
+ unregister_async_signal_handler(SIGTERM, handle_signal);
+ shutdown_async_signal_handler();
+}
+
+// Eventually it should be removed when pidfd_open is widely supported.
+
+static int wait_for_terminate_legacy(int pid, int timeout)
+{
+ for (int i = 0; ; i++) {
+ if (kill(pid, 0) == -1) {
+ if (errno == ESRCH) {
+ return 0;
+ }
+ int r = -errno;
+ cerr << "rbd-nbd: kill(" << pid << ", 0) failed: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ if (i >= timeout * 2) {
+ break;
+ }
+ usleep(500000);
+ }
+
+ cerr << "rbd-nbd: waiting for process exit timed out" << std::endl;
+ return -ETIMEDOUT;
+}
+
+// Eventually it should be replaced with glibc' pidfd_open
+// when it is widely available.
+
+#ifdef __NR_pidfd_open
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+ return syscall(__NR_pidfd_open, pid, flags);
+}
+#else
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+ errno = ENOSYS;
+ return -1;
+}
+#endif
+
+static int wait_for_terminate(int pid, int timeout)
+{
+ int fd = pidfd_open(pid, 0);
+ if (fd == -1) {
+ if (errno == ENOSYS) {
+ return wait_for_terminate_legacy(pid, timeout);
+ }
+ if (errno == ESRCH) {
+ return 0;
+ }
+ int r = -errno;
+ cerr << "rbd-nbd: pidfd_open(" << pid << ") failed: "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ struct pollfd poll_fds[1];
+ memset(poll_fds, 0, sizeof(struct pollfd));
+ poll_fds[0].fd = fd;
+ poll_fds[0].events = POLLIN;
+
+ int r = poll(poll_fds, 1, timeout * 1000);
+ if (r == -1) {
+ r = -errno;
+ cerr << "rbd-nbd: failed to poll rbd-nbd process: " << cpp_strerror(r)
+ << std::endl;
+ goto done;
+ } else {
+ r = 0;
+ }
+
+ if ((poll_fds[0].revents & POLLIN) == 0) {
+ cerr << "rbd-nbd: waiting for process exit timed out" << std::endl;
+ r = -ETIMEDOUT;
+ }
+
+done:
+ close(fd);
+
+ return r;
+}
+
+static int do_map(int argc, const char *argv[], Config *cfg, bool reconnect)
+{
+ int r;
+
+ librados::Rados rados;
+ librbd::RBD rbd;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+
+ int read_only = 0;
+ unsigned long flags;
+ unsigned long size;
+ unsigned long blksize = RBD_NBD_BLKSIZE;
+ bool use_netlink;
+
+ int fd[2];
+
+ librbd::image_info_t info;
+
+ Preforker forker;
+ NBDServer *server;
+
+ auto args = argv_to_vec(argc, argv);
+ if (args.empty()) {
+ cerr << argv[0] << ": -h or --help for usage" << std::endl;
+ exit(1);
+ }
+ if (ceph_argparse_need_usage(args)) {
+ usage();
+ exit(0);
+ }
+
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_DAEMON,
+ CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+ g_ceph_context->_conf.set_val_or_die("pid_file", "");
+
+ if (global_init_prefork(g_ceph_context) >= 0) {
+ std::string err;
+ r = forker.prefork(err);
+ if (r < 0) {
+ cerr << err << std::endl;
+ return r;
+ }
+ if (forker.is_parent()) {
+ if (forker.parent_wait(err) != 0) {
+ return -ENXIO;
+ }
+ return 0;
+ }
+ global_init_postfork_start(g_ceph_context);
+ }
+
+ common_init_finish(g_ceph_context);
+ global_init_chdir(g_ceph_context);
+
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) {
+ r = -errno;
+ goto close_ret;
+ }
+
+ r = rados.init_with_context(g_ceph_context);
+ if (r < 0)
+ goto close_fd;
+
+ r = rados.connect();
+ if (r < 0)
+ goto close_fd;
+
+ r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx);
+ if (r < 0)
+ goto close_fd;
+
+ io_ctx.set_namespace(cfg->nsname);
+
+ r = rbd.open(io_ctx, image, cfg->imgname.c_str());
+ if (r < 0)
+ goto close_fd;
+
+ if (cfg->exclusive) {
+ r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE);
+ if (r < 0) {
+ cerr << "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r)
+ << std::endl;
+ goto close_fd;
+ }
+ }
+
+ if (cfg->snapid != CEPH_NOSNAP) {
+ r = image.snap_set_by_id(cfg->snapid);
+ if (r < 0) {
+ cerr << "rbd-nbd: failed to set snap id: " << cpp_strerror(r)
+ << std::endl;
+ goto close_fd;
+ }
+ } else if (!cfg->snapname.empty()) {
+ r = image.snap_set(cfg->snapname.c_str());
+ if (r < 0) {
+ cerr << "rbd-nbd: failed to set snap name: " << cpp_strerror(r)
+ << std::endl;
+ goto close_fd;
+ }
+ }
+
+ if (!cfg->encryption_formats.empty()) {
+ EncryptionOptions encryption_options;
+ encryption_options.specs.reserve(cfg->encryption_formats.size());
+
+ for (size_t i = 0; i < cfg->encryption_formats.size(); ++i) {
+ std::ifstream file(cfg->encryption_passphrase_files[i],
+ std::ios::in | std::ios::binary);
+ if (file.fail()) {
+ r = -errno;
+ std::cerr << "rbd-nbd: unable to open passphrase file '"
+ << cfg->encryption_passphrase_files[i] << "': "
+ << cpp_strerror(r) << std::endl;
+ goto close_fd;
+ }
+ std::string passphrase((std::istreambuf_iterator<char>(file)),
+ std::istreambuf_iterator<char>());
+ file.close();
+
+ switch (cfg->encryption_formats[i]) {
+ case RBD_ENCRYPTION_FORMAT_LUKS: {
+ auto opts = new librbd::encryption_luks_format_options_t{
+ std::move(passphrase)};
+ encryption_options.specs.push_back(
+ {RBD_ENCRYPTION_FORMAT_LUKS, opts, sizeof(*opts)});
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS1: {
+ auto opts = new librbd::encryption_luks1_format_options_t{
+ .passphrase = std::move(passphrase)};
+ encryption_options.specs.push_back(
+ {RBD_ENCRYPTION_FORMAT_LUKS1, opts, sizeof(*opts)});
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS2: {
+ auto opts = new librbd::encryption_luks2_format_options_t{
+ .passphrase = std::move(passphrase)};
+ encryption_options.specs.push_back(
+ {RBD_ENCRYPTION_FORMAT_LUKS2, opts, sizeof(*opts)});
+ break;
+ }
+ default:
+ ceph_abort();
+ }
+ }
+
+ r = image.encryption_load2(encryption_options.specs.data(),
+ encryption_options.specs.size());
+ if (r != 0) {
+ cerr << "rbd-nbd: failed to load encryption: " << cpp_strerror(r)
+ << std::endl;
+ goto close_fd;
+ }
+
+ // luks2 block size can vary upto 4096, while luks1 always uses 512
+ // currently we don't have an rbd API for querying the loaded encryption
+ blksize = 4096;
+ }
+
+ r = image.stat(info, sizeof(info));
+ if (r < 0)
+ goto close_fd;
+
+ flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_HAS_FLAGS;
+ if (!cfg->notrim) {
+ flags |= NBD_FLAG_SEND_TRIM;
+ }
+ if (!cfg->snapname.empty() || cfg->readonly) {
+ flags |= NBD_FLAG_READ_ONLY;
+ read_only = 1;
+ }
+
+ if (info.size > ULONG_MAX) {
+ r = -EFBIG;
+ cerr << "rbd-nbd: image is too large (" << byte_u_t(info.size)
+ << ", max is " << byte_u_t(ULONG_MAX) << ")" << std::endl;
+ goto close_fd;
+ }
+
+ size = info.size;
+
+ r = load_module(cfg);
+ if (r < 0)
+ goto close_fd;
+
+ server = start_server(fd[1], image, cfg);
+
+ use_netlink = cfg->try_netlink || reconnect;
+ if (use_netlink) {
+ // generate when the cookie is not supplied at CLI
+ if (!reconnect && cfg->cookie.empty()) {
+ uuid_d uuid_gen;
+ uuid_gen.generate_random();
+ cfg->cookie = uuid_gen.to_string();
+ }
+ r = try_netlink_setup(cfg, fd[0], size, flags, reconnect);
+ if (r < 0) {
+ goto free_server;
+ } else if (r == 1) {
+ use_netlink = false;
+ }
+ }
+
+ if (!use_netlink) {
+ r = try_ioctl_setup(cfg, fd[0], size, blksize, flags);
+ if (r < 0)
+ goto free_server;
+ }
+
+ r = check_device_size(nbd_index, size);
+ if (r < 0)
+ goto close_nbd;
+
+ r = ioctl(nbd, BLKROSET, (unsigned long) &read_only);
+ if (r < 0) {
+ r = -errno;
+ goto close_nbd;
+ }
+
+ {
+ NBDQuiesceWatchCtx quiesce_watch_ctx(server);
+ if (cfg->quiesce) {
+ r = image.quiesce_watch(&quiesce_watch_ctx,
+ &server->quiesce_watch_handle);
+ if (r < 0) {
+ goto close_nbd;
+ }
+ }
+
+ uint64_t handle;
+
+ NBDWatchCtx watch_ctx(nbd, nbd_index, use_netlink, io_ctx, image,
+ info.size);
+ r = image.update_watch(&watch_ctx, &handle);
+ if (r < 0)
+ goto close_nbd;
+
+ std::string cookie;
+ if (use_netlink) {
+ cookie = get_cookie(cfg->devpath);
+ ceph_assert(cookie == cfg->cookie || cookie.empty());
+ }
+ if (cfg->show_cookie && !cookie.empty()) {
+ cout << cfg->devpath << " " << cookie << std::endl;
+ } else {
+ cout << cfg->devpath << std::endl;
+ }
+
+ run_server(forker, server, use_netlink);
+
+ if (cfg->quiesce) {
+ r = image.quiesce_unwatch(server->quiesce_watch_handle);
+ ceph_assert(r == 0);
+ }
+
+ r = image.update_unwatch(handle);
+ ceph_assert(r == 0);
+ }
+
+close_nbd:
+ if (r < 0) {
+ if (use_netlink) {
+ netlink_disconnect(nbd_index);
+ } else {
+ ioctl(nbd, NBD_CLEAR_SOCK);
+ cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r)
+ << std::endl;
+ }
+ }
+ close(nbd);
+free_server:
+ delete server;
+close_fd:
+ close(fd[0]);
+ close(fd[1]);
+close_ret:
+ image.close();
+ io_ctx.close();
+ rados.shutdown();
+
+ forker.exit(r < 0 ? EXIT_FAILURE : 0);
+ // Unreachable;
+ return r;
+}
+
+static int do_detach(Config *cfg)
+{
+ int r = kill(cfg->pid, SIGTERM);
+ if (r == -1) {
+ r = -errno;
+ cerr << "rbd-nbd: failed to terminate " << cfg->pid << ": "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ return wait_for_terminate(cfg->pid, cfg->reattach_timeout);
+}
+
+static int do_unmap(Config *cfg)
+{
+ /*
+ * The netlink disconnect call supports devices setup with netlink or ioctl,
+ * so we always try that first.
+ */
+ int r = netlink_disconnect_by_path(cfg->devpath);
+ if (r < 0) {
+ return r;
+ }
+
+ if (r == 1) {
+ int nbd = open(cfg->devpath.c_str(), O_RDWR);
+ if (nbd < 0) {
+ cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
+ return nbd;
+ }
+
+ r = ioctl(nbd, NBD_DISCONNECT);
+ if (r < 0) {
+ cerr << "rbd-nbd: the device is not used" << std::endl;
+ }
+
+ close(nbd);
+
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (cfg->pid > 0) {
+ r = wait_for_terminate(cfg->pid, cfg->reattach_timeout);
+ }
+
+ return 0;
+}
+
+static int parse_imgpath(const std::string &imgpath, Config *cfg,
+ std::ostream *err_msg) {
+ std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$");
+ std::smatch match;
+ if (!std::regex_match(imgpath, match, pattern)) {
+ std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl;
+ return -EINVAL;
+ }
+
+ if (match[1].matched) {
+ cfg->poolname = match[1];
+ }
+
+ if (match[2].matched) {
+ cfg->nsname = match[2];
+ }
+
+ cfg->imgname = match[3];
+
+ if (match[4].matched)
+ cfg->snapname = match[4];
+
+ return 0;
+}
+
+static int do_list_mapped_devices(const std::string &format, bool pretty_format)
+{
+ bool should_print = false;
+ std::unique_ptr<ceph::Formatter> f;
+ TextTable tbl;
+
+ if (format == "json") {
+ f.reset(new JSONFormatter(pretty_format));
+ } else if (format == "xml") {
+ f.reset(new XMLFormatter(pretty_format));
+ } else if (!format.empty() && format != "plain") {
+ std::cerr << "rbd-nbd: invalid output format: " << format << std::endl;
+ return -EINVAL;
+ }
+
+ if (f) {
+ f->open_array_section("devices");
+ } else {
+ tbl.define_column("id", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("image", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("device", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("cookie", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ Config cfg;
+ NBDListIterator it;
+ while (it.get(&cfg)) {
+ std::string snap = (cfg.snapid != CEPH_NOSNAP ?
+ "@" + std::to_string(cfg.snapid) : cfg.snapname);
+ if (f) {
+ f->open_object_section("device");
+ f->dump_int("id", cfg.pid);
+ f->dump_string("pool", cfg.poolname);
+ f->dump_string("namespace", cfg.nsname);
+ f->dump_string("image", cfg.imgname);
+ f->dump_string("snap", snap);
+ f->dump_string("device", cfg.devpath);
+ f->dump_string("cookie", cfg.cookie);
+ f->close_section();
+ } else {
+ should_print = true;
+ tbl << cfg.pid << cfg.poolname << cfg.nsname << cfg.imgname
+ << (snap.empty() ? "-" : snap) << cfg.devpath << cfg.cookie
+ << TextTable::endrow;
+ }
+ }
+
+ if (f) {
+ f->close_section(); // devices
+ f->flush(std::cout);
+ }
+ if (should_print) {
+ std::cout << tbl;
+ }
+ return 0;
+}
+
+static bool find_mapped_dev_by_spec(Config *cfg, int skip_pid=-1) {
+ Config c;
+ NBDListIterator it;
+ while (it.get(&c)) {
+ if (c.pid != skip_pid &&
+ c.poolname == cfg->poolname && c.nsname == cfg->nsname &&
+ c.imgname == cfg->imgname && c.snapname == cfg->snapname &&
+ (cfg->devpath.empty() || c.devpath == cfg->devpath) &&
+ c.snapid == cfg->snapid) {
+ *cfg = c;
+ return true;
+ }
+ }
+ return false;
+}
+
+static int find_proc_by_dev(Config *cfg) {
+ Config c;
+ NBDListIterator it;
+ while (it.get(&c)) {
+ if (c.devpath == cfg->devpath) {
+ *cfg = c;
+ return true;
+ }
+ }
+ return false;
+}
+
+static int parse_args(vector<const char*>& args, std::ostream *err_msg,
+ Config *cfg) {
+ std::string conf_file_list;
+ std::string cluster;
+ CephInitParameters iparams = ceph_argparse_early_args(
+ args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list);
+
+ ConfigProxy config{false};
+ config->name = iparams.name;
+ config->cluster = cluster;
+
+ if (!conf_file_list.empty()) {
+ config.parse_config_files(conf_file_list.c_str(), nullptr, 0);
+ } else {
+ config.parse_config_files(nullptr, nullptr, 0);
+ }
+ config.parse_env(CEPH_ENTITY_TYPE_CLIENT);
+ config.parse_argv(args);
+ cfg->poolname = config.get_val<std::string>("rbd_default_pool");
+
+ std::vector<const char*>::iterator i;
+ std::ostringstream err;
+ std::string arg_value;
+ long long snapid;
+
+ for (i = args.begin(); i != args.end(); ) {
+ if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
+ return HELP_INFO;
+ } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) {
+ return VERSION_INFO;
+ } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) {
+ } else if (ceph_argparse_witharg(args, i, &cfg->io_timeout, err,
+ "--io-timeout", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-nbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->io_timeout < 0) {
+ *err_msg << "rbd-nbd: Invalid argument for io-timeout!";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, &cfg->nbds_max, err, "--nbds_max", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-nbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->nbds_max < 0) {
+ *err_msg << "rbd-nbd: Invalid argument for nbds_max!";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, &cfg->max_part, err, "--max_part", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-nbd: " << err.str();
+ return -EINVAL;
+ }
+ if ((cfg->max_part < 0) || (cfg->max_part > 255)) {
+ *err_msg << "rbd-nbd: Invalid argument for max_part(0~255)!";
+ return -EINVAL;
+ }
+ cfg->set_max_part = true;
+ } else if (ceph_argparse_flag(args, i, "--quiesce", (char *)NULL)) {
+ cfg->quiesce = true;
+ } else if (ceph_argparse_witharg(args, i, &cfg->quiesce_hook,
+ "--quiesce-hook", (char *)NULL)) {
+ } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) {
+ cfg->readonly = true;
+ } else if (ceph_argparse_witharg(args, i, &cfg->reattach_timeout, err,
+ "--reattach-timeout", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-nbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->reattach_timeout < 0) {
+ *err_msg << "rbd-nbd: Invalid argument for reattach-timeout!";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) {
+ cfg->exclusive = true;
+ } else if (ceph_argparse_flag(args, i, "--notrim", (char *)NULL)) {
+ cfg->notrim = true;
+ } else if (ceph_argparse_witharg(args, i, &cfg->io_timeout, err,
+ "--timeout", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-nbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->io_timeout < 0) {
+ *err_msg << "rbd-nbd: Invalid argument for timeout!";
+ return -EINVAL;
+ }
+ *err_msg << "rbd-nbd: --timeout is deprecated (use --io-timeout)";
+ } else if (ceph_argparse_witharg(args, i, &cfg->format, err, "--format",
+ (char *)NULL)) {
+ } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) {
+ cfg->pretty_format = true;
+ } else if (ceph_argparse_flag(args, i, "--try-netlink", (char *)NULL)) {
+ cfg->try_netlink = true;
+ } else if (ceph_argparse_flag(args, i, "--show-cookie", (char *)NULL)) {
+ cfg->show_cookie = true;
+ } else if (ceph_argparse_witharg(args, i, &cfg->cookie, "--cookie", (char *)NULL)) {
+ } else if (ceph_argparse_witharg(args, i, &snapid, err,
+ "--snap-id", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-nbd: " << err.str();
+ return -EINVAL;
+ }
+ if (snapid < 0) {
+ *err_msg << "rbd-nbd: Invalid argument for snap-id!";
+ return -EINVAL;
+ }
+ cfg->snapid = snapid;
+ } else if (ceph_argparse_witharg(args, i, &arg_value,
+ "--encryption-format", (char *)NULL)) {
+ if (arg_value == "luks1") {
+ cfg->encryption_formats.push_back(RBD_ENCRYPTION_FORMAT_LUKS1);
+ } else if (arg_value == "luks2") {
+ cfg->encryption_formats.push_back(RBD_ENCRYPTION_FORMAT_LUKS2);
+ } else if (arg_value == "luks") {
+ cfg->encryption_formats.push_back(RBD_ENCRYPTION_FORMAT_LUKS);
+ } else {
+ *err_msg << "rbd-nbd: Invalid encryption format";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, &arg_value,
+ "--encryption-passphrase-file",
+ (char *)NULL)) {
+ cfg->encryption_passphrase_files.push_back(arg_value);
+ } else {
+ ++i;
+ }
+ }
+
+ if (cfg->encryption_formats.empty() &&
+ !cfg->encryption_passphrase_files.empty()) {
+ cfg->encryption_formats.resize(cfg->encryption_passphrase_files.size(),
+ RBD_ENCRYPTION_FORMAT_LUKS);
+ }
+
+ if (cfg->encryption_formats.size() != cfg->encryption_passphrase_files.size()) {
+ *err_msg << "rbd-nbd: Encryption formats count does not match "
+ << "passphrase files count";
+ return -EINVAL;
+ }
+
+ Command cmd = None;
+ if (args.begin() != args.end()) {
+ if (strcmp(*args.begin(), "map") == 0) {
+ cmd = Map;
+ } else if (strcmp(*args.begin(), "unmap") == 0) {
+ cmd = Unmap;
+ } else if (strcmp(*args.begin(), "attach") == 0) {
+ cmd = Attach;
+ } else if (strcmp(*args.begin(), "detach") == 0) {
+ cmd = Detach;
+ } else if (strcmp(*args.begin(), "list-mapped") == 0) {
+ cmd = List;
+ } else {
+ *err_msg << "rbd-nbd: unknown command: " << *args.begin();
+ return -EINVAL;
+ }
+ args.erase(args.begin());
+ }
+
+ if (cmd == None) {
+ *err_msg << "rbd-nbd: must specify command";
+ return -EINVAL;
+ }
+
+ std::string cookie;
+ switch (cmd) {
+ case Attach:
+ if (cfg->devpath.empty()) {
+ *err_msg << "rbd-nbd: must specify device to attach";
+ return -EINVAL;
+ }
+ // Allowing attach without --cookie option for kernel without
+ // NBD_ATTR_BACKEND_IDENTIFIER support for compatibility
+ cookie = get_cookie(cfg->devpath);
+ if (!cookie.empty()) {
+ if (cfg->cookie.empty()) {
+ *err_msg << "rbd-nbd: must specify cookie to attach";
+ return -EINVAL;
+ } else if (cookie != cfg->cookie) {
+ *err_msg << "rbd-nbd: cookie mismatch";
+ return -EINVAL;
+ }
+ } else if (!cfg->cookie.empty()) {
+ *err_msg << "rbd-nbd: kernel does not have cookie support";
+ return -EINVAL;
+ }
+ [[fallthrough]];
+ case Map:
+ if (args.begin() == args.end()) {
+ *err_msg << "rbd-nbd: must specify image-or-snap-spec";
+ return -EINVAL;
+ }
+ if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) {
+ return -EINVAL;
+ }
+ args.erase(args.begin());
+ break;
+ case Detach:
+ case Unmap:
+ if (args.begin() == args.end()) {
+ *err_msg << "rbd-nbd: must specify nbd device or image-or-snap-spec";
+ return -EINVAL;
+ }
+ if (boost::starts_with(*args.begin(), "/dev/")) {
+ cfg->devpath = *args.begin();
+ } else {
+ if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) {
+ return -EINVAL;
+ }
+ }
+ args.erase(args.begin());
+ break;
+ default:
+ //shut up gcc;
+ break;
+ }
+
+ if (cfg->snapid != CEPH_NOSNAP && !cfg->snapname.empty()) {
+ *err_msg << "rbd-nbd: use either snapname or snapid, not both";
+ return -EINVAL;
+ }
+
+ if (args.begin() != args.end()) {
+ *err_msg << "rbd-nbd: unknown args: " << *args.begin();
+ return -EINVAL;
+ }
+
+ cfg->command = cmd;
+ return 0;
+}
+
+static int rbd_nbd(int argc, const char *argv[])
+{
+ int r;
+ Config cfg;
+ auto args = argv_to_vec(argc, argv);
+ std::ostringstream err_msg;
+ r = parse_args(args, &err_msg, &cfg);
+ if (r == HELP_INFO) {
+ usage();
+ return 0;
+ } else if (r == VERSION_INFO) {
+ std::cout << pretty_version_to_str() << std::endl;
+ return 0;
+ } else if (r < 0) {
+ cerr << err_msg.str() << std::endl;
+ return r;
+ }
+
+ if (!err_msg.str().empty()) {
+ cerr << err_msg.str() << std::endl;
+ }
+
+ switch (cfg.command) {
+ case Attach:
+ ceph_assert(!cfg.devpath.empty());
+ if (find_mapped_dev_by_spec(&cfg, getpid())) {
+ cerr << "rbd-nbd: " << cfg.devpath << " has process " << cfg.pid
+ << " connected" << std::endl;
+ return -EBUSY;
+ }
+ [[fallthrough]];
+ case Map:
+ if (cfg.imgname.empty()) {
+ cerr << "rbd-nbd: image name was not specified" << std::endl;
+ return -EINVAL;
+ }
+
+ r = do_map(argc, argv, &cfg, cfg.command == Attach);
+ if (r < 0)
+ return -EINVAL;
+ break;
+ case Detach:
+ if (cfg.devpath.empty()) {
+ if (!find_mapped_dev_by_spec(&cfg)) {
+ cerr << "rbd-nbd: " << cfg.image_spec() << " is not mapped"
+ << std::endl;
+ return -ENOENT;
+ }
+ } else if (!find_proc_by_dev(&cfg)) {
+ cerr << "rbd-nbd: no process attached to " << cfg.devpath << " found"
+ << std::endl;
+ return -ENOENT;
+ }
+ r = do_detach(&cfg);
+ if (r < 0)
+ return -EINVAL;
+ break;
+ case Unmap:
+ if (cfg.devpath.empty()) {
+ if (!find_mapped_dev_by_spec(&cfg)) {
+ cerr << "rbd-nbd: " << cfg.image_spec() << " is not mapped"
+ << std::endl;
+ return -ENOENT;
+ }
+ } else if (!find_proc_by_dev(&cfg)) {
+ // still try to send disconnect to the device
+ }
+ r = do_unmap(&cfg);
+ if (r < 0)
+ return -EINVAL;
+ break;
+ case List:
+ r = do_list_mapped_devices(cfg.format, cfg.pretty_format);
+ if (r < 0)
+ return -EINVAL;
+ break;
+ default:
+ usage();
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, const char *argv[])
+{
+ int r = rbd_nbd(argc, argv);
+ if (r < 0) {
+ return EXIT_FAILURE;
+ }
+ return 0;
+}
diff --git a/src/tools/rbd_nbd/rbd-nbd_quiesce b/src/tools/rbd_nbd/rbd-nbd_quiesce
new file mode 100755
index 000000000..a62a12b15
--- /dev/null
+++ b/src/tools/rbd_nbd/rbd-nbd_quiesce
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+echo "$0 $@" >&2
+
+if [ $# -lt 2 ]; then
+ echo "usage: $0 <dev> <cmd>" >&2
+ exit 1
+fi
+
+dev=$1
+cmd=$2
+
+export PATH=/usr/sbin:/usr/bin:/sbin:/bin
+
+findmnt -S "${dev}" -fno TARGET |
+while read mnt; do
+ case "${cmd}" in
+ quiesce)
+ echo "freezing ${mnt}" >&2
+ fsfreeze -f "${mnt}"
+ ;;
+ unquiesce)
+ echo "unfreezing ${mnt}" >&2
+ fsfreeze -u "${mnt}"
+ ;;
+ *)
+ echo "unknown command ${cmd}" >&2
+ exit 1
+ ;;
+ esac
+done
diff --git a/src/tools/rbd_recover_tool/FAQ b/src/tools/rbd_recover_tool/FAQ
new file mode 100644
index 000000000..1655e8530
--- /dev/null
+++ b/src/tools/rbd_recover_tool/FAQ
@@ -0,0 +1,16 @@
+# author: min chen(minchen@ubuntukylin.com) 2014 2015
+
+1. error "get_image_metadata_v2: no meta_header_seq input"
+cause:
+ database is old, refresh database
+solution:
+ ./rbd-recover-tool database
+
+2. Error initializing leveldb: IO error: lock /var/lib/ceph/osd/ceph-0/current/omap/LOCK: Resource temporarily unavailable
+ ERROR: error flushing journal /var/lib/ceph/osd/ceph-0/journal for object store /var/lib/ceph/osd/ceph-0: (1) Operation not permitted
+cause:
+ when ./rbd-recover-tool database is interrupted , but command has been sent to each osd node, and there is a process reading leveldb and it is LOCKED
+ if run ./rbd-recover-tool database again, all command are sent to osd nodes again, while previous process is locking leveldb, so all new command
+ are failed.
+solution:
+ wait until all previous command finished.
diff --git a/src/tools/rbd_recover_tool/README b/src/tools/rbd_recover_tool/README
new file mode 100644
index 000000000..d289c11ca
--- /dev/null
+++ b/src/tools/rbd_recover_tool/README
@@ -0,0 +1,97 @@
+# author: Min chen(minchen@ubuntukylin.com) 2014 2015
+
+------------- ceph rbd recover tool -------------
+
+ ceph rbd recover tool is used for recovering ceph rbd image, when all ceph services are killed.
+it is based on ceph-0.80.x (Firefly and newer)
+ currently, ceph service(ceph-mon, ceph-osd) evently are not available caused by bugs or sth else
+, especially on large scale ceph cluster, so that the ceph cluster can not supply service
+and rbd images can not be accessed. In this case, a tool to recover rbd image is necessary.
+ ceph rbd recover tool is just used for this, it can collect all objects of an image from distributed
+osd nodes with the latest pg epoch, and splice objects by offset to a complete image. To make sure
+object data is complete, this tool does flush osd journal on each osd node before recovering.
+ but, there are some limitions:
+-need ssh service and unobstructed network
+-osd data must be accessed on local disk
+-clone image is not supported, while snapshot is supported
+-only support relicated pool
+
+before you run this tool, you should make sure that:
+1). all processes (ceph-osd, ceph-mon, ceph-mds) are shutdown
+2). ssh daemon is running & network is ok (ssh to each node without password)
+3). ceph-kvstore-tool is installed(for ubuntu: apt-get install ceph-test)
+4). osd disk is not crashed and data can be accessed on local filesystem
+
+-architecture:
+
+ +---- osd.0
+ |
+admin_node -----------+---- osd.1
+ |
+ +---- osd.2
+ |
+ ......
+
+-files:
+admin_node: {rbd-recover-tool common_h epoch_h metadata_h database_h}
+osd: {osd_job common_h epoch_h metadata_h} #/var/rbd_tool/osd_job
+in this architecture, admin_node acts as client, osds act as server.
+so, they run different files:
+on admin_node run: rbd-recover-tool <action> [<parameters>]
+on osd node run: ./osd_job <function> <parameters>
+admin_node will copy files: osd_job, common_h, epoch_h, metadata_h to remote osd node
+
+
+-config file
+before you run this tool, make sure write config files first
+osd_host_path: osd hostnames and osd data path #user input
+ osdhost0 /var/lib/ceph/osd/ceph-0
+ osdhost1 /var/lib/ceph/osd/ceph-1
+ ......
+mon_host: all mon node hostname #user input
+ monhost0
+ monhost1
+ ......
+mds_host: all mds node hostname #user input
+ mdshost0
+ mdshost1
+ ......
+then, init_env_admin function will create file: osd_host
+osd_host: all osd node hostname #generated by admin_job, user ignore it
+ osdhost0
+ osdhost1
+ ......
+
+
+-usage:
+rbd-recovert-tool <operation>
+<operation> :
+database #generating offline database: hobject path, node hostname, pg_epoch and image metadata
+list #list all images from offline database
+lookup <pool_id>/<image_name>[@[<snap_name>]] #lookup image metadata in offline database
+recover <pool_id><image_name>[@[<snap_name>]] [/path/to/store/image] #recover image data according to image metadata
+
+-steps:
+1. stop all ceph services: ceph-mon, ceph-osd, ceph-mds
+2. setup config files: osd_host_path, mon_host, mds_host
+3. rbd-recover-tool database # wait a long time
+4. rbd-recover-tool list
+4. rbd-recover-tool recover <pool_id>/<image_name>[@[<image_name>]] [/path/to/store/image]
+
+
+-debug & error check
+if admin_node operation is failed, you can check it on osd node
+cd /var/rbd_tool/osd_job
+./osd_job <operation>
+<operation> :
+do_image_id <image_id_hobject> #get image id of image format v2
+do_image_id <image_header_hobject> #get image id of image format v1
+do_image_metadata_v1 <image_header_hobject> #get image metadata of image format v1, maybe pg epoch is not latest
+do_image_metadata_v2 <image_header_hobject> #get image metadata of image format v2, maybe pg epoch is not latest
+do_image_list #get all images on this osd(image head hobject)
+do_pg_epoch #get all pg epoch and store it in /var/rbd_tool/single_node/node_pg_epoch
+do_omap_list #list all omap headers and omap entries on this osd
+
+
+-FAQ
+file FAQ lists some common confusing cases while testing
diff --git a/src/tools/rbd_recover_tool/TODO b/src/tools/rbd_recover_tool/TODO
new file mode 100644
index 000000000..c36d4c947
--- /dev/null
+++ b/src/tools/rbd_recover_tool/TODO
@@ -0,0 +1,2 @@
+
+1.support clone imag
diff --git a/src/tools/rbd_recover_tool/common_h b/src/tools/rbd_recover_tool/common_h
new file mode 100644
index 000000000..f2df662ad
--- /dev/null
+++ b/src/tools/rbd_recover_tool/common_h
@@ -0,0 +1,412 @@
+#!/usr/bin/env bash
+# file: common_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+
+# admin node init path
+rbd_image=/var/rbd_tool/rbd_image
+database=$rbd_image/database
+image_coll_v1=$rbd_image/image_coll_v1
+image_coll_v2=$rbd_image/image_coll_v2
+pg_coll=$rbd_image/pg_coll
+images=$rbd_image/images
+images_meta=$rbd_image/images_meta
+default_backup_dir=/var/rbd_tool/default_backup_dir
+
+# admin node: image snap & nosnap
+nosnap= #$rbd_image/<image_name>/nosnap
+snap= #rbd_image/<image_name>/<snap_name>
+
+# osd node init path
+job_path=/var/rbd_tool/osd_job
+single_node=/var/rbd_tool/single_node
+
+# osd node vars
+osd_env= #single_node/$cluster$id/osd_env
+osd_data= #/var/lib/ceph/osd/$cluster-$id
+omap_path= #$osd_data/current/omap
+image_list_v1= #single_node/$cluster-$id/image_list_v1
+image_list_v2= #single_node/$cluster-$id/image_list_v2
+image_v1= #$single_node/$cluster-$id/image_v1
+image_v2= #$single_node/$cluster-$id/image_v2
+pgid_list= #$single_node/$cluster-$id/pgid_list
+node_pg_epoch= #$single_node/$cluster-$id/node_pg_epoch
+omap_list= #$single_node/$cluster-$id/omap_list
+
+# admin node config file
+osd_host_path=$my_dir/config/osd_host_path
+osd_host_mapping= #$pwd_path/config/osd_host_mapping # host --> host_remote: by init_env_admin()
+osd_host=$my_dir/config/osd_host #generated by function init_env_admin()
+mon_host=$my_dir/config/mon_host
+mds_host=$my_dir/config/mds_host
+
+# ssh option
+ssh_option="-o ConnectTimeout=1"
+
+# gen md5sum
+function gen_md5()
+{
+ echo $1|md5sum|awk '{print $1}'
+}
+
+# on each osd node
+# check ceph environment: ssh, ceph-kvstore-tool, osd_data_path
+function check_ceph_env()
+{
+ local func="check_ceph_env"
+ if [ $# -lt 2 ];then
+ echo "$func: parameters: <node> <data_path>"
+ exit
+ fi
+ local node=$1
+ local data_path=$2
+ local res=
+ local cmd=
+
+ trap 'echo [$node]: ssh failed; exit' INT HUP
+ ssh -o ConnectTimeout=1 $node "echo -n" </dev/null
+ res=$?
+ if [ $res -ne 0 ];then
+ echo "[$node]: ssh failed"
+ exit
+ fi
+
+ cmd=ceph-kvstore-tool
+ trap 'echo [$node]: $cmd failed; exit' INT HUP
+ ssh -o ConnectTimeout=1 $node "$cmd &>/dev/null;" </dev/null
+ res=$?
+ # ceph-kvstore-tool will return 1 with no parameters input
+ if [ $res -ne 1 ];then
+ echo "[$node]: $cmd not installed"
+ exit
+ fi
+
+ trap 'echo [$node]: stat $data_path failed; exit' INT HUP
+ ssh -o ConnectTimeout=1 $node "stat $data_path &>/dev/null;" </dev/null
+ res=$?
+ if [ $res -ne 0 ];then
+ echo "[$node]: $data_path not exists"
+ exit
+ fi
+}
+
+# osd node context : osd_data_path
+function init_env_osd()
+{
+ local func="init_env_osd"
+ if [ "$1"x = ""x ];then
+ echo "$func: no osd_data_path input"
+ exit
+ fi
+ osd_data=$1
+ omap_path=$osd_data/current/omap
+
+ if [ ! -e $single_node ];then
+ mkdir -p $single_node
+ fi
+
+ local osd_id=`gen_md5 $osd_data`
+ local osd_dir=$single_node/$osd_id
+
+ if [ ! -e $osd_dir ];then
+ mkdir -p $osd_dir
+ fi
+
+ image_list_v1=$osd_dir/image_list_v1
+ image_list_v2=$osd_dir/image_list_v2
+ image_v1=$osd_dir/image_v1
+ image_v2=$osd_dir/image_v2
+ pgid_list=$osd_dir/pgid_list
+ node_pg_epoch=$osd_dir/node_pg_epoch
+ omap_list=$osd_dir/omap_list
+}
+
+# admin node process file: osd_host_path
+function init_env_admin()
+{
+ local func="init_env_admin"
+ local pwd_path=`pwd`
+ osd_host_mapping=$pwd_path/config/osd_host_mapping
+ if [ ! -s $osd_host_path ];then
+ echo "$func: config/osd_host_path not exists or empty"
+ exit
+ fi
+ if [ ! -e $rbd_image ];then
+ mkdir -p $rbd_image
+ fi
+ if [ ! -e $images ];then
+ mkdir -p $images
+ fi
+
+ if [ ! -s $mon_host ];then
+ echo "$func: config/mon_host not exists or empty"
+ exit
+ fi
+ if [ ! -e $mds_host ];then
+ echo "$func: config/mds_host not exists"
+ exit
+ fi
+
+ # we just judge if osd_host is needed to be updated
+ if [ -s $osd_host ] && [ $osd_host -nt $osd_host_path ];then
+ return
+ fi
+ echo "$func: create osd_host ..."
+ # create file: osd_host and osd_host_mapping
+ >$osd_host
+ >$osd_host_mapping
+ local lines=0
+ local lineno=0
+ while read line
+ do
+ lineno=$(($lineno + 1))
+ if [ "$line"x = ""x ];then
+ continue;
+ fi
+ local node=`echo $line|awk '{print $1}'`
+ if [ "$node"x = ""x ];then
+ echo "$func: osd_host_path : line $lineno: osd hostname not input"
+ rm -rf $osd_host $osd_host_mapping
+ exit
+ fi
+ local data_path=`echo $line|awk '{print $2}'`
+ if [ "$data_path"x = ""x ];then
+ echo "$func: osd_host_path : line $lineno: osd data_path not input"
+ rm -rf $osd_host $osd_host_mapping
+ exit
+ fi
+ lines=$(($lines + 1))
+ # in case : there are servral hostnames on the same node
+ # just need output of `hostname`
+ local hostname_alias=
+ hostname_alias=`ssh $ssh_option $node "hostname" 2>/dev/null </dev/null`
+ if [ "$hostname_alias"x = ""x ];then
+ echo "$func: osd_host_path: line $lineno: $node: get remote hostname alias failed"
+ rm -rf $osd_host $osd_host_mapping
+ exit
+ fi
+ echo "$node $hostname_alias" >>$osd_host_mapping
+ echo $node >> $osd_host
+ # check ceph env on remote osd
+ check_ceph_env $node $data_path
+ done < $osd_host_path
+
+ if [ $lines = 0 ];then
+ echo "$func: no osd host path valid"
+ exit
+ fi
+}
+
+function admin_parse_osd()
+{
+ local func="admin_parse_osd"
+ if [ -s $osd_host ];then
+ return
+ fi
+ # create file: osd_host
+ >$osd_host
+ local lines=0
+ local lineno=0
+ while read line
+ do
+ lineno=$(($lineno + 1))
+ if [ "$line"x = ""x ];then
+ continue;
+ fi
+ local node=`echo $line|awk '{print $1}'`
+ if [ "$node"x = ""x ];then
+ echo "$func: osd_host_path : line $lineno: osd_host not input"
+ exit
+ fi
+ local data_path=`echo $line|awk '{print $2}'`
+ if [ "$data_path"x = ""x ];then
+ echo "$func: osd_host_path : line $lineno: osd_data not input"
+ exit
+ fi
+ lines=$(($lines + 1))
+ echo $node >> $osd_host
+ done < $osd_host_path
+}
+
+# for osd node
+function get_omap_list()
+{
+ ceph-kvstore-tool $omap_path list > $omap_list
+}
+
+function convert_underline()
+{
+ if [ "$1"x = ""x ];then
+ return
+ fi
+
+ echo $1|sed -e 's/_/\\u/gp'|head -n 1
+}
+
+function dump_backslash()
+{
+ echo $*|sed -e 's/\\/\\\\/gp'|head -n 1
+}
+
+function dump_dump_backslash()
+{
+ echo $*|sed -e 's/\\/\\\\\\\\/gp'|head -n 1
+}
+
+function char_convert()
+{
+ if [ "$1"x = ""x ];then
+ return
+ fi
+
+ echo $1|sed -e 's/_/\\u/gp' -e 's/\./%e/gp' -e 's/%/%p/gp'|head -n 1
+}
+
+function check_osd_process()
+{
+ local func="check_osd_process"
+ local host=$1
+ if [ "$1"x = ""x ];then
+ exit
+ fi
+ local cmds="ps aux|grep ceph-osd|grep -v grep"
+ local ret=/tmp/ret.$$$$
+ ssh $ssh_option $host $cmds |tee $ret
+ if [ -s $ret ];then
+ echo "$func: [$host] ceph-osd process is not killed"
+ exit
+ fi
+ rm -f $ret
+}
+
+function get_map_header_prefix()
+{
+ echo "_HOBJTOSEQ_"
+}
+
+function get_map_header_key()
+{
+ local func="get_map_header_key"
+ if [ "$1"x = ""x ];then
+ #echo $func': no keyword input'
+ exit
+ fi
+ local keyword=$1
+ local res=`cat $omap_list| grep $keyword`
+ if [ "$res"x = ""x ];then
+ #echo "$func: map_header_key = $keyword not exists"
+ exit
+ fi
+ echo $res|awk -F ":" '{print $2}'
+}
+
+function get_header_seq()
+{
+ local func="get_header_seq"
+ if [ "$1"x == ""x ];then
+ #echo "$func: no prefix input"
+ exit;
+ elif [ "$2"x == ""x ];then
+ #echo "$func: no key input"
+ exit;
+ fi
+ local prefix=$1;
+ local key=$2;
+ local res=/tmp/header_seq.$$$$
+
+ ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res
+ if [ $? != 0 ]; then
+ #echo "$func: <$prefix , $key> not exists" ;
+ exit;
+ fi
+
+ # ceph-kvstore-tool get result like this:
+ # 02 01 7e 00 00 00 12 44 00 00 00 00 00 00 00 00
+ # get header seq bytes:
+ # 12 44 00 00 00 00 00 00
+ # -> 00 00 00 00 00 00 44 12
+ # echo $((16#0000000000004412)) -> 17426 == header_seq
+ local seq=`cat $res |head -n 2|tail -n 1| \
+ awk '
+ BEGIN {
+ FS=":"
+ seq="";
+ i=7;
+ } {
+ split($2, arr, " ")
+ # header_seq uint64 : 8 bytes
+ for (x=7; x>=0; --x) {
+ seq=seq""arr[i+x];
+ }
+ }
+ END {
+ print seq
+ }'`
+ if [ "$seq"x = ""x ];then
+ #echo "$func: get <$prefix , $key> failed"
+ exit;
+ fi
+ rm -f $res
+ echo $((16#$seq))
+}
+
+# get header info key/value
+function get_header_kv()
+{
+ local func="get_header_kv"
+ if [ "$1"x = ""x ];then
+ #echo "$func: no prefix input"
+ exit
+ elif [ "$2"x = ""x ];then
+ #echo "$func: no key input"
+ exit
+ elif [ "$3"x != "string"x ] && [ "$3"x != "int"x ];then
+ #echo "$func: no valid type input, use type (string|int)"
+ exit
+ fi
+
+ local prefix=$1
+ local key=$2
+ local types=$3
+ local res=/tmp/kv.$$$$
+
+ ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res
+ if [ $? != 0 ];then
+ #echo "$func: <$prefix , $key> not exists"
+ exit
+ fi
+
+ if [ "$types"x = "string"x ];then
+ local value=`cat $res |tail -n +2|head -n -1|awk -F ": " '{printf $3}'|sed -n 's/^\.\{4\}//p'`
+ echo $value
+ elif [ "$types"x = "int"x ];then
+ local value=`cat $res |tail -n +2|head -n -1| \
+ awk '
+ BEGIN{
+ FS=":"
+ } {
+ split($2, arr, " ");
+ len=length(arr)
+ for (i=len; i>0; --i) {
+ printf arr[i];
+ }
+ }'`
+ echo $((16#$value))
+ fi
+ rm -f $res
+}
diff --git a/src/tools/rbd_recover_tool/config/mds_host b/src/tools/rbd_recover_tool/config/mds_host
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/tools/rbd_recover_tool/config/mds_host
diff --git a/src/tools/rbd_recover_tool/config/mon_host b/src/tools/rbd_recover_tool/config/mon_host
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/tools/rbd_recover_tool/config/mon_host
diff --git a/src/tools/rbd_recover_tool/config/osd_host_path b/src/tools/rbd_recover_tool/config/osd_host_path
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/tools/rbd_recover_tool/config/osd_host_path
diff --git a/src/tools/rbd_recover_tool/database_h b/src/tools/rbd_recover_tool/database_h
new file mode 100644
index 000000000..4ff20425a
--- /dev/null
+++ b/src/tools/rbd_recover_tool/database_h
@@ -0,0 +1,1134 @@
+#!/usr/bin/env bash
+# file: database_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+
+. $my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+
+db_image_prefix=
+db_image_size=
+db_order=
+db_snap_id=
+db_snap_image_size=
+found=0
+
+#init osd_data and get all objects path
+function gen_database()
+{
+ local func="gen_database"
+ rm -rf $database/*
+ rm -rf $images
+ rm -rf $raw
+ mkdir -p $database
+ local host=
+ local data_path=
+
+ trap 'echo $func failed; exit;' INT HUP
+ while read line
+ do
+ {
+ host=`echo $line|awk '{print $1}'`
+ data_path=`echo $line|awk '{print $2}'`
+ if [ "$host"x = ""x ] || [ "$data_path"x = ""x ];then
+ continue
+ fi
+ local cmds="find $data_path/current -type f"
+ ssh $ssh_option $host $cmds > $database/$host
+ } &
+ done < $osd_host_path
+ wait
+ echo "$func: finish"
+}
+
+# collect hobjects from database
+# and choose the object whose epoch is latest
+# then, sort the objects by their offsets in image
+function gather_hobject_common()
+{
+ func="gather_hobject_common"
+
+ trap 'echo $func failed; exit;' INT HUP
+ if [ $# -lt 2 ];then
+ echo "$func: parameters: <pool_id> <image_prefix> [<snap_id>]"
+ exit
+ fi
+
+ local pool_id=$1
+ local image_prefix=$2
+ pool_id=$(($pool_id))
+ local hex_pool_id=`printf "%x" $pool_id`
+ # NOSNAP = uint64(-2)
+ local snap_id=`printf "%u" -2`
+ local hex_snap_id="head"
+ local psuffix=
+ local fsuffix="_head"
+ if [ $# = 3 ];then
+ snap_id=$(($3))
+ hex_snap_id=`printf "%x" $snap_id`
+ psuffix="_"$snap_id
+ fsuffix="_"$snap_id
+ fi
+ local underline_image_prefix=`convert_underline $image_prefix`
+ local dump_image_prefix=`dump_backslash $underline_image_prefix`
+ local ddump_image_prefix=`dump_dump_backslash $underline_image_prefix`
+ local images_raw_dir=$rbd_image/raw
+ local image_hobjects_dir=$images/pool_$pool_id/$image_prefix
+ # $images/raw/$image_prefix"_head"
+ local image_hobjects_raw=$images_raw_dir/$image_prefix"$fsuffix"
+ # $images/$image_prefix/$image_prefix"_head"
+ local image_hobjects_stable=$image_hobjects_dir/$image_prefix"$fsuffix"
+
+ if [ ! -e $images_raw_dir ];then
+ mkdir -p $images_raw_dir
+ fi
+ if [ ! -e $image_hobjects_dir ];then
+ local image_metadata=$images_meta/$image_name_in
+ mkdir -p $image_hobjects_dir
+ fi
+
+ pushd $database >/dev/null
+ local pattern="\.[0-9a-f]+__"$hex_snap_id"_[0-9A-F]{8}__"$hex_pool_id
+ >$image_hobjects_raw
+ grep -r -E $dump_image_prefix""$pattern * >$image_hobjects_raw
+ if [ ! -s $image_hobjects_raw ];then
+ echo "$func: image snap [ $image_prefix"$psuffix" ] is empty"
+ return 1 #no data available
+ fi
+ popd >/dev/null
+
+ local offset_dir_temp=$images_raw_dir/$image_prefix"$fsuffix""_dir_temp"
+ rm -rf $offset_dir_temp
+ mkdir -p $offset_dir_temp
+
+ echo "gather hobjects from database: snapid=$snap_id ..."
+
+ # format: ceph2:/var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
+ local tmp_image=$offset_dir_temp/tmpimage.$$$$
+ >$tmp_image
+ cat $image_hobjects_raw |
+ awk -F ':' '
+ BEGIN {
+ pg_coll="'$pg_coll'"
+ tmp_image="'$tmp_image'"
+ osd_host_mapping="'$osd_host_mapping'"
+ snapid="'$snap_id'"
+ }{
+ # $2 = /var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
+
+ split($2, arr1, "/current/"); # {/var/lib/ceph/osd/ceph-1/, 2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2}
+ split(arr1[2], arr2, "/"); # {2.d3_head, rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2}
+ split(arr2[1], arr3, "_head"); # {2.d3,}
+
+ hobject=$2;
+ data_path=arr1[1];
+ gsub(/\\u/, "\\\\\\\\u", hobject); # dump backslash to delay escape (\ -> \\)
+ "awk \"\\$1 == \\\""$1"\\\" {print \\$2}\" "osd_host_mapping" | head -n 1" | getline node
+ pgid = arr3[1];
+
+ len=length(arr2);
+ offset_hobject=arr2[len] # rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
+ split(offset_hobject, offarr1, "."); # {rb, 0, 1293, 6b8b4567, 000000000002__head_FB425CD3__2}
+ len1=length(offarr1)
+ offset_p=offarr1[len1] # 000000000002__head_FB425CD3__2
+ split(offset_p, offarr2, "__"); # {000000000002, head_FB425CD3, 2}
+ offset=offarr2[1]; # 000000000002
+
+ system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \" >>"tmp_image);
+ #system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \"");
+ #print node" "pgid" "hobject" "offset" "snapid
+
+ # find pg_epoch from pg_coll database
+ system("awk \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll" >>"tmp_image);
+ #system("awk \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll);
+ }'
+
+ local sort_image=$offset_dir_temp/sortimage.$$$$
+ >$sort_image
+ sort -t ' ' -k 4.1,4 -k 6.1nr -k 1.1,1 $tmp_image >$sort_image
+ sort -t ' ' -k 4.1,4 -u $sort_image > $image_hobjects_stable
+
+ #rm -rf $offset_dir_temp
+ return 0
+}
+
+function gather_hobject_nosnap()
+{
+ gather_hobject_common $1 $2
+}
+
+function gather_hobject_snap()
+{
+ gather_hobject_common $1 $2 $3
+}
+
+# select the max pg_epoch item of the same $field
+# if no same $field, choose the first
+# format : "node $field pg_epoch"
+function choose_epoch()
+{
+ cat $1|sort -t ' ' -k 3.1,3nr -k 2.1,2n |head -n 1;
+}
+
+# lookup image info , after scatter_node_jobs & gather_node_infos
+function lookup_image()
+{
+ local func="lookup_image"
+ if [ $# -lt 2 ];then
+ echo "$func: parameters error <pool_id> <image_name> [<snap_name>]"
+ fi
+ local pool_id=$1
+ local image_name=$2
+ local snap_name=$3
+ pool_id=$((pool_id))
+ echo -e "$func: pool_id = $pool_id\timage_name = $image_name\tsnap_name = $snap_name"
+ if [ $pool_id -lt 0 ];then
+ echo "$func: pool_id must great than zero"
+ exit
+ fi
+ local hex_pool_id=`printf "%x" $pool_id`
+ input_image $image_name
+ local node=
+ local item=/tmp/item.$$$$
+ local img_name=`dump_backslash $image_name`
+
+ local image_format=0
+ local image_id_hobject=
+ local image_header_hobject=
+ local result=/tmp/tmp_result.$$$$
+ local res1=/tmp/tmp_res1.$$$$
+ local res2=/tmp/tmp_res2.$$$$
+ local data_path=
+
+ # image format v1
+ {
+ cat $image_coll_v1|grep -E "/$img_name\.rbd__head_[0-9A-F]{8}__$hex_pool_id" >$res1
+ if [ -s $res1 ];then
+ echo -n "$func: rbd_header_hobject = "
+ choose_epoch $res1| tee $item
+ #choose_epoch $res1 > $item
+
+ if [ -e $item ];then
+ node=`cat $item|awk '{print $1}'`
+ image_header_hobject=`cat $item|awk '{print $2}'`
+ if [ "$node"x = ""x ];then
+ echo "$func: v1 node is NULL"
+ exit
+ fi
+ if [ "$image_header_hobject"x = ""x ];then
+ echo "$func: v1 image_header_hobject is NULL"
+ exit
+ fi
+ rm -f $item
+ fi
+
+ image_format=1
+ echo -e "image_name:\t$image_name_in"
+ echo -e "image_format:\t$image_format"
+ data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'`
+
+ >$result
+ cmds="bash $job_path/osd_job do_image_metadata_v1 $data_path `dump_backslash $image_header_hobject` $snap_name"
+ ssh $ssh_option $node $cmds | tee $result
+ fi
+ }
+
+ # image format v2
+ {
+ cat $image_coll_v2|grep -E "/rbd\\\\uid\."$img_name"__head_[0-9A-F]{8}__$hex_pool_id" >$res2
+ if [ -s $res2 ];then
+ echo -n "$func: rbd_id_hobject = "
+ choose_epoch $res2 | tee $item
+ #choose_epoch $res2 > $item
+
+ if [ -e $item ];then
+ node=`cat $item|awk '{print $1}'`
+ image_id_hobject=`cat $item|awk '{print $2}'`
+ if [ "$node"x = ""x ];then
+ echo "$func: v2 node is NULL(to get image_id_hobject)"
+ exit
+ fi
+ if [ "$image_id_hobject"x = ""x ];then
+ echo "$func: v2 image_id_hobject is NULL"
+ exit
+ fi
+ rm -f $item
+ fi
+
+ check_osd_process $node
+ image_format=2
+
+ local tid=/tmp/image_id.$$$$
+ data_path=`echo $image_id_hobject|awk -F "/current" '{print $1}'`
+ >$tid
+ cmds="bash $job_path/osd_job do_image_id $data_path `dump_backslash $image_id_hobject`"
+ ssh $ssh_option $node $cmds > $tid
+
+ local image_id=`cat $tid`
+ rm -f $tid
+
+ #get image_header_hobject
+ pushd $database >/dev/null
+ local pattern="header\."$image_id"__head_[0-9A-F]{8}__$hex_pool_id"
+ local tcoll=/tmp/tmp_image_head_coll.$$$$
+
+ # hostname(by command hostname) in $pg_coll maybe different from hostname in tcoll(input by user)
+ # t_host: hostname read from config file ($tcoll)
+ # t_host_remote: $(hostname) on osd node ($pg_coll)
+ grep -r -E $pattern * >$tcoll
+ popd >/dev/null
+
+ local t_host=(`cat $tcoll|awk -F ":" '{print $1}'`)
+ local t_pgid=(`cat $tcoll|awk -F ":" '{print $2}'|sed -n 's/.*\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\/.*/\1/p'`)
+ local t_hobject=(`cat $tcoll|awk -F ":" '{print $2}'`)
+ local t_data_path=(`cat $tcoll|awk -F ":" '{split($2, arr, "/current/"); print arr[1];}'`)
+ rm -f $tcoll
+ declare -a t_host_remote
+
+ #if there is no failed pg migration, number of t_host is replica num
+ #replica num : 3, 4, 5 ...
+ local t_hostname=/tmp/t_hostname.$$$$
+ for ((i=0; i<${#t_host[*]}; i++))
+ do
+ ssh $ssh_option ${t_host[$i]} "hostname" >$t_hostname
+ if [ $? != 0 ];then
+ echo "$func: ${t_host[$i]} get host_remote failed"
+ exit
+ fi
+ t_host_remote[$i]=`cat $t_hostname`
+ done
+ rm -f $t_hostname
+
+ local t_item=/tmp/tmp_item.$$$$
+ local tmp_item=/tmp/tmp_tmp_item.$$$$
+
+ >$tmp_item
+ for ((i=0; i<${#t_host_remote[*]}; i++ ))
+ do
+ local node=${t_host_remote[$i]}
+ local pgid=${t_pgid[$i]}
+ awk '$1 == "'"$node"'" && $2 == "'"$pgid"'" {print}' $pg_coll >>$tmp_item
+ done
+
+ # t_item: <remote_hostname> <pgid> <epoch> <data_path>
+ sort -u $tmp_item >$t_item
+ rm -f $tmp_item
+
+ local entry=`choose_epoch $t_item` #t_host_remote
+ rm -f $t_item
+
+ node=`echo $entry|awk '{print $1}'`
+ data_path=`echo $entry|awk '{print $4}'`
+ if [ "$node"x = ""x ];then
+ echo "$func: v2 node is NULL (to get image_header_hobject)"
+ exit
+ fi
+
+ for ((i=0; i<${#t_host_remote[*]}; i++))
+ do
+ if [ "${t_host_remote[$i]}"x = "$node"x ] && [ "${t_data_path[$i]}"x = "$data_path"x ];then
+ image_header_hobject=${t_hobject[$i]}
+ break
+ fi
+ done
+
+ if [ "$image_id_hobject"x = ""x ];then
+ echo "$func: v2 image_header_hobject is NULL"
+ exit
+ fi
+
+ check_osd_process $node
+
+ echo "$func: rbd_header_hobject = $node $image_header_hobject"
+ echo -e "image_name:\t$image_name_in"
+ echo -e "image_format:\t$image_format"
+
+ #data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'`
+ >$result
+ cmds="bash $job_path/osd_job do_image_metadata_v2 $data_path $image_id `dump_backslash $image_header_hobject` $snap_name"
+ ssh $ssh_option $node $cmds | tee $result
+ fi
+ }
+
+ if [ ! -s $result ];then
+ echo "$func: $image_name_in not exists"
+ exit
+ fi
+
+ # to assign value to global variable
+ db_image_prefix=`cat $result|awk '/^(object_prefix|block_name):/{print $2}'`
+ if [ "$db_image_prefix"x = ""x ];then
+ echo "$func: image_prefix is NULL"
+ exit
+ fi
+
+ db_image_size=`cat $result|awk '/^image_size:/{print $2}'`
+ db_order=`cat $result|awk '/^order:/{print $2}'`
+ if [ "$snap_name"x != ""x ];then
+ db_snap_id=`cat $result|awk '/^snapshot:/{print $2}'`
+ if [ "$db_snap_id"x = ""x ];then
+ echo "$func: $image_name_in@$snap_name NOT EXISTS"
+ exit
+ fi
+ db_snap_image_size=`cat $result|awk '/^snapshot:/{print $4}'`
+ else
+ #save snaplist
+ local image_snaplist=$images/pool_$pool_id/$image_name_in/@snaplist
+ local image_dir=$images/pool_$pool_id/$image_name_in
+ if [ ! -e $image_dir ];then
+ mkdir -p $image_dir
+ fi
+ cat $result|awk '/^snapshot:/{print $2" "$3" "$4}' >$image_snaplist
+ fi
+ found=1
+ rm -f $result
+}
+
+function list_images()
+{
+ echo "=============== format =============="
+ echo "format: <pool_id>/<image_name>"
+ echo "================ v1: ================"
+ #sed -n 's/\(.*\)\/\(.*\)\.rbd__\(.*\)/\2/p' $image_coll_v1|sort -u|sed -e 's/\\u/_/g'
+ sed -n 's/.*\/\(.*\)\.rbd__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v1|sort -u|awk '{print strtonum("0x"$1)"/"$2;}'|sed -e 's/\\u/_/g'
+ echo "================ v2: ================"
+ #sed -n 's/\(.*\)\/rbd\\uid.\(.*\)__\(head.*\)/\2/p' $image_coll_v2|sort -u|sed 's/\\u/_/g'
+ sed -n 's/.*\/rbd\\uid.\(.*\)__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v2|sort -u|awk '{print strtonum("0x"$1)"/"$2}'|sed 's/\\u/_/g'
+}
+
+# lookup image metadata
+# and
+# collect hobjects of image with the latest pg epoch
+function discover_image_nosnap()
+{
+ local func="discover_image_nosnap"
+ echo "$func ..."
+ local pool_id=$1
+ local image_name=$2
+ pool_id=$(($pool_id))
+ lookup_image $pool_id $image_name # assign $image_prefix
+ gather_hobject_nosnap $pool_id $db_image_prefix
+ if [ $? -ne 0 ];then
+ exit
+ fi
+ local image_hobjects_stable_nosnap=$images/pool_$pool_id/$db_image_prefix/$db_image_prefix"_head"
+ local image_hobjects_dir=$images/pool_$pool_id/$image_name_in
+ if [ ! -e $image_hobjects_dir ];then
+ mkdir -p $image_hobjects_dir
+ fi
+ # mv image_prefix to image_name
+ mv $image_hobjects_stable_nosnap $image_hobjects_dir/$image_name_in
+ rm -rf $images/pool_$pool_id/$db_image_prefix
+}
+
+# get the offset snapid object
+# if there is no object, choose the smallest snapid which is greater than current snapid
+function get_object_clone()
+{
+ local func="get_object_clone"
+ if [ $# -lt 4 ];then
+ exit
+ fi
+
+ local object_offset_string=$1
+ local snapid=$2
+ local snaplist_path=$3
+ local snapset_output_dir=$4
+
+ # snapid in desc
+ local snap_coll_arr=(`
+ cat $snaplist_path|awk '{ if ($1 >= '"$snapid"') print "'"$snapset_output_dir"'/@"$1}'`)
+
+ local hex_snapid=`printf "%x" $snapid`
+ pushd $snapset_output_dir >/dev/null
+ # get object with the smallest snapid greater than current snapid
+ awk '$4 == "'"$object_offset_string"'" && $5 >= '$snapid' {print}' `echo ${snap_coll_arr[@]}` |tail -n 1
+ popd >/dev/null
+}
+
+# gather hobject for each snapid
+function gen_snapset_hobject()
+{
+ local func="gen_image_snapset"
+ echo "$func ..."
+ if [ $# -lt 4 ];then
+ echo "$func: parameters: <pool_id> <image_prefix> <snaplist_path> <snapset_output_dir>"
+ exit
+ fi
+ local pool_id=$1
+ local image_prefix=$2
+ local snaplist_path=$3
+ local snapset_output_dir=$4
+ pool_id=$(($pool_id))
+ OIFS=$IFS
+ IFS=$'\n'
+ local snaparr=(`cat $snaplist_path`)
+ # gather hobject for each snapshot
+ trap 'echo $func failed; exit;' INT HUP
+ for line in ${snaparr[@]}
+ do
+ OOIFS=$IFS
+ IFS=$' '
+ local field=(`echo $line`)
+ local snapid=${field[0]}
+ local image_hobjects_stable_snap=$images/pool_$pool_id/$image_prefix/$image_prefix"_"$snapid
+ local image_snap=$snapset_output_dir/@$snapid
+ gather_hobject_snap $pool_id $image_prefix $snapid
+ local res=$?
+ if [ $res -ne 0 ];then
+ touch $image_snap
+ else
+ mv $image_hobjects_stable_snap $image_snap
+ fi
+ IFS=$OOIFS
+ done
+ IFS=$OIFS
+}
+
+# lookup image metadata and get snapid hobjects
+function discover_image_snap()
+{
+ local func="discover_image_snap"
+ echo "$func ..."
+ if [ $# -lt 3 ];then
+ echo "$func: parameters: <pool_id> <image_name> [<snap_name>]"
+ exit
+ fi
+ local pool_id=$1
+ local image_name=$2
+ local snap_name=$3
+ pool_id=$(($pool_id))
+ #mkdir -p $images/$image_prefix
+ lookup_image $pool_id $image_name $snap_name # input image_name and snap_name to lookup metadata and snap_id
+ if [ "$db_snap_id"x = ""x ];then
+ echo "$func: lookup image failed to gen snapid"
+ exit
+ fi
+ local image_hobjects_dir_prefix=$images/pool_$pool_id/$db_image_prefix
+ local image_nosnap=$images/pool_$pool_id/$image_name_in
+ #check if image nosnap recovered
+ if [ ! -s $image_nosnap ];then
+ echo "$func: please recover image nosnap before recover with snap"
+ rm -rf $image_hobjects_dir_prefix
+ exit
+ fi
+ local image_hobject_dir=$images/pool_$pool_id/$image_name_in
+ local image_snap_hobject=$image_hobject_dir/$image_name_in@$db_snap_id
+ local image_snap_hobject_head=$image_hobject_dir/$image_name_in@$db_snap_id@head
+ local image_snaplist=$image_hobject_dir/@snaplist
+ local image_snapset_dir=$image_hobject_dir/@snapset_dir
+ local image_head=$image_hobject_dir/$image_name_in
+ if [ ! -e $image_hobject_dir ];then
+ mkdir -p $image_hobject_dir
+ fi
+ # only gen snapset one time
+ if [ ! -e $image_snapset_dir ];then
+ mkdir -p $image_snapset_dir
+ gen_snapset_hobject $pool_id $db_image_prefix $image_snaplist $image_snapset_dir
+
+ fi
+
+ echo "$func: will get object clone ..."
+ >$image_snap_hobject
+ >$image_snap_hobject_head
+
+ trap 'echo $func failed; exit;' INT HUP
+ # get each offset 's snapid hobject
+ while read line
+ do
+ #echo $line
+ OOIFS=$IFS
+ IFS=$' '
+ local field=(`echo $line`)
+ local offset_string=${field[3]}
+ IFS=$OOIFS
+ local entry=`get_object_clone $offset_string $db_snap_id $image_snaplist $image_snapset_dir`
+ if [ "$entry"x != ""x ];then
+ echo $entry >> $image_snap_hobject
+ echo `dump_backslash $line` >> $image_snap_hobject_head
+ fi
+ done < $image_head
+ rm -rf $image_hobjects_dir_prefix
+}
+
+# after discover_image_nosnap
+# collect objects from osds one by one in sequence
+function copy_image_nosnap_single_thread()
+{
+ local func="copy_image_nosnap_single_thread"
+ echo "$func ..."
+ if [ $# -lt 3 ];then
+ echo "$func: parameters: <pool_id> <image_hobjects> <backup_dir>"
+ exit
+ fi
+ local pool_id=$1
+ local image_hobjects=$2
+ local backup_dir=$3
+ pool_id=$(($pool_id))
+
+ # make sure lookup_image first
+ if [ $found = 0 ];then
+ echo "$func: image not found, maybe forget to discover_image"
+ exit
+ fi
+ if [ ! -e $backup_dir ];then
+ mkdir -p $backup_dir
+ fi
+
+ local image_dir=$backup_dir/pool_$pool_id/$image_name_in
+ local image_file=$image_dir/$image_name_in
+ local CURRENT=$image_dir/@CURRENT
+ local LOCK=$image_dir/@LOCK
+ if [ ! -e $image_dir ];then
+ mkdir -p $image_dir
+ fi
+ if [ -e $LOCK ];then
+ echo "$func: $LOCK is locked by other process"
+ exit
+ else
+ touch $LOCK
+ fi
+
+ >$image_file
+ truncate -s $db_image_size $image_file
+ echo "head">$CURRENT
+
+ local count=$(($db_image_size >> $db_order))
+ local start=`cat $image_hobjects|head -n 1|awk '{print $4}'`
+ local end=`cat $image_hobjects|tail -n 1|awk '{print $4}'`
+ local entry_count=`cat $image_hobjects|wc -l`
+
+ local char_bits=$((`echo $start|wc -c` -1 ))
+ local format="%0"$char_bits"x"
+
+ local expect_start=`printf $format 0`
+ local expect_end=`printf $format $(($count -1 ))`
+
+ echo -e "object_count\t$entry_count"
+ echo -e "expect\t\t[$expect_start ~ $expect_end] count:$count"
+ echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+ local icount=0
+ local istart=
+ local iend=
+ local percent=
+
+ trap 'echo $func failed; exit;' INT HUP
+ local unit=$((1<<$db_order))
+ while read line
+ do
+ {
+ icount=$(($icount+1))
+ node=`echo $line|awk '{print $1}'`
+ hobject=`echo $line|awk '{print $3}'`
+ offset=`echo $line|awk '{print $4}'`
+ off=$((16#$offset))
+ if [ $icount = 1 ];then
+ istart=$offset
+ fi
+ hobject=`dump_backslash $hobject`
+ iend=$offset
+ sshcmd="cat $hobject"
+ ssh $ssh_option $node $sshcmd < /dev/null | dd of=$image_file bs=$unit seek=$off conv=notrunc 2>/dev/null
+ percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+ tput sc #record current cursor
+ echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+ if [ $icount != $entry_count ];then
+ tput rc # backport most recent cursor
+ fi
+ }
+ done < $image_hobjects
+
+ echo
+ echo -n "size: "
+ ls -lh $image_file|awk '{print $5"\t"$9}'
+ echo -n "du: "
+ du -h $image_file
+ #unlock
+ rm -f $LOCK
+}
+
+
+# ssh copy snap_object & head_object from osd to admin node
+# copy all snapshot objects
+# and
+# all head objects which have the same offset as snapshot objects
+function collect_image_snap_objects()
+{
+ local func="collect_image_snap_objects"
+ #$1=backup_dir, $2=snap_name, $3=snap_hobjects, $4=head_hobjects
+ if [ $# -lt 6 ];then
+ echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>"
+ exit
+ fi
+
+ local pool_id=$1
+ local image_name=$2
+ local snap_id=$3
+ local snap_hobjects=$4 #snap hobjects info
+ local head_hobjects=$5 #head hobjects info
+ local backup_dir=$6
+ pool_id=$(($pool_id))
+
+ local head_dir=$backup_dir/pool_$pool_id/$image_name/@head
+ local snap_dir=$backup_dir/pool_$pool_id/$image_name/@$snap_id
+ local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
+
+ if [ ! -e $head_dir ];then
+ mkdir -p $head_dir
+ fi
+ if [ ! -e $snap_dir ];then
+ mkdir -p $snap_dir
+ fi
+
+ local snap_node= #osd node
+ local snap_hobject= #hobject path with snapid on osd
+ local snap_offset=
+ local snap_filename=
+
+ local head_node=
+ local head_hobject=
+ local head_offset=
+ local head_filename=
+
+ # ignore if there is no object in snapshot(empty )
+ if [ ! -s $snap_hobjects ];then
+ echo "$func: $snap_hobjects is empty"
+ return 0
+ fi
+ local start=`head -n 1 $snap_hobjects|awk '{print $4}'`
+ local end=`tail -n 1 $snap_hobjects|awk '{print $4}'`
+ local entry_count=`cat $snap_hobjects|wc -l`
+ if [ $((16#$first_offset)) -gt $((16#$last_offset)) ];then
+ echo "$func: $snap_hobjects not sorted"
+ return 1
+ fi
+
+ # just assert if ignored empty snapshot
+ if [ "$start"x = ""x ] || [ "$end"x = ""x ];then
+ return 1
+ fi
+
+ # speed up copy snapshot
+ # lookup the corresponding head hobject of snap hobject
+ # use command: grep <offset> <head hobjects>
+ #
+ # eg.
+ # head hobjects: (32 objects, snapid = uint64(-2) = 18446744073709551614)
+ # ceph1 29.4d /var/lib/ceph/osd/ceph-0/current/29.4d_head/rb.0.1c414.6b8b4567.000000000000__head_EC2C1C4D__1d 000000000000 18446744073709551614 869
+ # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__head_0F439A8C__1d 000000000001 18446744073709551614 867
+ # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__head_FC55706A__1d 000000000002 18446744073709551614 869
+ # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__head_20A6328B__1d 000000000003 18446744073709551614 869
+ # ceph2 29.75 /var/lib/ceph/osd/ceph-1/current/29.75_head/rb.0.1c414.6b8b4567.000000000004__head_AC5ADB75__1d 000000000004 18446744073709551614 867
+ # ceph2 29.23 /var/lib/ceph/osd/ceph-1/current/29.23_head/rb.0.1c414.6b8b4567.000000000005__head_1FDEA823__1d 000000000005 18446744073709551614 867
+ # ......
+ # ceph1 29.34 /var/lib/ceph/osd/ceph-0/current/29.34_head/rb.0.1c414.6b8b4567.00000000001f__head_52373734__1d 00000000001f 18446744073709551614 869
+ #
+ # snap hobjects: (3 objects, snapid >= 29)
+ # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__1f_0F439A8C__1d 000000000001 31 867
+ # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__1e_FC55706A__1d 000000000002 30 869
+ # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__1d_20A6328B__1d 000000000003 29 869
+ #
+ # so find out offset in head hobjects line number:
+ # snap hobjects: 000000000001 ---> head hobjects: 2 (n1)
+ # snap hobjects: 000000000003 ---> head hobjects: 4 (n2)
+ #
+ # finally , grep range from the whole file [1 ~ N] shranked to part of file [n1 ~ n2]
+ # the worst case : [n1 ~ n2] = [1 ~ N], means no shranking
+
+ # get the line number of the start offset in head hobjects
+ local n1=`grep -n $start $head_hobjects|head -n 1|cut -d ":" -f 1`
+ # get the line number of the end offset in head hobjects
+ local n2=`grep -n $end $head_hobjects|head -n 1|cut -d ":" -f 1`
+
+ local icount=0
+ local istart=
+ local iend=
+ local percent=
+
+ OIFS=$IFS
+ IFS=$'\n'
+
+ #assume file:snap_hobjects is not very large, and can be loaded into memory
+ local snap_arr=(`cat $snap_hobjects`)
+ local snap_tmp=/tmp/snaptmp.$$$$
+
+ # snap_tmp:
+ # consists of snap hobject or head hobject
+ # select lineno range: [n1 ~ n2]
+ head -n $n2 $head_hobjects|tail -n $(($n2-$n1+1)) >$snap_tmp
+
+ echo "copy image snap/head objects from osd ..."
+ echo -e "object_count\t$entry_count"
+ echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+ trap 'echo $func failed; exit;' INT HUP
+ for line in ${snap_arr[*]}
+ do
+ icount=$(($icount+1))
+
+ OOIFS=$IFS
+ IFS=$' '
+
+ local arr=(`echo $line`)
+ snap_node=${arr[0]}
+ snap_hobject=${arr[2]}
+ snap_offset=${arr[3]}
+ snap_filename=$snap_dir/$snap_offset
+
+ if [ $icount = 1 ];then
+ istart=$snap_offset
+ fi
+ iend=$snap_offset
+
+ #lookup corresponding head hobject of snap hobject
+ local res=`grep $snap_offset $snap_tmp|head -n 1`
+ if [ "$res"x = ""x ];then
+ echo "$func: image object[ $snap_offset ] missing"
+ exit
+ fi
+
+ local arr2=(`echo $res`)
+ head_node=${arr2[0]}
+ head_hobject=${arr2[2]}
+ head_offset=${arr2[3]}
+ head_filename=$head_dir/$head_offset
+
+ # just copy object(snap/head) if it does not exist
+ if [ ! -e $snap_filename ];then
+ ssh $ssh_option $snap_node "cat $snap_hobject" > $snap_filename
+ fi
+ if [ ! -e $head_filename ];then
+ ssh $ssh_option $head_node "cat $head_hobject" > $head_filename
+ fi
+ IFS=$OOIFS
+
+ percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+ tput sc #record current cursor
+ echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+ if [ $icount != $entry_count ];then
+ tput rc # backport most recent cursor
+ fi
+ done
+ echo
+ IFS=$OIFS
+ rm -f $snap_tmp
+ return 0
+}
+
+# copy all snap objects and corresponding head objects from osds
+# in single process
+function copy_image_snap_single_thread()
+{
+ local func="copy_image_snap_single_thread"
+ if [ $# -lt 6 ];then
+ echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>"
+ exit
+ fi
+ local pool_id=$1
+ local image_name=$2
+ local snap_id=$3
+ local snap_hobjects=$4
+ local head_hobjects=$5
+ local backup_dir=$6
+ pool_id=$(($pool_id))
+
+ local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
+ local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK
+ #lock
+ if [ -e $LOCK ];then
+ echo "$func: $LOCK is locked by other process"
+ exit
+ else
+ touch $LOCK
+ fi
+ collect_image_snap_objects $pool_id $image_name $snap_id $snap_hobjects $head_hobjects $backup_dir
+ #unlock
+ rm -f $LOCK
+}
+
+# after all snap objects and necessary head objects are copied,
+# just pick appropriate head objects and snap objects and write them to image
+# in order to rollback image to snapshot
+#
+# init: image is created by copy_image_nosnap_single_thread firstly
+#
+# all output include 3 parts:
+# <image> <head objects> <snap objects>
+#
+# head objects1 --- snap1 objects
+# head objects2 --- snap2 objects
+# image head objects3 --- snap3 objects
+# ......
+# head objectsN --- snapN objects
+#
+# how to rollback:
+# firstly rollback to head, secondly write <snapX objects>
+# head = <image> + <head objects>
+# snap1 = <image> + <head objects> + <snap1 objects>
+# snap2 = <image> + <head objects> + <snap2 objects>
+# snap3 = <image> + <head objects> + <snap3 objects>
+# ......
+# snapN = <image> + <head objects> + <snapN objects>
+#
+# improve rollback:
+# there is intersection of head objects and snapX objects, if snapX objects are not empty
+# and need to deduplicate the intersection.
+# deduplicate steps:
+# - get difference set of head objects and snapX objects
+# - write the difference set objects to image
+# - write the snapX objects to image
+function rollback_image_snap()
+{
+ local func="rollback_image_snap"
+
+ echo "$func ..."
+
+ trap 'echo $func failed; exit;' INT HUP
+ if [ $# -lt 6 ];then
+ echo "$func: parameters <pool_id> <image_name> <snap_id> <snap_object_dir> <backup_dir> <image_unit>"
+ exit
+ fi
+ local pool_id=$1
+ local image_name=$2
+ local snap_id=$3
+ local snap_object_dir=$4
+ local backup_dir=$5
+ local image_unit=$6
+
+ local need_diff_set=0
+
+ local image_path=$backup_dir/pool_$pool_id/$image_name/$image_name
+ local head_object_dir=$backup_dir/pool_$pool_id/$image_name/@head
+ local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
+ local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK
+ if [ -e $LOCK ];then
+ echo "$func: $LOCK is locked by other process"
+ exit
+ else
+ touch $LOCK
+ fi
+ if [ $snap_id -ne -2 ];then
+ echo $snap_id > $CURRENT
+ else
+ echo "head" > $CURRENT
+ fi
+
+ if [ ! -e $snap_object_dir ];then
+ return 0
+ fi
+
+ if [ "$snap_object_dir"x != "$head_object_dir"x ];then
+ echo "$func: need to compute diff_set of head"
+ need_diff_set=1
+ else
+ echo "$func: NO diff_set"
+ need_diff_set=0
+ fi
+
+ local entry_count=0
+ local start=
+ local end=
+ local offset=
+ local icount=0
+ local istart=
+ local iend=
+ local percent=
+
+ local snap_objects=
+ local head_objects=
+ local diff_set=
+
+ snap_objects=(`ls $snap_object_dir`)
+
+ # if need to compute difference set of head_objects and snap_objects
+ if [ $need_diff_set -ne 0 ];then
+ head_objects=(`ls $head_object_dir`)
+
+ #get the difference set: ( head_objects - snap_objects )
+ diff_set=(`
+ sort -m <(echo ${head_objects[@]}|xargs -n 1 echo) <(echo ${snap_objects[@]}|xargs -n 1 echo) \
+ <(echo ${snap_objects[@]}|xargs -n 1 echo) |uniq -u`)
+
+ # copy diff_set of head object to image
+ pushd $head_object_dir >/dev/null
+
+ echo "$func: copy diff_set head objects ..."
+ entry_count=${#diff_set[@]}
+ start=${diff_set[0]}
+ end=
+ if [ $entry_count -gt 0 ];then
+ end=${diff_set[$(($entry_count - 1))]}
+ fi
+ offset=
+ icount=0
+ istart=
+ iend=
+ percent=
+
+ echo -e "object_count\t$entry_count"
+ echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+ for object in ${diff_set[@]}
+ do
+ icount=$(($icount+1))
+ if [ $icount = 1 ];then
+ istart=$object
+ fi
+ iend=$object
+
+ local offset=$((16#$object))
+ dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null
+
+ percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+ tput sc #record current cursor
+ echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+ if [ $icount != $entry_count ];then
+ tput rc # backport most recent cursor
+ fi
+ done
+ if [ $entry_count -gt 0 ];then
+ echo
+ fi
+ popd >/dev/null
+
+ if [ $snap_id -ne -2 ];then
+ echo -e "$image_name already rollback diff_set: (head - snap)"
+ fi
+ fi
+
+ # copy snap object to image
+ pushd $snap_object_dir >/dev/null
+
+ if [ $need_diff_set -ne 0 ];then
+ echo "$func: copy snap objects ..."
+ else
+ echo "$func: copy head objects ..."
+ fi
+ entry_count=${#snap_objects[@]}
+ start=${snap_objects[0]}
+ end=
+ if [ $entry_count -gt 0 ];then
+ end=${snap_objects[$(($entry_count - 1))]}
+ fi
+ offset=
+ icount=0
+ istart=
+ iend=
+ percent=
+
+ echo -e "object_count\t$entry_count"
+ echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+ for object in ${snap_objects[@]}
+ do
+ icount=$(($icount+1))
+ if [ $icount = 1 ];then
+ istart=$object
+ fi
+ iend=$object
+
+ local offset=$((16#$object))
+ dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null
+
+ percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+ tput sc #record current cursor
+ echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+ if [ $icount != $entry_count ];then
+ tput rc # backport most recent cursor
+ fi
+ done
+ if [ $entry_count -gt 0 ];then
+ echo
+ fi
+ popd >/dev/null
+
+ rm -f $LOCK
+ if [ $snap_id -ne -2 ];then
+ echo "$image_name rollback to snapid: $snap_id"
+ else
+ echo "$image_name rollback to head"
+ fi
+}
+
+function recover_image()
+{
+ local func="recover_image"
+ echo "$func ..."
+
+ if [ $# -lt 3 ];then
+ echo "$func: parameters: <pool_id> <image_name> <snap_name> [<backup_dir>]"
+ exit
+ fi
+
+ local pool_id=$1
+ local img_name=$2
+ local snap_name=$3
+ local backup_dir=$4
+ pool_id=$(($pool_id))
+ if [ "$snap_name"x = "@"x ];then
+ snap_name=
+ fi
+ if [ "$backup_dir"x = ""x ];then
+ backup_dir=$default_backup_dir
+ fi
+
+ #recover image with nosnap
+ if [ "$snap_name"x = ""x ];then
+ discover_image_nosnap $pool_id $img_name #input image_name
+ local image_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in
+ copy_image_nosnap_single_thread $pool_id $image_hobjects $backup_dir
+
+ #recover image with snap
+ else
+
+ # check if recovered head already
+ local img_hobjects_path=$images/pool_$pool_id/$img_name/$img_name
+ local img_file_path=$backup_dir/pool_$pool_id/$img_name/$img_name
+ if [ ! -e $img_hobjects_path ] || [ ! -e $img_file_path ];then
+ echo "$func: $img_name@$snap_name : can not rollback to snapshot, please recover image head first"
+ exit
+ fi
+
+ # rollback to head
+ if [ "$snap_name"x = "@@"x ];then
+ local head_dir=$backup_dir/pool_$pool_id/$img_name/@head
+ if [ -e $head_dir ];then
+ local unit=`pushd $head_dir >/dev/null; ls|head -n 1|xargs -n 1 stat|awk '/Size:/{print $2}'`
+ # rollback to head
+ rollback_image_snap $pool_id $img_name -2 $backup_dir/$img_name/@head $backup_dir $unit
+ echo "$image_name_in head : $backup_dir/$img_name/$img_name"
+ else
+ echo "$func: no need to rollback to head"
+ fi
+ return 0
+ fi
+
+ # rollback to snap
+ discover_image_snap $pool_id $img_name $snap_name # get image meta & get snapid object
+ local snap_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id
+ local head_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id@head
+ local snap_object_dir=$backup_dir/pool_$pool_id/$image_name_in/@$db_snap_id
+ local image_path=$backup_dir/pool_$pool_id/$image_name_in/$image_name_in
+ local image_unit=$((1<<$db_order))
+ copy_image_snap_single_thread $pool_id $image_name_in $db_snap_id $snap_hobjects $head_hobjects $backup_dir
+ rollback_image_snap $pool_id $image_name_in $db_snap_id $snap_object_dir $backup_dir $image_unit
+ echo "$image_name_in@$snap_name : $image_path"
+ fi
+}
diff --git a/src/tools/rbd_recover_tool/epoch_h b/src/tools/rbd_recover_tool/epoch_h
new file mode 100644
index 000000000..e268eafa7
--- /dev/null
+++ b/src/tools/rbd_recover_tool/epoch_h
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+# file: epoch_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+. $my_dir/common_h
+
+#pgid_list=$single_node/$cluster-$id/pgid_list
+function get_pgid_list()
+{
+ find $osd_data/current/ -type d -name "*_head"|\
+ sed -n 's/\(.*\)\/current\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head/\2 \1/p'|\
+ sort -t ' ' -k 1.1,1h -k 2.1,2 > $pgid_list;
+}
+
+function get_pgid()
+{
+ hobject_path=$1
+ echo $hobject_path| sed -n 's/\(.*\)\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\(.*\)/\2/p'
+}
+
+infos_seq=
+function get_infos_seq()
+{
+ local func="get_infos_seq"
+
+ local keyword=":infos."
+ local infos_key=`get_map_header_key $keyword`
+
+ if [ "$infos_key"x = ""x ];then
+ echo "$func: keyword not input or infos_key not exists"
+ exit
+ fi
+ local prefix=`get_map_header_prefix`
+ local key=$infos_key
+
+ infos_seq=`get_header_seq $prefix $key`
+ if [ "$infos_seq"x = ""x ];then
+ echo "$func: infos_seq not exists"
+ exit
+ fi
+}
+
+pg_epoch=
+function get_pg_epoch()
+{
+ local func="get_pg_epoch"
+ if [ "$1"x = ""x ];then
+ echo "$func: no pgid input"
+ exit
+ fi
+
+ get_pg_epoch_firefly "$1"
+ if [ "$pg_epoch"x != ""x ]; then
+ # echo "Epoch for $1: $pg_epoch (firefly)"
+ return
+ fi
+
+ get_pg_epoch_hammer "$1"
+ if [ "$pg_epoch"x != ""x ]; then
+ # echo "Epoch for $1: $pg_epoch (hammer)"
+ return
+ fi
+
+ echo "$func: Couldn't find epoch for $1"
+ exit
+}
+
+function get_pg_epoch_firefly()
+{
+ local func="get_pg_epoch_firefly"
+ if [ "$1"x = ""x ];then
+ echo "$func: no pgid input"
+ exit
+ fi
+ local pgid=$1
+ local key=$pgid"_epoch"
+
+ #get_infos_seq;
+ # infos_seq default to 1
+ infos_seq=1
+ local infos_seq=`printf "%016d" $infos_seq`
+ local prefix="_USER_"$infos_seq"_USER_"
+
+ pg_epoch=`get_header_kv $prefix $key int`
+}
+
+function get_pg_epoch_hammer()
+{
+ local func="get_pg_epoch_hammer"
+ if [ "$1"x = ""x ];then
+ echo "$func: no pgid input"
+ exit
+ fi
+ local pgid="$1"
+ local hkey_prefix="$(get_map_header_prefix)"
+ local hkey="$(printf '...head.%x.%08X' "$(echo "$pgid"|cut -d'.' -f1)" "$((0x$(echo "$pgid"|cut -d'.' -f2)))")"
+
+ local infos_seq="$(get_header_seq "$hkey_prefix" "$hkey")"
+ local infos_seq=`printf "%016d" $infos_seq`
+ local prefix="_USER_"$infos_seq"_USER_"
+ local key="_epoch"
+
+ pg_epoch=`get_header_kv $prefix $key int`
+}
diff --git a/src/tools/rbd_recover_tool/metadata_h b/src/tools/rbd_recover_tool/metadata_h
new file mode 100644
index 000000000..b736ceea7
--- /dev/null
+++ b/src/tools/rbd_recover_tool/metadata_h
@@ -0,0 +1,368 @@
+#!/usr/bin/env bash
+# file: metadata_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+. $my_dir/common_h
+. $my_dir/epoch_h
+
+# put origin name in $image_name_in: for output
+# put convert "_" name in $image_name: for grep image hobjects from database
+image_name_in=
+image_name=
+function input_image()
+{
+ local func="input_image"
+ if [ "$1"x = ""x ];then
+ echo "$func: no image name input"
+ exit
+ fi
+
+ image_name_in=$1
+ # "_" -> "\u"
+ image_name=`convert_underline $image_name_in`
+}
+
+#======================================== distinguish v1 or v2 ===================================
+#image_list_v1=$single_node/$cluster-$id/image_list_v1
+#image_list_v2=$single_node/$cluster-$id/image_list_v2
+function get_image_list()
+{
+ find $osd_data/current/ -type f|grep ".rbd__" >$image_list_v1
+ find $osd_data/current/ -type f|grep "rbd\\\\uid." >$image_list_v2
+}
+
+function get_image_format_by_hobject()
+{
+ local func="get_image_format"
+ if [ "$1"x = ""x ];then
+ exit
+ fi
+ local res1=`cat $image_list_v1|grep $1`
+ if [ "$res1"x != ""x ];then
+ echo 1
+ exit
+ fi
+
+ local res2=`cat $image_list_v2|grep $1`
+ if [ "$res2"x = ""x ];then
+ echo 2
+ exit
+ fi
+}
+
+#======================================== image format v1 ========================================
+# <image_name>.rbd include 3 parts:
+# header + snap_count*snapshot + snap_count*snap_name
+#
+# struct rbd_obj_header_ondisk {
+# 40 char text[40];
+# 24 char block_name[RBD_MAX_BLOCK_NAME_SIZE];
+# 4 char signature[4];
+# 8 char version[8];
+# struct {
+# 1 __u8 order;
+# 1 __u8 crypt_type;
+# 1 __u8 comp_type;
+# 1 __u8 unused;
+# } __attribute__((packed)) options;
+# 8 ceph_le64 image_size;//hexdump -C s=80 n=8
+# 8 ceph_le64 snap_seq; //hexdump -C s=88 n=8
+# 4 ceph_le32 snap_count;//hexdump -C s=96 n=4
+# 4 ceph_le32 reserved;
+# 8 ceph_le64 snap_names_len;//hexdump -C s=104 n=8
+# struct rbd_obj_snap_ondisk snaps[0];
+# } __attribute__((packed));
+#
+# sizeof(rbd_obj_header_ondisk): 112
+#
+# struct rbd_obj_snap_ondisk {
+# 8 ceph_le64 id; //hexdump -C s=112+i*16 n=8 , i=[0, snap_count)
+# 8 ceph_le64 image_size;//hexdump -C s=112+i*16+8 n=8, i=[0, snap_count)
+# } __attribute__((packed));
+# sizeof(rbd_obj_snap_ondisk): 16
+#
+# get snap_names form <image_nane>.rbd
+# hexdump -e '10/1 "%_c"' -s $((112 + $snap_count*16)) -n $snap_names_len <image_name>.rbd
+# then split snap_names into array
+
+function get_image_metadata_v1()
+{
+ local func="get_image_metadata_v1"
+ if [ "$1"x = ""x ];then
+ echo "$func: no image head object input"
+ exit
+ fi
+ local snap_name=
+ if [ "$2"x != ""x ];then
+ snap_name=$2
+ fi
+
+ if [ ! -e $1 ];then
+ echo "$func: $1 not exists"
+ exit
+ fi
+ local hobject_path=$1
+ d_hobject_path=`dump_backslash $1`
+ local image_format=`get_image_format_by_hobject $d_hobject_path`
+ if [ $image_format != 1 ];then
+ echo "$func: image_format must be 1"
+ exit
+ fi
+
+ if [ ! -e $hobject_path ];then
+ echo "$func: $hobject_path not exists"
+ exit
+ fi
+
+ # decode rbd_obj_header_ondisk of <image_name>.rbd
+ local block_name=`hexdump -e '10/1 "%c"' -s 40 -n 24 $hobject_path`
+ local order=`hexdump -e '10/4 "%u"' -s 76 -n 1 $hobject_path`
+ local image_size=`hexdump -C -s 80 -n 8 $hobject_path|head -n 1|awk '{for (i=9; i>1; i--) {printf $i}}'`
+ image_size=$((16#$image_size))
+ local snap_seq=`hexdump -C -s 88 -n 8 $hobject_path|head -n 1|
+ awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
+ local snap_count=`hexdump -C -s 96 -n 4 $hobject_path|head -n 1|
+ awk '{num=""; for(i=5; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
+ local snap_names_len=`hexdump -C -s 104 -n 8 $hobject_path|head -n 1|
+ awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
+
+ echo -e "block_name:\t$block_name"
+ echo -e "order:\t\t$order"
+ echo -e "image_size:\t$image_size"
+ echo -e "snap_seq:\t$snap_seq"
+
+ # decode N rbd_obj_snap_ondisk of <image_name>.rbd
+ declare -a snap_ids
+ declare -a snap_names
+ declare -a snap_image_sizes
+ local size_header=112 #sizeof(rbd_obj_header_ondisk)
+ local size_snap=16 #sizeof(rbd_obj_snap_ondisk)
+ local offset=0
+ local id_off=0
+ local size_off=0
+ for ((i=0; i<$snap_count; i++))
+ do
+ offset=$(($size_header + $i * $size_snap))
+ id_off=$offset
+ size_off=$(($offset + 8))
+ snap_ids[$i]=`hexdump -C -s $id_off -n 8 $hobject_path|head -n 1|
+ awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'`
+ snap_image_sizes[$i]=`hexdump -C -s $size_off -n 8 $hobject_path|head -n 1|
+ awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'`
+ done
+ offset=$(($size_header + $snap_count * $size_snap))
+ snap_names=(`hexdump -e '10/1 "%_c"' -s $offset -n $snap_names_len $hobject_path|
+ awk -F "\\\\\\\\\\\\\\\\0" '{for(i=1; i<=NF; i++) {print $i" "} }'`);
+
+ echo -e "\t\tID\tNAME\t\tSIZE"
+ for ((i=0; i<$snap_count; i++))
+ do
+ if [ "$snap_name"x = ""x ];then
+ echo -n -e "snapshot:\t"
+ echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}"
+ continue
+ fi
+ if [ "$snap_name"x = "${snap_names[$i]}"x ];then
+ echo -n -e "snapshot:\t"
+ echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}"
+ return
+ fi
+ done
+}
+
+#======================================== end image format v1 ========================================
+
+#======================================== image format v2 ========================================
+
+# map_header, header_seq, header, key/value
+# eg.
+# map_header _HOBJTOSEQ_:rbd%uheader%e139a6b8b4567...head.2.68E826B6
+# meta_header_seq 17426
+# header: _USER_0000000000017426_USER_:object_prefix
+# _USER_0000000000017426_USER_:order
+# _USER_0000000000017426_USER_:size
+# _USER_0000000000017426_USER_:snap_seq
+# key/value ceph-kvstore-tool /storepath get _USER_0000000000017426_USER_ (object_prefix|order|size|snap_seq)
+
+# decode image id from image_id_hobject
+function get_image_id()
+{
+ local func="get_image_id"
+ if [ "$1"x = ""x ];then
+ exit;
+ fi
+ local image_id_hobject=$1 #from admin node's database
+
+ if [ ! -e $image_id_hobject ];then
+ #echo "$func: $image_id_hobject not exists"
+ exit;
+ fi
+
+ # get len of string
+ local n=`hexdump -e '10/4 "%u"' -s 0 -n 4 $image_id_hobject`
+ # get string
+ hexdump -e '10/1 "%c"' -s 4 -n $n $image_id_hobject
+}
+
+#find image_id omap entry in omaplist
+map_header_prefix=
+map_header_key=
+function get_map_header()
+{
+ local func="get_map_header"
+ local image_id=$1
+ if [ "$image_id"x = ""x ];then
+ echo "$func: no image_id input"
+ exit;
+ fi
+ map_header_prefix=`get_map_header_prefix`
+ local keyword="header%e"$image_id
+ map_header_key=`get_map_header_key $keyword`
+ if [ "$map_header_key"x = ""x ];then
+ echo "$func: map_header_key is NULL(not in omaplist)"
+ exit
+ fi
+}
+
+#get meta header seq from map_header
+meta_header_seq=
+function get_meta_header_seq()
+{
+ local func="get_meta_header_seq"
+ if [ "$1"x == ""x ];then
+ echo "$func: no prefix input"
+ exit;
+ elif [ "$2"x == ""x ];then
+ echo "$func: no key input"
+ exit;
+ fi
+ local prefix=$1;
+ local key=$2;
+ meta_header_seq=`get_header_seq $prefix $key`
+}
+
+# get image metadata : object_prefix, order, image_size, snap_seq
+object_prefix=
+order=
+image_size=
+snap_seq=
+function get_image_metadata_v2()
+{
+ local func="get_image_metadata_v2"
+ if [ "$1"x = ""x ];then
+ echo "$func: no meta_header_seq input"
+ exit;
+ fi
+ local meta_header_seq=`printf "%016d" $1`
+ #echo "$func: meta_header_seq = "$meta_header_seq
+ local ghobject_key="_USER_"$meta_header_seq"_USER_"
+ local prefix=$ghobject_key
+
+ object_prefix=`get_header_kv $prefix object_prefix string`
+ #object_prefix="rbd_data.$image_id"
+ order=`get_header_kv $prefix order int`
+ image_size=`get_header_kv $prefix size int`
+ snap_seq=`get_header_kv $prefix snap_seq int`
+
+ echo -e "object_prefix:\t$object_prefix"
+ echo -e "order:\t\t$order"
+ echo -e "image_size:\t$image_size"
+ echo -e "snap_seq:\t$snap_seq"
+
+ # list snapshot
+ list_snaps_v2 $1 $2
+}
+
+# struct cls_rbd_snap {
+# snapid_t id;
+# string name;
+# uint64_t image_size;
+# uint64_t features;
+# uint8_t protection_status;
+# cls_rbd_parent parent;
+# }
+# decode cls_rbd_snap
+# 1 u8 struct_v
+# 1 u8 struct_compat
+# 4 u32 struct_len
+# 8 u64 snapid_t id //s=6 n=8
+# 4 u32 len of name //s=14 n=4
+# len char name //s=18 n=len
+# 8 u64 image_size
+# 8 u64 features
+# ......
+#
+function list_snaps_v2()
+{
+ local func="list_snaps_v2"
+ if [ "$1"x = ""x ];then
+ exit
+ fi
+ local sname=
+ if [ $# -eq 2 ];then
+ sname=$2
+ fi
+ local meta_header_seq=`printf "%016d" $1`
+ local prefix="_USER_"$meta_header_seq"_USER_"
+ local keys=(`awk -F ":" '/snapshot_/ && $1 == "'"$prefix"'" {if ($2 == "") exit; split($2, arr, "_");
+ print arr[2];}' $omap_list|sort -r`)
+ echo -e "\t\tID\tNAME\t\tSIZE"
+ for key in ${keys[@]}
+ do
+ key="snapshot_$key"
+ local arr=(`ceph-kvstore-tool $omap_path get $prefix $key|awk -F ":" '{print $2}'`);
+ # get snap_name
+ tmp=
+ for ((i=17; i>13; i--))
+ do
+ tmp="$tmp${arr[$i]}"
+ done
+ local len=$((16#$tmp))
+ local snap_name=
+ for ((i=18; i<$((18+$len)); i++))
+ do
+ # convert ascii to char
+ local char=`echo -e "\x${arr[$i]}"`
+ snap_name="$snap_name$char"
+ done
+ # get snap_id (little endian)
+ local tmp=
+ for ((i=13; i>5; i--))
+ do
+ tmp="$tmp${arr[$i]}"
+ done
+ local snap_id=$((16#$tmp))
+ # get image_size of current snap (little endian)
+ tmp=
+ for ((i=$((25+$len)); i>$((17+$len)); i--))
+ do
+ tmp="$tmp${arr[$i]}"
+ done
+ local image_size=$((16#$tmp))
+ if [ "$sname"x = ""x ];then
+ echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size"
+ continue
+ fi
+ if [ "$sname"x = "$snap_name"x ];then
+ echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size"
+ return
+ fi
+ done
+}
+
+#======================================== end image format v2 ========================================
diff --git a/src/tools/rbd_recover_tool/osd_job b/src/tools/rbd_recover_tool/osd_job
new file mode 100755
index 000000000..b4b80be8a
--- /dev/null
+++ b/src/tools/rbd_recover_tool/osd_job
@@ -0,0 +1,170 @@
+#!/usr/bin/env bash
+# file: osd_job
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+
+. $my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+
+function check_ceph_osd()
+{
+ local func="check_ceph_osd"
+ local host=`hostname`
+ # if ceph-osd service is still running, except flush-journal
+ if [ "`ps aux|grep ceph-osd|grep -v flush-journal|grep -v grep`"x != ""x ];then
+ echo "[$host]: $func: ceph-osd is running..., stop it"
+ exit
+ fi
+}
+
+function cat_pg_epoch()
+{
+ local func="cat_pg_epoch"
+ init_env_osd $1
+ if [ -e $node_pg_epoch ];then
+ cat $node_pg_epoch
+ fi
+}
+
+function cat_image_v1()
+{
+ local func="cat_image_v1"
+ init_env_osd $1
+ if [ -e $image_v1 ];then
+ cat $image_v1
+ fi
+}
+
+function cat_image_v2()
+{
+ local func="cat_image_v2"
+ init_env_osd $1
+ if [ -e $image_v2 ];then
+ cat $image_v2
+ fi
+}
+
+function flush_osd_journal()
+{
+ local func="flush_osd_journal"
+ init_env_osd $1
+ local osd_data_path=$osd_data
+ local osd_journal_path=$osd_data/journal
+ local whoami_path=$osd_data/whoami
+ local host=`hostname`
+ if [ ! -e $whoami_path ];then
+ echo "[$host]: $func: $whoami_path not exists"
+ exit
+ fi
+ local whoami=`cat $whoami_path`
+ echo "[$host]: $func ..."
+ ceph-osd -i $whoami --osd-data $osd_data_path --osd-journal $osd_journal_path --flush-journal >/dev/null
+ if [ $? -ne 0 ];then
+ echo "[$host]: $func: flush osd journal failed"
+ exit
+ fi
+}
+
+function do_omap_list()
+{
+ local func="do_omap_list"
+ init_env_osd $1
+ local host=`hostname`
+ echo "[$host]: $func ..."
+ get_omap_list
+}
+
+# get all pgs epoch
+function do_pg_epoch()
+{
+ local func="do_pg_epoch"
+ init_env_osd $1
+ local node=`hostname`
+ get_pgid_list
+ >$node_pg_epoch
+ local pgid=
+ local data_path=
+ local host=`hostname`
+ echo "[$host]: $func ..."
+ while read line
+ do
+ {
+ pgid=`echo $line|awk '{print $1}'`
+ data_path=`echo $line|awk '{print $2}'`
+ get_pg_epoch $pgid
+ echo -e "$node $pgid $pg_epoch $data_path" >>$node_pg_epoch
+ }
+ done < $pgid_list
+}
+
+# get an list of image in this osd node, pg epoch maybe not the latest, the admin node will do distinguish
+function do_image_list()
+{
+ local func="do_image_list"
+ init_env_osd $1
+ get_image_list
+ local node=`hostname`
+ >$image_v1
+ >$image_v2
+ local host=`hostname`
+ echo "[$host]: $func ..."
+ for line in `cat $image_list_v1`
+ do
+ pgid=`get_pgid $line`
+ get_pg_epoch $pgid
+ echo "$node $line $pg_epoch" >> $image_v1
+ done
+ for line in `cat $image_list_v2`
+ do
+ pgid=`get_pgid $line`
+ get_pg_epoch $pgid
+ echo "$node $line $pg_epoch" >> $image_v2
+ done
+}
+
+function do_image_id()
+{
+ local func="do_image_id"
+ init_env_osd $1
+ get_image_id $2
+}
+
+function do_image_metadata_v1()
+{
+ local func="do_image_metadata_v1"
+ init_env_osd $1
+ local image_header_hobject=$2
+ local snap_name=$3
+ get_image_metadata_v1 $image_header_hobject $snap_name
+}
+
+function do_image_metadata_v2()
+{
+ local func="do_image_metadata_v2"
+ init_env_osd $1
+ local image_id=$2
+ local image_header_hobject=$3
+ local snap_name=$4
+ get_map_header $image_id
+ get_meta_header_seq $map_header_prefix $map_header_key
+ get_image_metadata_v2 $meta_header_seq $snap_name
+}
+
+check_ceph_osd
+$*
diff --git a/src/tools/rbd_recover_tool/rbd-recover-tool b/src/tools/rbd_recover_tool/rbd-recover-tool
new file mode 100755
index 000000000..b7a258650
--- /dev/null
+++ b/src/tools/rbd_recover_tool/rbd-recover-tool
@@ -0,0 +1,327 @@
+#!/usr/bin/env bash
+# file: rbd-recover-tool
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+# rbd-recover-tool is an offline recover tool for rbd image in replicated pool
+# when ceph cluster is stopped.
+# it is a simple disater recovery policy, just for urgent condition
+
+my_dir=$(dirname "$0")
+
+. $my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+. $my_dir/database_h
+
+#scp files from admin node to osd node
+file1=common_h
+file2=metadata_h
+file3=epoch_h
+file4=osd_job
+
+#------------ admin node's action -------------
+
+function scp_file()
+{
+ local func="scp_file"
+ file=$1
+ if [ "$1"x = ""x ];then
+ echo "$func: not file input"
+ exit
+ fi
+ for host in `cat $osd_host`
+ do
+ {
+ echo "$func: $host"
+ scp $ssh_option $file $host:$job_path 1>/dev/null
+ } &
+ done
+}
+
+function scp_files()
+{
+ local func="scp_files"
+ for host in `cat $osd_host`
+ do
+ {
+ echo "$func: $host"
+ scp $ssh_option $file1 $host:$job_path
+ scp $ssh_option $file2 $host:$job_path
+ scp $ssh_option $file3 $host:$job_path
+ scp $ssh_option $file4 $host:$job_path
+ } &
+ done
+ wait
+ echo "$func: finish"
+}
+
+function scatter_node_jobs()
+{
+ local func="scatter_node_jobs"
+ local host=
+ local data_path=
+ echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..."
+
+ trap 'echo $func failed; exit' INT HUP
+ while read line
+ do
+ {
+ host=`echo $line|awk '{print $1}'`
+ data_path=`echo $line|awk '{print $2}'`
+ check_osd_process $host
+
+ cmd="mkdir -p $job_path"
+ ssh $ssh_option $host $cmd
+ scp $ssh_option $file1 $host:$job_path >/dev/null
+ scp $ssh_option $file2 $host:$job_path >/dev/null
+ scp $ssh_option $file3 $host:$job_path >/dev/null
+ scp $ssh_option $file4 $host:$job_path >/dev/null
+
+ cmd="bash $job_path/osd_job flush_osd_journal $data_path;"
+ cmd="$cmd $job_path/osd_job do_omap_list $data_path;"
+ cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;"
+ cmd="$cmd bash $job_path/osd_job do_image_list $data_path;"
+
+ ssh $ssh_option $host $cmd </dev/null
+ } &
+ done < $osd_host_path
+ wait
+ echo "$func: finish"
+}
+
+function gather_node_infos()
+{
+ local func="gather_node_infos"
+ echo "$func ..."
+ >$pg_coll
+ >$image_coll_v1
+ >$image_coll_v2
+ trap 'echo $func failed; exit' INT HUP
+ while read line
+ do
+ {
+ host=`echo $line|awk '{print $1}'`
+ data_path=`echo $line|awk '{print $2}'`
+ echo "$func: $host"
+ check_osd_process $host
+
+ #pg epoch
+ cmd1="bash $job_path/osd_job cat_pg_epoch $data_path"
+ ssh $ssh_option $host $cmd1 >> $pg_coll
+ #image v1
+ cmd2="bash $job_path/osd_job cat_image_v1 $data_path"
+ ssh $ssh_option $host $cmd2 >> $image_coll_v1
+ #image v2
+ cmd3="bash $job_path/osd_job cat_image_v2 $data_path"
+ ssh $ssh_option $host $cmd3 >> $image_coll_v2
+ } &
+ done < $osd_host_path
+ wait
+ echo "$func: finish"
+}
+
+function scatter_gather()
+{
+ local func="scatter_gather"
+ if [ ! -s $osd_host ];then
+ echo "$func: no osd_host input"
+ exit
+ fi
+ if [ ! -s $mon_host ];then
+ echo "$func: no mon_host input"
+ exit
+ fi
+ scatter_node_jobs
+ gather_node_infos
+}
+
+
+#------------- operations --------------
+
+function database()
+{
+ scatter_gather
+ gen_database
+}
+
+function list()
+{
+ list_images
+}
+
+function lookup()
+{
+ lookup_image $1 $2 $3
+}
+
+function recover()
+{
+ recover_image $1 $2 $3 $4
+}
+
+#------------- helper -------------
+
+function usage()
+{
+ local cmd_name="rbd-recover-tool"
+ echo
+ echo "$cmd_name is used to recover rbd image of replicated pool,
+ when all ceph services are stopped"
+ echo "Usage:"
+ echo "$cmd_name database
+ gather pg info, object info, image metadata,
+ and epoch info from all osd nodes,
+ this will cosume a long time, just be patient,
+ especially when scale up to 1000+ osds"
+ echo "$cmd_name list
+ list all rbd images of all replicated pools,
+ before to lookup & recover"
+ echo "$cmd_name lookup <pool_id>/<image_name>[@[<snap_name>]]
+ show image metadata: image format, rbd id, size, order, snapseq
+ In addition, for image with snapshots,
+ this will list all snapshot infomations"
+ echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>]
+ all snapshots share one image head, to economize disk space
+ so there is only one snapshot at any time,
+ image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name
+ cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT,
+ will show snapid
+ recover to raw image/nosnap/head: <image_name>
+ rollback to image head: <image_name>@
+ rollback to image snap: <image_name>@<snap_name>
+ recover steps:
+ 1. recover image nosnap (only one time)
+ 2. rollback to image snap"
+}
+
+function get_path()
+{
+ local func="get_path"
+ if [ $# -lt 1 ];then
+ return
+ fi
+ if [[ $1 =~ // ]];then
+ return # "/path//to" is invalid
+ fi
+ local parent=`dirname $1`
+ local name=`basename $1`
+ if [ "$parent"x = "/"x ];then
+ echo "$parent$name"
+ else
+ echo -n "$parent/$name"
+ fi
+}
+
+function admin_cmd()
+{
+ local func="admin_cmd"
+ if [ $# -lt 1 ];then
+ usage
+ exit
+ fi
+ if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then
+ usage
+ exit
+ fi
+
+ if [ "$1"x = "database"x ];then
+ if [ $# -gt 1 ];then
+ usage
+ exit
+ fi
+ # remove osd_host to refresh osd_host and osd_host_mapping
+ rm -f $osd_host
+ init_env_admin
+ database
+ elif [ "$1"x = "list"x ];then
+ if [ $# -gt 1 ];then
+ usage
+ exit
+ fi
+ init_env_admin
+ list
+ elif [ "$1"x = "lookup"x ];then
+ if [ $# -gt 2 ];then
+ usage
+ exit
+ fi
+ local pool_id=-1
+ local image_name=
+ local snap_name=
+ if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ snap_name="${BASH_REMATCH[3]}"
+ else
+ echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
+ exit
+ fi
+ init_env_admin
+ lookup $pool_id $image_name $snap_name
+ elif [ "$1"x = "recover"x ];then
+ if [ $# -lt 2 ] || [ $# -gt 3 ];then
+ usage
+ exit
+ fi
+ local pool_id=-1
+ local image_name=
+ local snap_name=@
+ local image_dir=
+ if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ snap_name="${BASH_REMATCH[3]}"
+ if [ "$snap_name"x = ""x ];then
+ snap_name=@@
+ fi
+ else
+ echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
+ exit
+ fi
+ if [ $# = 3 ];then
+ image_dir=`get_path $3`
+ if [ "image_dir"x = ""x ];then
+ echo "$3 invalid"
+ exit
+ fi
+ fi
+ init_env_admin
+ recover $pool_id $image_name $snap_name $image_dir
+ elif [ "$1"x = "scp_files"x ];then
+ if [ $# -gt 1 ];then
+ exit
+ fi
+ admin_parse_osd
+ scp_files
+ elif [ "$1"x = "scp_file"x ];then
+ if [ $# -gt 2 ];then
+ exit
+ fi
+ admin_parse_osd
+ scp_file $2
+ else
+ echo "$func: $1: command not found"
+ fi
+}
+
+admin_cmd $*
diff --git a/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh b/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh
new file mode 100755
index 000000000..876b47b90
--- /dev/null
+++ b/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh
@@ -0,0 +1,542 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+# unit test case for rbd-recover-tool
+
+#prepare:
+# - write config files: config/osd_host, config/mon_host, config/storage_path, config/mds_host if exist mds
+#step 1. rbd export all images as you need
+#step 2. stop all ceph services
+#step 3. use ceph_rbd_recover_tool to recover all images
+#step 4. compare md5sum of recover image with that of export image who has the same image name
+
+ssh_opt="-o ConnectTimeout=1"
+my_dir=$(dirname "$0")
+tool_dir=$my_dir
+
+#storage_path=$my_dir/config/storage_path
+mon_host=$my_dir/config/mon_host
+osd_host=$my_dir/config/osd_host
+mds_host=$my_dir/config/mds_host
+
+test_dir= # `cat $storage_path`
+export_dir= #$test_dir/export
+recover_dir= #$test_dir/recover
+image_names= #$test_dir/image_names
+online_images= #$test_dir/online_images, all images on ceph rbd pool
+gen_db= #$test_dir/gen_db, label database if exist
+pool=rbd
+pool_id=2
+
+function get_pool_id()
+{
+ local pool_id_file=/tmp/pool_id_file.$$$$
+ ceph osd pool stats $pool|head -n 1|awk '{print $4}' >$pool_id_file
+ if [ $? -ne 0 ];then
+ echo "$func: get pool id failed: pool = $pool"
+ rm -f $pool_id_file
+ exit
+ fi
+ pool_id=`cat $pool_id_file`
+ echo "$func: pool_id = $pool_id"
+ rm -f $pool_id_file
+}
+
+function init()
+{
+ local func="init"
+ if [ $# -eq 0 ];then
+ echo "$func: must input <path> to storage images, enough disk space is good"
+ exit
+ fi
+ if [ ! -s $osd_host ];then
+ echo "$func: config/osd_host not exists or empty"
+ exit
+ fi
+ if [ ! -s $mon_host ];then
+ echo "$func: config/mon_host not exists or empty"
+ exit
+ fi
+ if [ ! -e $mds_host ];then
+ echo "$func: config/mds_host not exists"
+ exit
+ fi
+ test_dir=$1
+ export_dir=$test_dir/export
+ recover_dir=$test_dir/recover
+ image_names=$test_dir/image_names
+ online_images=$test_dir/online_images
+ gen_db=$test_dir/gen_db
+
+ trap 'echo "ceph cluster is stopped ..."; exit;' INT
+ ceph -s >/dev/null
+ get_pool_id
+
+ mkdir -p $test_dir
+ mkdir -p $export_dir
+ mkdir -p $recover_dir
+ rm -rf $export_dir/*
+ rm -rf $recover_dir/*
+}
+
+function do_gen_database()
+{
+ local func="do_gen_database"
+ if [ -s $gen_db ] && [ `cat $gen_db` = 1 ];then
+ echo "$func: database already existed"
+ exit
+ fi
+ bash $tool_dir/rbd-recover-tool database
+ echo 1 >$gen_db
+}
+
+#check if all ceph processes are stopped
+function check_ceph_service()
+{
+ local func="check_ceph_service"
+ local res=`cat $osd_host $mon_host $mds_host|sort -u|tr -d [:blank:]|xargs -n 1 -I @ ssh $ssh_opt @ "ps aux|grep -E \"(ceph-osd|ceph-mon|ceph-mds)\"|grep -v grep"`
+ if [ "$res"x != ""x ];then
+ echo "$func: NOT all ceph services are stopped"
+ return 1
+ exit
+ fi
+ echo "$func: all ceph services are stopped"
+ return 0
+}
+
+function stop_ceph()
+{
+ local func="stop_ceph"
+ #cat osd_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-osd"
+ while read osd
+ do
+ {
+ osd=`echo $osd|tr -d [:blank:]`
+ if [ "$osd"x = ""x ];then
+ continue
+ fi
+ #ssh $ssh_opt $osd "killall ceph-osd ceph-mon ceph-mds" </dev/null
+ ssh $ssh_opt $osd "killall ceph-osd" </dev/null
+ } &
+ done < $osd_host
+ wait
+ echo "waiting kill all osd ..."
+ sleep 1
+ #cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon ceph-osd ceph-mds"
+ cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon"
+ #cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds ceph-mon ceph-osd"
+ cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds"
+}
+
+function create_image()
+{
+ local func="create_image"
+ if [ ${#} -lt 3 ];then
+ echo "create_image: parameters: <image_name> <size> <image_format>"
+ exit
+ fi
+ local image_name=$1
+ local size=$2
+ local image_format=$3
+ if [ $image_format -lt 1 ] || [ $image_format -gt 2 ];then
+ echo "$func: image_format must be 1 or 2"
+ exit
+ fi
+ local res=`rbd list|grep -E "^$1$"`
+ echo "$func $image_name ..."
+ if [ "$res"x = ""x ];then
+ rbd -p $pool create $image_name --size $size --image_format $image_format
+ else
+ if [ $image_format -eq 2 ];then
+ rbd snap ls $image_name|tail -n +2|awk '{print $2}'|xargs -n 1 -I % rbd snap unprotect $image_name@%
+ fi
+ rbd snap purge $image_name
+ #rbd rm $image_name
+ rbd -p $pool resize --allow-shrink --size $size $image_name
+ fi
+}
+
+function export_image()
+{
+ local func="export_image"
+
+ if [ $# -lt 2 ];then
+ echo "$func: parameters: <image_name> <image_format> [<image_size>]"
+ exit
+ fi
+
+ local image_name=$1
+ local format=$(($2))
+ local size=$(($3)) #MB
+
+ if [ $format -ne 1 ] && [ $format -ne 2 ];then
+ echo "$func: image format must be 1 or 2"
+ exit
+ fi
+
+ if [ $size -eq 0 ];then
+ size=24 #MB
+ echo "$func: size = $size"
+ fi
+ local mnt=/rbdfuse
+
+ mount |grep "rbd-fuse on /rbdfuse" &>/dev/null
+ if [ $? -ne 0 ];then
+ rbd-fuse $mnt
+ fi
+
+ create_image $image_name $size $format
+
+ dd conv=notrunc if=/dev/urandom of=$mnt/$image_name bs=4M count=$(($size/4))
+
+ local export_image_dir=$export_dir/pool_$pool_id/$image_name
+ mkdir -p $export_image_dir
+ local export_md5_nosnap=$export_image_dir/@md5_nosnap
+ >$export_md5_nosnap
+
+ local export_image_path=$export_image_dir/$image_name
+ rm -f $export_image_path
+
+ rbd export $pool/$image_name $export_image_path
+ md5sum $export_image_path |awk '{print $1}' >$export_md5_nosnap
+}
+
+function recover_image()
+{
+ local func="recover_snapshots"
+ if [ $# -lt 1 ];then
+ echo "$func: parameters: <image_name>"
+ exit
+ fi
+
+ local image_name=$1
+ #pool_id=29
+
+ local recover_image_dir=$recover_dir/pool_$pool_id/$image_name
+ mkdir -p $recover_image_dir
+ local recover_md5_nosnap=$recover_image_dir/@md5_nosnap
+ >$recover_md5_nosnap
+ local snapshot=
+
+ bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir
+ md5sum $recover_image_dir/$image_name|awk '{print $1}' >$recover_md5_nosnap
+}
+
+function make_snapshot()
+{
+ local func="make_snapshot"
+ if [ $# -lt 5 ];then
+ echo "$func: parameters: <ofile> <seek> <count> <snap> <export_image_dir>"
+ exit
+ fi
+ local ofile=$1
+ local seek=$(($2))
+ local count=$(($3))
+ local snap=$4
+ local export_image_dir=$5
+
+ if [ $seek -lt 0 ];then
+ echo "$func: seek can not be minus"
+ exit
+ fi
+
+ if [ $count -lt 1 ];then
+ echo "$func: count must great than zero"
+ exit
+ fi
+
+ echo "[$snap] $func ..."
+ echo "$1 $2 $3 $4"
+ rbd snap ls $image_name|grep $snap;
+
+ local res=$?
+ if [ $res -eq 0 ];then
+ return $res
+ fi
+
+ dd conv=notrunc if=/dev/urandom of=$ofile bs=1M count=$count seek=$seek 2>/dev/null
+ snapshot=$image_name@$snap
+ rbd snap create $snapshot
+ rm -f $export_image_dir/$snapshot
+ rbd export $pool/$image_name $export_image_dir/$snapshot
+ pushd $export_image_dir >/dev/null
+ md5sum $snapshot >> @md5
+ popd >/dev/null
+}
+
+function recover_snapshots()
+{
+ local func="recover_snapshots"
+ if [ $# -lt 1 ];then
+ echo "$func: parameters: <image_name>"
+ exit
+ fi
+
+ local image_name=$1
+ #pool_id=29
+
+ local recover_image_dir=$recover_dir/pool_$pool_id/$image_name
+ mkdir -p $recover_image_dir
+ local recover_md5=$recover_image_dir/@md5
+ >$recover_md5
+ local snapshot=
+
+
+ # recover head
+ bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir
+
+ # recover snapshots
+ for((i=1; i<10; i++))
+ do
+ snapshot=snap$i
+ bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name@$snapshot $recover_dir
+ pushd $recover_image_dir >/dev/null
+ local chksum=`md5sum $image_name|awk '{print $1}'`
+ echo "$chksum $image_name@$snapshot" >>@md5
+ popd >/dev/null
+ done
+}
+
+function export_snapshots()
+{
+ local func="export_snapshots"
+
+ if [ $# -lt 2 ];then
+ echo "$func: parameters: <image_name> <image_format> [<image_size>]"
+ exit
+ fi
+
+ local image_name=$1
+ local format=$(($2))
+ local size=$(($3)) #MB
+
+ if [ $format -ne 1 ] && [ $format -ne 2 ];then
+ echo "$func: image format must be 1 or 2"
+ exit
+ fi
+
+ if [ $size -eq 0 ];then
+ size=24 #MB
+ echo "$func: size = $size"
+ fi
+ local mnt=/rbdfuse
+
+ mount |grep "rbd-fuse on /rbdfuse" &>/dev/null
+ if [ $? -ne 0 ];then
+ rbd-fuse $mnt
+ fi
+
+ create_image $image_name $size $format
+
+ local export_image_dir=$export_dir/pool_$pool_id/$image_name
+ mkdir -p $export_image_dir
+ local export_md5=$export_image_dir/@md5
+ >$export_md5
+
+ # create 9 snapshots
+ # image = {object0, object1, object2, object3, object4, object5, ...}
+ #
+ # snap1 : init/write all objects
+ # snap2 : write object0
+ # snap3 : write object1
+ # snap4 : write object2
+ # snap5 : write object3
+ # snap6 : write object4
+ # snap7 : write object5
+ # snap8 : write object0
+ # snap9 : write object3
+
+ make_snapshot $mnt/$image_name 0 $size snap1 $export_image_dir
+ make_snapshot $mnt/$image_name 0 1 snap2 $export_image_dir
+ make_snapshot $mnt/$image_name 4 1 snap3 $export_image_dir
+ make_snapshot $mnt/$image_name 8 1 snap4 $export_image_dir
+ make_snapshot $mnt/$image_name 12 1 snap5 $export_image_dir
+ make_snapshot $mnt/$image_name 16 1 snap6 $export_image_dir
+ make_snapshot $mnt/$image_name 20 1 snap7 $export_image_dir
+ make_snapshot $mnt/$image_name 1 1 snap8 $export_image_dir
+ make_snapshot $mnt/$image_name 13 1 snap9 $export_image_dir
+}
+
+function check_recover_nosnap()
+{
+ local func="check_recover_nosnap"
+ if [ $# -lt 3 ];then
+ echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>"
+ fi
+ local export_md5=$1
+ local recover_md5=$2
+ local image_name=$3
+
+ local ifpassed="FAILED"
+
+ echo "================ < $image_name nosnap > ================"
+
+ local export_md5sum=`cat $export_md5`
+ local recover_md5sum=`cat $recover_md5`
+
+ if [ "$export_md5sum"x != ""x ] && [ "$export_md5sum"x = "$recover_md5sum"x ];then
+ ifpassed="PASSED"
+ fi
+ echo "export: $export_md5sum"
+ echo "recover: $recover_md5sum $ifpassed"
+}
+
+function check_recover_snapshots()
+{
+ local func="check_recover_snapshots"
+ if [ $# -lt 3 ];then
+ echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>"
+ fi
+ local export_md5=$1
+ local recover_md5=$2
+ local image_name=$3
+
+ local ifpassed="FAILED"
+
+ echo "================ < $image_name snapshots > ================"
+
+ OIFS=$IFS
+ IFS=$'\n'
+ local export_md5s=(`cat $export_md5`)
+ local recover_md5s=(`cat $recover_md5`)
+ for((i=0; i<9; i++))
+ do
+ OOIFS=$IFS
+ IFS=$' '
+ local x=$(($i+1))
+ snapshot=snap$x
+
+ local export_arr=(`echo ${export_md5s[$i]}`)
+ local recover_arr=(`echo ${recover_md5s[$i]}`)
+ echo "export: ${export_md5s[$i]}"
+ if [ "${export_arr[1]}"x != ""x ] && [ "${export_arr[1]}"x = "${recover_arr[1]}"x ];then
+ ifpassed="PASSED"
+ fi
+ echo "recover: ${recover_md5s[$i]} $ifpassed"
+ IFS=$OOIFS
+ done
+ IFS=$OIFS
+}
+
+# step 1: export image, snapshot
+function do_export_nosnap()
+{
+ export_image image_v1_nosnap 1
+ export_image image_v2_nosnap 2
+}
+
+function do_export_snap()
+{
+ export_snapshots image_v1_snap 1
+ export_snapshots image_v2_snap 2
+}
+
+# step 2: stop ceph cluster and gen database
+function stop_cluster_gen_database()
+{
+ trap 'echo stop ceph cluster failed; exit;' INT HUP
+ stop_ceph
+ sleep 2
+ check_ceph_service
+ local res=$?
+ while [ $res -ne 0 ]
+ do
+ stop_ceph
+ sleep 2
+ check_ceph_service
+ res=$?
+ done
+
+ echo 0 >$gen_db
+ do_gen_database
+}
+
+# step 3: recover image,snapshot
+function do_recover_nosnap()
+{
+ recover_image image_v1_nosnap
+ recover_image image_v2_nosnap
+}
+
+function do_recover_snap()
+{
+ recover_snapshots image_v1_snap
+ recover_snapshots image_v2_snap
+}
+
+# step 4: check md5sum pair<export_md5sum, recover_md5sum>
+function do_check_recover_nosnap()
+{
+ local image1=image_v1_nosnap
+ local image2=image_v2_nosnap
+
+ local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5_nosnap
+ local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5_nosnap
+ local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5_nosnap
+ local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5_nosnap
+
+ check_recover_nosnap $export_md5_1 $recover_md5_1 $image1
+ check_recover_nosnap $export_md5_2 $recover_md5_2 $image2
+}
+
+function do_check_recover_snap()
+{
+ local image1=image_v1_snap
+ local image2=image_v2_snap
+
+ local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5
+ local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5
+ local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5
+ local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5
+
+ check_recover_snapshots $export_md5_1 $recover_md5_1 $image1
+ check_recover_snapshots $export_md5_2 $recover_md5_2 $image2
+}
+
+function test_case_1()
+{
+ do_export_nosnap
+ stop_cluster_gen_database
+ do_recover_nosnap
+ do_check_recover_nosnap
+}
+
+function test_case_2()
+{
+ do_export_snap
+ stop_cluster_gen_database
+ do_recover_snap
+ do_check_recover_snap
+}
+
+function test_case_3()
+{
+ do_export_nosnap
+ do_export_snap
+
+ stop_cluster_gen_database
+
+ do_recover_nosnap
+ do_recover_snap
+
+ do_check_recover_nosnap
+ do_check_recover_snap
+}
+
+
+init $*
+test_case_3
diff --git a/src/tools/rbd_wnbd/CMakeLists.txt b/src/tools/rbd_wnbd/CMakeLists.txt
new file mode 100644
index 000000000..86c41b2ee
--- /dev/null
+++ b/src/tools/rbd_wnbd/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_executable(rbd-wnbd rbd_wnbd.cc wnbd_handler.cc wnbd_wmi.cc)
+set_target_properties(
+ rbd-wnbd PROPERTIES COMPILE_FLAGS
+ "-fpermissive -I${WNBD_INCLUDE_DIRS}")
+target_link_libraries(
+ rbd-wnbd setupapi rpcrt4
+ wbemuuid oleaut32
+ ${WNBD_LIBRARIES}
+ ${Boost_FILESYSTEM_LIBRARY}
+ librbd librados global)
+install(TARGETS rbd-wnbd DESTINATION bin)
diff --git a/src/tools/rbd_wnbd/rbd_wnbd.cc b/src/tools/rbd_wnbd/rbd_wnbd.cc
new file mode 100644
index 000000000..a9e160456
--- /dev/null
+++ b/src/tools/rbd_wnbd/rbd_wnbd.cc
@@ -0,0 +1,1871 @@
+/*
+ * rbd-wnbd - RBD in userspace
+ *
+ * Copyright (C) 2020 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+*/
+
+#include <objidl.h>
+// LOCK_WRITE is also defined by objidl.h, we have to avoid
+// a collision.
+#undef LOCK_WRITE
+
+#include "include/int_types.h"
+
+#include <atomic>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "wnbd_handler.h"
+#include "wnbd_wmi.h"
+#include "rbd_wnbd.h"
+
+#include <fstream>
+#include <memory>
+#include <regex>
+
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/version.h"
+#include "common/win32/service.h"
+#include "common/win32/wstring.h"
+#include "common/admin_socket_client.h"
+
+#include "global/global_init.h"
+
+#include "include/uuid.h"
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+
+#include <shellapi.h>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-wnbd: "
+
+using namespace std;
+
+// Wait 2s before recreating the wmi subscription in case of errors
+#define WMI_SUBSCRIPTION_RETRY_INTERVAL 2
+// SCSI adapter modification events aren't received until the entire polling
+// interval has elapsed (unlike other WMI classes, such as Msvm_ComputerSystem).
+// With longer intervals, it even seems to miss events. For this reason,
+// we're using a relatively short interval but have adapter state monitoring
+// as an optional feature, mainly used for dev / driver certification purposes.
+#define WNBD_ADAPTER_WMI_POLL_INTERVAL 2
+// Wait for wmi events up to two seconds
+#define WMI_EVENT_TIMEOUT 2
+
+bool is_process_running(DWORD pid)
+{
+ HANDLE process = OpenProcess(SYNCHRONIZE, FALSE, pid);
+ DWORD ret = WaitForSingleObject(process, 0);
+ CloseHandle(process);
+ return ret == WAIT_TIMEOUT;
+}
+
+DWORD WNBDActiveDiskIterator::fetch_list(
+ PWNBD_CONNECTION_LIST* conn_list)
+{
+ DWORD curr_buff_sz = 0;
+ DWORD buff_sz = 0;
+ DWORD err = 0;
+ PWNBD_CONNECTION_LIST tmp_list = NULL;
+
+ // We're using a loop because other connections may show up by the time
+ // we retry.
+ do {
+ if (tmp_list)
+ free(tmp_list);
+
+ if (buff_sz) {
+ tmp_list = (PWNBD_CONNECTION_LIST) calloc(1, buff_sz);
+ if (!tmp_list) {
+ derr << "Could not allocate " << buff_sz << " bytes." << dendl;
+ err = ERROR_NOT_ENOUGH_MEMORY;
+ break;
+ }
+ }
+
+ curr_buff_sz = buff_sz;
+ // If the buffer is too small, the return value is 0 and "BufferSize"
+ // will contain the required size. This is counterintuitive, but
+ // Windows drivers can't return a buffer as well as a non-zero status.
+ err = WnbdList(tmp_list, &buff_sz);
+ if (err)
+ break;
+ } while (curr_buff_sz < buff_sz);
+
+ if (err) {
+ if (tmp_list)
+ free(tmp_list);
+ } else {
+ *conn_list = tmp_list;
+ }
+ return err;
+}
+
+WNBDActiveDiskIterator::WNBDActiveDiskIterator()
+{
+ DWORD status = WNBDActiveDiskIterator::fetch_list(&conn_list);
+ switch (status) {
+ case 0:
+ // no error
+ break;
+ case ERROR_OPEN_FAILED:
+ error = -ENOENT;
+ break;
+ default:
+ error = -EINVAL;
+ break;
+ }
+}
+
+WNBDActiveDiskIterator::~WNBDActiveDiskIterator()
+{
+ if (conn_list) {
+ free(conn_list);
+ conn_list = NULL;
+ }
+}
+
+bool WNBDActiveDiskIterator::get(Config *cfg)
+{
+ index += 1;
+ *cfg = Config();
+
+ if (!conn_list || index >= (int)conn_list->Count) {
+ return false;
+ }
+
+ auto conn_info = conn_list->Connections[index];
+ auto conn_props = conn_info.Properties;
+
+ if (strncmp(conn_props.Owner, RBD_WNBD_OWNER_NAME, WNBD_MAX_OWNER_LENGTH)) {
+ dout(10) << "Ignoring disk: " << conn_props.InstanceName
+ << ". Owner: " << conn_props.Owner << dendl;
+ return this->get(cfg);
+ }
+
+ error = load_mapping_config_from_registry(conn_props.InstanceName, cfg);
+ if (error) {
+ derr << "Could not load registry disk info for: "
+ << conn_props.InstanceName << ". Error: " << error << dendl;
+ return false;
+ }
+
+ cfg->disk_number = conn_info.DiskNumber;
+ cfg->serial_number = std::string(conn_props.SerialNumber);
+ cfg->pid = conn_props.Pid;
+ cfg->active = cfg->disk_number > 0 && is_process_running(conn_props.Pid);
+ cfg->wnbd_mapped = true;
+
+ return true;
+}
+
+RegistryDiskIterator::RegistryDiskIterator()
+{
+ reg_key = new RegistryKey(g_ceph_context, HKEY_LOCAL_MACHINE,
+ SERVICE_REG_KEY, false);
+ if (!reg_key->hKey) {
+ if (!reg_key->missingKey)
+ error = -EINVAL;
+ return;
+ }
+
+ if (RegQueryInfoKey(reg_key->hKey, NULL, NULL, NULL, &subkey_count,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
+ derr << "Could not query registry key: " << SERVICE_REG_KEY << dendl;
+ error = -EINVAL;
+ return;
+ }
+}
+
+bool RegistryDiskIterator::get(Config *cfg)
+{
+ index += 1;
+ *cfg = Config();
+
+ if (!reg_key->hKey || !subkey_count) {
+ return false;
+ }
+
+ char subkey_name[MAX_PATH] = {0};
+ DWORD subkey_name_sz = MAX_PATH;
+ int err = RegEnumKeyEx(
+ reg_key->hKey, index, subkey_name, &subkey_name_sz,
+ NULL, NULL, NULL, NULL);
+ if (err == ERROR_NO_MORE_ITEMS) {
+ return false;
+ } else if (err) {
+ derr << "Could not enumerate registry. Error: " << err << dendl;
+ error = -EINVAL;
+ return false;
+ }
+
+ if (load_mapping_config_from_registry(subkey_name, cfg)) {
+ error = -EINVAL;
+ return false;
+ };
+
+ return true;
+}
+
+// Iterate over all RBD mappings, getting info from the registry and the driver.
+bool WNBDDiskIterator::get(Config *cfg)
+{
+ *cfg = Config();
+
+ bool found_active = active_iterator.get(cfg);
+ if (found_active) {
+ active_devices.insert(cfg->devpath);
+ return true;
+ }
+
+ error = active_iterator.get_error();
+ if (error) {
+ dout(5) << ": WNBD iterator error: " << error << dendl;
+ return false;
+ }
+
+ while(registry_iterator.get(cfg)) {
+ if (active_devices.find(cfg->devpath) != active_devices.end()) {
+ // Skip active devices that were already yielded.
+ continue;
+ }
+ return true;
+ }
+
+ error = registry_iterator.get_error();
+ if (error) {
+ dout(5) << ": Registry iterator error: " << error << dendl;
+ }
+ return false;
+}
+
+int get_exe_path(std::string& path) {
+ char buffer[MAX_PATH];
+ DWORD err = 0;
+
+ int ret = GetModuleFileNameA(NULL, buffer, MAX_PATH);
+ if (!ret || ret == MAX_PATH) {
+ err = GetLastError();
+ derr << "Could not retrieve executable path. "
+ << "Error: " << win32_strerror(err) << dendl;
+ return -EINVAL;
+ }
+
+ path = buffer;
+ return 0;
+}
+
+std::string get_cli_args() {
+ std::ostringstream cmdline;
+ for (int i=1; i<__argc; i++) {
+ if (i > 1)
+ cmdline << " ";
+ cmdline << std::quoted(__argv[i]);
+ }
+ return cmdline.str();
+}
+
+int send_map_request(std::string arguments) {
+ dout(15) << __func__ << ": command arguments: " << arguments << dendl;
+
+ BYTE request_buff[SERVICE_PIPE_BUFFSZ] = { 0 };
+ ServiceRequest* request = (ServiceRequest*) request_buff;
+ request->command = Connect;
+ arguments.copy(
+ (char*)request->arguments,
+ SERVICE_PIPE_BUFFSZ - FIELD_OFFSET(ServiceRequest, arguments));
+ ServiceReply reply = { 0 };
+
+ DWORD bytes_read = 0;
+ BOOL success = CallNamedPipe(
+ SERVICE_PIPE_NAME,
+ request_buff,
+ SERVICE_PIPE_BUFFSZ,
+ &reply,
+ sizeof(reply),
+ &bytes_read,
+ DEFAULT_MAP_TIMEOUT_MS);
+ if (!success) {
+ DWORD err = GetLastError();
+ derr << "Could not send device map request. "
+ << "Make sure that the ceph service is running. "
+ << "Error: " << win32_strerror(err) << dendl;
+ return -EINVAL;
+ }
+ if (reply.status) {
+ derr << "The ceph service failed to map the image. "
+ << "Check the log file or pass '-f' (foreground mode) "
+ << "for additional information. "
+ << "Error: " << cpp_strerror(reply.status)
+ << dendl;
+ }
+
+ return reply.status;
+}
+
+// Spawn a subprocess using the specified "rbd-wnbd" command
+// arguments. A pipe is passed to the child process,
+// which will allow it to communicate the mapping status
+int map_device_using_suprocess(std::string arguments, int timeout_ms)
+{
+ STARTUPINFO si;
+ PROCESS_INFORMATION pi;
+ char ch;
+ DWORD err = 0, status = 0;
+ int exit_code = 0;
+ std::ostringstream command_line;
+ std::string exe_path;
+ // Windows async IO context
+ OVERLAPPED connect_o, read_o;
+ HANDLE connect_event = NULL, read_event = NULL;
+ // Used for waiting on multiple events that are going to be initialized later.
+ HANDLE wait_events[2] = { INVALID_HANDLE_VALUE, INVALID_HANDLE_VALUE};
+ DWORD bytes_read = 0;
+ // We may get a command line containing an old pipe handle when
+ // recreating mappings, so we'll have to replace it.
+ std::regex pipe_pattern("([\'\"]?--pipe-name[\'\"]? +[\'\"]?[^ ]+[\'\"]?)");
+
+ uuid_d uuid;
+ uuid.generate_random();
+ std::ostringstream pipe_name;
+ pipe_name << "\\\\.\\pipe\\rbd-wnbd-" << uuid;
+
+ // Create an unique named pipe to communicate with the child. */
+ HANDLE pipe_handle = CreateNamedPipe(
+ pipe_name.str().c_str(),
+ PIPE_ACCESS_INBOUND | FILE_FLAG_FIRST_PIPE_INSTANCE |
+ FILE_FLAG_OVERLAPPED,
+ PIPE_WAIT,
+ 1, // Only accept one instance
+ SERVICE_PIPE_BUFFSZ,
+ SERVICE_PIPE_BUFFSZ,
+ SERVICE_PIPE_TIMEOUT_MS,
+ NULL);
+ if (pipe_handle == INVALID_HANDLE_VALUE) {
+ err = GetLastError();
+ derr << "CreateNamedPipe failed: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto finally;
+ }
+ connect_event = CreateEvent(0, TRUE, FALSE, NULL);
+ read_event = CreateEvent(0, TRUE, FALSE, NULL);
+ if (!connect_event || !read_event) {
+ err = GetLastError();
+ derr << "CreateEvent failed: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto finally;
+ }
+ connect_o.hEvent = connect_event;
+ read_o.hEvent = read_event;
+
+ status = ConnectNamedPipe(pipe_handle, &connect_o);
+ err = GetLastError();
+ if (status || err != ERROR_IO_PENDING) {
+ if (status)
+ err = status;
+ derr << "ConnectNamedPipe failed: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto finally;
+ }
+ err = 0;
+
+ dout(5) << __func__ << ": command arguments: " << arguments << dendl;
+
+ // We'll avoid running arbitrary commands, instead using the executable
+ // path of this process (expected to be the full rbd-wnbd.exe path).
+ err = get_exe_path(exe_path);
+ if (err) {
+ exit_code = -EINVAL;
+ goto finally;
+ }
+ command_line << std::quoted(exe_path)
+ << " " << std::regex_replace(arguments, pipe_pattern, "")
+ << " --pipe-name " << pipe_name.str();
+
+ dout(5) << __func__ << ": command line: " << command_line.str() << dendl;
+
+ GetStartupInfo(&si);
+ // Create a detached child
+ if (!CreateProcess(NULL, (char*)command_line.str().c_str(),
+ NULL, NULL, FALSE, DETACHED_PROCESS,
+ NULL, NULL, &si, &pi)) {
+ err = GetLastError();
+ derr << "CreateProcess failed: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto finally;
+ }
+
+ wait_events[0] = connect_event;
+ wait_events[1] = pi.hProcess;
+ status = WaitForMultipleObjects(2, wait_events, FALSE, timeout_ms);
+ switch(status) {
+ case WAIT_OBJECT_0:
+ if (!GetOverlappedResult(pipe_handle, &connect_o, &bytes_read, TRUE)) {
+ err = GetLastError();
+ derr << "Couln't establish a connection with the child process. "
+ << "Error: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto clean_process;
+ }
+ // We have an incoming connection.
+ break;
+ case WAIT_OBJECT_0 + 1:
+ // The process has exited prematurely.
+ goto clean_process;
+ case WAIT_TIMEOUT:
+ derr << "Timed out waiting for child process connection." << dendl;
+ goto clean_process;
+ default:
+ derr << "Failed waiting for child process. Status: " << status << dendl;
+ goto clean_process;
+ }
+ // Block and wait for child to say it is ready.
+ dout(5) << __func__ << ": waiting for child notification." << dendl;
+ if (!ReadFile(pipe_handle, &ch, 1, NULL, &read_o)) {
+ err = GetLastError();
+ if (err != ERROR_IO_PENDING) {
+ derr << "Receiving child process reply failed with: "
+ << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto clean_process;
+ }
+ }
+ wait_events[0] = read_event;
+ wait_events[1] = pi.hProcess;
+ // The RBD daemon is expected to write back right after opening the
+ // pipe. We'll use the same timeout value for now.
+ status = WaitForMultipleObjects(2, wait_events, FALSE, timeout_ms);
+ switch(status) {
+ case WAIT_OBJECT_0:
+ if (!GetOverlappedResult(pipe_handle, &read_o, &bytes_read, TRUE)) {
+ err = GetLastError();
+ derr << "Receiving child process reply failed with: "
+ << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto clean_process;
+ }
+ break;
+ case WAIT_OBJECT_0 + 1:
+ // The process has exited prematurely.
+ goto clean_process;
+ case WAIT_TIMEOUT:
+ derr << "Timed out waiting for child process message." << dendl;
+ goto clean_process;
+ default:
+ derr << "Failed waiting for child process. Status: " << status << dendl;
+ goto clean_process;
+ }
+
+ dout(5) << __func__ << ": received child notification." << dendl;
+ goto finally;
+
+ clean_process:
+ if (!is_process_running(pi.dwProcessId)) {
+ GetExitCodeProcess(pi.hProcess, (PDWORD)&exit_code);
+ derr << "Daemon failed with: " << cpp_strerror(exit_code) << dendl;
+ } else {
+ // The process closed the pipe without notifying us or exiting.
+ // This is quite unlikely, but we'll terminate the process.
+ dout(0) << "Terminating unresponsive process." << dendl;
+ TerminateProcess(pi.hProcess, 1);
+ exit_code = -EINVAL;
+ }
+
+ finally:
+ if (exit_code)
+ derr << "Could not start RBD daemon." << dendl;
+ if (pipe_handle)
+ CloseHandle(pipe_handle);
+ if (connect_event)
+ CloseHandle(connect_event);
+ if (read_event)
+ CloseHandle(read_event);
+ return exit_code;
+}
+
+BOOL WINAPI console_handler_routine(DWORD dwCtrlType)
+{
+ dout(0) << "Received control signal: " << dwCtrlType
+ << ". Exiting." << dendl;
+
+ std::unique_lock l{shutdown_lock};
+ if (handler)
+ handler->shutdown();
+
+ return true;
+}
+
+int save_config_to_registry(Config* cfg)
+{
+ std::string strKey{ SERVICE_REG_KEY };
+ strKey.append("\\");
+ strKey.append(cfg->devpath);
+ auto reg_key = RegistryKey(
+ g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str(), true);
+ if (!reg_key.hKey) {
+ return -EINVAL;
+ }
+
+ int ret_val = 0;
+ // Registry writes are immediately available to other processes.
+ // Still, we'll do a flush to ensure that the mapping can be
+ // recreated after a system crash.
+ if (reg_key.set("pid", getpid()) ||
+ reg_key.set("devpath", cfg->devpath) ||
+ reg_key.set("poolname", cfg->poolname) ||
+ reg_key.set("nsname", cfg->nsname) ||
+ reg_key.set("imgname", cfg->imgname) ||
+ reg_key.set("snapname", cfg->snapname) ||
+ reg_key.set("command_line", get_cli_args()) ||
+ reg_key.set("persistent", cfg->persistent) ||
+ reg_key.set("admin_sock_path", g_conf()->admin_socket) ||
+ reg_key.flush()) {
+ ret_val = -EINVAL;
+ }
+
+ return ret_val;
+}
+
+int remove_config_from_registry(Config* cfg)
+{
+ std::string strKey{ SERVICE_REG_KEY };
+ strKey.append("\\");
+ strKey.append(cfg->devpath);
+ return RegistryKey::remove(
+ g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str());
+}
+
+int load_mapping_config_from_registry(string devpath, Config* cfg)
+{
+ std::string strKey{ SERVICE_REG_KEY };
+ strKey.append("\\");
+ strKey.append(devpath);
+ auto reg_key = RegistryKey(
+ g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str(), false);
+ if (!reg_key.hKey) {
+ if (reg_key.missingKey)
+ return -ENOENT;
+ else
+ return -EINVAL;
+ }
+
+ reg_key.get("devpath", cfg->devpath);
+ reg_key.get("poolname", cfg->poolname);
+ reg_key.get("nsname", cfg->nsname);
+ reg_key.get("imgname", cfg->imgname);
+ reg_key.get("snapname", cfg->snapname);
+ reg_key.get("command_line", cfg->command_line);
+ reg_key.get("persistent", cfg->persistent);
+ reg_key.get("admin_sock_path", cfg->admin_sock_path);
+
+ return 0;
+}
+
+int restart_registered_mappings(
+ int worker_count,
+ int total_timeout,
+ int image_map_timeout)
+{
+ Config cfg;
+ WNBDDiskIterator iterator;
+ int r;
+ std::atomic<int> err = 0;
+
+ dout(0) << "remounting persistent disks" << dendl;
+
+ int total_timeout_ms = max(total_timeout, total_timeout * 1000);
+ int image_map_timeout_ms = max(image_map_timeout, image_map_timeout * 1000);
+
+ LARGE_INTEGER start_t, counter_freq;
+ QueryPerformanceFrequency(&counter_freq);
+ QueryPerformanceCounter(&start_t);
+
+ boost::asio::thread_pool pool(worker_count);
+ while (iterator.get(&cfg)) {
+ if (cfg.command_line.empty()) {
+ derr << "Could not recreate mapping, missing command line: "
+ << cfg.devpath << dendl;
+ err = -EINVAL;
+ continue;
+ }
+ if (cfg.wnbd_mapped) {
+ dout(1) << __func__ << ": device already mapped: "
+ << cfg.devpath << dendl;
+ continue;
+ }
+ if (!cfg.persistent) {
+ dout(1) << __func__ << ": cleaning up non-persistent mapping: "
+ << cfg.devpath << dendl;
+ r = remove_config_from_registry(&cfg);
+ if (r) {
+ derr << __func__ << ": could not clean up non-persistent mapping: "
+ << cfg.devpath << dendl;
+ }
+ continue;
+ }
+
+ boost::asio::post(pool,
+ [cfg, start_t, counter_freq, total_timeout_ms,
+ image_map_timeout_ms, &err]()
+ {
+ LARGE_INTEGER curr_t, elapsed_ms;
+ QueryPerformanceCounter(&curr_t);
+ elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart;
+ elapsed_ms.QuadPart *= 1000;
+ elapsed_ms.QuadPart /= counter_freq.QuadPart;
+
+ int time_left_ms = max(
+ 0,
+ total_timeout_ms - (int)elapsed_ms.QuadPart);
+ time_left_ms = min(image_map_timeout_ms, time_left_ms);
+ if (!time_left_ms) {
+ err = -ETIMEDOUT;
+ return;
+ }
+
+ dout(1) << "Remapping: " << cfg.devpath
+ << ". Timeout: " << time_left_ms << " ms." << dendl;
+
+ // We'll try to map all devices and return a non-zero value
+ // if any of them fails.
+ int r = map_device_using_suprocess(cfg.command_line, time_left_ms);
+ if (r) {
+ err = r;
+ derr << "Could not create mapping: "
+ << cfg.devpath << ". Error: " << r << dendl;
+ } else {
+ dout(1) << "Successfully remapped: " << cfg.devpath << dendl;
+ }
+ });
+ }
+ pool.join();
+
+ r = iterator.get_error();
+ if (r) {
+ derr << "Could not fetch all mappings. Error: " << r << dendl;
+ err = r;
+ }
+
+ return err;
+}
+
+int disconnect_all_mappings(
+ bool unregister,
+ bool hard_disconnect,
+ int soft_disconnect_timeout,
+ int worker_count)
+{
+ // Although not generally recommended, soft_disconnect_timeout can be 0,
+ // which means infinite timeout.
+ ceph_assert(soft_disconnect_timeout >= 0);
+ ceph_assert(worker_count > 0);
+ int64_t timeout_ms = soft_disconnect_timeout * 1000;
+
+ Config cfg;
+ WNBDActiveDiskIterator iterator;
+ int r;
+ std::atomic<int> err = 0;
+
+ boost::asio::thread_pool pool(worker_count);
+ LARGE_INTEGER start_t, counter_freq;
+ QueryPerformanceFrequency(&counter_freq);
+ QueryPerformanceCounter(&start_t);
+ while (iterator.get(&cfg)) {
+ boost::asio::post(pool,
+ [cfg, start_t, counter_freq, timeout_ms,
+ hard_disconnect, unregister, &err]() mutable
+ {
+ LARGE_INTEGER curr_t, elapsed_ms;
+ QueryPerformanceCounter(&curr_t);
+ elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart;
+ elapsed_ms.QuadPart *= 1000;
+ elapsed_ms.QuadPart /= counter_freq.QuadPart;
+
+ int64_t time_left_ms = max((int64_t)0, timeout_ms - elapsed_ms.QuadPart);
+
+ cfg.hard_disconnect = hard_disconnect || !time_left_ms;
+ cfg.hard_disconnect_fallback = true;
+ cfg.soft_disconnect_timeout = time_left_ms / 1000;
+
+ dout(1) << "Removing mapping: " << cfg.devpath
+ << ". Timeout: " << cfg.soft_disconnect_timeout
+ << "s. Hard disconnect: " << cfg.hard_disconnect
+ << dendl;
+
+ int r = do_unmap(&cfg, unregister);
+ if (r) {
+ err = r;
+ derr << "Could not remove mapping: " << cfg.devpath
+ << ". Error: " << r << dendl;
+ } else {
+ dout(1) << "Successfully removed mapping: " << cfg.devpath << dendl;
+ }
+ });
+ }
+ pool.join();
+
+ r = iterator.get_error();
+ if (r == -ENOENT) {
+ dout(0) << __func__ << ": wnbd adapter unavailable, "
+ << "assuming that no wnbd mappings exist." << dendl;
+ err = 0;
+ } else if (r) {
+ derr << "Could not fetch all mappings. Error: " << r << dendl;
+ err = r;
+ }
+
+ return err;
+}
+
+class RBDService : public ServiceBase {
+ private:
+ bool hard_disconnect;
+ int soft_disconnect_timeout;
+ int thread_count;
+ int service_start_timeout;
+ int image_map_timeout;
+ bool remap_failure_fatal;
+ bool adapter_monitoring_enabled;
+
+ std::thread adapter_monitor_thread;
+
+ ceph::mutex start_lock = ceph::make_mutex("RBDService::StartLocker");
+ ceph::mutex shutdown_lock = ceph::make_mutex("RBDService::ShutdownLocker");
+ bool started = false;
+ std::atomic<bool> stop_requsted = false;
+
+ public:
+ RBDService(bool _hard_disconnect,
+ int _soft_disconnect_timeout,
+ int _thread_count,
+ int _service_start_timeout,
+ int _image_map_timeout,
+ bool _remap_failure_fatal,
+ bool _adapter_monitoring_enabled)
+ : ServiceBase(g_ceph_context)
+ , hard_disconnect(_hard_disconnect)
+ , soft_disconnect_timeout(_soft_disconnect_timeout)
+ , thread_count(_thread_count)
+ , service_start_timeout(_service_start_timeout)
+ , image_map_timeout(_image_map_timeout)
+ , remap_failure_fatal(_remap_failure_fatal)
+ , adapter_monitoring_enabled(_adapter_monitoring_enabled)
+ {
+ }
+
+ static int execute_command(ServiceRequest* request)
+ {
+ switch(request->command) {
+ case Connect:
+ dout(1) << "Received device connect request. Command line: "
+ << (char*)request->arguments << dendl;
+ // TODO: use the configured service map timeout.
+ // TODO: add ceph.conf options.
+ return map_device_using_suprocess(
+ (char*)request->arguments, DEFAULT_MAP_TIMEOUT_MS);
+ default:
+ dout(1) << "Received unsupported command: "
+ << request->command << dendl;
+ return -ENOSYS;
+ }
+ }
+
+ static DWORD handle_connection(HANDLE pipe_handle)
+ {
+ PBYTE message[SERVICE_PIPE_BUFFSZ] = { 0 };
+ DWORD bytes_read = 0, bytes_written = 0;
+ DWORD err = 0;
+ DWORD reply_sz = 0;
+ ServiceReply reply = { 0 };
+
+ dout(20) << __func__ << ": Receiving message." << dendl;
+ BOOL success = ReadFile(
+ pipe_handle, message, SERVICE_PIPE_BUFFSZ,
+ &bytes_read, NULL);
+ if (!success || !bytes_read) {
+ err = GetLastError();
+ derr << "Could not read service command: "
+ << win32_strerror(err) << dendl;
+ goto exit;
+ }
+
+ dout(20) << __func__ << ": Executing command." << dendl;
+ reply.status = execute_command((ServiceRequest*) message);
+ reply_sz = sizeof(reply);
+
+ dout(20) << __func__ << ": Sending reply. Status: "
+ << reply.status << dendl;
+ success = WriteFile(
+ pipe_handle, &reply, reply_sz, &bytes_written, NULL);
+ if (!success || reply_sz != bytes_written) {
+ err = GetLastError();
+ derr << "Could not send service command result: "
+ << win32_strerror(err) << dendl;
+ }
+
+exit:
+ dout(20) << __func__ << ": Cleaning up connection." << dendl;
+ FlushFileBuffers(pipe_handle);
+ DisconnectNamedPipe(pipe_handle);
+ CloseHandle(pipe_handle);
+
+ return err;
+ }
+
+ // We have to support Windows server 2016. Unix sockets only work on
+ // WS 2019, so we can't use the Ceph admin socket abstraction.
+ // Getting the Ceph admin sockets to work with Windows named pipes
+ // would require quite a few changes.
+ static DWORD accept_pipe_connection() {
+ DWORD err = 0;
+ // We're currently using default ACLs, which grant full control to the
+ // LocalSystem account and administrator as well as the owner.
+ dout(20) << __func__ << ": opening new pipe instance" << dendl;
+ HANDLE pipe_handle = CreateNamedPipe(
+ SERVICE_PIPE_NAME,
+ PIPE_ACCESS_DUPLEX,
+ PIPE_TYPE_MESSAGE | PIPE_READMODE_MESSAGE | PIPE_WAIT,
+ PIPE_UNLIMITED_INSTANCES,
+ SERVICE_PIPE_BUFFSZ,
+ SERVICE_PIPE_BUFFSZ,
+ SERVICE_PIPE_TIMEOUT_MS,
+ NULL);
+ if (pipe_handle == INVALID_HANDLE_VALUE) {
+ err = GetLastError();
+ derr << "CreatePipe failed: " << win32_strerror(err) << dendl;
+ return -EINVAL;
+ }
+
+ dout(20) << __func__ << ": waiting for connections." << dendl;
+ BOOL connected = ConnectNamedPipe(pipe_handle, NULL);
+ if (!connected) {
+ err = GetLastError();
+ if (err != ERROR_PIPE_CONNECTED) {
+ derr << "Pipe connection failed: " << win32_strerror(err) << dendl;
+
+ CloseHandle(pipe_handle);
+ return err;
+ }
+ }
+
+ dout(20) << __func__ << ": Connection received." << dendl;
+ // We'll handle the connection in a separate thread and at the same time
+ // accept a new connection.
+ HANDLE handler_thread = CreateThread(
+ NULL, 0, (LPTHREAD_START_ROUTINE) handle_connection, pipe_handle, 0, 0);
+ if (!handler_thread) {
+ err = GetLastError();
+ derr << "Could not start pipe connection handler thread: "
+ << win32_strerror(err) << dendl;
+ CloseHandle(pipe_handle);
+ } else {
+ CloseHandle(handler_thread);
+ }
+
+ return err;
+ }
+
+ static int pipe_server_loop(LPVOID arg)
+ {
+ dout(5) << "Accepting admin pipe connections." << dendl;
+ while (1) {
+ // This call will block until a connection is received, which will
+ // then be handled in a separate thread. The function returns, allowing
+ // us to accept another simultaneous connection.
+ accept_pipe_connection();
+ }
+ return 0;
+ }
+
+ int create_pipe_server() {
+ HANDLE handler_thread = CreateThread(
+ NULL, 0, (LPTHREAD_START_ROUTINE) pipe_server_loop, NULL, 0, 0);
+ DWORD err = 0;
+
+ if (!handler_thread) {
+ err = GetLastError();
+ derr << "Could not start pipe server: " << win32_strerror(err) << dendl;
+ } else {
+ CloseHandle(handler_thread);
+ }
+
+ return err;
+ }
+
+ void monitor_wnbd_adapter()
+ {
+ dout(5) << __func__ << ": initializing COM" << dendl;
+ // Initialize the Windows COM library for this thread.
+ COMBootstrapper com_bootstrapper;
+ HRESULT hres = com_bootstrapper.initialize();
+ if (FAILED(hres)) {
+ return;
+ }
+
+ WmiSubscription subscription = subscribe_wnbd_adapter_events(
+ WNBD_ADAPTER_WMI_POLL_INTERVAL);
+ dout(5) << __func__ << ": initializing wmi subscription" << dendl;
+ hres = subscription.initialize();
+
+ dout(0) << "monitoring wnbd adapter state changes" << dendl;
+ // The event watcher will wait at most WMI_EVENT_TIMEOUT (2s)
+ // and exit the loop if the service is being stopped.
+ while (!stop_requsted) {
+ IWbemClassObject* object;
+ ULONG returned = 0;
+
+ if (FAILED(hres)) {
+ derr << "couldn't retrieve wnbd adapter events, wmi hresult: "
+ << hres << ". Reestablishing wmi listener in "
+ << WMI_SUBSCRIPTION_RETRY_INTERVAL << " seconds." << dendl;
+ subscription.close();
+ Sleep(WMI_SUBSCRIPTION_RETRY_INTERVAL * 1000);
+
+ dout(20) << "recreating wnbd adapter wmi subscription" << dendl;
+ subscription = subscribe_wnbd_adapter_events(
+ WNBD_ADAPTER_WMI_POLL_INTERVAL);
+ hres = subscription.initialize();
+ continue;
+ }
+
+ dout(20) << "fetching wnbd adapter events" << dendl;
+ hres = subscription.next(
+ WMI_EVENT_TIMEOUT * 1000,
+ 1, // we'll process one event at a time
+ &object,
+ &returned);
+
+ if (!FAILED(hres) && returned) {
+ if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceCreationEvent")) {
+ dout(0) << "wnbd adapter (re)created, remounting disks" << dendl;
+ restart_registered_mappings(
+ thread_count, service_start_timeout, image_map_timeout);
+ } else if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceDeletionEvent")) {
+ dout(0) << "wnbd adapter removed" << dendl;
+ // nothing to do here
+ } else if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceModificationEvent")) {
+ dout(0) << "wnbd adapter changed" << dendl;
+ // TODO: look for state changes and log the availability/status
+ }
+
+ object->Release();
+ }
+ }
+
+ dout(10) << "service stop requested, wnbd event monitor exited" << dendl;
+ }
+
+ int run_hook() override {
+ std::unique_lock l{start_lock};
+ if (started) {
+ // The run hook is only supposed to be called once per process,
+ // however we're staying cautious.
+ derr << "Service already running." << dendl;
+ return -EALREADY;
+ }
+
+ started = true;
+ // Restart registered mappings before accepting new ones.
+ int r = restart_registered_mappings(
+ thread_count, service_start_timeout, image_map_timeout);
+ if (r) {
+ if (remap_failure_fatal) {
+ derr << "Couldn't remap all images. Cleaning up." << dendl;
+ return r;
+ } else {
+ dout(0) << "Ignoring image remap failure." << dendl;
+ }
+ }
+
+ if (adapter_monitoring_enabled) {
+ adapter_monitor_thread = std::thread(&monitor_wnbd_adapter, this);
+ } else {
+ dout(0) << "WNBD adapter monitoring disabled." << dendl;
+ }
+
+ return create_pipe_server();
+ }
+
+ // Invoked when the service is requested to stop.
+ int stop_hook() override {
+ std::unique_lock l{shutdown_lock};
+
+ stop_requsted = true;
+
+ int r = disconnect_all_mappings(
+ false, hard_disconnect, soft_disconnect_timeout, thread_count);
+
+ if (adapter_monitor_thread.joinable()) {
+ dout(10) << "waiting for wnbd event monitor thread" << dendl;
+ adapter_monitor_thread.join();
+ dout(10) << "wnbd event monitor stopped" << dendl;
+ }
+
+ return r;
+ }
+
+ // Invoked when the system is shutting down.
+ int shutdown_hook() override {
+ return stop_hook();
+ }
+};
+
+class WNBDWatchCtx : public librbd::UpdateWatchCtx
+{
+private:
+ librados::IoCtx &io_ctx;
+ WnbdHandler* handler;
+ librbd::Image &image;
+ uint64_t size;
+public:
+ WNBDWatchCtx(librados::IoCtx& io_ctx, WnbdHandler* handler,
+ librbd::Image& image, uint64_t size)
+ : io_ctx(io_ctx)
+ , handler(handler)
+ , image(image)
+ , size(size)
+ { }
+
+ ~WNBDWatchCtx() override {}
+
+ void handle_notify() override
+ {
+ uint64_t new_size;
+
+ if (image.size(&new_size) == 0 && new_size != size &&
+ handler->resize(new_size) == 0) {
+ size = new_size;
+ }
+ }
+};
+
+static void usage()
+{
+ const char* usage_str =R"(
+Usage: rbd-wnbd [options] map <image-or-snap-spec> Map an image to wnbd device
+ [options] unmap <device|image-or-snap-spec> Unmap wnbd device
+ [options] list List mapped wnbd devices
+ [options] show <image-or-snap-spec> Show mapped wnbd device
+ stats <image-or-snap-spec> Show IO counters
+ [options] service Windows service entrypoint,
+ handling device lifecycle
+
+Map options:
+ --device <device path> Optional mapping unique identifier
+ --exclusive Forbid writes by other clients
+ --read-only Map read-only
+ --non-persistent Do not recreate the mapping when the Ceph service
+ restarts. By default, mappings are persistent
+ --io-req-workers The number of workers that dispatch IO requests.
+ Default: 4
+ --io-reply-workers The number of workers that dispatch IO replies.
+ Default: 4
+
+Unmap options:
+ --hard-disconnect Skip attempting a soft disconnect
+ --no-hard-disconnect-fallback Immediately return an error if the soft
+ disconnect fails instead of attempting a hard
+ disconnect as fallback
+ --soft-disconnect-timeout Soft disconnect timeout in seconds. The soft
+ disconnect operation uses PnP to notify the
+ Windows storage stack that the device is going to
+ be disconnectd. Storage drivers can block this
+ operation if there are pending operations,
+ unflushed caches or open handles. Default: 15
+
+Service options:
+ --hard-disconnect Skip attempting a soft disconnect
+ --soft-disconnect-timeout Cummulative soft disconnect timeout in seconds,
+ used when disconnecting existing mappings. A hard
+ disconnect will be issued when hitting the timeout
+ --service-thread-count The number of workers used when mapping or
+ unmapping images. Default: 8
+ --start-timeout The service start timeout in seconds. Default: 120
+ --map-timeout Individual image map timeout in seconds. Default: 20
+ --remap-failure-fatal If set, the service will stop when failing to remap
+ an image at start time, unmapping images that have
+ been mapped so far.
+ --adapter-monitoring-enabled If set, the service will monitor WNBD adapter WMI
+ events and remount the images when the adapter gets
+ recreated. Mainly used for development and driver
+ certification purposes.
+
+Show|List options:
+ --format plain|json|xml Output format (default: plain)
+ --pretty-format Pretty formatting (json and xml)
+
+Common options:
+ --wnbd-log-level libwnbd.dll log level
+
+)";
+
+ std::cout << usage_str;
+ generic_server_usage();
+}
+
+
+static Command cmd = None;
+
+int construct_devpath_if_missing(Config* cfg)
+{
+ // Windows doesn't allow us to request specific disk paths when mapping an
+ // image. This will just be used by rbd-wnbd and wnbd as an identifier.
+ if (cfg->devpath.empty()) {
+ if (cfg->imgname.empty()) {
+ derr << "Missing image name." << dendl;
+ return -EINVAL;
+ }
+
+ if (!cfg->poolname.empty()) {
+ cfg->devpath += cfg->poolname;
+ cfg->devpath += '/';
+ }
+ if (!cfg->nsname.empty()) {
+ cfg->devpath += cfg->nsname;
+ cfg->devpath += '/';
+ }
+
+ cfg->devpath += cfg->imgname;
+
+ if (!cfg->snapname.empty()) {
+ cfg->devpath += '@';
+ cfg->devpath += cfg->snapname;
+ }
+ }
+
+ return 0;
+}
+
+boost::intrusive_ptr<CephContext> do_global_init(
+ int argc, const char *argv[], Config *cfg)
+{
+ auto args = argv_to_vec(argc, argv);
+
+ code_environment_t code_env;
+ int flags;
+
+ switch(cmd) {
+ case Connect:
+ code_env = CODE_ENVIRONMENT_DAEMON;
+ flags = CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS;
+ break;
+ case Service:
+ code_env = CODE_ENVIRONMENT_DAEMON;
+ flags = CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS |
+ CINIT_FLAG_NO_MON_CONFIG |
+ CINIT_FLAG_NO_DAEMON_ACTIONS;
+ break;
+ default:
+ code_env = CODE_ENVIRONMENT_UTILITY;
+ flags = CINIT_FLAG_NO_MON_CONFIG;
+ break;
+ }
+
+ global_pre_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, code_env, flags);
+ // Avoid cluttering the console when spawning a mapping that will run
+ // in the background.
+ if (g_conf()->daemonize && cfg->parent_pipe.empty()) {
+ flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
+ }
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ code_env, flags, FALSE);
+
+ // There's no fork on Windows, we should be safe calling this anytime.
+ common_init_finish(g_ceph_context);
+ global_init_chdir(g_ceph_context);
+
+ return cct;
+}
+
+static int do_map(Config *cfg)
+{
+ int r;
+
+ librados::Rados rados;
+ librbd::RBD rbd;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ librbd::image_info_t info;
+ HANDLE parent_pipe_handle = INVALID_HANDLE_VALUE;
+ int err = 0;
+
+ if (g_conf()->daemonize && cfg->parent_pipe.empty()) {
+ return send_map_request(get_cli_args());
+ }
+
+ dout(0) << "Mapping RBD image: " << cfg->devpath << dendl;
+
+ r = rados.init_with_context(g_ceph_context);
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't initialize rados: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+
+ r = rados.connect();
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't connect to rados: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+
+ r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx);
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't create IO context: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+
+ io_ctx.set_namespace(cfg->nsname);
+
+ r = rbd.open(io_ctx, image, cfg->imgname.c_str());
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't open rbd image: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+
+ if (cfg->exclusive) {
+ r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE);
+ if (r < 0) {
+ derr << "rbd-wnbd: failed to acquire exclusive lock: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+ }
+
+ if (!cfg->snapname.empty()) {
+ r = image.snap_set(cfg->snapname.c_str());
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't use snapshot: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+ }
+
+ r = image.stat(info, sizeof(info));
+ if (r < 0)
+ goto close_ret;
+
+ if (info.size > _UI64_MAX) {
+ r = -EFBIG;
+ derr << "rbd-wnbd: image is too large (" << byte_u_t(info.size)
+ << ", max is " << byte_u_t(_UI64_MAX) << ")" << dendl;
+ goto close_ret;
+ }
+
+ // We're storing mapping details in the registry even for non-persistent
+ // mappings. This allows us to easily retrieve mapping details such
+ // as the rbd pool or admin socket path.
+ // We're cleaning up the registry entry when the non-persistent mapping
+ // gets disconnected or when the ceph service restarts.
+ r = save_config_to_registry(cfg);
+ if (r < 0)
+ goto close_ret;
+
+ handler = new WnbdHandler(image, cfg->devpath,
+ info.size / RBD_WNBD_BLKSIZE,
+ RBD_WNBD_BLKSIZE,
+ !cfg->snapname.empty() || cfg->readonly,
+ g_conf().get_val<bool>("rbd_cache"),
+ cfg->io_req_workers,
+ cfg->io_reply_workers);
+ r = handler->start();
+ if (r) {
+ r = r == ERROR_ALREADY_EXISTS ? -EEXIST : -EINVAL;
+ goto close_ret;
+ }
+
+ // We're informing the parent processes that the initialization
+ // was successful.
+ if (!cfg->parent_pipe.empty()) {
+ parent_pipe_handle = CreateFile(
+ cfg->parent_pipe.c_str(), GENERIC_WRITE, 0, NULL,
+ OPEN_EXISTING, 0, NULL);
+ if (parent_pipe_handle == INVALID_HANDLE_VALUE) {
+ derr << "Could not open parent pipe: " << win32_strerror(err) << dendl;
+ } else if (!WriteFile(parent_pipe_handle, "a", 1, NULL, NULL)) {
+ // TODO: consider exiting in this case. The parent didn't wait for us,
+ // maybe it was killed after a timeout.
+ err = GetLastError();
+ derr << "Failed to communicate with the parent: "
+ << win32_strerror(err) << dendl;
+ } else {
+ dout(5) << __func__ << ": submitted parent notification." << dendl;
+ }
+
+ if (parent_pipe_handle != INVALID_HANDLE_VALUE)
+ CloseHandle(parent_pipe_handle);
+
+ global_init_postfork_finish(g_ceph_context);
+ }
+
+ {
+ uint64_t watch_handle;
+ WNBDWatchCtx watch_ctx(io_ctx, handler, image, info.size);
+ r = image.update_watch(&watch_ctx, &watch_handle);
+ if (r < 0) {
+ derr << __func__ << ": update_watch failed with error: "
+ << cpp_strerror(r) << dendl;
+
+ handler->shutdown();
+ goto close_ret;
+ }
+
+ handler->wait();
+
+ r = image.update_unwatch(watch_handle);
+ if (r < 0)
+ derr << __func__ << ": update_unwatch failed with error: "
+ << cpp_strerror(r) << dendl;
+
+ handler->shutdown();
+ }
+
+close_ret:
+ // The registry record shouldn't be removed for (already) running mappings.
+ if (!cfg->persistent) {
+ dout(5) << __func__ << ": cleaning up non-persistent mapping: "
+ << cfg->devpath << dendl;
+ r = remove_config_from_registry(cfg);
+ if (r) {
+ derr << __func__ << ": could not clean up non-persistent mapping: "
+ << cfg->devpath << dendl;
+ }
+ }
+
+ std::unique_lock l{shutdown_lock};
+
+ image.close();
+ io_ctx.close();
+ rados.shutdown();
+ if (handler) {
+ delete handler;
+ handler = nullptr;
+ }
+
+ return r;
+}
+
+static int do_unmap(Config *cfg, bool unregister)
+{
+ WNBD_REMOVE_OPTIONS remove_options = {0};
+ remove_options.Flags.HardRemove = cfg->hard_disconnect;
+ remove_options.Flags.HardRemoveFallback = cfg->hard_disconnect_fallback;
+ remove_options.SoftRemoveTimeoutMs = cfg->soft_disconnect_timeout * 1000;
+ remove_options.SoftRemoveRetryIntervalMs = SOFT_REMOVE_RETRY_INTERVAL * 1000;
+
+ int err = WnbdRemoveEx(cfg->devpath.c_str(), &remove_options);
+ if (err && err != ERROR_FILE_NOT_FOUND) {
+ return -EINVAL;
+ }
+
+ if (unregister) {
+ err = remove_config_from_registry(cfg);
+ if (err) {
+ derr << "rbd-wnbd: failed to unregister device: "
+ << cfg->devpath << ". Error: " << err << dendl;
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int parse_imgpath(const std::string &imgpath, Config *cfg,
+ std::ostream *err_msg)
+{
+ std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$");
+ std::smatch match;
+ if (!std::regex_match(imgpath, match, pattern)) {
+ derr << "rbd-wnbd: invalid spec '" << imgpath << "'" << dendl;
+ return -EINVAL;
+ }
+
+ if (match[1].matched) {
+ cfg->poolname = match[1];
+ }
+
+ if (match[2].matched) {
+ cfg->nsname = match[2];
+ }
+
+ cfg->imgname = match[3];
+
+ if (match[4].matched)
+ cfg->snapname = match[4];
+
+ return 0;
+}
+
+static int do_list_mapped_devices(const std::string &format, bool pretty_format)
+{
+ std::unique_ptr<ceph::Formatter> f;
+ TextTable tbl;
+
+ if (format == "json") {
+ f.reset(new JSONFormatter(pretty_format));
+ } else if (format == "xml") {
+ f.reset(new XMLFormatter(pretty_format));
+ } else if (!format.empty() && format != "plain") {
+ derr << "rbd-wnbd: invalid output format: " << format << dendl;
+ return -EINVAL;
+ }
+
+ if (f) {
+ f->open_array_section("devices");
+ } else {
+ tbl.define_column("id", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("image", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("device", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("disk_number", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("status", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ Config cfg;
+ WNBDDiskIterator wnbd_disk_iterator;
+
+ while (wnbd_disk_iterator.get(&cfg)) {
+ const char* status = cfg.active ?
+ WNBD_STATUS_ACTIVE : WNBD_STATUS_INACTIVE;
+
+ if (f) {
+ f->open_object_section("device");
+ f->dump_int("id", cfg.pid ? cfg.pid : -1);
+ f->dump_string("device", cfg.devpath);
+ f->dump_string("pool", cfg.poolname);
+ f->dump_string("namespace", cfg.nsname);
+ f->dump_string("image", cfg.imgname);
+ f->dump_string("snap", cfg.snapname);
+ f->dump_int("disk_number", cfg.disk_number ? cfg.disk_number : -1);
+ f->dump_string("status", status);
+ f->close_section();
+ } else {
+ if (cfg.snapname.empty()) {
+ cfg.snapname = "-";
+ }
+ tbl << (cfg.pid ? cfg.pid : -1) << cfg.poolname << cfg.nsname
+ << cfg.imgname << cfg.snapname << cfg.devpath
+ << cfg.disk_number << status << TextTable::endrow;
+ }
+ }
+ int error = wnbd_disk_iterator.get_error();
+ if (error) {
+ derr << "Could not get disk list: " << error << dendl;
+ return error;
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ std::cout << tbl;
+ }
+
+ return 0;
+}
+
+static int do_show_mapped_device(std::string format, bool pretty_format,
+ std::string devpath)
+{
+ std::unique_ptr<ceph::Formatter> f;
+ TextTable tbl;
+
+ if (format.empty() || format == "plain") {
+ format = "json";
+ pretty_format = true;
+ }
+ if (format == "json") {
+ f.reset(new JSONFormatter(pretty_format));
+ } else if (format == "xml") {
+ f.reset(new XMLFormatter(pretty_format));
+ } else {
+ derr << "rbd-wnbd: invalid output format: " << format << dendl;
+ return -EINVAL;
+ }
+
+ Config cfg;
+ int error = load_mapping_config_from_registry(devpath, &cfg);
+ if (error) {
+ derr << "Could not load registry disk info for: "
+ << devpath << ". Error: " << error << dendl;
+ return error;
+ }
+
+ WNBD_CONNECTION_INFO conn_info = { 0 };
+ // If the device is currently disconnected but there is a persistent
+ // mapping record, we'll show that.
+ DWORD ret = WnbdShow(devpath.c_str(), &conn_info);
+ if (ret && ret != ERROR_FILE_NOT_FOUND) {
+ return -EINVAL;
+ }
+
+ auto conn_props = conn_info.Properties;
+ cfg.active = conn_info.DiskNumber > 0 && is_process_running(conn_props.Pid);
+ f->open_object_section("device");
+ f->dump_int("id", conn_props.Pid ? conn_props.Pid : -1);
+ f->dump_string("device", cfg.devpath);
+ f->dump_string("pool", cfg.poolname);
+ f->dump_string("namespace", cfg.nsname);
+ f->dump_string("image", cfg.imgname);
+ f->dump_string("snap", cfg.snapname);
+ f->dump_int("persistent", cfg.persistent);
+ f->dump_int("disk_number", conn_info.DiskNumber ? conn_info.DiskNumber : -1);
+ f->dump_string("status", cfg.active ? WNBD_STATUS_ACTIVE : WNBD_STATUS_INACTIVE);
+ f->dump_string("pnp_device_id", to_string(conn_info.PNPDeviceID));
+ f->dump_int("readonly", conn_props.Flags.ReadOnly);
+ f->dump_int("block_size", conn_props.BlockSize);
+ f->dump_int("block_count", conn_props.BlockCount);
+ f->dump_int("flush_enabled", conn_props.Flags.FlushSupported);
+ f->close_section();
+ f->flush(std::cout);
+
+ return 0;
+}
+
+static int do_stats(std::string search_devpath)
+{
+ Config cfg;
+ WNBDDiskIterator wnbd_disk_iterator;
+
+ while (wnbd_disk_iterator.get(&cfg)) {
+ if (cfg.devpath != search_devpath)
+ continue;
+
+ AdminSocketClient client = AdminSocketClient(cfg.admin_sock_path);
+ std::string output;
+ std::string result = client.do_request("{\"prefix\":\"wnbd stats\"}",
+ &output);
+ if (!result.empty()) {
+ std::cerr << "Admin socket error: " << result << std::endl;
+ return -EINVAL;
+ }
+
+ std::cout << output << std::endl;
+ return 0;
+ }
+ int error = wnbd_disk_iterator.get_error();
+ if (!error) {
+ error = -ENOENT;
+ }
+
+ derr << "Could not find the specified disk." << dendl;
+ return error;
+}
+
+static int parse_args(std::vector<const char*>& args,
+ std::ostream *err_msg,
+ Command *command, Config *cfg)
+{
+ std::string conf_file_list;
+ std::string cluster;
+ CephInitParameters iparams = ceph_argparse_early_args(
+ args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list);
+
+ ConfigProxy config{false};
+ config->name = iparams.name;
+ config->cluster = cluster;
+
+ if (!conf_file_list.empty()) {
+ config.parse_config_files(conf_file_list.c_str(), nullptr, 0);
+ } else {
+ config.parse_config_files(nullptr, nullptr, 0);
+ }
+ config.parse_env(CEPH_ENTITY_TYPE_CLIENT);
+ config.parse_argv(args);
+ cfg->poolname = config.get_val<std::string>("rbd_default_pool");
+
+ std::vector<const char*>::iterator i;
+ std::ostringstream err;
+
+ // TODO: consider using boost::program_options like Device.cc does.
+ // This should simplify argument parsing. Also, some arguments must be tied
+ // to specific commands, for example the disconnect timeout. Luckily,
+ // this is enforced by the "rbd device" wrapper.
+ for (i = args.begin(); i != args.end(); ) {
+ if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
+ return HELP_INFO;
+ } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) {
+ return VERSION_INFO;
+ } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) {
+ } else if (ceph_argparse_witharg(args, i, &cfg->format, err, "--format",
+ (char *)NULL)) {
+ } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) {
+ cfg->readonly = true;
+ } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) {
+ cfg->exclusive = true;
+ } else if (ceph_argparse_flag(args, i, "--non-persistent", (char *)NULL)) {
+ cfg->persistent = false;
+ } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) {
+ cfg->pretty_format = true;
+ } else if (ceph_argparse_flag(args, i, "--remap-failure-fatal", (char *)NULL)) {
+ cfg->remap_failure_fatal = true;
+ } else if (ceph_argparse_flag(args, i, "--adapter-monitoring-enabled", (char *)NULL)) {
+ cfg->adapter_monitoring_enabled = true;
+ } else if (ceph_argparse_witharg(args, i, &cfg->parent_pipe, err,
+ "--pipe-name", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, (int*)&cfg->wnbd_log_level,
+ err, "--wnbd-log-level", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->wnbd_log_level < 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for wnbd-log-level";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, (int*)&cfg->io_req_workers,
+ err, "--io-req-workers", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->io_req_workers <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for io-req-workers";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, (int*)&cfg->io_reply_workers,
+ err, "--io-reply-workers", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->io_reply_workers <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for io-reply-workers";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, (int*)&cfg->service_thread_count,
+ err, "--service-thread-count", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->service_thread_count <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for service-thread-count";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_flag(args, i, "--hard-disconnect", (char *)NULL)) {
+ cfg->hard_disconnect = true;
+ } else if (ceph_argparse_flag(args, i,
+ "--no-hard-disconnect-fallback", (char *)NULL)) {
+ cfg->hard_disconnect_fallback = false;
+ } else if (ceph_argparse_witharg(args, i,
+ (int*)&cfg->soft_disconnect_timeout,
+ err, "--soft-disconnect-timeout",
+ (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->soft_disconnect_timeout < 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for soft-disconnect-timeout";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i,
+ (int*)&cfg->service_start_timeout,
+ err, "--start-timeout",
+ (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->service_start_timeout <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for start-timeout";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i,
+ (int*)&cfg->image_map_timeout,
+ err, "--map-timeout",
+ (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->image_map_timeout <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for map-timeout";
+ return -EINVAL;
+ }
+ } else {
+ ++i;
+ }
+ }
+
+ Command cmd = None;
+ if (args.begin() != args.end()) {
+ if (strcmp(*args.begin(), "map") == 0) {
+ cmd = Connect;
+ } else if (strcmp(*args.begin(), "unmap") == 0) {
+ cmd = Disconnect;
+ } else if (strcmp(*args.begin(), "list") == 0) {
+ cmd = List;
+ } else if (strcmp(*args.begin(), "show") == 0) {
+ cmd = Show;
+ } else if (strcmp(*args.begin(), "service") == 0) {
+ cmd = Service;
+ } else if (strcmp(*args.begin(), "stats") == 0) {
+ cmd = Stats;
+ } else if (strcmp(*args.begin(), "help") == 0) {
+ return HELP_INFO;
+ } else {
+ *err_msg << "rbd-wnbd: unknown command: " << *args.begin();
+ return -EINVAL;
+ }
+ args.erase(args.begin());
+ }
+
+ if (cmd == None) {
+ *err_msg << "rbd-wnbd: must specify command";
+ return -EINVAL;
+ }
+
+ switch (cmd) {
+ case Connect:
+ case Disconnect:
+ case Show:
+ case Stats:
+ if (args.begin() == args.end()) {
+ *err_msg << "rbd-wnbd: must specify wnbd device or image-or-snap-spec";
+ return -EINVAL;
+ }
+ if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) {
+ return -EINVAL;
+ }
+ args.erase(args.begin());
+ break;
+ default:
+ //shut up gcc;
+ break;
+ }
+
+ if (args.begin() != args.end()) {
+ *err_msg << "rbd-wnbd: unknown args: " << *args.begin();
+ return -EINVAL;
+ }
+
+ *command = cmd;
+ return 0;
+}
+
+static int rbd_wnbd(int argc, const char *argv[])
+{
+ Config cfg;
+ auto args = argv_to_vec(argc, argv);
+
+ // Avoid using dout before calling "do_global_init"
+ if (args.empty()) {
+ std::cout << argv[0] << ": -h or --help for usage" << std::endl;
+ exit(1);
+ }
+
+ std::ostringstream err_msg;
+ int r = parse_args(args, &err_msg, &cmd, &cfg);
+ if (r == HELP_INFO) {
+ usage();
+ return 0;
+ } else if (r == VERSION_INFO) {
+ std::cout << pretty_version_to_str() << std::endl;
+ return 0;
+ } else if (r < 0) {
+ std::cout << err_msg.str() << std::endl;
+ return r;
+ }
+
+ auto cct = do_global_init(argc, argv, &cfg);
+
+ WnbdSetLogger(WnbdHandler::LogMessage);
+ WnbdSetLogLevel(cfg.wnbd_log_level);
+
+ switch (cmd) {
+ case Connect:
+ if (construct_devpath_if_missing(&cfg)) {
+ return -EINVAL;
+ }
+ r = do_map(&cfg);
+ if (r < 0)
+ return r;
+ break;
+ case Disconnect:
+ if (construct_devpath_if_missing(&cfg)) {
+ return -EINVAL;
+ }
+ r = do_unmap(&cfg, true);
+ if (r < 0)
+ return r;
+ break;
+ case List:
+ r = do_list_mapped_devices(cfg.format, cfg.pretty_format);
+ if (r < 0)
+ return r;
+ break;
+ case Show:
+ if (construct_devpath_if_missing(&cfg)) {
+ return r;
+ }
+ r = do_show_mapped_device(cfg.format, cfg.pretty_format, cfg.devpath);
+ if (r < 0)
+ return r;
+ break;
+ case Service:
+ {
+ RBDService service(cfg.hard_disconnect, cfg.soft_disconnect_timeout,
+ cfg.service_thread_count,
+ cfg.service_start_timeout,
+ cfg.image_map_timeout,
+ cfg.remap_failure_fatal,
+ cfg.adapter_monitoring_enabled);
+ // This call will block until the service stops.
+ r = RBDService::initialize(&service);
+ if (r < 0)
+ return r;
+ break;
+ }
+ case Stats:
+ if (construct_devpath_if_missing(&cfg)) {
+ return -EINVAL;
+ }
+ return do_stats(cfg.devpath);
+ default:
+ usage();
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, const char *argv[])
+{
+ SetConsoleCtrlHandler(console_handler_routine, true);
+ // Avoid the Windows Error Reporting dialog.
+ SetErrorMode(GetErrorMode() | SEM_NOGPFAULTERRORBOX);
+ int r = rbd_wnbd(argc, argv);
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}
diff --git a/src/tools/rbd_wnbd/rbd_wnbd.h b/src/tools/rbd_wnbd/rbd_wnbd.h
new file mode 100644
index 000000000..ac298e318
--- /dev/null
+++ b/src/tools/rbd_wnbd/rbd_wnbd.h
@@ -0,0 +1,193 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RBD_WNBD_H
+#define RBD_WNBD_H
+
+#include <string.h>
+#include <iostream>
+#include <vector>
+
+#include "include/compat.h"
+#include "common/win32/registry.h"
+
+#include "wnbd_handler.h"
+
+#define SERVICE_REG_KEY "SYSTEM\\CurrentControlSet\\Services\\rbd-wnbd"
+#define SERVICE_PIPE_NAME "\\\\.\\pipe\\rbd-wnbd"
+#define SERVICE_PIPE_TIMEOUT_MS 5000
+#define SERVICE_PIPE_BUFFSZ 4096
+
+#define DEFAULT_MAP_TIMEOUT_MS 30000
+
+#define RBD_WNBD_BLKSIZE 512UL
+
+#define DEFAULT_SERVICE_START_TIMEOUT 120
+#define DEFAULT_IMAGE_MAP_TIMEOUT 20
+
+#define HELP_INFO 1
+#define VERSION_INFO 2
+
+#define WNBD_STATUS_ACTIVE "active"
+#define WNBD_STATUS_INACTIVE "inactive"
+
+#define DEFAULT_SERVICE_THREAD_COUNT 8
+
+static WnbdHandler* handler = nullptr;
+ceph::mutex shutdown_lock = ceph::make_mutex("RbdWnbd::ShutdownLock");
+
+struct Config {
+ bool exclusive = false;
+ bool readonly = false;
+
+ std::string parent_pipe;
+
+ std::string poolname;
+ std::string nsname;
+ std::string imgname;
+ std::string snapname;
+ std::string devpath;
+
+ std::string format;
+ bool pretty_format = false;
+
+ bool hard_disconnect = false;
+ int soft_disconnect_timeout = DEFAULT_SOFT_REMOVE_TIMEOUT;
+ bool hard_disconnect_fallback = true;
+
+ int service_start_timeout = DEFAULT_SERVICE_START_TIMEOUT;
+ int image_map_timeout = DEFAULT_IMAGE_MAP_TIMEOUT;
+ bool remap_failure_fatal = false;
+ bool adapter_monitoring_enabled = false;
+
+ // TODO: consider moving those fields to a separate structure. Those
+ // provide connection information without actually being configurable.
+ // The disk number is provided by Windows.
+ int disk_number = -1;
+ int pid = 0;
+ std::string serial_number;
+ bool active = false;
+ bool wnbd_mapped = false;
+ std::string command_line;
+ std::string admin_sock_path;
+
+ WnbdLogLevel wnbd_log_level = WnbdLogLevelInfo;
+ int io_req_workers = DEFAULT_IO_WORKER_COUNT;
+ int io_reply_workers = DEFAULT_IO_WORKER_COUNT;
+ int service_thread_count = DEFAULT_SERVICE_THREAD_COUNT;
+
+ // register the mapping, recreating it when the Ceph service starts.
+ bool persistent = true;
+};
+
+enum Command {
+ None,
+ Connect,
+ Disconnect,
+ List,
+ Show,
+ Service,
+ Stats
+};
+
+typedef struct {
+ Command command;
+ BYTE arguments[1];
+} ServiceRequest;
+
+typedef struct {
+ int status;
+} ServiceReply;
+
+bool is_process_running(DWORD pid);
+void unmap_at_exit();
+
+int disconnect_all_mappings(
+ bool unregister,
+ bool hard_disconnect,
+ int soft_disconnect_timeout,
+ int worker_count);
+int restart_registered_mappings(
+ int worker_count, int total_timeout, int image_map_timeout);
+int map_device_using_suprocess(std::string command_line);
+
+int construct_devpath_if_missing(Config* cfg);
+int save_config_to_registry(Config* cfg);
+int remove_config_from_registry(Config* cfg);
+int load_mapping_config_from_registry(std::string devpath, Config* cfg);
+
+BOOL WINAPI console_handler_routine(DWORD dwCtrlType);
+
+static int parse_args(std::vector<const char*>& args,
+ std::ostream *err_msg,
+ Command *command, Config *cfg);
+static int do_unmap(Config *cfg, bool unregister);
+
+
+class BaseIterator {
+ public:
+ virtual ~BaseIterator() {};
+ virtual bool get(Config *cfg) = 0;
+
+ int get_error() {
+ return error;
+ }
+ protected:
+ int error = 0;
+ int index = -1;
+};
+
+// Iterate over mapped devices, retrieving info from the driver.
+class WNBDActiveDiskIterator : public BaseIterator {
+ public:
+ WNBDActiveDiskIterator();
+ ~WNBDActiveDiskIterator();
+
+ bool get(Config *cfg);
+
+ private:
+ PWNBD_CONNECTION_LIST conn_list = NULL;
+
+ static DWORD fetch_list(PWNBD_CONNECTION_LIST* conn_list);
+};
+
+
+// Iterate over the Windows registry key, retrieving registered mappings.
+class RegistryDiskIterator : public BaseIterator {
+ public:
+ RegistryDiskIterator();
+ ~RegistryDiskIterator() {
+ delete reg_key;
+ }
+
+ bool get(Config *cfg);
+ private:
+ DWORD subkey_count = 0;
+ char subkey_name[MAX_PATH];
+
+ RegistryKey* reg_key = NULL;
+};
+
+// Iterate over all RBD mappings, getting info from the registry and driver.
+class WNBDDiskIterator : public BaseIterator {
+ public:
+ bool get(Config *cfg);
+
+ private:
+ // We'll keep track of the active devices.
+ std::set<std::string> active_devices;
+
+ WNBDActiveDiskIterator active_iterator;
+ RegistryDiskIterator registry_iterator;
+};
+
+#endif // RBD_WNBD_H
diff --git a/src/tools/rbd_wnbd/wnbd_handler.cc b/src/tools/rbd_wnbd/wnbd_handler.cc
new file mode 100644
index 000000000..f6a489836
--- /dev/null
+++ b/src/tools/rbd_wnbd/wnbd_handler.cc
@@ -0,0 +1,456 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+
+#include "wnbd_handler.h"
+
+#define _NTSCSI_USER_MODE_
+#include <rpc.h>
+#include <ddk/scsi.h>
+
+#include <boost/thread/tss.hpp>
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/safe_io.h"
+#include "common/SubProcess.h"
+#include "common/Formatter.h"
+
+#include "global/global_context.h"
+
+#include "rbd_wnbd.h"
+
+WnbdHandler::~WnbdHandler()
+{
+ if (started && wnbd_disk) {
+ dout(10) << __func__ << ": terminating" << dendl;
+
+ shutdown();
+ reply_tpool->join();
+
+ WnbdClose(wnbd_disk);
+
+ started = false;
+
+ delete reply_tpool;
+ delete admin_hook;
+ }
+}
+
+int WnbdHandler::wait()
+{
+ int err = 0;
+ if (started && wnbd_disk) {
+ dout(10) << __func__ << ": waiting" << dendl;
+
+ err = WnbdWaitDispatcher(wnbd_disk);
+ if (err) {
+ derr << __func__ << " failed waiting for dispatcher to stop: "
+ << err << dendl;
+ }
+ }
+
+ return err;
+}
+
+int WnbdAdminHook::call (
+ std::string_view command, const cmdmap_t& cmdmap,
+ const bufferlist&,
+ Formatter *f,
+ std::ostream& errss,
+ bufferlist& out)
+{
+ if (command == "wnbd stats") {
+ return m_handler->dump_stats(f);
+ }
+ return -ENOSYS;
+}
+
+int WnbdHandler::dump_stats(Formatter *f)
+{
+ if (!f) {
+ return -EINVAL;
+ }
+
+ WNBD_USR_STATS stats = { 0 };
+ DWORD err = WnbdGetUserspaceStats(wnbd_disk, &stats);
+ if (err) {
+ derr << "Failed to retrieve WNBD userspace stats. Error: " << err << dendl;
+ return -EINVAL;
+ }
+
+ f->open_object_section("stats");
+ f->dump_int("TotalReceivedRequests", stats.TotalReceivedRequests);
+ f->dump_int("TotalSubmittedRequests", stats.TotalSubmittedRequests);
+ f->dump_int("TotalReceivedReplies", stats.TotalReceivedReplies);
+ f->dump_int("UnsubmittedRequests", stats.UnsubmittedRequests);
+ f->dump_int("PendingSubmittedRequests", stats.PendingSubmittedRequests);
+ f->dump_int("PendingReplies", stats.PendingReplies);
+ f->dump_int("ReadErrors", stats.ReadErrors);
+ f->dump_int("WriteErrors", stats.WriteErrors);
+ f->dump_int("FlushErrors", stats.FlushErrors);
+ f->dump_int("UnmapErrors", stats.UnmapErrors);
+ f->dump_int("InvalidRequests", stats.InvalidRequests);
+ f->dump_int("TotalRWRequests", stats.TotalRWRequests);
+ f->dump_int("TotalReadBlocks", stats.TotalReadBlocks);
+ f->dump_int("TotalWrittenBlocks", stats.TotalWrittenBlocks);
+
+ f->close_section();
+ return 0;
+}
+
+void WnbdHandler::shutdown()
+{
+ std::unique_lock l{shutdown_lock};
+ if (!terminated && wnbd_disk) {
+ // We're requesting the disk to be removed but continue serving IO
+ // requests until the driver sends us the "Disconnect" event.
+ // TODO: expose PWNBD_REMOVE_OPTIONS, we're using the defaults ATM.
+ WnbdRemove(wnbd_disk, NULL);
+ wait();
+ terminated = true;
+ }
+}
+
+void WnbdHandler::aio_callback(librbd::completion_t cb, void *arg)
+{
+ librbd::RBD::AioCompletion *aio_completion =
+ reinterpret_cast<librbd::RBD::AioCompletion*>(cb);
+
+ WnbdHandler::IOContext* ctx = static_cast<WnbdHandler::IOContext*>(arg);
+ int ret = aio_completion->get_return_value();
+
+ dout(20) << __func__ << ": " << *ctx << dendl;
+
+ if (ret == -EINVAL) {
+ // if shrinking an image, a pagecache writeback might reference
+ // extents outside of the range of the new image extents
+ dout(0) << __func__ << ": masking IO out-of-bounds error" << *ctx << dendl;
+ ctx->data.clear();
+ ret = 0;
+ }
+
+ if (ret < 0) {
+ ctx->err_code = -ret;
+ // TODO: check the actual error.
+ ctx->set_sense(SCSI_SENSE_MEDIUM_ERROR,
+ SCSI_ADSENSE_UNRECOVERED_ERROR);
+ } else if ((ctx->req_type == WnbdReqTypeRead) &&
+ ret < static_cast<int>(ctx->req_size)) {
+ int pad_byte_count = static_cast<int> (ctx->req_size) - ret;
+ ctx->data.append_zero(pad_byte_count);
+ dout(20) << __func__ << ": " << *ctx << ": Pad byte count: "
+ << pad_byte_count << dendl;
+ ctx->err_code = 0;
+ } else {
+ ctx->err_code = 0;
+ }
+
+ boost::asio::post(
+ *ctx->handler->reply_tpool,
+ [&, ctx]()
+ {
+ ctx->handler->send_io_response(ctx);
+ });
+
+ aio_completion->release();
+}
+
+void WnbdHandler::send_io_response(WnbdHandler::IOContext *ctx) {
+ std::unique_ptr<WnbdHandler::IOContext> pctx{ctx};
+ ceph_assert(WNBD_DEFAULT_MAX_TRANSFER_LENGTH >= pctx->data.length());
+
+ WNBD_IO_RESPONSE wnbd_rsp = {0};
+ wnbd_rsp.RequestHandle = pctx->req_handle;
+ wnbd_rsp.RequestType = pctx->req_type;
+ wnbd_rsp.Status = pctx->wnbd_status;
+ int err = 0;
+
+ // Use TLS to store an overlapped structure so that we avoid
+ // recreating one each time we send a reply.
+ static boost::thread_specific_ptr<OVERLAPPED> overlapped_tls(
+ // Cleanup routine
+ [](LPOVERLAPPED p_overlapped)
+ {
+ if (p_overlapped->hEvent) {
+ CloseHandle(p_overlapped->hEvent);
+ }
+ delete p_overlapped;
+ });
+
+ LPOVERLAPPED overlapped = overlapped_tls.get();
+ if (!overlapped)
+ {
+ overlapped = new OVERLAPPED{0};
+ HANDLE overlapped_evt = CreateEventA(0, TRUE, TRUE, NULL);
+ if (!overlapped_evt) {
+ err = GetLastError();
+ derr << "Could not create event. Error: " << err << dendl;
+ return;
+ }
+
+ overlapped->hEvent = overlapped_evt;
+ overlapped_tls.reset(overlapped);
+ }
+
+ if (!ResetEvent(overlapped->hEvent)) {
+ err = GetLastError();
+ derr << "Could not reset event. Error: " << err << dendl;
+ return;
+ }
+
+ err = WnbdSendResponseEx(
+ pctx->handler->wnbd_disk,
+ &wnbd_rsp,
+ pctx->data.c_str(),
+ pctx->data.length(),
+ overlapped);
+ if (err == ERROR_IO_PENDING) {
+ DWORD returned_bytes = 0;
+ err = 0;
+ // We've got ERROR_IO_PENDING, which means that the operation is in
+ // progress. We'll use GetOverlappedResult to wait for it to complete
+ // and then retrieve the result.
+ if (!GetOverlappedResult(pctx->handler->wnbd_disk, overlapped,
+ &returned_bytes, TRUE)) {
+ err = GetLastError();
+ derr << "Could not send response. Request id: " << wnbd_rsp.RequestHandle
+ << ". Error: " << err << dendl;
+ }
+ }
+}
+
+void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc, uint64_t info)
+{
+ WnbdSetSenseEx(&wnbd_status, sense_key, asc, info);
+}
+
+void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc)
+{
+ WnbdSetSense(&wnbd_status, sense_key, asc);
+}
+
+void WnbdHandler::Read(
+ PWNBD_DISK Disk,
+ UINT64 RequestHandle,
+ PVOID Buffer,
+ UINT64 BlockAddress,
+ UINT32 BlockCount,
+ BOOLEAN ForceUnitAccess)
+{
+ WnbdHandler* handler = nullptr;
+ ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
+
+ WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
+ ctx->handler = handler;
+ ctx->req_handle = RequestHandle;
+ ctx->req_type = WnbdReqTypeRead;
+ ctx->req_size = BlockCount * handler->block_size;
+ ctx->req_from = BlockAddress * handler->block_size;
+ ceph_assert(ctx->req_size <= WNBD_DEFAULT_MAX_TRANSFER_LENGTH);
+
+ int op_flags = 0;
+ if (ForceUnitAccess) {
+ op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA;
+ }
+
+ dout(20) << *ctx << ": start" << dendl;
+
+ librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
+ handler->image.aio_read2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags);
+
+ dout(20) << *ctx << ": submitted" << dendl;
+}
+
+void WnbdHandler::Write(
+ PWNBD_DISK Disk,
+ UINT64 RequestHandle,
+ PVOID Buffer,
+ UINT64 BlockAddress,
+ UINT32 BlockCount,
+ BOOLEAN ForceUnitAccess)
+{
+ WnbdHandler* handler = nullptr;
+ ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
+
+ WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
+ ctx->handler = handler;
+ ctx->req_handle = RequestHandle;
+ ctx->req_type = WnbdReqTypeWrite;
+ ctx->req_size = BlockCount * handler->block_size;
+ ctx->req_from = BlockAddress * handler->block_size;
+
+ bufferptr ptr((char*)Buffer, ctx->req_size);
+ ctx->data.push_back(ptr);
+
+ int op_flags = 0;
+ if (ForceUnitAccess) {
+ op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA;
+ }
+
+ dout(20) << *ctx << ": start" << dendl;
+
+ librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
+ handler->image.aio_write2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags);
+
+ dout(20) << *ctx << ": submitted" << dendl;
+}
+
+void WnbdHandler::Flush(
+ PWNBD_DISK Disk,
+ UINT64 RequestHandle,
+ UINT64 BlockAddress,
+ UINT32 BlockCount)
+{
+ WnbdHandler* handler = nullptr;
+ ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
+
+ WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
+ ctx->handler = handler;
+ ctx->req_handle = RequestHandle;
+ ctx->req_type = WnbdReqTypeFlush;
+ ctx->req_size = BlockCount * handler->block_size;
+ ctx->req_from = BlockAddress * handler->block_size;
+
+ dout(20) << *ctx << ": start" << dendl;
+
+ librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
+ handler->image.aio_flush(c);
+
+ dout(20) << *ctx << ": submitted" << dendl;
+}
+
+void WnbdHandler::Unmap(
+ PWNBD_DISK Disk,
+ UINT64 RequestHandle,
+ PWNBD_UNMAP_DESCRIPTOR Descriptors,
+ UINT32 Count)
+{
+ WnbdHandler* handler = nullptr;
+ ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
+ ceph_assert(1 == Count);
+
+ WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
+ ctx->handler = handler;
+ ctx->req_handle = RequestHandle;
+ ctx->req_type = WnbdReqTypeUnmap;
+ ctx->req_size = Descriptors[0].BlockCount * handler->block_size;
+ ctx->req_from = Descriptors[0].BlockAddress * handler->block_size;
+
+ dout(20) << *ctx << ": start" << dendl;
+
+ librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
+ handler->image.aio_discard(ctx->req_from, ctx->req_size, c);
+
+ dout(20) << *ctx << ": submitted" << dendl;
+}
+
+void WnbdHandler::LogMessage(
+ WnbdLogLevel LogLevel,
+ const char* Message,
+ const char* FileName,
+ UINT32 Line,
+ const char* FunctionName)
+{
+ // We're already passing the log level to WNBD, so we'll use the highest
+ // log level here.
+ dout(0) << "libwnbd.dll!" << FunctionName << " "
+ << WnbdLogLevelToStr(LogLevel) << " " << Message << dendl;
+}
+
+int WnbdHandler::resize(uint64_t new_size)
+{
+ int err = 0;
+
+ uint64_t new_block_count = new_size / block_size;
+
+ dout(5) << "Resizing disk. Block size: " << block_size
+ << ". New block count: " << new_block_count
+ << ". Old block count: "
+ << wnbd_disk->Properties.BlockCount << "." << dendl;
+ err = WnbdSetDiskSize(wnbd_disk, new_block_count);
+ if (err) {
+ derr << "WNBD: Setting disk size failed with error: "
+ << win32_strerror(err) << dendl;
+ return -EINVAL;
+ }
+
+ dout(5) << "Successfully resized disk to: " << new_block_count << " blocks"
+ << dendl;
+ return 0;
+}
+
+int WnbdHandler::start()
+{
+ int err = 0;
+ WNBD_PROPERTIES wnbd_props = {0};
+
+ instance_name.copy(wnbd_props.InstanceName, sizeof(wnbd_props.InstanceName));
+ ceph_assert(strlen(RBD_WNBD_OWNER_NAME) < WNBD_MAX_OWNER_LENGTH);
+ strncpy(wnbd_props.Owner, RBD_WNBD_OWNER_NAME, WNBD_MAX_OWNER_LENGTH);
+
+ wnbd_props.BlockCount = block_count;
+ wnbd_props.BlockSize = block_size;
+ wnbd_props.MaxUnmapDescCount = 1;
+
+ wnbd_props.Flags.ReadOnly = readonly;
+ wnbd_props.Flags.UnmapSupported = 1;
+ if (rbd_cache_enabled) {
+ wnbd_props.Flags.FUASupported = 1;
+ wnbd_props.Flags.FlushSupported = 1;
+ }
+
+ err = WnbdCreate(&wnbd_props, &RbdWnbdInterface, this, &wnbd_disk);
+ if (err)
+ goto exit;
+
+ started = true;
+
+ err = WnbdStartDispatcher(wnbd_disk, io_req_workers);
+ if (err) {
+ derr << "Could not start WNBD dispatcher. Error: " << err << dendl;
+ }
+
+exit:
+ return err;
+}
+
+std::ostream &operator<<(std::ostream &os, const WnbdHandler::IOContext &ctx) {
+
+ os << "[" << std::hex << ctx.req_handle;
+
+ switch (ctx.req_type)
+ {
+ case WnbdReqTypeRead:
+ os << " READ ";
+ break;
+ case WnbdReqTypeWrite:
+ os << " WRITE ";
+ break;
+ case WnbdReqTypeFlush:
+ os << " FLUSH ";
+ break;
+ case WnbdReqTypeUnmap:
+ os << " TRIM ";
+ break;
+ default:
+ os << " UNKNOWN(" << ctx.req_type << ") ";
+ break;
+ }
+
+ os << ctx.req_from << "~" << ctx.req_size << " "
+ << std::dec << ntohl(ctx.err_code) << "]";
+
+ return os;
+}
diff --git a/src/tools/rbd_wnbd/wnbd_handler.h b/src/tools/rbd_wnbd/wnbd_handler.h
new file mode 100644
index 000000000..c1ab5676b
--- /dev/null
+++ b/src/tools/rbd_wnbd/wnbd_handler.h
@@ -0,0 +1,188 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef WNBD_HANDLER_H
+#define WNBD_HANDLER_H
+
+#include <wnbd.h>
+
+#include "common/admin_socket.h"
+#include "common/ceph_context.h"
+#include "common/Thread.h"
+
+#include "include/rbd/librbd.hpp"
+#include "include/xlist.h"
+
+#include "global/global_context.h"
+
+// TODO: make this configurable.
+#define RBD_WNBD_MAX_TRANSFER 2 * 1024 * 1024
+#define SOFT_REMOVE_RETRY_INTERVAL 2
+#define DEFAULT_SOFT_REMOVE_TIMEOUT 15
+#define DEFAULT_IO_WORKER_COUNT 4
+
+// Not defined by mingw.
+#ifndef SCSI_ADSENSE_UNRECOVERED_ERROR
+#define SCSI_ADSENSE_UNRECOVERED_ERROR 0x11
+#endif
+
+// The following will be assigned to the "Owner" field of the WNBD
+// parameters, which can be used to determine the application managing
+// a disk. We'll ignore other disks.
+#define RBD_WNBD_OWNER_NAME "ceph-rbd-wnbd"
+
+class WnbdHandler;
+
+class WnbdAdminHook : public AdminSocketHook {
+ WnbdHandler *m_handler;
+
+public:
+ explicit WnbdAdminHook(WnbdHandler *handler) :
+ m_handler(handler) {
+ g_ceph_context->get_admin_socket()->register_command(
+ "wnbd stats", this, "get WNBD stats");
+ }
+ ~WnbdAdminHook() override {
+ g_ceph_context->get_admin_socket()->unregister_commands(this);
+ }
+
+ int call(std::string_view command, const cmdmap_t& cmdmap,
+ const bufferlist&,
+ Formatter *f, std::ostream& errss, bufferlist& out) override;
+};
+
+
+class WnbdHandler
+{
+private:
+ librbd::Image &image;
+ std::string instance_name;
+ uint64_t block_count;
+ uint32_t block_size;
+ bool readonly;
+ bool rbd_cache_enabled;
+ uint32_t io_req_workers;
+ uint32_t io_reply_workers;
+ WnbdAdminHook* admin_hook;
+ boost::asio::thread_pool* reply_tpool;
+
+public:
+ WnbdHandler(librbd::Image& _image, std::string _instance_name,
+ uint64_t _block_count, uint32_t _block_size,
+ bool _readonly, bool _rbd_cache_enabled,
+ uint32_t _io_req_workers,
+ uint32_t _io_reply_workers)
+ : image(_image)
+ , instance_name(_instance_name)
+ , block_count(_block_count)
+ , block_size(_block_size)
+ , readonly(_readonly)
+ , rbd_cache_enabled(_rbd_cache_enabled)
+ , io_req_workers(_io_req_workers)
+ , io_reply_workers(_io_reply_workers)
+ {
+ admin_hook = new WnbdAdminHook(this);
+ // Instead of relying on librbd's own thread pool, we're going to use a
+ // separate one. This allows us to make assumptions on the threads that
+ // are going to send the IO replies and thus be able to cache Windows
+ // OVERLAPPED structures.
+ reply_tpool = new boost::asio::thread_pool(_io_reply_workers);
+ }
+
+ int resize(uint64_t new_size);
+ int start();
+ // Wait for the handler to stop, which normally happens when the driver
+ // passes the "Disconnect" request.
+ int wait();
+ void shutdown();
+
+ int dump_stats(Formatter *f);
+
+ ~WnbdHandler();
+
+ static VOID LogMessage(
+ WnbdLogLevel LogLevel,
+ const char* Message,
+ const char* FileName,
+ UINT32 Line,
+ const char* FunctionName);
+
+private:
+ ceph::mutex shutdown_lock = ceph::make_mutex("WnbdHandler::DisconnectLocker");
+ bool started = false;
+ bool terminated = false;
+ WNBD_DISK* wnbd_disk = nullptr;
+
+ struct IOContext
+ {
+ xlist<IOContext*>::item item;
+ WnbdHandler *handler = nullptr;
+ WNBD_STATUS wnbd_status = {0};
+ WnbdRequestType req_type = WnbdReqTypeUnknown;
+ uint64_t req_handle = 0;
+ uint32_t err_code = 0;
+ size_t req_size;
+ uint64_t req_from;
+ bufferlist data;
+
+ IOContext()
+ : item(this)
+ {}
+
+ void set_sense(uint8_t sense_key, uint8_t asc, uint64_t info);
+ void set_sense(uint8_t sense_key, uint8_t asc);
+ };
+
+ friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx);
+
+ void send_io_response(IOContext *ctx);
+
+ static void aio_callback(librbd::completion_t cb, void *arg);
+
+ // WNBD IO entry points
+ static void Read(
+ PWNBD_DISK Disk,
+ UINT64 RequestHandle,
+ PVOID Buffer,
+ UINT64 BlockAddress,
+ UINT32 BlockCount,
+ BOOLEAN ForceUnitAccess);
+ static void Write(
+ PWNBD_DISK Disk,
+ UINT64 RequestHandle,
+ PVOID Buffer,
+ UINT64 BlockAddress,
+ UINT32 BlockCount,
+ BOOLEAN ForceUnitAccess);
+ static void Flush(
+ PWNBD_DISK Disk,
+ UINT64 RequestHandle,
+ UINT64 BlockAddress,
+ UINT32 BlockCount);
+ static void Unmap(
+ PWNBD_DISK Disk,
+ UINT64 RequestHandle,
+ PWNBD_UNMAP_DESCRIPTOR Descriptors,
+ UINT32 Count);
+
+ static constexpr WNBD_INTERFACE RbdWnbdInterface =
+ {
+ Read,
+ Write,
+ Flush,
+ Unmap,
+ };
+};
+
+std::ostream &operator<<(std::ostream &os, const WnbdHandler::IOContext &ctx);
+
+#endif // WNBD_HANDLER_H
diff --git a/src/tools/rbd_wnbd/wnbd_wmi.cc b/src/tools/rbd_wnbd/wnbd_wmi.cc
new file mode 100644
index 000000000..f49fa4cc6
--- /dev/null
+++ b/src/tools/rbd_wnbd/wnbd_wmi.cc
@@ -0,0 +1,261 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (c) 2019 SUSE LLC
+ * Copyright (C) 2022 Cloudbase Solutions
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "wnbd_wmi.h"
+
+#include "common/debug.h"
+#include "common/win32/wstring.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-wnbd: "
+
+// Initializes the COM library for use by the calling thread using
+// COINIT_MULTITHREADED.
+static HRESULT co_initialize_basic()
+{
+ dout(10) << "initializing COM library" << dendl;
+
+ HRESULT hres = CoInitializeEx(0, COINIT_MULTITHREADED);
+ if (FAILED(hres)) {
+ derr << "CoInitializeEx failed. HRESULT: " << hres << dendl;
+ return hres;
+ }
+
+ // CoInitializeSecurity must be called once per process.
+ static bool com_security_flags_set = false;
+
+ if (!com_security_flags_set) {
+ hres = CoInitializeSecurity(
+ NULL, -1, NULL, NULL,
+ RPC_C_AUTHN_LEVEL_DEFAULT,
+ RPC_C_IMP_LEVEL_IMPERSONATE,
+ NULL,
+ EOAC_NONE,
+ NULL);
+ if (FAILED(hres)) {
+ derr << "CoInitializeSecurity failed. HRESULT: " << hres << dendl;
+ CoUninitialize();
+ return hres;
+ }
+
+ com_security_flags_set = true;
+ }
+
+ return 0;
+}
+
+// co_uninitialize must be called once for every successful
+// co_initialize_basic call. Any WMI objects (including connections,
+// event subscriptions, etc) must be released beforehand.
+static void co_uninitialize()
+{
+ dout(10) << "closing COM library" << dendl;
+ CoUninitialize();
+}
+
+HRESULT COMBootstrapper::initialize()
+{
+ std::unique_lock l{init_lock};
+
+ HRESULT hres = co_initialize_basic();
+ if (!FAILED(hres)) {
+ initialized = true;
+ }
+ return hres;
+}
+
+void COMBootstrapper::cleanup()
+{
+ if (initialized) {
+ co_uninitialize();
+ initialized = false;
+ }
+}
+
+void WmiConnection::close()
+{
+ dout(20) << "closing wmi conn: " << this
+ << ", svc: " << wbem_svc
+ << ", loc: " << wbem_loc << dendl;
+ if (wbem_svc != NULL) {
+ wbem_svc->Release();
+ wbem_svc = NULL;
+ }
+ if (wbem_loc != NULL) {
+ wbem_loc->Release();
+ wbem_loc = NULL;
+ }
+}
+
+HRESULT WmiConnection::initialize()
+{
+ HRESULT hres = CoCreateInstance(
+ CLSID_WbemLocator, 0, CLSCTX_INPROC_SERVER,
+ IID_IWbemLocator, (LPVOID*)&wbem_loc);
+ if (FAILED(hres)) {
+ derr << "CoCreateInstance failed. HRESULT: " << hres << dendl;
+ return hres;
+ }
+
+ hres = wbem_loc->ConnectServer(
+ _bstr_t(ns.c_str()).GetBSTR(), NULL, NULL, NULL,
+ WBEM_FLAG_CONNECT_USE_MAX_WAIT, NULL, NULL,
+ &wbem_svc);
+ if (FAILED(hres)) {
+ derr << "Could not connect to WMI service. HRESULT: " << hres << dendl;
+ return hres;
+ }
+
+ if (!wbem_svc) {
+ hres = MAKE_HRESULT(SEVERITY_ERROR, FACILITY_WIN32,
+ ERROR_INVALID_HANDLE);
+ derr << "WMI connection failed, no WMI service object received." << dendl;
+ return hres;
+ }
+
+ hres = CoSetProxyBlanket(
+ wbem_svc, RPC_C_AUTHN_WINNT, RPC_C_AUTHZ_NONE, NULL,
+ RPC_C_AUTHN_LEVEL_CALL, RPC_C_IMP_LEVEL_IMPERSONATE, NULL, EOAC_NONE);
+ if (FAILED(hres)) {
+ derr << "CoSetProxyBlanket failed. HRESULT:" << hres << dendl;
+ }
+
+ return hres;
+}
+
+HRESULT get_property_str(
+ IWbemClassObject* cls_obj,
+ const std::wstring& property,
+ std::wstring& value)
+{
+ VARIANT vt_prop;
+ VariantInit(&vt_prop);
+ HRESULT hres = cls_obj->Get(property.c_str(), 0, &vt_prop, 0, 0);
+ if (!FAILED(hres)) {
+ VARIANT vt_bstr_prop;
+ VariantInit(&vt_bstr_prop);
+ hres = VariantChangeType(&vt_bstr_prop, &vt_prop, 0, VT_BSTR);
+ if (!FAILED(hres)) {
+ value = vt_bstr_prop.bstrVal;
+ }
+ VariantClear(&vt_bstr_prop);
+ }
+ VariantClear(&vt_prop);
+
+ if (FAILED(hres)) {
+ derr << "Could not get WMI property: " << to_string(property)
+ << ". HRESULT: " << hres << dendl;
+ }
+ return hres;
+}
+
+HRESULT get_property_int(
+ IWbemClassObject* cls_obj,
+ const std::wstring& property,
+ uint32_t& value)
+{
+ VARIANT vt_prop;
+ VariantInit(&vt_prop);
+ HRESULT hres = cls_obj->Get(property.c_str(), 0, &vt_prop, 0, 0);
+ if (!FAILED(hres)) {
+ VARIANT vt_uint_prop;
+ VariantInit(&vt_uint_prop);
+ hres = VariantChangeType(&vt_uint_prop, &vt_prop, 0, VT_UINT);
+ if (!FAILED(hres)) {
+ value = vt_uint_prop.intVal;
+ }
+ VariantClear(&vt_uint_prop);
+ }
+ VariantClear(&vt_prop);
+
+ if (FAILED(hres)) {
+ derr << "Could not get WMI property: " << to_string(property)
+ << ". HRESULT: " << hres << dendl;
+ }
+ return hres;
+}
+
+HRESULT WmiSubscription::initialize()
+{
+ HRESULT hres = conn.initialize();
+ if (FAILED(hres)) {
+ derr << "Could not create WMI connection" << dendl;
+ return hres;
+ }
+
+ hres = conn.wbem_svc->ExecNotificationQuery(
+ _bstr_t(L"WQL").GetBSTR(),
+ _bstr_t(query.c_str()).GetBSTR(),
+ WBEM_FLAG_FORWARD_ONLY | WBEM_FLAG_RETURN_IMMEDIATELY,
+ NULL,
+ &event_enum);
+
+ if (FAILED(hres)) {
+ derr << "Notification query failed, unable to subscribe to "
+ << "WMI events. HRESULT: " << hres << dendl;
+ } else {
+ dout(20) << "wmi subscription initialized: " << this
+ << ", event enum: " << event_enum
+ << ", conn: " << &conn << ", conn svc: " << conn.wbem_svc << dendl;
+ }
+
+ return hres;
+}
+
+void WmiSubscription::close()
+{
+ dout(20) << "closing wmi subscription: " << this
+ << ", event enum: " << event_enum << dendl;
+ if (event_enum != NULL) {
+ event_enum->Release();
+ event_enum = NULL;
+ }
+}
+
+HRESULT WmiSubscription::next(
+ long timeout,
+ ULONG count,
+ IWbemClassObject **objects,
+ ULONG *returned)
+{
+ if (!event_enum) {
+ HRESULT hres = MAKE_HRESULT(
+ SEVERITY_ERROR, FACILITY_WIN32,
+ ERROR_INVALID_HANDLE);
+ derr << "WMI subscription uninitialized." << dendl;
+ return hres;
+ }
+
+ HRESULT hres = event_enum->Next(timeout, count, objects, returned);
+ if (FAILED(hres)) {
+ derr << "Unable to retrieve WMI events. HRESULT: "
+ << hres << dendl;
+ }
+ return hres;
+}
+
+WmiSubscription subscribe_wnbd_adapter_events(
+ uint32_t interval)
+{
+ std::wostringstream query_stream;
+ query_stream
+ << L"SELECT * FROM __InstanceOperationEvent "
+ << L"WITHIN " << interval
+ << L"WHERE TargetInstance ISA 'Win32_ScsiController' "
+ << L"AND TargetInstance.Description="
+ << L"'WNBD SCSI Virtual Adapter'";
+
+ return WmiSubscription(L"root\\cimv2", query_stream.str());
+}
diff --git a/src/tools/rbd_wnbd/wnbd_wmi.h b/src/tools/rbd_wnbd/wnbd_wmi.h
new file mode 100644
index 000000000..4d802d986
--- /dev/null
+++ b/src/tools/rbd_wnbd/wnbd_wmi.h
@@ -0,0 +1,109 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (c) 2019 SUSE LLC
+ * Copyright (C) 2022 Cloudbase Solutions
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+#include <comutil.h>
+
+#define _WIN32_DCOM
+#include <wbemcli.h>
+
+#include <string>
+#include <vector>
+
+#include "common/ceph_mutex.h"
+
+// Convenience helper for initializing and cleaning up the
+// Windows COM library using "COINIT_MULTITHREADED" concurrency mode.
+// Any WMI objects (including connections, event subscriptions, etc)
+// must be released before the COM library gets closed.
+class COMBootstrapper
+{
+private:
+ bool initialized = false;
+
+ ceph::mutex init_lock = ceph::make_mutex("COMBootstrapper::InitLocker");
+
+public:
+ HRESULT initialize();
+ void cleanup();
+
+ ~COMBootstrapper()
+ {
+ cleanup();
+ }
+};
+
+class WmiConnection
+{
+private:
+ std::wstring ns;
+public:
+ IWbemLocator* wbem_loc;
+ IWbemServices* wbem_svc;
+
+ WmiConnection(std::wstring ns)
+ : ns(ns)
+ , wbem_loc(nullptr)
+ , wbem_svc(nullptr)
+ {
+ }
+ ~WmiConnection()
+ {
+ close();
+ }
+
+ HRESULT initialize();
+ void close();
+};
+
+HRESULT get_property_str(
+ IWbemClassObject* cls_obj,
+ const std::wstring& property,
+ std::wstring& value);
+HRESULT get_property_int(
+ IWbemClassObject* cls_obj,
+ const std::wstring& property,
+ uint32_t& value);
+
+class WmiSubscription
+{
+private:
+ std::wstring query;
+
+ WmiConnection conn;
+ IEnumWbemClassObject *event_enum;
+
+public:
+ WmiSubscription(std::wstring ns, std::wstring query)
+ : query(query)
+ , conn(WmiConnection(ns))
+ , event_enum(nullptr)
+ {
+ }
+ ~WmiSubscription()
+ {
+ close();
+ }
+
+ HRESULT initialize();
+ void close();
+
+ // IEnumWbemClassObject::Next wrapper
+ HRESULT next(
+ long timeout,
+ ULONG count,
+ IWbemClassObject **objects,
+ ULONG *returned);
+};
+
+WmiSubscription subscribe_wnbd_adapter_events(uint32_t interval);