diff options
Diffstat (limited to '')
232 files changed, 62592 insertions, 0 deletions
diff --git a/src/tools/rbd/ArgumentTypes.cc b/src/tools/rbd/ArgumentTypes.cc new file mode 100644 index 000000000..17a06c805 --- /dev/null +++ b/src/tools/rbd/ArgumentTypes.cc @@ -0,0 +1,576 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/rbd/features.h" +#include "common/config_proxy.h" +#include "common/strtol.h" +#include "common/Formatter.h" +#include "global/global_context.h" +#include <iostream> +#include <boost/tokenizer.hpp> + +namespace rbd { +namespace argument_types { + +namespace po = boost::program_options; + +const std::map<uint64_t, std::string> ImageFeatures::FEATURE_MAPPING = { + {RBD_FEATURE_LAYERING, RBD_FEATURE_NAME_LAYERING}, + {RBD_FEATURE_STRIPINGV2, RBD_FEATURE_NAME_STRIPINGV2}, + {RBD_FEATURE_EXCLUSIVE_LOCK, RBD_FEATURE_NAME_EXCLUSIVE_LOCK}, + {RBD_FEATURE_OBJECT_MAP, RBD_FEATURE_NAME_OBJECT_MAP}, + {RBD_FEATURE_FAST_DIFF, RBD_FEATURE_NAME_FAST_DIFF}, + {RBD_FEATURE_DEEP_FLATTEN, RBD_FEATURE_NAME_DEEP_FLATTEN}, + {RBD_FEATURE_JOURNALING, RBD_FEATURE_NAME_JOURNALING}, + {RBD_FEATURE_DATA_POOL, RBD_FEATURE_NAME_DATA_POOL}, + {RBD_FEATURE_OPERATIONS, RBD_FEATURE_NAME_OPERATIONS}, + {RBD_FEATURE_MIGRATING, RBD_FEATURE_NAME_MIGRATING}, + {RBD_FEATURE_NON_PRIMARY, RBD_FEATURE_NAME_NON_PRIMARY}, + {RBD_FEATURE_DIRTY_CACHE, RBD_FEATURE_NAME_DIRTY_CACHE}, +}; + +Format::Formatter Format::create_formatter(bool pretty) const { + if (value == "json") { + return Formatter(new JSONFormatter(pretty)); + } else if (value == "xml") { + return Formatter(new XMLFormatter(pretty)); + } + return Formatter(); +} + +std::string get_name_prefix(ArgumentModifier modifier) { + switch (modifier) { + case ARGUMENT_MODIFIER_SOURCE: + return SOURCE_PREFIX; + case ARGUMENT_MODIFIER_DEST: + return DEST_PREFIX; + default: + return ""; + } +} + +std::string get_description_prefix(ArgumentModifier 
modifier) { + switch (modifier) { + case ARGUMENT_MODIFIER_SOURCE: + return "source "; + case ARGUMENT_MODIFIER_DEST: + return "destination "; + default: + return ""; + } +} + +void add_pool_option(po::options_description *opt, + ArgumentModifier modifier, + const std::string &desc_suffix) { + std::string name = POOL_NAME + ",p"; + std::string description = "pool name"; + switch (modifier) { + case ARGUMENT_MODIFIER_NONE: + break; + case ARGUMENT_MODIFIER_SOURCE: + description = "source " + description; + break; + case ARGUMENT_MODIFIER_DEST: + name = DEST_POOL_NAME; + description = "destination " + description; + break; + } + description += desc_suffix; + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_namespace_option(boost::program_options::options_description *opt, + ArgumentModifier modifier) { + std::string name = NAMESPACE_NAME; + std::string description = "namespace name"; + switch (modifier) { + case ARGUMENT_MODIFIER_NONE: + break; + case ARGUMENT_MODIFIER_SOURCE: + description = "source " + description; + break; + case ARGUMENT_MODIFIER_DEST: + name = DEST_NAMESPACE_NAME; + description = "destination " + description; + break; + } + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_image_option(po::options_description *opt, + ArgumentModifier modifier, + const std::string &desc_suffix) { + std::string name = IMAGE_NAME; + std::string description = "image name"; + switch (modifier) { + case ARGUMENT_MODIFIER_NONE: + break; + case ARGUMENT_MODIFIER_SOURCE: + description = "source " + description; + break; + case ARGUMENT_MODIFIER_DEST: + name = DEST_IMAGE_NAME; + description = "destination " + description; + break; + } + description += desc_suffix; + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void 
add_image_id_option(po::options_description *opt, + const std::string &desc_suffix) { + std::string name = IMAGE_ID; + std::string description = "image id"; + description += desc_suffix; + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_snap_option(po::options_description *opt, + ArgumentModifier modifier) { + + std::string name = SNAPSHOT_NAME; + std::string description = "snapshot name"; + switch (modifier) { + case ARGUMENT_MODIFIER_NONE: + break; + case ARGUMENT_MODIFIER_DEST: + name = DEST_SNAPSHOT_NAME; + description = "destination " + description; + break; + case ARGUMENT_MODIFIER_SOURCE: + description = "source " + description; + break; + } + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_snap_id_option(po::options_description *opt) { + opt->add_options() + (SNAPSHOT_ID.c_str(), po::value<uint64_t>(), "snapshot id"); +} + +void add_pool_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + bool namespaces_supported) { + opt->add_options() + ((POOL_NAME + ",p").c_str(), po::value<std::string>(), "pool name"); + if (namespaces_supported) { + add_namespace_option(opt, ARGUMENT_MODIFIER_NONE); + pos->add_options() + ("pool-spec", "pool specification\n" + "(example: <pool-name>[/<namespace>]"); + } else { + pos->add_options() + ("pool-name", "pool name"); + } +} + +void add_image_spec_options(po::options_description *pos, + po::options_description *opt, + ArgumentModifier modifier) { + pos->add_options() + ((get_name_prefix(modifier) + IMAGE_SPEC).c_str(), + (get_description_prefix(modifier) + "image specification\n" + + "(example: [<pool-name>/[<namespace>/]]<image-name>)").c_str()); + add_pool_option(opt, modifier); + add_namespace_option(opt, modifier); + add_image_option(opt, modifier); +} + +void add_snap_spec_options(po::options_description *pos, 
+ po::options_description *opt, + ArgumentModifier modifier) { + pos->add_options() + ((get_name_prefix(modifier) + SNAPSHOT_SPEC).c_str(), + (get_description_prefix(modifier) + "snapshot specification\n" + + "(example: [<pool-name>/[<namespace>/]]<image-name>@<snap-name>)").c_str()); + add_pool_option(opt, modifier); + add_namespace_option(opt, modifier); + add_image_option(opt, modifier); + add_snap_option(opt, modifier); +} + +void add_image_or_snap_spec_options(po::options_description *pos, + po::options_description *opt, + ArgumentModifier modifier) { + pos->add_options() + ((get_name_prefix(modifier) + IMAGE_OR_SNAPSHOT_SPEC).c_str(), + (get_description_prefix(modifier) + "image or snapshot specification\n" + + "(example: [<pool-name>/[<namespace>/]]<image-name>[@<snap-name>])").c_str()); + add_pool_option(opt, modifier); + add_namespace_option(opt, modifier); + add_image_option(opt, modifier); + add_snap_option(opt, modifier); +} + +void add_create_image_options(po::options_description *opt, + bool include_format) { + // TODO get default image format from conf + if (include_format) { + opt->add_options() + (IMAGE_FORMAT.c_str(), po::value<ImageFormat>(), + "image format [default: 2]") + (IMAGE_NEW_FORMAT.c_str(), + po::value<ImageNewFormat>()->zero_tokens(), + "deprecated[:image-format 2]"); + } + + opt->add_options() + (IMAGE_ORDER.c_str(), po::value<ImageOrder>(), + "deprecated[:object-size]") + (IMAGE_OBJECT_SIZE.c_str(), po::value<ImageObjectSize>(), + "object size in B/K/M [4K <= object size <= 32M]") + (IMAGE_FEATURES.c_str(), po::value<ImageFeatures>()->composing(), + ("image features\n" + get_short_features_help(true)).c_str()) + (IMAGE_SHARED.c_str(), po::bool_switch(), "shared image") + (IMAGE_STRIPE_UNIT.c_str(), po::value<ImageObjectSize>(), "stripe unit in B/K/M") + (IMAGE_STRIPE_COUNT.c_str(), po::value<uint64_t>(), "stripe count") + (IMAGE_DATA_POOL.c_str(), po::value<std::string>(), "data pool") + (IMAGE_MIRROR_IMAGE_MODE.c_str(), 
po::value<MirrorImageMode>(), + "mirror image mode [journal or snapshot]"); + + add_create_journal_options(opt); +} + +void add_create_journal_options(po::options_description *opt) { + opt->add_options() + (JOURNAL_SPLAY_WIDTH.c_str(), po::value<uint64_t>(), + "number of active journal objects") + (JOURNAL_OBJECT_SIZE.c_str(), po::value<JournalObjectSize>(), + "size of journal objects [4K <= size <= 64M]") + (JOURNAL_POOL.c_str(), po::value<std::string>(), + "pool for journal objects"); +} + +void add_size_option(boost::program_options::options_description *opt) { + opt->add_options() + ((IMAGE_SIZE + ",s").c_str(), po::value<ImageSize>()->required(), + "image size (in M/G/T) [default: M]"); +} + +void add_sparse_size_option(boost::program_options::options_description *opt) { + opt->add_options() + (IMAGE_SPARSE_SIZE.c_str(), po::value<ImageObjectSize>(), + "sparse size in B/K/M [default: 4K]"); +} + +void add_path_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + const std::string &description) { + pos->add_options() + (PATH_NAME.c_str(), po::value<std::string>(), description.c_str()); + opt->add_options() + (PATH.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_limit_option(po::options_description *opt) { + std::string description = "maximum allowed snapshot count"; + + opt->add_options() + (LIMIT.c_str(), po::value<uint64_t>(), description.c_str()); +} + +void add_no_progress_option(boost::program_options::options_description *opt) { + opt->add_options() + (NO_PROGRESS.c_str(), po::bool_switch(), "disable progress output"); +} + +void add_format_options(boost::program_options::options_description *opt) { + opt->add_options() + (FORMAT.c_str(), po::value<Format>(), "output format (plain, json, or xml) [default: plain]") + (PRETTY_FORMAT.c_str(), po::bool_switch(), + "pretty formatting (json and xml)"); +} + +void add_verbose_option(boost::program_options::options_description 
*opt) { + opt->add_options() + (VERBOSE.c_str(), po::bool_switch(), "be verbose"); +} + +void add_no_error_option(boost::program_options::options_description *opt) { + opt->add_options() + (NO_ERR.c_str(), po::bool_switch(), "continue after error"); +} + +void add_export_format_option(boost::program_options::options_description *opt) { + opt->add_options() + ("export-format", po::value<ExportFormat>(), "format of image file"); +} + +void add_flatten_option(boost::program_options::options_description *opt) { + opt->add_options() + (IMAGE_FLATTEN.c_str(), po::bool_switch(), + "fill clone with parent data (make it independent)"); +} + +void add_snap_create_options(po::options_description *opt) { + opt->add_options() + (SKIP_QUIESCE.c_str(), po::bool_switch(), "do not run quiesce hooks") + (IGNORE_QUIESCE_ERROR.c_str(), po::bool_switch(), + "ignore quiesce hook error"); +} + +void add_encryption_options(boost::program_options::options_description *opt) { + opt->add_options() + (ENCRYPTION_FORMAT.c_str(), + po::value<std::vector<EncryptionFormat>>(), + "encryption format (luks, luks1, luks2) [default: luks]"); + + opt->add_options() + (ENCRYPTION_PASSPHRASE_FILE.c_str(), + po::value<std::vector<std::string>>(), + "path to file containing passphrase for unlocking the image"); +} + +std::string get_short_features_help(bool append_suffix) { + std::ostringstream oss; + bool first_feature = true; + oss << "["; + for (auto &pair : ImageFeatures::FEATURE_MAPPING) { + if ((pair.first & RBD_FEATURES_IMPLICIT_ENABLE) != 0ULL) { + // hide implicitly enabled features from list + continue; + } else if (!append_suffix && (pair.first & RBD_FEATURES_MUTABLE) == 0ULL) { + // hide non-mutable features for the 'rbd feature XYZ' command + continue; + } + + if (!first_feature) { + oss << ", "; + } + first_feature = false; + + std::string suffix; + if (append_suffix) { + if ((pair.first & rbd::utils::get_rbd_default_features(g_ceph_context)) != 0) { + suffix += "+"; + } + if ((pair.first & 
RBD_FEATURES_MUTABLE) != 0) { + suffix += "*"; + } else if ((pair.first & RBD_FEATURES_DISABLE_ONLY) != 0) { + suffix += "-"; + } + if (!suffix.empty()) { + suffix = "(" + suffix + ")"; + } + } + oss << pair.second << suffix; + } + oss << "]"; + return oss.str(); +} + +std::string get_long_features_help() { + std::ostringstream oss; + oss << "Image Features:" << std::endl + << " (*) supports enabling/disabling on existing images" << std::endl + << " (-) supports disabling-only on existing images" << std::endl + << " (+) enabled by default for new images if features not specified" + << std::endl; + return oss.str(); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageSize *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + std::string parse_error; + uint64_t size = strict_iecstrtoll(s, &parse_error); + if (!parse_error.empty()) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + + //NOTE: We can remove below given three lines of code once all applications, + //which use this CLI will adopt B/K/M/G/T/P/E with size value + if (isdigit(*s.rbegin())) { + size = size << 20; // Default MB to Bytes + } + v = boost::any(size); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageOrder *target_type, int dummy) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + try { + uint64_t order = boost::lexical_cast<uint64_t>(s); + if (order >= 12 && order <= 25) { + v = boost::any(order); + return; + } + } catch (const boost::bad_lexical_cast &) { + } + throw po::validation_error(po::validation_error::invalid_option_value); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageObjectSize *target_type, int dummy) { + po::validators::check_first_occurrence(v); + const std::string &s = 
po::validators::get_single_string(values); + + std::string parse_error; + uint64_t objectsize = strict_iecstrtoll(s, &parse_error); + if (!parse_error.empty()) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + v = boost::any(objectsize); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageFormat *target_type, int dummy) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + try { + uint32_t format = boost::lexical_cast<uint32_t>(s); + if (format == 1 || format == 2) { + v = boost::any(format); + return; + } + } catch (const boost::bad_lexical_cast &) { + } + throw po::validation_error(po::validation_error::invalid_option_value); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageNewFormat *target_type, int dummy) { + v = boost::any(true); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageFeatures *target_type, int) { + if (v.empty()) { + v = boost::any(static_cast<uint64_t>(0)); + } + + uint64_t &features = boost::any_cast<uint64_t &>(v); + for (auto &value : values) { + boost::char_separator<char> sep(","); + boost::tokenizer<boost::char_separator<char> > tok(value, sep); + for (auto &token : tok) { + bool matched = false; + for (auto &it : ImageFeatures::FEATURE_MAPPING) { + if (token == it.second) { + features |= it.first; + matched = true; + break; + } + } + + if (!matched) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + } + } +} + +void validate(boost::any& v, const std::vector<std::string>& values, + MirrorImageMode* mirror_image_mode, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + if (s == "journal") { + v = boost::any(RBD_MIRROR_IMAGE_MODE_JOURNAL); + } else if (s == "snapshot") { + v = boost::any(RBD_MIRROR_IMAGE_MODE_SNAPSHOT); + } else { + throw 
po::validation_error(po::validation_error::invalid_option_value); + } +} + +void validate(boost::any& v, const std::vector<std::string>& values, + Format *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + if (s == "plain" || s == "json" || s == "xml") { + v = boost::any(Format(s)); + } else { + throw po::validation_error(po::validation_error::invalid_option_value); + } +} + +void validate(boost::any& v, const std::vector<std::string>& values, + JournalObjectSize *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + std::string parse_error; + uint64_t size = strict_iecstrtoll(s, &parse_error); + if (parse_error.empty() && (size >= (1 << 12)) && (size <= (1 << 26))) { + v = boost::any(size); + return; + } + throw po::validation_error(po::validation_error::invalid_option_value); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + EncryptionAlgorithm *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + if (s == "aes-128") { + v = boost::any(RBD_ENCRYPTION_ALGORITHM_AES128); + } else if (s == "aes-256") { + v = boost::any(RBD_ENCRYPTION_ALGORITHM_AES256); + } else { + throw po::validation_error(po::validation_error::invalid_option_value); + } +} + +void validate(boost::any& v, const std::vector<std::string>& values, + EncryptionFormat *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + if (s == "luks") { + v = boost::any(EncryptionFormat{RBD_ENCRYPTION_FORMAT_LUKS}); + } else if (s == "luks1") { + v = boost::any(EncryptionFormat{RBD_ENCRYPTION_FORMAT_LUKS1}); + } else if (s == "luks2") { + v = boost::any(EncryptionFormat{RBD_ENCRYPTION_FORMAT_LUKS2}); + } else { + throw 
po::validation_error(po::validation_error::invalid_option_value); + } +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ExportFormat *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + std::string parse_error; + uint64_t format = strict_iecstrtoll(s, &parse_error); + if (!parse_error.empty() || (format != 1 && format != 2)) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + + v = boost::any(format); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + Secret *target_type, int) { + + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + g_conf().set_val_or_die("keyfile", s.c_str()); + v = boost::any(s); +} + +} // namespace argument_types +} // namespace rbd diff --git a/src/tools/rbd/ArgumentTypes.h b/src/tools/rbd/ArgumentTypes.h new file mode 100644 index 000000000..db16b4b3c --- /dev/null +++ b/src/tools/rbd/ArgumentTypes.h @@ -0,0 +1,244 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_ARGUMENT_TYPES_H +#define CEPH_RBD_ARGUMENT_TYPES_H + +#include "include/int_types.h" +#include <set> +#include <string> +#include <vector> +#include <boost/any.hpp> +#include <boost/program_options.hpp> +#include <boost/shared_ptr.hpp> + +namespace ceph { class Formatter; } + +namespace rbd { +namespace argument_types { + +enum ArgumentModifier { + ARGUMENT_MODIFIER_NONE, + ARGUMENT_MODIFIER_SOURCE, + ARGUMENT_MODIFIER_DEST +}; + +enum SpecFormat { + SPEC_FORMAT_IMAGE, + SPEC_FORMAT_SNAPSHOT, + SPEC_FORMAT_IMAGE_OR_SNAPSHOT +}; + +static const std::string SOURCE_PREFIX("source-"); +static const std::string DEST_PREFIX("dest-"); + +// positional arguments +static const std::string POSITIONAL_COMMAND_SPEC("positional-command-spec"); +static const std::string 
POSITIONAL_ARGUMENTS("positional-arguments"); +static const std::string IMAGE_SPEC("image-spec"); +static const std::string SNAPSHOT_SPEC("snap-spec"); +static const std::string IMAGE_OR_SNAPSHOT_SPEC("image-or-snap-spec"); +static const std::string PATH_NAME("path-name"); +static const std::string IMAGE_ID("image-id"); + +// optional arguments +static const std::string CONFIG_PATH("conf"); +static const std::string POOL_NAME("pool"); +static const std::string DEST_POOL_NAME("dest-pool"); +static const std::string NAMESPACE_NAME("namespace"); +static const std::string DEST_NAMESPACE_NAME("dest-namespace"); +static const std::string IMAGE_NAME("image"); +static const std::string DEST_IMAGE_NAME("dest"); +static const std::string SNAPSHOT_NAME("snap"); +static const std::string SNAPSHOT_ID("snap-id"); +static const std::string DEST_SNAPSHOT_NAME("dest-snap"); +static const std::string PATH("path"); +static const std::string FROM_SNAPSHOT_NAME("from-snap"); +static const std::string WHOLE_OBJECT("whole-object"); + +// encryption arguments +static const std::string ENCRYPTION_FORMAT("encryption-format"); +static const std::string ENCRYPTION_PASSPHRASE_FILE("encryption-passphrase-file"); + +static const std::string IMAGE_FORMAT("image-format"); +static const std::string IMAGE_NEW_FORMAT("new-format"); +static const std::string IMAGE_ORDER("order"); +static const std::string IMAGE_OBJECT_SIZE("object-size"); +static const std::string IMAGE_FEATURES("image-feature"); +static const std::string IMAGE_SHARED("image-shared"); +static const std::string IMAGE_SIZE("size"); +static const std::string IMAGE_STRIPE_UNIT("stripe-unit"); +static const std::string IMAGE_STRIPE_COUNT("stripe-count"); +static const std::string IMAGE_DATA_POOL("data-pool"); +static const std::string IMAGE_SPARSE_SIZE("sparse-size"); +static const std::string IMAGE_THICK_PROVISION("thick-provision"); +static const std::string IMAGE_FLATTEN("flatten"); +static const std::string 
IMAGE_MIRROR_IMAGE_MODE("mirror-image-mode"); + +static const std::string JOURNAL_OBJECT_SIZE("journal-object-size"); +static const std::string JOURNAL_SPLAY_WIDTH("journal-splay-width"); +static const std::string JOURNAL_POOL("journal-pool"); + +static const std::string NO_PROGRESS("no-progress"); +static const std::string FORMAT("format"); +static const std::string PRETTY_FORMAT("pretty-format"); +static const std::string VERBOSE("verbose"); +static const std::string NO_ERR("no-error"); + +static const std::string LIMIT("limit"); + +static const std::string SKIP_QUIESCE("skip-quiesce"); +static const std::string IGNORE_QUIESCE_ERROR("ignore-quiesce-error"); + +static const std::set<std::string> SWITCH_ARGUMENTS = { + WHOLE_OBJECT, IMAGE_SHARED, IMAGE_THICK_PROVISION, IMAGE_FLATTEN, + NO_PROGRESS, PRETTY_FORMAT, VERBOSE, NO_ERR, SKIP_QUIESCE, + IGNORE_QUIESCE_ERROR +}; + +struct ImageSize {}; +struct ImageOrder {}; +struct ImageObjectSize {}; +struct ImageFormat {}; +struct ImageNewFormat {}; + +struct ImageFeatures { + static const std::map<uint64_t, std::string> FEATURE_MAPPING; + + uint64_t features; +}; + +struct MirrorImageMode {}; + +template <typename T> +struct TypedValue { + T value; + TypedValue(const T& t) : value(t) {} +}; + +struct Format : public TypedValue<std::string> { + typedef boost::shared_ptr<ceph::Formatter> Formatter; + + Format(const std::string &format) : TypedValue<std::string>(format) {} + + Formatter create_formatter(bool pretty) const; +}; + +struct JournalObjectSize {}; + +struct ExportFormat {}; + +struct Secret {}; + +struct EncryptionAlgorithm {}; +struct EncryptionFormat { + uint64_t format; +}; + +void add_export_format_option(boost::program_options::options_description *opt); + +std::string get_name_prefix(ArgumentModifier modifier); +std::string get_description_prefix(ArgumentModifier modifier); + +void add_all_option(boost::program_options::options_description *opt, + std::string description); + +void 
add_pool_option(boost::program_options::options_description *opt, + ArgumentModifier modifier, + const std::string &desc_suffix = ""); +void add_namespace_option(boost::program_options::options_description *opt, + ArgumentModifier modifier); + +void add_image_option(boost::program_options::options_description *opt, + ArgumentModifier modifier, + const std::string &desc_suffix = ""); + +void add_image_id_option(boost::program_options::options_description *opt, + const std::string &desc_suffix = ""); + +void add_snap_option(boost::program_options::options_description *opt, + ArgumentModifier modifier); +void add_snap_id_option(boost::program_options::options_description *opt); + +void add_pool_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + bool namespaces_supported); + +void add_image_spec_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + ArgumentModifier modifier); + +void add_snap_spec_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + ArgumentModifier modifier); + +void add_image_or_snap_spec_options( + boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + ArgumentModifier modifier); + +void add_create_image_options(boost::program_options::options_description *opt, + bool include_format); + +void add_create_journal_options( + boost::program_options::options_description *opt); + +void add_size_option(boost::program_options::options_description *opt); + +void add_sparse_size_option(boost::program_options::options_description *opt); + +void add_path_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + const std::string &description); + +void add_limit_option(boost::program_options::options_description *opt); + +void 
add_no_progress_option(boost::program_options::options_description *opt); + +void add_format_options(boost::program_options::options_description *opt); + +void add_verbose_option(boost::program_options::options_description *opt); + +void add_no_error_option(boost::program_options::options_description *opt); + +void add_flatten_option(boost::program_options::options_description *opt); + +void add_snap_create_options(boost::program_options::options_description *opt); + +void add_encryption_options(boost::program_options::options_description *opt); + +std::string get_short_features_help(bool append_suffix); +std::string get_long_features_help(); + +void validate(boost::any& v, const std::vector<std::string>& values, + ExportFormat *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageSize *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageOrder *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageObjectSize *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageFormat *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageNewFormat *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageFeatures *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + Format *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + JournalObjectSize *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + EncryptionAlgorithm *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + EncryptionFormat *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + Secret *target_type, int); + + +std::ostream &operator<<(std::ostream &os, const ImageFeatures &features); + 
+} // namespace argument_types +} // namespace rbd + +#endif // CEPH_RBD_ARGUMENT_TYPES_H diff --git a/src/tools/rbd/CMakeLists.txt b/src/tools/rbd/CMakeLists.txt new file mode 100644 index 000000000..5a895354d --- /dev/null +++ b/src/tools/rbd/CMakeLists.txt @@ -0,0 +1,80 @@ +set(CURSES_NEED_NCURSES TRUE) +# libcurses may not be available on some platforms (e.g. Windows). +find_package(Curses) + +set(rbd_srcs + rbd.cc + ArgumentTypes.cc + IndentStream.cc + MirrorDaemonServiceInfo.cc + OptionPrinter.cc + Schedule.cc + Shell.cc + Utils.cc + action/Bench.cc + action/Children.cc + action/Clone.cc + action/Config.cc + action/Copy.cc + action/Create.cc + action/Device.cc + action/Diff.cc + action/DiskUsage.cc + action/Encryption.cc + action/Export.cc + action/Feature.cc + action/Flatten.cc + action/Ggate.cc + action/Group.cc + action/ImageMeta.cc + action/Import.cc + action/Info.cc + action/Journal.cc + action/Kernel.cc + action/List.cc + action/Lock.cc + action/MergeDiff.cc + action/Migration.cc + action/MirrorImage.cc + action/MirrorPool.cc + action/MirrorSnapshotSchedule.cc + action/Namespace.cc + action/Nbd.cc + action/ObjectMap.cc + action/Perf.cc + action/PersistentCache.cc + action/Pool.cc + action/Remove.cc + action/Rename.cc + action/Resize.cc + action/Snap.cc + action/Sparsify.cc + action/Status.cc + action/TrashPurgeSchedule.cc + action/Trash.cc + action/Watch.cc + action/Wnbd.cc) + +add_executable(rbd ${rbd_srcs} + $<TARGET_OBJECTS:common_texttable_obj>) +set_target_properties(rbd PROPERTIES OUTPUT_NAME rbd) +target_link_libraries(rbd + cls_journal_client + cls_rbd_client + rbd_types + librbd + journal + libneorados + librados + ceph-common global + ${BLKID_LIBRARIES} ${CMAKE_DL_LIBS}) +if(CURSES_FOUND) + target_compile_definitions(rbd PRIVATE HAVE_CURSES) + target_link_libraries(rbd ${CURSES_LIBRARIES}) +endif() +if(WITH_KRBD) + target_link_libraries(rbd + krbd) +endif() + +install(TARGETS rbd DESTINATION bin) diff --git a/src/tools/rbd/IndentStream.cc 
b/src/tools/rbd/IndentStream.cc new file mode 100644 index 000000000..83591a8cb --- /dev/null +++ b/src/tools/rbd/IndentStream.cc @@ -0,0 +1,59 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/IndentStream.h" + +namespace rbd { + +int IndentBuffer::overflow (int c) { + if (traits_type::eq_int_type(traits_type::eof(), c)) { + return traits_type::not_eof(c); + } + + int r; + switch (c) { + case '\n': + m_buffer += c; + flush_line(); + r = m_streambuf->sputn(m_buffer.c_str(), m_buffer.size()); + m_buffer.clear(); + return r; + case '\t': + // convert tab to single space and fall-through + c = ' '; + default: + if (m_indent + m_buffer.size() >= m_line_length) { + size_t word_offset = m_buffer.find_last_of(m_delim); + bool space_delim = (m_delim == " "); + if (word_offset == std::string::npos && !space_delim) { + word_offset = m_buffer.find_last_of(" "); + } + + if (word_offset != std::string::npos) { + flush_line(); + m_streambuf->sputn(m_buffer.c_str(), word_offset); + m_buffer = std::string(m_buffer, + word_offset + (space_delim ? 
1 : 0)); + } else { + flush_line(); + m_streambuf->sputn(m_buffer.c_str(), m_buffer.size()); + m_buffer.clear(); + } + m_streambuf->sputc('\n'); + } + m_buffer += c; + return c; + } +} + +void IndentBuffer::flush_line() { + if (m_initial_offset >= m_indent) { + m_initial_offset = 0; + m_streambuf->sputc('\n'); + } + + m_streambuf->sputn(m_indent_prefix.c_str(), m_indent - m_initial_offset); + m_initial_offset = 0; +} + +} // namespace rbd diff --git a/src/tools/rbd/IndentStream.h b/src/tools/rbd/IndentStream.h new file mode 100644 index 000000000..85ccc85b3 --- /dev/null +++ b/src/tools/rbd/IndentStream.h @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_INDENT_STREAM_H +#define CEPH_RBD_INDENT_STREAM_H + +#include "include/int_types.h" +#include <iostream> +#include <streambuf> +#include <iomanip> + +namespace rbd { + +class IndentBuffer : public std::streambuf { +public: + IndentBuffer(size_t indent, size_t initial_offset, size_t line_length, + std::streambuf *streambuf) + : m_indent(indent), m_initial_offset(initial_offset), + m_line_length(line_length), m_streambuf(streambuf), + m_delim(" "), m_indent_prefix(m_indent, ' ') { + } + + void set_delimiter(const std::string &delim) { + m_delim = delim; + } + +protected: + int overflow (int c) override; + +private: + size_t m_indent; + size_t m_initial_offset; + size_t m_line_length; + std::streambuf *m_streambuf; + + std::string m_delim; + std::string m_indent_prefix; + std::string m_buffer; + + void flush_line(); +}; + +class IndentStream : public std::ostream { +public: + IndentStream(size_t indent, size_t initial_offset, size_t line_length, + std::ostream &os) + : std::ostream(&m_indent_buffer), + m_indent_buffer(indent, initial_offset, line_length, os.rdbuf()) { + } + + void set_delimiter(const std::string &delim) { + m_indent_buffer.set_delimiter(delim); + } +private: + IndentBuffer m_indent_buffer; +}; + +} // namespace rbd 
+ +#endif // CEPH_RBD_INDENT_STREAM_ITERATOR_H diff --git a/src/tools/rbd/MirrorDaemonServiceInfo.cc b/src/tools/rbd/MirrorDaemonServiceInfo.cc new file mode 100644 index 000000000..e7422e66a --- /dev/null +++ b/src/tools/rbd/MirrorDaemonServiceInfo.cc @@ -0,0 +1,307 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/ceph_json.h" +#include "common/errno.h" +#include "include/rados/librados.hpp" +#include "include/stringify.h" +#include "tools/rbd/MirrorDaemonServiceInfo.h" + +#include <boost/scope_exit.hpp> +#include <iostream> + +#include "json_spirit/json_spirit.h" + +namespace rbd { + +std::ostream& operator<<(std::ostream& os, MirrorHealth mirror_health) { + switch (mirror_health) { + case MIRROR_HEALTH_OK: + os << "OK"; + break; + case MIRROR_HEALTH_UNKNOWN: + os << "UNKNOWN"; + break; + case MIRROR_HEALTH_WARNING: + os << "WARNING"; + break; + case MIRROR_HEALTH_ERROR: + os << "ERROR"; + break; + } + return os; +} + +std::string MirrorService::get_image_description() const { + std::string description = (!client_id.empty() ? 
client_id : + stringify(service_id)); + if (!hostname.empty()) { + description += " on " + hostname; + } + return description; +} + +void MirrorService::dump_image( + argument_types::Format::Formatter formatter) const { + formatter->open_object_section("daemon_service"); + formatter->dump_string("service_id", service_id); + formatter->dump_string("instance_id", instance_id); + formatter->dump_string("daemon_id", client_id); + formatter->dump_string("hostname", hostname); + formatter->close_section(); +} + +int MirrorDaemonServiceInfo::init() { + int r = get_mirror_service_dump(); + if (r < 0) { + return r; + } else if (m_mirror_services.empty()) { + return 0; + } + + r = get_mirror_service_status(); + if (r < 0) { + return r; + } + + return 0; +} + +const MirrorService* MirrorDaemonServiceInfo::get_by_service_id( + const std::string& service_id) const { + auto it = m_mirror_services.find(service_id); + if (it == m_mirror_services.end()) { + return nullptr; + } + + return &it->second; +} + +const MirrorService* MirrorDaemonServiceInfo::get_by_instance_id( + const std::string& instance_id) const { + auto it = m_instance_to_service_ids.find(instance_id); + if (it == m_instance_to_service_ids.end()) { + return nullptr; + } + + return get_by_service_id(it->second); +} + +MirrorServices MirrorDaemonServiceInfo::get_mirror_services() const { + MirrorServices mirror_services; + for (auto& it : m_mirror_services) { + mirror_services.push_back(it.second); + } + return mirror_services; +} + +int MirrorDaemonServiceInfo::get_mirror_service_dump() { + librados::Rados rados(m_io_ctx); + std::string cmd = R"({"prefix": "service dump", "format": "json"})"; + bufferlist in_bl; + bufferlist out_bl; + + int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr); + if (r < 0) { + std::cerr << "rbd: failed to query services: " << cpp_strerror(r) + << std::endl; + return r; + } + + json_spirit::mValue json_root; + if(!json_spirit::read(out_bl.to_str(), json_root)) { + std::cerr << "rbd: 
invalid service dump JSON received" << std::endl; + return -EBADMSG; + } + + try { + auto& services = json_root.get_obj()["services"]; + if (services.is_null()) { + std::cerr << "rbd: missing services in service dump JSON" << std::endl; + return -EBADMSG; + } + + auto& service = services.get_obj()["rbd-mirror"]; + if (service.is_null()) { + // no rbd-mirror daemons running + return 0; + } + + auto& daemons = service.get_obj()["daemons"]; + if (daemons.is_null()) { + return 0; + } + + for (auto& daemon_pair : daemons.get_obj()) { + // rbd-mirror instances will always be integers but other objects + // are included + auto& service_id = daemon_pair.first; + if (daemon_pair.second.type() != json_spirit::obj_type) { + continue; + } + + auto& daemon = daemon_pair.second.get_obj(); + auto& metadata_val = daemon["metadata"]; + if (metadata_val.is_null()) { + continue; + } + auto& metadata = metadata_val.get_obj(); + + MirrorService mirror_service{service_id}; + + auto& client_id = metadata["id"]; + if (!client_id.is_null()) { + mirror_service.client_id = client_id.get_str(); + } + + auto& ceph_version = metadata["ceph_version_short"]; + if (!ceph_version.is_null()) { + mirror_service.ceph_version = ceph_version.get_str(); + } + + auto& hostname = metadata["hostname"]; + if (!hostname.is_null()) { + mirror_service.hostname = hostname.get_str(); + } + + m_mirror_services[service_id] = mirror_service; + } + + } catch (std::runtime_error&) { + std::cerr << "rbd: unexpected service dump JSON received" << std::endl; + return -EBADMSG; + } + + return 0; +} + +int MirrorDaemonServiceInfo::get_mirror_service_status() { + librados::Rados rados(m_io_ctx); + std::string cmd = R"({"prefix": "service status", "format": "json"})"; + bufferlist in_bl; + bufferlist out_bl; + + int r = rados.mon_command(cmd, in_bl, &out_bl, nullptr); + if (r < 0) { + std::cerr << "rbd: failed to query service status: " << cpp_strerror(r) + << std::endl; + return r; + } + json_spirit::mValue json_root; + 
if(!json_spirit::read(out_bl.to_str(), json_root)) { + std::cerr << "rbd: invalid service status JSON received" << std::endl; + return -EBADMSG; + } + + bool found_leader = false; + bool found_pool = false; + + try { + auto& service = json_root.get_obj()["rbd-mirror"]; + if (service.is_null()) { + return 0; + } + + for (auto& daemon_pair : service.get_obj()) { + std::string service_id = daemon_pair.first; + auto it = m_mirror_services.find(service_id); + if (it == m_mirror_services.end()) { + continue; + } + + auto& mirror_service = it->second; + auto& daemon = daemon_pair.second.get_obj(); + auto& status = daemon["status"]; + if (status.is_null()) { + mirror_service.callouts.push_back("not reporting status"); + mirror_service.health = MIRROR_HEALTH_WARNING; + continue; + } + + auto& json = status.get_obj()["json"]; + if (json.is_null()) { + mirror_service.callouts.push_back("not reporting status"); + mirror_service.health = MIRROR_HEALTH_WARNING; + continue; + } + + json_spirit::mValue json_status; + if(!json_spirit::read(json.get_str(), json_status)) { + std::cerr << "rbd: invalid service status daemon status JSON received" + << std::endl; + return -EBADMSG; + } + + auto& pool_val = json_status.get_obj()[stringify(m_io_ctx.get_id())]; + if (pool_val.is_null()) { + mirror_service.callouts.push_back("not reporting status for pool"); + mirror_service.health = MIRROR_HEALTH_WARNING; + continue; + } + + auto& pool = pool_val.get_obj(); + found_pool = true; + + auto& instance_id = pool["instance_id"]; + if (!instance_id.is_null()) { + mirror_service.instance_id = instance_id.get_str(); + m_instance_to_service_ids[mirror_service.instance_id] = service_id; + } + + auto& leader = pool["leader"]; + if (!leader.is_null() && leader.get_bool()) { + mirror_service.leader = true; + found_leader = true; + } + + MirrorHealth mirror_service_health = MIRROR_HEALTH_OK; + auto& callouts = pool["callouts"]; + if (!callouts.is_null()) { + for (auto& callout_pair : callouts.get_obj()) { 
+ auto& callout = callout_pair.second.get_obj(); + auto& level = callout["level"]; + if (level.is_null()) { + continue; + } + + auto& level_str = level.get_str(); + if (mirror_service_health < MIRROR_HEALTH_ERROR && + level_str == "error") { + mirror_service_health = MIRROR_HEALTH_ERROR; + } else if (mirror_service_health < MIRROR_HEALTH_WARNING && + level_str == "warning") { + mirror_service_health = MIRROR_HEALTH_WARNING; + } + + auto& text = callout["text"]; + if (!text.is_null()) { + mirror_service.callouts.push_back(text.get_str()); + } + } + } + mirror_service.health = mirror_service_health; + } + } catch (std::runtime_error&) { + std::cerr << "rbd: unexpected service status JSON received" << std::endl; + return -EBADMSG; + } + + // compute overall daemon health + m_daemon_health = MIRROR_HEALTH_OK; + if (!found_pool) { + // no daemons are reporting status for this pool + m_daemon_health = MIRROR_HEALTH_ERROR; + } else if (!found_leader) { + // no daemons are reporting leader role for this pool + m_daemon_health = MIRROR_HEALTH_WARNING; + } + + for (auto& pair : m_mirror_services) { + m_daemon_health = std::max(m_daemon_health, pair.second.health); + } + + return 0; +} + +} // namespace rbd + diff --git a/src/tools/rbd/MirrorDaemonServiceInfo.h b/src/tools/rbd/MirrorDaemonServiceInfo.h new file mode 100644 index 000000000..d667332e5 --- /dev/null +++ b/src/tools/rbd/MirrorDaemonServiceInfo.h @@ -0,0 +1,78 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_DAEMON_SERVICE_INFO_H +#define CEPH_RBD_MIRROR_DAEMON_SERVICE_INFO_H + +#include "include/rados/librados_fwd.hpp" +#include "tools/rbd/ArgumentTypes.h" + +#include <iosfwd> +#include <list> +#include <map> +#include <string> + +namespace rbd { + +enum MirrorHealth { + MIRROR_HEALTH_OK = 0, + MIRROR_HEALTH_UNKNOWN = 1, + MIRROR_HEALTH_WARNING = 2, + MIRROR_HEALTH_ERROR = 3 +}; + +std::ostream& operator<<(std::ostream& os, 
MirrorHealth mirror_health); + +struct MirrorService { + MirrorService() {} + explicit MirrorService(const std::string& service_id) + : service_id(service_id) { + } + + std::string service_id; + std::string instance_id; + bool leader = false; + std::string client_id; + std::string ceph_version; + std::string hostname; + std::list<std::string> callouts; + + MirrorHealth health = MIRROR_HEALTH_UNKNOWN; + + std::string get_image_description() const; + void dump_image(argument_types::Format::Formatter formatter) const; +}; + +typedef std::list<MirrorService> MirrorServices; + +class MirrorDaemonServiceInfo { +public: + MirrorDaemonServiceInfo(librados::IoCtx &io_ctx) : m_io_ctx(io_ctx) { + } + + int init(); + + const MirrorService* get_by_service_id(const std::string& service_id) const; + const MirrorService* get_by_instance_id(const std::string& instance_id) const; + + MirrorServices get_mirror_services() const; + MirrorHealth get_daemon_health() const { + return m_daemon_health; + } + +private: + librados::IoCtx &m_io_ctx; + + std::map<std::string, MirrorService> m_mirror_services; + std::map<std::string, std::string> m_instance_to_service_ids; + + MirrorHealth m_daemon_health = MIRROR_HEALTH_UNKNOWN; + + int get_mirror_service_dump(); + int get_mirror_service_status(); + +}; + +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_DAEMON_SERVICE_INFO_H diff --git a/src/tools/rbd/OptionPrinter.cc b/src/tools/rbd/OptionPrinter.cc new file mode 100644 index 000000000..0fea6b691 --- /dev/null +++ b/src/tools/rbd/OptionPrinter.cc @@ -0,0 +1,161 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/OptionPrinter.h" +#include "tools/rbd/IndentStream.h" +#include "include/ceph_assert.h" + +namespace rbd { + +namespace po = boost::program_options; + +const std::string OptionPrinter::POSITIONAL_ARGUMENTS("Positional arguments"); +const std::string OptionPrinter::OPTIONAL_ARGUMENTS("Optional arguments"); + 
+const size_t OptionPrinter::MAX_DESCRIPTION_OFFSET; + +OptionPrinter::OptionPrinter(const OptionsDescription &positional, + const OptionsDescription &optional) + : m_positional(positional), m_optional(optional) { +} + +void OptionPrinter::print_short(std::ostream &os, size_t initial_offset) { + size_t max_option_width = 0; + std::vector<std::string> optionals; + for (size_t i = 0; i < m_optional.options().size(); ++i) { + std::stringstream option; + + bool required = m_optional.options()[i]->semantic()->is_required(); + if (!required) { + option << "["; + } + option << "--" << m_optional.options()[i]->long_name(); + if (m_optional.options()[i]->semantic()->max_tokens() != 0) { + option << " <" << m_optional.options()[i]->long_name() << ">"; + } + if (!required) { + option << "]"; + } + max_option_width = std::max(max_option_width, option.str().size()); + optionals.emplace_back(option.str()); + } + + std::vector<std::string> positionals; + for (size_t i = 0; i < m_positional.options().size(); ++i) { + std::stringstream option; + + // we overload po::value<std::string>()->default_value("") to signify + // an optional positional argument (purely for help printing purposes) + boost::any v; + bool required = !m_positional.options()[i]->semantic()->apply_default(v); + if (!required) { + auto ptr = boost::any_cast<std::string>(&v); + ceph_assert(ptr && ptr->empty()); + option << "["; + } + option << "<" << m_positional.options()[i]->long_name() << ">"; + if (m_positional.options()[i]->semantic()->max_tokens() > 1) { + option << " [<" << m_positional.options()[i]->long_name() << "> ...]"; + } + if (!required) { + option << "]"; + } + + max_option_width = std::max(max_option_width, option.str().size()); + positionals.emplace_back(option.str()); + + if (m_positional.options()[i]->semantic()->max_tokens() > 1) { + break; + } + } + + size_t indent = std::min(initial_offset, MAX_DESCRIPTION_OFFSET) + 1; + if (indent + max_option_width + 2 > LINE_WIDTH) { + // decrease the 
indent so that we don't wrap past the end of the line + indent = LINE_WIDTH - max_option_width - 2; + } + + IndentStream indent_stream(indent, initial_offset, LINE_WIDTH, os); + indent_stream.set_delimiter("["); + for (auto& option : optionals) { + indent_stream << option << " "; + } + + if (optionals.size() > 0 || positionals.size() == 0) { + indent_stream << std::endl; + } + + if (positionals.size() > 0) { + indent_stream.set_delimiter(" "); + for (auto& option : positionals) { + indent_stream << option << " "; + } + indent_stream << std::endl; + } +} + +void OptionPrinter::print_optional(const OptionsDescription &global_opts, + size_t &name_width, std::ostream &os) { + std::string indent2(2, ' '); + + for (size_t i = 0; i < global_opts.options().size(); ++i) { + std::string description = global_opts.options()[i]->description(); + auto result = boost::find_first(description, "deprecated"); + if (!result.empty()) { + continue; + } + std::stringstream ss; + ss << indent2 + << global_opts.options()[i]->format_name() << " " + << global_opts.options()[i]->format_parameter(); + + std::cout << ss.str(); + IndentStream indent_stream(name_width, ss.str().size(), LINE_WIDTH, std::cout); + indent_stream << global_opts.options()[i]->description() << std::endl; + } + +} + +void OptionPrinter::print_detailed(std::ostream &os) { + std::string indent_prefix(2, ' '); + size_t name_width = compute_name_width(indent_prefix.size()); + + if (m_positional.options().size() > 0) { + std::cout << POSITIONAL_ARGUMENTS << std::endl; + for (size_t i = 0; i < m_positional.options().size(); ++i) { + std::stringstream ss; + ss << indent_prefix << "<" << m_positional.options()[i]->long_name() + << ">"; + + std::cout << ss.str(); + IndentStream indent_stream(name_width, ss.str().size(), LINE_WIDTH, os); + indent_stream << m_positional.options()[i]->description() << std::endl; + } + std::cout << std::endl; + } + + if (m_optional.options().size() > 0) { + std::cout << OPTIONAL_ARGUMENTS << 
std::endl; + print_optional(m_optional, name_width, os); + std::cout << std::endl; + } +} + +size_t OptionPrinter::compute_name_width(size_t indent) { + size_t width = MIN_NAME_WIDTH; + std::vector<OptionsDescription> descs = {m_positional, m_optional}; + for (size_t desc_idx = 0; desc_idx < descs.size(); ++desc_idx) { + const OptionsDescription &desc = descs[desc_idx]; + for (size_t opt_idx = 0; opt_idx < desc.options().size(); ++opt_idx) { + size_t name_width = desc.options()[opt_idx]->format_name().size() + + desc.options()[opt_idx]->format_parameter().size() + + 1; + width = std::max(width, name_width); + } + } + width += indent; + width = std::min(width, MAX_DESCRIPTION_OFFSET) + 1; + return width; +} + +} // namespace rbd diff --git a/src/tools/rbd/OptionPrinter.h b/src/tools/rbd/OptionPrinter.h new file mode 100644 index 000000000..06d3a3c99 --- /dev/null +++ b/src/tools/rbd/OptionPrinter.h @@ -0,0 +1,43 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_OPTION_PRINTER_H +#define CEPH_RBD_OPTION_PRINTER_H + +#include "include/int_types.h" +#include <string> +#include <vector> +#include <boost/algorithm/string.hpp> +#include <boost/program_options.hpp> + +namespace rbd { + +class OptionPrinter { +public: + typedef boost::program_options::options_description OptionsDescription; + + static const std::string POSITIONAL_ARGUMENTS; + static const std::string OPTIONAL_ARGUMENTS; + + static const size_t LINE_WIDTH = 80; + static const size_t MIN_NAME_WIDTH = 20; + static const size_t MAX_DESCRIPTION_OFFSET = 37; + + OptionPrinter(const OptionsDescription &positional, + const OptionsDescription &optional); + + void print_short(std::ostream &os, size_t initial_offset); + void print_detailed(std::ostream &os); + static void print_optional(const OptionsDescription &global_opts, + size_t &name_width, std::ostream &os); + +private: + const OptionsDescription &m_positional; + const OptionsDescription 
&m_optional; + + size_t compute_name_width(size_t indent); +}; + +} // namespace rbd + +#endif // CEPH_RBD_OPTION_PRINTER_H diff --git a/src/tools/rbd/Schedule.cc b/src/tools/rbd/Schedule.cc new file mode 100644 index 000000000..15dda3aee --- /dev/null +++ b/src/tools/rbd/Schedule.cc @@ -0,0 +1,367 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "common/ceph_json.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Schedule.h" +#include "tools/rbd/Utils.h" + +#include <iostream> +#include <regex> + +namespace rbd { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +int parse_schedule_name(const std::string &name, bool allow_images, + std::string *pool_name, std::string *namespace_name, + std::string *image_name) { + // parse names like: + // '', 'rbd/', 'rbd/ns/', 'rbd/image', 'rbd/ns/image' + std::regex pattern("^(?:([^/]+)/(?:(?:([^/]+)/|)(?:([^/@]+))?)?)?$"); + std::smatch match; + if (!std::regex_match(name, match, pattern)) { + return -EINVAL; + } + + if (match[1].matched) { + *pool_name = match[1]; + } else { + *pool_name = "-"; + } + + if (match[2].matched) { + *namespace_name = match[2]; + } else if (match[3].matched) { + *namespace_name = ""; + } else { + *namespace_name = "-"; + } + + if (match[3].matched) { + if (!allow_images) { + return -EINVAL; + } + *image_name = match[3]; + } else { + *image_name = "-"; + } + + return 0; +} + +} // anonymous namespace + +void add_level_spec_options(po::options_description *options, + bool allow_image) { + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE); + if (allow_image) { + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE); + } +} + +int get_level_spec_args(const po::variables_map &vm, + std::map<std::string, std::string> *args) { + if 
(vm.count(at::IMAGE_NAME)) { + std::string pool_name; + std::string namespace_name; + std::string image_name; + + int r = utils::extract_spec(vm[at::IMAGE_NAME].as<std::string>(), + &pool_name, &namespace_name, &image_name, + nullptr, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (!pool_name.empty()) { + if (vm.count(at::POOL_NAME)) { + std::cerr << "rbd: pool is specified both via pool and image options" + << std::endl; + return -EINVAL; + } + if (vm.count(at::NAMESPACE_NAME)) { + std::cerr << "rbd: namespace is specified both via namespace and image" + << " options" << std::endl; + return -EINVAL; + } + } + + if (vm.count(at::POOL_NAME)) { + pool_name = vm[at::POOL_NAME].as<std::string>(); + } + + if (vm.count(at::NAMESPACE_NAME)) { + namespace_name = vm[at::NAMESPACE_NAME].as<std::string>(); + } + + if (namespace_name.empty()) { + (*args)["level_spec"] = pool_name + "/" + image_name; + } else { + (*args)["level_spec"] = pool_name + "/" + namespace_name + "/" + + image_name; + } + return 0; + } + + if (vm.count(at::NAMESPACE_NAME)) { + std::string pool_name; + std::string namespace_name; + + if (vm.count(at::POOL_NAME)) { + pool_name = vm[at::POOL_NAME].as<std::string>(); + } + + namespace_name = vm[at::NAMESPACE_NAME].as<std::string>(); + + (*args)["level_spec"] = pool_name + "/" + namespace_name + "/"; + + return 0; + } + + if (vm.count(at::POOL_NAME)) { + std::string pool_name = vm[at::POOL_NAME].as<std::string>(); + + (*args)["level_spec"] = pool_name + "/"; + + return 0; + } + + (*args)["level_spec"] = ""; + + return 0; +} + +void normalize_level_spec_args(std::map<std::string, std::string> *args) { + std::map<std::string, std::string> raw_args; + std::swap(raw_args, *args); + + auto default_pool_name = utils::get_default_pool_name(); + for (auto [key, value] : raw_args) { + if (key == "level_spec" && !value.empty() && value[0] == '/') { + value = default_pool_name + value; + } + + (*args)[key] = value; + } +} + +void 
add_schedule_options(po::options_description *positional, + bool mandatory) { + if (mandatory) { + positional->add_options() + ("interval", "schedule interval"); + } else { + positional->add_options() + ("interval", po::value<std::string>()->default_value(""), + "schedule interval"); + } + positional->add_options() + ("start-time", po::value<std::string>()->default_value(""), + "schedule start time"); +} + +int get_schedule_args(const po::variables_map &vm, bool mandatory, + std::map<std::string, std::string> *args) { + size_t arg_index = 0; + + std::string interval = utils::get_positional_argument(vm, arg_index++); + if (interval.empty()) { + if (mandatory) { + std::cerr << "rbd: missing 'interval' argument" << std::endl; + return -EINVAL; + } + return 0; + } + (*args)["interval"] = interval; + + std::string start_time = utils::get_positional_argument(vm, arg_index++); + if (!start_time.empty()) { + (*args)["start_time"] = start_time; + } + + return 0; +} + +int Schedule::parse(json_spirit::mValue &schedule_val) { + if (schedule_val.type() != json_spirit::array_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "schedule is not array" << std::endl; + return -EBADMSG; + } + + try { + for (auto &item_val : schedule_val.get_array()) { + if (item_val.type() != json_spirit::obj_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "schedule item is not object" << std::endl; + return -EBADMSG; + } + + auto &item = item_val.get_obj(); + + if (item["interval"].type() != json_spirit::str_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "interval is not string" << std::endl; + return -EBADMSG; + } + auto interval = item["interval"].get_str(); + + std::string start_time; + if (item["start_time"].type() == json_spirit::str_type) { + start_time = item["start_time"].get_str(); + } + + items.push_back({interval, start_time}); + } + + } catch (std::runtime_error &) { + std::cerr << "rbd: invalid schedule JSON received" << 
std::endl; + return -EBADMSG; + } + + return 0; +} + +void Schedule::dump(ceph::Formatter *f) { + f->open_array_section("items"); + for (auto &item : items) { + f->open_object_section("item"); + f->dump_string("interval", item.first); + f->dump_string("start_time", item.second); + f->close_section(); // item + } + f->close_section(); // items +} + +std::ostream& operator<<(std::ostream& os, Schedule &s) { + std::string delimiter; + for (auto &item : s.items) { + os << delimiter << "every " << item.first; + if (!item.second.empty()) { + os << " starting at " << item.second; + } + delimiter = ", "; + } + return os; +} + +int ScheduleList::parse(const std::string &list) { + json_spirit::mValue json_root; + if (!json_spirit::read(list, json_root)) { + std::cerr << "rbd: invalid schedule list JSON received" << std::endl; + return -EBADMSG; + } + + try { + for (auto &[id, schedule_val] : json_root.get_obj()) { + if (schedule_val.type() != json_spirit::obj_type) { + std::cerr << "rbd: unexpected schedule list JSON received: " + << "schedule_val is not object" << std::endl; + return -EBADMSG; + } + auto &schedule = schedule_val.get_obj(); + if (schedule["name"].type() != json_spirit::str_type) { + std::cerr << "rbd: unexpected schedule list JSON received: " + << "schedule name is not string" << std::endl; + return -EBADMSG; + } + auto name = schedule["name"].get_str(); + + if (schedule["schedule"].type() != json_spirit::array_type) { + std::cerr << "rbd: unexpected schedule list JSON received: " + << "schedule is not array" << std::endl; + return -EBADMSG; + } + + Schedule s; + int r = s.parse(schedule["schedule"]); + if (r < 0) { + return r; + } + schedules[name] = s; + } + } catch (std::runtime_error &) { + std::cerr << "rbd: invalid schedule list JSON received" << std::endl; + return -EBADMSG; + } + + return 0; +} + +Schedule *ScheduleList::find(const std::string &name) { + auto it = schedules.find(name); + if (it == schedules.end()) { + return nullptr; + } + + return 
&it->second; +} + +void ScheduleList::dump(ceph::Formatter *f) { + f->open_array_section("schedules"); + for (auto &[name, s] : schedules) { + std::string pool_name; + std::string namespace_name; + std::string image_name; + + int r = parse_schedule_name(name, allow_images, &pool_name, &namespace_name, + &image_name); + if (r < 0) { + continue; + } + + f->open_object_section("schedule"); + f->dump_string("pool", pool_name); + f->dump_string("namespace", namespace_name); + if (allow_images) { + f->dump_string("image", image_name); + } + s.dump(f); + f->close_section(); + } + f->close_section(); +} + +std::ostream& operator<<(std::ostream& os, ScheduleList &l) { + TextTable tbl; + tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("NAMESPACE", TextTable::LEFT, TextTable::LEFT); + if (l.allow_images) { + tbl.define_column("IMAGE", TextTable::LEFT, TextTable::LEFT); + } + tbl.define_column("SCHEDULE", TextTable::LEFT, TextTable::LEFT); + + for (auto &[name, s] : l.schedules) { + std::string pool_name; + std::string namespace_name; + std::string image_name; + + int r = parse_schedule_name(name, l.allow_images, &pool_name, + &namespace_name, &image_name); + if (r < 0) { + continue; + } + + std::stringstream ss; + ss << s; + + tbl << pool_name << namespace_name; + if (l.allow_images) { + tbl << image_name; + } + tbl << ss.str() << TextTable::endrow; + } + + os << tbl; + return os; +} + +} // namespace rbd + diff --git a/src/tools/rbd/Schedule.h b/src/tools/rbd/Schedule.h new file mode 100644 index 000000000..bf0964bb1 --- /dev/null +++ b/src/tools/rbd/Schedule.h @@ -0,0 +1,67 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_SCHEDULE_H +#define CEPH_RBD_SCHEDULE_H + +#include "json_spirit/json_spirit.h" + +#include <iostream> +#include <list> +#include <map> +#include <string> +#include <boost/program_options.hpp> + +namespace ceph { class Formatter; } + +namespace rbd { 
+
+// Register the "level spec" command-line options (--pool, --namespace and,
+// when allow_image is true, --image) used by the schedule commands.
+void add_level_spec_options(
+  boost::program_options::options_description *options, bool allow_image=true);
+// Build the "level_spec" argument ("pool/[namespace/][image]") in *args from
+// the parsed options; returns 0 on success or -EINVAL when the pool/namespace
+// is specified both directly and via the image spec.
+int get_level_spec_args(const boost::program_options::variables_map &vm,
+                        std::map<std::string, std::string> *args);
+// Prefix a level_spec that starts with '/' with the default pool name.
+void normalize_level_spec_args(std::map<std::string, std::string> *args);
+
+// Register the positional "interval" (optional unless mandatory) and
+// "start-time" schedule arguments.
+void add_schedule_options(
+  boost::program_options::options_description *positional, bool mandatory);
+// Extract the "interval"/"start_time" positional arguments into *args;
+// returns -EINVAL when a mandatory interval is missing.
+int get_schedule_args(const boost::program_options::variables_map &vm,
+                      bool mandatory, std::map<std::string, std::string> *args);
+
+// A single schedule: an ordered list of (interval, start_time) items parsed
+// from the JSON returned by the schedule commands.
+class Schedule {
+public:
+  Schedule() {
+  }
+
+  // Parse a JSON array of {"interval": ..., "start_time": ...} objects;
+  // returns 0 on success or -EBADMSG on malformed input.
+  int parse(json_spirit::mValue &schedule_val);
+  // Dump the items under an "items" array section of the formatter.
+  void dump(ceph::Formatter *f);
+
+  friend std::ostream& operator<<(std::ostream& os, Schedule &s);
+
+private:
+  // NOTE(review): 'name' is never assigned or read by the visible
+  // implementation in Schedule.cc -- confirm whether it can be removed.
+  std::string name;
+  std::list<std::pair<std::string, std::string>> items;  // (interval, start_time)
+};
+
+std::ostream& operator<<(std::ostream& os, Schedule &s);
+
+// Collection of named schedules keyed by level-spec name, parsed from the
+// JSON object returned by the schedule list command.
+class ScheduleList {
+public:
+  // allow_images selects whether image-level schedule names are accepted and
+  // whether an image column/field is emitted when printing.
+  ScheduleList(bool allow_images=true) : allow_images(allow_images) {
+  }
+
+  // Parse the JSON object {id: {"name": ..., "schedule": [...]}, ...};
+  // returns 0 on success or -EBADMSG on malformed input.
+  int parse(const std::string &list);
+  // Return the schedule registered under 'name', or nullptr if absent.
+  Schedule *find(const std::string &name);
+  // Dump all schedules, splitting each name into pool/namespace[/image].
+  void dump(ceph::Formatter *f);
+
+  friend std::ostream& operator<<(std::ostream& os, ScheduleList &l);
+
+private:
+  bool allow_images;
+  std::map<std::string, Schedule> schedules;
+};
+
+std::ostream& operator<<(std::ostream& os, ScheduleList &l);
+
+} // namespace rbd
+
+#endif // CEPH_RBD_SCHEDULE_H
diff --git a/src/tools/rbd/Shell.cc b/src/tools/rbd/Shell.cc
new file mode 100644
index 000000000..ab1d20331
--- /dev/null
+++ b/src/tools/rbd/Shell.cc
@@ -0,0 +1,487 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/IndentStream.h"
+#include "tools/rbd/OptionPrinter.h"
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "include/stringify.h"
+#include <algorithm>
+#include <iostream> +#include <set> + +namespace rbd { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +static const std::string APP_NAME("rbd"); +static const std::string HELP_SPEC("help"); +static const std::string BASH_COMPLETION_SPEC("bash-completion"); + +boost::intrusive_ptr<CephContext> global_init( + int argc, const char **argv, std::vector<std::string> *command_args, + std::vector<std::string> *global_init_args) { + auto cmd_args = argv_to_vec(argc, argv); + std::vector<const char*> args(cmd_args); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_MON_CONFIG); + + *command_args = {args.begin(), args.end()}; + + // Scan command line arguments for ceph global init args (those are + // filtered out from args vector by global_init). + + auto cursor = args.begin(); + for (auto &arg : cmd_args) { + auto iter = cursor; + for (; iter != args.end(); iter++) { + if (*iter == arg) { + break; + } + } + if (iter == args.end()) { + // filtered out by global_init + global_init_args->push_back(arg); + } else { + cursor = ++iter; + } + } + + return cct; +} + +std::string format_command_spec(const Shell::CommandSpec &spec) { + return joinify<std::string>(spec.begin(), spec.end(), " "); +} + +std::string format_alias_spec(const Shell::CommandSpec &spec, + const Shell::CommandSpec &alias_spec) { + auto spec_it = spec.begin(); + auto alias_it = alias_spec.begin(); + int level = 0; + while (spec_it != spec.end() && alias_it != alias_spec.end() && + *spec_it == *alias_it) { + spec_it++; + alias_it++; + level++; + } + ceph_assert(spec_it != spec.end() && alias_it != alias_spec.end()); + + if (level < 2) { + return joinify<std::string>(alias_spec.begin(), alias_spec.end(), " "); + } else { + return "... 
" + joinify<std::string>(alias_it, alias_spec.end(), " "); + } +} + +std::string format_command_name(const Shell::CommandSpec &spec, + const Shell::CommandSpec &alias_spec) { + std::string name = format_command_spec(spec); + if (!alias_spec.empty()) { + name += " (" + format_alias_spec(spec, alias_spec) + ")"; + } + return name; +} + +std::string format_option_suffix( + const boost::shared_ptr<po::option_description> &option) { + std::string suffix; + if (option->semantic()->max_tokens() != 0) { + if (option->description().find("path") != std::string::npos || + option->description().find("file") != std::string::npos) { + suffix += " path"; + } else if (option->description().find("host") != std::string::npos) { + suffix += " host"; + } else { + suffix += " arg"; + } + } + return suffix; +} + +} // anonymous namespace + +std::vector<Shell::Action *>& Shell::get_actions() { + static std::vector<Action *> actions; + + return actions; +} + +std::set<std::string>& Shell::get_switch_arguments() { + static std::set<std::string> switch_arguments; + + return switch_arguments; +} + +void print_deprecated_warning(po::option_description option, std::string description) { + auto pos = description.find_first_of(":"); + if (pos != std::string::npos) { + std::string param = description.substr(pos + 1, description.size() - pos - 2); + std::cerr << "rbd: " << option.format_name() << " is deprecated, use --" + << param << std::endl; + } +} + +int Shell::execute(int argc, const char **argv) { + std::vector<std::string> arguments; + std::vector<std::string> ceph_global_init_args; + auto cct = global_init(argc, argv, &arguments, &ceph_global_init_args); + + std::vector<std::string> command_spec; + get_command_spec(arguments, &command_spec); + bool is_alias = true; + + if (command_spec.empty() || command_spec == CommandSpec({"help"})) { + // list all available actions + print_help(); + return 0; + } else if (command_spec[0] == HELP_SPEC) { + // list help for specific action + 
command_spec.erase(command_spec.begin()); + Action *action = find_action(command_spec, NULL, &is_alias); + if (action == NULL) { + print_unknown_action(command_spec); + return EXIT_FAILURE; + } else { + print_action_help(action, is_alias); + return 0; + } + } else if (command_spec[0] == BASH_COMPLETION_SPEC) { + command_spec.erase(command_spec.begin()); + print_bash_completion(command_spec); + return 0; + } + /* resolve the action (or its alias) matching the leading command words */ + CommandSpec *matching_spec; + Action *action = find_action(command_spec, &matching_spec, &is_alias); + if (action == NULL) { + print_unknown_action(command_spec); + return EXIT_FAILURE; + } + + po::variables_map vm; + try { + po::options_description positional_opts; + po::options_description command_opts; + (*action->get_arguments)(&positional_opts, &command_opts); + + // dynamically allocate options for our command (e.g. snap list) and + // its associated positional arguments + po::options_description argument_opts; + argument_opts.add_options() + (at::POSITIONAL_COMMAND_SPEC.c_str(), + po::value<std::vector<std::string> >()->required(), "") + (at::POSITIONAL_ARGUMENTS.c_str(), + po::value<std::vector<std::string> >(), ""); + + po::positional_options_description positional_options; + positional_options.add(at::POSITIONAL_COMMAND_SPEC.c_str(), + matching_spec->size()); + if (!positional_opts.options().empty()) { + int max_count = positional_opts.options().size(); + if (positional_opts.options().back()->semantic()->max_tokens() > 1) + max_count = -1; + positional_options.add(at::POSITIONAL_ARGUMENTS.c_str(), max_count); + } + + po::options_description group_opts; + group_opts.add(command_opts) + .add(argument_opts); + + po::store(po::command_line_parser(arguments) + .style(po::command_line_style::default_style & + ~po::command_line_style::allow_guessing) + .options(group_opts) + .positional(positional_options) + .run(), vm); + + if (vm[at::POSITIONAL_COMMAND_SPEC].as<std::vector<std::string> >() != + *matching_spec) { + std::cerr << "rbd: failed to parse 
command" << std::endl; + return EXIT_FAILURE; + } + + int r = (*action->execute)(vm, ceph_global_init_args); + /* after executing, warn about any deprecated per-command options that were actually supplied */ + if (vm.size() > 0) { + for (auto opt : vm) { + try { + auto option = command_opts.find(opt.first, false); + auto description = option.description(); + auto result = boost::find_first(description, "deprecated"); + if (!result.empty()) { + print_deprecated_warning(option, description); + } + } catch (std::exception& e) { + continue; + } + } + } + /* likewise warn for deprecated global options consumed by global_init */ + po::options_description global_opts; + get_global_options(&global_opts); + auto it = ceph_global_init_args.begin(); + for ( ; it != ceph_global_init_args.end(); ++it) { + auto pos = (*it).find_last_of("-"); + auto prefix_style = po::command_line_style::allow_long; + if (pos == 0) { + prefix_style = po::command_line_style::allow_dash_for_short; + } else if (pos == std::string::npos) { + continue; + } + + for (size_t i = 0; i < global_opts.options().size(); ++i) { + std::string param_name = global_opts.options()[i]->canonical_display_name( + prefix_style); + auto description = global_opts.options()[i]->description(); + auto result = boost::find_first(description, "deprecated"); + if (!result.empty() && *it == param_name) { + print_deprecated_warning(*global_opts.options()[i], description); + break; + } + } + } + + if (r != 0) { + return std::abs(r); + } + } catch (po::required_option& e) { + std::cerr << "rbd: " << e.what() << std::endl; + return EXIT_FAILURE; + } catch (po::too_many_positional_options_error& e) { + std::cerr << "rbd: too many arguments" << std::endl; + return EXIT_FAILURE; + } catch (po::error& e) { + std::cerr << "rbd: " << e.what() << std::endl; + return EXIT_FAILURE; + } + + return 0; +} + /* Extract the leading command words (e.g. {"snap", "list"}) from the raw argument list, skipping option flags and their values. */ +void Shell::get_command_spec(const std::vector<std::string> &arguments, + std::vector<std::string> *command_spec) { + for (size_t i = 0; i < arguments.size(); ++i) { + std::string arg(arguments[i]); + if (arg == "-h" || arg == "--help") { + *command_spec = {HELP_SPEC}; + return; + } else if (arg == "--") { 
+ // all arguments after a double-dash are positional + if (i + 1 < arguments.size()) { + command_spec->insert(command_spec->end(), + arguments.data() + i + 1, + arguments.data() + arguments.size()); + } + return; + } else if (arg[0] == '-') { + // if the option is not a switch, skip its value + if (arg.size() >= 2 && + (arg[1] == '-' || + get_switch_arguments().count(arg.substr(1, 1)) == 0) && + (arg[1] != '-' || + get_switch_arguments().count(arg.substr(2, std::string::npos)) == 0) && + at::SWITCH_ARGUMENTS.count(arg.substr(2, std::string::npos)) == 0 && + arg.find('=') == std::string::npos) { + ++i; + } + } else { + command_spec->push_back(arg); + } + } +} + /* Find the registered action whose spec (or alias spec) is a prefix of command_spec; longest specs are tried first. Returns NULL when no action matches. */ +Shell::Action *Shell::find_action(const CommandSpec &command_spec, + CommandSpec **matching_spec, bool *is_alias) { + // sort such that all "trash purge schedule ..." actions come before + // "trash purge" + std::vector<Action *> actions(get_actions()); + std::sort(actions.begin(), actions.end(), [](auto lhs, auto rhs) { + return lhs->command_spec.size() > rhs->command_spec.size(); + }); + + for (Action *action : actions) { + if (action->command_spec.size() <= command_spec.size()) { + if (std::equal(action->command_spec.begin(), + action->command_spec.end(), + command_spec.begin())) { + if (matching_spec != NULL) { + *matching_spec = &action->command_spec; + } + *is_alias = false; + return action; + } + } + if (!action->alias_command_spec.empty() && + action->alias_command_spec.size() <= command_spec.size()) { + if (std::equal(action->alias_command_spec.begin(), + action->alias_command_spec.end(), + command_spec.begin())) { + if (matching_spec != NULL) { + *matching_spec = &action->alias_command_spec; + } + *is_alias = true; + return action; + } + } + } + return NULL; +} + /* Global options common to every rbd subcommand. */ +void Shell::get_global_options(po::options_description *opts) { + opts->add_options() + ((at::CONFIG_PATH + ",c").c_str(), po::value<std::string>(), "path to cluster configuration") + ("cluster", po::value<std::string>(), "cluster name") 
+ ("id", po::value<std::string>(), "client id (without 'client.' prefix)") + ("user", po::value<std::string>(), "deprecated[:id]") + ("name,n", po::value<std::string>(), "client name") + ("mon_host,m", po::value<std::string>(), "monitor host") + ("secret", po::value<at::Secret>(), "deprecated[:keyfile]") + ("keyfile,K", po::value<std::string>(), "path to secret key") + ("keyring,k", po::value<std::string>(), "path to keyring"); +} + +void Shell::print_help() { + std::cout << "usage: " << APP_NAME << " <command> ..." + << std::endl << std::endl + << "Command-line interface for managing Ceph RBD images." + << std::endl << std::endl; + + std::vector<Action *> actions(get_actions()); + std::sort(actions.begin(), actions.end(), + [](Action *lhs, Action *rhs) { return lhs->command_spec < + rhs->command_spec; }); + + std::cout << OptionPrinter::POSITIONAL_ARGUMENTS << ":" << std::endl + << " <command>" << std::endl; + + // since the commands have spaces, we have to build our own formatter + std::string indent(4, ' '); + size_t name_width = OptionPrinter::MIN_NAME_WIDTH; + for (size_t i = 0; i < actions.size(); ++i) { + Action *action = actions[i]; + std::string name = format_command_name(action->command_spec, + action->alias_command_spec); + name_width = std::max(name_width, name.size()); + } + name_width += indent.size(); + name_width = std::min(name_width, OptionPrinter::MAX_DESCRIPTION_OFFSET) + 1; + + for (size_t i = 0; i < actions.size(); ++i) { + Action *action = actions[i]; + if (!action->visible) + continue; + std::stringstream ss; + ss << indent + << format_command_name(action->command_spec, action->alias_command_spec); + + std::cout << ss.str(); + if (!action->description.empty()) { + IndentStream indent_stream(name_width, ss.str().size(), + OptionPrinter::LINE_WIDTH, + std::cout); + indent_stream << action->description << std::endl; + } else { + std::cout << std::endl; + } + } + + po::options_description global_opts; + get_global_options(&global_opts); + + 
std::cout << std::endl << OptionPrinter::OPTIONAL_ARGUMENTS << ":" << std::endl; + OptionPrinter::print_optional(global_opts, name_width, std::cout); + + std::cout << std::endl + << "See '" << APP_NAME << " help <command>' for help on a specific " + << "command." << std::endl; + } + /* Print usage, description, and detailed option help for one action (alias form when invoked via the alias). */ +void Shell::print_action_help(Action *action, bool is_alias) { + std::stringstream ss; + ss << "usage: " << APP_NAME << " " + << format_command_spec(is_alias ? action->alias_command_spec : action->command_spec); + std::cout << ss.str(); + + po::options_description positional; + po::options_description options; + (*action->get_arguments)(&positional, &options); + + OptionPrinter option_printer(positional, options); + option_printer.print_short(std::cout, ss.str().size()); + + if (!action->description.empty()) { + std::cout << std::endl << action->description << std::endl; + } + + std::cout << std::endl; + option_printer.print_detailed(std::cout); + + if (!action->help.empty()) { + std::cout << action->help << std::endl; + } +} + /* Report an unrecognized command on stderr, then show the full help. */ +void Shell::print_unknown_action(const std::vector<std::string> &command_spec) { + std::cerr << "error: unknown option '" + << joinify<std::string>(command_spec.begin(), + command_spec.end(), " ") << "'" + << std::endl << std::endl; + print_help(); +} + /* Emit a "|"-separated completion list: global/command options for a matched action, otherwise every command and alias spec. */ +void Shell::print_bash_completion(const CommandSpec &command_spec) { + + bool is_alias = true; + + Action *action = find_action(command_spec, NULL, &is_alias); + po::options_description global_opts; + get_global_options(&global_opts); + print_bash_completion_options(global_opts); + + if (action != nullptr) { + po::options_description positional_opts; + po::options_description command_opts; + (*action->get_arguments)(&positional_opts, &command_opts); + print_bash_completion_options(command_opts); + } else { + std::cout << "|help"; + for (size_t i = 0; i < get_actions().size(); ++i) { + Action *action = get_actions()[i]; + std::cout << "|" + << joinify<std::string>(action->command_spec.begin(), + 
action->command_spec.end(), " "); + if (!action->alias_command_spec.empty()) { + std::cout << "|" + << joinify<std::string>(action->alias_command_spec.begin(), + action->alias_command_spec.end(), + " "); + } + } + } + std::cout << "|" << std::endl; +} + +void Shell::print_bash_completion_options(const po::options_description &ops) { + for (size_t i = 0; i < ops.options().size(); ++i) { + auto option = ops.options()[i]; + std::string long_name(option->canonical_display_name(0)); + std::string short_name(option->canonical_display_name( + po::command_line_style::allow_dash_for_short)); + + std::cout << "|--" << long_name << format_option_suffix(option); + if (long_name != short_name) { + std::cout << "|" << short_name << format_option_suffix(option); + } + } +} + +} // namespace rbd diff --git a/src/tools/rbd/Shell.h b/src/tools/rbd/Shell.h new file mode 100644 index 000000000..fe3dee46b --- /dev/null +++ b/src/tools/rbd/Shell.h @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_SHELL_H +#define CEPH_RBD_SHELL_H + +#include "include/int_types.h" +#include <set> +#include <string> +#include <vector> +#include <boost/program_options.hpp> + +namespace rbd { + +class Shell { +public: + typedef std::vector<std::string> CommandSpec; + + struct Action { + typedef void (*GetArguments)(boost::program_options::options_description *, + boost::program_options::options_description *); + typedef int (*Execute)(const boost::program_options::variables_map &, + const std::vector<std::string> &); + + CommandSpec command_spec; + CommandSpec alias_command_spec; + const std::string description; + const std::string help; + GetArguments get_arguments; + Execute execute; + bool visible; + + template <typename Args, typename Execute> + Action(const std::initializer_list<std::string> &command_spec, + const std::initializer_list<std::string> &alias_command_spec, + const std::string &description, const 
std::string &help, + Args args, Execute execute, bool visible = true) + : command_spec(command_spec), alias_command_spec(alias_command_spec), + description(description), help(help), get_arguments(args), + execute(execute), visible(visible) { + Shell::get_actions().push_back(this); + } + + }; + /* Self-registering declaration of value-less (switch) argument names. */ + struct SwitchArguments { + SwitchArguments(const std::initializer_list<std::string> &arguments) { + Shell::get_switch_arguments().insert(arguments.begin(), arguments.end()); + } + }; + + int execute(int argc, const char **argv); + +private: + static std::vector<Action *>& get_actions(); + static std::set<std::string>& get_switch_arguments(); + + void get_command_spec(const std::vector<std::string> &arguments, + std::vector<std::string> *command_spec); + Action *find_action(const CommandSpec &command_spec, + CommandSpec **matching_spec, bool *is_alias); + + void get_global_options(boost::program_options::options_description *opts); + + void print_help(); + void print_action_help(Action *action, bool is_alias); + void print_unknown_action(const CommandSpec &command_spec); + + void print_bash_completion(const CommandSpec &command_spec); + void print_bash_completion_options( + const boost::program_options::options_description &ops); +}; + +} // namespace rbd + +#endif // CEPH_RBD_SHELL_H diff --git a/src/tools/rbd/Utils.cc b/src/tools/rbd/Utils.cc new file mode 100644 index 000000000..71da0bd27 --- /dev/null +++ b/src/tools/rbd/Utils.cc @@ -0,0 +1,1203 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/Utils.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "include/encoding.h" +#include "common/common_init.h" +#include "include/stringify.h" +#include "include/rbd/features.h" +#include "common/config.h" +#include "common/errno.h" +#include "common/escape.h" +#include "common/safe_io.h" +#include "global/global_context.h" +#include <fstream> +#include <iostream> +#include 
<regex> +#include <boost/algorithm/string.hpp> +#include <boost/lexical_cast.hpp> + +namespace rbd { +namespace utils { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + /* Render a key/value map as comma/newline-separated JSON-escaped "key": "value" pairs (no surrounding braces). */ +static std::string mgr_command_args_to_str( + const std::map<std::string, std::string> &args) { + std::string out = ""; + + std::string delimiter; + for (auto &it : args) { + out += delimiter + "\"" + it.first + "\": \"" + + stringify(json_stream_escaper(it.second)) + "\""; + delimiter = ",\n"; + } + + return out; +} + +} // anonymous namespace + /* Print a carriage-return-rewritten percentage line to stderr whenever the integral percentage advances (only when progress output is enabled). */ +int ProgressContext::update_progress(uint64_t offset, uint64_t total) { + if (progress) { + int pc = get_percentage(offset, total); + if (pc > last_pc) { + std::cerr << "\r" << operation << ": " + << pc << "% complete..." << std::flush; + last_pc = pc; + } + } + return 0; +} + +void ProgressContext::finish() { + if (progress) { + std::cerr << "\r" << operation << ": 100% complete...done." << std::endl; + } +} + +void ProgressContext::fail() { + if (progress) { + std::cerr << "\r" << operation << ": " << last_pc << "% complete...failed." + << std::endl; + } +} + /* Integer percentage of part/whole; 0 when whole is 0. */ +int get_percentage(uint64_t part, uint64_t whole) { + return whole ? 
(100 * part / whole) : 0; +} + +void aio_context_callback(librbd::completion_t completion, void *arg) +{ + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(completion); + Context *context = reinterpret_cast<Context *>(arg); + context->complete(aio_completion->get_return_value()); + aio_completion->release(); +} + +int read_string(int fd, unsigned max, std::string *out) { + char buf[4]; + + int r = safe_read_exact(fd, buf, 4); + if (r < 0) + return r; + + bufferlist bl; + bl.append(buf, 4); + auto p = bl.cbegin(); + uint32_t len; + decode(len, p); + if (len > max) + return -EINVAL; + + char sbuf[len]; + r = safe_read_exact(fd, sbuf, len); + if (r < 0) + return r; + out->assign(sbuf, len); + return len; +} + +int extract_spec(const std::string &spec, std::string *pool_name, + std::string *namespace_name, std::string *name, + std::string *snap_name, SpecValidation spec_validation) { + if (!g_ceph_context->_conf.get_val<bool>("rbd_validate_names")) { + spec_validation = SPEC_VALIDATION_NONE; + } + + std::regex pattern; + switch (spec_validation) { + case SPEC_VALIDATION_FULL: + // disallow "/" and "@" in all names + pattern = "^(?:([^/@]+)/(?:([^/@]+)/)?)?([^/@]+)(?:@([^/@]+))?$"; + break; + case SPEC_VALIDATION_SNAP: + // disallow "/" and "@" in snap name + pattern = "^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$"; + break; + case SPEC_VALIDATION_NONE: + // relaxed pattern assumes pool is before first "/", + // namespace is before second "/", and snap name is after first "@" + pattern = "^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@(.+))?$"; + break; + default: + ceph_abort(); + break; + } + + std::smatch match; + if (!std::regex_match(spec, match, pattern)) { + std::cerr << "rbd: invalid spec '" << spec << "'" << std::endl; + return -EINVAL; + } + + if (match[1].matched) { + if (pool_name != nullptr) { + *pool_name = match[1]; + } else { + std::cerr << "rbd: pool name specified for a command that doesn't use it" + << 
std::endl; + return -EINVAL; + } + } + + if (match[2].matched) { + if (namespace_name != nullptr) { + *namespace_name = match[2]; + } else { + std::cerr << "rbd: namespace name specified for a command that doesn't " + << "use it" << std::endl; + return -EINVAL; + } + } + + if (name != nullptr) { + *name = match[3]; + } + + if (match[4].matched) { + if (snap_name != nullptr) { + *snap_name = match[4]; + } else { + std::cerr << "rbd: snapshot name specified for a command that doesn't " + << "use it" << std::endl; + return -EINVAL; + } + } + return 0; +} + /* Return the index-th positional argument, or "" when absent. */ +std::string get_positional_argument(const po::variables_map &vm, size_t index) { + if (vm.count(at::POSITIONAL_ARGUMENTS) == 0) { + return ""; + } + + const std::vector<std::string> &args = + boost::any_cast<std::vector<std::string> >( + vm[at::POSITIONAL_ARGUMENTS].value()); + if (index < args.size()) { + return args[index]; + } + return ""; +} + /* Substitute the configured default pool when none was given. */ +void normalize_pool_name(std::string* pool_name) { + if (pool_name->empty()) { + *pool_name = get_default_pool_name(); + } +} + +std::string get_default_pool_name() { + return g_ceph_context->_conf.get_val<std::string>("rbd_default_pool"); +} + /* Resolve pool (and optional namespace) from options or the positional "pool[/namespace]" argument, advancing *arg_index when the positional form is consumed. */ +int get_pool_and_namespace_names( + const boost::program_options::variables_map &vm, bool validate_pool_name, + std::string* pool_name, std::string* namespace_name, size_t *arg_index) { + if (namespace_name != nullptr && vm.count(at::NAMESPACE_NAME)) { + *namespace_name = vm[at::NAMESPACE_NAME].as<std::string>(); + } + + if (vm.count(at::POOL_NAME)) { + *pool_name = vm[at::POOL_NAME].as<std::string>(); + } else { + *pool_name = get_positional_argument(vm, *arg_index); + if (!pool_name->empty()) { + if (namespace_name != nullptr) { + auto slash_pos = pool_name->find_last_of('/'); + if (slash_pos != std::string::npos) { + *namespace_name = pool_name->substr(slash_pos + 1); + } + *pool_name = pool_name->substr(0, slash_pos); + } + ++(*arg_index); + } + } + + if (!g_ceph_context->_conf.get_val<bool>("rbd_validate_names")) { + 
validate_pool_name = false; + } + + if (validate_pool_name && + pool_name->find_first_of("/@") != std::string::npos) { + std::cerr << "rbd: invalid pool '" << *pool_name << "'" << std::endl; + return -EINVAL; + } else if (namespace_name != nullptr && + namespace_name->find_first_of("/@") != std::string::npos) { + std::cerr << "rbd: invalid namespace '" << *namespace_name << "'" + << std::endl; + return -EINVAL; + } + + return 0; +} + /* Resolve pool, namespace, and image id from options or a positional "[pool/[namespace/]]image-id" spec; errors if no image id results. */ +int get_pool_image_id(const po::variables_map &vm, + size_t *spec_arg_index, + std::string *pool_name, + std::string *namespace_name, + std::string *image_id) { + + if (vm.count(at::POOL_NAME) && pool_name != nullptr) { + *pool_name = vm[at::POOL_NAME].as<std::string>(); + } + if (vm.count(at::NAMESPACE_NAME) && namespace_name != nullptr) { + *namespace_name = vm[at::NAMESPACE_NAME].as<std::string>(); + } + if (vm.count(at::IMAGE_ID) && image_id != nullptr) { + *image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + int r; + if (image_id != nullptr && spec_arg_index != nullptr && image_id->empty()) { + std::string spec = get_positional_argument(vm, (*spec_arg_index)++); + if (!spec.empty()) { + r = extract_spec(spec, pool_name, namespace_name, image_id, nullptr, + SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + } + } + + if (image_id != nullptr && image_id->empty()) { + std::cerr << "rbd: image id was not specified" << std::endl; + return -EINVAL; + } + + return 0; +} + /* Build a fully-qualified "pool/[namespace/]image[@snap]" string from the parsed arguments, connecting to the cluster for the default pool name when the pool was omitted. */ +int get_image_or_snap_spec(const po::variables_map &vm, std::string *spec) { + size_t arg_index = 0; + std::string pool_name; + std::string nspace_name; + std::string image_name; + std::string snap_name; + int r = get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name, + &image_name, &snap_name, true, SNAPSHOT_PRESENCE_PERMITTED, + SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (pool_name.empty()) { + // connect to the cluster to get the default pool + librados::Rados rados; + r = init_rados(&rados); 
+ if (r < 0) { + return r; + } + + normalize_pool_name(&pool_name); + } + + spec->append(pool_name); + spec->append("/"); + if (!nspace_name.empty()) { + spec->append(nspace_name); + spec->append("/"); + } + spec->append(image_name); + if (!snap_name.empty()) { + spec->append("@"); + spec->append(snap_name); + } + + return 0; +} + /* Expand comma-separated option strings into individual "--opt" args. */ +void append_options_as_args(const std::vector<std::string> &options, + std::vector<std::string> *args) { + for (auto &opts : options) { + std::vector<std::string> args_; + boost::split(args_, opts, boost::is_any_of(",")); + for (auto &o : args_) { + args->push_back("--" + o); + } + } +} + /* Image-flavored wrapper over get_pool_generic_snapshot_names, selecting source or destination option keys per the modifier. */ +int get_pool_image_snapshot_names(const po::variables_map &vm, + at::ArgumentModifier mod, + size_t *spec_arg_index, + std::string *pool_name, + std::string *namespace_name, + std::string *image_name, + std::string *snap_name, + bool image_name_required, + SnapshotPresence snapshot_presence, + SpecValidation spec_validation) { + std::string pool_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_POOL_NAME : at::POOL_NAME); + std::string image_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_IMAGE_NAME : at::IMAGE_NAME); + return get_pool_generic_snapshot_names(vm, mod, spec_arg_index, pool_key, + pool_name, namespace_name, image_key, + "image", image_name, snap_name, + image_name_required, snapshot_presence, + spec_validation); +} + /* Resolve pool/namespace/name/snapshot from explicit options, an embedded image spec, or the next positional argument; validates names and snapshot presence per the supplied policies. */ +int get_pool_generic_snapshot_names(const po::variables_map &vm, + at::ArgumentModifier mod, + size_t *spec_arg_index, + const std::string& pool_key, + std::string *pool_name, + std::string *namespace_name, + const std::string& generic_key, + const std::string& generic_key_desc, + std::string *generic_name, + std::string *snap_name, + bool generic_name_required, + SnapshotPresence snapshot_presence, + SpecValidation spec_validation) { + std::string namespace_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_NAMESPACE_NAME : at::NAMESPACE_NAME); + std::string snap_key = (mod == at::ARGUMENT_MODIFIER_DEST ? 
+ at::DEST_SNAPSHOT_NAME : at::SNAPSHOT_NAME); + + if (vm.count(pool_key) && pool_name != nullptr) { + *pool_name = vm[pool_key].as<std::string>(); + } + if (vm.count(namespace_key) && namespace_name != nullptr) { + *namespace_name = vm[namespace_key].as<std::string>(); + } + if (vm.count(generic_key) && generic_name != nullptr) { + *generic_name = vm[generic_key].as<std::string>(); + } + if (vm.count(snap_key) && snap_name != nullptr) { + *snap_name = vm[snap_key].as<std::string>(); + } + + int r; + if ((generic_key == at::IMAGE_NAME || generic_key == at::DEST_IMAGE_NAME) && + generic_name != nullptr && !generic_name->empty()) { + // despite the separate pool and snapshot name options, + // we can also specify them via the image option + std::string image_name_copy(*generic_name); + r = extract_spec(image_name_copy, pool_name, namespace_name, generic_name, + snap_name, spec_validation); + if (r < 0) { + return r; + } + } + + if (generic_name != nullptr && spec_arg_index != nullptr && + generic_name->empty()) { + std::string spec = get_positional_argument(vm, (*spec_arg_index)++); + if (!spec.empty()) { + r = extract_spec(spec, pool_name, namespace_name, generic_name, snap_name, + spec_validation); + if (r < 0) { + return r; + } + } + } + + if (generic_name != nullptr && generic_name_required && + generic_name->empty()) { + std::string prefix = at::get_description_prefix(mod); + std::cerr << "rbd: " + << (mod == at::ARGUMENT_MODIFIER_DEST ? 
prefix : std::string()) + << generic_key_desc << " name was not specified" << std::endl; + return -EINVAL; + } + /* reject "/" and "@" inside individual pool/namespace components */ + std::regex pattern("^[^@/]*?$"); + if (spec_validation == SPEC_VALIDATION_FULL) { + // validate pool name while creating/renaming/copying/cloning/importing/etc + if ((pool_name != nullptr) && !std::regex_match (*pool_name, pattern)) { + std::cerr << "rbd: invalid pool name '" << *pool_name << "'" << std::endl; + return -EINVAL; + } + } + + if (namespace_name != nullptr && !namespace_name->empty() && + !std::regex_match (*namespace_name, pattern)) { + std::cerr << "rbd: invalid namespace name '" << *namespace_name << "'" + << std::endl; + return -EINVAL; + } + + if (snap_name != nullptr) { + r = validate_snapshot_name(mod, *snap_name, snapshot_presence, + spec_validation); + if (r < 0) { + return r; + } + } + return 0; +} + /* Enforce the snapshot-presence policy (permitted/forbidden/required) and, under SPEC_VALIDATION_SNAP, validate the snapshot name itself. */ +int validate_snapshot_name(at::ArgumentModifier mod, + const std::string &snap_name, + SnapshotPresence snapshot_presence, + SpecValidation spec_validation) { + std::string prefix = at::get_description_prefix(mod); + switch (snapshot_presence) { + case SNAPSHOT_PRESENCE_PERMITTED: + break; + case SNAPSHOT_PRESENCE_NONE: + if (!snap_name.empty()) { + std::cerr << "rbd: " + << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string()) + << "snapshot name specified for a command that doesn't use it" + << std::endl; + return -EINVAL; + } + break; + case SNAPSHOT_PRESENCE_REQUIRED: + if (snap_name.empty()) { + std::cerr << "rbd: " + << (mod == at::ARGUMENT_MODIFIER_DEST ? 
prefix : std::string()) + << "snapshot name was not specified" << std::endl; + return -EINVAL; + } + break; + } + + if (spec_validation == SPEC_VALIDATION_SNAP) { + // disallow "/" and "@" in snap name + std::regex pattern("^[^@/]*?$"); + if (!std::regex_match (snap_name, pattern)) { + std::cerr << "rbd: invalid snap name '" << snap_name << "'" << std::endl; + return -EINVAL; + } + } + return 0; +} + /* Translate image-creation command-line options (order/object-size, features, striping, sharing, data pool, format, journal, flatten, mirror mode) into librbd ImageOptions. */ +int get_image_options(const boost::program_options::variables_map &vm, + bool get_format, librbd::ImageOptions *opts) { + uint64_t order = 0, stripe_unit = 0, stripe_count = 0, object_size = 0; + uint64_t features = 0, features_clear = 0; + std::string data_pool; + bool order_specified = true; + bool features_specified = false; + bool features_clear_specified = false; + bool stripe_specified = false; + + if (vm.count(at::IMAGE_ORDER)) { + order = vm[at::IMAGE_ORDER].as<uint64_t>(); + } else if (vm.count(at::IMAGE_OBJECT_SIZE)) { + object_size = vm[at::IMAGE_OBJECT_SIZE].as<uint64_t>(); + order = std::round(std::log2(object_size)); + } else { + order_specified = false; + } + + if (vm.count(at::IMAGE_FEATURES)) { + features = vm[at::IMAGE_FEATURES].as<uint64_t>(); + features_specified = true; + } + + if (vm.count(at::IMAGE_STRIPE_UNIT)) { + stripe_unit = vm[at::IMAGE_STRIPE_UNIT].as<uint64_t>(); + stripe_specified = true; + } + + if (vm.count(at::IMAGE_STRIPE_COUNT)) { + stripe_count = vm[at::IMAGE_STRIPE_COUNT].as<uint64_t>(); + stripe_specified = true; + } + + if (vm.count(at::IMAGE_SHARED) && vm[at::IMAGE_SHARED].as<bool>()) { + if (features_specified) { + features &= ~RBD_FEATURES_SINGLE_CLIENT; + } else { + features_clear |= RBD_FEATURES_SINGLE_CLIENT; + features_clear_specified = true; + } + } + + if (vm.count(at::IMAGE_DATA_POOL)) { + data_pool = vm[at::IMAGE_DATA_POOL].as<std::string>(); + } + + if (get_format) { + uint64_t format = 0; + bool format_specified = false; + if (vm.count(at::IMAGE_NEW_FORMAT)) { + format = 2; + format_specified = true; + } else if 
(vm.count(at::IMAGE_FORMAT)) { + format = vm[at::IMAGE_FORMAT].as<uint32_t>(); + format_specified = true; + } + if (format == 1) { + std::cerr << "rbd: image format 1 is deprecated" << std::endl; + } + /* features, non-default striping, and data pools all require format 2; upgrade implicitly unless format 1 was requested explicitly */ + if (features_specified && features != 0) { + if (format_specified && format == 1) { + std::cerr << "rbd: features not allowed with format 1; " + << "use --image-format 2" << std::endl; + return -EINVAL; + } else { + format = 2; + format_specified = true; + } + } + + if ((stripe_unit || stripe_count) && + (stripe_unit != (1ull << order) && stripe_count != 1)) { + if (format_specified && format == 1) { + std::cerr << "rbd: non-default striping not allowed with format 1; " + << "use --image-format 2" << std::endl; + return -EINVAL; + } else { + format = 2; + format_specified = true; + } + } + + if (!data_pool.empty()) { + if (format_specified && format == 1) { + std::cerr << "rbd: data pool not allowed with format 1; " + << "use --image-format 2" << std::endl; + return -EINVAL; + } else { + format = 2; + format_specified = true; + } + } + + if (format_specified) { + int r = g_conf().set_val("rbd_default_format", stringify(format)); + ceph_assert(r == 0); + opts->set(RBD_IMAGE_OPTION_FORMAT, format); + } + } + + if (order_specified) + opts->set(RBD_IMAGE_OPTION_ORDER, order); + if (features_specified) + opts->set(RBD_IMAGE_OPTION_FEATURES, features); + if (features_clear_specified) { + opts->set(RBD_IMAGE_OPTION_FEATURES_CLEAR, features_clear); + } + if (stripe_specified) { + opts->set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); + opts->set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); + } + if (!data_pool.empty()) { + opts->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool); + } + int r = get_journal_options(vm, opts); + if (r < 0) { + return r; + } + + r = get_flatten_option(vm, opts); + if (r < 0) { + return r; + } + + if (vm.count(at::IMAGE_MIRROR_IMAGE_MODE)) { + opts->set(RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE, + 
vm[at::IMAGE_MIRROR_IMAGE_MODE].as<librbd::mirror_image_mode_t>()); + } + + return 0; +} + /* Translate journal options into ImageOptions, mirroring each into the global config so later librbd calls see the same values. */ +int get_journal_options(const boost::program_options::variables_map &vm, + librbd::ImageOptions *opts) { + + if (vm.count(at::JOURNAL_OBJECT_SIZE)) { + uint64_t size = vm[at::JOURNAL_OBJECT_SIZE].as<uint64_t>(); + uint64_t order = 12; + // round the requested size up to the next power of two (min 4K) + while ((1ULL << order) < size) { + order++; + } + opts->set(RBD_IMAGE_OPTION_JOURNAL_ORDER, order); + + int r = g_conf().set_val("rbd_journal_order", stringify(order)); + ceph_assert(r == 0); + } + if (vm.count(at::JOURNAL_SPLAY_WIDTH)) { + opts->set(RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH, + vm[at::JOURNAL_SPLAY_WIDTH].as<uint64_t>()); + + int r = g_conf().set_val("rbd_journal_splay_width", + stringify( + vm[at::JOURNAL_SPLAY_WIDTH].as<uint64_t>())); + ceph_assert(r == 0); + } + if (vm.count(at::JOURNAL_POOL)) { + opts->set(RBD_IMAGE_OPTION_JOURNAL_POOL, + vm[at::JOURNAL_POOL].as<std::string>()); + + int r = g_conf().set_val("rbd_journal_pool", + vm[at::JOURNAL_POOL].as<std::string>()); + ceph_assert(r == 0); + } + + return 0; +} + /* Set the flatten image option when --flatten was given. */ +int get_flatten_option(const boost::program_options::variables_map &vm, + librbd::ImageOptions *opts) { + if (vm.count(at::IMAGE_FLATTEN) && vm[at::IMAGE_FLATTEN].as<bool>()) { + uint64_t flatten = 1; + opts->set(RBD_IMAGE_OPTION_FLATTEN, flatten); + } + return 0; +} + /* Fetch the mandatory --size value; -EINVAL when absent. */ +int get_image_size(const boost::program_options::variables_map &vm, + uint64_t *size) { + if (vm.count(at::IMAGE_SIZE) == 0) { + std::cerr << "rbd: must specify --size <M/G/T>" << std::endl; + return -EINVAL; + } + + *size = vm[at::IMAGE_SIZE].as<uint64_t>(); + return 0; +} + /* Resolve a path from --path or the next positional argument. */ +int get_path(const boost::program_options::variables_map &vm, + size_t *arg_index, std::string *path) { + if (vm.count(at::PATH)) { + *path = vm[at::PATH].as<std::string>(); + } else { + *path = get_positional_argument(vm, *arg_index); + if (!path->empty()) { + ++(*arg_index); + } + } + + if (path->empty()) { + std::cerr << "rbd: path was not specified" << std::endl; + return 
-EINVAL; + } + return 0; +} + +int get_formatter(const po::variables_map &vm, + at::Format::Formatter *formatter) { + if (vm.count(at::FORMAT)) { + bool pretty = vm[at::PRETTY_FORMAT].as<bool>(); + *formatter = vm[at::FORMAT].as<at::Format>().create_formatter(pretty); + if (*formatter == nullptr && pretty) { + std::cerr << "rbd: --pretty-format only works when --format " + << "is json or xml" << std::endl; + return -EINVAL; + } else if (*formatter != nullptr && !pretty) { + formatter->get()->enable_line_break(); + } + } else if (vm[at::PRETTY_FORMAT].as<bool>()) { + std::cerr << "rbd: --pretty-format only works when --format " + << "is json or xml" << std::endl; + return -EINVAL; + } + return 0; +} + +int get_snap_create_flags(const po::variables_map &vm, uint32_t *flags) { + if (vm[at::SKIP_QUIESCE].as<bool>() && + vm[at::IGNORE_QUIESCE_ERROR].as<bool>()) { + std::cerr << "rbd: " << at::IGNORE_QUIESCE_ERROR + << " cannot be used together with " << at::SKIP_QUIESCE + << std::endl; + return -EINVAL; + } + + *flags = 0; + if (vm[at::SKIP_QUIESCE].as<bool>()) { + *flags |= RBD_SNAP_CREATE_SKIP_QUIESCE; + } else if (vm[at::IGNORE_QUIESCE_ERROR].as<bool>()) { + *flags |= RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR; + } + return 0; +} + +int get_encryption_options(const boost::program_options::variables_map &vm, + EncryptionOptions* result) { + std::vector<std::string> passphrase_files; + if (vm.count(at::ENCRYPTION_PASSPHRASE_FILE)) { + passphrase_files = + vm[at::ENCRYPTION_PASSPHRASE_FILE].as<std::vector<std::string>>(); + } + + std::vector<at::EncryptionFormat> formats; + if (vm.count(at::ENCRYPTION_FORMAT)) { + formats = vm[at::ENCRYPTION_FORMAT].as<decltype(formats)>(); + } else if (vm.count(at::ENCRYPTION_PASSPHRASE_FILE)) { + formats.resize(passphrase_files.size(), + at::EncryptionFormat{RBD_ENCRYPTION_FORMAT_LUKS}); + } + + if (formats.size() != passphrase_files.size()) { + std::cerr << "rbd: encryption formats count does not match " + << "passphrase files count" << 
std::endl; + return -EINVAL; + } + + result->specs.clear(); + result->specs.reserve(formats.size()); + for (size_t i = 0; i < formats.size(); ++i) { + std::ifstream file(passphrase_files[i], std::ios::in | std::ios::binary); + if (file.fail()) { + std::cerr << "rbd: unable to open passphrase file '" + << passphrase_files[i] << "': " << cpp_strerror(errno) + << std::endl; + return -errno; + } + std::string passphrase((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); + file.close(); + + switch (formats[i].format) { + case RBD_ENCRYPTION_FORMAT_LUKS: { + auto opts = new librbd::encryption_luks_format_options_t{ + std::move(passphrase)}; + result->specs.push_back( + {RBD_ENCRYPTION_FORMAT_LUKS, opts, sizeof(*opts)}); + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS1: { + auto opts = new librbd::encryption_luks1_format_options_t{ + .passphrase = std::move(passphrase)}; + result->specs.push_back( + {RBD_ENCRYPTION_FORMAT_LUKS1, opts, sizeof(*opts)}); + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS2: { + auto opts = new librbd::encryption_luks2_format_options_t{ + .passphrase = std::move(passphrase)}; + result->specs.push_back( + {RBD_ENCRYPTION_FORMAT_LUKS2, opts, sizeof(*opts)}); + break; + } + default: + ceph_abort(); + } + } + + return 0; +} + +void init_context() { + g_conf().set_val_or_die("rbd_cache_writethrough_until_flush", "false"); + g_conf().apply_changes(nullptr); +} + +int init_rados(librados::Rados *rados) { + init_context(); + + int r = rados->init_with_context(g_ceph_context); + if (r < 0) { + std::cerr << "rbd: couldn't initialize rados!" << std::endl; + return r; + } + + r = rados->connect(); + if (r < 0) { + std::cerr << "rbd: couldn't connect to the cluster!" 
<< std::endl; + return r; + } + + return 0; +} + +int init(const std::string &pool_name, const std::string& namespace_name, + librados::Rados *rados, librados::IoCtx *io_ctx) { + init_context(); + + int r = init_rados(rados); + if (r < 0) { + return r; + } + + r = init_io_ctx(*rados, pool_name, namespace_name, io_ctx); + if (r < 0) { + return r; + } + return 0; +} + +int init_io_ctx(librados::Rados &rados, std::string pool_name, + const std::string& namespace_name, librados::IoCtx *io_ctx) { + normalize_pool_name(&pool_name); + + int r = rados.ioctx_create(pool_name.c_str(), *io_ctx); + if (r < 0) { + if (r == -ENOENT && pool_name == get_default_pool_name()) { + std::cerr << "rbd: error opening default pool " + << "'" << pool_name << "'" << std::endl + << "Ensure that the default pool has been created or specify " + << "an alternate pool name." << std::endl; + } else { + std::cerr << "rbd: error opening pool '" << pool_name << "': " + << cpp_strerror(r) << std::endl; + } + return r; + } + + return set_namespace(namespace_name, io_ctx); +} + +int set_namespace(const std::string& namespace_name, librados::IoCtx *io_ctx) { + if (!namespace_name.empty()) { + librbd::RBD rbd; + bool exists = false; + int r = rbd.namespace_exists(*io_ctx, namespace_name.c_str(), &exists); + if (r < 0) { + std::cerr << "rbd: error asserting namespace: " + << cpp_strerror(r) << std::endl; + return r; + } + if (!exists) { + std::cerr << "rbd: namespace '" << namespace_name << "' does not exist." 
+ << std::endl; + return -ENOENT; + } + } + io_ctx->set_namespace(namespace_name); + return 0; +} + +void disable_cache() { + g_conf().set_val_or_die("rbd_cache", "false"); +} + +int open_image(librados::IoCtx &io_ctx, const std::string &image_name, + bool read_only, librbd::Image *image) { + int r; + librbd::RBD rbd; + if (read_only) { + r = rbd.open_read_only(io_ctx, *image, image_name.c_str(), NULL); + } else { + r = rbd.open(io_ctx, *image, image_name.c_str()); + } + + if (r < 0) { + std::cerr << "rbd: error opening image " << image_name << ": " + << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +int open_image_by_id(librados::IoCtx &io_ctx, const std::string &image_id, + bool read_only, librbd::Image *image) { + int r; + librbd::RBD rbd; + if (read_only) { + r = rbd.open_by_id_read_only(io_ctx, *image, image_id.c_str(), NULL); + } else { + r = rbd.open_by_id(io_ctx, *image, image_id.c_str()); + } + + if (r < 0) { + std::cerr << "rbd: error opening image with id " << image_id << ": " + << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +int init_and_open_image(const std::string &pool_name, + const std::string &namespace_name, + const std::string &image_name, + const std::string &image_id, + const std::string &snap_name, bool read_only, + librados::Rados *rados, librados::IoCtx *io_ctx, + librbd::Image *image) { + int r = init(pool_name, namespace_name, rados, io_ctx); + if (r < 0) { + return r; + } + + if (image_id.empty()) { + r = open_image(*io_ctx, image_name, read_only, image); + } else { + r = open_image_by_id(*io_ctx, image_id, read_only, image); + } + if (r < 0) { + return r; + } + + if (!snap_name.empty()) { + r = snap_set(*image, snap_name); + if (r < 0) { + return r; + } + } + + return 0; +} + +int snap_set(librbd::Image &image, const std::string &snap_name) { + int r = image.snap_set(snap_name.c_str()); + if (r < 0) { + std::cerr << "error setting snapshot context: " << cpp_strerror(r) + << std::endl; + return r; + } + 
return 0; +} + +void calc_sparse_extent(const bufferptr &bp, + size_t sparse_size, + size_t buffer_offset, + uint64_t buffer_length, + size_t *write_length, + bool *zeroed) { + if (sparse_size == 0) { + // sparse writes are disabled -- write the full extent + ceph_assert(buffer_offset == 0); + *write_length = buffer_length; + *zeroed = false; + return; + } + + *write_length = 0; + size_t original_offset = buffer_offset; + while (buffer_offset < buffer_length) { + size_t extent_size = std::min<size_t>( + sparse_size, buffer_length - buffer_offset); + + bufferptr extent(bp, buffer_offset, extent_size); + + bool extent_is_zero = extent.is_zero(); + if (original_offset == buffer_offset) { + *zeroed = extent_is_zero; + } else if (*zeroed != extent_is_zero) { + ceph_assert(*write_length > 0); + return; + } + + buffer_offset += extent_size; + *write_length += extent_size; + } +} + +std::string image_id(librbd::Image& image) { + std::string id; + int r = image.get_id(&id); + if (r < 0) { + return std::string(); + } + return id; +} + +std::string mirror_image_mode(librbd::mirror_image_mode_t mode) { + switch (mode) { + case RBD_MIRROR_IMAGE_MODE_JOURNAL: + return "journal"; + case RBD_MIRROR_IMAGE_MODE_SNAPSHOT: + return "snapshot"; + default: + return "unknown"; + } +} + +std::string mirror_image_state(librbd::mirror_image_state_t state) { + switch (state) { + case RBD_MIRROR_IMAGE_DISABLING: + return "disabling"; + case RBD_MIRROR_IMAGE_ENABLED: + return "enabled"; + case RBD_MIRROR_IMAGE_DISABLED: + return "disabled"; + default: + return "unknown"; + } +} + +std::string mirror_image_status_state( + librbd::mirror_image_status_state_t state) { + switch (state) { + case MIRROR_IMAGE_STATUS_STATE_UNKNOWN: + return "unknown"; + case MIRROR_IMAGE_STATUS_STATE_ERROR: + return "error"; + case MIRROR_IMAGE_STATUS_STATE_SYNCING: + return "syncing"; + case MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY: + return "starting_replay"; + case MIRROR_IMAGE_STATUS_STATE_REPLAYING: + return 
"replaying"; + case MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY: + return "stopping_replay"; + case MIRROR_IMAGE_STATUS_STATE_STOPPED: + return "stopped"; + default: + return "unknown (" + stringify(static_cast<uint32_t>(state)) + ")"; + } +} + +std::string mirror_image_site_status_state( + const librbd::mirror_image_site_status_t& status) { + return (status.up ? "up+" : "down+") + + mirror_image_status_state(status.state); +} + +std::string mirror_image_global_status_state( + const librbd::mirror_image_global_status_t& status) { + librbd::mirror_image_site_status_t local_status; + int r = get_local_mirror_image_status(status, &local_status); + if (r < 0) { + return "down+unknown"; + } + + return mirror_image_site_status_state(local_status); +} + +int get_local_mirror_image_status( + const librbd::mirror_image_global_status_t& status, + librbd::mirror_image_site_status_t* local_status) { + auto it = std::find_if(status.site_statuses.begin(), + status.site_statuses.end(), + [](auto& site_status) { + return (site_status.mirror_uuid == + RBD_MIRROR_IMAGE_STATUS_LOCAL_MIRROR_UUID); + }); + if (it == status.site_statuses.end()) { + return -ENOENT; + } + + *local_status = *it; + return 0; +} + +std::string timestr(time_t t) { + if (t == 0) { + return ""; + } + + struct tm tm; + + localtime_r(&t, &tm); + + char buf[32]; + strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &tm); + + return buf; +} + +uint64_t get_rbd_default_features(CephContext* cct) { + auto features = cct->_conf.get_val<std::string>("rbd_default_features"); + return boost::lexical_cast<uint64_t>(features); +} + +bool is_not_user_snap_namespace(librbd::Image* image, + const librbd::snap_info_t &snap_info) +{ + librbd::snap_namespace_type_t namespace_type; + int r = image->snap_get_namespace_type(snap_info.id, &namespace_type); + if (r < 0) { + return false; + } + return namespace_type != RBD_SNAP_NAMESPACE_TYPE_USER; +} + +void get_mirror_peer_sites( + librados::IoCtx& io_ctx, + 
std::vector<librbd::mirror_peer_site_t>* mirror_peers) { + librados::IoCtx default_io_ctx; + default_io_ctx.dup(io_ctx); + default_io_ctx.set_namespace(""); + + mirror_peers->clear(); + + librbd::RBD rbd; + int r = rbd.mirror_peer_site_list(default_io_ctx, mirror_peers); + if (r < 0 && r != -ENOENT) { + std::cerr << "rbd: failed to list mirror peers" << std::endl; + } +} + +void get_mirror_peer_mirror_uuids_to_names( + const std::vector<librbd::mirror_peer_site_t>& mirror_peers, + std::map<std::string, std::string>* mirror_uuids_to_name) { + mirror_uuids_to_name->clear(); + for (auto& peer : mirror_peers) { + if (!peer.mirror_uuid.empty() && !peer.site_name.empty()) { + (*mirror_uuids_to_name)[peer.mirror_uuid] = peer.site_name; + } + } +} + +void populate_unknown_mirror_image_site_statuses( + const std::vector<librbd::mirror_peer_site_t>& mirror_peers, + librbd::mirror_image_global_status_t* global_status) { + std::set<std::string> missing_mirror_uuids; + librbd::mirror_peer_direction_t mirror_peer_direction = + RBD_MIRROR_PEER_DIRECTION_RX_TX; + for (auto& peer : mirror_peers) { + if (peer.uuid == mirror_peers.begin()->uuid) { + mirror_peer_direction = peer.direction; + } else if (mirror_peer_direction != RBD_MIRROR_PEER_DIRECTION_RX_TX && + mirror_peer_direction != peer.direction) { + mirror_peer_direction = RBD_MIRROR_PEER_DIRECTION_RX_TX; + } + + if (!peer.mirror_uuid.empty() && + peer.direction != RBD_MIRROR_PEER_DIRECTION_TX) { + missing_mirror_uuids.insert(peer.mirror_uuid); + } + } + + if (mirror_peer_direction != RBD_MIRROR_PEER_DIRECTION_TX) { + missing_mirror_uuids.insert(RBD_MIRROR_IMAGE_STATUS_LOCAL_MIRROR_UUID); + } + + std::vector<librbd::mirror_image_site_status_t> site_statuses; + site_statuses.reserve(missing_mirror_uuids.size()); + + for (auto& site_status : global_status->site_statuses) { + if (missing_mirror_uuids.count(site_status.mirror_uuid) > 0) { + missing_mirror_uuids.erase(site_status.mirror_uuid); + 
site_statuses.push_back(site_status); + } + } + + for (auto& mirror_uuid : missing_mirror_uuids) { + site_statuses.push_back({mirror_uuid, MIRROR_IMAGE_STATUS_STATE_UNKNOWN, + "status not found", 0, false}); + } + + std::swap(global_status->site_statuses, site_statuses); +} + +int mgr_command(librados::Rados& rados, const std::string& cmd, + const std::map<std::string, std::string> &args, + std::ostream *out_os, std::ostream *err_os) { + std::string command = R"( + { + "prefix": ")" + cmd + R"(", )" + mgr_command_args_to_str(args) + R"( + })"; + + bufferlist in_bl; + bufferlist out_bl; + std::string outs; + int r = rados.mgr_command(command, in_bl, &out_bl, &outs); + if (r < 0) { + (*err_os) << "rbd: " << cmd << " failed: " << cpp_strerror(r); + if (!outs.empty()) { + (*err_os) << ": " << outs; + } + (*err_os) << std::endl; + return r; + } + + if (out_bl.length() != 0) { + (*out_os) << out_bl.c_str(); + } + + return 0; +} + +} // namespace utils +} // namespace rbd diff --git a/src/tools/rbd/Utils.h b/src/tools/rbd/Utils.h new file mode 100644 index 000000000..5076fd7fe --- /dev/null +++ b/src/tools/rbd/Utils.h @@ -0,0 +1,283 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_UTILS_H +#define CEPH_RBD_UTILS_H + +#include "include/compat.h" +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" +#include "tools/rbd/ArgumentTypes.h" +#include <map> +#include <string> +#include <boost/program_options.hpp> + +namespace rbd { +namespace utils { + +namespace detail { + +template <typename T, void(T::*MF)(int)> +void aio_completion_callback(librbd::completion_t completion, + void *arg) { + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(completion); + + // complete the AIO callback in separate thread context + T *t = reinterpret_cast<T *>(arg); + int r = aio_completion->get_return_value(); + 
aio_completion->release(); + + (t->*MF)(r); +} + +} // namespace detail + +static const std::string RBD_DIFF_BANNER ("rbd diff v1\n"); +static const size_t RBD_DEFAULT_SPARSE_SIZE = 4096; + +static const std::string RBD_IMAGE_BANNER_V2 ("rbd image v2\n"); +static const std::string RBD_IMAGE_DIFFS_BANNER_V2 ("rbd image diffs v2\n"); +static const std::string RBD_DIFF_BANNER_V2 ("rbd diff v2\n"); + +#define RBD_DIFF_FROM_SNAP 'f' +#define RBD_DIFF_TO_SNAP 't' +#define RBD_DIFF_IMAGE_SIZE 's' +#define RBD_DIFF_WRITE 'w' +#define RBD_DIFF_ZERO 'z' +#define RBD_DIFF_END 'e' + +#define RBD_SNAP_PROTECTION_STATUS 'p' + +#define RBD_EXPORT_IMAGE_ORDER 'O' +#define RBD_EXPORT_IMAGE_FEATURES 'T' +#define RBD_EXPORT_IMAGE_STRIPE_UNIT 'U' +#define RBD_EXPORT_IMAGE_STRIPE_COUNT 'C' +#define RBD_EXPORT_IMAGE_META 'M' +#define RBD_EXPORT_IMAGE_END 'E' + +enum SnapshotPresence { + SNAPSHOT_PRESENCE_NONE, + SNAPSHOT_PRESENCE_PERMITTED, + SNAPSHOT_PRESENCE_REQUIRED +}; + +enum SpecValidation { + SPEC_VALIDATION_FULL, + SPEC_VALIDATION_SNAP, + SPEC_VALIDATION_NONE +}; + +struct ProgressContext : public librbd::ProgressContext { + const char *operation; + bool progress; + int last_pc; + + ProgressContext(const char *o, bool no_progress) + : operation(o), progress(!no_progress), last_pc(0) { + } + + int update_progress(uint64_t offset, uint64_t total) override; + void finish(); + void fail(); +}; + +int get_percentage(uint64_t part, uint64_t whole); + +struct EncryptionOptions { + std::vector<librbd::encryption_spec_t> specs; + + ~EncryptionOptions() { + for (auto& spec : specs) { + switch (spec.format) { + case RBD_ENCRYPTION_FORMAT_LUKS: { + auto opts = + static_cast<librbd::encryption_luks_format_options_t*>(spec.opts); + ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(), + opts->passphrase.size()); + delete opts; + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS1: { + auto opts = + static_cast<librbd::encryption_luks1_format_options_t*>(spec.opts); + 
ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(), + opts->passphrase.size()); + delete opts; + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS2: { + auto opts = + static_cast<librbd::encryption_luks2_format_options_t*>(spec.opts); + ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(), + opts->passphrase.size()); + delete opts; + break; + } + default: + ceph_abort(); + } + } + } +}; + +template <typename T, void(T::*MF)(int)> +librbd::RBD::AioCompletion *create_aio_completion(T *t) { + return new librbd::RBD::AioCompletion( + t, &detail::aio_completion_callback<T, MF>); +} + +void aio_context_callback(librbd::completion_t completion, void *arg); + +int read_string(int fd, unsigned max, std::string *out); + +int extract_spec(const std::string &spec, std::string *pool_name, + std::string *namespace_name, std::string *name, + std::string *snap_name, SpecValidation spec_validation); + +std::string get_positional_argument( + const boost::program_options::variables_map &vm, size_t index); + +void normalize_pool_name(std::string* pool_name); +std::string get_default_pool_name(); + +int get_image_or_snap_spec(const boost::program_options::variables_map &vm, + std::string *spec); + +void append_options_as_args(const std::vector<std::string> &options, + std::vector<std::string> *args); + +int get_pool_and_namespace_names( + const boost::program_options::variables_map &vm, bool validate_pool_name, + std::string* pool_name, std::string* namespace_name, size_t *arg_index); + +int get_pool_image_snapshot_names( + const boost::program_options::variables_map &vm, + argument_types::ArgumentModifier mod, size_t *spec_arg_index, + std::string *pool_name, std::string *namespace_name, + std::string *image_name, std::string *snap_name, bool image_name_required, + SnapshotPresence snapshot_presence, SpecValidation spec_validation); + +int get_pool_generic_snapshot_names( + const boost::program_options::variables_map &vm, + argument_types::ArgumentModifier mod, 
size_t *spec_arg_index, + const std::string& pool_key, std::string *pool_name, + std::string *namespace_name, const std::string& generic_key, + const std::string& generic_key_desc, std::string *generic_name, + std::string *snap_name, bool generic_name_required, + SnapshotPresence snapshot_presence, SpecValidation spec_validation); + +int get_pool_image_id(const boost::program_options::variables_map &vm, + size_t *spec_arg_index, + std::string *pool_name, + std::string *namespace_name, + std::string *image_id); + +int validate_snapshot_name(argument_types::ArgumentModifier mod, + const std::string &snap_name, + SnapshotPresence snapshot_presence, + SpecValidation spec_validation); + +int get_image_options(const boost::program_options::variables_map &vm, + bool get_format, librbd::ImageOptions* opts); + +int get_journal_options(const boost::program_options::variables_map &vm, + librbd::ImageOptions *opts); + +int get_flatten_option(const boost::program_options::variables_map &vm, + librbd::ImageOptions *opts); + +int get_image_size(const boost::program_options::variables_map &vm, + uint64_t *size); + +int get_path(const boost::program_options::variables_map &vm, + size_t *arg_index, std::string *path); + +int get_formatter(const boost::program_options::variables_map &vm, + argument_types::Format::Formatter *formatter); + +int get_snap_create_flags(const boost::program_options::variables_map &vm, + uint32_t *flags); + +int get_encryption_options(const boost::program_options::variables_map &vm, + EncryptionOptions* result); + +void init_context(); + +int init_rados(librados::Rados *rados); + +int init(const std::string& pool_name, const std::string& namespace_name, + librados::Rados *rados, librados::IoCtx *io_ctx); +int init_io_ctx(librados::Rados &rados, std::string pool_name, + const std::string& namespace_name, librados::IoCtx *io_ctx); +int set_namespace(const std::string& namespace_name, librados::IoCtx *io_ctx); + +void disable_cache(); + +int 
open_image(librados::IoCtx &io_ctx, const std::string &image_name, + bool read_only, librbd::Image *image); + +int open_image_by_id(librados::IoCtx &io_ctx, const std::string &image_id, + bool read_only, librbd::Image *image); + +int init_and_open_image(const std::string &pool_name, + const std::string &namespace_name, + const std::string &image_name, + const std::string &image_id, + const std::string &snap_name, bool read_only, + librados::Rados *rados, librados::IoCtx *io_ctx, + librbd::Image *image); + +int snap_set(librbd::Image &image, const std::string &snap_name); + +void calc_sparse_extent(const bufferptr &bp, + size_t sparse_size, + size_t buffer_offset, + uint64_t length, + size_t *write_length, + bool *zeroed); + +bool is_not_user_snap_namespace(librbd::Image* image, + const librbd::snap_info_t &snap_info); + +std::string image_id(librbd::Image& image); + +std::string mirror_image_mode( + librbd::mirror_image_mode_t mirror_image_mode); +std::string mirror_image_state( + librbd::mirror_image_state_t mirror_image_state); +std::string mirror_image_status_state( + librbd::mirror_image_status_state_t state); +std::string mirror_image_site_status_state( + const librbd::mirror_image_site_status_t& status); +std::string mirror_image_global_status_state( + const librbd::mirror_image_global_status_t& status); + +int get_local_mirror_image_status( + const librbd::mirror_image_global_status_t& status, + librbd::mirror_image_site_status_t* local_status); + +std::string timestr(time_t t); + +// duplicate here to not include librbd_internal lib +uint64_t get_rbd_default_features(CephContext* cct); + +void get_mirror_peer_sites( + librados::IoCtx& io_ctx, + std::vector<librbd::mirror_peer_site_t>* mirror_peers); +void get_mirror_peer_mirror_uuids_to_names( + const std::vector<librbd::mirror_peer_site_t>& mirror_peers, + std::map<std::string, std::string>* fsid_to_name); +void populate_unknown_mirror_image_site_statuses( + const std::vector<librbd::mirror_peer_site_t>& 
mirror_peers, + librbd::mirror_image_global_status_t* global_status); + +int mgr_command(librados::Rados& rados, const std::string& cmd, + const std::map<std::string, std::string> &args, + std::ostream *out_os, std::ostream *err_os); + +} // namespace utils +} // namespace rbd + +#endif // CEPH_RBD_UTILS_H diff --git a/src/tools/rbd/action/Bench.cc b/src/tools/rbd/action/Bench.cc new file mode 100644 index 000000000..061a76d33 --- /dev/null +++ b/src/tools/rbd/action/Bench.cc @@ -0,0 +1,589 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/strtol.h" +#include "common/ceph_mutex.h" +#include "include/types.h" +#include "global/signal_handler.h" +#include <atomic> +#include <chrono> +#include <iostream> +#include <boost/accumulators/accumulators.hpp> +#include <boost/accumulators/statistics/stats.hpp> +#include <boost/accumulators/statistics/rolling_sum.hpp> +#include <boost/program_options.hpp> + +using namespace std::chrono; + +static std::atomic<bool> terminating; +static void handle_signal(int signum) +{ + ceph_assert(signum == SIGINT || signum == SIGTERM); + terminating = true; +} + +namespace rbd { +namespace action { +namespace bench { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +enum io_type_t { + IO_TYPE_READ = 0, + IO_TYPE_WRITE, + IO_TYPE_RW, + + IO_TYPE_NUM, +}; + +enum io_pattern_t { + IO_PATTERN_RAND, + IO_PATTERN_SEQ, + IO_PATTERN_FULL_SEQ +}; + +struct IOType {}; +struct Size {}; +struct IOPattern {}; + +void validate(boost::any& v, const std::vector<std::string>& values, + Size *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + std::string parse_error; + uint64_t size = strict_iecstrtoll(s, &parse_error); + if 
(!parse_error.empty()) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + v = boost::any(size); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + IOPattern *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + if (s == "rand") { + v = IO_PATTERN_RAND; + } else if (s == "seq") { + v = IO_PATTERN_SEQ; + } else if (s == "full-seq") { + v = IO_PATTERN_FULL_SEQ; + } else { + throw po::validation_error(po::validation_error::invalid_option_value); + } +} + +io_type_t get_io_type(std::string io_type_string) { + if (io_type_string == "read") + return IO_TYPE_READ; + else if (io_type_string == "write") + return IO_TYPE_WRITE; + else if (io_type_string == "readwrite" || io_type_string == "rw") + return IO_TYPE_RW; + else + return IO_TYPE_NUM; +} + +void validate(boost::any& v, const std::vector<std::string>& values, + IOType *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + io_type_t io_type = get_io_type(s); + if (io_type >= IO_TYPE_NUM) + throw po::validation_error(po::validation_error::invalid_option_value); + else + v = boost::any(io_type); +} + +} // anonymous namespace + +static void rbd_bencher_completion(void *c, void *pc); +struct rbd_bencher; + +struct bencher_completer { + rbd_bencher *bencher; + bufferlist *bl; + +public: + bencher_completer(rbd_bencher *bencher, bufferlist *bl) + : bencher(bencher), bl(bl) + { } + + ~bencher_completer() + { + if (bl) + delete bl; + } +}; + +struct rbd_bencher { + librbd::Image *image; + ceph::mutex lock = ceph::make_mutex("rbd_bencher::lock"); + ceph::condition_variable cond; + int in_flight; + io_type_t io_type; + uint64_t io_size; + bufferlist write_bl; + + explicit rbd_bencher(librbd::Image *i, io_type_t io_type, uint64_t io_size) + : image(i), + in_flight(0), + io_type(io_type), + io_size(io_size) + { + 
if (io_type == IO_TYPE_WRITE || io_type == IO_TYPE_RW) { + bufferptr bp(io_size); + memset(bp.c_str(), rand() & 0xff, io_size); + write_bl.push_back(bp); + } + } + + void start_io(int max, uint64_t off, uint64_t len, int op_flags, bool read_flag) + { + { + std::lock_guard l{lock}; + in_flight++; + } + + librbd::RBD::AioCompletion *c; + if (read_flag) { + bufferlist *read_bl = new bufferlist(); + c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, read_bl)), + rbd_bencher_completion); + image->aio_read2(off, len, *read_bl, c, op_flags); + } else { + c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, NULL)), + rbd_bencher_completion); + image->aio_write2(off, len, write_bl, c, op_flags); + } + } + + int wait_for(int max, bool interrupt_on_terminating) { + std::unique_lock l{lock}; + while (in_flight > max && !(terminating && interrupt_on_terminating)) { + cond.wait_for(l, 200ms); + } + + return terminating ? -EINTR : 0; + } + +}; + +void rbd_bencher_completion(void *vc, void *pc) +{ + librbd::RBD::AioCompletion *c = (librbd::RBD::AioCompletion *)vc; + bencher_completer *bc = static_cast<bencher_completer *>(pc); + rbd_bencher *b = bc->bencher; + //cout << "complete " << c << std::endl; + int ret = c->get_return_value(); + if (b->io_type == IO_TYPE_WRITE && ret != 0) { + std::cout << "write error: " << cpp_strerror(ret) << std::endl; + exit(ret < 0 ? -ret : ret); + } else if (b->io_type == IO_TYPE_READ && (unsigned int)ret != b->io_size) { + std::cout << "read error: " << cpp_strerror(ret) << std::endl; + exit(ret < 0 ? 
-ret : ret); + } + b->lock.lock(); + b->in_flight--; + b->cond.notify_all(); + b->lock.unlock(); + c->release(); + delete bc; +} + +bool should_read(uint64_t read_proportion) +{ + uint64_t rand_num = rand() % 100; + + if (rand_num < read_proportion) + return true; + else + return false; +} + +int do_bench(librbd::Image& image, io_type_t io_type, + uint64_t io_size, uint64_t io_threads, + uint64_t io_bytes, io_pattern_t io_pattern, + uint64_t read_proportion) +{ + uint64_t size = 0; + image.size(&size); + if (io_size > size) { + std::cerr << "rbd: io-size " << byte_u_t(io_size) << " " + << "larger than image size " << byte_u_t(size) << std::endl; + return -EINVAL; + } + + if (io_size > std::numeric_limits<uint32_t>::max()) { + std::cerr << "rbd: io-size should be less than 4G" << std::endl; + return -EINVAL; + } + + int r = image.flush(); + if (r < 0 && (r != -EROFS || io_type != IO_TYPE_READ)) { + std::cerr << "rbd: failed to flush: " << cpp_strerror(r) << std::endl; + return r; + } + + rbd_bencher b(&image, io_type, io_size); + + std::cout << "bench " + << " type " << (io_type == IO_TYPE_READ ? "read" : + io_type == IO_TYPE_WRITE ? "write" : "readwrite") + << (io_type == IO_TYPE_RW ? 
" read:write=" + + std::to_string(read_proportion) + ":" + + std::to_string(100 - read_proportion) : "") + << " io_size " << io_size + << " io_threads " << io_threads + << " bytes " << io_bytes + << " pattern "; + switch (io_pattern) { + case IO_PATTERN_RAND: + std::cout << "random"; + break; + case IO_PATTERN_SEQ: + std::cout << "sequential"; + break; + case IO_PATTERN_FULL_SEQ: + std::cout << "full sequential"; + break; + default: + ceph_assert(false); + break; + } + std::cout << std::endl; + + srand(time(NULL) % (unsigned long) -1); + + coarse_mono_time start = coarse_mono_clock::now(); + std::chrono::duration<double> last = std::chrono::duration<double>::zero(); + uint64_t ios = 0; + + std::vector<uint64_t> thread_offset; + uint64_t i; + uint64_t seq_chunk_length = (size / io_size / io_threads) * io_size;; + + // disturb all thread's offset + for (i = 0; i < io_threads; i++) { + uint64_t start_pos = 0; + switch (io_pattern) { + case IO_PATTERN_RAND: + start_pos = (rand() % (size / io_size)) * io_size; + break; + case IO_PATTERN_SEQ: + start_pos = seq_chunk_length * i; + break; + case IO_PATTERN_FULL_SEQ: + start_pos = i * io_size; + break; + default: + break; + } + thread_offset.push_back(start_pos); + } + + const int WINDOW_SIZE = 5; + typedef boost::accumulators::accumulator_set< + double, boost::accumulators::stats< + boost::accumulators::tag::rolling_sum> > RollingSum; + + RollingSum time_acc( + boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE); + RollingSum ios_acc( + boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE); + RollingSum off_acc( + boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE); + uint64_t cur_ios = 0; + uint64_t cur_off = 0; + + int op_flags; + if (io_pattern == IO_PATTERN_RAND) { + op_flags = LIBRADOS_OP_FLAG_FADVISE_RANDOM; + } else { + op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL; + } + + printf(" SEC OPS OPS/SEC BYTES/SEC\n"); + uint64_t off; + int read_ops = 0; + int write_ops = 
0; + + for (off = 0; off < io_bytes; ) { + // Issue I/O + i = 0; + int r = 0; + while (i < io_threads && off < io_bytes) { + bool read_flag = should_read(read_proportion); + + r = b.wait_for(io_threads - 1, true); + if (r < 0) { + break; + } + b.start_io(io_threads, thread_offset[i], io_size, op_flags, read_flag); + + ++i; + ++ios; + off += io_size; + + ++cur_ios; + cur_off += io_size; + + if (read_flag) + read_ops++; + else + write_ops++; + } + + if (r < 0) { + break; + } + + // Set the thread_offsets of next I/O + for (i = 0; i < io_threads; ++i) { + switch (io_pattern) { + case IO_PATTERN_RAND: + thread_offset[i] = (rand() % (size / io_size)) * io_size; + continue; + case IO_PATTERN_SEQ: + if (off < (seq_chunk_length * io_threads)) { + thread_offset[i] += io_size; + } else { + // thread_offset is adjusted to the chunks unassigned to threads. + thread_offset[i] = off + (i * io_size); + } + if (thread_offset[i] + io_size > size) { + thread_offset[i] = seq_chunk_length * i; + } + break; + case IO_PATTERN_FULL_SEQ: + thread_offset[i] += (io_size * io_threads); + if (thread_offset[i] >= size) { + thread_offset[i] = i * io_size; + } + break; + default: + break; + } + } + + coarse_mono_time now = coarse_mono_clock::now(); + std::chrono::duration<double> elapsed = now - start; + if (last == std::chrono::duration<double>::zero()) { + last = elapsed; + } else if ((int)elapsed.count() != (int)last.count()) { + time_acc((elapsed - last).count()); + ios_acc(static_cast<double>(cur_ios)); + off_acc(static_cast<double>(cur_off)); + cur_ios = 0; + cur_off = 0; + + double time_sum = boost::accumulators::rolling_sum(time_acc); + std::cout.width(5); + std::cout << (int)elapsed.count(); + std::cout.width(10); + std::cout << ios - io_threads; + std::cout.width(10); + std::cout << boost::accumulators::rolling_sum(ios_acc) / time_sum; + std::cout.width(10); + std::cout << byte_u_t(boost::accumulators::rolling_sum(off_acc) / time_sum) << "/s" + << std::endl; + last = elapsed; + } + } + 
b.wait_for(0, false); + + if (io_type != IO_TYPE_READ) { + r = image.flush(); + if (r < 0) { + std::cerr << "rbd: failed to flush at the end: " << cpp_strerror(r) + << std::endl; + } + } + + coarse_mono_time now = coarse_mono_clock::now(); + std::chrono::duration<double> elapsed = now - start; + + std::cout << "elapsed: " << (int)elapsed.count() << " " + << "ops: " << ios << " " + << "ops/sec: " << (double)ios / elapsed.count() << " " + << "bytes/sec: " << byte_u_t((double)off / elapsed.count()) << "/s" + << std::endl; + + if (io_type == IO_TYPE_RW) { + std::cout << "read_ops: " << read_ops << " " + << "read_ops/sec: " << (double)read_ops / elapsed.count() << " " + << "read_bytes/sec: " << byte_u_t((double)read_ops * io_size / elapsed.count()) << "/s" + << std::endl; + + std::cout << "write_ops: " << write_ops << " " + << "write_ops/sec: " << (double)write_ops / elapsed.count() << " " + << "write_bytes/sec: " << byte_u_t((double)write_ops * io_size / elapsed.count()) << "/s" + << std::endl; + + } + + return 0; +} + +void add_bench_common_options(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + + options->add_options() + ("io-size", po::value<Size>(), "IO size (in B/K/M/G) (< 4G) [default: 4K]") + ("io-threads", po::value<uint32_t>(), "ios in flight [default: 16]") + ("io-total", po::value<Size>(), "total size for IO (in B/K/M/G/T) [default: 1G]") + ("io-pattern", po::value<IOPattern>(), "IO pattern (rand, seq, or full-seq) [default: seq]") + ("rw-mix-read", po::value<uint64_t>(), "read proportion in readwrite (<= 100) [default: 50]"); +} + +void get_arguments_for_write(po::options_description *positional, + po::options_description *options) { + add_bench_common_options(positional, options); +} + +void get_arguments_for_bench(po::options_description *positional, + po::options_description *options) { + add_bench_common_options(positional, options); + + 
options->add_options() + ("io-type", po::value<IOType>()->required(), "IO type (read, write, or readwrite(rw))"); +} + +int bench_execute(const po::variables_map &vm, io_type_t bench_io_type) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + utils::SnapshotPresence snap_presence = utils::SNAPSHOT_PRESENCE_NONE; + if (bench_io_type == IO_TYPE_READ) + snap_presence = utils::SNAPSHOT_PRESENCE_PERMITTED; + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, snap_presence, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + uint64_t bench_io_size; + if (vm.count("io-size")) { + bench_io_size = vm["io-size"].as<uint64_t>(); + } else { + bench_io_size = 4096; + } + if (bench_io_size == 0) { + std::cerr << "rbd: --io-size should be greater than zero." << std::endl; + return -EINVAL; + } + + uint32_t bench_io_threads; + if (vm.count("io-threads")) { + bench_io_threads = vm["io-threads"].as<uint32_t>(); + } else { + bench_io_threads = 16; + } + if (bench_io_threads == 0) { + std::cerr << "rbd: --io-threads should be greater than zero." 
<< std::endl; + return -EINVAL; + } + + uint64_t bench_bytes; + if (vm.count("io-total")) { + bench_bytes = vm["io-total"].as<uint64_t>(); + } else { + bench_bytes = 1 << 30; + } + + io_pattern_t bench_pattern; + if (vm.count("io-pattern")) { + bench_pattern = vm["io-pattern"].as<io_pattern_t>(); + } else { + bench_pattern = IO_PATTERN_SEQ; + } + + uint64_t bench_read_proportion; + if (bench_io_type == IO_TYPE_READ) { + bench_read_proportion = 100; + } else if (bench_io_type == IO_TYPE_WRITE) { + bench_read_proportion = 0; + } else { + if (vm.count("rw-mix-read")) { + bench_read_proportion = vm["rw-mix-read"].as<uint64_t>(); + } else { + bench_read_proportion = 50; + } + + if (bench_read_proportion > 100) { + std::cerr << "rbd: --rw-mix-read should not be larger than 100." << std::endl; + return -EINVAL; + } + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + r = do_bench(image, bench_io_type, bench_io_size, bench_io_threads, + bench_bytes, bench_pattern, bench_read_proportion); + + unregister_async_signal_handler(SIGHUP, sighup_handler); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); + + if (r < 0) { + std::cerr << "bench failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +int execute_for_write(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::cerr << "rbd: bench-write is deprecated, use rbd bench --io-type write ..." 
<< std::endl; + return bench_execute(vm, IO_TYPE_WRITE); +} + +int execute_for_bench(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + io_type_t bench_io_type; + if (vm.count("io-type")) { + bench_io_type = vm["io-type"].as<io_type_t>(); + } else { + std::cerr << "rbd: --io-type must be specified." << std::endl; + return -EINVAL; + } + + return bench_execute(vm, bench_io_type); +} + +Shell::Action action_write( + {"bench-write"}, {}, "Simple write benchmark. (Deprecated, please use `rbd bench --io-type write` instead.)", + "", &get_arguments_for_write, &execute_for_write, false); + +Shell::Action action_bench( + {"bench"}, {}, "Simple benchmark.", "", &get_arguments_for_bench, &execute_for_bench); + +} // namespace bench +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Children.cc b/src/tools/rbd/action/Children.cc new file mode 100644 index 000000000..58e861b69 --- /dev/null +++ b/src/tools/rbd/action/Children.cc @@ -0,0 +1,167 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace children { + +namespace at = argument_types; +namespace po = boost::program_options; + +int do_list_children(librados::IoCtx &io_ctx, librbd::Image &image, + bool all_flag, bool descendants_flag, Formatter *f) +{ + std::vector<librbd::linked_image_spec_t> children; + librbd::RBD rbd; + int r; + if (descendants_flag) { + r = image.list_descendants(&children); + } else { + r = image.list_children3(&children); + } + if (r < 0) + return r; + + if (f) + f->open_array_section("children"); + + for (auto& child : children) { + bool trash = child.trash; + if (f) { + if (all_flag) { + f->open_object_section("child"); + 
f->dump_string("pool", child.pool_name); + f->dump_string("pool_namespace", child.pool_namespace); + f->dump_string("image", child.image_name); + f->dump_string("id", child.image_id); + f->dump_bool("trash", child.trash); + f->close_section(); + } else if (!trash) { + f->open_object_section("child"); + f->dump_string("pool", child.pool_name); + f->dump_string("pool_namespace", child.pool_namespace); + f->dump_string("image", child.image_name); + f->close_section(); + } + } else if (all_flag || !trash) { + if (child.pool_name.empty()) { + std::cout << "(child missing " << child.pool_id << "/"; + } else { + std::cout << child.pool_name << "/"; + } + if (!child.pool_namespace.empty()) { + std::cout << child.pool_namespace << "/"; + } + if (child.image_name.empty()) { + std::cout << child.image_id << ")"; + } else { + std::cout << child.image_name; + if (trash) { + std::cout << " (trash " << child.image_id << ")"; + } + } + std::cout << std::endl; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } + + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + at::add_snap_id_option(options); + options->add_options() + ("all,a", po::bool_switch(), "list all children (include trash)"); + options->add_options() + ("descendants", po::bool_switch(), "include all descendants"); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + uint64_t snap_id = LIBRADOS_SNAP_HEAD; + if (vm.count(at::SNAPSHOT_ID)) { + snap_id = vm[at::SNAPSHOT_ID].as<uint64_t>(); + } + + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, 
&snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (snap_id != LIBRADOS_SNAP_HEAD && !snap_name.empty()) { + std::cerr << "rbd: trying to access snapshot using both name and id." + << std::endl; + return -EINVAL; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + if (!snap_name.empty()) { + r = image.snap_set(snap_name.c_str()); + } else if (snap_id != LIBRADOS_SNAP_HEAD) { + r = image.snap_set_by_id(snap_id); + } + if (r == -ENOENT) { + std::cerr << "rbd: snapshot does not exist." << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: error setting snapshot: " << cpp_strerror(r) + << std::endl; + return r; + } + + r = do_list_children(io_ctx, image, vm["all"].as<bool>(), + vm["descendants"].as<bool>(), formatter.get()); + if (r < 0) { + std::cerr << "rbd: listing children failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +Shell::SwitchArguments switched_arguments({"all", "a", "descendants"}); +Shell::Action action( + {"children"}, {}, "Display children of an image or its snapshot.", "", + &get_arguments, &execute); + +} // namespace children +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Clone.cc b/src/tools/rbd/action/Clone.cc new file mode 100644 index 000000000..6406c957e --- /dev/null +++ b/src/tools/rbd/action/Clone.cc @@ -0,0 +1,99 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd 
{ +namespace action { +namespace clone { + +namespace at = argument_types; +namespace po = boost::program_options; + +int do_clone(librbd::RBD &rbd, librados::IoCtx &p_ioctx, + const char *p_name, const char *p_snapname, + librados::IoCtx &c_ioctx, const char *c_name, + librbd::ImageOptions& opts) { + return rbd.clone3(p_ioctx, p_name, p_snapname, c_ioctx, c_name, opts); +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, false); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string dst_pool_name; + std::string dst_namespace_name; + std::string dst_image_name; + std::string dst_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, + &dst_namespace_name, &dst_image_name, &dst_snap_name, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, false, &opts); + if (r < 0) { + return r; + } + opts.set(RBD_IMAGE_OPTION_FORMAT, static_cast<uint64_t>(2)); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librados::IoCtx dst_io_ctx; + r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, &dst_io_ctx); + if 
(r < 0) { + return r; + } + + librbd::RBD rbd; + r = do_clone(rbd, io_ctx, image_name.c_str(), snap_name.c_str(), dst_io_ctx, + dst_image_name.c_str(), opts); + if (r == -EXDEV) { + std::cerr << "rbd: clone v2 required for cross-namespace clones." + << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: clone error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"clone"}, {}, "Clone a snapshot into a CoW child image.", + at::get_long_features_help(), &get_arguments, &execute); + +} // namespace clone +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Config.cc b/src/tools/rbd/action/Config.cc new file mode 100644 index 000000000..b038485ce --- /dev/null +++ b/src/tools/rbd/action/Config.cc @@ -0,0 +1,891 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "common/ceph_context.h" +#include "common/ceph_json.h" +#include "common/escape.h" +#include "common/errno.h" +#include "common/options.h" +#include "global/global_context.h" +#include "include/stringify.h" + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" + +#include <iostream> + +#include <boost/algorithm/string/predicate.hpp> +#include <boost/program_options.hpp> + +#include "json_spirit/json_spirit.h" + +namespace rbd { +namespace action { +namespace config { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +const std::string METADATA_CONF_PREFIX = "conf_"; + +void add_config_entity_option( + boost::program_options::options_description *positional) { + positional->add_options() + ("config-entity", "config entity (global, client, client.<id>)"); +} + +void add_pool_option(boost::program_options::options_description *positional) { + positional->add_options() + ("pool-name", "pool name"); +} + +void 
add_key_option(po::options_description *positional) { + positional->add_options() + ("key", "config key"); +} + +int get_config_entity(const po::variables_map &vm, std::string *config_entity) { + *config_entity = utils::get_positional_argument(vm, 0); + + if (*config_entity != "global" && *config_entity != "client" && + !boost::starts_with(*config_entity, ("client."))) { + std::cerr << "rbd: invalid config entity: " << *config_entity + << " (must be global, client or client.<id>)" << std::endl; + return -EINVAL; + } + + return 0; +} + +int get_pool(const po::variables_map &vm, std::string *pool_name) { + *pool_name = utils::get_positional_argument(vm, 0); + if (pool_name->empty()) { + std::cerr << "rbd: pool name was not specified" << std::endl; + return -EINVAL; + } + + return 0; +} + +int get_key(const po::variables_map &vm, size_t *arg_index, + std::string *key) { + *key = utils::get_positional_argument(vm, *arg_index); + if (key->empty()) { + std::cerr << "rbd: config key was not specified" << std::endl; + return -EINVAL; + } else { + ++(*arg_index); + } + + if (!boost::starts_with(*key, "rbd_")) { + std::cerr << "rbd: not rbd option: " << *key << std::endl; + return -EINVAL; + } + + std::string value; + int r = g_ceph_context->_conf.get_val(key->c_str(), &value); + if (r < 0) { + std::cerr << "rbd: invalid config key: " << *key << std::endl; + return -EINVAL; + } + + return 0; +} + +std::ostream& operator<<(std::ostream& os, + const librbd::config_source_t& source) { + switch (source) { + case RBD_CONFIG_SOURCE_CONFIG: + os << "config"; + break; + case RBD_CONFIG_SOURCE_POOL: + os << "pool"; + break; + case RBD_CONFIG_SOURCE_IMAGE: + os << "image"; + break; + default: + os << "unknown (" << static_cast<uint32_t>(source) << ")"; + break; + } + return os; +} + +int config_global_list( + librados::Rados &rados, const std::string &config_entity, + std::map<std::string, std::pair<std::string, std::string>> *options) { + bool client_id_config_entity = + 
boost::starts_with(config_entity, ("client.")); + std::string cmd = + "{" + "\"prefix\": \"config dump\", " + "\"format\": \"json\" " + "}"; + bufferlist in_bl; + bufferlist out_bl; + std::string ss; + int r = rados.mon_command(cmd, in_bl, &out_bl, &ss); + if (r < 0) { + std::cerr << "rbd: error reading config: " << ss << std::endl; + return r; + } + + json_spirit::mValue json_root; + if (!json_spirit::read(out_bl.to_str(), json_root)) { + std::cerr << "rbd: error parsing config dump" << std::endl; + return -EINVAL; + } + + try { + auto &json_array = json_root.get_array(); + for (auto& e : json_array) { + auto &json_obj = e.get_obj(); + std::string section; + std::string name; + std::string value; + + for (auto &pairs : json_obj) { + if (pairs.first == "section") { + section = pairs.second.get_str(); + } else if (pairs.first == "name") { + name = pairs.second.get_str(); + } else if (pairs.first == "value") { + value = pairs.second.get_str(); + } + } + + if (!boost::starts_with(name, "rbd_")) { + continue; + } + if (section != "global" && section != "client" && + (!client_id_config_entity || section != config_entity)) { + continue; + } + if (config_entity == "global" && section != "global") { + continue; + } + auto it = options->find(name); + if (it == options->end()) { + (*options)[name] = {value, section}; + continue; + } + if (section == "client") { + if (it->second.second == "global") { + it->second = {value, section}; + } + } else if (client_id_config_entity) { + it->second = {value, section}; + } + } + } catch (std::runtime_error &e) { + std::cerr << "rbd: error parsing config dump: " << e.what() << std::endl; + return -EINVAL; + } + + return 0; +} + +} // anonymous namespace + +void get_global_get_arguments(po::options_description *positional, + po::options_description *options) { + add_config_entity_option(positional); + add_key_option(positional); +} + +int execute_global_get(const po::variables_map &vm, + const std::vector<std::string> 
&ceph_global_init_args) { + std::string config_entity; + int r = get_config_entity(vm, &config_entity); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + std::map<std::string, std::pair<std::string, std::string>> options; + r = config_global_list(rados, config_entity, &options); + if (r < 0) { + return r; + } + + auto it = options.find(key); + + if (it == options.end() || it->second.second != config_entity) { + std::cerr << "rbd: " << key << " is not set" << std::endl; + return -ENOENT; + } + + std::cout << it->second.first << std::endl; + return 0; +} + +void get_global_set_arguments(po::options_description *positional, + po::options_description *options) { + add_config_entity_option(positional); + add_key_option(positional); + positional->add_options() + ("value", "config value"); +} + +int execute_global_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string config_entity; + int r = get_config_entity(vm, &config_entity); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + std::string value = utils::get_positional_argument(vm, 2); + std::string cmd = + "{" + "\"prefix\": \"config set\", " + "\"who\": \"" + stringify(json_stream_escaper(config_entity)) + "\", " + "\"name\": \"" + key + "\", " + "\"value\": \"" + stringify(json_stream_escaper(value)) + "\"" + "}"; + bufferlist in_bl; + std::string ss; + r = rados.mon_command(cmd, in_bl, nullptr, &ss); + if (r < 0) { + std::cerr << "rbd: error setting " << key << ": " << ss << std::endl; + return r; + } + + return 0; +} + +void get_global_remove_arguments(po::options_description *positional, + 
po::options_description *options) { + add_config_entity_option(positional); + add_key_option(positional); +} + +int execute_global_remove( + const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string config_entity; + int r = get_config_entity(vm, &config_entity); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + std::string cmd = + "{" + "\"prefix\": \"config rm\", " + "\"who\": \"" + stringify(json_stream_escaper(config_entity)) + "\", " + "\"name\": \"" + key + "\"" + "}"; + bufferlist in_bl; + std::string ss; + r = rados.mon_command(cmd, in_bl, nullptr, &ss); + if (r < 0) { + std::cerr << "rbd: error removing " << key << ": " << ss << std::endl; + return r; + } + + return 0; +} + +void get_global_list_arguments(po::options_description *positional, + po::options_description *options) { + add_config_entity_option(positional); + at::add_format_options(options); +} + +int execute_global_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string config_entity; + int r = get_config_entity(vm, &config_entity); + if (r < 0) { + return r; + } + + at::Format::Formatter f; + r = utils::get_formatter(vm, &f); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + std::map<std::string, std::pair<std::string, std::string>> options; + r = config_global_list(rados, config_entity, &options); + if (r < 0) { + return r; + } + + if (options.empty() && !f) { + return 0; + } + + TextTable tbl; + + if (f) { + f->open_array_section("config"); + } else { + tbl.define_column("Name", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Section", TextTable::LEFT, 
TextTable::LEFT); + } + + for (const auto &it : options) { + if (f) { + f->open_object_section("option"); + f->dump_string("name", it.first); + f->dump_string("value", it.second.first); + f->dump_string("section", it.second.second); + f->close_section(); + } else { + tbl << it.first << it.second.first << it.second.second + << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << tbl; + } + + return 0; +} + +void get_pool_get_arguments(po::options_description *positional, + po::options_description *options) { + add_pool_option(positional); + add_key_option(positional); +} + +int execute_pool_get(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + int r = get_pool(vm, &pool_name); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::string value; + + r = rbd.pool_metadata_get(io_ctx, METADATA_CONF_PREFIX + key, &value); + if (r < 0) { + if (r == -ENOENT) { + std::cerr << "rbd: " << key << " is not set" << std::endl; + } else { + std::cerr << "rbd: failed to get " << key << ": " << cpp_strerror(r) + << std::endl; + } + return r; + } + + std::cout << value << std::endl; + return 0; +} + +void get_pool_set_arguments(po::options_description *positional, + po::options_description *options) { + add_pool_option(positional); + add_key_option(positional); + positional->add_options() + ("value", "config value"); +} + +int execute_pool_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + int r = get_pool(vm, &pool_name); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { 
+ return r; + } + + std::string value = utils::get_positional_argument(vm, 2); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.pool_metadata_set(io_ctx, METADATA_CONF_PREFIX + key, value); + if (r < 0) { + std::cerr << "rbd: failed to set " << key << ": " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_pool_remove_arguments(po::options_description *positional, + po::options_description *options) { + add_pool_option(positional); + add_key_option(positional); +} + +int execute_pool_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + int r = get_pool(vm, &pool_name); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.pool_metadata_remove(io_ctx, METADATA_CONF_PREFIX + key); + if (r < 0) { + std::cerr << "rbd: failed to remove " << key << ": " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_pool_list_arguments(po::options_description *positional, + po::options_description *options) { + add_pool_option(positional); + at::add_format_options(options); +} + +int execute_pool_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + int r = get_pool(vm, &pool_name); + if (r < 0) { + return r; + } + + at::Format::Formatter f; + r = utils::get_formatter(vm, &f); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + TextTable tbl; + librbd::RBD rbd; + std::vector<librbd::config_option_t> options; + + 
r = rbd.config_list(io_ctx, &options); + if (r < 0) { + std::cerr << "rbd: failed to list config: " << cpp_strerror(r) << std::endl; + return r; + } + + if (f) { + f->open_array_section("config"); + } else { + tbl.define_column("Name", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Source", TextTable::LEFT, TextTable::LEFT); + } + + for (auto &option : options) { + if (f) { + f->open_object_section("option"); + f->dump_string("name", option.name); + f->dump_string("value", option.value); + f->dump_stream("source") << option.source; + f->close_section(); + } else { + std::ostringstream source; + source << option.source; + tbl << option.name << option.value << source.str() << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << tbl; + } + + return 0; +} + +void get_image_get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); +} + +int execute_image_get(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + std::string value; + + r = image.metadata_get(METADATA_CONF_PREFIX + key, 
&value); + if (r < 0) { + if (r == -ENOENT) { + std::cerr << "rbd: " << key << " is not set" << std::endl; + } else { + std::cerr << "rbd: failed to get " << key << ": " << cpp_strerror(r) + << std::endl; + } + return r; + } + + std::cout << value << std::endl; + return 0; +} + +void get_image_set_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); + positional->add_options() + ("value", "config value"); +} + +int execute_image_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + std::string value = utils::get_positional_argument(vm, arg_index); + if (value.empty()) { + std::cerr << "rbd: image config value was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = image.metadata_set(METADATA_CONF_PREFIX + key, value); + if (r < 0) { + std::cerr << "rbd: failed to set " << key << ": " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_image_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); +} + +int execute_image_remove( + 
const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = image.metadata_remove(METADATA_CONF_PREFIX + key); + if (r < 0) { + std::cerr << "rbd: failed to remove " << key << ": " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_image_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute_image_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + at::Format::Formatter f; + r = utils::get_formatter(vm, &f); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + 
return r; + } + + TextTable tbl; + std::vector<librbd::config_option_t> options; + + r = image.config_list(&options); + if (r < 0) { + std::cerr << "rbd: failed to list config: " << cpp_strerror(r) << std::endl; + return r; + } + + if (options.empty()) { + if (f == nullptr) { + std::cout << "There are no values" << std::endl; + } + return 0; + } + + if (f) { + f->open_array_section("config"); + } else { + tbl.define_column("Name", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Source", TextTable::LEFT, TextTable::LEFT); + } + + for (auto &option : options) { + if (f) { + f->open_object_section("option"); + f->dump_string("name", option.name); + f->dump_string("value", option.value); + f->dump_stream("source") << option.source; + f->close_section(); + } else { + std::ostringstream source; + source << option.source; + tbl << option.name << option.value << source.str() << TextTable::endrow; + } + } + + if (f == nullptr) { + bool single = (options.size() == 1); + std::cout << "There " << (single ? "is" : "are") << " " << options.size() + << " " << (single ? 
"value" : "values") << ":" << std::endl; + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << tbl; + } + + return 0; +} + +Shell::Action action_global_get( + {"config", "global", "get"}, {}, + "Get a global-level configuration override.", "", + &get_global_get_arguments, &execute_global_get); +Shell::Action action_global_set( + {"config", "global", "set"}, {}, + "Set a global-level configuration override.", "", + &get_global_set_arguments, &execute_global_set); +Shell::Action action_global_remove( + {"config", "global", "remove"}, {"config", "global", "rm"}, + "Remove a global-level configuration override.", "", + &get_global_remove_arguments, &execute_global_remove); +Shell::Action action_global_list( + {"config", "global", "list"}, {"config", "global", "ls"}, + "List global-level configuration overrides.", "", + &get_global_list_arguments, &execute_global_list); + +Shell::Action action_pool_get( + {"config", "pool", "get"}, {}, "Get a pool-level configuration override.", "", + &get_pool_get_arguments, &execute_pool_get); +Shell::Action action_pool_set( + {"config", "pool", "set"}, {}, "Set a pool-level configuration override.", "", + &get_pool_set_arguments, &execute_pool_set); +Shell::Action action_pool_remove( + {"config", "pool", "remove"}, {"config", "pool", "rm"}, + "Remove a pool-level configuration override.", "", + &get_pool_remove_arguments, &execute_pool_remove); +Shell::Action action_pool_list( + {"config", "pool", "list"}, {"config", "pool", "ls"}, + "List pool-level configuration overrides.", "", + &get_pool_list_arguments, &execute_pool_list); + +Shell::Action action_image_get( + {"config", "image", "get"}, {}, "Get an image-level configuration override.", + "", &get_image_get_arguments, &execute_image_get); +Shell::Action action_image_set( + {"config", "image", "set"}, {}, "Set an image-level configuration override.", + "", &get_image_set_arguments, &execute_image_set); +Shell::Action action_image_remove( + 
{"config", "image", "remove"}, {"config", "image", "rm"}, + "Remove an image-level configuration override.", "", + &get_image_remove_arguments, &execute_image_remove); +Shell::Action action_image_list( + {"config", "image", "list"}, {"config", "image", "ls"}, + "List image-level configuration overrides.", "", + &get_image_list_arguments, &execute_image_list); + +} // namespace config +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Copy.cc b/src/tools/rbd/action/Copy.cc new file mode 100644 index 000000000..9e42c0652 --- /dev/null +++ b/src/tools/rbd/action/Copy.cc @@ -0,0 +1,195 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace copy { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_copy(librbd::Image &src, librados::IoCtx& dest_pp, + const char *destname, librbd::ImageOptions& opts, + bool no_progress, + size_t sparse_size) +{ + utils::ProgressContext pc("Image copy", no_progress); + int r = src.copy_with_progress4(dest_pp, destname, opts, pc, sparse_size); + if (r < 0){ + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_SOURCE); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, false); + at::add_sparse_size_option(options); + at::add_no_progress_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string 
image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string dst_pool_name; + std::string dst_namespace_name; + std::string dst_image_name; + std::string dst_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, + &dst_namespace_name, &dst_image_name, &dst_snap_name, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, false, &opts); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + librados::IoCtx dst_io_ctx; + r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, &dst_io_ctx); + if (r < 0) { + return r; + } + + size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE; + if (vm.count(at::IMAGE_SPARSE_SIZE)) { + sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>(); + } + r = do_copy(image, dst_io_ctx, dst_image_name.c_str(), opts, + vm[at::NO_PROGRESS].as<bool>(), sparse_size); + if (r < 0) { + std::cerr << "rbd: copy failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"copy"}, {"cp"}, "Copy src image to dest.", at::get_long_features_help(), + &get_arguments, &execute); + +static int do_deep_copy(librbd::Image &src, librados::IoCtx& dest_pp, + const char *destname, librbd::ImageOptions& opts, + bool no_progress) +{ + utils::ProgressContext pc("Image deep copy", no_progress); + int r = src.deep_copy_with_progress(dest_pp, destname, opts, pc); + if (r < 0){ + pc.fail(); + return r; + } 
+ pc.finish(); + return 0; +} + +void get_arguments_deep(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_SOURCE); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, false); + at::add_flatten_option(options); + at::add_no_progress_option(options); +} + +int execute_deep(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string dst_pool_name; + std::string dst_namespace_name; + std::string dst_image_name; + std::string dst_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, + &dst_namespace_name, &dst_image_name, &dst_snap_name, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, false, &opts); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + librados::IoCtx dst_io_ctx; + r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, &dst_io_ctx); + if (r < 0) { + return r; + } + + r = do_deep_copy(image, dst_io_ctx, dst_image_name.c_str(), opts, + vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: deep copy failed: " << cpp_strerror(r) << std::endl; + return r; + 
} + return 0; +} + +Shell::Action action_deep( + {"deep", "copy"}, {"deep", "cp"}, "Deep copy (including snapshots) src image to dest.", + at::get_long_features_help(), &get_arguments_deep, &execute_deep); + +} // namespace copy +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Create.cc b/src/tools/rbd/action/Create.cc new file mode 100644 index 000000000..047a8cb77 --- /dev/null +++ b/src/tools/rbd/action/Create.cc @@ -0,0 +1,257 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/ceph_mutex.h" +#include "common/config_proxy.h" +#include "common/errno.h" +#include "global/global_context.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace create { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_create(librbd::RBD &rbd, librados::IoCtx& io_ctx, + const char *imgname, uint64_t size, + librbd::ImageOptions& opts) { + return rbd.create4(io_ctx, imgname, size, opts); +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_create_image_options(options, true); + options->add_options() + (at::IMAGE_THICK_PROVISION.c_str(), po::bool_switch(), "fully allocate storage and zero image"); + at::add_size_option(options); + at::add_no_progress_option(options); +} + +void thick_provision_writer_completion(rbd_completion_t, void *); + +struct thick_provision_writer { + librbd::Image *image; + ceph::mutex lock = ceph::make_mutex("thick_provision_writer::lock"); + ceph::condition_variable cond; + uint64_t chunk_size; + uint64_t concurr; + struct { + uint64_t in_flight; + int io_error; + } io_status; + + // Constructor + explicit 
thick_provision_writer(librbd::Image *i, librbd::ImageOptions &o) + : image(i) + { + // If error cases occur, the code is aborted, because + // constructor cannot return error value. + ceph_assert(g_ceph_context != nullptr); + + librbd::image_info_t info; + int r = image->stat(info, sizeof(info)); + ceph_assert(r >= 0); + + uint64_t order = info.order; + if (order == 0) { + order = g_conf().get_val<uint64_t>("rbd_default_order"); + } + + auto stripe_count = std::max<uint64_t>(1U, image->get_stripe_count()); + chunk_size = (1ull << order) * stripe_count; + + concurr = std::max<uint64_t>( + 1U, g_conf().get_val<uint64_t>("rbd_concurrent_management_ops") / + stripe_count); + + io_status.in_flight = 0; + io_status.io_error = 0; + } + + int start_io(uint64_t write_offset) + { + { + std::lock_guard l{lock}; + io_status.in_flight++; + if (io_status.in_flight > concurr) { + io_status.in_flight--; + return -EINVAL; + } + } + + librbd::RBD::AioCompletion *c; + c = new librbd::RBD::AioCompletion(this, thick_provision_writer_completion); + int r; + r = image->aio_write_zeroes(write_offset, chunk_size, c, + RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, + LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL); + if (r < 0) { + std::lock_guard l{lock}; + io_status.io_error = r; + } + return r; + } + + int wait_for(uint64_t max) { + using namespace std::chrono_literals; + std::unique_lock l{lock}; + int r = io_status.io_error; + + while (io_status.in_flight > max) { + cond.wait_for(l, 200ms); + } + return r; + } +}; + +void thick_provision_writer_completion(rbd_completion_t rc, void *pc) { + librbd::RBD::AioCompletion *ac = (librbd::RBD::AioCompletion *)rc; + thick_provision_writer *tc = static_cast<thick_provision_writer *>(pc); + + int r = ac->get_return_value(); + tc->lock.lock(); + if (r < 0 && tc->io_status.io_error >= 0) { + tc->io_status.io_error = r; + } + tc->io_status.in_flight--; + tc->cond.notify_all(); + tc->lock.unlock(); + ac->release(); +} + +int write_data(librbd::Image &image, 
librbd::ImageOptions &opts, + bool no_progress) { + uint64_t image_size; + int r = 0; + utils::ProgressContext pc("Thick provisioning", no_progress); + + if (image.size(&image_size) != 0) { + return -EINVAL; + } + + thick_provision_writer tpw(&image, opts); + uint64_t off; + uint64_t i; + for (off = 0; off < image_size;) { + i = 0; + while (i < tpw.concurr && off < image_size) { + tpw.wait_for(tpw.concurr - 1); + r = tpw.start_io(off); + if (r != 0) { + goto err_writesame; + } + ++i; + off += tpw.chunk_size; + if(off > image_size) { + off = image_size; + } + pc.update_progress(off, image_size); + } + } + + tpw.wait_for(0); + r = image.flush(); + if (r < 0) { + std::cerr << "rbd: failed to flush at the end: " << cpp_strerror(r) + << std::endl; + goto err_writesame; + } + pc.finish(); + + return r; + +err_writesame: + tpw.wait_for(0); + pc.fail(); + + return r; +} + +int thick_write(const std::string &image_name,librados::IoCtx &io_ctx, + librbd::ImageOptions &opts, bool no_progress) { + int r; + librbd::Image image; + + r = utils::open_image(io_ctx, image_name, false, &image); + if (r < 0) { + return r; + } + + r = write_data(image, opts, no_progress); + + image.close(); + + return r; +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, true, &opts); + if (r < 0) { + return r; + } + + uint64_t size; + r = utils::get_image_size(vm, &size); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, 
&io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = do_create(rbd, io_ctx, image_name.c_str(), size, opts); + if (!namespace_name.empty() && r == -ENOENT) { + std::cerr << "rbd: namespace not found - it must be created with " + << "'rbd namespace create' before creating an image." + << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: create error: " << cpp_strerror(r) << std::endl; + return r; + } + + if (vm.count(at::IMAGE_THICK_PROVISION) && vm[at::IMAGE_THICK_PROVISION].as<bool>()) { + r = thick_write(image_name, io_ctx, opts, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: image created but error encountered during thick provisioning: " + << cpp_strerror(r) << std::endl; + return r; + } + } + return 0; +} + +Shell::Action action( + {"create"}, {}, "Create an empty image.", at::get_long_features_help(), + &get_arguments, &execute); + +} // namespace create +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Device.cc b/src/tools/rbd/action/Device.cc new file mode 100644 index 000000000..878081438 --- /dev/null +++ b/src/tools/rbd/action/Device.cc @@ -0,0 +1,285 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "acconfig.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" + +#include <boost/program_options.hpp> + +#include "include/ceph_assert.h" + +namespace rbd { +namespace action { + +namespace at = argument_types; +namespace po = boost::program_options; + +#define DECLARE_DEVICE_OPERATIONS(ns) \ + namespace ns { \ + int execute_list(const po::variables_map &vm, \ + const std::vector<std::string> &ceph_global_args); \ + int execute_map(const po::variables_map &vm, \ + const std::vector<std::string> &ceph_global_args); \ + int execute_unmap(const po::variables_map &vm, \ + const std::vector<std::string> &ceph_global_args); \ + int execute_attach(const po::variables_map &vm, \ + const 
std::vector<std::string> &ceph_global_args); \ + int execute_detach(const po::variables_map &vm, \ + const std::vector<std::string> &ceph_global_args); \ + } + +DECLARE_DEVICE_OPERATIONS(ggate); +DECLARE_DEVICE_OPERATIONS(kernel); +DECLARE_DEVICE_OPERATIONS(nbd); +DECLARE_DEVICE_OPERATIONS(wnbd); + +namespace device { + +namespace { + +struct DeviceOperations { + int (*execute_list)(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args); + int (*execute_map)(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args); + int (*execute_unmap)(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args); + int (*execute_attach)(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args); + int (*execute_detach)(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args); +}; + +const DeviceOperations ggate_operations = { + ggate::execute_list, + ggate::execute_map, + ggate::execute_unmap, + ggate::execute_attach, + ggate::execute_detach, +}; + +const DeviceOperations krbd_operations = { + kernel::execute_list, + kernel::execute_map, + kernel::execute_unmap, + kernel::execute_attach, + kernel::execute_detach, +}; + +const DeviceOperations nbd_operations = { + nbd::execute_list, + nbd::execute_map, + nbd::execute_unmap, + nbd::execute_attach, + nbd::execute_detach, +}; + +const DeviceOperations wnbd_operations = { + wnbd::execute_list, + wnbd::execute_map, + wnbd::execute_unmap, + wnbd::execute_attach, + wnbd::execute_detach, +}; + +enum device_type_t { + DEVICE_TYPE_GGATE, + DEVICE_TYPE_KRBD, + DEVICE_TYPE_NBD, + DEVICE_TYPE_WNBD, +}; + +struct DeviceType {}; + +void validate(boost::any& v, const std::vector<std::string>& values, + DeviceType *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + #ifdef _WIN32 + if (s == "wnbd") { + v = boost::any(DEVICE_TYPE_WNBD); + #else + if 
(s == "nbd") { + v = boost::any(DEVICE_TYPE_NBD); + } else if (s == "ggate") { + v = boost::any(DEVICE_TYPE_GGATE); + } else if (s == "krbd") { + v = boost::any(DEVICE_TYPE_KRBD); + #endif /* _WIN32 */ + } else { + throw po::validation_error(po::validation_error::invalid_option_value); + } +} + +void add_device_type_option(po::options_description *options) { + options->add_options() + ("device-type,t", po::value<DeviceType>(), +#ifdef _WIN32 + "device type [wnbd]"); +#else + "device type [ggate, krbd (default), nbd]"); +#endif +} + +void add_device_specific_options(po::options_description *options) { + options->add_options() + ("options,o", po::value<std::vector<std::string>>(), + "device specific options"); +} + +device_type_t get_device_type(const po::variables_map &vm) { + if (vm.count("device-type")) { + return vm["device-type"].as<device_type_t>(); + } + #ifndef _WIN32 + return DEVICE_TYPE_KRBD; + #else + return DEVICE_TYPE_WNBD; + #endif +} + +const DeviceOperations *get_device_operations(const po::variables_map &vm) { + switch (get_device_type(vm)) { + case DEVICE_TYPE_GGATE: + return &ggate_operations; + case DEVICE_TYPE_KRBD: + return &krbd_operations; + case DEVICE_TYPE_NBD: + return &nbd_operations; + case DEVICE_TYPE_WNBD: + return &wnbd_operations; + default: + ceph_abort(); + return nullptr; + } +} + +} // anonymous namespace + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + add_device_type_option(options); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return (*get_device_operations(vm)->execute_list)(vm, ceph_global_init_args); +} + +void get_map_arguments(po::options_description *positional, + po::options_description *options) { + add_device_type_option(options); + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + options->add_options() + ("show-cookie", 
po::bool_switch(), "show device cookie") + ("cookie", po::value<std::string>(), "specify device cookie") + ("read-only", po::bool_switch(), "map read-only") + ("exclusive", po::bool_switch(), "disable automatic exclusive lock transitions") + ("quiesce", po::bool_switch(), "use quiesce hooks") + ("quiesce-hook", po::value<std::string>(), "quiesce hook path"); + at::add_snap_id_option(options); + add_device_specific_options(options); +} + +int execute_map(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return (*get_device_operations(vm)->execute_map)(vm, ceph_global_init_args); +} + +void get_unmap_arguments(po::options_description *positional, + po::options_description *options) { + add_device_type_option(options); + positional->add_options() + ("image-or-snap-or-device-spec", + "image, snapshot, or device specification\n" + "[<pool-name>/[<namespace>/]]<image-name>[@<snap-name>] or <device-path>"); + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_snap_id_option(options); + add_device_specific_options(options); +} + +int execute_unmap(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return (*get_device_operations(vm)->execute_unmap)(vm, ceph_global_init_args); +} + +void get_attach_arguments(po::options_description *positional, + po::options_description *options) { + add_device_type_option(options); + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + options->add_options() + ("device", po::value<std::string>()->required(), "specify device path") + ("show-cookie", po::bool_switch(), "show device cookie") + ("cookie", po::value<std::string>(), "specify device cookie") + ("read-only", po::bool_switch(), "attach read-only") + ("force", 
po::bool_switch(), "force attach") + ("exclusive", po::bool_switch(), "disable automatic exclusive lock transitions") + ("quiesce", po::bool_switch(), "use quiesce hooks") + ("quiesce-hook", po::value<std::string>(), "quiesce hook path"); + at::add_snap_id_option(options); + add_device_specific_options(options); +} + +int execute_attach(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return (*get_device_operations(vm)->execute_attach)(vm, ceph_global_init_args); +} + +void get_detach_arguments(po::options_description *positional, + po::options_description *options) { + add_device_type_option(options); + positional->add_options() + ("image-or-snap-or-device-spec", + "image, snapshot, or device specification\n" + "[<pool-name>/[<namespace>/]]<image-name>[@<snap-name>] or <device-path>"); + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_snap_id_option(options); + add_device_specific_options(options); +} + +int execute_detach(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return (*get_device_operations(vm)->execute_detach)(vm, ceph_global_init_args); +} + +Shell::SwitchArguments switched_arguments({"exclusive", "force", "quiesce", + "read-only", "show-cookie"}); + +Shell::Action action_list( + {"device", "list"}, {"showmapped"}, "List mapped rbd images.", "", + &get_list_arguments, &execute_list); +// yet another alias for list command +Shell::Action action_ls( + {"device", "ls"}, {}, "List mapped rbd images.", "", + &get_list_arguments, &execute_list, false); + +Shell::Action action_map( + {"device", "map"}, {"map"}, "Map an image to a block device.", "", + &get_map_arguments, &execute_map); + +Shell::Action action_unmap( + {"device", "unmap"}, {"unmap"}, "Unmap a rbd device.", "", 
+ &get_unmap_arguments, &execute_unmap); + +Shell::Action action_attach( + {"device", "attach"}, {}, "Attach image to device.", "", + &get_attach_arguments, &execute_attach); + +Shell::Action action_detach( + {"device", "detach"}, {}, "Detach image from device.", "", + &get_detach_arguments, &execute_detach); + +} // namespace device +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Diff.cc b/src/tools/rbd/action/Diff.cc new file mode 100644 index 000000000..838ef6cc5 --- /dev/null +++ b/src/tools/rbd/action/Diff.cc @@ -0,0 +1,142 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace diff { + +namespace at = argument_types; +namespace po = boost::program_options; + +struct output_method { + output_method() : f(NULL), t(NULL), empty(true) {} + Formatter *f; + TextTable *t; + bool empty; +}; + +static int diff_cb(uint64_t ofs, size_t len, int exists, void *arg) +{ + output_method *om = static_cast<output_method *>(arg); + om->empty = false; + if (om->f) { + om->f->open_object_section("extent"); + om->f->dump_unsigned("offset", ofs); + om->f->dump_unsigned("length", len); + om->f->dump_string("exists", exists ? "true" : "false"); + om->f->close_section(); + } else { + ceph_assert(om->t); + *(om->t) << ofs << len << (exists ? 
"data" : "zero") << TextTable::endrow; + } + return 0; +} + +static int do_diff(librbd::Image& image, const char *fromsnapname, + bool whole_object, Formatter *f) +{ + int r; + librbd::image_info_t info; + + r = image.stat(info, sizeof(info)); + if (r < 0) + return r; + + output_method om; + if (f) { + om.f = f; + f->open_array_section("extents"); + } else { + om.t = new TextTable(); + om.t->define_column("Offset", TextTable::LEFT, TextTable::LEFT); + om.t->define_column("Length", TextTable::LEFT, TextTable::LEFT); + om.t->define_column("Type", TextTable::LEFT, TextTable::LEFT); + } + + r = image.diff_iterate2(fromsnapname, 0, info.size, true, whole_object, + diff_cb, &om); + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + if (!om.empty) + std::cout << *om.t; + delete om.t; + } + return r; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + options->add_options() + (at::FROM_SNAPSHOT_NAME.c_str(), po::value<std::string>(), + "snapshot starting point") + (at::WHOLE_OBJECT.c_str(), po::bool_switch(), "compare whole object"); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string from_snap_name; + if (vm.count(at::FROM_SNAPSHOT_NAME)) { + from_snap_name = vm[at::FROM_SNAPSHOT_NAME].as<std::string>(); + } + + bool diff_whole_object = vm[at::WHOLE_OBJECT].as<bool>(); + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, 
&formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_diff(image, from_snap_name.empty() ? nullptr : from_snap_name.c_str(), + diff_whole_object, formatter.get()); + if (r < 0) { + std::cerr << "rbd: diff error: " << cpp_strerror(r) << std::endl; + return -r; + } + return 0; +} + +Shell::Action action( + {"diff"}, {}, + "Print extents that differ since a previous snap, or image creation.", "", + &get_arguments, &execute); + +} // namespace diff +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/DiskUsage.cc b/src/tools/rbd/action/DiskUsage.cc new file mode 100644 index 000000000..12fb8cfde --- /dev/null +++ b/src/tools/rbd/action/DiskUsage.cc @@ -0,0 +1,377 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/types.h" +#include "include/stringify.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <algorithm> +#include <iostream> +#include <boost/bind/bind.hpp> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace disk_usage { + +namespace at = argument_types; +namespace po = boost::program_options; +using namespace boost::placeholders; + +static int disk_usage_callback(uint64_t offset, size_t len, int exists, + void *arg) { + uint64_t *used_size = reinterpret_cast<uint64_t *>(arg); + if (exists) { + (*used_size) += len; + } + return 0; +} + +static int get_image_disk_usage(const std::string& name, + const std::string& snap_name, + const std::string& from_snap_name, + librbd::Image &image, + bool exact, + uint64_t size, + uint64_t *used_size){ + + const char* from = 
// Compute the number of bytes actually used by an image (or one of its
// snapshots) by walking the diff relative to |from_snap_name|; an empty
// from-snap means "since image creation".
static int get_image_disk_usage(const std::string& name,
                                const std::string& snap_name,
                                const std::string& from_snap_name,
                                librbd::Image &image,
                                bool exact,
                                uint64_t size,
                                uint64_t *used_size){

  // diff_iterate2 takes a const char* snapshot name; NULL means "from the
  // beginning of the image's history".
  const char* from = NULL;
  if (!from_snap_name.empty()) {
    from = from_snap_name.c_str();
  }

  uint64_t flags;
  int r = image.get_flags(&flags);
  if (r < 0) {
    std::cerr << "rbd: failed to retrieve image flags: " << cpp_strerror(r)
              << std::endl;
    return r;
  }
  // An invalid fast-diff map forces a slow full scan; warn but continue.
  if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
    std::cerr << "warning: fast-diff map is invalid for " << name
              << (snap_name.empty() ? "" : "@" + snap_name) << ". "
              << "operation may be slow." << std::endl;
  }

  *used_size = 0;
  // whole_object = !exact: when not exact, a whole object is counted as
  // used as soon as any part of it differs (fast, approximate answer).
  r = image.diff_iterate2(from, 0, size, false, !exact,
                          &disk_usage_callback, used_size);
  if (r < 0) {
    std::cerr << "rbd: failed to iterate diffs: " << cpp_strerror(r)
              << std::endl;
    return r;
  }

  return 0;
}

// Emit one result row, either into the Formatter (json/xml output) or into
// the plain-text table. Snapshot fields are only emitted when a snapshot
// name is present.
void format_image_disk_usage(const std::string& name,
                             const std::string& id,
                             const std::string& snap_name,
                             uint64_t snap_id,
                             uint64_t size,
                             uint64_t used_size,
                             TextTable& tbl, Formatter *f) {
  if (f) {
    f->open_object_section("image");
    f->dump_string("name", name);
    f->dump_string("id", id);
    if (!snap_name.empty()) {
      f->dump_string("snapshot", snap_name);
      f->dump_unsigned("snapshot_id", snap_id);
    }
    f->dump_unsigned("provisioned_size", size);
    f->dump_unsigned("used_size", used_size);
    f->close_section();
  } else {
    // Plain output identifies snapshots as "image@snap".
    std::string full_name = name;
    if (!snap_name.empty()) {
      full_name += "@" + snap_name;
    }
    tbl << full_name
        << stringify(byte_u_t(size))
        << stringify(byte_u_t(used_size))
        << TextTable::endrow;
  }
}
// Core of "rbd du": iterate all images in the pool (or just |imgname|),
// compute per-snapshot and per-HEAD usage via diff iteration, and render
// either a TextTable or Formatter output. |merge_snap| folds snapshot
// usage into the image's row instead of emitting one row per snapshot.
static int do_disk_usage(librbd::RBD &rbd, librados::IoCtx &io_ctx,
                         const char *imgname, const char *snapname,
                         const char *from_snapname, bool exact, Formatter *f,
                         bool merge_snap) {
  std::vector<librbd::image_spec_t> images;
  int r = rbd.list2(io_ctx, &images);
  if (r == -ENOENT) {
    // An empty/nonexistent listing is treated as "no images", not an error.
    r = 0;
  } else if (r < 0) {
    return r;
  }

  TextTable tbl;
  if (f) {
    f->open_object_section("stats");
    f->open_array_section("images");
  } else {
    tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT);
    tbl.define_column("PROVISIONED", TextTable::LEFT, TextTable::RIGHT);
    tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
  }

  uint32_t count = 0;
  uint64_t used_size = 0;
  uint64_t total_prov = 0;
  uint64_t total_used = 0;
  // NOTE(review): snap_id/from_id are declared outside the image loop, so
  // a match found for one image carries over to later iterations; with a
  // single imgname filter this is harmless, but verify for the all-images
  // path.
  uint64_t snap_id = CEPH_NOSNAP;
  uint64_t from_id = CEPH_NOSNAP;
  bool found = false;
  for (auto& image_spec : images) {
    if (imgname != NULL && image_spec.name != imgname) {
      continue;
    }
    found = true;

    librbd::Image image;
    r = rbd.open_read_only(io_ctx, image, image_spec.name.c_str(), NULL);
    if (r < 0) {
      // Images can disappear between list2() and open; skip quietly on
      // ENOENT, report anything else but keep going.
      if (r != -ENOENT) {
        std::cerr << "rbd: error opening " << image_spec.name << ": "
                  << cpp_strerror(r) << std::endl;
      }
      continue;
    }

    uint64_t features;
    r = image.features(&features);
    if (r < 0) {
      std::cerr << "rbd: failed to retrieve image features: " << cpp_strerror(r)
                << std::endl;
      goto out;
    }
    if ((features & RBD_FEATURE_FAST_DIFF) == 0) {
      std::cerr << "warning: fast-diff map is not enabled for "
                << image_spec.name << ". " << "operation may be slow."
                << std::endl;
    }

    librbd::image_info_t info;
    if (image.stat(info, sizeof(info)) < 0) {
      r = -EINVAL;
      goto out;
    }

    std::vector<librbd::snap_info_t> snap_list;
    r = image.snap_list(snap_list);
    if (r < 0) {
      std::cerr << "rbd: error opening " << image_spec.name << " snapshots: "
                << cpp_strerror(r) << std::endl;
      continue;
    }

    // Only user-namespace snapshots participate in du accounting.
    snap_list.erase(remove_if(snap_list.begin(),
                              snap_list.end(),
                              boost::bind(utils::is_not_user_snap_namespace, &image, _1)),
                    snap_list.end());

    bool found_from_snap = (from_snapname == nullptr);
    bool found_snap = (snapname == nullptr);
    bool found_from = (from_snapname == nullptr);
    std::string last_snap_name;
    // Snapshots are processed oldest-first (ascending snap id) so each
    // diff is computed against the previous snapshot.
    std::sort(snap_list.begin(), snap_list.end(),
              boost::bind(&librbd::snap_info_t::id, _1) <
                boost::bind(&librbd::snap_info_t::id, _2));
    if (!found_snap || !found_from) {
      // Resolve the requested snapshot names to snap ids for validation.
      for (auto &snap_info : snap_list) {
        if (!found_snap && snap_info.name == snapname) {
          snap_id = snap_info.id;
          found_snap = true;
        }
        if (!found_from && snap_info.name == from_snapname) {
          from_id = snap_info.id;
          found_from = true;
        }
        if (found_snap && found_from) {
          break;
        }
      }
    }
    // NOTE(review): the three early returns below bypass the "out:" label,
    // leaving the Formatter's "stats"/"images" sections unclosed when
    // json/xml output was requested — confirm whether that is acceptable
    // for error paths.
    if ((snapname != nullptr && snap_id == CEPH_NOSNAP) ||
        (from_snapname != nullptr && from_id == CEPH_NOSNAP)) {
      std::cerr << "specified snapshot is not found." << std::endl;
      return -ENOENT;
    }
    if (snap_id != CEPH_NOSNAP && from_id != CEPH_NOSNAP) {
      if (from_id == snap_id) {
        // no diskusage.
        return 0;
      }
      if (from_id >= snap_id) {
        // from-snap must be older than the end snapshot.
        return -EINVAL;
      }
    }

    uint64_t image_full_used_size = 0;

    for (std::vector<librbd::snap_info_t>::const_iterator snap =
           snap_list.begin(); snap != snap_list.end(); ++snap) {
      librbd::Image snap_image;
      r = rbd.open_read_only(io_ctx, snap_image, image_spec.name.c_str(),
                             snap->name.c_str());
      if (r < 0) {
        std::cerr << "rbd: error opening snapshot " << image_spec.name << "@"
                  << snap->name << ": " << cpp_strerror(r) << std::endl;
        goto out;
      }

      // NOTE(review): the third disjunct is subsumed by the second
      // (found_from_snap already true whenever it holds) — looks like it
      // was meant to restrict output to the requested end snapshot;
      // confirm against upstream intent.
      if (imgname == nullptr || found_from_snap ||
          (found_from_snap && snapname != nullptr && snap->name == snapname)) {

        // Usage of this snapshot is the diff against the previous one.
        r = get_image_disk_usage(image_spec.name, snap->name, last_snap_name, snap_image, exact, snap->size, &used_size);
        if (r < 0) {
          goto out;
        }
        if (!merge_snap) {
          format_image_disk_usage(image_spec.name, image_spec.id, snap->name,
                                  snap->id, snap->size, used_size, tbl, f);
        }

        image_full_used_size += used_size;

        if (snapname != NULL) {
          total_prov += snap->size;
        }
        total_used += used_size;
        ++count;
      }

      if (!found_from_snap && from_snapname != nullptr &&
          snap->name == from_snapname) {
        found_from_snap = true;
      }
      if (snapname != nullptr && snap->name == snapname) {
        break;
      }
      last_snap_name = snap->name;
    }

    if (snapname == NULL) {
      // HEAD usage: diff of the live image against its newest snapshot.
      r = get_image_disk_usage(image_spec.name, "", last_snap_name, image, exact, info.size, &used_size);
      if (r < 0) {
        goto out;
      }

      image_full_used_size += used_size;

      if (!merge_snap) {
        format_image_disk_usage(image_spec.name, image_spec.id, "", CEPH_NOSNAP,
                                info.size, used_size, tbl, f);
      } else {
        // merged mode: one row per image carrying the summed snapshot usage
        format_image_disk_usage(image_spec.name, image_spec.id, "", CEPH_NOSNAP,
                                info.size, image_full_used_size, tbl, f);
      }

      total_prov += info.size;
      total_used += used_size;
      ++count;
    }
  }
  if (imgname != nullptr && !found) {
    std::cerr << "specified image " << imgname << " is not found."
              << std::endl;
    return -ENOENT;
  }

out:
  if (f) {
    f->close_section();
    if (imgname == NULL) {
      f->dump_unsigned("total_provisioned_size", total_prov);
      f->dump_unsigned("total_used_size", total_used);
    }
    f->close_section();
    f->flush(std::cout);
  } else if (!images.empty()) {
    if (count > 1) {
      // Only print a TOTAL row when more than one row was emitted.
      tbl << "<TOTAL>"
          << stringify(byte_u_t(total_prov))
          << stringify(byte_u_t(total_used))
          << TextTable::endrow;
    }
    std::cout << tbl;
  }

  return r < 0 ? r : 0;
}

// Register positional/optional CLI arguments for "rbd du".
void get_arguments(po::options_description *positional,
                   po::options_description *options) {
  at::add_image_or_snap_spec_options(positional, options,
                                     at::ARGUMENT_MODIFIER_NONE);
  at::add_format_options(options);
  options->add_options()
    (at::FROM_SNAPSHOT_NAME.c_str(), po::value<std::string>(),
     "snapshot starting point")
    ("exact", po::bool_switch(), "compute exact disk usage (slow)")
    ("merge-snapshots", po::bool_switch(),
     "merge snapshot sizes with its image");
}

// CLI entry point for "rbd du": parse the spec, connect, and delegate to
// do_disk_usage().
int execute(const po::variables_map &vm,
            const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, vm.count(at::FROM_SNAPSHOT_NAME),
    utils::SNAPSHOT_PRESENCE_PERMITTED, utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  std::string from_snap_name;
  if (vm.count(at::FROM_SNAPSHOT_NAME)) {
    from_snap_name = vm[at::FROM_SNAPSHOT_NAME].as<std::string>();
  }

  at::Format::Formatter formatter;
  r = utils::get_formatter(vm, &formatter);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  // du reads large amounts of data once; the client cache only hurts here.
  utils::disable_cache();

  librbd::RBD rbd;
  r = do_disk_usage(rbd, io_ctx,
                    image_name.empty() ? nullptr: image_name.c_str(),
                    snap_name.empty() ? nullptr : snap_name.c_str(),
                    from_snap_name.empty() ? nullptr : from_snap_name.c_str(),
                    vm["exact"].as<bool>(), formatter.get(),
                    vm["merge-snapshots"].as<bool>());
  if (r < 0) {
    std::cerr << "rbd: du failed: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

Shell::SwitchArguments switched_arguments({"exact", "merge-snapshots"});
Shell::Action action(
  {"disk-usage"}, {"du"}, "Show disk usage stats for pool, image or snapshot.",
  "", &get_arguments, &execute);
nullptr: image_name.c_str(), + snap_name.empty() ? nullptr : snap_name.c_str(), + from_snap_name.empty() ? nullptr : from_snap_name.c_str(), + vm["exact"].as<bool>(), formatter.get(), + vm["merge-snapshots"].as<bool>()); + if (r < 0) { + std::cerr << "rbd: du failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::SwitchArguments switched_arguments({"exact", "merge-snapshots"}); +Shell::Action action( + {"disk-usage"}, {"du"}, "Show disk usage stats for pool, image or snapshot.", + "", &get_arguments, &execute); + +} // namespace disk_usage +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Encryption.cc b/src/tools/rbd/action/Encryption.cc new file mode 100644 index 000000000..ecd4f0cb5 --- /dev/null +++ b/src/tools/rbd/action/Encryption.cc @@ -0,0 +1,117 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "include/scope_guard.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <fstream> +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace encryption { + +namespace at = argument_types; +namespace po = boost::program_options; + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + positional->add_options() + ("format", "encryption format [possible values: luks1, luks2]") + ("passphrase-file", + "path of file containing passphrase for unlocking the image"); + options->add_options() + ("cipher-alg", po::value<at::EncryptionAlgorithm>(), + "encryption algorithm [possible values: aes-128, aes-256 (default)]"); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + 
std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string format_str = utils::get_positional_argument(vm, arg_index++); + if (format_str.empty()) { + std::cerr << "rbd: must specify format." << std::endl; + return -EINVAL; + } + + std::string passphrase_file = + utils::get_positional_argument(vm, arg_index++); + if (passphrase_file.empty()) { + std::cerr << "rbd: must specify passphrase-file." << std::endl; + return -EINVAL; + } + + auto alg = RBD_ENCRYPTION_ALGORITHM_AES256; + if (vm.count("cipher-alg")) { + alg = vm["cipher-alg"].as<librbd::encryption_algorithm_t>(); + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + std::ifstream file(passphrase_file, std::ios::in | std::ios::binary); + if (file.fail()) { + std::cerr << "rbd: unable to open passphrase file '" << passphrase_file + << "': " << cpp_strerror(errno) << std::endl; + return -errno; + } + std::string passphrase((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); + file.close(); + + if (format_str == "luks1") { + librbd::encryption_luks1_format_options_t opts = { + alg, std::move(passphrase)}; + r = image.encryption_format( + RBD_ENCRYPTION_FORMAT_LUKS1, &opts, sizeof(opts)); + ceph_memzero_s(opts.passphrase.data(), opts.passphrase.size(), + opts.passphrase.size()); + } else if (format_str == "luks2") { + librbd::encryption_luks2_format_options_t opts = { + alg, std::move(passphrase)}; + r = image.encryption_format( + RBD_ENCRYPTION_FORMAT_LUKS2, &opts, sizeof(opts)); + ceph_memzero_s(opts.passphrase.data(), 
// Shared state for one export-diff run: the source image, destination fd,
// wire-format version, total size (for progress), and an OrderedThrottle
// that bounds in-flight reads while guaranteeing extents are written to
// the fd in offset order.
struct ExportDiffContext {
  librbd::Image *image;
  int fd;
  int export_format;
  uint64_t totalsize;
  utils::ProgressContext pc;
  OrderedThrottle throttle;

  ExportDiffContext(librbd::Image *i, int f, uint64_t t, int max_ops,
                    bool no_progress, int eformat) :
    image(i), fd(f), export_format(eformat), totalsize(t), pc("Exporting image", no_progress),
    throttle(max_ops, true) {
  }
};

// Completion for one diff extent: asynchronously reads the extent (when it
// exists) and, on completion, writes the extent header and data to the
// export fd. Ordering across extents is enforced by the OrderedThrottle.
class C_ExportDiff : public Context {
public:
  C_ExportDiff(ExportDiffContext *edc, uint64_t offset, uint64_t length,
               bool exists, int export_format)
    : m_export_diff_context(edc), m_offset(offset), m_length(length),
      m_exists(exists), m_export_format(export_format) {
  }

  // Kick off the async read for this extent (or complete immediately for a
  // hole). Returns a pending throttle error, if any, instead of starting.
  int send() {
    if (m_export_diff_context->throttle.pending_error()) {
      return m_export_diff_context->throttle.wait_for_ret();
    }

    C_OrderedThrottle *ctx = m_export_diff_context->throttle.start_op(this);
    if (m_exists) {
      librbd::RBD::AioCompletion *aio_completion =
        new librbd::RBD::AioCompletion(ctx, &utils::aio_context_callback);

      // The read is one-shot; bypass the client cache.
      int op_flags = LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
      int r = m_export_diff_context->image->aio_read2(
        m_offset, m_length, m_read_data, aio_completion, op_flags);
      if (r < 0) {
        aio_completion->release();
        ctx->complete(r);
      }
    } else {
      // Hole: nothing to read, complete the ordered op right away.
      ctx->complete(0);
    }
    return 0;
  }

  // diff_iterate2 callback: allocate and dispatch a completion per extent.
  static int export_diff_cb(uint64_t offset, size_t length, int exists,
                            void *arg) {
    ExportDiffContext *edc = reinterpret_cast<ExportDiffContext *>(arg);

    C_ExportDiff *context = new C_ExportDiff(edc, offset, length, exists, edc->export_format);
    return context->send();
  }

protected:
  // Runs in offset order (via OrderedThrottle): write the extent record,
  // then the data for non-zero extents.
  void finish(int r) override {
    if (r >= 0) {
      if (m_exists) {
        // Demote all-zero reads to a ZERO record to keep the diff small.
        m_exists = !m_read_data.is_zero();
      }
      r = write_extent(m_export_diff_context, m_offset, m_length, m_exists, m_export_format);
      if (r == 0 && m_exists) {
        r = m_read_data.write_fd(m_export_diff_context->fd);
      }
    }
    m_export_diff_context->throttle.end_op(r);
  }

private:
  ExportDiffContext *m_export_diff_context;
  uint64_t m_offset;
  uint64_t m_length;
  bool m_exists;
  int m_export_format;
  bufferlist m_read_data;

  // Encode and write one extent header record:
  //   v1: tag, offset, length
  //   v2: tag, record length, offset, length
  // WRITE records are followed by |length| bytes of data (written by the
  // caller); ZERO records carry no data.
  static int write_extent(ExportDiffContext *edc, uint64_t offset,
                          uint64_t length, bool exists, int export_format) {
    // extent
    bufferlist bl;
    __u8 tag = exists ? RBD_DIFF_WRITE : RBD_DIFF_ZERO;
    uint64_t len = 0;
    encode(tag, bl);
    if (export_format == 2) {
      // v2 prefixes each record with its payload length (8+8 for the
      // offset/length fields, plus the data for WRITE records).
      if (tag == RBD_DIFF_WRITE)
        len = 8 + 8 + length;
      else
        len = 8 + 8;
      encode(len, bl);
    }
    encode(offset, bl);
    encode(length, bl);
    int r = bl.write_fd(edc->fd);

    edc->pc.update_progress(offset, edc->totalsize);
    return r;
  }
};
// Stream an incremental diff (fromsnapname -> endsnapname) of |image| to an
// already-open fd in the rbd diff v1 or v2 wire format: banner, optional
// FROM/TO snap records, (v2) protection status, image size, then the extent
// records produced by C_ExportDiff, and finally an END tag.
int do_export_diff_fd(librbd::Image& image, const char *fromsnapname,
                      const char *endsnapname, bool whole_object,
                      int fd, bool no_progress, int export_format)
{
  int r;
  librbd::image_info_t info;

  r = image.stat(info, sizeof(info));
  if (r < 0)
    return r;

  {
    // header
    bufferlist bl;
    if (export_format == 1)
      bl.append(utils::RBD_DIFF_BANNER);
    else
      bl.append(utils::RBD_DIFF_BANNER_V2);

    __u8 tag;
    uint64_t len = 0;
    if (fromsnapname) {
      tag = RBD_DIFF_FROM_SNAP;
      encode(tag, bl);
      std::string from(fromsnapname);
      if (export_format == 2) {
        // v2 record length: 4-byte string length prefix + the bytes.
        len = from.length() + 4;
        encode(len, bl);
      }
      encode(from, bl);
    }

    if (endsnapname) {
      tag = RBD_DIFF_TO_SNAP;
      encode(tag, bl);
      std::string to(endsnapname);
      if (export_format == 2) {
        len = to.length() + 4;
        encode(len, bl);
      }
      encode(to, bl);
    }

    // v2 additionally records whether the end snapshot is protected.
    if (endsnapname && export_format == 2) {
      tag = RBD_SNAP_PROTECTION_STATUS;
      encode(tag, bl);
      bool is_protected = false;
      r = image.snap_is_protected(endsnapname, &is_protected);
      if (r < 0) {
        return r;
      }
      len = 1;
      encode(len, bl);
      encode(is_protected, bl);
    }

    tag = RBD_DIFF_IMAGE_SIZE;
    encode(tag, bl);
    uint64_t endsize = info.size;
    if (export_format == 2) {
      len = 8;
      encode(len, bl);
    }
    encode(endsize, bl);

    r = bl.write_fd(fd);
    if (r < 0) {
      return r;
    }
  }
  // Extents are read/written concurrently but land on the fd in order.
  ExportDiffContext edc(&image, fd, info.size,
                        g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"),
                        no_progress, export_format);
  r = image.diff_iterate2(fromsnapname, 0, info.size, true, whole_object,
                          &C_ExportDiff::export_diff_cb, (void *)&edc);
  if (r < 0) {
    goto out;
  }

  // Drain all in-flight extent ops before writing the trailer.
  r = edc.throttle.wait_for_ret();
  if (r < 0) {
    goto out;
  }

  {
    __u8 tag = RBD_DIFF_END;
    bufferlist bl;
    encode(tag, bl);
    r = bl.write_fd(fd);
  }

out:
  if (r < 0)
    edc.pc.fail();
  else
    edc.pc.finish();

  return r;
}

// Open |path| (or stdout for "-") and delegate to do_export_diff_fd using
// the v1 format; removes a partially-written file on failure.
int do_export_diff(librbd::Image& image, const char *fromsnapname,
                   const char *endsnapname, bool whole_object,
                   const char *path, bool no_progress)
{
  int r;
  int fd;

  if (strcmp(path, "-") == 0)
    fd = STDOUT_FILENO;
  else
    fd = open(path, O_WRONLY | O_CREAT | O_EXCL | O_BINARY, 0644);
  if (fd < 0)
    return -errno;

  r = do_export_diff_fd(image, fromsnapname, endsnapname, whole_object, fd, no_progress, 1);

  // fd == 1 is stdout: never close it or remove "path".
  if (fd != 1)
    close(fd);
  if (r < 0 && fd != 1) {
    remove(path);
  }

  return r;
}


namespace at = argument_types;
namespace po = boost::program_options;

// Register CLI arguments for "rbd export-diff".
void get_arguments_diff(po::options_description *positional,
                        po::options_description *options) {
  at::add_image_or_snap_spec_options(positional, options,
                                     at::ARGUMENT_MODIFIER_SOURCE);
  at::add_path_options(positional, options,
                       "export file (or '-' for stdout)");
  options->add_options()
    (at::FROM_SNAPSHOT_NAME.c_str(), po::value<std::string>(),
     "snapshot starting point")
    (at::WHOLE_OBJECT.c_str(), po::bool_switch(), "compare whole object");
  at::add_no_progress_option(options);
}

// CLI entry point for "rbd export-diff".
int execute_diff(const po::variables_map &vm,
                 const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  std::string path;
  r = utils::get_path(vm, &arg_index, &path);
  if (r < 0) {
    return r;
  }

  std::string from_snap_name;
  if (vm.count(at::FROM_SNAPSHOT_NAME)) {
    from_snap_name = vm[at::FROM_SNAPSHOT_NAME].as<std::string>();
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
                                 snap_name, true, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = do_export_diff(image,
                     from_snap_name.empty() ? nullptr : from_snap_name.c_str(),
                     snap_name.empty() ? nullptr : snap_name.c_str(),
                     vm[at::WHOLE_OBJECT].as<bool>(), path.c_str(),
                     vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    std::cerr << "rbd: export-diff error: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

Shell::Action action_diff(
  {"export-diff"}, {}, "Export incremental diff to file.", "",
  &get_arguments_diff, &execute_diff);
// Completion for one full-export chunk: asynchronously reads a range of the
// source image and, in completion order (via OrderedThrottle), writes it to
// the destination fd. Zero chunks are skipped for regular files (leaving
// holes); stdout is written sequentially without seeking.
class C_Export : public Context
{
public:
  C_Export(OrderedThrottle &ordered_throttle, librbd::Image &image,
           uint64_t fd_offset, uint64_t offset, uint64_t length, int fd)
    : m_throttle(ordered_throttle), m_image(image), m_dest_offset(fd_offset),
      m_offset(offset), m_length(length), m_fd(fd)
  {
  }

  // Start the async read; on submission failure the throttle op is ended
  // immediately with the error.
  void send()
  {
    auto ctx = m_throttle.start_op(this);
    auto aio_completion = new librbd::RBD::AioCompletion(
      ctx, &utils::aio_context_callback);
    // Sequential one-shot read; don't pollute the client cache.
    int op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
                   LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
    int r = m_image.aio_read2(m_offset, m_length, m_bufferlist,
                              aio_completion, op_flags);
    if (r < 0) {
      cerr << "rbd: error requesting read from source image" << std::endl;
      aio_completion->release();
      m_throttle.end_op(r);
    }
  }

  // Runs in order: seek (files only) and write the chunk. BOOST_SCOPE_EXIT
  // guarantees end_op() sees the final value of r on every path.
  void finish(int r) override
  {
    BOOST_SCOPE_EXIT((&m_throttle) (&r))
    {
      m_throttle.end_op(r);
    } BOOST_SCOPE_EXIT_END

    if (r < 0) {
      cerr << "rbd: error reading from source image at offset "
           << m_offset << ": " << cpp_strerror(r) << std::endl;
      return;
    }

    ceph_assert(m_bufferlist.length() == static_cast<size_t>(r));
    if (m_fd != STDOUT_FILENO) {
      // Sparse output: skip all-zero chunks, the final ftruncate sizes the
      // file and the skipped ranges read back as zeros.
      if (m_bufferlist.is_zero()) {
        return;
      }

      uint64_t chkret = lseek64(m_fd, m_dest_offset, SEEK_SET);
      if (chkret != m_dest_offset) {
        cerr << "rbd: error seeking destination image to offset "
             << m_dest_offset << std::endl;
        r = -errno;
        return;
      }
    }

    r = m_bufferlist.write_fd(m_fd);
    if (r < 0) {
      cerr << "rbd: error writing to destination image at offset "
           << m_dest_offset << std::endl;
    }
  }

private:
  OrderedThrottle &m_throttle;
  librbd::Image &m_image;
  bufferlist m_bufferlist;
  uint64_t m_dest_offset;
  uint64_t m_offset;
  uint64_t m_length;
  int m_fd;
};

// Page size for metadata_list() pagination below.
const uint32_t MAX_KEYS = 64;

// Export in the v2 format: image properties (order, features, striping),
// image metadata key/value pairs, then one embedded diff per snapshot plus
// a final diff from the newest snapshot to HEAD.
static int do_export_v2(librbd::Image& image, librbd::image_info_t &info, int fd,
                        uint64_t period, int max_concurrent_ops, utils::ProgressContext &pc)
{
  int r = 0;
  // header
  bufferlist bl;
  bl.append(utils::RBD_IMAGE_BANNER_V2);

  __u8 tag;
  uint64_t length;
  // encode order
  tag = RBD_EXPORT_IMAGE_ORDER;
  length = 8;
  encode(tag, bl);
  encode(length, bl);
  encode(uint64_t(info.order), bl);

  // encode features
  tag = RBD_EXPORT_IMAGE_FEATURES;
  uint64_t features;
  image.features(&features);
  length = 8;
  encode(tag, bl);
  encode(length, bl);
  encode(features, bl);

  // encode stripe_unit and stripe_count
  tag = RBD_EXPORT_IMAGE_STRIPE_UNIT;
  uint64_t stripe_unit;
  stripe_unit = image.get_stripe_unit();
  length = 8;
  encode(tag, bl);
  encode(length, bl);
  encode(stripe_unit, bl);

  tag = RBD_EXPORT_IMAGE_STRIPE_COUNT;
  uint64_t stripe_count;
  stripe_count = image.get_stripe_count();
  length = 8;
  encode(tag, bl);
  encode(length, bl);
  encode(stripe_count, bl);

  // retrieve metadata of image, paginated MAX_KEYS at a time
  std::map<std::string, string> imagemetas;
  std::string last_key;
  bool more_results = true;
  while (more_results) {
    std::map<std::string, bufferlist> pairs;
    r = image.metadata_list(last_key, MAX_KEYS, &pairs);
    if (r < 0) {
      std::cerr << "failed to retrieve metadata of image : " << cpp_strerror(r)
                << std::endl;
      return r;
    }

    if (!pairs.empty()) {
      last_key = pairs.rbegin()->first;

      for (auto kv : pairs) {
        std::string key = kv.first;
        std::string val(kv.second.c_str(), kv.second.length());
        imagemetas[key] = val;
      }
    }
    // A short page means we've seen the last of the metadata.
    more_results = (pairs.size() == MAX_KEYS);
  }

  //encode imageMeta key and value
  for (std::map<std::string, string>::iterator it = imagemetas.begin();
       it != imagemetas.end(); ++it) {
    string key = it->first;
    string value = it->second;

    tag = RBD_EXPORT_IMAGE_META;
    // record length: two 4-byte string length prefixes plus the bytes
    length = key.length() + value.length() + 4 * 2;
    encode(tag, bl);
    encode(length, bl);
    encode(key, bl);
    encode(value, bl);
  }

  // encode end tag
  tag = RBD_EXPORT_IMAGE_END;
  encode(tag, bl);

  // write bl to fd.
  r = bl.write_fd(fd);
  if (r < 0) {
    return r;
  }

  // header for snapshots
  bl.clear();
  bl.append(utils::RBD_IMAGE_DIFFS_BANNER_V2);

  std::vector<librbd::snap_info_t> snaps;
  r = image.snap_list(snaps);
  if (r < 0) {
    return r;
  }

  // one diff per snapshot plus the snapshot->HEAD diff
  uint64_t diff_num = snaps.size() + 1;
  encode(diff_num, bl);

  r = bl.write_fd(fd);
  if (r < 0) {
    return r;
  }

  // Chain diffs oldest-first: creation->snap0, snap0->snap1, ..., ->HEAD.
  const char *last_snap = NULL;
  for (size_t i = 0; i < snaps.size(); ++i) {
    utils::snap_set(image, snaps[i].name.c_str());
    r = do_export_diff_fd(image, last_snap, snaps[i].name.c_str(), false, fd, true, 2);
    if (r < 0) {
      return r;
    }
    pc.update_progress(i, snaps.size() + 1);
    last_snap = snaps[i].name.c_str();
  }
  utils::snap_set(image, std::string(""));
  r = do_export_diff_fd(image, last_snap, nullptr, false, fd, true, 2);
  if (r < 0) {
    return r;
  }
  pc.update_progress(snaps.size() + 1, snaps.size() + 1);
  return r;
}
(throttle.pending_error()) { + break; + } + + uint64_t length = std::min(period, info.size - offset); + C_Export *ctx = new C_Export(throttle, image, file_size + offset, offset, + length, fd); + ctx->send(); + + pc.update_progress(offset, info.size); + } + + file_size += info.size; + r = throttle.wait_for_ret(); + if (fd != 1) { + if (r >= 0) { + r = ftruncate(fd, file_size); + if (r < 0) + return r; + + uint64_t chkret = lseek64(fd, file_size, SEEK_SET); + if (chkret != file_size) + r = errno; + } + } + return r; +} + +static int do_export(librbd::Image& image, const char *path, bool no_progress, + int export_format) +{ + librbd::image_info_t info; + int64_t r = image.stat(info, sizeof(info)); + if (r < 0) + return r; + + int fd; + int max_concurrent_ops = g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"); + bool to_stdout = (strcmp(path, "-") == 0); + if (to_stdout) { + fd = STDOUT_FILENO; + } else { + fd = open(path, O_WRONLY | O_CREAT | O_EXCL | O_BINARY, 0644); + if (fd < 0) { + return -errno; + } +#ifdef HAVE_POSIX_FADVISE + posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + } + + utils::ProgressContext pc("Exporting image", no_progress); + uint64_t period = image.get_stripe_count() * (1ull << info.order); + + if (export_format == 1) + r = do_export_v1(image, info, fd, period, max_concurrent_ops, pc); + else + r = do_export_v2(image, info, fd, period, max_concurrent_ops, pc); + + if (r < 0) + pc.fail(); + else + pc.finish(); + if (!to_stdout) + close(fd); + return r; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_SOURCE); + at::add_path_options(positional, options, + "export file (or '-' for stdout)"); + at::add_no_progress_option(options); + at::add_export_format_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + 
std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string path; + r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + int format = 1; + if (vm.count("export-format")) + format = vm["export-format"].as<uint64_t>(); + + r = do_export(image, path.c_str(), vm[at::NO_PROGRESS].as<bool>(), format); + if (r < 0) { + std::cerr << "rbd: export error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"export"}, {}, "Export image to file.", "", &get_arguments, &execute); + +} // namespace export_full +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Feature.cc b/src/tools/rbd/action/Feature.cc new file mode 100644 index 000000000..13a7b6ea7 --- /dev/null +++ b/src/tools/rbd/action/Feature.cc @@ -0,0 +1,116 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/stringify.h" +#include "common/errno.h" +#include <iostream> +#include <map> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace feature { + +namespace at = argument_types; +namespace po = boost::program_options; + +void get_arguments(po::options_description *positional, + po::options_description *options, bool enabled) { + at::add_image_spec_options(positional, options, 
// Register CLI arguments shared by "rbd feature enable/disable"; journal
// creation options are only meaningful (and only added) when enabling.
void get_arguments(po::options_description *positional,
                   po::options_description *options, bool enabled) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  positional->add_options()
    ("features", po::value<at::ImageFeatures>()->multitoken(),
     ("image features\n" + at::get_short_features_help(false)).c_str());
  if (enabled) {
    at::add_create_journal_options(options);
  }
}

void get_arguments_disable(po::options_description *positional,
                           po::options_description *options) {
  get_arguments(positional, options, false);
}

void get_arguments_enable(po::options_description *positional,
                          po::options_description *options) {
  get_arguments(positional, options, true);
}

// Shared implementation for enable/disable: parses the feature-name list
// from the positional arguments and calls update_features() with the
// resulting bitmask.
int execute(const po::variables_map &vm, bool enabled) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  // NOTE(review): journal options are consulted on the disable path too,
  // where add_create_journal_options() was not registered — presumably it
  // just yields defaults there; confirm.
  librbd::ImageOptions opts;
  r = utils::get_journal_options(vm, &opts);
  if (r < 0) {
    return r;
  }

  // Everything after the image spec positionals is a feature name.
  std::vector<std::string> feature_names;
  if (vm.count(at::POSITIONAL_ARGUMENTS)) {
    const std::vector<std::string> &args =
      vm[at::POSITIONAL_ARGUMENTS].as<std::vector<std::string> >();
    feature_names.insert(feature_names.end(), args.begin() + arg_index,
                         args.end());
  }

  if (feature_names.empty()) {
    std::cerr << "rbd: at least one feature name must be specified"
              << std::endl;
    return -EINVAL;
  }

  // at::validate parses feature_names and stores the resulting feature
  // bitmask into features_any (read back via boost::any_cast below).
  boost::any features_any(static_cast<uint64_t>(0));
  at::ImageFeatures image_features;
  at::validate(features_any, feature_names, &image_features, 0);

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = image.update_features(boost::any_cast<uint64_t>(features_any), enabled);
  if (r < 0) {
    std::cerr << "rbd: failed to update image features: " << cpp_strerror(r)
              << std::endl;
    return r;
  }
  return 0;
}
// Thin Shell::Action wrappers dispatching to the shared execute() above.
int execute_disable(const po::variables_map &vm,
                    const std::vector<std::string> &ceph_global_init_args) {
  return execute(vm, false);
}

int execute_enable(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
  return execute(vm, true);
}

Shell::Action action_disable(
  {"feature", "disable"}, {}, "Disable the specified image feature.", "",
  &get_arguments_disable, &execute_disable);
Shell::Action action_enable(
  {"feature", "enable"}, {}, "Enable the specified image feature.", "",
  &get_arguments_enable, &execute_enable);

} // namespace feature
} // namespace action
} // namespace rbd

// -- src/tools/rbd/action/Flatten.cc -------------------------------------

namespace rbd {
namespace action {
namespace flatten {

namespace at = argument_types;
namespace po = boost::program_options;

// Flatten the image (copy all parent data into the clone) with progress
// reporting.
static int do_flatten(librbd::Image& image, bool no_progress)
{
  utils::ProgressContext pc("Image flatten", no_progress);
  int r = image.flatten_with_progress(pc);
  if (r < 0) {
    pc.fail();
    return r;
  }
  pc.finish();
  return 0;
}

// Register CLI arguments for "rbd flatten".
void get_arguments(po::options_description *positional,
                   po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_no_progress_option(options);
  at::add_encryption_options(options);
}

// CLI entry point for "rbd flatten": optionally loads encryption specs so
// encrypted clones can be flattened, then delegates to do_flatten().
int execute(const po::variables_map &vm,
            const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  utils::EncryptionOptions encryption_options;
  r = utils::get_encryption_options(vm, &encryption_options);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  auto spec_count = encryption_options.specs.size();
  if (spec_count > 0) {
    r = image.encryption_load2(&encryption_options.specs[0], spec_count);

    if (r < 0) {
      std::cerr << "rbd: encryption load failed: " << cpp_strerror(r)
                << std::endl;
      return r;
    }
  }

  r = do_flatten(image, vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    std::cerr << "rbd: flatten error: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

Shell::Action action(
  {"flatten"}, {}, "Fill clone with parent data (make it independent).", "",
  &get_arguments, &execute);

} // namespace flatten
} // namespace action
} // namespace rbd
<boost/algorithm/string/predicate.hpp> +#include <boost/program_options.hpp> + +#include <iostream> + +namespace rbd { +namespace action { +namespace ggate { + +namespace at = argument_types; +namespace po = boost::program_options; + +#if defined(__FreeBSD__) +static int call_ggate_cmd(const po::variables_map &vm, + const std::vector<std::string> &args, + const std::vector<std::string> &ceph_global_args) { + SubProcess process("rbd-ggate", SubProcess::KEEP, SubProcess::KEEP, + SubProcess::KEEP); + + for (auto &arg : ceph_global_args) { + process.add_cmd_arg(arg.c_str()); + } + + for (auto &arg : args) { + process.add_cmd_arg(arg.c_str()); + } + + if (process.spawn()) { + std::cerr << "rbd: failed to run rbd-ggate: " << process.err() << std::endl; + return -EINVAL; + } else if (process.join()) { + std::cerr << "rbd: rbd-ggate failed with error: " << process.err() + << std::endl; + return -EINVAL; + } + + return 0; +} +#endif + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(__FreeBSD__) + std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl; + return -EOPNOTSUPP; +#else + std::vector<std::string> args; + + args.push_back("list"); + + if (vm.count("format")) { + args.push_back("--format"); + args.push_back(vm["format"].as<at::Format>().value); + } + if (vm["pretty-format"].as<bool>()) { + args.push_back("--pretty-format"); + } + + return call_ggate_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_map(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(__FreeBSD__) + std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl; + return -EOPNOTSUPP; +#else + std::vector<std::string> args; + + args.push_back("map"); + std::string img; + int r = utils::get_image_or_snap_spec(vm, &img); + if (r < 0) { + return r; + } + args.push_back(img); + + if (vm["quiesce"].as<bool>()) { + std::cerr << "rbd: warning: quiesce is 
not supported" << std::endl; + } + + if (vm["read-only"].as<bool>()) { + args.push_back("--read-only"); + } + + if (vm["exclusive"].as<bool>()) { + args.push_back("--exclusive"); + } + + if (vm.count("quiesce-hook")) { + std::cerr << "rbd: warning: quiesce-hook is not supported" << std::endl; + } + + if (vm.count("options")) { + utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(), + &args); + } + + return call_ggate_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_unmap(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(__FreeBSD__) + std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl; + return -EOPNOTSUPP; +#else + std::string device_name = utils::get_positional_argument(vm, 0); + if (!boost::starts_with(device_name, "/dev/")) { + device_name.clear(); + } + + std::string image_name; + if (device_name.empty()) { + int r = utils::get_image_or_snap_spec(vm, &image_name); + if (r < 0) { + return r; + } + } + + if (device_name.empty() && image_name.empty()) { + std::cerr << "rbd: unmap requires either image name or device path" + << std::endl; + return -EINVAL; + } + + std::vector<std::string> args; + + args.push_back("unmap"); + args.push_back(device_name.empty() ? 
image_name : device_name); + + if (vm.count("options")) { + utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(), + &args); + } + + return call_ggate_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_attach(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(__FreeBSD__) + std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl; +#else + std::cerr << "rbd: ggate attach command not supported" << std::endl; +#endif + return -EOPNOTSUPP; +} + +int execute_detach(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(__FreeBSD__) + std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl; +#else + std::cerr << "rbd: ggate detach command not supported" << std::endl; +#endif + return -EOPNOTSUPP; +} + +} // namespace ggate +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Group.cc b/src/tools/rbd/action/Group.cc new file mode 100644 index 000000000..5c2232a6f --- /dev/null +++ b/src/tools/rbd/action/Group.cc @@ -0,0 +1,912 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <iostream> + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_types.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" + +namespace rbd { +namespace action { +namespace group { + +namespace at = argument_types; +namespace po = boost::program_options; + +static const std::string GROUP_SPEC("group-spec"); +static const std::string GROUP_SNAP_SPEC("group-snap-spec"); + +static const std::string GROUP_NAME("group"); +static const std::string DEST_GROUP_NAME("dest-group"); + +static const std::string GROUP_POOL_NAME("group-" + at::POOL_NAME); +static const std::string IMAGE_POOL_NAME("image-" + at::POOL_NAME); + +void 
add_group_option(po::options_description *opt, + at::ArgumentModifier modifier) { + std::string name = GROUP_NAME; + std::string description = at::get_description_prefix(modifier) + "group name"; + switch (modifier) { + case at::ARGUMENT_MODIFIER_NONE: + case at::ARGUMENT_MODIFIER_SOURCE: + break; + case at::ARGUMENT_MODIFIER_DEST: + name = DEST_GROUP_NAME; + break; + } + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_prefixed_pool_option(po::options_description *opt, + const std::string &prefix) { + std::string name = prefix + "-" + at::POOL_NAME; + std::string description = prefix + " pool name"; + + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_prefixed_namespace_option(po::options_description *opt, + const std::string &prefix) { + std::string name = prefix + "-" + at::NAMESPACE_NAME; + std::string description = prefix + " namespace name"; + + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_group_spec_options(po::options_description *pos, + po::options_description *opt, + at::ArgumentModifier modifier, + bool snap) { + at::add_pool_option(opt, modifier); + at::add_namespace_option(opt, modifier); + add_group_option(opt, modifier); + if (!snap) { + pos->add_options() + ((get_name_prefix(modifier) + GROUP_SPEC).c_str(), + (get_description_prefix(modifier) + "group specification\n" + + "(example: [<pool-name>/[<namespace>/]]<group-name>)").c_str()); + } else { + add_snap_option(opt, modifier); + pos->add_options() + ((get_name_prefix(modifier) + GROUP_SNAP_SPEC).c_str(), + (get_description_prefix(modifier) + "group specification\n" + + "(example: [<pool-name>/[<namespace>/]]<group-name>@<snap-name>)").c_str()); + } +} + +int execute_create(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + + std::string pool_name; + 
std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + librbd::RBD rbd; + r = rbd.group_create(io_ctx, group_name.c_str()); + if (r < 0) { + std::cerr << "rbd: create error: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + Formatter *f = formatter.get(); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::vector<std::string> names; + r = rbd.group_list(io_ctx, &names); + if (r < 0) + return r; + + if (f) + f->open_array_section("groups"); + for (auto i : names) { + if (f) + f->dump_string("name", i); + else + std::cout << i << std::endl; + } + if (f) { + f->close_section(); + f->flush(std::cout); + } + + return 0; +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, 
GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + librbd::RBD rbd; + + r = rbd.group_remove(io_ctx, group_name.c_str()); + if (r < 0) { + std::cerr << "rbd: remove error: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_rename(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + std::string dest_pool_name; + std::string dest_namespace_name; + std::string dest_group_name; + + r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, at::DEST_POOL_NAME, + &dest_pool_name, &dest_namespace_name, DEST_GROUP_NAME, "group", + &dest_group_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (pool_name != dest_pool_name) { + std::cerr << "rbd: group rename across pools not supported" << std::endl + << "source pool: " << pool_name << ", dest pool: " + << dest_pool_name << std::endl; + return -EINVAL; + } else if (namespace_name != dest_namespace_name) { + std::cerr << "rbd: group rename across namespaces not supported" + << std::endl + << "source namespace: " << namespace_name << ", dest namespace: " + << dest_namespace_name << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if 
(r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_rename(io_ctx, group_name.c_str(), + dest_group_name.c_str()); + + if (r < 0) { + std::cerr << "rbd: failed to rename group: " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_add(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + // Parse group data. + std::string group_pool_name; + std::string group_namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, GROUP_POOL_NAME, + &group_pool_name, &group_namespace_name, GROUP_NAME, "group", &group_name, + nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + std::string image_pool_name; + std::string image_namespace_name; + std::string image_name; + + r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, IMAGE_POOL_NAME, + &image_pool_name, &image_namespace_name, at::IMAGE_NAME, "image", + &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (group_namespace_name != image_namespace_name) { + std::cerr << "rbd: group and image namespace must match." 
<< std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx cg_io_ctx; + r = utils::init(group_pool_name, group_namespace_name, &rados, &cg_io_ctx); + if (r < 0) { + return r; + } + + librados::IoCtx image_io_ctx; + r = utils::init(image_pool_name, group_namespace_name, &rados, &image_io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_image_add(cg_io_ctx, group_name.c_str(), + image_io_ctx, image_name.c_str()); + if (r < 0) { + std::cerr << "rbd: add image error: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_remove_image(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + + std::string group_pool_name; + std::string group_namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, GROUP_POOL_NAME, + &group_pool_name, &group_namespace_name, GROUP_NAME, "group", &group_name, + nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + std::string image_pool_name; + std::string image_namespace_name; + std::string image_name; + std::string image_id; + + if (vm.count(at::IMAGE_ID)) { + image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, IMAGE_POOL_NAME, + &image_pool_name, &image_namespace_name, at::IMAGE_NAME, "image", + &image_name, nullptr, image_id.empty(), utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (group_namespace_name != image_namespace_name) { + std::cerr << "rbd: group and image namespace must match." << std::endl; + return -EINVAL; + } else if (!image_id.empty() && !image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id. 
" + << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx cg_io_ctx; + r = utils::init(group_pool_name, group_namespace_name, &rados, &cg_io_ctx); + if (r < 0) { + return r; + } + + librados::IoCtx image_io_ctx; + r = utils::init(image_pool_name, group_namespace_name, &rados, &image_io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + if (image_id.empty()) { + r = rbd.group_image_remove(cg_io_ctx, group_name.c_str(), + image_io_ctx, image_name.c_str()); + } else { + r = rbd.group_image_remove_by_id(cg_io_ctx, group_name.c_str(), + image_io_ctx, image_id.c_str()); + } + if (r < 0) { + std::cerr << "rbd: remove image error: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_list_images(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + Formatter *f = formatter.get(); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::vector<librbd::group_image_info_t> images; + + r = rbd.group_image_list(io_ctx, group_name.c_str(), &images, + sizeof(librbd::group_image_info_t)); + + if (r == -ENOENT) + r = 0; + + if (r < 0) + return r; + + std::sort(images.begin(), images.end(), + [](const librbd::group_image_info_t &lhs, + const librbd::group_image_info_t &rhs) { + if (lhs.pool != rhs.pool) { + return lhs.pool < rhs.pool; + } + return lhs.name < rhs.name; + } + 
); + + if (f) + f->open_array_section("images"); + + for (auto image : images) { + std::string image_name = image.name; + int state = image.state; + std::string state_string; + if (RBD_GROUP_IMAGE_STATE_INCOMPLETE == state) { + state_string = "incomplete"; + } + + std::string pool_name = ""; + + librados::Rados rados(io_ctx); + librados::IoCtx pool_io_ctx; + r = rados.ioctx_create2(image.pool, pool_io_ctx); + if (r < 0) { + pool_name = "<missing image pool " + stringify(image.pool) + ">"; + } else { + pool_name = pool_io_ctx.get_pool_name(); + } + + if (f) { + f->open_object_section("image"); + f->dump_string("image", image_name); + f->dump_string("pool", pool_name); + f->dump_string("namespace", io_ctx.get_namespace()); + f->dump_int("state", state); + f->close_section(); + } else { + std::cout << pool_name << "/"; + if (!io_ctx.get_namespace().empty()) { + std::cout << io_ctx.get_namespace() << "/"; + } + std::cout << image_name << " " << state_string << std::endl; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } + + return 0; +} + +int execute_group_snap_create(const po::variables_map &vm, + const std::vector<std::string> &global_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + std::string snap_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, &snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + uint32_t flags; + r = utils::get_snap_create_flags(vm, &flags); + if (r < 0) { + return r; + } + + librados::IoCtx io_ctx; + librados::Rados rados; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_snap_create2(io_ctx, group_name.c_str(), snap_name.c_str(), + flags); + if (r < 0) { + return r; + } + + 
return 0; +} + +int execute_group_snap_remove(const po::variables_map &vm, + const std::vector<std::string> &global_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + std::string snap_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, &snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librados::IoCtx io_ctx; + librados::Rados rados; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_snap_remove(io_ctx, group_name.c_str(), snap_name.c_str()); + if (r < 0) { + std::cerr << "rbd: failed to remove group snapshot: " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_group_snap_rename(const po::variables_map &vm, + const std::vector<std::string> &global_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + std::string source_snap_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, &source_snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + std::string dest_snap_name; + if (vm.count(at::DEST_SNAPSHOT_NAME)) { + dest_snap_name = vm[at::DEST_SNAPSHOT_NAME].as<std::string>(); + } + + if (dest_snap_name.empty()) { + dest_snap_name = utils::get_positional_argument(vm, arg_index++); + } + + if (dest_snap_name.empty()) { + std::cerr << "rbd: destination snapshot name was not specified" + << std::endl; + return -EINVAL; + } + + r = utils::validate_snapshot_name(at::ARGUMENT_MODIFIER_DEST, dest_snap_name, + utils::SNAPSHOT_PRESENCE_REQUIRED, + 
utils::SPEC_VALIDATION_SNAP); + if (r < 0) { + return r; + } + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_snap_rename(io_ctx, group_name.c_str(), + source_snap_name.c_str(), dest_snap_name.c_str()); + + if (r < 0) { + std::cerr << "rbd: failed to rename group snapshot: " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_group_snap_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + Formatter *f = formatter.get(); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::vector<librbd::group_snap_info_t> snaps; + + r = rbd.group_snap_list(io_ctx, group_name.c_str(), &snaps, + sizeof(librbd::group_snap_info_t)); + + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + return r; + } + + TextTable t; + if (f) { + f->open_array_section("group_snaps"); + } else { + t.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + t.define_column("STATUS", TextTable::LEFT, TextTable::RIGHT); + } + + for (auto i : snaps) { + std::string snap_name = i.name; + int state = i.state; + std::string state_string; + if (RBD_GROUP_SNAP_STATE_INCOMPLETE == state) { + state_string = "incomplete"; + } else { + state_string = "ok"; + } + if (r < 0) { + return r; + } + if (f) { + 
f->open_object_section("group_snap"); + f->dump_string("snapshot", snap_name); + f->dump_string("state", state_string); + f->close_section(); + } else { + t << snap_name << state_string << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else if (snaps.size()) { + std::cout << t; + } + return 0; +} + +int execute_group_snap_rollback(const po::variables_map &vm, + const std::vector<std::string> &global_args) { + size_t arg_index = 0; + + std::string group_name; + std::string namespace_name; + std::string pool_name; + std::string snap_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, &snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librados::IoCtx io_ctx; + librados::Rados rados; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + utils::ProgressContext pc("Rolling back to group snapshot", + vm[at::NO_PROGRESS].as<bool>()); + r = rbd.group_snap_rollback_with_progress(io_ctx, group_name.c_str(), + snap_name.c_str(), pc); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: rollback group to snapshot failed: " + << cpp_strerror(r) << std::endl; + return r; + } + + pc.finish(); + return 0; +} + +void get_create_arguments(po::options_description *positional, + po::options_description *options) { + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + false); +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + false); +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + at::add_format_options(options); +} + +void 
// Register CLI arguments for "rbd group image remove": a positional group
// spec, a positional image spec, and the optional flags that can override
// parts of either spec (including --image-id to address the image by id).
void get_remove_image_arguments(po::options_description *positional,
                                po::options_description *options) {
  positional->add_options()
    (GROUP_SPEC.c_str(),
     "group specification\n"
     "(example: [<pool-name>/[<namespace>/]]<group-name>)");

  // Optional flags that can replace pieces of the positional group spec.
  add_prefixed_pool_option(options, "group");
  add_prefixed_namespace_option(options, "group");
  add_group_option(options, at::ARGUMENT_MODIFIER_NONE);

  positional->add_options()
    (at::IMAGE_SPEC.c_str(),
     "image specification\n"
     "(example: [<pool-name>/[<namespace>/]]<image-name>)");

  // Optional flags that can replace pieces of the positional image spec.
  add_prefixed_pool_option(options, "image");
  add_prefixed_namespace_option(options, "image");
  at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);

  at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE,
                      " unless overridden");
  // The image may alternatively be addressed by id instead of name.
  at::add_image_id_option(options);
}
// Register CLI arguments for "rbd group snap rename": the source group
// snapshot spec plus the destination snapshot name (positional or via the
// --dest-snap option).
void get_group_snap_rename_arguments(po::options_description *positional,
                                     po::options_description *options) {
  add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE,
                         true);

  positional->add_options()
    (at::DEST_SNAPSHOT_NAME.c_str(),
     "destination snapshot name\n(example: <snap-name>)");
  at::add_snap_option(options, at::ARGUMENT_MODIFIER_DEST);
}
"add"}, {}, "Add an image to a group.", + "", &get_add_arguments, &execute_add); +Shell::Action action_remove_image( + {"group", "image", "remove"}, {"group", "image", "rm"}, + "Remove an image from a group.", "", + &get_remove_image_arguments, &execute_remove_image); +Shell::Action action_list_images( + {"group", "image", "list"}, {"group", "image", "ls"}, + "List images in a group.", "", + &get_list_images_arguments, &execute_list_images); +Shell::Action action_group_snap_create( + {"group", "snap", "create"}, {}, "Make a snapshot of a group.", + "", &get_group_snap_create_arguments, &execute_group_snap_create); +Shell::Action action_group_snap_remove( + {"group", "snap", "remove"}, {"group", "snap", "rm"}, + "Remove a snapshot from a group.", + "", &get_group_snap_remove_arguments, &execute_group_snap_remove); +Shell::Action action_group_snap_rename( + {"group", "snap", "rename"}, {}, "Rename group's snapshot.", + "", &get_group_snap_rename_arguments, &execute_group_snap_rename); +Shell::Action action_group_snap_list( + {"group", "snap", "list"}, {"group", "snap", "ls"}, + "List snapshots of a group.", + "", &get_group_snap_list_arguments, &execute_group_snap_list); +Shell::Action action_group_snap_rollback( + {"group", "snap", "rollback"}, {}, + "Rollback group to snapshot.", + "", &get_group_snap_rollback_arguments, &execute_group_snap_rollback); + +} // namespace group +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/ImageMeta.cc b/src/tools/rbd/action/ImageMeta.cc new file mode 100644 index 000000000..20c4555da --- /dev/null +++ b/src/tools/rbd/action/ImageMeta.cc @@ -0,0 +1,345 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <iostream> +#include <boost/program_options.hpp> + 
+namespace rbd { +namespace action { +namespace image_meta { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +void add_key_option(po::options_description *positional) { + positional->add_options() + ("key", "image meta key"); +} + +int get_key(const po::variables_map &vm, size_t *arg_index, + std::string *key) { + *key = utils::get_positional_argument(vm, *arg_index); + if (key->empty()) { + std::cerr << "rbd: metadata key was not specified" << std::endl; + return -EINVAL; + } else { + ++(*arg_index); + } + return 0; +} + +const uint32_t MAX_KEYS = 64; + +} // anonymous namespace + +static int do_metadata_list(librbd::Image& image, Formatter *f) +{ + int r; + TextTable tbl; + + size_t count = 0; + std::string last_key; + bool more_results = true; + while (more_results) { + std::map<std::string, bufferlist> pairs; + r = image.metadata_list(last_key, MAX_KEYS, &pairs); + if (r < 0) { + std::cerr << "failed to list metadata of image : " << cpp_strerror(r) + << std::endl; + return r; + } + + more_results = (pairs.size() == MAX_KEYS); + if (!pairs.empty()) { + if (count == 0) { + if (f) { + f->open_object_section("metadatas"); + } else { + tbl.define_column("Key", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT); + } + } + + last_key = pairs.rbegin()->first; + count += pairs.size(); + + for (auto kv : pairs) { + std::string val(kv.second.c_str(), kv.second.length()); + if (f) { + f->dump_string(kv.first.c_str(), val.c_str()); + } else { + tbl << kv.first << val << TextTable::endrow; + } + } + } + } + + if (f == nullptr) { + bool single = (count == 1); + std::cout << "There " << (single ? "is" : "are") << " " << count << " " + << (single ? "metadatum" : "metadata") << " on this image" + << (count == 0 ? "." 
: ":") << std::endl; + } + + if (count > 0) { + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << std::endl << tbl; + } + } + return 0; +} + +static int do_metadata_set(librbd::Image& image, std::string &key, + std::string &value) +{ + int r = image.metadata_set(key, value); + if (r < 0) { + std::cerr << "failed to set metadata " << key << " of image : " + << cpp_strerror(r) << std::endl; + } + return r; +} + +static int do_metadata_remove(librbd::Image& image, std::string &key) +{ + int r = image.metadata_remove(key); + if (r == -ENOENT) { + std::cerr << "rbd: no existing metadata key " << key << " of image : " + << cpp_strerror(r) << std::endl; + } else if(r < 0) { + std::cerr << "failed to remove metadata " << key << " of image : " + << cpp_strerror(r) << std::endl; + } + return r; +} + +static int do_metadata_get(librbd::Image& image, std::string &key) +{ + std::string s; + int r = image.metadata_get(key, &s); + if (r < 0) { + std::cerr << "failed to get metadata " << key << " of image : " + << cpp_strerror(r) << std::endl; + return r; + } + std::cout << s << std::endl; + return r; +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx 
io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_metadata_list(image, formatter.get()); + if (r < 0) { + std::cerr << "rbd: listing metadata failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); +} + +int execute_get(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_metadata_get(image, key); + if (r < 0) { + std::cerr << "rbd: getting metadata failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_set_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); + positional->add_options() + ("value", "image meta value"); +} + +int execute_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + 
std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + std::string value = utils::get_positional_argument(vm, arg_index); + if (value.empty()) { + std::cerr << "rbd: metadata value was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_metadata_set(image, key, value); + if (r < 0) { + std::cerr << "rbd: setting metadata failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = 
#include "tools/rbd/ArgumentTypes.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "include/Context.h"
#include "include/ceph_assert.h"
#include "include/compat.h"
#include "include/encoding.h"
#include "common/blkdev.h"
#include "common/debug.h"
#include "common/errno.h"
#include "common/safe_io.h"
#include "common/Throttle.h"
#include <iostream>
#include <memory>
#include <boost/program_options.hpp>
#include <boost/scoped_ptr.hpp>
// Completion context for one asynchronous diff-import write.  send()
// registers the op with the ImportDiffContext's ordered throttle and issues
// either an aio_write2 (data) or aio_write_zeroes (zero extent); finish()
// reports progress and releases the throttle slot.
// NOTE(review): as a Context subclass this object is presumably destroyed
// by the completion machinery after finish() — confirm against
// include/Context.h before holding references to it.
class C_ImportDiff : public Context {
public:
  C_ImportDiff(ImportDiffContext *idiffctx, bufferlist data, uint64_t offset,
               uint64_t length, bool write_zeroes)
    : m_idiffctx(idiffctx), m_data(data), m_offset(offset), m_length(length),
      m_write_zeroes(write_zeroes) {
    // use block offset (stdin) or import file position to report
    // progress.
    if (m_idiffctx->fd == STDIN_FILENO) {
      m_prog_offset = offset;
    } else {
      m_prog_offset = lseek(m_idiffctx->fd, 0, SEEK_CUR);
    }
  }

  // Queue the write; returns a negative errno if a previous op already
  // failed (short-circuits via the throttle) or if issuing the aio fails.
  int send()
  {
    if (m_idiffctx->throttle.pending_error()) {
      return m_idiffctx->throttle.wait_for_ret();
    }

    C_OrderedThrottle *ctx = m_idiffctx->throttle.start_op(this);
    librbd::RBD::AioCompletion *aio_completion =
      new librbd::RBD::AioCompletion(ctx, &utils::aio_context_callback);

    int r;
    if (m_write_zeroes) {
      r = m_idiffctx->image->aio_write_zeroes(m_offset, m_length,
                                              aio_completion, 0U,
                                              LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
    } else {
      r = m_idiffctx->image->aio_write2(m_offset, m_length, m_data,
                                        aio_completion,
                                        LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
    }

    if (r < 0) {
      // The aio was never queued: drop the completion ourselves and
      // complete the throttle op with the error.
      aio_completion->release();
      ctx->complete(r);
    }

    return r;
  }

  // Invoked when the throttle op completes (success or failure).
  void finish(int r) override
  {
    m_idiffctx->update_progress(m_prog_offset);
    m_idiffctx->throttle.end_op(r);
  }

private:
  ImportDiffContext *m_idiffctx;
  bufferlist m_data;          // payload (empty for write-zeroes ops)
  uint64_t m_offset;          // image offset of this extent
  uint64_t m_length;
  bool m_write_zeroes;        // true => zero the extent instead of writing data
  uint64_t m_prog_offset;     // offset to report for progress accounting
};
<< "rbd: failed to decode end snap name" << std::endl; + return r; + } + + bool exists; + r = idiffctx->image->snap_exists2(to.c_str(), &exists); + if (r < 0) { + std::cerr << "rbd: failed to query end snap state" << std::endl; + return r; + } + + if (exists) { + std::cerr << "end snapshot '" << to << "' already exists, aborting" + << std::endl; + return -EEXIST; + } + + *tosnap = to; + idiffctx->update_progress(); + + return 0; +} + +static int get_snap_protection_status(ImportDiffContext *idiffctx, + bool *is_protected) +{ + int r; + char buf[sizeof(__u8)]; + r = safe_read_exact(idiffctx->fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode snap protection status" << std::endl; + return r; + } + + *is_protected = (buf[0] != 0); + idiffctx->update_progress(); + + return 0; +} + +static int do_image_resize(ImportDiffContext *idiffctx) +{ + int r; + char buf[sizeof(uint64_t)]; + uint64_t end_size; + r = safe_read_exact(idiffctx->fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode image size" << std::endl; + return r; + } + + bufferlist bl; + bl.append(buf, sizeof(buf)); + auto p = bl.cbegin(); + decode(end_size, p); + + uint64_t cur_size; + idiffctx->image->size(&cur_size); + if (cur_size != end_size) { + idiffctx->image->resize(end_size); + } + + idiffctx->update_size(end_size); + idiffctx->update_progress(); + return 0; +} + +static int do_image_io(ImportDiffContext *idiffctx, bool write_zeroes, + size_t sparse_size) +{ + int r; + char buf[16]; + r = safe_read_exact(idiffctx->fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode IO length" << std::endl; + return r; + } + + bufferlist bl; + bl.append(buf, sizeof(buf)); + auto p = bl.cbegin(); + + uint64_t image_offset, buffer_length; + decode(image_offset, p); + decode(buffer_length, p); + + if (!write_zeroes) { + bufferptr bp = buffer::create(buffer_length); + r = safe_read_exact(idiffctx->fd, bp.c_str(), buffer_length); + if (r < 0) { + 
std::cerr << "rbd: failed to decode write data" << std::endl; + return r; + } + + size_t buffer_offset = 0; + while (buffer_offset < buffer_length) { + size_t write_length = 0; + bool zeroed = false; + utils::calc_sparse_extent(bp, sparse_size, buffer_offset, buffer_length, + &write_length, &zeroed); + ceph_assert(write_length > 0); + + bufferlist write_bl; + if (!zeroed) { + bufferptr write_ptr(bp, buffer_offset, write_length); + write_bl.push_back(write_ptr); + ceph_assert(write_bl.length() == write_length); + } + + C_ImportDiff *ctx = new C_ImportDiff(idiffctx, write_bl, + image_offset + buffer_offset, + write_length, zeroed); + r = ctx->send(); + if (r < 0) { + return r; + } + + buffer_offset += write_length; + } + } else { + bufferlist data; + C_ImportDiff *ctx = new C_ImportDiff(idiffctx, data, image_offset, + buffer_length, true); + return ctx->send(); + } + return r; +} + +static int validate_banner(int fd, std::string banner) +{ + int r; + char buf[banner.size() + 1]; + memset(buf, 0, sizeof(buf)); + r = safe_read_exact(fd, buf, banner.size()); + if (r < 0) { + std::cerr << "rbd: failed to decode diff banner" << std::endl; + return r; + } + + buf[banner.size()] = '\0'; + if (strcmp(buf, banner.c_str())) { + std::cerr << "rbd: invalid or unexpected diff banner" << std::endl; + return -EINVAL; + } + + return 0; +} + +static int skip_tag(int fd, uint64_t length) +{ + int r; + + if (fd == STDIN_FILENO) { + // read the appending data out to skip this tag. 
+ char buf[4096]; + uint64_t len = std::min<uint64_t>(length, sizeof(buf)); + while (len > 0) { + r = safe_read_exact(fd, buf, len); + if (r < 0) { + std::cerr << "rbd: failed to decode skipped tag data" << std::endl; + return r; + } + length -= len; + len = std::min<uint64_t>(length, sizeof(buf)); + } + } else { + // lseek to skip this tag + off64_t offs = lseek64(fd, length, SEEK_CUR); + if (offs < 0) { + return -errno; + } + } + + return 0; +} + +static int read_tag(int fd, __u8 end_tag, int format, __u8 *tag, uint64_t *readlen) +{ + int r; + __u8 read_tag; + + r = safe_read_exact(fd, &read_tag, sizeof(read_tag)); + if (r < 0) { + std::cerr << "rbd: failed to decode tag" << std::endl; + return r; + } + + *tag = read_tag; + if (read_tag != end_tag && format == 2) { + char buf[sizeof(uint64_t)]; + r = safe_read_exact(fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode tag length" << std::endl; + return r; + } + + bufferlist bl; + bl.append(buf, sizeof(buf)); + auto p = bl.cbegin(); + decode(*readlen, p); + } + + return 0; +} + +int do_import_diff_fd(librados::Rados &rados, librbd::Image &image, int fd, + bool no_progress, int format, size_t sparse_size) +{ + int r; + + uint64_t size = 0; + bool from_stdin = (fd == STDIN_FILENO); + if (!from_stdin) { + struct stat stat_buf; + r = ::fstat(fd, &stat_buf); + if (r < 0) { + std::cerr << "rbd: failed to stat specified diff file" << std::endl; + return r; + } + size = (uint64_t)stat_buf.st_size; + } + + r = validate_banner(fd, (format == 1 ? 
utils::RBD_DIFF_BANNER : + utils::RBD_DIFF_BANNER_V2)); + if (r < 0) { + return r; + } + + // begin image import + std::string tosnap; + bool is_protected = false; + ImportDiffContext idiffctx(&image, fd, size, no_progress); + while (r == 0) { + __u8 tag; + uint64_t length = 0; + + r = read_tag(fd, RBD_DIFF_END, format, &tag, &length); + if (r < 0 || tag == RBD_DIFF_END) { + break; + } + + if (tag == RBD_DIFF_FROM_SNAP) { + r = do_image_snap_from(&idiffctx); + } else if (tag == RBD_DIFF_TO_SNAP) { + r = do_image_snap_to(&idiffctx, &tosnap); + } else if (tag == RBD_SNAP_PROTECTION_STATUS) { + r = get_snap_protection_status(&idiffctx, &is_protected); + } else if (tag == RBD_DIFF_IMAGE_SIZE) { + r = do_image_resize(&idiffctx); + } else if (tag == RBD_DIFF_WRITE || tag == RBD_DIFF_ZERO) { + r = do_image_io(&idiffctx, (tag == RBD_DIFF_ZERO), sparse_size); + } else { + std::cerr << "unrecognized tag byte " << (int)tag << " in stream; skipping" + << std::endl; + r = skip_tag(fd, length); + } + } + + int temp_r = idiffctx.throttle.wait_for_ret(); + r = (r < 0) ? 
r : temp_r; // preserve original error + if (r == 0 && tosnap.length()) { + r = idiffctx.image->snap_create(tosnap.c_str()); + if (r == 0 && is_protected) { + r = idiffctx.image->snap_protect(tosnap.c_str()); + } + } + + idiffctx.finish(r); + return r; +} + +int do_import_diff(librados::Rados &rados, librbd::Image &image, + const char *path, bool no_progress, size_t sparse_size) +{ + int r; + int fd; + + if (strcmp(path, "-") == 0) { + fd = STDIN_FILENO; + } else { + fd = open(path, O_RDONLY|O_BINARY); + if (fd < 0) { + r = -errno; + std::cerr << "rbd: error opening " << path << std::endl; + return r; + } + } + r = do_import_diff_fd(rados, image, fd, no_progress, 1, sparse_size); + + if (fd != 0) + close(fd); + return r; +} + +namespace at = argument_types; +namespace po = boost::program_options; + +void get_arguments_diff(po::options_description *positional, + po::options_description *options) { + at::add_path_options(positional, options, + "import file (or '-' for stdin)"); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_sparse_size_option(options); + at::add_no_progress_option(options); +} + +int execute_diff(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string path; + size_t arg_index = 0; + int r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE; + if (vm.count(at::IMAGE_SPARSE_SIZE)) { + sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>(); + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = 
// Completion context for one asynchronous write during a full-image (v1)
// import.  send() claims a throttle slot and issues the aio write;
// finish() runs when the write completes and releases the slot.
// NOTE(review): as a Context subclass this is presumably destroyed by the
// completion machinery after finish() — confirm against include/Context.h.
class C_Import : public Context {
public:
  C_Import(SimpleThrottle &simple_throttle, librbd::Image &image,
           bufferlist &bl, uint64_t offset)
    : m_throttle(simple_throttle), m_image(image),
      m_aio_completion(
        new librbd::RBD::AioCompletion(this, &utils::aio_context_callback)),
      m_bufferlist(bl), m_offset(offset)
  {
  }

  // Issue the write.  On submission failure the completion is released
  // and the throttle op is ended with the error so the import aborts.
  void send()
  {
    m_throttle.start_op();

    // Import is a single sequential pass; hint the OSD cache accordingly.
    int op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
                   LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
    int r = m_image.aio_write2(m_offset, m_bufferlist.length(), m_bufferlist,
                               m_aio_completion, op_flags);
    if (r < 0) {
      std::cerr << "rbd: error requesting write to destination image"
                << std::endl;
      m_aio_completion->release();
      m_throttle.end_op(r);
    }
  }

  // Called when the aio write completes; r is the write's result.
  void finish(int r) override
  {
    if (r < 0) {
      std::cerr << "rbd: error writing to destination image at offset "
                << m_offset << ": " << cpp_strerror(r) << std::endl;
    }
    m_throttle.end_op(r);
  }

private:
  SimpleThrottle &m_throttle;
  librbd::Image &m_image;
  librbd::RBD::AioCompletion *m_aio_completion;
  bufferlist m_bufferlist;   // payload for this extent
  uint64_t m_offset;         // destination image offset
};
uint64_t val; + decode(val, it); + + if (opts.get(imageopt, &val) != 0) { + opts.set(imageopt, val); + } + + return 0; +} + +static int do_import_metadata(int import_format, librbd::Image& image, + const std::map<std::string, std::string> &imagemetas) +{ + int r = 0; + + //v1 format + if (import_format == 1) { + return 0; + } + + for (std::map<std::string, std::string>::const_iterator it = imagemetas.begin(); + it != imagemetas.end(); ++it) { + r = image.metadata_set(it->first, it->second); + if (r < 0) + return r; + } + + return 0; +} + +static int decode_imagemeta(int fd, uint64_t length, std::map<std::string, std::string>* imagemetas) +{ + int r; + string key; + string value; + + r = utils::read_string(fd, length, &key); + if (r < 0) { + std::cerr << "rbd: failed to decode metadata key" << std::endl; + return r; + } + + r = utils::read_string(fd, length, &value); + if (r < 0) { + std::cerr << "rbd: failed to decode metadata value" << std::endl; + return r; + } + + (*imagemetas)[key] = value; + return 0; +} + +static int do_import_header(int fd, int import_format, librbd::ImageOptions& opts, + std::map<std::string, std::string>* imagemetas) +{ + // There is no header in v1 image. + if (import_format == 1) { + return 0; + } + + int r; + r = validate_banner(fd, utils::RBD_IMAGE_BANNER_V2); + if (r < 0) { + return r; + } + + // As V1 format for image is already deprecated, import image in V2 by default. 
+ uint64_t image_format = 2; + if (opts.get(RBD_IMAGE_OPTION_FORMAT, &image_format) != 0) { + opts.set(RBD_IMAGE_OPTION_FORMAT, image_format); + } + + while (r == 0) { + __u8 tag; + uint64_t length = 0; + r = read_tag(fd, RBD_EXPORT_IMAGE_END, image_format, &tag, &length); + if (r < 0 || tag == RBD_EXPORT_IMAGE_END) { + break; + } + + if (tag == RBD_EXPORT_IMAGE_ORDER) { + r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_ORDER, opts); + } else if (tag == RBD_EXPORT_IMAGE_FEATURES) { + r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_FEATURES, opts); + } else if (tag == RBD_EXPORT_IMAGE_STRIPE_UNIT) { + r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_STRIPE_UNIT, opts); + } else if (tag == RBD_EXPORT_IMAGE_STRIPE_COUNT) { + r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_STRIPE_COUNT, opts); + } else if (tag == RBD_EXPORT_IMAGE_META) { + r = decode_imagemeta(fd, length, imagemetas); + } else { + std::cerr << "rbd: invalid tag in image properties zone: " << tag << "Skip it." 
+ << std::endl; + r = skip_tag(fd, length); + } + } + + return r; +} + +static int do_import_v2(librados::Rados &rados, int fd, librbd::Image &image, + uint64_t size, size_t imgblklen, + utils::ProgressContext &pc, size_t sparse_size) +{ + int r = 0; + r = validate_banner(fd, utils::RBD_IMAGE_DIFFS_BANNER_V2); + if (r < 0) { + return r; + } + + char buf[sizeof(uint64_t)]; + r = safe_read_exact(fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode diff count" << std::endl; + return r; + } + bufferlist bl; + bl.append(buf, sizeof(buf)); + auto p = bl.cbegin(); + uint64_t diff_num; + decode(diff_num, p); + for (size_t i = 0; i < diff_num; i++) { + r = do_import_diff_fd(rados, image, fd, true, 2, sparse_size); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: import-diff failed: " << cpp_strerror(r) << std::endl; + return r; + } + pc.update_progress(i + 1, diff_num); + } + + return r; +} + +static int do_import_v1(int fd, librbd::Image &image, uint64_t size, + size_t imgblklen, utils::ProgressContext &pc, + size_t sparse_size) +{ + int r = 0; + size_t reqlen = imgblklen; // amount requested from read + ssize_t readlen; // amount received from one read + size_t blklen = 0; // amount accumulated from reads to fill blk + char *p = new char[imgblklen]; + uint64_t image_pos = 0; + bool from_stdin = (fd == STDIN_FILENO); + boost::scoped_ptr<SimpleThrottle> throttle; + + if (from_stdin) { + throttle.reset(new SimpleThrottle(1, false)); + } else { + throttle.reset(new SimpleThrottle( + g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"), false)); + } + + reqlen = std::min<uint64_t>(reqlen, size); + // loop body handles 0 return, as we may have a block to flush + while ((readlen = ::read(fd, p + blklen, reqlen)) >= 0) { + if (throttle->pending_error()) { + break; + } + + blklen += readlen; + // if read was short, try again to fill the block before writing + if (readlen && ((size_t)readlen < reqlen)) { + reqlen -= readlen; + continue; + } + if 
(!from_stdin) + pc.update_progress(image_pos, size); + + bufferptr blkptr(p, blklen); + // resize output image by binary expansion as we go for stdin + if (from_stdin && (image_pos + (size_t)blklen) > size) { + size *= 2; + r = image.resize(size); + if (r < 0) { + std::cerr << "rbd: can't resize image during import" << std::endl; + goto out; + } + } + + // write as much as we got; perhaps less than imgblklen + // but skip writing zeros to create sparse images + size_t buffer_offset = 0; + while (buffer_offset < blklen) { + size_t write_length = 0; + bool zeroed = false; + utils::calc_sparse_extent(blkptr, sparse_size, buffer_offset, blklen, + &write_length, &zeroed); + + if (!zeroed) { + bufferlist write_bl; + bufferptr write_ptr(blkptr, buffer_offset, write_length); + write_bl.push_back(write_ptr); + ceph_assert(write_bl.length() == write_length); + + C_Import *ctx = new C_Import(*throttle, image, write_bl, + image_pos + buffer_offset); + ctx->send(); + } + + buffer_offset += write_length; + } + + // done with whole block, whether written or not + image_pos += blklen; + if (!from_stdin && image_pos >= size) + break; + // if read had returned 0, we're at EOF and should quit + if (readlen == 0) + break; + blklen = 0; + reqlen = imgblklen; + } + r = throttle->wait_for_ret(); + if (r < 0) { + goto out; + } + + if (fd == STDIN_FILENO) { + r = image.resize(image_pos); + if (r < 0) { + std::cerr << "rbd: final image resize failed" << std::endl; + goto out; + } + } +out: + delete[] p; + return r; +} + +static int do_import(librados::Rados &rados, librbd::RBD &rbd, + librados::IoCtx& io_ctx, const char *imgname, + const char *path, librbd::ImageOptions& opts, + bool no_progress, int import_format, size_t sparse_size) +{ + int fd, r; + struct stat stat_buf; + utils::ProgressContext pc("Importing image", no_progress); + std::map<std::string, std::string> imagemetas; + + ceph_assert(imgname); + + uint64_t order; + if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) { + order = 
g_conf().get_val<uint64_t>("rbd_default_order"); + } + + // try to fill whole imgblklen blocks for sparsification + size_t imgblklen = 1 << order; + librbd::Image image; + uint64_t size = 0; + + bool from_stdin = !strcmp(path, "-"); + if (from_stdin) { + fd = STDIN_FILENO; + size = 1ULL << order; + } else { + if ((fd = open(path, O_RDONLY|O_BINARY)) < 0) { + r = -errno; + std::cerr << "rbd: error opening " << path << std::endl; + goto done2; + } + + if ((fstat(fd, &stat_buf)) < 0) { + r = -errno; + std::cerr << "rbd: stat error " << path << std::endl; + goto done; + } + if (S_ISDIR(stat_buf.st_mode)) { + r = -EISDIR; + std::cerr << "rbd: cannot import a directory" << std::endl; + goto done; + } + if (stat_buf.st_size) + size = (uint64_t)stat_buf.st_size; + + if (!size) { + int64_t bdev_size = 0; + BlkDev blkdev(fd); + r = blkdev.get_size(&bdev_size); + if (r < 0) { + std::cerr << "rbd: unable to get size of file/block device" + << std::endl; + goto done; + } + ceph_assert(bdev_size >= 0); + size = (uint64_t) bdev_size; + } +#ifdef HAVE_POSIX_FADVISE + posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + } + + r = do_import_header(fd, import_format, opts, &imagemetas); + if (r < 0) { + std::cerr << "rbd: import header failed." 
<< std::endl; + goto done; + } + + r = rbd.create4(io_ctx, imgname, size, opts); + if (r < 0) { + std::cerr << "rbd: image creation failed" << std::endl; + goto done; + } + + r = rbd.open(io_ctx, image, imgname); + if (r < 0) { + std::cerr << "rbd: failed to open image" << std::endl; + goto err; + } + + r = do_import_metadata(import_format, image, imagemetas); + if (r < 0) { + std::cerr << "rbd: failed to import image-meta" << std::endl; + goto err; + } + + if (import_format == 1) { + r = do_import_v1(fd, image, size, imgblklen, pc, sparse_size); + } else { + r = do_import_v2(rados, fd, image, size, imgblklen, pc, sparse_size); + } + if (r < 0) { + std::cerr << "rbd: failed to import image" << std::endl; + image.close(); + goto err; + } + + r = image.close(); +err: + if (r < 0) + rbd.remove(io_ctx, imgname); +done: + if (r < 0) + pc.fail(); + else + pc.finish(); + if (!from_stdin) + close(fd); +done2: + return r; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_path_options(positional, options, + "import file (or '-' for stdin)"); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, true); + at::add_sparse_size_option(options); + at::add_no_progress_option(options); + at::add_export_format_option(options); + + // TODO legacy rbd allowed import to accept both 'image'/'dest' and + // 'pool'/'dest-pool' + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE, " deprecated[:dest-pool]"); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE, " deprecated[:dest]"); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string path; + size_t arg_index = 0; + int r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + // odd check to support legacy / deprecated behavior of import + std::string deprecated_pool_name; + if (vm.count(at::POOL_NAME)) { + 
deprecated_pool_name = vm[at::POOL_NAME].as<std::string>(); + } + + std::string deprecated_image_name; + if (vm.count(at::IMAGE_NAME)) { + deprecated_image_name = vm[at::IMAGE_NAME].as<std::string>(); + } else { + deprecated_image_name = path.substr(path.find_last_of("/\\") + 1); + } + + std::string deprecated_snap_name; + r = utils::extract_spec(deprecated_image_name, &deprecated_pool_name, + nullptr, &deprecated_image_name, + &deprecated_snap_name, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE; + if (vm.count(at::IMAGE_SPARSE_SIZE)) { + sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>(); + } + + std::string pool_name = deprecated_pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name = deprecated_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, false, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (image_name.empty()) { + image_name = deprecated_image_name; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, true, &opts); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + int format = 1; + if (vm.count("export-format")) + format = vm["export-format"].as<uint64_t>(); + + librbd::RBD rbd; + r = do_import(rados, rbd, io_ctx, image_name.c_str(), path.c_str(), + opts, vm[at::NO_PROGRESS].as<bool>(), format, sparse_size); + if (r < 0) { + std::cerr << "rbd: import failed: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +Shell::Action action( + {"import"}, {}, "Import image from file.", at::get_long_features_help(), + &get_arguments, &execute); + +} // namespace import +} // namespace action +} // namespace rbd diff --git 
a/src/tools/rbd/action/Info.cc b/src/tools/rbd/action/Info.cc new file mode 100644 index 000000000..f8d053cd7 --- /dev/null +++ b/src/tools/rbd/action/Info.cc @@ -0,0 +1,471 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/types.h" +#include "include/stringify.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include <iostream> +#include <boost/program_options.hpp> + +#include "common/Clock.h" + +namespace rbd { +namespace action { +namespace info { + +namespace at = argument_types; +namespace po = boost::program_options; + +static void format_bitmask(Formatter *f, const std::string &name, + const std::map<uint64_t, std::string>& mapping, + uint64_t bitmask) +{ + int count = 0; + std::string group_name(name + "s"); + if (f == NULL) { + std::cout << "\t" << group_name << ": "; + } else { + f->open_array_section(group_name.c_str()); + } + for (std::map<uint64_t, std::string>::const_iterator it = mapping.begin(); + it != mapping.end(); ++it) { + if ((it->first & bitmask) == 0) { + continue; + } + + if (f == NULL) { + if (count++ > 0) { + std::cout << ", "; + } + std::cout << it->second; + } else { + f->dump_string(name.c_str(), it->second); + } + } + if (f == NULL) { + std::cout << std::endl; + } else { + f->close_section(); + } +} + +static void format_features(Formatter *f, uint64_t features) +{ + format_bitmask(f, "feature", at::ImageFeatures::FEATURE_MAPPING, features); +} + +static void format_op_features(Formatter *f, uint64_t op_features) +{ + static std::map<uint64_t, std::string> mapping = { + {RBD_OPERATION_FEATURE_CLONE_PARENT, RBD_OPERATION_FEATURE_NAME_CLONE_PARENT}, + {RBD_OPERATION_FEATURE_CLONE_CHILD, RBD_OPERATION_FEATURE_NAME_CLONE_CHILD}, + {RBD_OPERATION_FEATURE_GROUP, RBD_OPERATION_FEATURE_NAME_GROUP}, + {RBD_OPERATION_FEATURE_SNAP_TRASH, 
RBD_OPERATION_FEATURE_NAME_SNAP_TRASH}}; + format_bitmask(f, "op_feature", mapping, op_features); +} + +static void format_flags(Formatter *f, uint64_t flags) +{ + std::map<uint64_t, std::string> mapping = { + {RBD_FLAG_OBJECT_MAP_INVALID, "object map invalid"}, + {RBD_FLAG_FAST_DIFF_INVALID, "fast diff invalid"}}; + format_bitmask(f, "flag", mapping, flags); +} + +void format_timestamp(struct timespec timestamp, std::string ×tamp_str) { + if(timestamp.tv_sec != 0) { + time_t ts = timestamp.tv_sec; + timestamp_str = ctime(&ts); + timestamp_str = timestamp_str.substr(0, timestamp_str.length() - 1); + } +} + +static int do_show_info(librados::IoCtx &io_ctx, librbd::Image& image, + const std::string &snapname, Formatter *f) +{ + librbd::image_info_t info; + uint8_t old_format; + uint64_t overlap, features, flags, snap_limit; + bool snap_protected = false; + librbd::mirror_image_info_t mirror_image; + librbd::mirror_image_mode_t mirror_mode = RBD_MIRROR_IMAGE_MODE_JOURNAL; + std::vector<librbd::snap_info_t> snaps; + int r; + + std::string imgname; + r = image.get_name(&imgname); + if (r < 0) + return r; + + r = image.snap_list(snaps); + if (r < 0) + return r; + + r = image.stat(info, sizeof(info)); + if (r < 0) + return r; + + r = image.old_format(&old_format); + if (r < 0) + return r; + + std::string imgid; + if (!old_format) { + r = image.get_id(&imgid); + if (r < 0) + return r; + } + + std::string data_pool; + if (!old_format) { + int64_t data_pool_id = image.get_data_pool_id(); + if (data_pool_id != io_ctx.get_id()) { + librados::Rados rados(io_ctx); + librados::IoCtx data_io_ctx; + r = rados.ioctx_create2(data_pool_id, data_io_ctx); + if (r < 0) { + data_pool = "<missing data pool " + stringify(data_pool_id) + ">"; + } else { + data_pool = data_io_ctx.get_pool_name(); + } + } + } + + r = image.overlap(&overlap); + if (r < 0) + return r; + + r = image.features(&features); + if (r < 0) + return r; + + uint64_t op_features; + r = image.get_op_features(&op_features); 
+ if (r < 0) { + return r; + } + + r = image.get_flags(&flags); + if (r < 0) { + return r; + } + + if (!snapname.empty()) { + r = image.snap_is_protected(snapname.c_str(), &snap_protected); + if (r < 0) + return r; + } + + mirror_image.state = RBD_MIRROR_IMAGE_DISABLED; + r = image.mirror_image_get_info(&mirror_image, sizeof(mirror_image)); + if (r < 0) { + return r; + } + + if (mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) { + r = image.mirror_image_get_mode(&mirror_mode); + if (r < 0) { + return r; + } + } + + r = image.snap_get_limit(&snap_limit); + if (r < 0) + return r; + + std::string prefix = image.get_block_name_prefix(); + + librbd::group_info_t group_info; + r = image.get_group(&group_info, sizeof(group_info)); + if (r < 0) { + return r; + } + + std::string group_string = ""; + if (RBD_GROUP_INVALID_POOL != group_info.pool) { + std::string group_pool; + librados::Rados rados(io_ctx); + librados::IoCtx group_io_ctx; + r = rados.ioctx_create2(group_info.pool, group_io_ctx); + if (r < 0) { + group_pool = "<missing group pool " + stringify(group_info.pool) + ">"; + } else { + group_pool = group_io_ctx.get_pool_name(); + } + + group_string = group_pool + "/"; + if (!io_ctx.get_namespace().empty()) { + group_string += io_ctx.get_namespace() + "/"; + } + group_string += group_info.name; + } + + struct timespec create_timestamp; + image.get_create_timestamp(&create_timestamp); + + std::string create_timestamp_str = ""; + format_timestamp(create_timestamp, create_timestamp_str); + + struct timespec access_timestamp; + image.get_access_timestamp(&access_timestamp); + + std::string access_timestamp_str = ""; + format_timestamp(access_timestamp, access_timestamp_str); + + struct timespec modify_timestamp; + image.get_modify_timestamp(&modify_timestamp); + + std::string modify_timestamp_str = ""; + format_timestamp(modify_timestamp, modify_timestamp_str); + + if (f) { + f->open_object_section("image"); + f->dump_string("name", imgname); + f->dump_string("id", 
imgid); + f->dump_unsigned("size", info.size); + f->dump_unsigned("objects", info.num_objs); + f->dump_int("order", info.order); + f->dump_unsigned("object_size", info.obj_size); + f->dump_int("snapshot_count", snaps.size()); + if (!data_pool.empty()) { + f->dump_string("data_pool", data_pool); + } + f->dump_string("block_name_prefix", prefix); + f->dump_int("format", (old_format ? 1 : 2)); + } else { + std::cout << "rbd image '" << imgname << "':\n" + << "\tsize " << byte_u_t(info.size) << " in " + << info.num_objs << " objects" + << std::endl + << "\torder " << info.order + << " (" << byte_u_t(info.obj_size) << " objects)" + << std::endl + << "\tsnapshot_count: " << snaps.size() + << std::endl; + if (!imgid.empty()) { + std::cout << "\tid: " << imgid << std::endl; + } + if (!data_pool.empty()) { + std::cout << "\tdata_pool: " << data_pool << std::endl; + } + std::cout << "\tblock_name_prefix: " << prefix + << std::endl + << "\tformat: " << (old_format ? "1" : "2") + << std::endl; + } + + if (!old_format) { + format_features(f, features); + format_op_features(f, op_features); + format_flags(f, flags); + } + + if (!group_string.empty()) { + if (f) { + f->dump_string("group", group_string); + } else { + std::cout << "\tgroup: " << group_string + << std::endl; + } + } + + if (!create_timestamp_str.empty()) { + if (f) { + f->dump_string("create_timestamp", create_timestamp_str); + } else { + std::cout << "\tcreate_timestamp: " << create_timestamp_str + << std::endl; + } + } + + if (!access_timestamp_str.empty()) { + if (f) { + f->dump_string("access_timestamp", access_timestamp_str); + } else { + std::cout << "\taccess_timestamp: " << access_timestamp_str + << std::endl; + } + } + + if (!modify_timestamp_str.empty()) { + if (f) { + f->dump_string("modify_timestamp", modify_timestamp_str); + } else { + std::cout << "\tmodify_timestamp: " << modify_timestamp_str + << std::endl; + } + } + + // snapshot info, if present + if (!snapname.empty()) { + if (f) { + 
f->dump_string("protected", snap_protected ? "true" : "false"); + } else { + std::cout << "\tprotected: " << (snap_protected ? "True" : "False") + << std::endl; + } + } + + if (snap_limit < UINT64_MAX) { + if (f) { + f->dump_unsigned("snapshot_limit", snap_limit); + } else { + std::cout << "\tsnapshot_limit: " << snap_limit << std::endl; + } + } + + // parent info, if present + librbd::linked_image_spec_t parent_image_spec; + librbd::snap_spec_t parent_snap_spec; + if ((image.get_parent(&parent_image_spec, &parent_snap_spec) == 0) && + (parent_image_spec.image_name.length() > 0)) { + if (f) { + f->open_object_section("parent"); + f->dump_string("pool", parent_image_spec.pool_name); + f->dump_string("pool_namespace", parent_image_spec.pool_namespace); + f->dump_string("image", parent_image_spec.image_name); + f->dump_string("id", parent_image_spec.image_id); + f->dump_string("snapshot", parent_snap_spec.name); + f->dump_bool("trash", parent_image_spec.trash); + f->dump_unsigned("overlap", overlap); + f->close_section(); + } else { + std::cout << "\tparent: " << parent_image_spec.pool_name << "/"; + if (!parent_image_spec.pool_namespace.empty()) { + std::cout << parent_image_spec.pool_namespace << "/"; + } + std::cout << parent_image_spec.image_name << "@" + << parent_snap_spec.name; + if (parent_image_spec.trash) { + std::cout << " (trash " << parent_image_spec.image_id << ")"; + } + std::cout << std::endl; + std::cout << "\toverlap: " << byte_u_t(overlap) << std::endl; + } + } + + // striping info, if feature is set + if (features & RBD_FEATURE_STRIPINGV2) { + if (f) { + f->dump_unsigned("stripe_unit", image.get_stripe_unit()); + f->dump_unsigned("stripe_count", image.get_stripe_count()); + } else { + std::cout << "\tstripe unit: " << byte_u_t(image.get_stripe_unit()) + << std::endl + << "\tstripe count: " << image.get_stripe_count() << std::endl; + } + } + + if (features & RBD_FEATURE_JOURNALING) { + if (f) { + f->dump_string("journal", utils::image_id(image)); + 
} else { + std::cout << "\tjournal: " << utils::image_id(image) << std::endl; + } + } + + if (features & RBD_FEATURE_JOURNALING || + mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) { + if (f) { + f->open_object_section("mirroring"); + f->dump_string("mode", + utils::mirror_image_mode(mirror_mode)); + f->dump_string("state", + utils::mirror_image_state(mirror_image.state)); + if (mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) { + f->dump_string("global_id", mirror_image.global_id); + f->dump_bool("primary", mirror_image.primary); + } + f->close_section(); + } else { + std::cout << "\tmirroring state: " + << utils::mirror_image_state(mirror_image.state) << std::endl; + if (mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) { + std::cout << "\tmirroring mode: " + << utils::mirror_image_mode(mirror_mode) << std::endl + << "\tmirroring global id: " << mirror_image.global_id + << std::endl + << "\tmirroring primary: " + << (mirror_image.primary ? "true" : "false") <<std::endl; + } + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } + + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + std::string image_id; + + if (vm.count(at::IMAGE_ID)) { + image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, image_id.empty(), + utils::SNAPSHOT_PRESENCE_PERMITTED, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (!image_id.empty() && 
!image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id. " + << std::endl; + return -EINVAL; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, + image_id, snap_name, true, &rados, &io_ctx, + &image); + if (r < 0) { + return r; + } + + r = do_show_info(io_ctx, image, snap_name, formatter.get()); + if (r < 0) { + std::cerr << "rbd: info: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"info"}, {}, "Show information about image size, striping, etc.", "", + &get_arguments, &execute); + +} // namespace info +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Journal.cc b/src/tools/rbd/action/Journal.cc new file mode 100644 index 000000000..08606fcc3 --- /dev/null +++ b/src/tools/rbd/action/Journal.cc @@ -0,0 +1,1251 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/Cond.h" +#include "common/Formatter.h" +#include "common/ceph_json.h" +#include "common/errno.h" +#include "common/safe_io.h" +#include "include/stringify.h" +#include <fstream> +#include <sstream> +#include <boost/program_options.hpp> +#include "cls/rbd/cls_rbd_client.h" +#include "cls/journal/cls_journal_types.h" +#include "cls/journal/cls_journal_client.h" + +#include "journal/Journaler.h" +#include "journal/ReplayEntry.h" +#include "journal/ReplayHandler.h" +#include "journal/Settings.h" +#include "librbd/journal/Types.h" + +namespace rbd { +namespace action { +namespace journal { + +namespace at = argument_types; +namespace po = boost::program_options; + +static const std::string JOURNAL_SPEC("journal-spec"); +static const 
std::string JOURNAL_NAME("journal"); +static const std::string DEST_JOURNAL_NAME("dest-journal"); + +void add_journal_option(po::options_description *opt, + at::ArgumentModifier modifier) { + std::string name = JOURNAL_NAME; + std::string description = at::get_description_prefix(modifier) + + "journal name"; + switch (modifier) { + case at::ARGUMENT_MODIFIER_NONE: + case at::ARGUMENT_MODIFIER_SOURCE: + break; + case at::ARGUMENT_MODIFIER_DEST: + name = DEST_JOURNAL_NAME; + break; + } + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_journal_spec_options(po::options_description *pos, + po::options_description *opt, + at::ArgumentModifier modifier) { + + pos->add_options() + ((get_name_prefix(modifier) + JOURNAL_SPEC).c_str(), + (get_description_prefix(modifier) + "journal specification\n" + + "(example: [<pool-name>/[<namespace>/]]<journal-name>)").c_str()); + add_pool_option(opt, modifier); + add_namespace_option(opt, modifier); + add_image_option(opt, modifier); + add_journal_option(opt, modifier); +} + +int get_pool_journal_names(const po::variables_map &vm, + at::ArgumentModifier mod, + size_t *spec_arg_index, + std::string *pool_name, + std::string *namespace_name, + std::string *journal_name) { + std::string pool_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_POOL_NAME : at::POOL_NAME); + std::string namespace_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_NAMESPACE_NAME : at::NAMESPACE_NAME); + std::string image_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_IMAGE_NAME : at::IMAGE_NAME); + std::string journal_key = (mod == at::ARGUMENT_MODIFIER_DEST ? 
+ DEST_JOURNAL_NAME : JOURNAL_NAME); + + if (vm.count(pool_key) && pool_name != nullptr) { + *pool_name = vm[pool_key].as<std::string>(); + } + if (vm.count(namespace_key) && namespace_name != nullptr) { + *namespace_name = vm[namespace_key].as<std::string>(); + } + if (vm.count(journal_key) && journal_name != nullptr) { + *journal_name = vm[journal_key].as<std::string>(); + } + + std::string image_name; + if (vm.count(image_key)) { + image_name = vm[image_key].as<std::string>(); + } + + int r; + if (journal_name != nullptr && !journal_name->empty()) { + // despite the separate pool option, + // we can also specify them via the journal option + std::string journal_name_copy(*journal_name); + r = extract_spec(journal_name_copy, pool_name, namespace_name, journal_name, + nullptr, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + } + + if (!image_name.empty()) { + // despite the separate pool option, + // we can also specify them via the image option + std::string image_name_copy(image_name); + r = extract_spec(image_name_copy, pool_name, namespace_name, &image_name, + nullptr, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + } + + if (journal_name != nullptr && spec_arg_index != nullptr && + journal_name->empty()) { + std::string spec = utils::get_positional_argument(vm, (*spec_arg_index)++); + if (!spec.empty()) { + r = extract_spec(spec, pool_name, namespace_name, journal_name, nullptr, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + } + } + + if (pool_name != nullptr && pool_name->empty()) { + *pool_name = utils::get_default_pool_name(); + } + + if (pool_name != nullptr && namespace_name != nullptr && + journal_name != nullptr && journal_name->empty() && !image_name.empty()) { + // Try to get journal name from image info. 
+ librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + int r = utils::init_and_open_image(*pool_name, *namespace_name, image_name, + "", "", true, &rados, &io_ctx, &image); + if (r < 0) { + std::cerr << "rbd: failed to open image " << image_name + << " to get journal name: " << cpp_strerror(r) << std::endl; + return r; + } + + uint64_t features; + r = image.features(&features); + if (r < 0) { + return r; + } + if ((features & RBD_FEATURE_JOURNALING) == 0) { + std::cerr << "rbd: journaling is not enabled for image " << image_name + << std::endl; + return -EINVAL; + } + *journal_name = utils::image_id(image); + } + + if (journal_name != nullptr && journal_name->empty()) { + std::string prefix = at::get_description_prefix(mod); + std::cerr << "rbd: " + << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string()) + << "journal was not specified" << std::endl; + return -EINVAL; + } + + return 0; +} + +static int do_show_journal_info(librados::Rados& rados, librados::IoCtx& io_ctx, + const std::string& journal_id, Formatter *f) +{ + int r; + C_SaferCond cond; + + std::string header_oid = ::journal::Journaler::header_oid(journal_id); + std::string object_oid_prefix = ::journal::Journaler::object_oid_prefix( + io_ctx.get_id(), journal_id); + uint8_t order; + uint8_t splay_width; + int64_t pool_id; + + cls::journal::client::get_immutable_metadata(io_ctx, header_oid, &order, + &splay_width, &pool_id, &cond); + r = cond.wait(); + if (r < 0) { + std::cerr << "failed to get journal metadata: " << cpp_strerror(r) + << std::endl; + return r; + } + + std::string object_pool_name; + if (pool_id >= 0) { + r = rados.pool_reverse_lookup(pool_id, &object_pool_name); + if (r < 0) { + std::cerr << "error looking up pool name for pool_id=" << pool_id << ": " + << cpp_strerror(r) << std::endl; + } + } + + if (f) { + f->open_object_section("journal"); + f->dump_string("journal_id", journal_id); + f->dump_string("header_oid", header_oid); + 
f->dump_string("object_oid_prefix", object_oid_prefix); + f->dump_int("order", order); + f->dump_int("splay_width", splay_width); + if (!object_pool_name.empty()) { + f->dump_string("object_pool", object_pool_name); + } + f->close_section(); + f->flush(std::cout); + } else { + std::cout << "rbd journal '" << journal_id << "':" << std::endl; + std::cout << "\theader_oid: " << header_oid << std::endl; + std::cout << "\tobject_oid_prefix: " << object_oid_prefix << std::endl; + std::cout << "\torder: " << static_cast<int>(order) << " (" + << byte_u_t(1ull << order) << " objects)"<< std::endl; + std::cout << "\tsplay_width: " << static_cast<int>(splay_width) << std::endl; + if (!object_pool_name.empty()) { + std::cout << "\tobject_pool: " << object_pool_name << std::endl; + } + } + return 0; +} + +static int do_show_journal_status(librados::IoCtx& io_ctx, + const std::string& journal_id, Formatter *f) +{ + int r; + + C_SaferCond cond; + uint64_t minimum_set; + uint64_t active_set; + std::set<cls::journal::Client> registered_clients; + std::string oid = ::journal::Journaler::header_oid(journal_id); + + cls::journal::client::get_mutable_metadata(io_ctx, oid, &minimum_set, + &active_set, ®istered_clients, + &cond); + r = cond.wait(); + if (r < 0) { + std::cerr << "warning: failed to get journal metadata" << std::endl; + return r; + } + + if (f) { + f->open_object_section("status"); + f->dump_unsigned("minimum_set", minimum_set); + f->dump_unsigned("active_set", active_set); + f->open_array_section("registered_clients"); + for (std::set<cls::journal::Client>::iterator c = + registered_clients.begin(); c != registered_clients.end(); ++c) { + f->open_object_section("client"); + c->dump(f); + f->close_section(); + } + f->close_section(); + f->close_section(); + f->flush(std::cout); + } else { + std::cout << "minimum_set: " << minimum_set << std::endl; + std::cout << "active_set: " << active_set << std::endl; + std::cout << "registered clients: " << std::endl; + for 
(std::set<cls::journal::Client>::iterator c =
       registered_clients.begin(); c != registered_clients.end(); ++c) {
      std::cout << "\t" << *c << std::endl;
    }
  }
  return 0;
}

// Reset a journal: look up the owning image in the rbd directory, then
// toggle the journaling feature off and back on, which deletes and
// re-creates the journal.
static int do_reset_journal(librados::IoCtx& io_ctx,
                            const std::string& journal_id)
{
  // disable/re-enable journaling to delete/re-create the journal
  // to properly handle mirroring constraints
  std::string image_name;
  int r = librbd::cls_client::dir_get_name(&io_ctx, RBD_DIRECTORY, journal_id,
                                           &image_name);
  if (r < 0) {
    std::cerr << "failed to locate journal's image: " << cpp_strerror(r)
              << std::endl;
    return r;
  }

  librbd::Image image;
  r = utils::open_image(io_ctx, image_name, false, &image);
  if (r < 0) {
    std::cerr << "failed to open image: " << cpp_strerror(r) << std::endl;
    return r;
  }

  r = image.update_features(RBD_FEATURE_JOURNALING, false);
  if (r < 0) {
    std::cerr << "failed to disable image journaling: " << cpp_strerror(r)
              << std::endl;
    return r;
  }

  // NOTE(review): if this re-enable fails the image is left with
  // journaling disabled; only the error is reported to the caller
  r = image.update_features(RBD_FEATURE_JOURNALING, true);
  if (r < 0) {
    std::cerr << "failed to re-enable image journaling: " << cpp_strerror(r)
              << std::endl;
    return r;
  }
  return 0;
}

// Mark journal client(s) as DISCONNECTED.  An empty client_id selects all
// registered clients except the image's own master client; otherwise only
// the matching client is disconnected.  Returns -ENOENT when nothing
// matched.
static int do_disconnect_journal_client(librados::IoCtx& io_ctx,
                                        const std::string& journal_id,
                                        const std::string& client_id)
{
  int r;

  C_SaferCond cond;
  uint64_t minimum_set;
  uint64_t active_set;
  std::set<cls::journal::Client> registered_clients;
  std::string oid = ::journal::Journaler::header_oid(journal_id);

  // fetch the registered client list from the journal header object
  cls::journal::client::get_mutable_metadata(io_ctx, oid, &minimum_set,
                                             &active_set, &registered_clients,
                                             &cond);
  r = cond.wait();
  if (r < 0) {
    std::cerr << "warning: failed to get journal metadata" << std::endl;
    return r;
  }

  // the image's own (master) client is registered under the empty id and
  // is never disconnected here
  static const std::string IMAGE_CLIENT_ID("");

  bool found = false;
  for (auto &c : registered_clients) {
    if (c.id == IMAGE_CLIENT_ID || (!client_id.empty() && client_id != c.id)) {
      continue;
    }
    r =
cls::journal::client::client_update_state(io_ctx, oid, c.id, + cls::journal::CLIENT_STATE_DISCONNECTED); + if (r < 0) { + std::cerr << "warning: failed to disconnect client " << c.id << ": " + << cpp_strerror(r) << std::endl; + return r; + } + std::cout << "client " << c.id << " disconnected" << std::endl; + found = true; + } + + if (!found) { + if (!client_id.empty()) { + std::cerr << "warning: client " << client_id << " is not registered" + << std::endl; + } else { + std::cerr << "no registered clients to disconnect" << std::endl; + } + return -ENOENT; + } + + bufferlist bl; + r = io_ctx.notify2(oid, bl, 5000, NULL); + if (r < 0) { + std::cerr << "warning: failed to notify state change:" << ": " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +class Journaler : public ::journal::Journaler { +public: + Journaler(librados::IoCtx& io_ctx, const std::string& journal_id, + const std::string &client_id) : + ::journal::Journaler(io_ctx, journal_id, client_id, {}, nullptr) { + } + + int init() { + int r; + + // TODO register with librbd payload + r = register_client(bufferlist()); + if (r < 0) { + std::cerr << "failed to register client: " << cpp_strerror(r) + << std::endl; + return r; + } + + C_SaferCond cond; + + ::journal::Journaler::init(&cond); + r = cond.wait(); + if (r < 0) { + std::cerr << "failed to initialize journal: " << cpp_strerror(r) + << std::endl; + (void) unregister_client(); + return r; + } + + return 0; + } + + int shut_down() { + int r = unregister_client(); + if (r < 0) { + std::cerr << "rbd: failed to unregister journal client: " + << cpp_strerror(r) << std::endl; + } + ::journal::Journaler::shut_down(); + + return r; + } +}; + +class JournalPlayer { +public: + JournalPlayer(librados::IoCtx& io_ctx, const std::string& journal_id, + const std::string &client_id) : + m_journaler(io_ctx, journal_id, client_id), + m_cond(), + m_r(0) { + } + + virtual ~JournalPlayer() {} + + virtual int exec() { + int r; + + r = m_journaler.init(); 
+ if (r < 0) { + return r; + } + + ReplayHandler replay_handler(this); + + m_journaler.start_replay(&replay_handler); + + r = m_cond.wait(); + if (r < 0) { + std::cerr << "rbd: failed to process journal: " << cpp_strerror(r) + << std::endl; + if (m_r == 0) { + m_r = r; + } + } + return m_r; + } + + int shut_down() { + return m_journaler.shut_down(); + } + +protected: + struct ReplayHandler : public ::journal::ReplayHandler { + JournalPlayer *journal; + explicit ReplayHandler(JournalPlayer *_journal) : journal(_journal) {} + + void handle_entries_available() override { + journal->handle_replay_ready(); + } + void handle_complete(int r) override { + journal->handle_replay_complete(r); + } + }; + + void handle_replay_ready() { + int r = 0; + while (true) { + ::journal::ReplayEntry replay_entry; + uint64_t tag_id; + if (!m_journaler.try_pop_front(&replay_entry, &tag_id)) { + break; + } + + r = process_entry(replay_entry, tag_id); + if (r < 0) { + break; + } + } + } + + virtual int process_entry(::journal::ReplayEntry replay_entry, + uint64_t tag_id) = 0; + + void handle_replay_complete(int r) { + if (m_r == 0 && r < 0) { + m_r = r; + } + m_journaler.stop_replay(&m_cond); + } + + Journaler m_journaler; + C_SaferCond m_cond; + int m_r; +}; + +static int inspect_entry(bufferlist& data, + librbd::journal::EventEntry& event_entry, + bool verbose) { + try { + auto it = data.cbegin(); + decode(event_entry, it); + } catch (const buffer::error &err) { + std::cerr << "failed to decode event entry: " << err.what() << std::endl; + return -EINVAL; + } + if (verbose) { + JSONFormatter f(true); + f.open_object_section("event_entry"); + event_entry.dump(&f); + f.close_section(); + f.flush(std::cout); + } + return 0; +} + +class JournalInspector : public JournalPlayer { +public: + JournalInspector(librados::IoCtx& io_ctx, const std::string& journal_id, + bool verbose) : + JournalPlayer(io_ctx, journal_id, "INSPECT"), + m_verbose(verbose), + m_s() { + } + + int exec() override { + int r 
= JournalPlayer::exec();
    m_s.print();
    return r;
  }

private:
  // inspected/error counters printed after replay finishes
  struct Stats {
    Stats() : total(0), error(0) {}

    void print() {
      std::cout << "Summary:" << std::endl
                << " " << total << " entries inspected, " << error << " errors"
                << std::endl;
    }

    int total;
    int error;
  };

  // Decode (and optionally dump) one replayed journal entry.  Decode
  // failures are counted and remembered in m_r but replay continues
  // (always returns 0).
  int process_entry(::journal::ReplayEntry replay_entry,
                    uint64_t tag_id) override {
    m_s.total++;
    if (m_verbose) {
      std::cout << "Entry: tag_id=" << tag_id << ", commit_tid="
                << replay_entry.get_commit_tid() << std::endl;
    }
    bufferlist data = replay_entry.get_data();
    librbd::journal::EventEntry event_entry;
    int r = inspect_entry(data, event_entry, m_verbose);
    if (r < 0) {
      m_r = r;
      m_s.error++;
    }
    return 0;
  }

  bool m_verbose;
  Stats m_s;
};

// Replay a journal with a throw-away "INSPECT" client, decoding every
// entry to validate it.  The journaler is always shut down; the first
// error encountered wins.
static int do_inspect_journal(librados::IoCtx& io_ctx,
                              const std::string& journal_id,
                              bool verbose) {
  JournalInspector inspector(io_ctx, journal_id, verbose);
  int r = inspector.exec();
  if (r < 0) {
    inspector.shut_down();
    return r;
  }

  r = inspector.shut_down();
  if (r < 0) {
    return r;
  }
  return 0;
}

// JSON-(de)serializable record for one exported journal entry.
struct ExportEntry {
  uint64_t tag_id;     // journal tag the entry belongs to
  uint64_t commit_tid; // commit transaction id of the entry
  int type;            // librbd journal event type
  bufferlist entry;    // raw encoded entry payload

  ExportEntry() : tag_id(0), commit_tid(0), type(0), entry() {}

  ExportEntry(uint64_t tag_id, uint64_t commit_tid, int type,
              const bufferlist& entry)
    : tag_id(tag_id), commit_tid(commit_tid), type(type), entry(entry) {
  }

  void dump(Formatter *f) const {
    ::encode_json("tag_id", tag_id, f);
    ::encode_json("commit_tid", commit_tid, f);
    ::encode_json("type", type, f);
    ::encode_json("entry", entry, f);
  }

  void decode_json(JSONObj *obj) {
    JSONDecoder::decode_json("tag_id", tag_id, obj);
    JSONDecoder::decode_json("commit_tid", commit_tid, obj);
    JSONDecoder::decode_json("type", type, obj);
    JSONDecoder::decode_json("entry", entry, obj);
  }
};

// Replays a journal with an "EXPORT" client and writes each entry to a
// file descriptor as size-prefixed JSON records.
class JournalExporter : public JournalPlayer {
public:
  JournalExporter(librados::IoCtx& io_ctx, const std::string&
journal_id, + int fd, bool no_error, bool verbose) : + JournalPlayer(io_ctx, journal_id, "EXPORT"), + m_journal_id(journal_id), + m_fd(fd), + m_no_error(no_error), + m_verbose(verbose), + m_s() { + } + + int exec() override { + std::string header("# journal_id: " + m_journal_id + "\n"); + int r; + r = safe_write(m_fd, header.c_str(), header.size()); + if (r < 0) { + std::cerr << "rbd: failed to write to export file: " << cpp_strerror(r) + << std::endl; + return r; + } + r = JournalPlayer::exec(); + m_s.print(); + return r; + } + +private: + struct Stats { + Stats() : total(0), error(0) {} + + void print() { + std::cout << total << " entries processed, " << error << " errors" + << std::endl; + } + + int total; + int error; + }; + + int process_entry(::journal::ReplayEntry replay_entry, + uint64_t tag_id) override { + m_s.total++; + int type = -1; + bufferlist entry = replay_entry.get_data(); + librbd::journal::EventEntry event_entry; + int r = inspect_entry(entry, event_entry, m_verbose); + if (r < 0) { + m_s.error++; + m_r = r; + return m_no_error ? 
0 : r; + } else { + type = event_entry.get_event_type(); + } + ExportEntry export_entry(tag_id, replay_entry.get_commit_tid(), type, + entry); + JSONFormatter f; + ::encode_json("event_entry", export_entry, &f); + std::ostringstream oss; + f.flush(oss); + std::string objstr = oss.str(); + std::string header = stringify(objstr.size()) + " "; + r = safe_write(m_fd, header.c_str(), header.size()); + if (r == 0) { + r = safe_write(m_fd, objstr.c_str(), objstr.size()); + } + if (r == 0) { + r = safe_write(m_fd, "\n", 1); + } + if (r < 0) { + std::cerr << "rbd: failed to write to export file: " << cpp_strerror(r) + << std::endl; + m_s.error++; + return r; + } + return 0; + } + + std::string m_journal_id; + int m_fd; + bool m_no_error; + bool m_verbose; + Stats m_s; +}; + +static int do_export_journal(librados::IoCtx& io_ctx, + const std::string& journal_id, + const std::string& path, + bool no_error, bool verbose) { + int r; + int fd; + bool to_stdout = path == "-"; + if (to_stdout) { + fd = STDOUT_FILENO; + } else { + fd = open(path.c_str(), O_WRONLY | O_CREAT | O_EXCL | O_BINARY, 0644); + if (fd < 0) { + r = -errno; + std::cerr << "rbd: error creating " << path << std::endl; + return r; + } +#ifdef HAVE_POSIX_FADVISE + posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + } + + JournalExporter exporter(io_ctx, journal_id, fd, no_error, verbose); + r = exporter.exec(); + + if (!to_stdout) { + close(fd); + } + + int shut_down_r = exporter.shut_down(); + if (r == 0 && shut_down_r < 0) { + r = shut_down_r; + } + + return r; +} + +class JournalImporter { +public: + JournalImporter(librados::IoCtx& io_ctx, const std::string& journal_id, + int fd, bool no_error, bool verbose) : + m_journaler(io_ctx, journal_id, "IMPORT"), + m_fd(fd), + m_no_error(no_error), + m_verbose(verbose) { + } + + bool read_entry(bufferlist& bl, int& r) { + // Entries are stored in the file using the following format: + // + // # Optional comments + // NNN {json encoded entry} + // ... 
    //
    // Where NNN is the encoded entry size.
    bl.clear();
    char buf[80];
    // Skip line feed and comments (lines started with #).
    while ((r = safe_read_exact(m_fd, buf, 1)) == 0) {
      if (buf[0] == '\n') {
        continue;
      } else if (buf[0] == '#') {
        while ((r = safe_read_exact(m_fd, buf, 1)) == 0) {
          if (buf[0] == '\n') {
            break;
          }
        }
      } else {
        break;
      }
    }
    if (r < 0) {
      if (r == -EDOM) {
        // NOTE(review): -EDOM from safe_read_exact is treated as a clean
        // end-of-input here (r reset to 0) -- presumably a short read at
        // EOF; confirm against safe_read_exact's contract.
        r = 0;
      }
      return false;
    }
    // Read entry size to buf.  buf[0] already holds the first size digit.
    if (!isdigit(buf[0])) {
      r = -EINVAL;
      std::cerr << "rbd: import data invalid format (digit expected)"
                << std::endl;
      return false;
    }
    for (size_t i = 1; i < sizeof(buf); i++) {
      r = safe_read_exact(m_fd, buf + i, 1);
      if (r < 0) {
        std::cerr << "rbd: error reading import data" << std::endl;
        return false;
      }
      if (!isdigit(buf[i])) {
        // the size field is terminated by a single space
        if (buf[i] != ' ') {
          r = -EINVAL;
          std::cerr << "rbd: import data invalid format (space expected)"
                    << std::endl;
          return false;
        }
        buf[i] = '\0';
        break;
      }
    }
    int entry_size = atoi(buf);
    if (entry_size == 0) {
      r = -EINVAL;
      std::cerr << "rbd: import data invalid format (zero entry size)"
                << std::endl;
      return false;
    }
    ceph_assert(entry_size > 0);
    // Read entry.
    r = bl.read_fd(m_fd, entry_size);
    if (r < 0) {
      std::cerr << "rbd: error reading from stdin: " << cpp_strerror(r)
                << std::endl;
      return false;
    }
    if (r != entry_size) {
      std::cerr << "rbd: error reading from stdin: truncated"
                << std::endl;
      r = -EINVAL;
      return false;
    }
    r = 0;
    return true;
  }

  // Read every entry from the import stream and append it to the journal.
  // With --no-error, bad entries are reported and skipped (the first error
  // is still returned at the end via r1); without it, the first bad entry
  // aborts the loop.
  int exec() {
    int r = m_journaler.init();
    if (r < 0) {
      return r;
    }
    m_journaler.start_append(0);

    int r1 = 0;
    bufferlist bl;
    int n = 0;
    int error_count = 0;
    while (read_entry(bl, r)) {
      n++;
      // optimistically count this entry as an error; undone below once it
      // has been appended successfully
      error_count++;
      JSONParser p;
      if (!p.parse(bl.c_str(), bl.length())) {
        std::cerr << "rbd: error parsing input (entry " << n << ")"
                  << std::endl;
        r = -EINVAL;
        if (m_no_error) {
          r1 = r;
          continue;
        } else {
          break;
        }
      }
      ExportEntry e;
      try {
        decode_json_obj(e, &p);
      } catch (const JSONDecoder::err& err) {
        std::cerr << "rbd: error json decoding import data (entry " << n << "):"
                  << err.what() << std::endl;
        r = -EINVAL;
        if (m_no_error) {
          r1 = r;
          continue;
        } else {
          break;
        }
      }
      librbd::journal::EventEntry event_entry;
      r = inspect_entry(e.entry, event_entry, m_verbose);
      if (r < 0) {
        std::cerr << "rbd: corrupted entry " << n << ": tag_tid=" << e.tag_id
                  << ", commit_tid=" << e.commit_tid << std::endl;
        if (m_no_error) {
          r1 = r;
          continue;
        } else {
          break;
        }
      }
      m_journaler.append(e.tag_id, e.entry);
      error_count--;
    }

    std::cout << n << " entries processed, " << error_count << " errors" << std::endl;

    std::cout << "Waiting for journal append to complete..."
              << std::endl;

    C_SaferCond cond;
    m_journaler.stop_append(&cond);
    r = cond.wait();

    if (r < 0) {
      std::cerr << "failed to append journal: " << cpp_strerror(r) << std::endl;
    }

    // surface a deferred --no-error failure if the append itself succeeded
    if (r1 < 0 && r == 0) {
      r = r1;
    }
    return r;
  }

  int shut_down() {
    return m_journaler.shut_down();
  }

private:
  Journaler m_journaler;
  int m_fd;
  bool m_no_error;
  bool m_verbose;
};

// Import journal entries from @path ("-" for stdin) into the journal.
static int do_import_journal(librados::IoCtx& io_ctx,
                             const std::string& journal_id,
                             const std::string& path,
                             bool no_error, bool verbose) {
  int r;

  int fd;
  bool from_stdin = path == "-";
  if (from_stdin) {
    fd = STDIN_FILENO;
  } else {
    if ((fd = open(path.c_str(), O_RDONLY|O_BINARY)) < 0) {
      r = -errno;
      std::cerr << "rbd: error opening " << path << std::endl;
      return r;
    }
#ifdef HAVE_POSIX_FADVISE
    posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
#endif
  }

  JournalImporter importer(io_ctx, journal_id, fd, no_error, verbose);
  r = importer.exec();

  if (!from_stdin) {
    close(fd);
  }

  // prefer the import error; otherwise surface a shut down failure
  int shut_down_r = importer.shut_down();
  if (r == 0 && shut_down_r < 0) {
    r = shut_down_r;
  }

  return r;
}

void get_info_arguments(po::options_description *positional,
                        po::options_description *options) {
  add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_format_options(options);
}

// "rbd journal info": show information about the image journal.
int execute_info(const po::variables_map &vm,
                 const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string journal_name;
  int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
                                 &pool_name, &namespace_name, &journal_name);
  if (r < 0) {
    return r;
  }

  at::Format::Formatter formatter;
  r = utils::get_formatter(vm, &formatter);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  r =
do_show_journal_info(rados, io_ctx, journal_name, formatter.get());
  if (r < 0) {
    std::cerr << "rbd: journal info: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;

}

void get_status_arguments(po::options_description *positional,
                          po::options_description *options) {
  add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_format_options(options);
}

// "rbd journal status": show the status of the image journal.
int execute_status(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string journal_name;
  int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
                                 &pool_name, &namespace_name, &journal_name);
  if (r < 0) {
    return r;
  }

  at::Format::Formatter formatter;
  r = utils::get_formatter(vm, &formatter);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  r = do_show_journal_status(io_ctx, journal_name, formatter.get());
  if (r < 0) {
    std::cerr << "rbd: journal status: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

void get_reset_arguments(po::options_description *positional,
                         po::options_description *options) {
  add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
}

// "rbd journal reset": reset (reinitialize) the image journal.
int execute_reset(const po::variables_map &vm,
                  const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string journal_name;
  int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
                                 &pool_name, &namespace_name, &journal_name);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  r = do_reset_journal(io_ctx, journal_name);
  if (r < 0) {
    std::cerr << "rbd: journal reset: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

void get_client_disconnect_arguments(po::options_description *positional,
                                     po::options_description *options) {
  add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  options->add_options()
    ("client-id", po::value<std::string>(),
     "client ID (or leave unspecified to disconnect all)");
}

// "rbd journal client disconnect": flag a journal client -- or all clients
// when --client-id is omitted -- as disconnected.
int execute_client_disconnect(const po::variables_map &vm,
                              const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string journal_name;
  int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
                                 &pool_name, &namespace_name, &journal_name);
  if (r < 0) {
    return r;
  }

  std::string client_id;
  if (vm.count("client-id")) {
    client_id = vm["client-id"].as<std::string>();
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  r = do_disconnect_journal_client(io_ctx, journal_name, client_id);
  if (r < 0) {
    std::cerr << "rbd: journal client disconnect: " << cpp_strerror(r)
              << std::endl;
    return r;
  }
  return 0;
}

void get_inspect_arguments(po::options_description *positional,
                           po::options_description *options) {
  add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_verbose_option(options);
}

// "rbd journal inspect": check the journal for structural errors.
int execute_inspect(const po::variables_map &vm,
                    const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string journal_name;
  int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index,
                                 &pool_name, &namespace_name, &journal_name);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  r = do_inspect_journal(io_ctx, journal_name, vm[at::VERBOSE].as<bool>());
  if (r < 0) {
    std::cerr << "rbd: journal inspect: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

void get_export_arguments(po::options_description *positional,
                          po::options_description *options) {
  add_journal_spec_options(positional, options,
                           at::ARGUMENT_MODIFIER_SOURCE);
  at::add_path_options(positional, options,
                       "export file (or '-' for stdout)");
  at::add_verbose_option(options);
  at::add_no_error_option(options);
}

// "rbd journal export": serialize the journal to a file or stdout.
int execute_export(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string journal_name;
  int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index,
                                 &pool_name, &namespace_name, &journal_name);
  if (r < 0) {
    return r;
  }

  std::string path;
  r = utils::get_path(vm, &arg_index, &path);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  r = do_export_journal(io_ctx, journal_name, path, vm[at::NO_ERR].as<bool>(),
                        vm[at::VERBOSE].as<bool>());
  if (r < 0) {
    std::cerr << "rbd: journal export: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

void get_import_arguments(po::options_description *positional,
                          po::options_description *options) {
  // note: the path is the first positional argument for import
  at::add_path_options(positional, options,
                       "import file (or '-' for stdin)");
  add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
  at::add_verbose_option(options);
  at::add_no_error_option(options);
}

// "rbd journal import": replay a previously exported stream into a journal.
int execute_import(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
  std::string path;
  size_t arg_index = 0;
  int r = utils::get_path(vm, &arg_index, &path);
  if (r < 0) {
    return r;
  }

  std::string pool_name;
  std::string namespace_name;
  std::string journal_name;
  r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_DEST, &arg_index,
                             &pool_name, &namespace_name, &journal_name);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  r = do_import_journal(io_ctx, journal_name, path, vm[at::NO_ERR].as<bool>(),
                        vm[at::VERBOSE].as<bool>());
  if (r < 0) {
    std::cerr << "rbd: journal import: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

// Shell command registrations for the "rbd journal ..." actions.
Shell::Action action_info(
  {"journal", "info"}, {}, "Show information about image journal.", "",
  &get_info_arguments, &execute_info);

Shell::Action action_status(
  {"journal", "status"}, {}, "Show status of image journal.", "",
  &get_status_arguments, &execute_status);

Shell::Action action_reset(
  {"journal", "reset"}, {}, "Reset image journal.", "",
  &get_reset_arguments, &execute_reset);

Shell::Action action_inspect(
  {"journal", "inspect"}, {}, "Inspect image journal for structural errors.", "",
  &get_inspect_arguments, &execute_inspect);

Shell::Action action_export(
  {"journal", "export"}, {}, "Export image journal.", "",
  &get_export_arguments, &execute_export);

Shell::Action action_import(
  {"journal", "import"}, {}, "Import image journal.", "",
  &get_import_arguments, &execute_import);

Shell::Action action_disconnect(
  {"journal", "client", "disconnect"}, {},
  "Flag image journal client as disconnected.", "",
  &get_client_disconnect_arguments, &execute_client_disconnect);

} // namespace journal
} // namespace action
} // namespace rbd
diff --git a/src/tools/rbd/action/Kernel.cc b/src/tools/rbd/action/Kernel.cc
new file mode 100644
index 000000000..117f9492d
--- /dev/null
+++ b/src/tools/rbd/action/Kernel.cc
@@ -0,0 +1,679 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include
"acconfig.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/krbd.h" +#include "include/stringify.h" +#include "include/uuid.h" +#include "common/config_proxy.h" +#include "common/errno.h" +#include "common/safe_io.h" +#include "common/strtol.h" +#include "common/Formatter.h" +#include "msg/msg_types.h" +#include "global/global_context.h" +#include <iostream> +#include <boost/algorithm/string/predicate.hpp> +#include <boost/scope_exit.hpp> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace kernel { + +namespace at = argument_types; +namespace po = boost::program_options; + +typedef std::map<std::string, std::string> MapOptions; + +static std::string map_option_uuid_cb(const char *value_char) +{ + uuid_d u; + if (!u.parse(value_char)) + return ""; + + return stringify(u); +} + +static std::string map_option_ip_cb(const char *value_char) +{ + entity_addr_t a; + if (!a.parse(value_char)) { + return ""; + } + + return stringify(a.get_sockaddr()); +} + +static std::string map_option_int_cb(const char *value_char) +{ + std::string err; + int d = strict_strtol(value_char, 10, &err); + if (!err.empty() || d < 0) + return ""; + + return stringify(d); +} + +static std::string map_option_string_cb(const char *value_char) +{ + return value_char; +} + +static std::string map_option_read_from_replica_cb(const char *value_char) +{ + if (!strcmp(value_char, "no") || !strcmp(value_char, "balance") || + !strcmp(value_char, "localize")) { + return value_char; + } + return ""; +} + +static std::string map_option_compression_hint_cb(const char *value_char) +{ + if (!strcmp(value_char, "none") || !strcmp(value_char, "compressible") || + !strcmp(value_char, "incompressible")) { + return value_char; + } + return ""; +} + +static std::string map_option_ms_mode_cb(const char *value_char) +{ + if (!strcmp(value_char, "legacy") || !strcmp(value_char, "crc") || + !strcmp(value_char, 
"secure") || !strcmp(value_char, "prefer-crc") || + !strcmp(value_char, "prefer-secure")) { + return value_char; + } + return ""; +} + +static void put_map_option(const std::string &key, const std::string &val, + MapOptions* map_options) +{ + (*map_options)[key] = val; +} + +static int put_map_option_value(const std::string &opt, const char *value_char, + std::string (*parse_cb)(const char *), + MapOptions* map_options) +{ + if (!value_char || *value_char == '\0') { + std::cerr << "rbd: " << opt << " option requires a value" << std::endl; + return -EINVAL; + } + + std::string value = parse_cb(value_char); + if (value.empty()) { + std::cerr << "rbd: invalid " << opt << " value '" << value_char << "'" + << std::endl; + return -EINVAL; + } + + put_map_option(opt, opt + "=" + value, map_options); + return 0; +} + +static int parse_map_options(const std::string &options_string, + MapOptions* map_options) +{ + char *options = strdup(options_string.c_str()); + BOOST_SCOPE_EXIT(options) { + free(options); + } BOOST_SCOPE_EXIT_END; + + for (char *this_char = strtok(options, ", "); + this_char != NULL; + this_char = strtok(NULL, ",")) { + char *value_char; + + if ((value_char = strchr(this_char, '=')) != NULL) + *value_char++ = '\0'; + + if (!strcmp(this_char, "fsid")) { + if (put_map_option_value("fsid", value_char, map_option_uuid_cb, + map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "ip")) { + if (put_map_option_value("ip", value_char, map_option_ip_cb, + map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "share") || !strcmp(this_char, "noshare")) { + put_map_option("share", this_char, map_options); + } else if (!strcmp(this_char, "crc") || !strcmp(this_char, "nocrc")) { + put_map_option("crc", this_char, map_options); + } else if (!strcmp(this_char, "cephx_require_signatures") || + !strcmp(this_char, "nocephx_require_signatures")) { + put_map_option("cephx_require_signatures", this_char, map_options); + } else if (!strcmp(this_char, 
"tcp_nodelay") || + !strcmp(this_char, "notcp_nodelay")) { + put_map_option("tcp_nodelay", this_char, map_options); + } else if (!strcmp(this_char, "cephx_sign_messages") || + !strcmp(this_char, "nocephx_sign_messages")) { + put_map_option("cephx_sign_messages", this_char, map_options); + } else if (!strcmp(this_char, "mount_timeout")) { + if (put_map_option_value("mount_timeout", value_char, map_option_int_cb, + map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "osd_request_timeout")) { + if (put_map_option_value("osd_request_timeout", value_char, + map_option_int_cb, map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "lock_timeout")) { + if (put_map_option_value("lock_timeout", value_char, map_option_int_cb, + map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "osdkeepalive")) { + if (put_map_option_value("osdkeepalive", value_char, map_option_int_cb, + map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "osd_idle_ttl")) { + if (put_map_option_value("osd_idle_ttl", value_char, map_option_int_cb, + map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "rw") || !strcmp(this_char, "ro")) { + put_map_option("rw", this_char, map_options); + } else if (!strcmp(this_char, "queue_depth")) { + if (put_map_option_value("queue_depth", value_char, map_option_int_cb, + map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "lock_on_read")) { + put_map_option("lock_on_read", this_char, map_options); + } else if (!strcmp(this_char, "exclusive")) { + put_map_option("exclusive", this_char, map_options); + } else if (!strcmp(this_char, "notrim")) { + put_map_option("notrim", this_char, map_options); + } else if (!strcmp(this_char, "abort_on_full")) { + put_map_option("abort_on_full", this_char, map_options); + } else if (!strcmp(this_char, "alloc_size")) { + if (put_map_option_value("alloc_size", value_char, map_option_int_cb, + map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "crush_location")) 
{ + if (put_map_option_value("crush_location", value_char, + map_option_string_cb, map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "read_from_replica")) { + if (put_map_option_value("read_from_replica", value_char, + map_option_read_from_replica_cb, map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "compression_hint")) { + if (put_map_option_value("compression_hint", value_char, + map_option_compression_hint_cb, map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "ms_mode")) { + if (put_map_option_value("ms_mode", value_char, map_option_ms_mode_cb, + map_options)) + return -EINVAL; + } else if (!strcmp(this_char, "rxbounce")) { + put_map_option("rxbounce", this_char, map_options); + } else if (!strcmp(this_char, "udev") || !strcmp(this_char, "noudev")) { + put_map_option("udev", this_char, map_options); + } else { + std::cerr << "rbd: unknown map option '" << this_char << "'" << std::endl; + return -EINVAL; + } + } + + return 0; +} + +static int parse_unmap_options(const std::string &options_string, + MapOptions* unmap_options) +{ + char *options = strdup(options_string.c_str()); + BOOST_SCOPE_EXIT(options) { + free(options); + } BOOST_SCOPE_EXIT_END; + + for (char *this_char = strtok(options, ", "); + this_char != NULL; + this_char = strtok(NULL, ",")) { + char *value_char; + + if ((value_char = strchr(this_char, '=')) != NULL) + *value_char++ = '\0'; + + if (!strcmp(this_char, "force")) { + put_map_option("force", this_char, unmap_options); + } else if (!strcmp(this_char, "udev") || !strcmp(this_char, "noudev")) { + put_map_option("udev", this_char, unmap_options); + } else { + std::cerr << "rbd: unknown unmap option '" << this_char << "'" + << std::endl; + return -EINVAL; + } + } + + return 0; +} + +static int do_kernel_list(Formatter *f) { +#if defined(WITH_KRBD) + struct krbd_ctx *krbd; + int r; + + r = krbd_create_from_context(g_ceph_context, 0, &krbd); + if (r < 0) + return r; + + r = krbd_showmapped(krbd, f); + + 
krbd_destroy(krbd);
  return r;
#else
  std::cerr << "rbd: kernel device is not supported" << std::endl;
  return -EOPNOTSUPP;
#endif
}

// Read /sys/bus/rbd/supported_features (hex) and return, in
// *unsupported_features, the image feature bits the running kernel lacks.
static int get_unsupported_features(librbd::Image &image,
                                    uint64_t *unsupported_features)
{
  char buf[20];
  uint64_t features, supported_features;
  int r;

  r = safe_read_file("/sys/bus/rbd/", "supported_features", buf,
                     sizeof(buf) - 1);
  if (r < 0)
    return r;

  buf[r] = '\0';
  try {
    supported_features = std::stoull(buf, nullptr, 16);
  } catch (...) {
    // non-numeric sysfs content
    return -EINVAL;
  }

  r = image.features(&features);
  if (r < 0)
    return r;

  *unsupported_features = features & ~supported_features;
  return 0;
}

/*
 * hint user to check syslog for krbd related messages and provide suggestions
 * based on errno return by krbd_map(). also note that even if some librbd calls
 * fail, we at least dump the "try dmesg..." message to aid debugging.
 */
static void print_error_description(const char *poolname,
                                    const char *nspace_name,
                                    const char *imgname,
                                    const char *snapname,
                                    int maperrno)
{
  int r;
  uint8_t oldformat;
  librados::Rados rados;
  librados::IoCtx ioctx;
  librbd::Image image;

  // for a missing image there is nothing more to diagnose
  if (maperrno == -ENOENT)
    goto done;

  r = utils::init_and_open_image(poolname, nspace_name, imgname, "", snapname,
                                 true, &rados, &ioctx, &image);
  if (r < 0)
    goto done;

  r = image.old_format(&oldformat);
  if (r < 0)
    goto done;

  /*
   * kernel returns -ENXIO when mapping a V2 image due to unsupported feature
   * set - so, hint about that too...
   */
  if (!oldformat && (maperrno == -ENXIO)) {
    uint64_t unsupported_features;
    bool need_terminate = true;

    std::cout << "RBD image feature set mismatch. ";
    r = get_unsupported_features(image, &unsupported_features);
    if (r == 0 && (unsupported_features & ~RBD_FEATURES_ALL) == 0) {
      // features that can never be disabled once enabled
      uint64_t immutable = RBD_FEATURES_ALL & ~(RBD_FEATURES_MUTABLE |
                                                RBD_FEATURES_DISABLE_ONLY);
      if (unsupported_features & immutable) {
        std::cout << "This image cannot be mapped because the following "
                  << "immutable features are unsupported by the kernel:";
        unsupported_features &= immutable;
        need_terminate = false;
      } else {
        std::cout << "You can disable features unsupported by the kernel "
                  << "with \"rbd feature disable ";
        // qualify the image spec when the pool isn't the default or a
        // namespace is set
        if (poolname != utils::get_default_pool_name() || *nspace_name) {
          std::cout << poolname << "/";
        }
        if (*nspace_name) {
          std::cout << nspace_name << "/";
        }
        std::cout << imgname;
      }
    } else {
      // couldn't determine the exact set -- suggest the command generically
      std::cout << "Try disabling features unsupported by the kernel "
                << "with \"rbd feature disable";
      unsupported_features = 0;
    }
    for (auto it : at::ImageFeatures::FEATURE_MAPPING) {
      if (it.first & unsupported_features) {
        std::cout << " " << it.second;
      }
    }
    if (need_terminate)
      std::cout << "\"";
    std::cout << "." << std::endl;
  }

 done:
  std::cout << "In some cases useful info is found in syslog - try \"dmesg | tail\"." << std::endl;
}

// Map an image via krbd.  Consumes map_options: "rw"/"udev" pseudo-options
// are dropped or turned into context flags, the rest are joined into a
// comma-separated option string for krbd_map().
static int do_kernel_map(const char *poolname, const char *nspace_name,
                         const char *imgname, const char *snapname,
                         MapOptions&& map_options)
{
#if defined(WITH_KRBD)
  struct krbd_ctx *krbd;
  std::ostringstream oss;
  uint32_t flags = 0;
  char *devnode;
  int r;

  for (auto it = map_options.begin(); it != map_options.end(); ) {
    // for compatibility with < 3.7 kernels, assume that rw is on by
    // default and omit it even if it was specified by the user
    // (see ceph.git commit fb0f1986449b)
    if (it->first == "rw" && it->second == "rw") {
      it = map_options.erase(it);
    } else if (it->first == "udev") {
      if (it->second == "noudev") {
        flags |= KRBD_CTX_F_NOUDEV;
      }
      it = map_options.erase(it);
    } else {
      if (it != map_options.begin())
        oss << ",";
      oss << it->second;
      ++it;
    }
  }

  r = krbd_create_from_context(g_ceph_context, flags, &krbd);
  if (r < 0)
    return r;

  // warn -- but do not fail -- if the image appears to be mapped already
  r = krbd_is_mapped(krbd, poolname, nspace_name, imgname, snapname, &devnode);
  if (r < 0) {
    std::cerr << "rbd: warning: can't get image map information: "
              << cpp_strerror(r) << std::endl;
  } else if (r > 0) {
    std::cerr << "rbd: warning: image already mapped as " << devnode
              << std::endl;
    free(devnode);
  }

  r = krbd_map(krbd, poolname, nspace_name, imgname, snapname,
               oss.str().c_str(), &devnode);
  if (r < 0) {
    print_error_description(poolname, nspace_name, imgname, snapname, r);
    goto out;
  }

  std::cout << devnode << std::endl;

  free(devnode);
out:
  krbd_destroy(krbd);
  return r;
#else
  std::cerr << "rbd: kernel device is not supported" << std::endl;
  return -EOPNOTSUPP;
#endif
}

// Unmap by device node (when dev != NULL) or by image spec otherwise.
static int do_kernel_unmap(const char *dev, const char *poolname,
                           const char *nspace_name, const char *imgname,
                           const char *snapname, MapOptions&& unmap_options)
{
#if defined(WITH_KRBD)
  struct krbd_ctx *krbd;
  std::ostringstream oss;
  uint32_t flags = 0;
  int r;

  for (auto it = unmap_options.begin(); it != unmap_options.end();
) {
    // "udev"/"noudev" becomes a krbd context flag; everything else is
    // joined into a comma-separated option string
    if (it->first == "udev") {
      if (it->second == "noudev") {
        flags |= KRBD_CTX_F_NOUDEV;
      }
      it = unmap_options.erase(it);
    } else {
      if (it != unmap_options.begin())
        oss << ",";
      oss << it->second;
      ++it;
    }
  }

  r = krbd_create_from_context(g_ceph_context, flags, &krbd);
  if (r < 0)
    return r;

  if (dev)
    r = krbd_unmap(krbd, dev, oss.str().c_str());
  else
    r = krbd_unmap_by_spec(krbd, poolname, nspace_name, imgname, snapname,
                           oss.str().c_str());

  krbd_destroy(krbd);
  return r;
#else
  std::cerr << "rbd: kernel device is not supported" << std::endl;
  return -EOPNOTSUPP;
#endif
}

// "rbd device list" (krbd): show mapped images.
int execute_list(const po::variables_map &vm,
                 const std::vector<std::string> &ceph_global_init_args) {
  at::Format::Formatter formatter;
  int r = utils::get_formatter(vm, &formatter);
  if (r < 0) {
    return r;
  }

  utils::init_context();

  r = do_kernel_list(formatter.get());
  if (r < 0) {
    std::cerr << "rbd: device list failed: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

// "rbd device map" (krbd): parse user options, merge in the pool-configured
// defaults (rbd_default_map_options) and map the image.
int execute_map(const po::variables_map &vm,
                const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string nspace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
      vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name,
      &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
      utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  MapOptions map_options;
  if (vm.count("options")) {
    for (auto &options : vm["options"].as<std::vector<std::string>>()) {
      r = parse_map_options(options, &map_options);
      if (r < 0) {
        std::cerr << "rbd: couldn't parse map options" << std::endl;
        return r;
      }
    }
  }

  // parse options common to all device types after parsing krbd-specific
  // options so that common options win (in particular "-o rw --read-only"
  // should result in read-only mapping)
  if (vm["read-only"].as<bool>()) {
    put_map_option("rw", "ro", &map_options);
  }
  if (vm["exclusive"].as<bool>()) {
    put_map_option("exclusive", "exclusive", &map_options);
  }
  if (vm["quiesce"].as<bool>()) {
    std::cerr << "rbd: warning: quiesce is not supported" << std::endl;
  }
  if (vm.count("quiesce-hook")) {
    std::cerr << "rbd: warning: quiesce-hook is not supported" << std::endl;
  }

  // connect to the cluster to get the default pool and the default map
  // options
  librados::Rados rados;
  r = utils::init_rados(&rados);
  if (r < 0) {
    return r;
  }

  utils::normalize_pool_name(&pool_name);

  librados::IoCtx ioctx;
  librbd::Image image;
  r = utils::init_io_ctx(rados, pool_name, nspace_name, &ioctx);
  if (r < 0) {
    return r;
  }

  r = utils::open_image(ioctx, image_name, true, &image);
  if (r < 0) {
    return r;
  }

  MapOptions default_map_options;
  std::vector<librbd::config_option_t> options;
  image.config_list(&options);
  for (const auto &option : options) {
    if (option.name == "rbd_default_map_options") {
      r = parse_map_options(option.value, &default_map_options);
      if (r < 0) {
        std::cerr << "rbd: couldn't parse default map options" << std::endl;
        return r;
      }

      break;
    }
  }

  // explicitly specified options take precedence over configured defaults
  for (auto& [key, value] : default_map_options) {
    if (map_options.count(key) == 0) {
      map_options[key] = value;
    }
  }

  r = do_kernel_map(pool_name.c_str(), nspace_name.c_str(), image_name.c_str(),
                    snap_name.c_str(), std::move(map_options));
  if (r < 0) {
    std::cerr << "rbd: map failed: " << cpp_strerror(r) << std::endl;
    return r;
  }

  return 0;
}

// "rbd device unmap" (krbd): unmap by /dev/... path or by image spec.
int execute_unmap(const po::variables_map &vm,
                  const std::vector<std::string> &ceph_global_init_args) {
  std::string device_name = utils::get_positional_argument(vm, 0);
  if (!boost::starts_with(device_name, "/dev/")) {
    // not a device path -- treat the positional argument as an image spec
    device_name.clear();
  }

  size_t arg_index = 0;
  std::string pool_name;
  std::string nspace_name;
  std::string image_name;
  std::string snap_name;
  int r;
  if (device_name.empty()) {
    r = utils::get_pool_image_snapshot_names(
        vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name,
        &image_name, &snap_name, false, utils::SNAPSHOT_PRESENCE_PERMITTED,
        utils::SPEC_VALIDATION_NONE);
    if (r < 0) {
      return r;
    }
  }

  if (device_name.empty() && image_name.empty()) {
    std::cerr << "rbd: unmap requires either image name or device path"
              << std::endl;
    return -EINVAL;
  }

  MapOptions unmap_options;
  if (vm.count("options")) {
    for (auto &options : vm["options"].as<std::vector<std::string>>()) {
      r = parse_unmap_options(options, &unmap_options);
      if (r < 0) {
        std::cerr << "rbd: couldn't parse unmap options" << std::endl;
        return r;
      }
    }
  }

  if (device_name.empty() && pool_name.empty()) {
    // connect to the cluster to get the default pool
    librados::Rados rados;
    r = utils::init_rados(&rados);
    if (r < 0) {
      return r;
    }

    utils::normalize_pool_name(&pool_name);
  }

  r = do_kernel_unmap(device_name.empty() ? nullptr : device_name.c_str(),
                      pool_name.c_str(), nspace_name.c_str(),
                      image_name.c_str(), snap_name.c_str(),
                      std::move(unmap_options));
  if (r < 0) {
    std::cerr << "rbd: unmap failed: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

// krbd has no attach support; always reports the operation as unsupported.
int execute_attach(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
#if defined(WITH_KRBD)
  std::cerr << "rbd: krbd does not support attach" << std::endl;
#else
  std::cerr << "rbd: kernel device is not supported" << std::endl;
#endif
  return -EOPNOTSUPP;
}

// krbd has no detach support; always reports the operation as unsupported.
int execute_detach(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
#if defined(WITH_KRBD)
  std::cerr << "rbd: krbd does not support detach" << std::endl;
#else
  std::cerr << "rbd: kernel device is not supported" << std::endl;
#endif
  return -EOPNOTSUPP;
}

} // namespace kernel
} // namespace action
} // namespace rbd
diff --git a/src/tools/rbd/action/List.cc b/src/tools/rbd/action/List.cc
new file mode 100644
index 000000000..8fdc3c1a7
--- /dev/null
+++ b/src/tools/rbd/action/List.cc
@@ -0,0 +1,346 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "tools/rbd/ArgumentTypes.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "include/Context.h"
#include "include/stringify.h"
#include "include/types.h"
#include "common/errno.h"
#include "common/Formatter.h"
#include "common/TextTable.h"
#include <iostream>
#include <boost/bind/bind.hpp>
#include <boost/program_options.hpp>
#include "global/global_context.h"

namespace rbd {

namespace action {
namespace list {

namespace at = argument_types;
namespace po = boost::program_options;
using namespace boost::placeholders;

// Lifecycle state of an async image-open worker slot.
enum WorkerState {
  STATE_IDLE = 0,
  STATE_OPENED,
  STATE_DONE
} ;

struct WorkerEntry {
  librbd::Image img;
  librbd::RBD::AioCompletion* completion;
  WorkerState state;
  std::string
name; + std::string id; + + WorkerEntry() { + state = STATE_IDLE; + completion = nullptr; + } +}; + + +int list_process_image(librados::Rados* rados, WorkerEntry* w, bool lflag, Formatter *f, TextTable &tbl) +{ + int r = 0; + librbd::image_info_t info; + std::string parent; + + // handle second-nth trips through loop + librbd::linked_image_spec_t parent_image_spec; + librbd::snap_spec_t parent_snap_spec; + r = w->img.get_parent(&parent_image_spec, &parent_snap_spec); + if (r < 0 && r != -ENOENT) { + return r; + } + + bool has_parent = false; + if (r != -ENOENT) { + parent = parent_image_spec.pool_name + "/"; + if (!parent_image_spec.pool_namespace.empty()) { + parent += parent_image_spec.pool_namespace + "/"; + } + parent += parent_image_spec.image_name + "@" + parent_snap_spec.name; + has_parent = true; + } + + if (w->img.stat(info, sizeof(info)) < 0) { + return -EINVAL; + } + + uint8_t old_format; + w->img.old_format(&old_format); + + std::list<librbd::locker_t> lockers; + bool exclusive; + r = w->img.list_lockers(&lockers, &exclusive, NULL); + if (r < 0) + return r; + std::string lockstr; + if (!lockers.empty()) { + lockstr = (exclusive) ? "excl" : "shr"; + } + + if (f) { + f->open_object_section("image"); + f->dump_string("image", w->name); + f->dump_string("id", w->id); + f->dump_unsigned("size", info.size); + if (has_parent) { + f->open_object_section("parent"); + f->dump_string("pool", parent_image_spec.pool_name); + f->dump_string("pool_namespace", parent_image_spec.pool_namespace); + f->dump_string("image", parent_image_spec.image_name); + f->dump_string("snapshot", parent_snap_spec.name); + f->close_section(); + } + f->dump_int("format", old_format ? 1 : 2); + if (!lockers.empty()) + f->dump_string("lock_type", exclusive ? "exclusive" : "shared"); + f->close_section(); + } else { + tbl << w->name + << stringify(byte_u_t(info.size)) + << parent + << ((old_format) ? 
'1' : '2') + << "" // protect doesn't apply to images + << lockstr + << TextTable::endrow; + } + + std::vector<librbd::snap_info_t> snaplist; + if (w->img.snap_list(snaplist) >= 0 && !snaplist.empty()) { + snaplist.erase(remove_if(snaplist.begin(), + snaplist.end(), + boost::bind(utils::is_not_user_snap_namespace, &w->img, _1)), + snaplist.end()); + for (std::vector<librbd::snap_info_t>::iterator s = snaplist.begin(); + s != snaplist.end(); ++s) { + bool is_protected; + bool has_parent = false; + parent.clear(); + w->img.snap_set(s->name.c_str()); + r = w->img.snap_is_protected(s->name.c_str(), &is_protected); + if (r < 0) + return r; + if (w->img.get_parent(&parent_image_spec, &parent_snap_spec) >= 0) { + parent = parent_image_spec.pool_name + "/"; + if (!parent_image_spec.pool_namespace.empty()) { + parent += parent_image_spec.pool_namespace + "/"; + } + parent += parent_image_spec.image_name + "@" + parent_snap_spec.name; + has_parent = true; + } + if (f) { + f->open_object_section("snapshot"); + f->dump_string("image", w->name); + f->dump_string("id", w->id); + f->dump_string("snapshot", s->name); + f->dump_unsigned("snapshot_id", s->id); + f->dump_unsigned("size", s->size); + if (has_parent) { + f->open_object_section("parent"); + f->dump_string("pool", parent_image_spec.pool_name); + f->dump_string("pool_namespace", parent_image_spec.pool_namespace); + f->dump_string("image", parent_image_spec.image_name); + f->dump_string("snapshot", parent_snap_spec.name); + f->close_section(); + } + f->dump_int("format", old_format ? 1 : 2); + f->dump_string("protected", is_protected ? "true" : "false"); + f->close_section(); + } else { + tbl << w->name + "@" + s->name + << stringify(byte_u_t(s->size)) + << parent + << ((old_format) ? '1' : '2') + << (is_protected ? 
"yes" : "") + << "" // locks don't apply to snaps + << TextTable::endrow; + } + } + } + + return 0; +} + +int do_list(const std::string &pool_name, const std::string& namespace_name, + bool lflag, Formatter *f) { + std::vector<WorkerEntry*> workers; + std::vector<librbd::image_spec_t> images; + librados::Rados rados; + librbd::RBD rbd; + librados::IoCtx ioctx; + + int r = utils::init(pool_name, namespace_name, &rados, &ioctx); + if (r < 0) { + return r; + } + + int threads = g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"); + if (threads < 1) { + threads = 1; + } + if (threads > 32) { + threads = 32; + } + + utils::disable_cache(); + + r = rbd.list2(ioctx, &images); + if (r < 0) + return r; + + if (!lflag) { + if (f) + f->open_array_section("images"); + for (auto& image : images) { + if (f) + f->dump_string("name", image.name); + else + std::cout << image.name << std::endl; + } + if (f) { + f->close_section(); + f->flush(std::cout); + } + return 0; + } + + TextTable tbl; + + if (f) { + f->open_array_section("images"); + } else { + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("PARENT", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("FMT", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("PROT", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("LOCK", TextTable::LEFT, TextTable::LEFT); + } + + for (size_t left = 0; left < std::min<size_t>(threads, images.size()); + left++) { + workers.push_back(new WorkerEntry()); + } + + auto i = images.begin(); + while (true) { + size_t workers_idle = 0; + for (auto comp : workers) { + switch (comp->state) { + case STATE_DONE: + comp->completion->wait_for_complete(); + comp->state = STATE_IDLE; + comp->completion->release(); + comp->completion = nullptr; + // we want it to fall through in this case + case STATE_IDLE: + if (i == images.end()) { + workers_idle++; + continue; + } + comp->name = i->name; + 
          comp->id = i->id;
          // Kick off an async read-only open; the completion is reaped on a
          // later pass through this loop when the worker is in STATE_OPENED.
          // NOTE(review): the return value of aio_open_read_only itself is
          // not checked here -- the open result is taken from the
          // completion's get_return_value() below.
          comp->completion = new librbd::RBD::AioCompletion(nullptr, nullptr);
          r = rbd.aio_open_read_only(ioctx, comp->img, i->name.c_str(), nullptr,
                                     comp->completion);
          i++;
          comp->state = STATE_OPENED;
          break;
        case STATE_OPENED:
          // Wait for the async open started in STATE_IDLE to finish.
          comp->completion->wait_for_complete();
          // image might disappear between rbd.list() and rbd.open(); ignore
          // that, warn about other possible errors (EPERM, say, for opening
          // an old-format image, because you need execute permission for the
          // class method)
          r = comp->completion->get_return_value();
          comp->completion->release();
          if (r < 0) {
            std::cerr << "rbd: error opening " << comp->name << ": "
                      << cpp_strerror(r) << std::endl;

            // in any event, continue to next image
            comp->state = STATE_IDLE;
            continue;
          }
          // Synchronously gather and emit this image's details (size, parent,
          // locks, snapshots), then start an async close of the image.
          r = list_process_image(&rados, comp, lflag, f, tbl);
          if (r < 0) {
            std::cerr << "rbd: error processing image " << comp->name << ": "
                      << cpp_strerror(r) << std::endl;
          }
          comp->completion = new librbd::RBD::AioCompletion(nullptr, nullptr);
          r = comp->img.aio_close(comp->completion);
          comp->state = STATE_DONE;
          break;
      }
    }
    // All workers idle means every image has been dispatched and drained.
    if (workers_idle == workers.size()) {
      break;
    }
  }

  if (f) {
    f->close_section();
    f->flush(std::cout);
  } else if (!images.empty()) {
    std::cout << tbl;
  }

  rados.shutdown();

  for (auto comp : workers) {
    delete comp;
  }

  // Propagate the last error seen while processing, if any.
  return r < 0 ?
r : 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + options->add_options() + ("long,l", po::bool_switch(), "long listing format"); + at::add_pool_options(positional, options, true); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + r = do_list(pool_name, namespace_name, vm["long"].as<bool>(), + formatter.get()); + if (r < 0) { + std::cerr << "rbd: listing images failed: " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +Shell::SwitchArguments switched_arguments({"long", "l"}); +Shell::Action action( + {"list"}, {"ls"}, "List rbd images.", "", &get_arguments, &execute); + +} // namespace list +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Lock.cc b/src/tools/rbd/action/Lock.cc new file mode 100644 index 000000000..754cb384c --- /dev/null +++ b/src/tools/rbd/action/Lock.cc @@ -0,0 +1,279 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace lock { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +void add_id_option(po::options_description *positional) { + positional->add_options() + ("lock-id", "unique lock id"); +} + +int get_id(const po::variables_map &vm, size_t 
*arg_index,
           std::string *id) {
  // Consume the next positional argument as the lock cookie ("lock-id");
  // *arg_index is only advanced on success.
  *id = utils::get_positional_argument(vm, *arg_index);
  if (id->empty()) {
    std::cerr << "rbd: lock id was not specified" << std::endl;
    return -EINVAL;
  } else {
    ++(*arg_index);
  }
  return 0;
}

} // anonymous namespace

// List the advisory locks held on an image, either through the Formatter
// (json/xml) or as a human-readable table with a summary line.
static int do_lock_list(librbd::Image& image, Formatter *f)
{
  std::list<librbd::locker_t> lockers;
  bool exclusive;
  std::string tag;
  TextTable tbl;
  int r;

  r = image.list_lockers(&lockers, &exclusive, &tag);
  if (r < 0)
    return r;

  if (f) {
    f->open_array_section("locks");
  } else {
    tbl.define_column("Locker", TextTable::LEFT, TextTable::LEFT);
    tbl.define_column("ID", TextTable::LEFT, TextTable::LEFT);
    tbl.define_column("Address", TextTable::LEFT, TextTable::LEFT);
  }

  if (lockers.size()) {
    bool one = (lockers.size() == 1);

    if (!f) {
      std::cout << "There " << (one ? "is " : "are ") << lockers.size()
                << (exclusive ? " exclusive" : " shared")
                << " lock" << (one ? "" : "s") << " on this image.\n";
      // the lock tag is only meaningful for shared locks
      if (!exclusive)
        std::cout << "Lock tag: " << tag << "\n";
    }

    for (std::list<librbd::locker_t>::const_iterator it = lockers.begin();
         it != lockers.end(); ++it) {
      if (f) {
        f->open_object_section("lock");
        f->dump_string("id", it->cookie);
        f->dump_string("locker", it->client);
        f->dump_string("address", it->address);
        f->close_section();
      } else {
        tbl << it->client << it->cookie << it->address << TextTable::endrow;
      }
    }
    if (!f)
      std::cout << tbl;
  }

  if (f) {
    f->close_section();
    f->flush(std::cout);
  }
  return 0;
}

// Acquire a lock on the image: shared (when a tag is given) or exclusive.
static int do_lock_add(librbd::Image& image, const char *cookie,
                       const char *tag)
{
  if (tag)
    return image.lock_shared(cookie, tag);
  else
    return image.lock_exclusive(cookie);
}

// Forcibly break a lock held by another client.
static int do_lock_remove(librbd::Image& image, const char *client,
                          const char *cookie)
{
  return image.break_lock(client, cookie);
}

void get_list_arguments(po::options_description *positional,
                        po::options_description
*options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_format_options(options);
}

// Handler for "rbd lock list": open the image read-only and print its locks.
int execute_list(const po::variables_map &vm,
                 const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
      vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
      &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
      utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  at::Format::Formatter formatter;
  r = utils::get_formatter(vm, &formatter);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  // read-only open is sufficient for listing lockers
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 true, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = do_lock_list(image, formatter.get());
  if (r < 0) {
    std::cerr << "rbd: listing locks failed: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

void get_add_arguments(po::options_description *positional,
                       po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  add_id_option(positional);
  options->add_options()
    ("shared", po::value<std::string>(), "shared lock tag");
}

// Handler for "rbd lock add": take a shared or exclusive lock on an image.
int execute_add(const po::variables_map &vm,
                const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
      vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
      &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
      utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return
r; + } + + std::string lock_tag; + if (vm.count("shared")) { + lock_tag = vm["shared"].as<std::string>(); + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_lock_add(image, lock_cookie.c_str(), + lock_tag.empty() ? nullptr : lock_tag.c_str()); + if (r < 0) { + if (r == -EBUSY || r == -EEXIST) { + if (!lock_tag.empty()) { + std::cerr << "rbd: lock is already held by someone else" + << " with a different tag" << std::endl; + } else { + std::cerr << "rbd: lock is already held by someone else" << std::endl; + } + } else { + std::cerr << "rbd: taking lock failed: " << cpp_strerror(r) << std::endl; + } + return r; + } + return 0; +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_id_option(positional); + positional->add_options() + ("locker", "locker client"); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string lock_cookie; + r = get_id(vm, &arg_index, &lock_cookie); + if (r < 0) { + return r; + } + + std::string lock_client = utils::get_positional_argument(vm, arg_index); + if (lock_client.empty()) { + std::cerr << "rbd: locker was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, 
namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_lock_remove(image, lock_client.c_str(), lock_cookie.c_str()); + if (r < 0) { + std::cerr << "rbd: releasing lock failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action_list( + {"lock", "list"}, {"lock", "ls"}, "Show locks held on an image.", "", + &get_list_arguments, &execute_list); +Shell::Action action_add( + {"lock", "add"}, {}, "Take a lock on an image.", "", + &get_add_arguments, &execute_add); +Shell::Action action_remove( + {"lock", "remove"}, {"lock", "rm"}, "Release a lock on an image.", "", + &get_remove_arguments, &execute_remove); + +} // namespace lock +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/MergeDiff.cc b/src/tools/rbd/action/MergeDiff.cc new file mode 100644 index 000000000..c387be9a4 --- /dev/null +++ b/src/tools/rbd/action/MergeDiff.cc @@ -0,0 +1,456 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#define _LARGEFILE64_SOURCE +#include <sys/types.h> +#include <unistd.h> + +#include "include/compat.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/safe_io.h" +#include "common/debug.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd + +using std::string; + +namespace rbd { +namespace action { +namespace merge_diff { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int parse_diff_header(int fd, __u8 *tag, string *from, string *to, uint64_t *size) +{ + int r; + + {//header + char buf[utils::RBD_DIFF_BANNER.size() + 1]; + r = safe_read_exact(fd, buf, utils::RBD_DIFF_BANNER.size()); + if (r < 0) + return r; + + buf[utils::RBD_DIFF_BANNER.size()] = '\0'; + if (strcmp(buf, 
utils::RBD_DIFF_BANNER.c_str())) { + std::cerr << "invalid banner '" << buf << "', expected '" + << utils::RBD_DIFF_BANNER << "'" << std::endl; + return -EINVAL; + } + } + + while (true) { + r = safe_read_exact(fd, tag, 1); + if (r < 0) + return r; + + if (*tag == RBD_DIFF_FROM_SNAP) { + r = utils::read_string(fd, 4096, from); // 4k limit to make sure we don't get a garbage string + if (r < 0) + return r; + dout(2) << " from snap " << *from << dendl; + } else if (*tag == RBD_DIFF_TO_SNAP) { + r = utils::read_string(fd, 4096, to); // 4k limit to make sure we don't get a garbage string + if (r < 0) + return r; + dout(2) << " to snap " << *to << dendl; + } else if (*tag == RBD_DIFF_IMAGE_SIZE) { + char buf[8]; + r = safe_read_exact(fd, buf, 8); + if (r < 0) + return r; + + bufferlist bl; + bl.append(buf, 8); + auto p = bl.cbegin(); + decode(*size, p); + } else { + break; + } + } + + return 0; +} + +static int parse_diff_body(int fd, __u8 *tag, uint64_t *offset, uint64_t *length) +{ + int r; + + if (!(*tag)) { + r = safe_read_exact(fd, tag, 1); + if (r < 0) + return r; + } + + if (*tag == RBD_DIFF_END) { + offset = 0; + length = 0; + return 0; + } + + if (*tag != RBD_DIFF_WRITE && *tag != RBD_DIFF_ZERO) + return -ENOTSUP; + + char buf[16]; + r = safe_read_exact(fd, buf, 16); + if (r < 0) + return r; + + bufferlist bl; + bl.append(buf, 16); + auto p = bl.cbegin(); + decode(*offset, p); + decode(*length, p); + + if (!(*length)) + return -ENOTSUP; + + return 0; +} + +/* + * fd: the diff file to read from + * pd: the diff file to be written into + */ +static int accept_diff_body(int fd, int pd, __u8 tag, uint64_t offset, uint64_t length) +{ + if (tag == RBD_DIFF_END) + return 0; + + bufferlist bl; + encode(tag, bl); + encode(offset, bl); + encode(length, bl); + int r; + r = bl.write_fd(pd); + if (r < 0) + return r; + + if (tag == RBD_DIFF_WRITE) { + bufferptr bp = buffer::create(length); + r = safe_read_exact(fd, bp.c_str(), length); + if (r < 0) + return r; + bufferlist 
data; + data.append(bp); + r = data.write_fd(pd); + if (r < 0) + return r; + } + + return 0; +} + +/* + * Merge two diff files into one single file + * Note: It does not do the merging work if + * either of the source diff files is stripped, + * since which complicates the process and is + * rarely used + */ +static int do_merge_diff(const char *first, const char *second, + const char *path, bool no_progress) +{ + utils::ProgressContext pc("Merging image diff", no_progress); + int fd = -1, sd = -1, pd = -1, r; + + string f_from, f_to; + string s_from, s_to; + uint64_t f_size = 0; + uint64_t s_size = 0; + uint64_t pc_size; + + __u8 f_tag = 0, s_tag = 0; + uint64_t f_off = 0, f_len = 0; + uint64_t s_off = 0, s_len = 0; + bool f_end = false, s_end = false; + + bool first_stdin = !strcmp(first, "-"); + if (first_stdin) { + fd = STDIN_FILENO; + } else { + fd = open(first, O_RDONLY|O_BINARY); + if (fd < 0) { + r = -errno; + std::cerr << "rbd: error opening " << first << std::endl; + goto done; + } + } + + sd = open(second, O_RDONLY|O_BINARY); + if (sd < 0) { + r = -errno; + std::cerr << "rbd: error opening " << second << std::endl; + goto done; + } + + if (strcmp(path, "-") == 0) { + pd = 1; + } else { + pd = open(path, O_WRONLY | O_CREAT | O_EXCL | O_BINARY, 0644); + if (pd < 0) { + r = -errno; + std::cerr << "rbd: error create " << path << std::endl; + goto done; + } + } + + //We just handle the case like 'banner, [ftag], [ttag], stag, [wztag]*,etag', + // and the (offset,length) in wztag must be ascending order. 
+ r = parse_diff_header(fd, &f_tag, &f_from, &f_to, &f_size); + if (r < 0) { + std::cerr << "rbd: failed to parse first diff header" << std::endl; + goto done; + } + + r = parse_diff_header(sd, &s_tag, &s_from, &s_to, &s_size); + if (r < 0) { + std::cerr << "rbd: failed to parse second diff header" << std::endl; + goto done; + } + + if (f_to != s_from) { + r = -EINVAL; + std::cerr << "The first TO snapshot must be equal with the second FROM " + << "snapshot, aborting" << std::endl; + goto done; + } + + { + // header + bufferlist bl; + bl.append(utils::RBD_DIFF_BANNER); + + __u8 tag; + if (f_from.size()) { + tag = RBD_DIFF_FROM_SNAP; + encode(tag, bl); + encode(f_from, bl); + } + + if (s_to.size()) { + tag = RBD_DIFF_TO_SNAP; + encode(tag, bl); + encode(s_to, bl); + } + + tag = RBD_DIFF_IMAGE_SIZE; + encode(tag, bl); + encode(s_size, bl); + + r = bl.write_fd(pd); + if (r < 0) { + std::cerr << "rbd: failed to write merged diff header" << std::endl; + goto done; + } + } + if (f_size > s_size) + pc_size = f_size << 1; + else + pc_size = s_size << 1; + + //data block + while (!f_end || !s_end) { + // progress through input + pc.update_progress(f_off + s_off, pc_size); + + if (!f_end && !f_len) { + uint64_t last_off = f_off; + + r = parse_diff_body(fd, &f_tag, &f_off, &f_len); + dout(2) << "first diff data chunk: tag=" << f_tag << ", " + << "off=" << f_off << ", " + << "len=" << f_len << dendl; + if (r < 0) { + std::cerr << "rbd: failed to read first diff data chunk header" + << std::endl; + goto done; + } + + if (f_tag == RBD_DIFF_END) { + f_end = true; + f_tag = RBD_DIFF_ZERO; + f_off = f_size; + if (f_size < s_size) + f_len = s_size - f_size; + else + f_len = 0; + } + + if (last_off > f_off) { + r = -ENOTSUP; + std::cerr << "rbd: out-of-order offset from first diff (" + << last_off << " > " << f_off << ")" << std::endl; + goto done; + } + } + + if (!s_end && !s_len) { + uint64_t last_off = s_off; + + r = parse_diff_body(sd, &s_tag, &s_off, &s_len); + dout(2) << 
"second diff data chunk: tag=" << s_tag << ", "
              << "off=" << s_off << ", "
              << "len=" << s_len << dendl;
      if (r < 0) {
        std::cerr << "rbd: failed to read second diff data chunk header"
                  << std::endl;
        goto done;
      }

      // At end of the second diff, synthesize a trailing chunk covering any
      // region the first diff extends beyond the second image size.
      if (s_tag == RBD_DIFF_END) {
        s_end = true;
        s_off = s_size;
        if (s_size < f_size)
          s_len = f_size - s_size;
        else
          s_len = 0;
      }

      // Chunks must arrive in ascending offset order; anything else is a
      // (striped) layout this merger does not support.
      if (last_off > s_off) {
        r = -ENOTSUP;
        std::cerr << "rbd: out-of-order offset from second diff ("
                  << last_off << " > " << s_off << ")" << std::endl;
        goto done;
      }
    }

    // Region where the first diff's chunk lies strictly before the second
    // diff's chunk: not overridden by the second diff, so copy it through.
    if (f_off < s_off && f_len) {
      uint64_t delta = s_off - f_off;
      if (delta > f_len)
        delta = f_len;
      r = accept_diff_body(fd, pd, f_tag, f_off, delta);
      if (r < 0) {
        std::cerr << "rbd: failed to merge diff chunk" << std::endl;
        goto done;
      }
      f_off += delta;
      f_len -= delta;

      if (!f_len) {
        f_tag = 0;
        continue;
      }
    }
    ceph_assert(f_off >= s_off);

    // Overlap region: the second (newer) diff wins, so the first diff's
    // payload is discarded -- read-and-drop when the first diff comes from
    // stdin, seek past it otherwise.
    if (f_off < s_off + s_len && f_len) {
      uint64_t delta = s_off + s_len - f_off;
      if (delta > f_len)
        delta = f_len;
      if (f_tag == RBD_DIFF_WRITE) {
        if (first_stdin) {
          bufferptr bp = buffer::create(delta);
          r = safe_read_exact(fd, bp.c_str(), delta);
        } else {
          off64_t l = lseek64(fd, delta, SEEK_CUR);
          r = l < 0 ?
-errno : 0; + } + if (r < 0) { + std::cerr << "rbd: failed to skip first diff data" << std::endl; + goto done; + } + } + f_off += delta; + f_len -= delta; + + if (!f_len) { + f_tag = 0; + continue; + } + } + ceph_assert(f_off >= s_off + s_len); + if (s_len) { + r = accept_diff_body(sd, pd, s_tag, s_off, s_len); + if (r < 0) { + std::cerr << "rbd: failed to merge diff chunk" << std::endl; + goto done; + } + s_off += s_len; + s_len = 0; + s_tag = 0; + } else { + ceph_assert(f_end && s_end); + } + continue; + } + + {//tail + __u8 tag = RBD_DIFF_END; + bufferlist bl; + encode(tag, bl); + r = bl.write_fd(pd); + } + +done: + if (pd > 2) + close(pd); + if (sd > 2) + close(sd); + if (fd > 2) + close(fd); + + if(r < 0) { + pc.fail(); + if (pd > 2) + unlink(path); + } else + pc.finish(); + + return r; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + positional->add_options() + ("diff1-path", "path to first diff (or '-' for stdin)") + ("diff2-path", "path to second diff"); + at::add_path_options(positional, options, + "path to merged diff (or '-' for stdout)"); + at::add_no_progress_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string first_diff = utils::get_positional_argument(vm, 0); + if (first_diff.empty()) { + std::cerr << "rbd: first diff was not specified" << std::endl; + return -EINVAL; + } + + std::string second_diff = utils::get_positional_argument(vm, 1); + if (second_diff.empty()) { + std::cerr << "rbd: second diff was not specified" << std::endl; + return -EINVAL; + } + + std::string path; + size_t arg_index = 2; + int r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + r = do_merge_diff(first_diff.c_str(), second_diff.c_str(), path.c_str(), + vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: merge-diff error" << std::endl; + return -r; + } + + return 0; +} + +Shell::Action action( + 
{"merge-diff"}, {}, "Merge two diff exports together.", "", + &get_arguments, &execute); + +} // namespace merge_diff +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Migration.cc b/src/tools/rbd/action/Migration.cc new file mode 100644 index 000000000..1ce6201d9 --- /dev/null +++ b/src/tools/rbd/action/Migration.cc @@ -0,0 +1,429 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "common/errno.h" +#include "common/safe_io.h" + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" + +#include <sys/types.h> +#include <fcntl.h> +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace migration { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_execute(librados::IoCtx& io_ctx, const std::string &image_name, + bool no_progress) { + utils::ProgressContext pc("Image migration", no_progress); + int r = librbd::RBD().migration_execute_with_progress(io_ctx, + image_name.c_str(), pc); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: migration failed: " << cpp_strerror(r) << std::endl; + return r; + } + pc.finish(); + return 0; +} + +static int do_abort(librados::IoCtx& io_ctx, const std::string &image_name, + bool no_progress) { + utils::ProgressContext pc("Abort image migration", no_progress); + int r = librbd::RBD().migration_abort_with_progress(io_ctx, + image_name.c_str(), pc); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: aborting migration failed: " << cpp_strerror(r) + << std::endl; + return r; + } + pc.finish(); + return 0; +} + +static int do_commit(librados::IoCtx& io_ctx, const std::string &image_name, + bool force, bool no_progress) { + librbd::image_migration_status_t migration_status; + int r = librbd::RBD().migration_status(io_ctx, image_name.c_str(), + &migration_status, + sizeof(migration_status)); + 
if (r < 0) { + std::cerr << "rbd: getting migration status failed: " << cpp_strerror(r) + << std::endl; + return r; + } + + librados::IoCtx dst_io_ctx; + r = librados::Rados(io_ctx).ioctx_create2(migration_status.dest_pool_id, dst_io_ctx); + if (r < 0) { + std::cerr << "rbd: accessing source pool id=" + << migration_status.dest_pool_id << " failed: " + << cpp_strerror(r) << std::endl; + return r; + } + + r = utils::set_namespace(migration_status.dest_pool_namespace, &dst_io_ctx); + if (r < 0) { + return r; + } + + librbd::Image image; + r = utils::open_image_by_id(dst_io_ctx, migration_status.dest_image_id, + true, &image); + if (r < 0) { + return r; + } + + std::vector<librbd::linked_image_spec_t> children; + r = image.list_descendants(&children); + if (r < 0) { + std::cerr << "rbd: listing descendants failed: " << cpp_strerror(r) + << std::endl; + return r; + } + + if (children.size() > 0) { + std::cerr << "rbd: the image has " + << (children.size() == 1 ? "a descendant" : "descendants") << ": " + << std::endl; + for (auto& child : children) { + std::cerr << " " << child.pool_name << "/"; + if (!child.pool_namespace.empty()) { + std::cerr << child.pool_namespace << "/"; + } + std::cerr << child.image_name; + if (child.trash) { + std::cerr << " (trash " << child.image_id << ")"; + } + std::cerr << std::endl; + } + std::cerr << "Warning: in-use, read-only descendant images" + << " will not detect the parent update." << std::endl; + if (force) { + std::cerr << "Proceeding anyway due to force flag set." << std::endl; + } else { + std::cerr << "Ensure no descendant images are opened read-only" + << " and run again with force flag." 
<< std::endl; + return -EBUSY; + } + } + + utils::ProgressContext pc("Commit image migration", no_progress); + r = librbd::RBD().migration_commit_with_progress(io_ctx, image_name.c_str(), + pc); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: committing migration failed: " << cpp_strerror(r) + << std::endl; + return r; + } + pc.finish(); + return 0; +} + +void get_prepare_arguments(po::options_description *positional, + po::options_description *options) { + options->add_options() + ("import-only", po::bool_switch(), "only import data from source") + ("source-spec-path", po::value<std::string>(), + "source-spec file (or '-' for stdin)") + ("source-spec", po::value<std::string>(), + "source-spec"); + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_SOURCE); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, true); + at::add_flatten_option(options); +} + +int execute_prepare(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + bool import_only = vm["import-only"].as<bool>(); + + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, import_only ? &snap_name : nullptr, true, + import_only ? 
utils::SNAPSHOT_PRESENCE_PERMITTED : + utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string dst_pool_name; + std::string dst_namespace_name; + std::string dst_image_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, + &dst_namespace_name, &dst_image_name, nullptr, false, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + std::string source_spec; + if (vm.count("source-spec") && vm.count("source-spec-path")) { + std::cerr << "rbd: cannot specify both source-spec and source-spec-path" + << std::endl; + return -EINVAL; + } else if (vm.count("source-spec-path")) { + std::string source_spec_path = vm["source-spec-path"].as<std::string>(); + + int fd = STDIN_FILENO; + if (source_spec_path != "-") { + fd = open(source_spec_path.c_str(), O_RDONLY); + if (fd < 0) { + r = -errno; + std::cerr << "rbd: error opening " << source_spec_path << std::endl; + return r; + } + } + + source_spec.resize(4096); + r = safe_read(fd, source_spec.data(), source_spec.size() - 1); + if (fd != STDIN_FILENO) { + VOID_TEMP_FAILURE_RETRY(close(fd)); + } + + if (r >= 0) { + source_spec.resize(r); + } else { + std::cerr << "rbd: error reading source-spec file: " << cpp_strerror(r) + << std::endl; + return r; + } + } else if (vm.count("source-spec")) { + source_spec = vm["source-spec"].as<std::string>(); + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librados::IoCtx dst_io_ctx; + if (source_spec.empty()) { + utils::normalize_pool_name(&dst_pool_name); + r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, + &dst_io_ctx); + if (r < 0) { + return r; + } + } + + if (import_only && source_spec.empty()) { + if (snap_name.empty()) { + std::cerr << "rbd: snapshot name was not specified" << std::endl; + return -EINVAL; + } + + 
std::stringstream ss; + ss << R"({)" + << R"("type":"native",)" + << R"("pool_id":)" << io_ctx.get_id() << R"(,)" + << R"("pool_namespace":")" << io_ctx.get_namespace() << R"(",)" + << R"("image_name":")" << image_name << R"(",)" + << R"("snap_name":")" << snap_name << R"(")" + << R"(})"; + source_spec = ss.str(); + + if (dst_image_name.empty()) { + std::cerr << "rbd: destination image name must be provided" << std::endl; + return -EINVAL; + } + io_ctx = dst_io_ctx; + image_name = dst_image_name; + snap_name = ""; + } else if (!import_only && !source_spec.empty()) { + std::cerr << "rbd: --import-only must be used in combination with " + << "source-spec/source-spec-path" << std::endl; + return -EINVAL; + } + + if (!snap_name.empty()) { + std::cerr << "rbd: snapshot name specified for a command that doesn't " + << "use it" << std::endl; + return -EINVAL; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, true, &opts); + if (r < 0) { + return r; + } + + if (source_spec.empty()) { + if (dst_image_name.empty()) { + dst_image_name = image_name; + } + + int r = librbd::RBD().migration_prepare(io_ctx, image_name.c_str(), + dst_io_ctx, dst_image_name.c_str(), + opts); + if (r < 0) { + std::cerr << "rbd: preparing migration failed: " << cpp_strerror(r) + << std::endl; + return r; + } + } else { + ceph_assert(import_only); + r = librbd::RBD().migration_prepare_import(source_spec.c_str(), io_ctx, + image_name.c_str(), opts); + if (r < 0) { + std::cerr << "rbd: preparing import migration failed: " << cpp_strerror(r) + << std::endl; + return r; + } + } + + return 0; +} + +void get_execute_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); +} + +int execute_execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + 
  std::string namespace_name;
  std::string image_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }
  // allow the operation to proceed even when the pool is full
  io_ctx.set_pool_full_try();

  r = do_execute(io_ctx, image_name, vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    return r;
  }

  return 0;
}

// Register positional arguments and options for "rbd migration abort".
void get_abort_arguments(po::options_description *positional,
                         po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_no_progress_option(options);
}

// Entry point for "rbd migration abort".
int execute_abort(const po::variables_map &vm,
                  const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }
  // allow the operation to proceed even when the pool is full
  io_ctx.set_pool_full_try();

  r = do_abort(io_ctx, image_name, vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    return r;
  }

  return 0;
}

// Register positional arguments and options for "rbd migration commit".
void get_commit_arguments(po::options_description *positional,
                          po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_no_progress_option(options);
  options->add_options()
    ("force", po::bool_switch(), "proceed even if the image has children");
}

// Entry point for "rbd migration commit".
int execute_commit(const po::variables_map &vm,
                   const std::vector<std::string>
                   &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }
  // allow the operation to proceed even when the pool is full
  io_ctx.set_pool_full_try();

  r = do_commit(io_ctx, image_name, vm["force"].as<bool>(),
                vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    return r;
  }

  return 0;
}

// "--import-only" is a bare switch (takes no value).
Shell::SwitchArguments switched_arguments({"import-only"});

// CLI action registrations for the "rbd migration ..." sub-commands.
Shell::Action action_prepare(
  {"migration", "prepare"}, {}, "Prepare image migration.",
  at::get_long_features_help(), &get_prepare_arguments, &execute_prepare);

Shell::Action action_execute(
  {"migration", "execute"}, {}, "Execute image migration.", "",
  &get_execute_arguments, &execute_execute);

Shell::Action action_abort(
  {"migration", "abort"}, {}, "Cancel interrupted image migration.", "",
  &get_abort_arguments, &execute_abort);

Shell::Action action_commit(
  {"migration", "commit"}, {}, "Commit image migration.", "",
  &get_commit_arguments, &execute_commit);

} // namespace migration
} // namespace action
} // namespace rbd
diff --git a/src/tools/rbd/action/MirrorImage.cc b/src/tools/rbd/action/MirrorImage.cc
new file mode 100644
index 000000000..505d377f4
--- /dev/null
+++ b/src/tools/rbd/action/MirrorImage.cc
@@ -0,0 +1,605 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2016 SUSE LINUX GmbH
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the
Free Software
 * Foundation.  See file COPYING.
 *
 */
#include "tools/rbd/ArgumentTypes.h"
#include "tools/rbd/MirrorDaemonServiceInfo.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "include/stringify.h"
#include "common/config.h"
#include "common/errno.h"
#include "common/Formatter.h"
#include "common/TextTable.h"
#include "global/global_context.h"
#include <iostream>
#include <boost/program_options.hpp>

namespace rbd {
namespace action {
namespace mirror_image {

namespace at = argument_types;
namespace po = boost::program_options;

namespace {

// Verify that mirroring is enabled on the image; when 'snapshot' is true,
// additionally require snapshot-based mirroring mode.
int validate_mirroring_enabled(librbd::Image &image, bool snapshot = false) {
  librbd::mirror_image_info_t mirror_image;
  int r = image.mirror_image_get_info(&mirror_image, sizeof(mirror_image));
  if (r < 0) {
    std::cerr << "rbd: failed to retrieve mirror info: "
              << cpp_strerror(r) << std::endl;
    return r;
  }

  if (mirror_image.state != RBD_MIRROR_IMAGE_ENABLED) {
    std::cerr << "rbd: mirroring not enabled on the image" << std::endl;
    return -EINVAL;
  }

  if (snapshot) {
    librbd::mirror_image_mode_t mode;
    r = image.mirror_image_get_mode(&mode);
    if (r < 0) {
      std::cerr << "rbd: failed to retrieve mirror mode: "
                << cpp_strerror(r) << std::endl;
      return r;
    }

    if (mode != RBD_MIRROR_IMAGE_MODE_SNAPSHOT) {
      std::cerr << "rbd: snapshot based mirroring not enabled on the image"
                << std::endl;
      return -EINVAL;
    }
  }

  return 0;
}

} // anonymous namespace

// Common positional arguments: an image spec.
void get_arguments(po::options_description *positional,
                   po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
}

// Arguments for "mirror image enable": image spec plus an optional mode.
void get_arguments_enable(po::options_description *positional,
                          po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  positional->add_options()
    ("mode", po::value<std::string>()->default_value(""),
     "mirror image mode (journal or snapshot)
[default: journal]");
}

// Arguments for "mirror image disable": image spec plus --force.
void get_arguments_disable(po::options_description *positional,
                           po::options_description *options) {
  options->add_options()
    ("force", po::bool_switch(), "disable even if not primary");
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
}

// Shared implementation for "mirror image enable"/"disable".  'force' is
// only consulted on the disable path.
int execute_enable_disable(const po::variables_map &vm, bool enable,
                           bool force) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  if (enable) {
    // optional positional "mode" argument; empty defaults to journal
    librbd::mirror_image_mode_t mode = RBD_MIRROR_IMAGE_MODE_JOURNAL;
    std::string mode_arg = utils::get_positional_argument(vm, arg_index++);
    if (mode_arg == "journal") {
      mode = RBD_MIRROR_IMAGE_MODE_JOURNAL;
    } else if (mode_arg == "snapshot") {
      mode = RBD_MIRROR_IMAGE_MODE_SNAPSHOT;
    } else if (!mode_arg.empty()) {
      std::cerr << "rbd: invalid mode name: " << mode_arg << std::endl;
      return -EINVAL;
    }
    r = image.mirror_image_enable2(mode);
  } else {
    r = image.mirror_image_disable(force);
  }
  if (r < 0) {
    return r;
  }

  std::cout << (enable ?
                "Mirroring enabled" : "Mirroring disabled")
            << std::endl;

  return 0;
}

// Entry point for "mirror image disable".
int execute_disable(const po::variables_map &vm,
                    const std::vector<std::string> &ceph_global_init_args) {
  return execute_enable_disable(vm, false, vm["force"].as<bool>());
}

// Entry point for "mirror image enable".
int execute_enable(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
  return execute_enable_disable(vm, true, false);
}

// Arguments for "mirror image promote": image spec plus --force.
void get_arguments_promote(po::options_description *positional,
                           po::options_description *options) {
  options->add_options()
    ("force", po::bool_switch(), "promote even if not cleanly demoted by remote cluster");
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
}

// Entry point for "mirror image promote": make the local image primary.
int execute_promote(const po::variables_map &vm,
                    const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  bool force = vm["force"].as<bool>();

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = validate_mirroring_enabled(image);
  if (r < 0) {
    return r;
  }

  r = image.mirror_image_promote(force);
  if (r < 0) {
    std::cerr << "rbd: error promoting image to primary" << std::endl;
    return r;
  }

  std::cout << "Image promoted to primary" << std::endl;
  return 0;
}

// Entry point for "mirror image demote": make the local image non-primary.
int execute_demote(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string
              snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = validate_mirroring_enabled(image);
  if (r < 0) {
    return r;
  }

  r = image.mirror_image_demote();
  if (r < 0) {
    std::cerr << "rbd: error demoting image to non-primary" << std::endl;
    return r;
  }

  std::cout << "Image demoted to non-primary" << std::endl;
  return 0;
}

// Entry point for "mirror image resync": flag the local image to be
// resynchronized from the primary.
int execute_resync(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = validate_mirroring_enabled(image);
  if (r < 0) {
    return r;
  }

  r = image.mirror_image_resync();
  if (r < 0) {
    std::cerr << "rbd: error flagging image resync" << std::endl;
    return r;
  }

  std::cout << "Flagged image for resync from primary" << std::endl;
  return 0;
}

// Arguments for "mirror image status": image spec plus --format/--pretty.
void get_status_arguments(po::options_description *positional,
                          po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_format_options(options);
}

// Entry point for "mirror image status": prints the local and peer-site
// mirroring status (plus mirror snapshots, when applicable) either as
// formatted (json/xml) or plain-text output.
int execute_status(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
  at::Format::Formatter formatter;
  int r = utils::get_formatter(vm, &formatter);
  if (r < 0) {
    return r;
  }

  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = validate_mirroring_enabled(image);
  if (r < 0) {
    return r;
  }

  // peer sites are registered in the default namespace of the pool
  librados::IoCtx default_ns_io_ctx;
  default_ns_io_ctx.dup(io_ctx);
  default_ns_io_ctx.set_namespace("");

  std::vector<librbd::mirror_peer_site_t> mirror_peers;
  utils::get_mirror_peer_sites(default_ns_io_ctx, &mirror_peers);

  std::map<std::string, std::string> peer_mirror_uuids_to_name;
  utils::get_mirror_peer_mirror_uuids_to_names(mirror_peers,
                                               &peer_mirror_uuids_to_name);

  librbd::mirror_image_global_status_t status;
  r = image.mirror_image_get_global_status(&status, sizeof(status));
  if (r < 0) {
    std::cerr << "rbd: failed to get status for image " << image_name << ": "
              << cpp_strerror(r) << std::endl;
    return r;
  }

  utils::populate_unknown_mirror_image_site_statuses(mirror_peers, &status);

  std::string instance_id;
  MirrorDaemonServiceInfo daemon_service_info(io_ctx);

  // split the local site status out of the per-site status list
  librbd::mirror_image_site_status_t local_status;
  int local_site_r = utils::get_local_mirror_image_status(
    status, &local_status);
  status.site_statuses.erase(
    std::remove_if(status.site_statuses.begin(),
                   status.site_statuses.end(),
                   [](auto& status) {
                     return (status.mirror_uuid ==
                               RBD_MIRROR_IMAGE_STATUS_LOCAL_MIRROR_UUID);
                   }),
    status.site_statuses.end());

  if (local_site_r >= 0 && local_status.up) {
    // map the image to its rbd-mirror daemon instance (best effort)
    r = image.mirror_image_get_instance_id(&instance_id);
    if (r == -EOPNOTSUPP) {
      std::cerr << "rbd: newer release of Ceph OSDs required to map image "
                << "to rbd-mirror daemon instance" << std::endl;
      // not fatal
    } else if (r < 0 && r != -ENOENT) {
      std::cerr << "rbd: failed to get service id for image "
                << image_name << ": " << cpp_strerror(r) << std::endl;
      // not fatal
    } else if (!instance_id.empty()) {
      daemon_service_info.init();
    }
  }

  // for primary snapshot-mode images, collect the mirror snapshots
  std::vector<librbd::snap_info_t> snaps;
  if (status.info.primary && status.info.state == RBD_MIRROR_IMAGE_ENABLED) {
    librbd::mirror_image_mode_t mode = RBD_MIRROR_IMAGE_MODE_JOURNAL;
    r = image.mirror_image_get_mode(&mode);
    if (r < 0) {
      std::cerr << "rbd: failed to retrieve mirror mode: "
                << cpp_strerror(r) << std::endl;
      // not fatal
    }

    if (mode == RBD_MIRROR_IMAGE_MODE_SNAPSHOT) {
      image.snap_list(snaps);
      // keep only snapshots in the mirror namespace
      snaps.erase(
        remove_if(snaps.begin(),
                  snaps.end(),
                  [&image](const librbd::snap_info_t &snap) {
                    librbd::snap_namespace_type_t type;
                    int r = image.snap_get_namespace_type(snap.id, &type);
                    if (r < 0) {
                      return false;
                    }
                    return type != RBD_SNAP_NAMESPACE_TYPE_MIRROR;
                  }),
        snaps.end());
    }
  }

  auto mirror_service = daemon_service_info.get_by_instance_id(instance_id);

  if (formatter != nullptr) {
    // structured (json/xml) output
    formatter->open_object_section("image");
    formatter->dump_string("name", image_name);
    formatter->dump_string("global_id", status.info.global_id);
    if (local_site_r >= 0) {
      formatter->dump_string("state", utils::mirror_image_site_status_state(
        local_status));
      formatter->dump_string("description", local_status.description);
      if (mirror_service != nullptr) {
        mirror_service->dump_image(formatter);
      }
      formatter->dump_string("last_update", utils::timestr(
        local_status.last_update));
    }
    if (!status.site_statuses.empty()) {
      formatter->open_array_section("peer_sites");
      for (auto& status : status.site_statuses) {
        formatter->open_object_section("peer_site");

        auto name_it = peer_mirror_uuids_to_name.find(status.mirror_uuid);
        formatter->dump_string("site_name",
          (name_it != peer_mirror_uuids_to_name.end() ? name_it->second : ""));
        formatter->dump_string("mirror_uuids", status.mirror_uuid);

        formatter->dump_string("state", utils::mirror_image_site_status_state(
          status));
        formatter->dump_string("description", status.description);
        formatter->dump_string("last_update", utils::timestr(
          status.last_update));
        formatter->close_section(); // peer_site
      }
      formatter->close_section(); // peer_sites
    }
    if (!snaps.empty()) {
      formatter->open_array_section("snapshots");
      for (auto &snap : snaps) {
        librbd::snap_mirror_namespace_t info;
        r = image.snap_get_mirror_namespace(snap.id, &info, sizeof(info));
        if (r < 0 ||
            (info.state != RBD_SNAP_MIRROR_STATE_PRIMARY &&
             info.state != RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED)) {
          continue;
        }
        formatter->open_object_section("snapshot");
        formatter->dump_unsigned("id", snap.id);
        formatter->dump_string("name", snap.name);
        formatter->dump_bool("demoted",
          info.state == RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED);
        formatter->open_array_section("mirror_peer_uuids");
        for (auto &peer : info.mirror_peer_uuids) {
          formatter->dump_string("peer_uuid", peer);
        }
        formatter->close_section(); // mirror_peer_uuids
        formatter->close_section(); // snapshot
      }
      formatter->close_section(); // snapshots
    }
    formatter->close_section(); // image
    formatter->flush(std::cout);
  } else {
    // plain-text output
    std::cout << image_name << ":\n"
              << "  global_id: " << status.info.global_id << "\n";
    if (local_site_r >= 0) {
      std::cout << "  state: " << utils::mirror_image_site_status_state(
                    local_status) << "\n"
                << "  description: " << local_status.description << "\n";
      if (mirror_service != nullptr) {
        std::cout << "  service: " <<
          mirror_service->get_image_description() << "\n";
      }
      std::cout << "  last_update: " << utils::timestr(
        local_status.last_update) << std::endl;
    }
    if (!status.site_statuses.empty()) {
      std::cout << "  peer_sites:" << std::endl;

      bool first_site = true;
      for (auto& site : status.site_statuses) {
        if (!first_site) {
          std::cout << std::endl;
        }
        first_site = false;

        auto name_it = peer_mirror_uuids_to_name.find(site.mirror_uuid);
        std::cout << "    name: "
                  << (name_it != peer_mirror_uuids_to_name.end() ?
                        name_it->second : site.mirror_uuid)
                  << std::endl
                  << "    state: " << utils::mirror_image_site_status_state(
                    site) << std::endl
                  << "    description: " << site.description << std::endl
                  << "    last_update: " << utils::timestr(
                    site.last_update) << std::endl;
      }
    }
    if (!snaps.empty()) {
      std::cout << "  snapshots:" << std::endl;

      bool first_site = true;
      for (auto &snap : snaps) {
        librbd::snap_mirror_namespace_t info;
        r = image.snap_get_mirror_namespace(snap.id, &info, sizeof(info));
        if (r < 0 ||
            (info.state != RBD_SNAP_MIRROR_STATE_PRIMARY &&
             info.state != RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED)) {
          continue;
        }

        if (!first_site) {
          std::cout << std::endl;
        }

        first_site = false;
        std::cout << "    " << snap.id << " " << snap.name << " ("
                  << (info.state == RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED ?
                        "demoted " : "")
                  << "peer_uuids:[" << info.mirror_peer_uuids << "])";
      }
      std::cout << std::endl;
    }
  }

  return 0;
}

// Arguments for "mirror image snapshot".
void get_snapshot_arguments(po::options_description *positional,
                            po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_snap_create_options(options);
}

// Entry point for "mirror image snapshot": create a mirror snapshot on a
// snapshot-mode mirrored image.
int execute_snapshot(const po::variables_map &vm,
                     const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  uint32_t flags;
  r = utils::get_snap_create_flags(vm, &flags);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = validate_mirroring_enabled(image, true);
  if (r < 0) {
    return r;
  }

  uint64_t snap_id;
  r = image.mirror_image_create_snapshot2(flags, &snap_id);
  if (r < 0) {
    std::cerr << "rbd: error creating snapshot: " << cpp_strerror(r)
              << std::endl;
    return r;
  }

  std::cout << "Snapshot ID: " << snap_id << std::endl;
  return 0;
}

// CLI action registrations for the "rbd mirror image ..." sub-commands.
Shell::Action action_enable(
  {"mirror", "image", "enable"}, {},
  "Enable RBD mirroring for an image.", "",
  &get_arguments_enable, &execute_enable);
Shell::Action action_disable(
  {"mirror", "image", "disable"}, {},
  "Disable RBD mirroring for an image.", "",
  &get_arguments_disable, &execute_disable);
Shell::Action action_promote(
  {"mirror", "image", "promote"}, {},
  "Promote an image to primary for RBD mirroring.", "",
  &get_arguments_promote, &execute_promote);
Shell::Action
action_demote(
  {"mirror", "image", "demote"}, {},
  "Demote an image to non-primary for RBD mirroring.", "",
  &get_arguments, &execute_demote);
Shell::Action action_resync(
  {"mirror", "image", "resync"}, {},
  "Force resync to primary image for RBD mirroring.", "",
  &get_arguments, &execute_resync);
Shell::Action action_status(
  {"mirror", "image", "status"}, {},
  "Show RBD mirroring status for an image.", "",
  &get_status_arguments, &execute_status);
Shell::Action action_snapshot(
  {"mirror", "image", "snapshot"}, {},
  "Create RBD mirroring image snapshot.", "",
  &get_snapshot_arguments, &execute_snapshot);

} // namespace mirror_image
} // namespace action
} // namespace rbd
diff --git a/src/tools/rbd/action/MirrorPool.cc b/src/tools/rbd/action/MirrorPool.cc
new file mode 100644
index 000000000..b714c3bab
--- /dev/null
+++ b/src/tools/rbd/action/MirrorPool.cc
@@ -0,0 +1,1772 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "tools/rbd/ArgumentTypes.h"
#include "tools/rbd/MirrorDaemonServiceInfo.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "include/buffer.h"
#include "include/Context.h"
#include "include/stringify.h"
#include "include/rbd/librbd.hpp"
#include "common/ceph_json.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/errno.h"
#include "common/Formatter.h"
#include "common/TextTable.h"
#include "common/Throttle.h"
#include "global/global_context.h"
#include <fstream>
#include <functional>
#include <iostream>
#include <regex>
#include <set>
#include <boost/program_options.hpp>
#include "include/ceph_assert.h"

#include <atomic>

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
#define dout_prefix *_dout << "rbd::action::MirrorPool: "

namespace rbd {
namespace action {
namespace mirror_pool {

namespace at = argument_types;
namespace po =
boost::program_options; + +static const std::string ALL_NAME("all"); +static const std::string SITE_NAME("site-name"); + +namespace { + +void add_site_name_optional(po::options_description *options) { + options->add_options() + (SITE_NAME.c_str(), po::value<std::string>(), "local site name"); +} + +int set_site_name(librados::Rados& rados, const std::string& site_name) { + librbd::RBD rbd; + int r = rbd.mirror_site_name_set(rados, site_name); + if (r == -EOPNOTSUPP) { + std::cerr << "rbd: cluster does not support site names" << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: failed to set site name" << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +struct MirrorPeerDirection {}; + +void validate(boost::any& v, const std::vector<std::string>& values, + MirrorPeerDirection *target_type, int permit_tx) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + if (s == "rx-only") { + v = boost::any(RBD_MIRROR_PEER_DIRECTION_RX); + } else if (s == "rx-tx") { + v = boost::any(RBD_MIRROR_PEER_DIRECTION_RX_TX); + } else if (permit_tx != 0 && s == "tx-only") { + v = boost::any(RBD_MIRROR_PEER_DIRECTION_TX); + } else { + throw po::validation_error(po::validation_error::invalid_option_value); + } +} + +void add_direction_optional(po::options_description *options) { + options->add_options() + ("direction", po::value<MirrorPeerDirection>(), + "mirroring direction (rx-only, rx-tx)\n" + "[default: rx-tx]"); +} + +int validate_mirroring_enabled(librados::IoCtx& io_ctx) { + librbd::RBD rbd; + rbd_mirror_mode_t mirror_mode; + int r = rbd.mirror_mode_get(io_ctx, &mirror_mode); + if (r < 0) { + std::cerr << "rbd: failed to retrieve mirror mode: " + << cpp_strerror(r) << std::endl; + return r; + } + + if (mirror_mode == RBD_MIRROR_MODE_DISABLED) { + std::cerr << "rbd: mirroring not enabled on the pool" << std::endl; + return -EINVAL; + } + return 0; +} + +int validate_uuid(const 
std::string &uuid) { + std::regex pattern("^[A-F0-9]{8}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{12}$", + std::regex::icase); + std::smatch match; + if (!std::regex_match(uuid, match, pattern)) { + std::cerr << "rbd: invalid uuid '" << uuid << "'" << std::endl; + return -EINVAL; + } + return 0; +} + +int read_key_file(std::string path, std::string* key) { + std::ifstream key_file; + key_file.open(path); + if (key_file.fail()) { + std::cerr << "rbd: failed to open " << path << std::endl; + return -EINVAL; + } + + std::getline(key_file, *key); + if (key_file.bad()) { + std::cerr << "rbd: failed to read key from " << path << std::endl; + return -EINVAL; + } + + key_file.close(); + return 0; +} + +void add_uuid_option(po::options_description *positional) { + positional->add_options() + ("uuid", po::value<std::string>(), "peer uuid"); +} + +int get_uuid(const po::variables_map &vm, size_t arg_index, + std::string *uuid) { + *uuid = utils::get_positional_argument(vm, arg_index); + if (uuid->empty()) { + std::cerr << "rbd: must specify peer uuid" << std::endl; + return -EINVAL; + } + return validate_uuid(*uuid); +} + +int get_remote_cluster_spec(const po::variables_map &vm, + const std::string &spec, + std::string *remote_client_name, + std::string *remote_cluster, + std::map<std::string, std::string>* attributes) { + if (vm.count("remote-client-name")) { + *remote_client_name = vm["remote-client-name"].as<std::string>(); + } + if (vm.count("remote-cluster")) { + *remote_cluster = vm["remote-cluster"].as<std::string>(); + } + if (vm.count("remote-mon-host")) { + (*attributes)["mon_host"] = vm["remote-mon-host"].as<std::string>(); + } + if (vm.count("remote-key-file")) { + std::string key; + int r = read_key_file(vm["remote-key-file"].as<std::string>(), &key); + if (r < 0) { + return r; + } + (*attributes)["key"] = key; + } + + if (!spec.empty()) { + std::regex pattern("^(?:(client\\.[^@]+)@)?([^/@]+)$"); + std::smatch match; + if (!std::regex_match(spec, match, 
                          pattern)) {
      std::cerr << "rbd: invalid spec '" << spec << "'" << std::endl;
      return -EINVAL;
    }
    if (match[1].matched) {
      *remote_client_name = match[1];
    }
    *remote_cluster = match[2];
  }

  if (remote_cluster->empty()) {
    std::cerr << "rbd: remote cluster was not specified" << std::endl;
    return -EINVAL;
  }
  return 0;
}

// Store the peer's connection attributes (e.g. mon_host/key) as config-key
// secrets in the monitor.
int set_peer_config_key(librados::IoCtx& io_ctx, const std::string& peer_uuid,
                        std::map<std::string, std::string>&& attributes) {
  librbd::RBD rbd;
  int r = rbd.mirror_peer_site_set_attributes(io_ctx, peer_uuid, attributes);
  if (r == -EPERM) {
    std::cerr << "rbd: permission denied attempting to set peer "
              << "config-key secrets in the monitor" << std::endl;
    return r;
  } else if (r < 0) {
    std::cerr << "rbd: failed to update mirroring peer config: "
              << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

// Retrieve the peer's stored connection attributes.  -ENOENT is passed
// through silently (no attributes stored yet); other errors are reported.
int get_peer_config_key(librados::IoCtx& io_ctx, const std::string& peer_uuid,
                        std::map<std::string, std::string>* attributes) {
  librbd::RBD rbd;
  int r = rbd.mirror_peer_site_get_attributes(io_ctx, peer_uuid, attributes);
  if (r == -ENOENT) {
    return r;
  } else if (r == -EPERM) {
    std::cerr << "rbd: permission denied attempting to access peer "
              << "config-key secrets from the monitor" << std::endl;
    return r;
  } else if (r == -EINVAL) {
    std::cerr << "rbd: corrupt mirroring peer config" << std::endl;
    return r;
  } else if (r < 0) {
    std::cerr << "rbd: error reading mirroring peer config: "
              << cpp_strerror(r) << std::endl;
    return r;
  }

  return 0;
}

// Merge a single key/value into the peer's stored attributes; an empty
// value removes the key.
int update_peer_config_key(librados::IoCtx& io_ctx,
                           const std::string& peer_uuid,
                           const std::string& key,
                           const std::string& value) {
  std::map<std::string, std::string> attributes;
  int r = get_peer_config_key(io_ctx, peer_uuid, &attributes);
  if (r == -ENOENT) {
    // nothing stored yet: create the attribute map from scratch
    return set_peer_config_key(io_ctx, peer_uuid, {{key, value}});
  } else if (r < 0) {
    return r;
  }

  if (value.empty()) {
    attributes.erase(key);
  } else
{
    attributes[key] = value;
  }
  return set_peer_config_key(io_ctx, peer_uuid, std::move(attributes));
}

// Dump the list of mirror peer sites either through the supplied Formatter
// (JSON/XML) or as human-readable plain text.  When config_key is true the
// stored config-key attributes (mon host / key) are included as well.
int format_mirror_peers(librados::IoCtx& io_ctx,
                        at::Format::Formatter formatter,
                        const std::vector<librbd::mirror_peer_site_t> &peers,
                        bool config_key) {
  if (formatter != nullptr) {
    formatter->open_array_section("peers");
  } else {
    std::cout << "Peer Sites: ";
    if (peers.empty()) {
      std::cout << "none";
    }
    std::cout << std::endl;
  }

  for (auto &peer : peers) {
    // optionally pull the per-peer secrets; a missing entry is not an error
    std::map<std::string, std::string> attributes;
    if (config_key) {
      int r = get_peer_config_key(io_ctx, peer.uuid, &attributes);
      if (r < 0 && r != -ENOENT) {
        return r;
      }
    }

    std::string direction;
    switch (peer.direction) {
    case RBD_MIRROR_PEER_DIRECTION_RX:
      direction = "rx-only";
      break;
    case RBD_MIRROR_PEER_DIRECTION_TX:
      direction = "tx-only";
      break;
    case RBD_MIRROR_PEER_DIRECTION_RX_TX:
      direction = "rx-tx";
      break;
    default:
      direction = "unknown";
      break;
    }

    if (formatter != nullptr) {
      formatter->open_object_section("peer");
      formatter->dump_string("uuid", peer.uuid);
      formatter->dump_string("direction", direction);
      formatter->dump_string("site_name", peer.site_name);
      formatter->dump_string("mirror_uuid", peer.mirror_uuid);
      formatter->dump_string("client_name", peer.client_name);
      for (auto& pair : attributes) {
        formatter->dump_string(pair.first.c_str(), pair.second);
      }
      formatter->close_section();
    } else {
      std::cout << std::endl
                << "UUID: " << peer.uuid << std::endl
                << "Name: " << peer.site_name << std::endl;
      // mirror UUID is hidden for an rx-only peer that never connected
      if (peer.direction != RBD_MIRROR_PEER_DIRECTION_RX ||
          !peer.mirror_uuid.empty()) {
        std::cout << "Mirror UUID: " << peer.mirror_uuid << std::endl;
      }
      std::cout << "Direction: " << direction << std::endl;
      if (peer.direction != RBD_MIRROR_PEER_DIRECTION_TX ||
          !peer.client_name.empty()) {
        std::cout << "Client: " << peer.client_name << std::endl;
      }
      if (config_key) {
        std::cout << "Mon Host: "
<< attributes["mon_host"] << std::endl
                  << "Key: " << attributes["key"] << std::endl;
      }
      // separate plain-text entries with a blank line, except after the last
      if (peer.site_name != peers.rbegin()->site_name) {
        std::cout << std::endl;
      }
    }
  }

  if (formatter != nullptr) {
    formatter->close_section();
  }
  return 0;
}

// Base class for asynchronous per-image mirroring operations (promote,
// demote, status).  Each request opens the image, optionally queries its
// mirror info, runs the subclass-provided action and closes the image
// again, gated by a slot from the shared OrderedThrottle.  The object
// deletes itself when the operation completes.
class ImageRequestBase {
public:
  void send() {
    dout(20) << this << " " << __func__ << ": image_name=" << m_image_name
             << dendl;

    auto ctx = new LambdaContext([this](int r) {
        handle_finalize(r);
      });

    // will pause here until slots are available
    m_finalize_ctx = m_throttle.start_op(ctx);

    open_image();
  }

protected:
  ImageRequestBase(librados::IoCtx &io_ctx, OrderedThrottle &throttle,
                   const std::string &image_name)
    : m_io_ctx(io_ctx), m_throttle(throttle), m_image_name(image_name) {
  }
  virtual ~ImageRequestBase() {
  }

  // subclasses may bypass the mirror-info query entirely
  virtual bool skip_get_info() const {
    return false;
  }
  virtual void get_info(librbd::Image &image, librbd::mirror_image_info_t *info,
                        librbd::RBD::AioCompletion *aio_comp) {
    image.aio_mirror_image_get_info(info, sizeof(librbd::mirror_image_info_t),
                                    aio_comp);
  }

  // subclasses may skip the action based on the retrieved mirror info
  virtual bool skip_action(const librbd::mirror_image_info_t &info) const {
    return false;
  }
  virtual void execute_action(librbd::Image &image,
                              librbd::RBD::AioCompletion *aio_comp) = 0;
  virtual void handle_execute_action(int r) {
    dout(20) << this << " " << __func__ << ": r=" << r << dendl;

    // -ENOENT: image vanished between listing and action -- not an error
    if (r < 0 && r != -ENOENT) {
      std::cerr << "rbd: failed to " << get_action_type() << " image "
                << m_image_name << ": " << cpp_strerror(r) << std::endl;
      m_ret_val = r;
    }

    close_image();
  }

  // invoked after a successful run, before the throttle slot is released
  virtual void finalize_action() {
  }
  virtual std::string get_action_type() const = 0;

private:
  /**
   * @verbatim
   *
   * <start>
   *    |
   *    v
   * OPEN_IMAGE
   *    |
   *    v
   * GET_INFO
   *    |
   *    v
   * EXECUTE_ACTION
   *    |
   *    v
   * CLOSE_IMAGE
   *    |
   *    v
   * FINALIZE_ACTION
   *    |
   *    v
   * <finish>
   *
   * @endverbatim
   */

  librados::IoCtx &m_io_ctx;
  OrderedThrottle &m_throttle;
  const std::string m_image_name;

  librbd::Image m_image;
  Context *m_finalize_ctx = nullptr;

  librbd::mirror_image_info_t m_mirror_image_info;

  int m_ret_val = 0;  // first action failure; reported through the throttle

  void open_image() {
    dout(20) << this << " " << __func__ << dendl;

    librbd::RBD rbd;
    auto aio_completion = utils::create_aio_completion<
      ImageRequestBase, &ImageRequestBase::handle_open_image>(this);
    rbd.aio_open(m_io_ctx, m_image, m_image_name.c_str(), nullptr,
                 aio_completion);
  }

  void handle_open_image(int r) {
    dout(20) << this << " " << __func__ << ": r=" << r << dendl;

    if (r < 0) {
      std::cerr << "rbd: failed to open image "
                << m_image_name << ": " << cpp_strerror(r) << std::endl;
      m_finalize_ctx->complete(r);
      return;
    }

    get_info();
  }

  void get_info() {
    if (skip_get_info()) {
      execute_action();
      return;
    }
    dout(20) << this << " " << __func__ << dendl;

    auto aio_completion = utils::create_aio_completion<
      ImageRequestBase, &ImageRequestBase::handle_get_info>(this);
    get_info(m_image, &m_mirror_image_info, aio_completion);
  }

  void handle_get_info(int r) {
    dout(20) << this << " " << __func__ << ": r=" << r << dendl;

    if (r == -ENOENT) {
      // image has no mirror info; silently skip it
      close_image();
      return;
    } else if (r < 0) {
      std::cerr << "rbd: failed to retrieve mirror image info for "
                << m_image_name << ": " << cpp_strerror(r) << std::endl;
      m_ret_val = r;
      close_image();
      return;
    }

    execute_action();
  }

  void execute_action() {
    if (skip_action(m_mirror_image_info)) {
      close_image();
      return;
    }
    dout(20) << this << " " << __func__ << dendl;

    auto aio_completion = utils::create_aio_completion<
      ImageRequestBase, &ImageRequestBase::handle_execute_action>(this);
    execute_action(m_image, aio_completion);
  }

  void close_image() {
    dout(20) << this << " " << __func__ << dendl;

    auto aio_completion = utils::create_aio_completion<
      ImageRequestBase, &ImageRequestBase::handle_close_image>(this);
    m_image.aio_close(aio_completion);
  }

  void handle_close_image(int r) {
    dout(20) << this << " " << __func__ << ": r=" << r << dendl;

    if (r < 0) {
      std::cerr << "rbd: failed to close image "
                << m_image_name << ": " << cpp_strerror(r) << std::endl;
    }

    m_finalize_ctx->complete(r);
  }

  void handle_finalize(int r) {
    dout(20) << this << " " << __func__ << ": r=" << r << dendl;

    // prefer the recorded action error over a benign close result
    if (r == 0 && m_ret_val < 0) {
      r = m_ret_val;
    }
    if (r >= 0) {
      finalize_action();
    }
    m_throttle.end_op(r);
    delete this;
  }

};

// Promotes a mirror-enabled, non-primary image to primary; increments
// *counter for every successful promotion.
class PromoteImageRequest : public ImageRequestBase {
public:
  PromoteImageRequest(librados::IoCtx &io_ctx, OrderedThrottle &throttle,
                      const std::string &image_name,
                      std::atomic<unsigned> *counter, bool force)
    : ImageRequestBase(io_ctx, throttle, image_name), m_counter(counter),
      m_force(force) {
  }

protected:
  bool skip_action(const librbd::mirror_image_info_t &info) const override {
    // only promote enabled, non-primary images
    return (info.state != RBD_MIRROR_IMAGE_ENABLED || info.primary);
  }

  void execute_action(librbd::Image &image,
                      librbd::RBD::AioCompletion *aio_comp) override {
    image.aio_mirror_image_promote(m_force, aio_comp);
  }

  void handle_execute_action(int r) override {
    if (r >= 0) {
      (*m_counter)++;
    }
    ImageRequestBase::handle_execute_action(r);
  }

  std::string get_action_type() const override {
    return "promote";
  }

private:
  std::atomic<unsigned> *m_counter = nullptr;
  bool m_force;
};

// Demotes a mirror-enabled, primary image; increments *counter for every
// successful demotion.
class DemoteImageRequest : public ImageRequestBase {
public:
  DemoteImageRequest(librados::IoCtx &io_ctx, OrderedThrottle &throttle,
                     const std::string &image_name,
                     std::atomic<unsigned> *counter)
    : ImageRequestBase(io_ctx, throttle, image_name), m_counter(counter) {
  }

protected:
  bool skip_action(const librbd::mirror_image_info_t &info) const override {
    // only demote enabled, primary images
    return (info.state != RBD_MIRROR_IMAGE_ENABLED || !info.primary);
  }

  void execute_action(librbd::Image &image,
                      librbd::RBD::AioCompletion *aio_comp)
override {
    image.aio_mirror_image_demote(aio_comp);
  }
  void handle_execute_action(int r) override {
    if (r >= 0) {
      (*m_counter)++;
    }
    ImageRequestBase::handle_execute_action(r);
  }

  std::string get_action_type() const override {
    return "demote";
  }

private:
  std::atomic<unsigned> *m_counter = nullptr;
};

// Retrieves and prints the global mirror status of a single image, either
// through the Formatter or as plain text.
// NOTE(review): the indentation inside the plain-text string literals below
// appears to have been collapsed by extraction -- verify spacing against
// the upstream file before relying on exact output formatting.
class StatusImageRequest : public ImageRequestBase {
public:
  StatusImageRequest(
      librados::IoCtx &io_ctx, OrderedThrottle &throttle,
      const std::string &image_name,
      const std::map<std::string, std::string> &instance_ids,
      const std::vector<librbd::mirror_peer_site_t>& mirror_peers,
      const std::map<std::string, std::string> &peer_mirror_uuids_to_name,
      const MirrorDaemonServiceInfo &daemon_service_info,
      at::Format::Formatter formatter)
    : ImageRequestBase(io_ctx, throttle, image_name),
      m_instance_ids(instance_ids), m_mirror_peers(mirror_peers),
      m_peer_mirror_uuids_to_name(peer_mirror_uuids_to_name),
      m_daemon_service_info(daemon_service_info), m_formatter(formatter) {
  }

protected:
  bool skip_get_info() const override {
    // status query does not need the local mirror-image info
    return true;
  }

  void execute_action(librbd::Image &image,
                      librbd::RBD::AioCompletion *aio_comp) override {
    image.get_id(&m_image_id);
    image.aio_mirror_image_get_global_status(
      &m_mirror_image_global_status, sizeof(m_mirror_image_global_status),
      aio_comp);
  }

  void finalize_action() override {
    // empty global id: image is not mirrored -- print nothing
    if (m_mirror_image_global_status.info.global_id.empty()) {
      return;
    }

    // add placeholder entries for peers that have not reported a status
    utils::populate_unknown_mirror_image_site_statuses(
      m_mirror_peers, &m_mirror_image_global_status);

    // split the local site status out and drop it from the peer list
    librbd::mirror_image_site_status_t local_status;
    int local_site_r = utils::get_local_mirror_image_status(
      m_mirror_image_global_status, &local_status);
    m_mirror_image_global_status.site_statuses.erase(
      std::remove_if(m_mirror_image_global_status.site_statuses.begin(),
                     m_mirror_image_global_status.site_statuses.end(),
                     [](auto& status) {
          return (status.mirror_uuid ==
                    RBD_MIRROR_IMAGE_STATUS_LOCAL_MIRROR_UUID);
        }),
      m_mirror_image_global_status.site_statuses.end());

    // resolve the rbd-mirror daemon instance servicing this image (if up)
    std::string instance_id = (local_site_r >= 0 && local_status.up &&
                               m_instance_ids.count(m_image_id)) ?
        m_instance_ids.find(m_image_id)->second : "";

    auto mirror_service = m_daemon_service_info.get_by_instance_id(instance_id);
    if (m_formatter != nullptr) {
      m_formatter->open_object_section("image");
      m_formatter->dump_string("name", m_mirror_image_global_status.name);
      m_formatter->dump_string(
        "global_id", m_mirror_image_global_status.info.global_id);
      if (local_site_r >= 0) {
        m_formatter->dump_string("state", utils::mirror_image_site_status_state(
          local_status));
        m_formatter->dump_string("description", local_status.description);
        if (mirror_service != nullptr) {
          mirror_service->dump_image(m_formatter);
        }
        m_formatter->dump_string("last_update", utils::timestr(
          local_status.last_update));
      }
      if (!m_mirror_image_global_status.site_statuses.empty()) {
        m_formatter->open_array_section("peer_sites");
        for (auto& status : m_mirror_image_global_status.site_statuses) {
          m_formatter->open_object_section("peer_site");

          auto name_it = m_peer_mirror_uuids_to_name.find(status.mirror_uuid);
          m_formatter->dump_string("site_name",
            (name_it != m_peer_mirror_uuids_to_name.end() ?
               name_it->second : ""));
          m_formatter->dump_string("mirror_uuids", status.mirror_uuid);

          m_formatter->dump_string(
            "state", utils::mirror_image_site_status_state(status));
          m_formatter->dump_string("description", status.description);
          m_formatter->dump_string("last_update", utils::timestr(
            status.last_update));
          m_formatter->close_section(); // peer_site
        }
        m_formatter->close_section(); // peer_sites
      }
      m_formatter->close_section(); // image
    } else {
      std::cout << std::endl
                << m_mirror_image_global_status.name << ":" << std::endl
                << " global_id: "
                << m_mirror_image_global_status.info.global_id << std::endl;
      if (local_site_r >= 0) {
        std::cout << " state: " << utils::mirror_image_site_status_state(
                       local_status) << std::endl
                  << " description: " << local_status.description << std::endl;
        if (mirror_service != nullptr) {
          std::cout << " service: " <<
            mirror_service->get_image_description() << std::endl;
        }
        std::cout << " last_update: " << utils::timestr(
          local_status.last_update) << std::endl;
      }
      if (!m_mirror_image_global_status.site_statuses.empty()) {
        std::cout << " peer_sites:" << std::endl;
        bool first_site = true;
        for (auto& site : m_mirror_image_global_status.site_statuses) {
          if (!first_site) {
            std::cout << std::endl;
          }
          first_site = false;

          auto name_it = m_peer_mirror_uuids_to_name.find(site.mirror_uuid);
          std::cout << " name: "
                    << (name_it != m_peer_mirror_uuids_to_name.end() ?
                          name_it->second : site.mirror_uuid)
                    << std::endl
                    << " state: " << utils::mirror_image_site_status_state(
                         site) << std::endl
                    << " description: " << site.description << std::endl
                    << " last_update: " << utils::timestr(
                         site.last_update) << std::endl;
        }
      }
    }
  }

  std::string get_action_type() const override {
    return "status";
  }

private:
  const std::map<std::string, std::string> &m_instance_ids;
  const std::vector<librbd::mirror_peer_site_t> &m_mirror_peers;
  const std::map<std::string, std::string> &m_peer_mirror_uuids_to_name;
  const MirrorDaemonServiceInfo &m_daemon_service_info;
  at::Format::Formatter m_formatter;
  std::string m_image_id;
  librbd::mirror_image_global_status_t m_mirror_image_global_status;
};

// Helper functor so ImageRequestGenerator can std::bind the io_ctx and
// throttle while forwarding the per-request-type constructor arguments.
template <typename RequestT>
class ImageRequestAllocator {
public:
  template <class... Args>
  RequestT *operator()(librados::IoCtx &io_ctx, OrderedThrottle &throttle,
                       const std::string &image_name, Args&&... args) {
    return new RequestT(io_ctx, throttle, image_name,
                        std::forward<Args>(args)...);
  }
};

// Creates one RequestT per image in the pool and waits for them all,
// bounded by the rbd_concurrent_management_ops throttle.
template <typename RequestT>
class ImageRequestGenerator {
public:
  template <class... Args>
  ImageRequestGenerator(librados::IoCtx &io_ctx, Args&&...
args)
    : m_io_ctx(io_ctx),
      // NOTE: m_throttle is declared after m_factory, but std::ref only
      // captures its address, so binding it before construction is safe.
      m_factory(std::bind(ImageRequestAllocator<RequestT>(),
                          std::ref(m_io_ctx), std::ref(m_throttle),
                          std::placeholders::_1, std::forward<Args>(args)...)),
      m_throttle(g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"),
                 true) {
  }

  int execute() {
    // use the alphabetical list of image names for pool-level
    // mirror image operations
    librbd::RBD rbd;
    int r = rbd.list2(m_io_ctx, &m_images);
    if (r < 0 && r != -ENOENT) {
      std::cerr << "rbd: failed to list images within pool" << std::endl;
      return r;
    }

    for (auto &image : m_images) {
      auto request = m_factory(image.name);
      request->send();
    }

    // blocks until every request has released its throttle slot
    return m_throttle.wait_for_ret();
  }
private:
  typedef std::function<RequestT*(const std::string&)> Factory;

  librados::IoCtx &m_io_ctx;
  Factory m_factory;

  OrderedThrottle m_throttle;

  std::vector<librbd::image_spec_t> m_images;

};

// Summarize mirror image states across the pool and derive an overall
// image health (OK / WARNING / ERROR).  *total_images is incremented, not
// reset -- callers initialize it.
int get_mirror_image_status(
    librados::IoCtx& io_ctx, uint32_t* total_images,
    std::map<librbd::mirror_image_status_state_t, int>* mirror_image_states,
    MirrorHealth* mirror_image_health) {
  librbd::RBD rbd;
  int r = rbd.mirror_image_status_summary(io_ctx, mirror_image_states);
  if (r < 0) {
    std::cerr << "rbd: failed to get status summary for mirrored images: "
              << cpp_strerror(r) << std::endl;
    return r;
  }

  *mirror_image_health = MIRROR_HEALTH_OK;
  for (auto &it : *mirror_image_states) {
    auto &state = it.first;
    // any state other than replaying/stopped is at least a warning
    if (*mirror_image_health < MIRROR_HEALTH_WARNING &&
        (state != MIRROR_IMAGE_STATUS_STATE_REPLAYING &&
         state != MIRROR_IMAGE_STATUS_STATE_STOPPED)) {
      *mirror_image_health = MIRROR_HEALTH_WARNING;
    }
    if (*mirror_image_health < MIRROR_HEALTH_ERROR &&
        state == MIRROR_IMAGE_STATUS_STATE_ERROR) {
      *mirror_image_health = MIRROR_HEALTH_ERROR;
    }
    *total_images += it.second;
  }

  return 0;
}

} // anonymous namespace

void get_peer_bootstrap_create_arguments(po::options_description *positional,
                                         po::options_description *options) {
  at::add_pool_options(positional, options, false);
  add_site_name_optional(options);
}

// Create (or re-use) the bootstrap peer user and print the resulting
// bootstrap token on stdout.
int execute_peer_bootstrap_create(
    const po::variables_map &vm,
    const std::vector<std::string> &ceph_global_init_args) {
  std::string pool_name;
  size_t arg_index = 0;
  int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
                                              nullptr, &arg_index);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, "", &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  r = validate_mirroring_enabled(io_ctx);
  if (r < 0) {
    return r;
  }

  if (vm.count(SITE_NAME)) {
    r = set_site_name(rados, vm[SITE_NAME].as<std::string>());
    if (r < 0) {
      return r;
    }
  }

  librbd::RBD rbd;
  std::string token;
  r = rbd.mirror_peer_bootstrap_create(io_ctx, &token);
  if (r == -EEXIST) {
    // NOTE(review): -EEXIST is not returned; control falls through and the
    // token is still printed below -- confirm this is intended
    std::cerr << "rbd: mismatch with pre-existing RBD mirroring peer user caps"
              << std::endl;
  } else if (r < 0) {
    std::cerr << "rbd: failed to create mirroring bootstrap token: "
              << cpp_strerror(r) << std::endl;
    return r;
  }

  std::cout << token << std::endl;
  return 0;
}

void get_peer_bootstrap_import_arguments(po::options_description *positional,
                                         po::options_description *options) {
  at::add_pool_options(positional, options, false);
  add_site_name_optional(options);
  // registered both positionally and as a named option so either form works
  positional->add_options()
    ("token-path", po::value<std::string>(),
     "bootstrap token file (or '-' for stdin)");
  options->add_options()
    ("token-path", po::value<std::string>(),
     "bootstrap token file (or '-' for stdin)");
  add_direction_optional(options);
}

// Import a bootstrap token (from a file or stdin) to establish mirroring
// with the remote peer.
int execute_peer_bootstrap_import(
    const po::variables_map &vm,
    const std::vector<std::string> &ceph_global_init_args) {
  std::string pool_name;
  size_t arg_index = 0;
  int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
                                              nullptr, &arg_index);
  if (r < 0) {
    return r;
  }

  std::string token_path;
  if (vm.count("token-path")) {
    token_path =
vm["token-path"].as<std::string>();
  } else {
    token_path = utils::get_positional_argument(vm, arg_index++);
  }

  if (token_path.empty()) {
    std::cerr << "rbd: token path was not specified" << std::endl;
    return -EINVAL;
  }

  rbd_mirror_peer_direction_t mirror_peer_direction =
    RBD_MIRROR_PEER_DIRECTION_RX_TX;
  if (vm.count("direction")) {
    mirror_peer_direction = vm["direction"].as<rbd_mirror_peer_direction_t>();
  }

  // '-' reads the token from stdin instead of a file
  int fd = STDIN_FILENO;
  if (token_path != "-") {
    fd = open(token_path.c_str(), O_RDONLY|O_BINARY);
    if (fd < 0) {
      r = -errno;
      std::cerr << "rbd: error opening " << token_path << std::endl;
      return r;
    }
  }

  // token buffer is zero-filled; reading sizeof - 1 keeps it NUL-terminated
  char token[1024];
  memset(token, 0, sizeof(token));
  r = safe_read(fd, token, sizeof(token) - 1);
  if (fd != STDIN_FILENO) {
    VOID_TEMP_FAILURE_RETRY(close(fd));
  }

  if (r < 0) {
    std::cerr << "rbd: error reading token file: " << cpp_strerror(r)
              << std::endl;
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, "", &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  if (vm.count(SITE_NAME)) {
    r = set_site_name(rados, vm[SITE_NAME].as<std::string>());
    if (r < 0) {
      return r;
    }
  }

  librbd::RBD rbd;
  r = rbd.mirror_peer_bootstrap_import(io_ctx, mirror_peer_direction, token);
  if (r == -ENOSYS) {
    std::cerr << "rbd: mirroring is not enabled on remote peer" << std::endl;
    return r;
  } else if (r < 0) {
    std::cerr << "rbd: failed to import peer bootstrap token" << std::endl;
    return r;
  }

  return 0;
}

void get_peer_add_arguments(po::options_description *positional,
                            po::options_description *options) {
  at::add_pool_options(positional, options, false);
  positional->add_options()
    ("remote-cluster-spec", "remote cluster spec\n"
     "(example: [<client name>@]<cluster name>)");
  options->add_options()
    ("remote-client-name", po::value<std::string>(), "remote client name")
    ("remote-cluster", po::value<std::string>(), "remote cluster name")
    ("remote-mon-host", po::value<std::string>(), "remote mon host(s)")
    ("remote-key-file", po::value<std::string>(),
     "path to file containing remote key");
  add_direction_optional(options);
}

// Register a remote cluster as a mirroring peer of the local pool and
// print the new peer's UUID on success.
int execute_peer_add(const po::variables_map &vm,
                     const std::vector<std::string> &ceph_global_init_args) {
  std::string pool_name;
  size_t arg_index = 0;
  int r = utils::get_pool_and_namespace_names(vm, true, &pool_name,
                                              nullptr, &arg_index);
  if (r < 0) {
    return r;
  }

  // default remote client name to the local client name
  std::string remote_client_name = g_ceph_context->_conf->name.to_str();
  std::string remote_cluster;
  std::map<std::string, std::string> attributes;
  r = get_remote_cluster_spec(
    vm, utils::get_positional_argument(vm, arg_index),
    &remote_client_name, &remote_cluster, &attributes);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, "", &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  r = validate_mirroring_enabled(io_ctx);
  if (r < 0) {
    return r;
  }

  // TODO: temporary restriction to prevent adding multiple peers
  // until rbd-mirror daemon can properly handle the scenario
  librbd::RBD rbd;
  std::vector<librbd::mirror_peer_site_t> mirror_peers;
  r = rbd.mirror_peer_site_list(io_ctx, &mirror_peers);
  if (r < 0) {
    std::cerr << "rbd: failed to list mirror peers" << std::endl;
    return r;
  }

  // ignore tx-only peers since the restriction is for rx
  mirror_peers.erase(
    std::remove_if(
      mirror_peers.begin(), mirror_peers.end(),
      [](const librbd::mirror_peer_site_t& peer) {
        return (peer.direction == RBD_MIRROR_PEER_DIRECTION_TX);
      }),
    mirror_peers.end());

  if (!mirror_peers.empty()) {
    std::cerr << "rbd: multiple RX peers are not currently supported"
              << std::endl;
    return -EINVAL;
  }

  rbd_mirror_peer_direction_t mirror_peer_direction =
    RBD_MIRROR_PEER_DIRECTION_RX_TX;
  if (vm.count("direction")) {
    mirror_peer_direction = vm["direction"].as<rbd_mirror_peer_direction_t>();
  }

std::string uuid; + r = rbd.mirror_peer_site_add( + io_ctx, &uuid, mirror_peer_direction, remote_cluster, remote_client_name); + if (r == -EEXIST) { + std::cerr << "rbd: mirror peer already exists" << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: error adding mirror peer" << std::endl; + return r; + } + + if (!attributes.empty()) { + r = set_peer_config_key(io_ctx, uuid, std::move(attributes)); + if (r < 0) { + return r; + } + } + + std::cout << uuid << std::endl; + return 0; +} + +void get_peer_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + add_uuid_option(positional); +} + +int execute_peer_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + std::string uuid; + r = get_uuid(vm, arg_index, &uuid); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.mirror_peer_site_remove(io_ctx, uuid); + if (r < 0) { + std::cerr << "rbd: error removing mirror peer" << std::endl; + return r; + } + return 0; +} + +void get_peer_set_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + add_uuid_option(positional); + positional->add_options() + ("key", "peer parameter\n" + "(direction, site-name, client, mon-host, key-file)") + ("value", "new value for specified key\n" + "(rx-only, tx-only, or rx-tx for direction)"); +} + +int execute_peer_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string 
pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + std::string uuid; + r = get_uuid(vm, arg_index++, &uuid); + if (r < 0) { + return r; + } + + std::set<std::string> valid_keys{{"direction", "site-name", "cluster", + "client", "mon-host", "key-file"}}; + std::string key = utils::get_positional_argument(vm, arg_index++); + if (valid_keys.find(key) == valid_keys.end()) { + std::cerr << "rbd: must specify "; + for (auto& valid_key : valid_keys) { + std::cerr << "'" << valid_key << "'"; + if (&valid_key != &(*valid_keys.rbegin())) { + std::cerr << ", "; + } + } + std::cerr << " key." << std::endl; + return -EINVAL; + } + + std::string value = utils::get_positional_argument(vm, arg_index++); + if (value.empty() && (key == "client" || key == "cluster")) { + std::cerr << "rbd: must specify new " << key << " value." << std::endl; + } else if (key == "key-file") { + key = "key"; + r = read_key_file(value, &value); + if (r < 0) { + return r; + } + } else if (key == "mon-host") { + key = "mon_host"; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + if (key == "client") { + r = rbd.mirror_peer_site_set_client_name(io_ctx, uuid.c_str(), + value.c_str()); + } else if (key == "site-name" || key == "cluster") { + r = rbd.mirror_peer_site_set_name(io_ctx, uuid.c_str(), value.c_str()); + } else if (key == "direction") { + MirrorPeerDirection tag; + boost::any direction; + try { + validate(direction, {value}, &tag, 1); + } catch (...) 
{ + std::cerr << "rbd: invalid direction" << std::endl; + return -EINVAL; + } + + auto peer_direction = boost::any_cast<rbd_mirror_peer_direction_t>( + direction); + if (peer_direction != RBD_MIRROR_PEER_DIRECTION_TX) { + // TODO: temporary restriction to prevent adding multiple peers + // until rbd-mirror daemon can properly handle the scenario + std::vector<librbd::mirror_peer_site_t> mirror_peers; + r = rbd.mirror_peer_site_list(io_ctx, &mirror_peers); + if (r < 0) { + std::cerr << "rbd: failed to list mirror peers" << std::endl; + return r; + } + + // ignore peer to be updated and tx-only peers since the restriction is + // for rx + mirror_peers.erase( + std::remove_if( + mirror_peers.begin(), mirror_peers.end(), + [uuid](const librbd::mirror_peer_site_t& peer) { + return (peer.uuid == uuid || + peer.direction == RBD_MIRROR_PEER_DIRECTION_TX); + }), + mirror_peers.end()); + + if (!mirror_peers.empty()) { + std::cerr << "rbd: multiple RX peers are not currently supported" + << std::endl; + return -EINVAL; + } + } + + r = rbd.mirror_peer_site_set_direction(io_ctx, uuid, peer_direction); + } else { + r = update_peer_config_key(io_ctx, uuid, key, value); + } + + if (r == -ENOENT) { + std::cerr << "rbd: mirror peer " << uuid << " does not exist" + << std::endl; + } + + if (r < 0) { + return r; + } + return 0; +} + +void get_disable_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); +} + +void get_enable_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + positional->add_options() + ("mode", "mirror mode [image or pool]"); + add_site_name_optional(options); +} + +int execute_enable_disable(librados::IoCtx& io_ctx, + rbd_mirror_mode_t next_mirror_mode, + const std::string &mode, bool ignore_no_update) { + librbd::RBD rbd; + rbd_mirror_mode_t current_mirror_mode; + int r = rbd.mirror_mode_get(io_ctx, 
¤t_mirror_mode); + if (r < 0) { + std::cerr << "rbd: failed to retrieve mirror mode: " + << cpp_strerror(r) << std::endl; + return r; + } + + if (current_mirror_mode == next_mirror_mode) { + if (!ignore_no_update) { + if (mode == "disabled") { + std::cout << "rbd: mirroring is already " << mode << std::endl; + } else { + std::cout << "rbd: mirroring is already configured for " + << mode << " mode" << std::endl; + } + } + return 0; + } else if (next_mirror_mode == RBD_MIRROR_MODE_IMAGE && + current_mirror_mode == RBD_MIRROR_MODE_POOL) { + std::cout << "note: changing mirroring mode from pool to image" + << std::endl; + } else if (next_mirror_mode == RBD_MIRROR_MODE_POOL && + current_mirror_mode == RBD_MIRROR_MODE_IMAGE) { + std::cout << "note: changing mirroring mode from image to pool" + << std::endl; + } + + r = rbd.mirror_mode_set(io_ctx, next_mirror_mode); + if (r < 0) { + return r; + } + return 0; +} + +int execute_disable(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + return execute_enable_disable(io_ctx, RBD_MIRROR_MODE_DISABLED, "disabled", + false); +} + +int execute_enable(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + rbd_mirror_mode_t mirror_mode; + std::string mode = utils::get_positional_argument(vm, arg_index++); + if (mode == "image") { + mirror_mode = RBD_MIRROR_MODE_IMAGE; + } else if (mode == "pool") 
{ + mirror_mode = RBD_MIRROR_MODE_POOL; + } else { + std::cerr << "rbd: must specify 'image' or 'pool' mode." << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + bool updated = false; + if (vm.count(SITE_NAME)) { + librbd::RBD rbd; + + auto site_name = vm[SITE_NAME].as<std::string>(); + std::string original_site_name; + r = rbd.mirror_site_name_get(rados, &original_site_name); + updated = (r >= 0 && site_name != original_site_name); + + r = set_site_name(rados, site_name); + if (r < 0) { + return r; + } + } + + return execute_enable_disable(io_ctx, mirror_mode, mode, updated); +} + +void get_info_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + at::add_format_options(options); + options->add_options() + (ALL_NAME.c_str(), po::bool_switch(), "list all attributes"); +} + +int execute_info(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + rbd_mirror_mode_t mirror_mode; + r = rbd.mirror_mode_get(io_ctx, &mirror_mode); + if (r < 0) { + return r; + } + + std::string site_name; + r = rbd.mirror_site_name_get(rados, &site_name); + if (r < 0 && r != -EOPNOTSUPP) { + return r; + } + + std::vector<librbd::mirror_peer_site_t> mirror_peers; + if (namespace_name.empty()) { + r = rbd.mirror_peer_site_list(io_ctx, &mirror_peers); + if (r 
< 0) { + return r; + } + } + + std::string mirror_mode_desc; + switch (mirror_mode) { + case RBD_MIRROR_MODE_DISABLED: + mirror_mode_desc = "disabled"; + break; + case RBD_MIRROR_MODE_IMAGE: + mirror_mode_desc = "image"; + break; + case RBD_MIRROR_MODE_POOL: + mirror_mode_desc = "pool"; + break; + default: + mirror_mode_desc = "unknown"; + break; + } + + if (formatter != nullptr) { + formatter->open_object_section("mirror"); + formatter->dump_string("mode", mirror_mode_desc); + } else { + std::cout << "Mode: " << mirror_mode_desc << std::endl; + } + + if (mirror_mode != RBD_MIRROR_MODE_DISABLED && namespace_name.empty()) { + if (formatter != nullptr) { + formatter->dump_string("site_name", site_name); + } else { + std::cout << "Site Name: " << site_name << std::endl + << std::endl; + } + + r = format_mirror_peers(io_ctx, formatter, mirror_peers, + vm[ALL_NAME].as<bool>()); + if (r < 0) { + return r; + } + } + if (formatter != nullptr) { + formatter->close_section(); + formatter->flush(std::cout); + } + return 0; +} + +void get_status_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + at::add_format_options(options); + at::add_verbose_option(options); +} + +int execute_status(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + bool verbose = vm[at::VERBOSE].as<bool>(); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + + uint32_t 
total_images = 0; + std::map<librbd::mirror_image_status_state_t, int> mirror_image_states; + MirrorHealth mirror_image_health = MIRROR_HEALTH_UNKNOWN; + r = get_mirror_image_status(io_ctx, &total_images, &mirror_image_states, + &mirror_image_health); + if (r < 0) { + return r; + } + + MirrorDaemonServiceInfo daemon_service_info(io_ctx); + daemon_service_info.init(); + + MirrorHealth mirror_daemon_health = daemon_service_info.get_daemon_health(); + auto mirror_services = daemon_service_info.get_mirror_services(); + + auto mirror_health = std::max(mirror_image_health, mirror_daemon_health); + + if (formatter != nullptr) { + formatter->open_object_section("status"); + formatter->open_object_section("summary"); + formatter->dump_stream("health") << mirror_health; + formatter->dump_stream("daemon_health") << mirror_daemon_health; + formatter->dump_stream("image_health") << mirror_image_health; + formatter->open_object_section("states"); + for (auto &it : mirror_image_states) { + std::string state_name = utils::mirror_image_status_state(it.first); + formatter->dump_int(state_name.c_str(), it.second); + } + formatter->close_section(); // states + formatter->close_section(); // summary + } else { + std::cout << "health: " << mirror_health << std::endl; + std::cout << "daemon health: " << mirror_daemon_health << std::endl; + std::cout << "image health: " << mirror_image_health << std::endl; + std::cout << "images: " << total_images << " total" << std::endl; + for (auto &it : mirror_image_states) { + std::cout << " " << it.second << " " + << utils::mirror_image_status_state(it.first) << std::endl; + } + } + + int ret = 0; + + if (verbose) { + // dump per-daemon status + if (formatter != nullptr) { + formatter->open_array_section("daemons"); + for (auto& mirror_service : mirror_services) { + formatter->open_object_section("daemon"); + formatter->dump_string("service_id", mirror_service.service_id); + formatter->dump_string("instance_id", mirror_service.instance_id); + 
formatter->dump_string("client_id", mirror_service.client_id); + formatter->dump_string("hostname", mirror_service.hostname); + formatter->dump_string("ceph_version", mirror_service.ceph_version); + formatter->dump_bool("leader", mirror_service.leader); + formatter->dump_stream("health") << mirror_service.health; + if (!mirror_service.callouts.empty()) { + formatter->open_array_section("callouts"); + for (auto& callout : mirror_service.callouts) { + formatter->dump_string("callout", callout); + } + formatter->close_section(); // callouts + } + formatter->close_section(); // daemon + } + formatter->close_section(); // daemons + } else { + std::cout << std::endl << "DAEMONS" << std::endl; + if (mirror_services.empty()) { + std::cout << " none" << std::endl; + } + for (auto& mirror_service : mirror_services) { + std::cout << "service " << mirror_service.service_id << ":" + << std::endl + << " instance_id: " << mirror_service.instance_id + << std::endl + << " client_id: " << mirror_service.client_id << std::endl + << " hostname: " << mirror_service.hostname << std::endl + << " version: " << mirror_service.ceph_version << std::endl + << " leader: " << (mirror_service.leader ? 
"true" : "false") + << std::endl + << " health: " << mirror_service.health << std::endl; + if (!mirror_service.callouts.empty()) { + std::cout << " callouts: " << mirror_service.callouts << std::endl; + } + std::cout << std::endl; + } + std::cout << std::endl; + } + + // dump per-image status + librados::IoCtx default_ns_io_ctx; + default_ns_io_ctx.dup(io_ctx); + default_ns_io_ctx.set_namespace(""); + std::vector<librbd::mirror_peer_site_t> mirror_peers; + utils::get_mirror_peer_sites(default_ns_io_ctx, &mirror_peers); + + std::map<std::string, std::string> peer_mirror_uuids_to_name; + utils::get_mirror_peer_mirror_uuids_to_names(mirror_peers, + &peer_mirror_uuids_to_name); + + if (formatter != nullptr) { + formatter->open_array_section("images"); + } else { + std::cout << "IMAGES"; + } + + std::map<std::string, std::string> instance_ids; + + std::string start_image_id; + while (true) { + std::map<std::string, std::string> ids; + r = rbd.mirror_image_instance_id_list(io_ctx, start_image_id, 1024, &ids); + if (r < 0) { + if (r == -EOPNOTSUPP) { + std::cerr << "rbd: newer release of Ceph OSDs required to map image " + << "to rbd-mirror daemon instance" << std::endl; + } else { + std::cerr << "rbd: failed to get instance id list: " + << cpp_strerror(r) << std::endl; + } + // not fatal + break; + } + if (ids.empty()) { + break; + } + instance_ids.insert(ids.begin(), ids.end()); + start_image_id = ids.rbegin()->first; + } + + ImageRequestGenerator<StatusImageRequest> generator( + io_ctx, instance_ids, mirror_peers, peer_mirror_uuids_to_name, + daemon_service_info, formatter); + ret = generator.execute(); + + if (formatter != nullptr) { + formatter->close_section(); // images + } + } + + if (formatter != nullptr) { + formatter->close_section(); // status + formatter->flush(std::cout); + } + + return ret; +} + +void get_promote_arguments(po::options_description *positional, + po::options_description *options) { + options->add_options() + ("force", po::bool_switch(), + 
"promote even if not cleanly demoted by remote cluster"); + at::add_pool_options(positional, options, true); +} + +int execute_promote(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + utils::disable_cache(); + + std::atomic<unsigned> counter = { 0 }; + ImageRequestGenerator<PromoteImageRequest> generator(io_ctx, &counter, + vm["force"].as<bool>()); + r = generator.execute(); + + std::cout << "Promoted " << counter.load() << " mirrored images" << std::endl; + return r; +} + +void get_demote_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); +} + +int execute_demote(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + utils::disable_cache(); + + std::atomic<unsigned> counter { 0 }; + ImageRequestGenerator<DemoteImageRequest> generator(io_ctx, &counter); + r = generator.execute(); + + std::cout << "Demoted " << counter.load() << " mirrored images" << std::endl; + return r; +} + +Shell::Action action_bootstrap_create( + {"mirror", "pool", "peer", 
"bootstrap", "create"}, {}, + "Create a peer bootstrap token to import in a remote cluster", "", + &get_peer_bootstrap_create_arguments, &execute_peer_bootstrap_create); +Shell::Action action_bootstreap_import( + {"mirror", "pool", "peer", "bootstrap", "import"}, {}, + "Import a peer bootstrap token created from a remote cluster", "", + &get_peer_bootstrap_import_arguments, &execute_peer_bootstrap_import); + +Shell::Action action_add( + {"mirror", "pool", "peer", "add"}, {}, + "Add a mirroring peer to a pool.", "", + &get_peer_add_arguments, &execute_peer_add); +Shell::Action action_remove( + {"mirror", "pool", "peer", "remove"}, {}, + "Remove a mirroring peer from a pool.", "", + &get_peer_remove_arguments, &execute_peer_remove); +Shell::Action action_set( + {"mirror", "pool", "peer", "set"}, {}, + "Update mirroring peer settings.", "", + &get_peer_set_arguments, &execute_peer_set); + +Shell::Action action_disable( + {"mirror", "pool", "disable"}, {}, + "Disable RBD mirroring by default within a pool.", "", + &get_disable_arguments, &execute_disable); +Shell::Action action_enable( + {"mirror", "pool", "enable"}, {}, + "Enable RBD mirroring by default within a pool.", "", + &get_enable_arguments, &execute_enable); +Shell::Action action_info( + {"mirror", "pool", "info"}, {}, + "Show information about the pool mirroring configuration.", {}, + &get_info_arguments, &execute_info); +Shell::Action action_status( + {"mirror", "pool", "status"}, {}, + "Show status for all mirrored images in the pool.", {}, + &get_status_arguments, &execute_status); +Shell::Action action_promote( + {"mirror", "pool", "promote"}, {}, + "Promote all non-primary images in the pool.", {}, + &get_promote_arguments, &execute_promote); +Shell::Action action_demote( + {"mirror", "pool", "demote"}, {}, + "Demote all primary images in the pool.", {}, + &get_demote_arguments, &execute_demote); + +} // namespace mirror_pool +} // namespace action +} // namespace rbd diff --git 
a/src/tools/rbd/action/MirrorSnapshotSchedule.cc b/src/tools/rbd/action/MirrorSnapshotSchedule.cc new file mode 100644 index 000000000..3f269c2ad --- /dev/null +++ b/src/tools/rbd/action/MirrorSnapshotSchedule.cc @@ -0,0 +1,322 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Schedule.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/ceph_context.h" +#include "common/ceph_json.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "global/global_context.h" +#include "include/stringify.h" + +#include <iostream> +#include <list> +#include <map> +#include <string> +#include <boost/program_options.hpp> + +#include "json_spirit/json_spirit.h" + +namespace rbd { +namespace action { +namespace mirror_snapshot_schedule { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +class ScheduleStatus { +public: + ScheduleStatus() { + } + + int parse(const std::string &status) { + json_spirit::mValue json_root; + if(!json_spirit::read(status, json_root)) { + std::cerr << "rbd: invalid schedule status JSON received" << std::endl; + return -EBADMSG; + } + + try { + auto &s = json_root.get_obj(); + + if (s["scheduled_images"].type() != json_spirit::array_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "scheduled_images is not array" << std::endl; + return -EBADMSG; + } + + for (auto &item_val : s["scheduled_images"].get_array()) { + if (item_val.type() != json_spirit::obj_type) { + std::cerr << "rbd: unexpected schedule status JSON received: " + << "schedule item is not object" << std::endl; + return -EBADMSG; + } + + auto &item = item_val.get_obj(); + + if (item["schedule_time"].type() != json_spirit::str_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "schedule_time is not string" << std::endl; + return 
-EBADMSG; + } + auto schedule_time = item["schedule_time"].get_str(); + + if (item["image"].type() != json_spirit::str_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "image is not string" << std::endl; + return -EBADMSG; + } + auto image = item["image"].get_str(); + + scheduled_images.push_back({schedule_time, image}); + } + + } catch (std::runtime_error &) { + std::cerr << "rbd: invalid schedule JSON received" << std::endl; + return -EBADMSG; + } + + return 0; + } + + void dump(Formatter *f) { + f->open_array_section("scheduled_images"); + for (auto &image : scheduled_images) { + f->open_object_section("image"); + f->dump_string("schedule_time", image.first); + f->dump_string("image", image.second); + f->close_section(); // image + } + f->close_section(); // scheduled_images + } + + friend std::ostream& operator<<(std::ostream& os, ScheduleStatus &d); + +private: + + std::list<std::pair<std::string, std::string>> scheduled_images; +}; + +std::ostream& operator<<(std::ostream& os, ScheduleStatus &s) { + TextTable tbl; + tbl.define_column("SCHEDULE TIME", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("IMAGE", TextTable::LEFT, TextTable::LEFT); + + for (auto &[schedule_time, image] : s.scheduled_images) { + tbl << schedule_time << image << TextTable::endrow; + } + + os << tbl; + return os; +} + +} // anonymous namespace + +void get_arguments_add(po::options_description *positional, + po::options_description *options) { + add_level_spec_options(options); + add_schedule_options(positional, true); +} + +int execute_add(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::map<std::string, std::string> args; + + int r = get_level_spec_args(vm, &args); + if (r < 0) { + return r; + } + r = get_schedule_args(vm, true, &args); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + normalize_level_spec_args(&args); + r = 
utils::mgr_command(rados, "rbd mirror snapshot schedule add", args, + &std::cout, &std::cerr); + if (r < 0) { + return r; + } + + return 0; +} + +void get_arguments_remove(po::options_description *positional, + po::options_description *options) { + add_level_spec_options(options); + add_schedule_options(positional, false); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::map<std::string, std::string> args; + + int r = get_level_spec_args(vm, &args); + if (r < 0) { + return r; + } + r = get_schedule_args(vm, false, &args); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + normalize_level_spec_args(&args); + r = utils::mgr_command(rados, "rbd mirror snapshot schedule remove", args, + &std::cout, &std::cerr); + if (r < 0) { + return r; + } + + return 0; +} + +void get_arguments_list(po::options_description *positional, + po::options_description *options) { + add_level_spec_options(options); + options->add_options() + ("recursive,R", po::bool_switch(), "list all schedules"); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::map<std::string, std::string> args; + + int r = get_level_spec_args(vm, &args); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + normalize_level_spec_args(&args); + std::stringstream out; + r = utils::mgr_command(rados, "rbd mirror snapshot schedule list", args, &out, + &std::cerr); + if (r < 0) { + return r; + } + + ScheduleList schedule_list; + r = schedule_list.parse(out.str()); + if (r < 0) { + return r; + } + + if (vm["recursive"].as<bool>()) { + if (formatter.get()) { + schedule_list.dump(formatter.get()); + 
formatter->flush(std::cout); + } else { + std::cout << schedule_list; + } + } else { + auto schedule = schedule_list.find(args["level_spec"]); + if (schedule == nullptr) { + return -ENOENT; + } + + if (formatter.get()) { + schedule->dump(formatter.get()); + formatter->flush(std::cout); + } else { + std::cout << *schedule << std::endl; + } + } + + return 0; +} + +void get_arguments_status(po::options_description *positional, + po::options_description *options) { + add_level_spec_options(options); + at::add_format_options(options); +} + +int execute_status(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::map<std::string, std::string> args; + + int r = get_level_spec_args(vm, &args); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + normalize_level_spec_args(&args); + std::stringstream out; + r = utils::mgr_command(rados, "rbd mirror snapshot schedule status", args, + &out, &std::cerr); + ScheduleStatus schedule_status; + r = schedule_status.parse(out.str()); + if (r < 0) { + return r; + } + + if (formatter.get()) { + schedule_status.dump(formatter.get()); + formatter->flush(std::cout); + } else { + std::cout << schedule_status; + } + + return 0; +} + +Shell::Action add_action( + {"mirror", "snapshot", "schedule", "add"}, {}, + "Add mirror snapshot schedule.", "", &get_arguments_add, &execute_add); +Shell::Action remove_action( + {"mirror", "snapshot", "schedule", "remove"}, + {"mirror", "snapshot", "schedule", "rm"}, "Remove mirror snapshot schedule.", + "", &get_arguments_remove, &execute_remove); +Shell::Action list_action( + {"mirror", "snapshot", "schedule", "list"}, + {"mirror", "snapshot", "schedule", "ls"}, "List mirror snapshot schedule.", + "", &get_arguments_list, &execute_list); +Shell::Action status_action( + {"mirror", 
"snapshot", "schedule", "status"}, {}, + "Show mirror snapshot schedule status.", "", &get_arguments_status, &execute_status); + +} // namespace mirror_snapshot_schedule +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Namespace.cc b/src/tools/rbd/action/Namespace.cc new file mode 100644 index 000000000..12d92bff8 --- /dev/null +++ b/src/tools/rbd/action/Namespace.cc @@ -0,0 +1,191 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <algorithm> +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace ns { + +namespace at = argument_types; +namespace po = boost::program_options; + +void get_create_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); +} + +int execute_create(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + if (namespace_name.empty()) { + std::cerr << "rbd: namespace name was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.namespace_create(io_ctx, namespace_name.c_str()); + if (r < 0) { + std::cerr << "rbd: failed to created namespace: " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_remove_arguments(po::options_description *positional, + po::options_description 
*options) { + at::add_pool_options(positional, options, true); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + if (namespace_name.empty()) { + std::cerr << "rbd: namespace name was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.namespace_remove(io_ctx, namespace_name.c_str()); + if (r == -EBUSY) { + std::cerr << "rbd: namespace contains images which must be deleted first." + << std::endl; + return r; + } else if (r == -ENOENT) { + std::cerr << "rbd: namespace does not exist." << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: failed to remove namespace: " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::vector<std::string> names; + r = rbd.namespace_list(io_ctx, &names); + if (r < 0 && r != -ENOENT) { + std::cerr << "rbd: failed to list namespaces: " << 
cpp_strerror(r) + << std::endl; + return r; + } + + std::sort(names.begin(), names.end()); + + TextTable tbl; + if (formatter) { + formatter->open_array_section("namespaces"); + } else { + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + } + + for (auto& name : names) { + if (formatter) { + formatter->open_object_section("namespace"); + formatter->dump_string("name", name); + formatter->close_section(); + } else { + tbl << name << TextTable::endrow; + } + } + + if (formatter) { + formatter->close_section(); + formatter->flush(std::cout); + } else if (!names.empty()) { + std::cout << tbl; + } + + return 0; +} + +Shell::Action action_create( + {"namespace", "create"}, {}, + "Create an RBD image namespace.", "", + &get_create_arguments, &execute_create); + +Shell::Action action_remove( + {"namespace", "remove"}, {"namespace", "rm"}, + "Remove an RBD image namespace.", "", + &get_remove_arguments, &execute_remove); + +Shell::Action action_list( + {"namespace", "list"}, {"namespace", "ls"}, "List RBD image namespaces.", "", + &get_list_arguments, &execute_list); + +} // namespace ns +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Nbd.cc b/src/tools/rbd/action/Nbd.cc new file mode 100644 index 000000000..dd5ef3290 --- /dev/null +++ b/src/tools/rbd/action/Nbd.cc @@ -0,0 +1,389 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/stringify.h" +#include "common/SubProcess.h" +#include <iostream> +#include <boost/algorithm/string.hpp> +#include <boost/algorithm/string/predicate.hpp> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace nbd { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int call_nbd_cmd(const po::variables_map &vm, + const std::vector<std::string> &args, + const 
std::vector<std::string> &ceph_global_init_args) { + #if defined(__FreeBSD__) || defined(_WIN32) + std::cerr << "rbd: nbd device is not supported" << std::endl; + return -EOPNOTSUPP; + #else + char exe_path[PATH_MAX]; + ssize_t exe_path_bytes = readlink("/proc/self/exe", exe_path, + sizeof(exe_path) - 1); + if (exe_path_bytes < 0) { + strcpy(exe_path, "rbd-nbd"); + } else { + if (snprintf(exe_path + exe_path_bytes, + sizeof(exe_path) - exe_path_bytes, + "-nbd") < 0) { + return -EOVERFLOW; + } + } + + SubProcess process(exe_path, SubProcess::KEEP, SubProcess::KEEP, SubProcess::KEEP); + + for (auto &arg : ceph_global_init_args) { + process.add_cmd_arg(arg.c_str()); + } + + for (auto &arg : args) { + process.add_cmd_arg(arg.c_str()); + } + + if (process.spawn()) { + std::cerr << "rbd: failed to run rbd-nbd: " << process.err() << std::endl; + return -EINVAL; + } else if (process.join()) { + std::cerr << "rbd: rbd-nbd failed with error: " << process.err() << std::endl; + return -EINVAL; + } + + return 0; + #endif +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if defined(__FreeBSD__) || defined(_WIN32) + std::cerr << "rbd: nbd device is not supported" << std::endl; + return -EOPNOTSUPP; +#else + std::vector<std::string> args; + + args.push_back("list-mapped"); + + if (vm.count("format")) { + args.push_back("--format"); + args.push_back(vm["format"].as<at::Format>().value); + } + if (vm["pretty-format"].as<bool>()) { + args.push_back("--pretty-format"); + } + + return call_nbd_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_attach(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if defined(__FreeBSD__) || defined(_WIN32) + std::cerr << "rbd: nbd device is not supported" << std::endl; + return -EOPNOTSUPP; +#else + std::vector<std::string> args; + std::string device_path; + + args.push_back("attach"); + std::string img; + int r = 
utils::get_image_or_snap_spec(vm, &img); + if (r < 0) { + return r; + } + args.push_back(img); + + if (vm.count("device")) { + device_path = vm["device"].as<std::string>(); + args.push_back("--device"); + args.push_back(device_path); + } else { + std::cerr << "rbd: device was not specified" << std::endl; + return -EINVAL; + } + + if (vm["show-cookie"].as<bool>()) { + args.push_back("--show-cookie"); + } + + if (vm.count("cookie")) { + args.push_back("--cookie"); + args.push_back(vm["cookie"].as<std::string>()); + } else if (!vm["force"].as<bool>()) { + std::cerr << "rbd: could not validate attach request\n"; + std::cerr << "rbd: mismatching the image and the device may lead to data corruption\n"; + std::cerr << "rbd: must specify --cookie <arg> or --force to proceed" << std::endl; + return -EINVAL; + } + + if (vm.count(at::SNAPSHOT_ID)) { + args.push_back("--snap-id"); + args.push_back(std::to_string(vm[at::SNAPSHOT_ID].as<uint64_t>())); + } + + if (vm["quiesce"].as<bool>()) { + args.push_back("--quiesce"); + } + + if (vm["read-only"].as<bool>()) { + args.push_back("--read-only"); + } + + if (vm["exclusive"].as<bool>()) { + args.push_back("--exclusive"); + } + + if (vm.count("quiesce-hook")) { + args.push_back("--quiesce-hook"); + args.push_back(vm["quiesce-hook"].as<std::string>()); + } + + if (vm.count("options")) { + utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(), + &args); + } + + return call_nbd_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_detach(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if defined(__FreeBSD__) || defined(_WIN32) + std::cerr << "rbd: nbd device is not supported" << std::endl; + return -EOPNOTSUPP; +#else + std::string device_name = utils::get_positional_argument(vm, 0); + if (!boost::starts_with(device_name, "/dev/")) { + device_name.clear(); + } + + std::vector<std::string> args; + + args.push_back("detach"); + std::string image_name; + if 
(device_name.empty()) { + int r = utils::get_image_or_snap_spec(vm, &image_name); + if (r < 0) { + return r; + } + + if (image_name.empty()) { + std::cerr << "rbd: detach requires either image name or device path" + << std::endl; + return -EINVAL; + } + + if (vm.count(at::SNAPSHOT_ID)) { + args.push_back("--snap-id"); + args.push_back(std::to_string(vm[at::SNAPSHOT_ID].as<uint64_t>())); + } + } + + args.push_back(device_name.empty() ? image_name : device_name); + + if (vm.count("options")) { + utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(), + &args); + } + + return call_nbd_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_map(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if defined(__FreeBSD__) || defined(_WIN32) + std::cerr << "rbd: nbd device is not supported" << std::endl; + return -EOPNOTSUPP; +#else + std::vector<std::string> args; + + args.push_back("map"); + std::string img; + int r = utils::get_image_or_snap_spec(vm, &img); + if (r < 0) { + return r; + } + args.push_back(img); + + if (vm["quiesce"].as<bool>()) { + args.push_back("--quiesce"); + } + + if (vm["show-cookie"].as<bool>()) { + args.push_back("--show-cookie"); + } + + if (vm.count("cookie")) { + args.push_back("--cookie"); + args.push_back(vm["cookie"].as<std::string>()); + } + + if (vm.count(at::SNAPSHOT_ID)) { + args.push_back("--snap-id"); + args.push_back(std::to_string(vm[at::SNAPSHOT_ID].as<uint64_t>())); + } + + if (vm["read-only"].as<bool>()) { + args.push_back("--read-only"); + } + + if (vm["exclusive"].as<bool>()) { + args.push_back("--exclusive"); + } + + if (vm.count("quiesce-hook")) { + args.push_back("--quiesce-hook"); + args.push_back(vm["quiesce-hook"].as<std::string>()); + } + + if (vm.count("options")) { + utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(), + &args); + } + + return call_nbd_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_unmap(const 
po::variables_map &vm,
                  const std::vector<std::string> &ceph_global_init_args) {
#if defined(__FreeBSD__) || defined(_WIN32)
  std::cerr << "rbd: nbd device is not supported" << std::endl;
  return -EOPNOTSUPP;
#else
  // Only treat the first positional argument as a device path when it
  // looks like one ("/dev/..."); otherwise resolve an image/snap spec.
  std::string device_name = utils::get_positional_argument(vm, 0);
  if (!boost::starts_with(device_name, "/dev/")) {
    device_name.clear();
  }

  std::vector<std::string> args;

  args.push_back("unmap");
  std::string image_name;
  if (device_name.empty()) {
    int r = utils::get_image_or_snap_spec(vm, &image_name);
    if (r < 0) {
      return r;
    }

    if (image_name.empty()) {
      std::cerr << "rbd: unmap requires either image name or device path"
                << std::endl;
      return -EINVAL;
    }

    if (vm.count(at::SNAPSHOT_ID)) {
      args.push_back("--snap-id");
      args.push_back(std::to_string(vm[at::SNAPSHOT_ID].as<uint64_t>()));
    }
  }

  args.push_back(device_name.empty() ? image_name : device_name);

  if (vm.count("options")) {
    utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(),
                                  &args);
  }

  // Delegate the actual unmap to the external rbd-nbd helper.
  return call_nbd_cmd(vm, args, ceph_global_init_args);
#endif
}

void get_list_arguments_deprecated(po::options_description *positional,
                                   po::options_description *options) {
  at::add_format_options(options);
}

// Deprecated alias for 'device list -t nbd'.
int execute_list_deprecated(const po::variables_map &vm,
                            const std::vector<std::string> &ceph_global_args) {
  std::cerr << "rbd: 'nbd list' command is deprecated, "
            << "use 'device list -t nbd' instead" << std::endl;
  return execute_list(vm, ceph_global_args);
}

void get_map_arguments_deprecated(po::options_description *positional,
                                  po::options_description *options) {
  at::add_image_or_snap_spec_options(positional, options,
                                     at::ARGUMENT_MODIFIER_NONE);
  options->add_options()
    ("read-only", po::bool_switch(), "map read-only")
    ("exclusive", po::bool_switch(), "forbid writes by other clients")
    ("device", po::value<std::string>(), "specify nbd device")
    ("nbds_max", po::value<std::string>(), "override module param nbds_max")
    ("max_part", po::value<std::string>(), "override module param max_part")
    ("timeout", po::value<std::string>(), "set nbd request timeout (seconds)");
}

// Deprecated alias for 'device map -t nbd': translates the legacy
// per-flag options into the generic "--options" key=value list before
// delegating to execute_map().
int execute_map_deprecated(const po::variables_map &vm_deprecated,
                           const std::vector<std::string> &ceph_global_args) {
  std::cerr << "rbd: 'nbd map' command is deprecated, "
            << "use 'device map -t nbd' instead" << std::endl;

  // Re-declare the "options" key so it exists in the copied variables map.
  po::options_description options;
  options.add_options()
    ("options,o", po::value<std::vector<std::string>>()
     ->default_value(std::vector<std::string>(), ""), "");

  po::variables_map vm = vm_deprecated;
  po::store(po::command_line_parser({}).options(options).run(), vm);

  std::vector<std::string> opts;
  if (vm_deprecated.count("device")) {
    opts.push_back("device=" + vm_deprecated["device"].as<std::string>());
  }
  if (vm_deprecated.count("nbds_max")) {
    opts.push_back("nbds_max=" + vm_deprecated["nbds_max"].as<std::string>());
  }
  if (vm_deprecated.count("max_part")) {
    opts.push_back("max_part=" + vm_deprecated["max_part"].as<std::string>());
  }
  if (vm_deprecated.count("timeout")) {
    opts.push_back("timeout=" + vm_deprecated["timeout"].as<std::string>());
  }

  vm.at("options").value() = boost::any(opts);

  return execute_map(vm, ceph_global_args);
}

void get_unmap_arguments_deprecated(po::options_description *positional,
                                    po::options_description *options) {
  positional->add_options()
    ("image-or-snap-or-device-spec",
     "image, snapshot, or device specification\n"
     "[<pool-name>/]<image-name>[@<snap-name>] or <device-path>");
  at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE);
  at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);
  at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE);
}

// Deprecated alias for 'device unmap -t nbd'.
int execute_unmap_deprecated(const po::variables_map &vm,
                             const std::vector<std::string> &ceph_global_args) {
  std::cerr << "rbd: 'nbd unmap' command is deprecated, "
            << "use 'device unmap -t nbd' instead" << std::endl;
  return execute_unmap(vm, ceph_global_args);
}

Shell::Action action_show_deprecated(
  {"nbd", "list"}, {"nbd", "ls"}, "List the nbd devices already used.", "",
  &get_list_arguments_deprecated, &execute_list_deprecated, false);

Shell::Action action_map_deprecated(
  {"nbd", "map"}, {}, "Map image to a nbd device.", "",
  &get_map_arguments_deprecated, &execute_map_deprecated, false);

Shell::Action action_unmap_deprecated(
  {"nbd", "unmap"}, {}, "Unmap a nbd device.", "",
  &get_unmap_arguments_deprecated, &execute_unmap_deprecated, false);

} // namespace nbd
} // namespace action
} // namespace rbd
diff --git a/src/tools/rbd/action/ObjectMap.cc b/src/tools/rbd/action/ObjectMap.cc
new file mode 100644
index 000000000..40ee2d472
--- /dev/null
+++ b/src/tools/rbd/action/ObjectMap.cc
@@ -0,0 +1,131 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "tools/rbd/ArgumentTypes.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "common/errno.h"
#include <iostream>
#include <boost/program_options.hpp>

namespace rbd {
namespace action {
namespace object_map {

namespace at = argument_types;
namespace po = boost::program_options;

// Rebuild the image's object map, reporting progress unless suppressed.
static int do_object_map_rebuild(librbd::Image &image, bool no_progress)
{
  utils::ProgressContext pc("Object Map Rebuild", no_progress);
  int r = image.rebuild_object_map(pc);
  if (r < 0) {
    pc.fail();
    return r;
  }
  pc.finish();
  return 0;
}

void get_rebuild_arguments(po::options_description *positional,
                           po::options_description *options) {
  at::add_image_or_snap_spec_options(positional, options,
                                     at::ARGUMENT_MODIFIER_NONE);
  at::add_no_progress_option(options);
}

// "rbd object-map rebuild" entry point.
int execute_rebuild(const po::variables_map &vm,
                    const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
                                 snap_name, false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = do_object_map_rebuild(image, vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    std::cerr << "rbd: rebuilding object map failed: " << cpp_strerror(r)
              << std::endl;
    return r;
  }
  return 0;
}

// Validate the image's object map, reporting progress unless suppressed.
static int do_object_map_check(librbd::Image &image, bool no_progress)
{
  utils::ProgressContext pc("Object Map Check", no_progress);
  int r = image.check_object_map(pc);
  if (r < 0) {
    pc.fail();
    return r;
  }
  pc.finish();
  return 0;
}

void get_check_arguments(po::options_description *positional,
                         po::options_description *options) {
  at::add_image_or_snap_spec_options(positional, options,
                                     at::ARGUMENT_MODIFIER_NONE);
  at::add_no_progress_option(options);
}

// "rbd object-map check" entry point.
int execute_check(const po::variables_map &vm,
                  const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "",
                                 snap_name, false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = do_object_map_check(image, vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    std::cerr << "rbd: checking object map failed: " << cpp_strerror(r)
              << std::endl;
    return r;
  }
  return 0;
}

Shell::Action action_rebuild(
  {"object-map", "rebuild"}, {}, "Rebuild an invalid object map.", "",
  &get_rebuild_arguments, &execute_rebuild);
Shell::Action action_check(
  {"object-map", "check"}, {}, "Verify the object map is correct.", "",
  &get_check_arguments, &execute_check);

} // namespace object_map
} // namespace action
} // namespace rbd
diff --git a/src/tools/rbd/action/Perf.cc b/src/tools/rbd/action/Perf.cc
new file mode 100644
index 000000000..b39beac91
--- /dev/null
+++ b/src/tools/rbd/action/Perf.cc
@@ -0,0 +1,717 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "tools/rbd/ArgumentTypes.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "include/stringify.h"
#include "common/ceph_context.h"
#include "common/ceph_json.h"
#include "common/errno.h"
#include "common/Formatter.h"
#include "common/TextTable.h"
#include "global/global_context.h"
#ifdef HAVE_CURSES
#include <ncurses.h>
#endif
#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>
#include <iostream>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/assign.hpp>
#include <boost/bimap.hpp>
#include <boost/program_options.hpp>
#include "json_spirit/json_spirit.h"

namespace rbd {
namespace action {
namespace perf {

namespace at = argument_types;
namespace po = boost::program_options;

namespace {

// Supported per-image IO metrics, in display order.
enum class StatDescriptor {
  WRITE_OPS = 0,
  READ_OPS,
  WRITE_BYTES,
  READ_BYTES,
  WRITE_LATENCY,
  READ_LATENCY
};

typedef boost::bimap<StatDescriptor, std::string> StatDescriptors;

// Bidirectional mapping between metric enum values and wire names.
static const StatDescriptors STAT_DESCRIPTORS =
  boost::assign::list_of<StatDescriptors::relation>
    (StatDescriptor::WRITE_OPS, "write_ops")
    (StatDescriptor::READ_OPS, "read_ops")
    (StatDescriptor::WRITE_BYTES, "write_bytes")
(StatDescriptor::READ_BYTES, "read_bytes") + (StatDescriptor::WRITE_LATENCY, "write_latency") + (StatDescriptor::READ_LATENCY, "read_latency"); + +std::ostream& operator<<(std::ostream& os, const StatDescriptor& val) { + auto it = STAT_DESCRIPTORS.left.find(val); + if (it == STAT_DESCRIPTORS.left.end()) { + os << "unknown (" << static_cast<int>(val) << ")"; + } else { + os << it->second; + } + return os; +} + +void validate(boost::any& v, const std::vector<std::string>& values, + StatDescriptor *target_type, int) { + po::validators::check_first_occurrence(v); + std::string s = po::validators::get_single_string(values); + boost::replace_all(s, "_", " "); + boost::replace_all(s, "-", "_"); + + auto it = STAT_DESCRIPTORS.right.find(s); + if (it == STAT_DESCRIPTORS.right.end()) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + v = boost::any(it->second); +} + +struct ImageStat { + ImageStat(const std::string& pool_name, const std::string& pool_namespace, + const std::string& image_name) + : pool_name(pool_name), pool_namespace(pool_namespace), + image_name(image_name) { + stats.resize(STAT_DESCRIPTORS.size()); + } + + std::string pool_name; + std::string pool_namespace; + std::string image_name; + std::vector<double> stats; +}; + +typedef std::vector<ImageStat> ImageStats; + +typedef std::pair<std::string, std::string> SpecPair; + +std::string format_pool_spec(const std::string& pool, + const std::string& pool_namespace) { + std::string pool_spec{pool}; + if (!pool_namespace.empty()) { + pool_spec += "/" + pool_namespace; + } + return pool_spec; +} + +int query_iostats(librados::Rados& rados, const std::string& pool_spec, + StatDescriptor sort_by, ImageStats* image_stats, + std::ostream& err_os) { + auto sort_by_str = STAT_DESCRIPTORS.left.find(sort_by)->second; + + std::string cmd = R"( + { + "prefix": "rbd perf image stats", + "pool_spec": ")" + pool_spec + R"(", + "sort_by": ")" + sort_by_str + R"(", + "format": "json" + }")"; + + 
bufferlist in_bl; + bufferlist out_bl; + std::string outs; + int r = rados.mgr_command(cmd, in_bl, &out_bl, &outs); + if (r == -EOPNOTSUPP) { + err_os << "rbd: 'rbd_support' mgr module is not enabled." + << std::endl << std::endl + << "Use 'ceph mgr module enable rbd_support' to enable." + << std::endl; + return r; + } else if (r < 0) { + err_os << "rbd: mgr command failed: " << cpp_strerror(r); + if (!outs.empty()) { + err_os << ": " << outs; + } + err_os << std::endl; + return r; + } + + json_spirit::mValue json_root; + if (!json_spirit::read(out_bl.to_str(), json_root)) { + err_os << "rbd: error parsing perf stats" << std::endl; + return -EINVAL; + } + + image_stats->clear(); + try { + auto& root = json_root.get_obj(); + + // map JSON stat descriptor order to our internal order + std::map<uint32_t, uint32_t> json_to_internal_stats; + auto& json_stat_descriptors = root["stat_descriptors"].get_array(); + for (size_t idx = 0; idx < json_stat_descriptors.size(); ++idx) { + auto it = STAT_DESCRIPTORS.right.find( + json_stat_descriptors[idx].get_str()); + if (it == STAT_DESCRIPTORS.right.end()) { + continue; + } + json_to_internal_stats[idx] = static_cast<uint32_t>(it->second); + } + + // cache a mapping from pool descriptors back to pool-specs + std::map<std::string, SpecPair> json_to_internal_pools; + auto& pool_descriptors = root["pool_descriptors"].get_obj(); + for (auto& pool : pool_descriptors) { + auto& pool_spec = pool.second.get_str(); + auto pos = pool_spec.rfind("/"); + + SpecPair pair{pool_spec.substr(0, pos), ""}; + if (pos != std::string::npos) { + pair.second = pool_spec.substr(pos + 1); + } + + json_to_internal_pools[pool.first] = pair; + } + + auto& stats = root["stats"].get_array(); + for (auto& stat : stats) { + auto& stat_obj = stat.get_obj(); + if (!stat_obj.empty()) { + auto& image_spec = stat_obj.begin()->first; + + auto pos = image_spec.find("/"); + SpecPair pair{image_spec.substr(0, pos), ""}; + if (pos != std::string::npos) { + pair.second = 
image_spec.substr(pos + 1); + } + + const auto pool_it = json_to_internal_pools.find(pair.first); + if (pool_it == json_to_internal_pools.end()) { + continue; + } + + image_stats->emplace_back( + pool_it->second.first, pool_it->second.second, pair.second); + + auto& image_stat = image_stats->back(); + auto& data = stat_obj.begin()->second.get_array(); + for (auto& indexes : json_to_internal_stats) { + image_stat.stats[indexes.second] = data[indexes.first].get_real(); + } + } + } + } catch (std::runtime_error &e) { + err_os << "rbd: error parsing perf stats: " << e.what() << std::endl; + return -EINVAL; + } + + return 0; +} + +void format_stat(StatDescriptor stat_descriptor, double stat, + std::ostream& os) { + switch (stat_descriptor) { + case StatDescriptor::WRITE_OPS: + case StatDescriptor::READ_OPS: + os << si_u_t(stat) << "/s"; + break; + case StatDescriptor::WRITE_BYTES: + case StatDescriptor::READ_BYTES: + os << byte_u_t(stat) << "/s"; + break; + case StatDescriptor::WRITE_LATENCY: + case StatDescriptor::READ_LATENCY: + os << std::fixed << std::setprecision(2); + if (stat >= 1000000000) { + os << (stat / 1000000000) << " s"; + } else if (stat >= 1000000) { + os << (stat / 1000000) << " ms"; + } else if (stat >= 1000) { + os << (stat / 1000) << " us"; + } else { + os << stat << " ns"; + } + break; + default: + ceph_assert(false); + break; + } +} + +} // anonymous namespace + +namespace iostat { + +struct Iterations {}; + +void validate(boost::any& v, const std::vector<std::string>& values, + Iterations *target_type, int) { + po::validators::check_first_occurrence(v); + auto& s = po::validators::get_single_string(values); + + try { + auto iterations = boost::lexical_cast<uint32_t>(s); + if (iterations > 0) { + v = boost::any(iterations); + return; + } + } catch (const boost::bad_lexical_cast &) { + } + throw po::validation_error(po::validation_error::invalid_option_value); +} + +void format(const ImageStats& image_stats, Formatter* f, bool global_search) { + 
TextTable tbl; + if (f) { + f->open_array_section("images"); + } else { + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + for (auto& stat : STAT_DESCRIPTORS.left) { + std::string title; + switch (stat.first) { + case StatDescriptor::WRITE_OPS: + title = "WR "; + break; + case StatDescriptor::READ_OPS: + title = "RD "; + break; + case StatDescriptor::WRITE_BYTES: + title = "WR_BYTES "; + break; + case StatDescriptor::READ_BYTES: + title = "RD_BYTES "; + break; + case StatDescriptor::WRITE_LATENCY: + title = "WR_LAT "; + break; + case StatDescriptor::READ_LATENCY: + title = "RD_LAT "; + break; + default: + ceph_assert(false); + break; + } + tbl.define_column(title, TextTable::RIGHT, TextTable::RIGHT); + } + } + + for (auto& image_stat : image_stats) { + if (f) { + f->open_object_section("image"); + f->dump_string("pool", image_stat.pool_name); + f->dump_string("pool_namespace", image_stat.pool_namespace); + f->dump_string("image", image_stat.image_name); + for (auto& pair : STAT_DESCRIPTORS.left) { + f->dump_float(pair.second.c_str(), + image_stat.stats[static_cast<size_t>(pair.first)]); + } + f->close_section(); + } else { + std::string name; + if (global_search) { + name += image_stat.pool_name + "/"; + if (!image_stat.pool_namespace.empty()) { + name += image_stat.pool_namespace + "/"; + } + } + name += image_stat.image_name; + + tbl << name; + for (auto& pair : STAT_DESCRIPTORS.left) { + std::stringstream str; + format_stat(pair.first, + image_stat.stats[static_cast<size_t>(pair.first)], str); + str << ' '; + tbl << str.str(); + } + tbl << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << tbl << std::endl; + } +} + +} // namespace iostat + +#ifdef HAVE_CURSES +namespace iotop { + +class MainWindow { +public: + MainWindow(librados::Rados& rados, const std::string& pool_spec) + : m_rados(rados), m_pool_spec(pool_spec) { + initscr(); + curs_set(0); + cbreak(); + noecho(); + keypad(stdscr, 
TRUE); + nodelay(stdscr, TRUE); + + init_columns(); + } + + int run() { + redraw(); + + int r = 0; + std::stringstream err_str; + while (true) { + r = query_iostats(m_rados, m_pool_spec, m_sort_by, &m_image_stats, + err_str); + if (r < 0) { + break; + return r; + } + + redraw(); + wait_for_key_or_delay(); + + int ch = getch(); + if (ch == 'q' || ch == 'Q') { + break; + } else if (ch == '<' || ch == KEY_LEFT) { + auto it = STAT_DESCRIPTORS.left.find(m_sort_by); + if (it != STAT_DESCRIPTORS.left.begin()) { + m_sort_by = (--it)->first; + } + } else if (ch == '>' || ch == KEY_RIGHT) { + auto it = STAT_DESCRIPTORS.left.find(m_sort_by); + if (it != STAT_DESCRIPTORS.left.end() && + ++it != STAT_DESCRIPTORS.left.end()) { + m_sort_by = it->first; + } + } + } + + endwin(); + + if (r < 0) { + std::cerr << err_str.str() << std::endl; + } + return r; + } + +private: + static const size_t STAT_COLUMN_WIDTH = 12; + + librados::Rados& m_rados; + std::string m_pool_spec; + + ImageStats m_image_stats; + StatDescriptor m_sort_by = StatDescriptor::WRITE_OPS; + + bool m_pending_win_opened = false; + WINDOW* m_pending_win = nullptr; + + int m_height = 1; + int m_width = 1; + + std::map<StatDescriptor, std::string> m_columns; + + void init_columns() { + m_columns.clear(); + for (auto& pair : STAT_DESCRIPTORS.left) { + std::string title; + switch (pair.first) { + case StatDescriptor::WRITE_OPS: + title = "WRITES OPS"; + break; + case StatDescriptor::READ_OPS: + title = "READS OPS"; + break; + case StatDescriptor::WRITE_BYTES: + title = "WRITE BYTES"; + break; + case StatDescriptor::READ_BYTES: + title = "READ BYTES"; + break; + case StatDescriptor::WRITE_LATENCY: + title = "WRITE LAT"; + break; + case StatDescriptor::READ_LATENCY: + title = "READ LAT"; + break; + default: + ceph_assert(false); + break; + } + m_columns[pair.first] = (title); + } + } + + void redraw() { + getmaxyx(stdscr, m_height, m_width); + + redraw_main_window(); + redraw_pending_window(); + + doupdate(); + } + + void 
redraw_main_window() { + werase(stdscr); + mvhline(0, 0, ' ' | A_REVERSE, m_width); + + // print header for all metrics + int remaining_cols = m_width; + std::stringstream str; + for (auto& pair : m_columns) { + int attr = A_REVERSE; + std::string title; + if (pair.first == m_sort_by) { + title += '>'; + attr |= A_BOLD; + } else { + title += ' '; + } + title += pair.second; + + str.str(""); + str << std::right << std::setfill(' ') + << std::setw(STAT_COLUMN_WIDTH) + << title << ' '; + + attrset(attr); + addstr(str.str().c_str()); + remaining_cols -= title.size(); + } + + attrset(A_REVERSE); + addstr("IMAGE"); + attrset(A_NORMAL); + + // print each image (one per line) + int row = 1; + int remaining_lines = m_height - 1; + for (auto& image_stat : m_image_stats) { + if (remaining_lines <= 0) { + break; + } + --remaining_lines; + + move(row++, 0); + for (auto& pair : m_columns) { + str.str(""); + format_stat(pair.first, + image_stat.stats[static_cast<size_t>(pair.first)], str); + auto value = str.str().substr(0, STAT_COLUMN_WIDTH); + + str.str(""); + str << std::right << std::setfill(' ') + << std::setw(STAT_COLUMN_WIDTH) + << value << ' '; + addstr(str.str().c_str()); + } + + std::string image; + if (m_pool_spec.empty()) { + image = format_pool_spec(image_stat.pool_name, + image_stat.pool_namespace) + "/"; + } + image += image_stat.image_name; + addstr(image.substr(0, remaining_cols).c_str()); + } + + wnoutrefresh(stdscr); + } + + void redraw_pending_window() { + // draw a "please by patient" window while waiting + const char* msg = "Waiting for initial stats"; + int height = 5; + int width = strlen(msg) + 4;; + int starty = (m_height - height) / 2; + int startx = (m_width - width) / 2; + + if (m_image_stats.empty() && !m_pending_win_opened) { + m_pending_win_opened = true; + m_pending_win = newwin(height, width, starty, startx); + } + + if (m_pending_win != nullptr) { + if (m_image_stats.empty()) { + box(m_pending_win, 0 , 0); + mvwaddstr(m_pending_win, 2, 2, msg); 
+ wnoutrefresh(m_pending_win); + } else { + delwin(m_pending_win); + m_pending_win = nullptr; + } + } + } + + void wait_for_key_or_delay() { + fd_set fds; + FD_ZERO(&fds); + FD_SET(STDIN_FILENO, &fds); + + // no point to refreshing faster than the stats period + struct timeval tval; + tval.tv_sec = std::min<uint32_t>( + 10, g_conf().get_val<int64_t>("mgr_stats_period")); + tval.tv_usec = 0; + + select(STDIN_FILENO + 1, &fds, NULL, NULL, &tval); + } +}; + +} // namespace iotop +#endif // HAVE_CURSES + + +void get_arguments_iostat(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + options->add_options() + ("iterations", po::value<iostat::Iterations>(), + "iterations of metric collection [> 0]") + ("sort-by", po::value<StatDescriptor>()->default_value(StatDescriptor::WRITE_OPS), + "sort-by IO metric " + "(write-ops, read-ops, write-bytes, read-bytes, write-latency, read-latency) " + "[default: write-ops]"); + at::add_format_options(options); +} + +int execute_iostat(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool; + std::string pool_namespace; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, &pool, + &pool_namespace, &arg_index); + if (r < 0) { + return r; + } + + uint32_t iterations = 0; + if (vm.count("iterations")) { + iterations = vm["iterations"].as<uint32_t>(); + } + auto sort_by = vm["sort-by"].as<StatDescriptor>(); + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + auto f = formatter.get(); + if (iterations > 1 && f != nullptr) { + std::cerr << "rbd: specifing iterations is not valid with formatted output" + << std::endl; + return -EINVAL; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + r = rados.wait_for_latest_osdmap(); + if (r < 0) { + std::cerr << "rbd: failed to retrieve OSD 
map" << std::endl; + return r; + } + + if (!pool_namespace.empty()) { + // default empty pool name only if namespace is specified to allow + // for an empty pool_spec (-> GLOBAL_POOL_KEY) + utils::normalize_pool_name(&pool); + } + std::string pool_spec = format_pool_spec(pool, pool_namespace); + + // no point to refreshing faster than the stats period + auto delay = std::min<uint32_t>(10, g_conf().get_val<int64_t>("mgr_stats_period")); + + ImageStats image_stats; + uint32_t count = 0; + bool printed_notice = false; + while (count++ < iterations || iterations == 0) { + r = query_iostats(rados, pool_spec, sort_by, &image_stats, std::cerr); + if (r < 0) { + return r; + } + + if (count == 1 && image_stats.empty()) { + count = 0; + if (!printed_notice) { + std::cerr << "rbd: waiting for initial image stats" + << std::endl << std::endl;; + printed_notice = true; + } + } else { + iostat::format(image_stats, f, pool_spec.empty()); + if (f != nullptr) { + break; + } + } + + sleep(delay); + } + + return 0; +} + +#ifdef HAVE_CURSES +void get_arguments_iotop(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); +} + +int execute_iotop(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool; + std::string pool_namespace; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, &pool, + &pool_namespace, &arg_index); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + r = rados.wait_for_latest_osdmap(); + if (r < 0) { + std::cerr << "rbd: failed to retrieve OSD map" << std::endl; + return r; + } + + if (!pool_namespace.empty()) { + // default empty pool name only if namespace is specified to allow + // for an empty pool_spec (-> GLOBAL_POOL_KEY) + utils::normalize_pool_name(&pool); + } + iotop::MainWindow mainWindow(rados, format_pool_spec(pool, pool_namespace)); 
  r = mainWindow.run();
  if (r < 0) {
    return r;
  }

  return 0;
}

Shell::Action top_action(
  {"perf", "image", "iotop"}, {}, "Display a top-like IO monitor.", "",
  &get_arguments_iotop, &execute_iotop);

#endif // HAVE_CURSES

Shell::Action stat_action(
  {"perf", "image", "iostat"}, {}, "Display image IO statistics.", "",
  &get_arguments_iostat, &execute_iostat);
} // namespace perf
} // namespace action
} // namespace rbd
diff --git a/src/tools/rbd/action/PersistentCache.cc b/src/tools/rbd/action/PersistentCache.cc
new file mode 100644
index 000000000..949006b82
--- /dev/null
+++ b/src/tools/rbd/action/PersistentCache.cc
@@ -0,0 +1,122 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "tools/rbd/ArgumentTypes.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "include/types.h"
#include "include/rbd_types.h"
#include "include/stringify.h"
#include "common/errno.h"
#include "common/Formatter.h"
#include <iostream>
#include <boost/program_options.hpp>

namespace rbd {
namespace action {
namespace persistent_cache {

namespace at = argument_types;
namespace po = boost::program_options;

void get_arguments_invalidate(po::options_description *positional,
                              po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_image_id_option(options);
}

// "rbd persistent-cache invalidate": discard existing / dirty persistent
// cache content via Image::invalidate_cache().
int execute_invalidate(const po::variables_map &vm,
                       const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = image.invalidate_cache();
  if (r < 0) {
    std::cerr << "rbd: invalidating persistent cache failed: "
              << cpp_strerror(r) << std::endl;
    return r;
  }

  return 0;
}

void get_arguments_flush(po::options_description *positional,
                         po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_image_id_option(options);
}

// "rbd persistent-cache flush": flush the image, but only when the
// DIRTY_CACHE feature bit indicates there is something to flush.
int execute_flush(const po::variables_map &vm,
                  const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  uint64_t features;
  r = image.features(&features);
  if (r < 0) {
    return r;
  }

  if (features & RBD_FEATURE_DIRTY_CACHE) {
    r = image.flush();
    if (r < 0) {
      std::cerr << "rbd: flushing persistent cache failed: "
                << cpp_strerror(r) << std::endl;
      return r;
    }
  } else {
    std::cout << "rbd: persistent cache is clean or disabled" << std::endl;
  }

  return 0;
}

Shell::Action action_invalidate(
  {"persistent-cache", "invalidate"}, {},
  "Invalidate (discard) existing / dirty persistent cache.", "",
  &get_arguments_invalidate, &execute_invalidate);
Shell::Action action_flush(
  {"persistent-cache", "flush"}, {}, "Flush persistent cache.", "",
  &get_arguments_flush, &execute_flush);

} // namespace persistent_cache
} // namespace action
} // namespace rbd
diff --git a/src/tools/rbd/action/Pool.cc b/src/tools/rbd/action/Pool.cc
new file mode 100644
index 000000000..2ad8e17ff
--- /dev/null
+++ b/src/tools/rbd/action/Pool.cc
@@ -0,0 +1,162 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "tools/rbd/ArgumentTypes.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "include/stringify.h"
#include "common/errno.h"
#include "common/Formatter.h"
#include <iostream>
#include <boost/program_options.hpp>

namespace rbd {
namespace action {
namespace pool {

namespace at = argument_types;
namespace po = boost::program_options;

void get_arguments_init(po::options_description *positional,
                        po::options_description *options) {
  at::add_pool_options(positional, options, false);
  options->add_options()
    ("force", po::bool_switch(),
     "force initialize pool for RBD use if registered by another application");
}

// "rbd pool init": register the pool for RBD use via RBD::pool_init().
int execute_init(const po::variables_map &vm,
                 const std::vector<std::string> &ceph_global_init_args) {
  std::string pool_name;
  size_t arg_index = 0;
  int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
                                              nullptr, &arg_index);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, "", &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  librbd::RBD rbd;
  r = rbd.pool_init(io_ctx, vm["force"].as<bool>());
  if (r == -EOPNOTSUPP) {
    std::cerr << "rbd: luminous or later release required." << std::endl;
  } else if (r == -EPERM) {
    std::cerr << "rbd: pool already registered to a different application."
+ << std::endl; + } else if (r < 0) { + std::cerr << "rbd: error registered application: " << cpp_strerror(r) + << std::endl; + } + + return 0; +} + +void get_arguments_stats(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + at::add_format_options(options); +} + +int execute_stats(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + uint64_t image_count; + uint64_t provisioned_bytes; + uint64_t snap_count; + uint64_t trash_count; + uint64_t trash_provisioned_bytes; + uint64_t trash_snap_count; + + librbd::PoolStats pool_stats; + pool_stats.add(RBD_POOL_STAT_OPTION_IMAGES, &image_count); + pool_stats.add(RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES, + &provisioned_bytes); + pool_stats.add(RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS, &snap_count); + pool_stats.add(RBD_POOL_STAT_OPTION_TRASH_IMAGES, &trash_count); + pool_stats.add(RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES, + &trash_provisioned_bytes); + pool_stats.add(RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS, &trash_snap_count); + + r = rbd.pool_stats_get(io_ctx, &pool_stats); + if (r < 0) { + std::cerr << "rbd: failed to query pool stats: " << cpp_strerror(r) + << std::endl; + return r; + } + + if (formatter) { + formatter->open_object_section("stats"); + formatter->open_object_section("images"); + formatter->dump_unsigned("count", image_count); + formatter->dump_unsigned("provisioned_bytes", provisioned_bytes); + 
formatter->dump_unsigned("snap_count", snap_count); + formatter->close_section(); + formatter->open_object_section("trash"); + formatter->dump_unsigned("count", trash_count); + formatter->dump_unsigned("provisioned_bytes", trash_provisioned_bytes); + formatter->dump_unsigned("snap_count", trash_snap_count); + formatter->close_section(); + formatter->close_section(); + formatter->flush(std::cout); + } else { + std::cout << "Total Images: " << image_count; + if (trash_count > 0) { + std::cout << " (" << trash_count << " in trash)"; + } + std::cout << std::endl; + + std::cout << "Total Snapshots: " << snap_count; + if (trash_count > 0) { + std::cout << " (" << trash_snap_count << " in trash)"; + } + std::cout << std::endl; + + std::cout << "Provisioned Size: " << byte_u_t(provisioned_bytes); + if (trash_count > 0) { + std::cout << " (" << byte_u_t(trash_provisioned_bytes) << " in trash)"; + } + std::cout << std::endl; + } + + return 0; +} + +Shell::Action init_action( + {"pool", "init"}, {}, "Initialize pool for use by RBD.", "", + &get_arguments_init, &execute_init); +Shell::Action stat_action( + {"pool", "stats"}, {}, "Display pool statistics.", + "Note: legacy v1 images are not included in stats", + &get_arguments_stats, &execute_stats); + +} // namespace pool +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Remove.cc b/src/tools/rbd/action/Remove.cc new file mode 100644 index 000000000..c5dcf2323 --- /dev/null +++ b/src/tools/rbd/action/Remove.cc @@ -0,0 +1,161 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "include/stringify.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace remove { + +namespace { + +bool is_auto_delete_snapshot(librbd::Image* image, + const librbd::snap_info_t 
// (continuation of is_auto_delete_snapshot) Returns true iff the snapshot
// lives in the trash namespace, i.e. it is scheduled for automatic cleanup
// and should not be reported as a user-visible snapshot blocking removal.
                             &snap_info) {
  librbd::snap_namespace_type_t namespace_type;
  int r = image->snap_get_namespace_type(snap_info.id, &namespace_type);
  if (r < 0) {
    // On lookup failure, conservatively treat it as a regular snapshot.
    return false;
  }

  switch (namespace_type) {
  case RBD_SNAP_NAMESPACE_TYPE_TRASH:
    return true;
  default:
    return false;
  }
}

} // anonymous namespace

namespace at = argument_types;
namespace po = boost::program_options;

// Remove the named image, showing a progress bar unless no_progress is set.
// Returns 0 on success or negative errno on failure.
static int do_delete(librbd::RBD &rbd, librados::IoCtx& io_ctx,
                     const char *imgname, bool no_progress)
{
  utils::ProgressContext pc("Removing image", no_progress);
  int r = rbd.remove_with_progress(io_ctx, imgname, pc);
  if (r < 0) {
    pc.fail();
    return r;
  }
  pc.finish();
  return 0;
}

// Register CLI arguments for "rbd remove": positional image spec and
// --no-progress.
void get_arguments(po::options_description *positional,
                   po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_no_progress_option(options);
}

// Entry point for "rbd remove"/"rbd rm". Deletes the image and, on
// failure, translates the common error codes into actionable messages:
// -ENOTEMPTY (snapshots remain), -EBUSY (open watchers), -EMLINK (image is
// a member of a group). Returns 0 on success, negative errno otherwise.
int execute(const po::variables_map &vm,
            const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  // Allow removal to proceed even on a full pool (deleting frees space).
  io_ctx.set_pool_full_try();

  librbd::RBD rbd;
  r = do_delete(rbd, io_ctx, image_name.c_str(),
                vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    if (r == -ENOTEMPTY) {
      // Distinguish "real" user snapshots from trashed (auto-delete)
      // snapshots left behind by clones, to pick the right hint below.
      librbd::Image image;
      std::vector<librbd::snap_info_t> snaps;
      int image_r = utils::open_image(io_ctx, image_name, true, &image);
      if (image_r >= 0) {
        image_r = image.snap_list(snaps);
      }
      if (image_r >= 0) {
        snaps.erase(std::remove_if(snaps.begin(), snaps.end(),
                                   [&image](const librbd::snap_info_t& snap) {
                                     return is_auto_delete_snapshot(&image,
                                                                    snap);
                                   }),
                    snaps.end());
      }

      if (!snaps.empty()) {
        std::cerr << "rbd: image has snapshots - these must be deleted"
                  << " with 'rbd snap purge' before the image can be removed."
                  << std::endl;
      } else {
        std::cerr << "rbd: image has snapshots with linked clones - these must "
                  << "be deleted or flattened before the image can be removed."
                  << std::endl;
      }
    } else if (r == -EBUSY) {
      std::cerr << "rbd: error: image still has watchers"
                << std::endl
                << "This means the image is still open or the client using "
                << "it crashed. Try again after closing/unmapping it or "
                << "waiting 30s for the crashed client to timeout."
                << std::endl;
    } else if (r == -EMLINK) {
      // Image belongs to a group: look the group up to print its
      // pool/namespace/name in the error message.
      librbd::Image image;
      int image_r = utils::open_image(io_ctx, image_name, true, &image);
      librbd::group_info_t group_info;
      if (image_r == 0) {
        image_r = image.get_group(&group_info, sizeof(group_info));
      }
      if (image_r == 0) {
        std::string pool_name = "";
        librados::Rados rados(io_ctx);
        librados::IoCtx pool_io_ctx;
        image_r = rados.ioctx_create2(group_info.pool, pool_io_ctx);
        if (image_r < 0) {
          pool_name = "<missing group pool " + stringify(group_info.pool) + ">";
        } else {
          pool_name = pool_io_ctx.get_pool_name();
        }
        std::cerr << "rbd: error: image belongs to a group "
                  << pool_name << "/";
        if (!io_ctx.get_namespace().empty()) {
          std::cerr << io_ctx.get_namespace() << "/";
        }
        std::cerr << group_info.name;
      } else
        std::cerr << "rbd: error: image belongs to a group";

      std::cerr << std::endl
                << "Remove the image from the group and try again."
                << std::endl;
      image.close();
    } else {
      std::cerr << "rbd: delete error: " << cpp_strerror(r) << std::endl;
    }
    return r;
  }
  return 0;
}

Shell::Action action(
  {"remove"}, {"rm"}, "Delete an image.", "", &get_arguments, &execute);

} // namespace remove
} // namespace action
} // namespace rbd
diff --git a/src/tools/rbd/action/Rename.cc b/src/tools/rbd/action/Rename.cc new file mode 100644 index 000000000..b4954bcbb --- /dev/null +++ b/src/tools/rbd/action/Rename.cc @@ -0,0 +1,94 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "tools/rbd/ArgumentTypes.h"
#include "tools/rbd/Shell.h"
#include "tools/rbd/Utils.h"
#include "common/errno.h"
#include <iostream>
#include <boost/program_options.hpp>

namespace rbd {
namespace action {
namespace rename {

namespace at = argument_types;
namespace po = boost::program_options;

// Rename imgname to destname within the same pool/namespace.
static int do_rename(librbd::RBD &rbd, librados::IoCtx& io_ctx,
                     const char *imgname, const char *destname)
{
  int r = rbd.rename(io_ctx, imgname, destname);
  if (r < 0)
    return r;
  return 0;
}

// Register CLI arguments for "rbd rename": source and destination image
// specs.
void get_arguments(po::options_description *positional,
                   po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE);
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST);
}

// Entry point for "rbd rename"/"rbd mv". Parses source spec first, then
// destination spec (defaulting its pool/namespace to the source's).
int execute(const po::variables_map &vm,
            const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  std::string dst_image_name;
  std::string dst_snap_name;
  std::string dst_pool_name = pool_name;
  std::string 
dst_namespace_name = namespace_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, + &dst_namespace_name, &dst_image_name, &dst_snap_name, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (pool_name != dst_pool_name) { + std::cerr << "rbd: mv/rename across pools not supported" << std::endl + << "source pool: " << pool_name << " dest pool: " << dst_pool_name + << std::endl; + return -EINVAL; + } else if (namespace_name != dst_namespace_name) { + std::cerr << "rbd: mv/rename across namespaces not supported" << std::endl + << "source namespace: " << namespace_name << " dest namespace: " + << dst_namespace_name << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = do_rename(rbd, io_ctx, image_name.c_str(), dst_image_name.c_str()); + if (r < 0) { + std::cerr << "rbd: rename error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"rename"}, {"mv"}, "Rename image within pool.", "", &get_arguments, + &execute); + +} // namespace rename +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Resize.cc b/src/tools/rbd/action/Resize.cc new file mode 100644 index 000000000..79fbbd127 --- /dev/null +++ b/src/tools/rbd/action/Resize.cc @@ -0,0 +1,123 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace resize { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_resize(librbd::Image& image, uint64_t size, bool allow_shrink, 
bool no_progress) +{ + utils::ProgressContext pc("Resizing image", no_progress); + int r = image.resize2(size, allow_shrink, pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_size_option(options); + options->add_options() + ("allow-shrink", po::bool_switch(), "permit shrinking"); + at::add_no_progress_option(options); + at::add_encryption_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + uint64_t size; + r = utils::get_image_size(vm, &size); + if (r < 0) { + return r; + } + + utils::EncryptionOptions encryption_options; + r = utils::get_encryption_options(vm, &encryption_options); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + if (!encryption_options.specs.empty()) { + r = image.encryption_load2(encryption_options.specs.data(), + encryption_options.specs.size()); + if (r < 0) { + std::cerr << "rbd: encryption load failed: " << cpp_strerror(r) + << std::endl; + return r; + } + } + + librbd::image_info_t info; + r = image.stat(info, sizeof(info)); + if (r < 0) { + std::cerr << "rbd: resize error: " << cpp_strerror(r) << std::endl; + return r; + } + + if (info.size == size) { + std::cerr << 
"rbd: new size is equal to original size " << std::endl; + return -EINVAL; + } + + if (info.size > size && !vm["allow-shrink"].as<bool>()) { + r = -EINVAL; + } else { + r = do_resize(image, size, vm["allow-shrink"].as<bool>(), vm[at::NO_PROGRESS].as<bool>()); + } + + if (r < 0) { + if (r == -EINVAL && !vm["allow-shrink"].as<bool>()) { + std::cerr << "rbd: shrinking an image is only allowed with the " + << "--allow-shrink flag" << std::endl; + return r; + } + std::cerr << "rbd: resize error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::SwitchArguments switched_arguments({"allow-shrink"}); +Shell::Action action( + {"resize"}, {}, "Resize (expand or shrink) image.", "", &get_arguments, + &execute); + +} // namespace resize +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Snap.cc b/src/tools/rbd/action/Snap.cc new file mode 100644 index 000000000..cb87735f9 --- /dev/null +++ b/src/tools/rbd/action/Snap.cc @@ -0,0 +1,972 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/types.h" +#include "include/stringify.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <iostream> +#include <boost/program_options.hpp> +#include <boost/bind/bind.hpp> + +namespace rbd { +namespace action { +namespace snap { + +using namespace boost::placeholders; + +static const std::string ALL_NAME("all"); + +namespace at = argument_types; +namespace po = boost::program_options; + +int do_list_snaps(librbd::Image& image, Formatter *f, bool all_snaps, librados::Rados& rados) +{ + std::vector<librbd::snap_info_t> snaps; + TextTable t; + int r; + + r = image.snap_list(snaps); + if (r < 0) { + std::cerr << "rbd: unable to list snapshots" << std::endl; + return r; + } + + librbd::image_info_t info; + if (!all_snaps) { + 
snaps.erase(remove_if(snaps.begin(), + snaps.end(), + boost::bind(utils::is_not_user_snap_namespace, &image, _1)), + snaps.end()); + } else if (!f) { + r = image.stat(info, sizeof(info)); + if (r < 0) { + std::cerr << "rbd: unable to get image info" << std::endl; + return r; + } + } + + if (f) { + f->open_array_section("snapshots"); + } else { + t.define_column("SNAPID", TextTable::LEFT, TextTable::RIGHT); + t.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + t.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT); + t.define_column("PROTECTED", TextTable::LEFT, TextTable::LEFT); + t.define_column("TIMESTAMP", TextTable::LEFT, TextTable::RIGHT); + if (all_snaps) { + t.define_column("NAMESPACE", TextTable::LEFT, TextTable::LEFT); + } + } + + std::list<std::pair<int64_t, std::string>> pool_list; + rados.pool_list2(pool_list); + std::map<int64_t, std::string> pool_map(pool_list.begin(), pool_list.end()); + + for (std::vector<librbd::snap_info_t>::iterator s = snaps.begin(); + s != snaps.end(); ++s) { + struct timespec timestamp; + bool snap_protected = false; + image.snap_get_timestamp(s->id, ×tamp); + std::string tt_str = ""; + if(timestamp.tv_sec != 0) { + time_t tt = timestamp.tv_sec; + tt_str = ctime(&tt); + tt_str = tt_str.substr(0, tt_str.length() - 1); + } + + librbd::snap_namespace_type_t snap_namespace; + r = image.snap_get_namespace_type(s->id, &snap_namespace); + if (r < 0) { + std::cerr << "rbd: unable to retrieve snap namespace" << std::endl; + return r; + } + + std::string snap_namespace_name = "Unknown"; + switch (snap_namespace) { + case RBD_SNAP_NAMESPACE_TYPE_USER: + snap_namespace_name = "user"; + break; + case RBD_SNAP_NAMESPACE_TYPE_GROUP: + snap_namespace_name = "group"; + break; + case RBD_SNAP_NAMESPACE_TYPE_TRASH: + snap_namespace_name = "trash"; + break; + case RBD_SNAP_NAMESPACE_TYPE_MIRROR: + snap_namespace_name = "mirror"; + break; + } + + int get_trash_res = -ENOENT; + std::string trash_original_name; + int get_group_res = 
-ENOENT; + librbd::snap_group_namespace_t group_snap; + int get_mirror_res = -ENOENT; + librbd::snap_mirror_namespace_t mirror_snap; + std::string mirror_snap_state = "unknown"; + if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_GROUP) { + get_group_res = image.snap_get_group_namespace(s->id, &group_snap, + sizeof(group_snap)); + } else if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_TRASH) { + get_trash_res = image.snap_get_trash_namespace( + s->id, &trash_original_name); + } else if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_MIRROR) { + get_mirror_res = image.snap_get_mirror_namespace( + s->id, &mirror_snap, sizeof(mirror_snap)); + + switch (mirror_snap.state) { + case RBD_SNAP_MIRROR_STATE_PRIMARY: + mirror_snap_state = "primary"; + break; + case RBD_SNAP_MIRROR_STATE_NON_PRIMARY: + mirror_snap_state = "non-primary"; + break; + case RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED: + case RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED: + mirror_snap_state = "demoted"; + break; + } + } + + std::string protected_str = ""; + if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_USER) { + r = image.snap_is_protected(s->name.c_str(), &snap_protected); + if (r < 0) { + std::cerr << "rbd: unable to retrieve snap protection" << std::endl; + return r; + } + } + + if (f) { + protected_str = snap_protected ? 
"true" : "false"; + f->open_object_section("snapshot"); + f->dump_unsigned("id", s->id); + f->dump_string("name", s->name); + f->dump_unsigned("size", s->size); + f->dump_string("protected", protected_str); + f->dump_string("timestamp", tt_str); + if (all_snaps) { + f->open_object_section("namespace"); + f->dump_string("type", snap_namespace_name); + if (get_group_res == 0) { + std::string pool_name = pool_map[group_snap.group_pool]; + f->dump_string("pool", pool_name); + f->dump_string("group", group_snap.group_name); + f->dump_string("group snap", group_snap.group_snap_name); + } else if (get_trash_res == 0) { + f->dump_string("original_name", trash_original_name); + } else if (get_mirror_res == 0) { + f->dump_string("state", mirror_snap_state); + f->open_array_section("mirror_peer_uuids"); + for (auto &uuid : mirror_snap.mirror_peer_uuids) { + f->dump_string("peer_uuid", uuid); + } + f->close_section(); + f->dump_bool("complete", mirror_snap.complete); + if (mirror_snap.state == RBD_SNAP_MIRROR_STATE_NON_PRIMARY || + mirror_snap.state == RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED) { + f->dump_string("primary_mirror_uuid", + mirror_snap.primary_mirror_uuid); + f->dump_unsigned("primary_snap_id", + mirror_snap.primary_snap_id); + f->dump_unsigned("last_copied_object_number", + mirror_snap.last_copied_object_number); + } + } + f->close_section(); + } + f->close_section(); + } else { + protected_str = snap_protected ? 
"yes" : ""; + t << s->id << s->name << stringify(byte_u_t(s->size)) << protected_str << tt_str; + + if (all_snaps) { + std::ostringstream oss; + oss << snap_namespace_name; + + if (get_group_res == 0) { + std::string pool_name = pool_map[group_snap.group_pool]; + oss << " (" << pool_name << "/" + << group_snap.group_name << "@" + << group_snap.group_snap_name << ")"; + } else if (get_trash_res == 0) { + oss << " (" << trash_original_name << ")"; + } else if (get_mirror_res == 0) { + oss << " (" << mirror_snap_state << " " + << "peer_uuids:[" << mirror_snap.mirror_peer_uuids << "]"; + if (mirror_snap.state == RBD_SNAP_MIRROR_STATE_NON_PRIMARY || + mirror_snap.state == RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED) { + oss << " " << mirror_snap.primary_mirror_uuid << ":" + << mirror_snap.primary_snap_id << " "; + if (!mirror_snap.complete) { + if (info.num_objs > 0) { + auto progress = std::min<uint64_t>( + 100, 100 * mirror_snap.last_copied_object_number / + info.num_objs); + oss << progress << "% "; + } else { + oss << "not "; + } + } + oss << "copied"; + } + oss << ")"; + } + + t << oss.str(); + } + t << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else if (snaps.size()) { + std::cout << t; + } + + return 0; +} + +int do_add_snap(librbd::Image& image, const char *snapname, + uint32_t flags, bool no_progress) +{ + utils::ProgressContext pc("Creating snap", no_progress); + + int r = image.snap_create2(snapname, flags, pc); + if (r < 0) { + pc.fail(); + return r; + } + + pc.finish(); + return 0; +} + +int do_remove_snap(librbd::Image& image, const char *snapname, bool force, + bool no_progress) +{ + uint32_t flags = force? 
RBD_SNAP_REMOVE_FORCE : 0; + int r = 0; + utils::ProgressContext pc("Removing snap", no_progress); + + r = image.snap_remove2(snapname, flags, pc); + if (r < 0) { + pc.fail(); + return r; + } + + pc.finish(); + return 0; +} + +int do_rollback_snap(librbd::Image& image, const char *snapname, + bool no_progress) +{ + utils::ProgressContext pc("Rolling back to snapshot", no_progress); + int r = image.snap_rollback_with_progress(snapname, pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +int do_purge_snaps(librbd::Image& image, bool no_progress) +{ + utils::ProgressContext pc("Removing all snapshots", no_progress); + std::vector<librbd::snap_info_t> snaps; + bool is_protected = false; + int r = image.snap_list(snaps); + if (r < 0) { + pc.fail(); + return r; + } else if (0 == snaps.size()) { + return 0; + } else { + std::list<std::string> protect; + snaps.erase(remove_if(snaps.begin(), + snaps.end(), + boost::bind(utils::is_not_user_snap_namespace, &image, _1)), + snaps.end()); + for (auto it = snaps.begin(); it != snaps.end();) { + r = image.snap_is_protected(it->name.c_str(), &is_protected); + if (r < 0) { + pc.fail(); + return r; + } else if (is_protected == true) { + protect.push_back(it->name.c_str()); + snaps.erase(it); + } else { + ++it; + } + } + + if (!protect.empty()) { + std::cout << "rbd: error removing snapshot(s) '" << protect << "', which " + << (1 == protect.size() ? "is" : "are") + << " protected - these must be unprotected with " + << "`rbd snap unprotect`." 
+ << std::endl; + } + for (size_t i = 0; i < snaps.size(); ++i) { + r = image.snap_remove(snaps[i].name.c_str()); + if (r < 0) { + pc.fail(); + return r; + } + pc.update_progress(i + 1, snaps.size() + protect.size()); + } + + if (!protect.empty()) { + pc.fail(); + } else if (snaps.size() > 0) { + pc.finish(); + } + + return 0; + } +} + +int do_protect_snap(librbd::Image& image, const char *snapname) +{ + int r = image.snap_protect(snapname); + if (r < 0) + return r; + + return 0; +} + +int do_unprotect_snap(librbd::Image& image, const char *snapname) +{ + int r = image.snap_unprotect(snapname); + if (r < 0) + return r; + + return 0; +} + +int do_set_limit(librbd::Image& image, uint64_t limit) +{ + return image.snap_set_limit(limit); +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + at::add_format_options(options); + + std::string name = ALL_NAME + ",a"; + + options->add_options() + (name.c_str(), po::bool_switch(), "list snapshots from all namespaces"); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + std::string image_id; + + if (vm.count(at::IMAGE_ID)) { + image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, image_id.empty(), + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (!image_id.empty() && !image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id. 
" + << std::endl; + return -EINVAL; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, + image_id, "", true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + bool all_snaps = vm[ALL_NAME].as<bool>(); + r = do_list_snaps(image, formatter.get(), all_snaps, rados); + if (r < 0) { + std::cerr << "rbd: failed to list snapshots: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_create_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_snap_create_options(options); + at::add_no_progress_option(options); +} + +int execute_create(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED, + utils::SPEC_VALIDATION_SNAP); + if (r < 0) { + return r; + } + + uint32_t flags; + r = utils::get_snap_create_flags(vm, &flags); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_add_snap(image, snap_name.c_str(), flags, + vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: failed to create snapshot: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_remove_arguments(po::options_description *positional, + 
// (continuation of get_remove_arguments) Registers arguments for
// "rbd snap rm": snap spec, --image-id, --snap-id, --no-progress, --force.
                          po::options_description *options) {
  at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_image_id_option(options);
  at::add_snap_id_option(options);
  at::add_no_progress_option(options);

  options->add_options()
    ("force", po::bool_switch(), "flatten children and unprotect snapshot if needed.");
}

// Entry point for "rbd snap rm". The snapshot may be addressed by name
// or by --snap-id (and the image by name or --image-id), but never both;
// --force/--no-progress are rejected when removing by id.
int execute_remove(const po::variables_map &vm,
                   const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  std::string image_id;
  uint64_t snap_id = CEPH_NOSNAP;
  bool force = vm["force"].as<bool>();
  bool no_progress = vm[at::NO_PROGRESS].as<bool>();

  if (vm.count(at::IMAGE_ID)) {
    image_id = vm[at::IMAGE_ID].as<std::string>();
  }
  if (vm.count(at::SNAPSHOT_ID)) {
    snap_id = vm[at::SNAPSHOT_ID].as<uint64_t>();
  }

  // A snapshot name is required unless a snap id was given.
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, image_id.empty(),
    (snap_id == CEPH_NOSNAP ? utils::SNAPSHOT_PRESENCE_REQUIRED :
                              utils::SNAPSHOT_PRESENCE_PERMITTED),
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  if (!image_id.empty() && !image_name.empty()) {
    std::cerr << "rbd: trying to access image using both name and id."
              << std::endl;
    return -EINVAL;
  } else if (!snap_name.empty() && snap_id != CEPH_NOSNAP) {
    std::cerr << "rbd: trying to access snapshot using both name and id."
              << std::endl;
    return -EINVAL;
  } else if ((force || no_progress) && snap_id != CEPH_NOSNAP) {
    std::cerr << "rbd: force and no-progress options not permitted when "
              << "removing by id."
              << std::endl;
    return -EINVAL;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  // Allow removal to proceed even on a full pool (deleting frees space).
  io_ctx.set_pool_full_try();
  if (image_id.empty()) {
    r = utils::open_image(io_ctx, image_name, false, &image);
  } else {
    r = utils::open_image_by_id(io_ctx, image_id, false, &image);
  }
  if (r < 0) {
    return r;
  }

  if (!snap_name.empty()) {
    r = do_remove_snap(image, snap_name.c_str(), force, no_progress);
  } else {
    r = image.snap_remove_by_id(snap_id);
  }

  if (r < 0) {
    if (r == -EBUSY) {
      // -EBUSY from snapshot removal means the snapshot is protected.
      std::cerr << "rbd: snapshot "
                << (snap_name.empty() ? std::string("id ") + stringify(snap_id) :
                                        std::string("'") + snap_name + "'")
                << " is protected from removal." << std::endl;
    } else {
      std::cerr << "rbd: failed to remove snapshot: " << cpp_strerror(r)
                << std::endl;
    }
    return r;
  }
  return 0;
}

// Register CLI arguments for "rbd snap purge": image spec, --image-id,
// --no-progress.
void get_purge_arguments(po::options_description *positional,
                         po::options_description *options) {
  at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_image_id_option(options);
  at::add_no_progress_option(options);
}

// Entry point for "rbd snap purge": removes all unprotected user
// snapshots of the image (addressed by name or --image-id, not both).
int execute_purge(const po::variables_map &vm,
                  const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  std::string image_id;

  if (vm.count(at::IMAGE_ID)) {
    image_id = vm[at::IMAGE_ID].as<std::string>();
  }

  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, image_id.empty(),
    utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  if (!image_id.empty() && !image_name.empty()) {
    std::cerr << "rbd: trying to access image using both name and id. "
              << std::endl;
    return -EINVAL;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
  if (r < 0) {
    return r;
  }

  // Allow removal to proceed even on a full pool (deleting frees space).
  io_ctx.set_pool_full_try();
  if (image_id.empty()) {
    r = utils::open_image(io_ctx, image_name, false, &image);
  } else {
    r = utils::open_image_by_id(io_ctx, image_id, false, &image);
  }
  if (r < 0) {
    return r;
  }

  r = do_purge_snaps(image, vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    // -EBUSY was already reported by do_purge_snaps (protected snaps).
    if (r != -EBUSY) {
      std::cerr << "rbd: removing snaps failed: " << cpp_strerror(r)
                << std::endl;
    }
    return r;
  }
  return 0;
}

// Register CLI arguments for "rbd snap rollback": snap spec and
// --no-progress.
void get_rollback_arguments(po::options_description *positional,
                            po::options_description *options) {
  at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_no_progress_option(options);
}

// Entry point for "rbd snap rollback": reverts the image to the named
// snapshot.
int execute_rollback(const po::variables_map &vm,
                     const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  r = do_rollback_snap(image, snap_name.c_str(),
                       vm[at::NO_PROGRESS].as<bool>());
  if (r < 0) {
    std::cerr << "rbd: rollback failed: " << cpp_strerror(r) << std::endl;
    return r;
  }
  return 0;
}

// Register CLI arguments for "rbd snap protect": snap spec only.
void get_protect_arguments(po::options_description *positional,
                           po::options_description *options) {
  at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
}

// Entry point for "rbd snap protect": marks the snapshot protected;
// fails with -EBUSY if it already is.
int execute_protect(const po::variables_map &vm,
                    const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED,
    utils::SPEC_VALIDATION_NONE);
  if (r < 0) {
    return r;
  }

  librados::Rados rados;
  librados::IoCtx io_ctx;
  librbd::Image image;
  r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "",
                                 false, &rados, &io_ctx, &image);
  if (r < 0) {
    return r;
  }

  // Protecting an already-protected snapshot is reported explicitly.
  bool is_protected = false;
  r = image.snap_is_protected(snap_name.c_str(), &is_protected);
  if (r < 0) {
    std::cerr << "rbd: protecting snap failed: " << cpp_strerror(r)
              << std::endl;
    return r;
  } else if (is_protected) {
    std::cerr << "rbd: snap is already protected" << std::endl;
    return -EBUSY;
  }

  r = do_protect_snap(image, snap_name.c_str());
  if (r < 0) {
    std::cerr << "rbd: protecting snap failed: " << cpp_strerror(r)
              << std::endl;
    return r;
  }
  return 0;
}

// Register CLI arguments for "rbd snap unprotect": snap spec and
// --image-id.
void get_unprotect_arguments(po::options_description *positional,
                             po::options_description *options) {
  at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
  at::add_image_id_option(options);
}

// Entry point for "rbd snap unprotect" (definition continues past the
// end of this chunk).
int execute_unprotect(const po::variables_map &vm,
                      const std::vector<std::string> &ceph_global_init_args) {
  size_t arg_index = 0;
  std::string pool_name;
  std::string namespace_name;
  std::string image_name;
  std::string snap_name;
  std::string image_id;

  if (vm.count(at::IMAGE_ID)) {
    image_id = vm[at::IMAGE_ID].as<std::string>();
  }

  int r = utils::get_pool_image_snapshot_names(
    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name,
    &image_name, &snap_name, image_id.empty(),
    utils::SNAPSHOT_PRESENCE_REQUIRED, 
utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (!image_id.empty() && !image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id. " + << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + io_ctx.set_pool_full_try(); + if (image_id.empty()) { + r = utils::open_image(io_ctx, image_name, false, &image); + } else { + r = utils::open_image_by_id(io_ctx, image_id, false, &image); + } + if (r < 0) { + return r; + } + + bool is_protected = false; + r = image.snap_is_protected(snap_name.c_str(), &is_protected); + if (r < 0) { + std::cerr << "rbd: unprotecting snap failed: " << cpp_strerror(r) + << std::endl; + return r; + } else if (!is_protected) { + std::cerr << "rbd: snap is already unprotected" << std::endl; + return -EINVAL; + } + + r = do_unprotect_snap(image, snap_name.c_str()); + if (r < 0) { + std::cerr << "rbd: unprotecting snap failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_set_limit_arguments(po::options_description *pos, + po::options_description *opt) { + at::add_image_spec_options(pos, opt, at::ARGUMENT_MODIFIER_NONE); + at::add_limit_option(opt); +} + +int execute_set_limit(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + uint64_t limit; + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (vm.count(at::LIMIT)) { + limit = vm[at::LIMIT].as<uint64_t>(); + } else { + std::cerr << "rbd: must specify --limit <num>" << std::endl; + return -ERANGE; + 
} + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_set_limit(image, limit); + if (r < 0) { + std::cerr << "rbd: setting snapshot limit failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_clear_limit_arguments(po::options_description *pos, + po::options_description *opt) { + at::add_image_spec_options(pos, opt, at::ARGUMENT_MODIFIER_NONE); +} + +int execute_clear_limit(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_set_limit(image, UINT64_MAX); + if (r < 0) { + std::cerr << "rbd: clearing snapshot limit failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_rename_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE); + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); +} + +int execute_rename(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string src_snap_name; + int r = 
utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &src_snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return -r; + } + + std::string dest_pool_name(pool_name); + std::string dest_namespace_name(namespace_name); + std::string dest_image_name; + std::string dest_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dest_pool_name, + &dest_namespace_name, &dest_image_name, &dest_snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_SNAP); + if (r < 0) { + return -r; + } + + if (pool_name != dest_pool_name) { + std::cerr << "rbd: source and destination pool must be the same" + << std::endl; + return -EINVAL; + } else if (namespace_name != dest_namespace_name) { + std::cerr << "rbd: source and destination namespace must be the same" + << std::endl; + return -EINVAL; + } else if (image_name != dest_image_name) { + std::cerr << "rbd: source and destination image name must be the same" + << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = image.snap_rename(src_snap_name.c_str(), dest_snap_name.c_str()); + if (r < 0) { + std::cerr << "rbd: renaming snap failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +Shell::Action action_list( + {"snap", "list"}, {"snap", "ls"}, "Dump list of image snapshots.", "", + &get_list_arguments, &execute_list); +Shell::Action action_create( + {"snap", "create"}, {"snap", "add"}, "Create a snapshot.", "", + &get_create_arguments, &execute_create); +Shell::Action action_remove( + {"snap", "remove"}, {"snap", "rm"}, "Delete a snapshot.", "", + &get_remove_arguments, &execute_remove); +Shell::Action 
action_purge( + {"snap", "purge"}, {}, "Delete all unprotected snapshots.", "", + &get_purge_arguments, &execute_purge); +Shell::Action action_rollback( + {"snap", "rollback"}, {"snap", "revert"}, "Rollback image to snapshot.", "", + &get_rollback_arguments, &execute_rollback); +Shell::Action action_protect( + {"snap", "protect"}, {}, "Prevent a snapshot from being deleted.", "", + &get_protect_arguments, &execute_protect); +Shell::Action action_unprotect( + {"snap", "unprotect"}, {}, "Allow a snapshot to be deleted.", "", + &get_unprotect_arguments, &execute_unprotect); +Shell::Action action_set_limit( + {"snap", "limit", "set"}, {}, "Limit the number of snapshots.", "", + &get_set_limit_arguments, &execute_set_limit); +Shell::Action action_clear_limit( + {"snap", "limit", "clear"}, {}, "Remove snapshot limit.", "", + &get_clear_limit_arguments, &execute_clear_limit); +Shell::Action action_rename( + {"snap", "rename"}, {}, "Rename a snapshot.", "", + &get_rename_arguments, &execute_rename); + +} // namespace snap +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Sparsify.cc b/src/tools/rbd/action/Sparsify.cc new file mode 100644 index 000000000..a345f920b --- /dev/null +++ b/src/tools/rbd/action/Sparsify.cc @@ -0,0 +1,82 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace sparsify { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_sparsify(librbd::Image& image, size_t sparse_size, + bool no_progress) +{ + utils::ProgressContext pc("Image sparsify", no_progress); + int r = image.sparsify_with_progress(sparse_size, pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void 
get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); + at::add_sparse_size_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE; + if (vm.count(at::IMAGE_SPARSE_SIZE)) { + sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>(); + } + + r = do_sparsify(image, sparse_size, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: sparsify error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"sparsify"}, {}, + "Reclaim space for zeroed image extents.", "", + &get_arguments, &execute); + +} // namespace sparsify +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Status.cc b/src/tools/rbd/action/Status.cc new file mode 100644 index 000000000..958a686c4 --- /dev/null +++ b/src/tools/rbd/action/Status.cc @@ -0,0 +1,365 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/errno.h" +#include "common/Formatter.h" +#include "json_spirit/json_spirit.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include 
"include/rbd_types.h" +#include "include/stringify.h" +#include "librbd/cache/Types.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace status { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_show_status(librados::IoCtx& io_ctx, const std::string &image_name, + librbd::Image &image, Formatter *f) +{ + int r; + std::list<librbd::image_watcher_t> watchers; + + r = image.list_watchers(watchers); + if (r < 0) + return r; + + uint64_t features; + r = image.features(&features); + if (r < 0) { + return r; + } + + librbd::image_migration_status_t migration_status; + std::string source_spec; + std::string source_pool_name; + std::string dest_pool_name; + std::string migration_state; + if ((features & RBD_FEATURE_MIGRATING) != 0) { + r = librbd::RBD().migration_status(io_ctx, image_name.c_str(), + &migration_status, + sizeof(migration_status)); + if (r < 0) { + std::cerr << "rbd: getting migration status failed: " << cpp_strerror(r) + << std::endl; + // not fatal + } else { + if (migration_status.source_pool_id >= 0) { + librados::IoCtx src_io_ctx; + r = librados::Rados(io_ctx).ioctx_create2(migration_status.source_pool_id, src_io_ctx); + if (r < 0) { + source_pool_name = stringify(migration_status.source_pool_id); + } else { + source_pool_name = src_io_ctx.get_pool_name(); + } + } else { + r = image.get_migration_source_spec(&source_spec); + if (r < 0) { + std::cerr << "rbd: getting migration source spec failed: " + << cpp_strerror(r) << std::endl; + } + } + + librados::IoCtx dst_io_ctx; + r = librados::Rados(io_ctx).ioctx_create2(migration_status.dest_pool_id, dst_io_ctx); + if (r < 0) { + dest_pool_name = stringify(migration_status.dest_pool_id); + } else { + dest_pool_name = dst_io_ctx.get_pool_name(); + } + + switch (migration_status.state) { + case RBD_IMAGE_MIGRATION_STATE_ERROR: + migration_state = "error"; + break; + case RBD_IMAGE_MIGRATION_STATE_PREPARING: + 
migration_state = "preparing"; + break; + case RBD_IMAGE_MIGRATION_STATE_PREPARED: + migration_state = "prepared"; + break; + case RBD_IMAGE_MIGRATION_STATE_EXECUTING: + migration_state = "executing"; + break; + case RBD_IMAGE_MIGRATION_STATE_EXECUTED: + migration_state = "executed"; + break; + case RBD_IMAGE_MIGRATION_STATE_ABORTING: + migration_state = "aborting"; + break; + default: + migration_state = "unknown"; + } + } + } + + struct { + // decoded + std::string host; + std::string path; + uint64_t size; + std::string mode; + std::string stats_timestamp; + bool present; + bool empty; + bool clean; + uint64_t allocated_bytes; + uint64_t cached_bytes; + uint64_t dirty_bytes; + uint64_t free_bytes; + uint64_t hits_full; + uint64_t hits_partial; + uint64_t misses; + uint64_t hit_bytes; + uint64_t miss_bytes; + + // calculated + uint64_t total_read_ops; + uint64_t total_read_bytes; + int hits_full_percent; + int hits_partial_percent; + int hit_bytes_percent; + } cache_state; + std::string cache_str; + if (features & RBD_FEATURE_DIRTY_CACHE) { + r = image.metadata_get(librbd::cache::PERSISTENT_CACHE_STATE, &cache_str); + if (r < 0) { + std::cerr << "rbd: getting persistent cache state failed: " << cpp_strerror(r) + << std::endl; + // not fatal + } + json_spirit::mValue json_root; + if (!json_spirit::read(cache_str.c_str(), json_root)) { + std::cerr << "rbd: parsing persistent cache state failed" << std::endl; + cache_str.clear(); + } else { + try { + auto& o = json_root.get_obj(); + cache_state.host = o["host"].get_str(); + cache_state.path = o["path"].get_str(); + cache_state.size = o["size"].get_uint64(); + cache_state.mode = o["mode"].get_str(); + time_t stats_timestamp_sec = o["stats_timestamp"].get_uint64(); + cache_state.stats_timestamp = ctime(&stats_timestamp_sec); + cache_state.stats_timestamp.pop_back(); + cache_state.present = o["present"].get_bool(); + cache_state.empty = o["empty"].get_bool(); + cache_state.clean = o["clean"].get_bool(); + 
cache_state.allocated_bytes = o["allocated_bytes"].get_uint64(); + cache_state.cached_bytes = o["cached_bytes"].get_uint64(); + cache_state.dirty_bytes = o["dirty_bytes"].get_uint64(); + cache_state.free_bytes = o["free_bytes"].get_uint64(); + cache_state.hits_full = o["hits_full"].get_uint64(); + cache_state.hits_partial = o["hits_partial"].get_uint64(); + cache_state.misses = o["misses"].get_uint64(); + cache_state.hit_bytes = o["hit_bytes"].get_uint64(); + cache_state.miss_bytes = o["miss_bytes"].get_uint64(); + } catch (std::runtime_error &e) { + std::cerr << "rbd: parsing persistent cache state failed: " << e.what() + << std::endl; + cache_str.clear(); + } + cache_state.total_read_ops = cache_state.hits_full + + cache_state.hits_partial + cache_state.misses; + cache_state.total_read_bytes = cache_state.hit_bytes + + cache_state.miss_bytes; + cache_state.hits_full_percent = utils::get_percentage( + cache_state.hits_full, cache_state.total_read_ops); + cache_state.hits_partial_percent = utils::get_percentage( + cache_state.hits_partial, cache_state.total_read_ops); + cache_state.hit_bytes_percent = utils::get_percentage( + cache_state.hit_bytes, cache_state.total_read_bytes); + } + } + + if (f) + f->open_object_section("status"); + + if (f) { + f->open_array_section("watchers"); + for (auto &watcher : watchers) { + f->open_object_section("watcher"); + f->dump_string("address", watcher.addr); + f->dump_unsigned("client", watcher.id); + f->dump_unsigned("cookie", watcher.cookie); + f->close_section(); + } + f->close_section(); // watchers + if (!migration_state.empty()) { + f->open_object_section("migration"); + if (!source_spec.empty()) { + f->dump_string("source_spec", source_spec); + } else { + f->dump_string("source_pool_name", source_pool_name); + f->dump_string("source_pool_namespace", + migration_status.source_pool_namespace); + f->dump_string("source_image_name", migration_status.source_image_name); + f->dump_string("source_image_id", 
migration_status.source_image_id); + } + f->dump_string("dest_pool_name", dest_pool_name); + f->dump_string("dest_pool_namespace", + migration_status.dest_pool_namespace); + f->dump_string("dest_image_name", migration_status.dest_image_name); + f->dump_string("dest_image_id", migration_status.dest_image_id); + f->dump_string("state", migration_state); + f->dump_string("state_description", migration_status.state_description); + f->close_section(); // migration + } + if (!cache_str.empty()) { + f->open_object_section("persistent_cache"); + f->dump_string("host", cache_state.host); + f->dump_string("path", cache_state.path); + f->dump_unsigned("size", cache_state.size); + f->dump_string("mode", cache_state.mode); + f->dump_string("stats_timestamp", cache_state.stats_timestamp); + f->dump_bool("present", cache_state.present); + f->dump_bool("empty", cache_state.empty); + f->dump_bool("clean", cache_state.clean); + f->dump_unsigned("allocated_bytes", cache_state.allocated_bytes); + f->dump_unsigned("cached_bytes", cache_state.cached_bytes); + f->dump_unsigned("dirty_bytes", cache_state.dirty_bytes); + f->dump_unsigned("free_bytes", cache_state.free_bytes); + f->dump_unsigned("hits_full", cache_state.hits_full); + f->dump_int("hits_full_percent", cache_state.hits_full_percent); + f->dump_unsigned("hits_partial", cache_state.hits_partial); + f->dump_int("hits_partial_percent", cache_state.hits_partial_percent); + f->dump_unsigned("misses", cache_state.misses); + f->dump_unsigned("hit_bytes", cache_state.hit_bytes); + f->dump_int("hit_bytes_percent", cache_state.hit_bytes_percent); + f->dump_unsigned("miss_bytes", cache_state.miss_bytes); + f->close_section(); // persistent_cache + } + } else { + if (watchers.size()) { + std::cout << "Watchers:" << std::endl; + for (auto &watcher : watchers) { + std::cout << "\twatcher=" << watcher.addr << " client." 
<< watcher.id + << " cookie=" << watcher.cookie << std::endl; + } + } else { + std::cout << "Watchers: none" << std::endl; + } + if (!migration_state.empty()) { + if (!migration_status.source_pool_namespace.empty()) { + source_pool_name += ("/" + migration_status.source_pool_namespace); + } + if (!migration_status.dest_pool_namespace.empty()) { + dest_pool_name += ("/" + migration_status.dest_pool_namespace); + } + + std::cout << "Migration:" << std::endl; + std::cout << "\tsource: "; + if (!source_spec.empty()) { + std::cout << source_spec; + } else { + std::cout << source_pool_name << "/" + << migration_status.source_image_name; + if (!migration_status.source_image_id.empty()) { + std::cout << " (" << migration_status.source_image_id << ")"; + } + } + std::cout << std::endl; + std::cout << "\tdestination: " << dest_pool_name << "/" + << migration_status.dest_image_name << " (" + << migration_status.dest_image_id << ")" << std::endl; + std::cout << "\tstate: " << migration_state; + if (!migration_status.state_description.empty()) { + std::cout << " (" << migration_status.state_description << ")"; + } + std::cout << std::endl; + } + if (!cache_str.empty()) { + std::cout << "Persistent cache state:" << std::endl; + std::cout << "\thost: " << cache_state.host << std::endl; + std::cout << "\tpath: " << cache_state.path << std::endl; + std::cout << "\tsize: " << byte_u_t(cache_state.size) << std::endl; + std::cout << "\tmode: " << cache_state.mode << std::endl; + std::cout << "\tstats_timestamp: " << cache_state.stats_timestamp + << std::endl; + std::cout << "\tpresent: " << (cache_state.present ? "true" : "false") + << "\tempty: " << (cache_state.empty ? "true" : "false") + << "\tclean: " << (cache_state.clean ? 
"true" : "false") + << std::endl; + std::cout << "\tallocated: " << byte_u_t(cache_state.allocated_bytes) + << std::endl; + std::cout << "\tcached: " << byte_u_t(cache_state.cached_bytes) + << std::endl; + std::cout << "\tdirty: " << byte_u_t(cache_state.dirty_bytes) << std::endl; + std::cout << "\tfree: " << byte_u_t(cache_state.free_bytes) << std::endl; + std::cout << "\thits_full: " << cache_state.hits_full << " / " + << cache_state.hits_full_percent << "%" << std::endl; + std::cout << "\thits_partial: " << cache_state.hits_partial << " / " + << cache_state.hits_partial_percent << "%" << std::endl; + std::cout << "\tmisses: " << cache_state.misses << std::endl; + std::cout << "\thit_bytes: " << byte_u_t(cache_state.hit_bytes) << " / " + << cache_state.hit_bytes_percent << "%" << std::endl; + std::cout << "\tmiss_bytes: " << byte_u_t(cache_state.miss_bytes) + << std::endl; + } + } + + if (f) { + f->close_section(); // status + f->flush(std::cout); + } + + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + true, &rados, &io_ctx, &image); + if (r < 0) { 
+ return r; + } + + r = do_show_status(io_ctx, image_name, image, formatter.get()); + if (r < 0) { + std::cerr << "rbd: show status failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"status"}, {}, "Show the status of this image.", "", &get_arguments, + &execute); + +} // namespace status +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Trash.cc b/src/tools/rbd/action/Trash.cc new file mode 100644 index 000000000..f1fd4df3c --- /dev/null +++ b/src/tools/rbd/action/Trash.cc @@ -0,0 +1,543 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "common/Clock.h" +#include <iostream> +#include <sstream> +#include <boost/program_options.hpp> +#include <boost/bind/bind.hpp> + +namespace rbd { +namespace action { +namespace trash { +using namespace boost::placeholders; + +namespace at = argument_types; +namespace po = boost::program_options; + +//Optional arguments used only by this set of commands (rbd trash *) +static const std::string EXPIRES_AT("expires-at"); +static const std::string EXPIRED_BEFORE("expired-before"); +static const std::string THRESHOLD("threshold"); + +static bool is_not_trash_user(const librbd::trash_image_info_t &trash_info) { + return trash_info.source != RBD_TRASH_IMAGE_SOURCE_USER && + trash_info.source != RBD_TRASH_IMAGE_SOURCE_USER_PARENT; +} + +void 
get_move_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + options->add_options() + (EXPIRES_AT.c_str(), po::value<std::string>()->default_value("now"), + "set the expiration time of an image so it can be purged when it is stale"); +} + +int execute_move(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + utime_t now = ceph_clock_now(); + utime_t exp_time = now; + std::string expires_at; + if (vm.find(EXPIRES_AT) != vm.end()) { + expires_at = vm[EXPIRES_AT].as<std::string>(); + r = utime_t::invoke_date(expires_at, &exp_time); + if (r < 0) { + std::cerr << "rbd: error calling /bin/date: " << cpp_strerror(r) + << std::endl; + return r; + } + } + + time_t dt = (exp_time - now).sec(); + if(dt < 0) { + std::cerr << "rbd: cannot use a date in the past as an expiration date" + << std::endl; + return -EINVAL; + } + + librbd::RBD rbd; + r = rbd.trash_move(io_ctx, image_name.c_str(), dt); + if (r < 0) { + std::cerr << "rbd: deferred delete error: " << cpp_strerror(r) + << std::endl; + } + + if (expires_at != "now") { + std::cout << "rbd: image " << image_name << " will expire at " << exp_time << std::endl; + } + return r; +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + positional->add_options() + (at::IMAGE_ID.c_str(), "image id\n(example: 
[<pool-name>/[<namespace>/]]<image-id>)"); + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + + at::add_no_progress_option(options); + options->add_options() + ("force", po::bool_switch(), "force remove of non-expired delayed images"); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_id; + int r = utils::get_pool_image_id(vm, &arg_index, &pool_name, &namespace_name, + &image_id); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + io_ctx.set_pool_full_try(); + librbd::RBD rbd; + + utils::ProgressContext pc("Removing image", vm[at::NO_PROGRESS].as<bool>()); + r = rbd.trash_remove_with_progress(io_ctx, image_id.c_str(), + vm["force"].as<bool>(), pc); + if (r < 0) { + if (r == -ENOTEMPTY) { + std::cerr << "rbd: image has snapshots - these must be deleted" + << " with 'rbd snap purge' before the image can be removed." + << std::endl; + } else if (r == -EUCLEAN) { + std::cerr << "rbd: error: image not fully moved to trash." + << std::endl; + } else if (r == -EBUSY) { + std::cerr << "rbd: error: image still has watchers" + << std::endl + << "This means the image is still open or the client using " + << "it crashed. Try again after closing/unmapping it or " + << "waiting 30s for the crashed client to timeout." + << std::endl; + } else if (r == -EMLINK) { + std::cerr << std::endl + << "Remove the image from the group and try again." 
+              << std::endl;
+    } else if (r == -EPERM) {
+      std::cerr << std::endl
+                << "Deferment time has not expired, please use --force if you "
+                << "really want to remove the image"
+                << std::endl;
+    } else {
+      std::cerr << "rbd: remove error: " << cpp_strerror(r) << std::endl;
+    }
+    pc.fail();
+    return r;
+  }
+
+  pc.finish();
+
+  return r;
+}
+
+// Render the human-readable trash status for an entry: either still
+// protected (deferment window not yet elapsed) or already expired.
+std::string delete_status(time_t deferment_end_time) {
+  time_t now = time(nullptr);
+
+  std::string time_str = ctime(&deferment_end_time);
+  // ctime() appends a trailing '\n'; strip it for inline display.
+  time_str = time_str.substr(0, time_str.length() - 1);
+
+  std::stringstream ss;
+  if (now < deferment_end_time) {
+    ss << "protected until " << time_str;
+  } else {
+    ss << "expired at " << time_str;
+  }
+
+  return ss.str();
+}
+
+// List trash entries in the pool/namespace bound to io_ctx.
+// long_flag: emit the detailed table (source, timestamps, parent);
+// all_flag: include non-USER sources (mirroring/migration/etc.);
+// f: optional formatter for json/xml output (plain text when null).
+// Returns 0 on success or a negative errno.
+int do_list(librbd::RBD &rbd, librados::IoCtx& io_ctx, bool long_flag,
+            bool all_flag, Formatter *f) {
+  std::vector<librbd::trash_image_info_t> trash_entries;
+  int r = rbd.trash_list(io_ctx, trash_entries);
+  if (r < 0) {
+    return r;
+  }
+
+  if (!all_flag) {
+    // Default view only shows images the user moved to trash.
+    trash_entries.erase(remove_if(trash_entries.begin(),
+                                  trash_entries.end(),
+                                  boost::bind(is_not_trash_user, _1)),
+                        trash_entries.end());
+  }
+
+  if (!long_flag) {
+    // Short listing: just "<id> <name>" per entry.
+    if (f) {
+      f->open_array_section("trash");
+    }
+    for (const auto& entry : trash_entries) {
+      if (f) {
+        f->open_object_section("image");
+        f->dump_string("id", entry.id);
+        f->dump_string("name", entry.name);
+        f->close_section();
+      } else {
+        std::cout << entry.id << " " << entry.name << std::endl;
+      }
+    }
+    if (f) {
+      f->close_section();
+      f->flush(std::cout);
+    }
+    return 0;
+  }
+
+  TextTable tbl;
+
+  if (f) {
+    f->open_array_section("trash");
+  } else {
+    tbl.define_column("ID", TextTable::LEFT, TextTable::LEFT);
+    tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT);
+    tbl.define_column("SOURCE", TextTable::LEFT, TextTable::LEFT);
+    tbl.define_column("DELETED_AT", TextTable::LEFT, TextTable::LEFT);
+    tbl.define_column("STATUS", TextTable::LEFT, TextTable::LEFT);
+    tbl.define_column("PARENT", TextTable::LEFT, TextTable::LEFT);
+  }
+
+  for (const auto& entry : trash_entries) {
+    librbd::Image im;
+
+    // Use a dedicated status for the open so a failure here cannot leak
+    // into this function's return value (previously, if the *last* entry
+    // failed to open, the stale negative r made the whole listing fail
+    // even though the comment below says to skip the entry).
+    int open_r = rbd.open_by_id_read_only(io_ctx, im, entry.id.c_str(), NULL);
+    // image might disappear between rbd.list() and rbd.open(); ignore
+    // that, warn about other possible errors (EPERM, say, for opening
+    // an old-format image, because you need execute permission for the
+    // class method)
+    if (open_r < 0) {
+      if (open_r != -ENOENT) {
+        std::cerr << "rbd: error opening " << entry.id << ": "
+                  << cpp_strerror(open_r) << std::endl;
+      }
+      // in any event, continue to next image
+      continue;
+    }
+
+    std::string del_source;
+    switch (entry.source) {
+    case RBD_TRASH_IMAGE_SOURCE_USER:
+      del_source = "USER";
+      break;
+    case RBD_TRASH_IMAGE_SOURCE_MIRRORING:
+      del_source = "MIRRORING";
+      break;
+    case RBD_TRASH_IMAGE_SOURCE_MIGRATION:
+      del_source = "MIGRATION";
+      break;
+    case RBD_TRASH_IMAGE_SOURCE_REMOVING:
+      del_source = "REMOVING";
+      break;
+    case RBD_TRASH_IMAGE_SOURCE_USER_PARENT:
+      del_source = "USER_PARENT";
+      break;
+    }
+
+    std::string time_str = ctime(&entry.deletion_time);
+    time_str = time_str.substr(0, time_str.length() - 1);
+
+    bool has_parent = false;
+    std::string parent;
+    librbd::linked_image_spec_t parent_image;
+    librbd::snap_spec_t parent_snap;
+    r = im.get_parent(&parent_image, &parent_snap);
+    if (r == -ENOENT) {
+      // no parent: not an error
+      r = 0;
+    } else if (r < 0) {
+      return r;
+    } else {
+      parent = parent_image.pool_name + "/";
+      if (!parent_image.pool_namespace.empty()) {
+        parent += parent_image.pool_namespace + "/";
+      }
+      parent += parent_image.image_name + "@" + parent_snap.name;
+      has_parent = true;
+    }
+
+    if (f) {
+      f->open_object_section("image");
+      f->dump_string("id", entry.id);
+      f->dump_string("name", entry.name);
+      f->dump_string("source", del_source);
+      f->dump_string("deleted_at", time_str);
+      f->dump_string("status",
+                     delete_status(entry.deferment_end_time));
+      if (has_parent) {
+        f->open_object_section("parent");
+        f->dump_string("pool", parent_image.pool_name);
+        f->dump_string("pool_namespace", parent_image.pool_namespace);
+        f->dump_string("image", parent_image.image_name);
+        f->dump_string("snapshot", parent_snap.name);
+        f->close_section();
+      }
+      f->close_section();
+    } else {
+      tbl << entry.id
+          << entry.name
+          << del_source
+          << time_str
+          << delete_status(entry.deferment_end_time);
+      if (has_parent)
+        tbl << parent;
+      tbl << TextTable::endrow;
+    }
+  }
+
+  if (f) {
+    f->close_section();
+    f->flush(std::cout);
+  } else if (!trash_entries.empty()) {
+    std::cout << tbl;
+  }
+
+  // All per-entry failures were either returned above or deliberately
+  // skipped, so reaching here is success.
+  return 0;
+}
+
+void get_list_arguments(po::options_description *positional,
+                        po::options_description *options) {
+  at::add_pool_options(positional, options, true);
+  options->add_options()
+    ("all,a", po::bool_switch(), "list images from all sources");
+  options->add_options()
+    ("long,l", po::bool_switch(), "long listing format");
+  at::add_format_options(options);
+}
+
+int execute_list(const po::variables_map &vm,
+                 const std::vector<std::string> &ceph_global_init_args) {
+  std::string pool_name;
+  std::string namespace_name;
+  size_t arg_index = 0;
+  int r = utils::get_pool_and_namespace_names(vm, false, &pool_name,
+                                              &namespace_name, &arg_index);
+  if (r < 0) {
+    return r;
+  }
+
+  at::Format::Formatter formatter;
+  r = utils::get_formatter(vm, &formatter);
+  if (r < 0) {
+    return r;
+  }
+
+  librados::Rados rados;
+  librados::IoCtx io_ctx;
+  r = utils::init(pool_name, namespace_name, &rados, &io_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  utils::disable_cache();
+
+  librbd::RBD rbd;
+  r = do_list(rbd, io_ctx, vm["long"].as<bool>(), vm["all"].as<bool>(),
+              formatter.get());
+  if (r < 0) {
+    std::cerr << "rbd: trash list: " << cpp_strerror(r) << std::endl;
+    return r;
+  }
+
+  return 0;
+}
+
+void get_purge_arguments(po::options_description *positional,
+                         po::options_description *options) {
+  at::add_pool_options(positional, options, true);
+  at::add_no_progress_option(options);
+
+  options->add_options()
+    (EXPIRED_BEFORE.c_str(),
po::value<std::string>()->value_name("date"), + "purges images that expired before the given date"); + options->add_options() + (THRESHOLD.c_str(), po::value<float>(), + "purges images until the current pool data usage is reduced to X%, " + "value range: 0.0-1.0"); +} + +int execute_purge(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + utils::disable_cache(); + + librbd::RBD rbd; + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + io_ctx.set_pool_full_try(); + + float threshold = -1; + time_t expire_ts = 0; + + if (vm.find(THRESHOLD) != vm.end()) { + threshold = vm[THRESHOLD].as<float>(); + } else { + if (vm.find(EXPIRED_BEFORE) != vm.end()) { + utime_t new_time; + r = utime_t::invoke_date(vm[EXPIRED_BEFORE].as<std::string>(), &new_time); + if (r < 0) { + std::cerr << "rbd: error calling /bin/date: " << cpp_strerror(r) + << std::endl; + return r; + } + expire_ts = new_time.sec(); + } + } + + utils::ProgressContext pc("Removing images", vm[at::NO_PROGRESS].as<bool>()); + r = rbd.trash_purge_with_progress(io_ctx, expire_ts, threshold, pc); + if (r < 0) { + pc.fail(); + if (r == -ENOTEMPTY || r == -EBUSY || r == -EMLINK || r == -EUCLEAN) { + std::cerr << "rbd: some expired images could not be removed" + << std::endl + << "Ensure that they are closed/unmapped, do not have " + << "snapshots (including trashed snapshots with linked " + << "clones), are not in a group and were moved to the " + << "trash successfully." 
+ << std::endl; + } + return r; + } + + pc.finish(); + return 0; +} + +void get_restore_arguments(po::options_description *positional, + po::options_description *options) { + positional->add_options() + (at::IMAGE_ID.c_str(), "image id\n(example: [<pool-name>/]<image-id>)"); + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE, ""); +} + +int execute_restore(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_id; + int r = utils::get_pool_image_id(vm, &arg_index, &pool_name, &namespace_name, + &image_id); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + std::string name; + if (vm.find(at::IMAGE_NAME) != vm.end()) { + name = vm[at::IMAGE_NAME].as<std::string>(); + } + + librbd::RBD rbd; + r = rbd.trash_restore(io_ctx, image_id.c_str(), name.c_str()); + if (r < 0) { + if (r == -ENOENT) { + std::cerr << "rbd: error: image does not exist in trash" + << std::endl; + } else if (r == -EEXIST) { + std::cerr << "rbd: error: an image with the same name already exists, " + << "try again with a different name" + << std::endl; + } else { + std::cerr << "rbd: restore error: " << cpp_strerror(r) << std::endl; + } + return r; + } + + return r; +} + +Shell::Action action_move( + {"trash", "move"}, {"trash", "mv"}, "Move an image to the trash.", "", + &get_move_arguments, &execute_move); + +Shell::Action action_remove( + {"trash", "remove"}, {"trash", "rm"}, "Remove an image from trash.", "", + &get_remove_arguments, &execute_remove); + +Shell::Action action_purge( + {"trash", "purge"}, {}, "Remove all expired images from trash.", "", + 
&get_purge_arguments, &execute_purge); + +Shell::Action action_list( + {"trash", "list"}, {"trash", "ls"}, "List trash images.", "", + &get_list_arguments, &execute_list); + +Shell::Action action_restore( + {"trash", "restore"}, {}, "Restore an image from trash.", "", + &get_restore_arguments, &execute_restore); + +} // namespace trash +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/TrashPurgeSchedule.cc b/src/tools/rbd/action/TrashPurgeSchedule.cc new file mode 100644 index 000000000..5c133c295 --- /dev/null +++ b/src/tools/rbd/action/TrashPurgeSchedule.cc @@ -0,0 +1,355 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Schedule.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/ceph_context.h" +#include "common/ceph_json.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "global/global_context.h" +#include "include/stringify.h" + +#include <iostream> +#include <list> +#include <map> +#include <set> +#include <string> +#include <boost/program_options.hpp> + +#include "json_spirit/json_spirit.h" + +namespace rbd { +namespace action { +namespace trash_purge_schedule { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +class ScheduleStatus { +public: + ScheduleStatus() { + } + + int parse(const std::string &status) { + json_spirit::mValue json_root; + if(!json_spirit::read(status, json_root)) { + std::cerr << "rbd: invalid schedule status JSON received" << std::endl; + return -EBADMSG; + } + + try { + auto &s = json_root.get_obj(); + + if (s["scheduled"].type() != json_spirit::array_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "scheduled is not array" << std::endl; + return -EBADMSG; + } + + for (auto &item_val : s["scheduled"].get_array()) { + if (item_val.type() != 
json_spirit::obj_type) { + std::cerr << "rbd: unexpected schedule status JSON received: " + << "schedule item is not object" << std::endl; + return -EBADMSG; + } + + auto &item = item_val.get_obj(); + + if (item["pool_name"].type() != json_spirit::str_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "pool_name is not string" << std::endl; + return -EBADMSG; + } + auto pool_name = item["pool_name"].get_str(); + + if (item["namespace"].type() != json_spirit::str_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "namespace is not string" << std::endl; + return -EBADMSG; + } + auto namespace_name = item["namespace"].get_str(); + + if (item["schedule_time"].type() != json_spirit::str_type) { + std::cerr << "rbd: unexpected schedule JSON received: " + << "schedule_time is not string" << std::endl; + return -EBADMSG; + } + auto schedule_time = item["schedule_time"].get_str(); + + scheduled.insert({pool_name, namespace_name, schedule_time}); + } + + } catch (std::runtime_error &) { + std::cerr << "rbd: invalid schedule JSON received" << std::endl; + return -EBADMSG; + } + + return 0; + } + + void dump(Formatter *f) { + f->open_array_section("scheduled"); + for (auto &item : scheduled) { + f->open_object_section("item"); + f->dump_string("pool", item.pool_name); + f->dump_string("namespace", item.namespace_name); + f->dump_string("schedule_time", item.schedule_time); + f->close_section(); // item + } + f->close_section(); // scheduled + } + + friend std::ostream& operator<<(std::ostream& os, ScheduleStatus &d); + +private: + + struct Item { + std::string pool_name; + std::string namespace_name; + std::string schedule_time; + + Item(const std::string &pool_name, const std::string &namespace_name, + const std::string &schedule_time) + : pool_name(pool_name), namespace_name(namespace_name), + schedule_time(schedule_time) { + } + + bool operator<(const Item &rhs) const { + if (pool_name != rhs.pool_name) { + return pool_name < 
rhs.pool_name; + } + return namespace_name < rhs.namespace_name; + } + }; + + std::set<Item> scheduled; +}; + +std::ostream& operator<<(std::ostream& os, ScheduleStatus &s) { + TextTable tbl; + tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("NAMESPACE", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("SCHEDULE TIME", TextTable::LEFT, TextTable::LEFT); + + for (auto &item : s.scheduled) { + tbl << item.pool_name << item.namespace_name << item.schedule_time + << TextTable::endrow; + } + + os << tbl; + return os; +} + +} // anonymous namespace + +void get_arguments_add(po::options_description *positional, + po::options_description *options) { + add_level_spec_options(options, false); + add_schedule_options(positional, true); +} + +int execute_add(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::map<std::string, std::string> args; + + int r = get_level_spec_args(vm, &args); + if (r < 0) { + return r; + } + r = get_schedule_args(vm, true, &args); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + normalize_level_spec_args(&args); + r = utils::mgr_command(rados, "rbd trash purge schedule add", args, + &std::cout, &std::cerr); + if (r < 0) { + return r; + } + + return 0; +} + +void get_arguments_remove(po::options_description *positional, + po::options_description *options) { + add_level_spec_options(options, false); + add_schedule_options(positional, false); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::map<std::string, std::string> args; + + int r = get_level_spec_args(vm, &args); + if (r < 0) { + return r; + } + r = get_schedule_args(vm, false, &args); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + normalize_level_spec_args(&args); + r = 
utils::mgr_command(rados, "rbd trash purge schedule remove", args, + &std::cout, &std::cerr); + if (r < 0) { + return r; + } + + return 0; +} + +void get_arguments_list(po::options_description *positional, + po::options_description *options) { + add_level_spec_options(options, false); + options->add_options() + ("recursive,R", po::bool_switch(), "list all schedules"); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::map<std::string, std::string> args; + + int r = get_level_spec_args(vm, &args); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + normalize_level_spec_args(&args); + std::stringstream out; + r = utils::mgr_command(rados, "rbd trash purge schedule list", args, &out, + &std::cerr); + if (r < 0) { + return r; + } + + ScheduleList schedule_list(false); + r = schedule_list.parse(out.str()); + if (r < 0) { + return r; + } + + if (vm["recursive"].as<bool>()) { + if (formatter.get()) { + schedule_list.dump(formatter.get()); + formatter->flush(std::cout); + } else { + std::cout << schedule_list; + } + } else { + auto schedule = schedule_list.find(args["level_spec"]); + if (schedule == nullptr) { + return -ENOENT; + } + + if (formatter.get()) { + schedule->dump(formatter.get()); + formatter->flush(std::cout); + } else { + std::cout << *schedule << std::endl; + } + } + + return 0; +} + +void get_arguments_status(po::options_description *positional, + po::options_description *options) { + add_level_spec_options(options, false); + at::add_format_options(options); +} + +int execute_status(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::map<std::string, std::string> args; + + int r = get_level_spec_args(vm, &args); + if (r < 0) { + 
return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + normalize_level_spec_args(&args); + std::stringstream out; + r = utils::mgr_command(rados, "rbd trash purge schedule status", args, &out, + &std::cerr); + ScheduleStatus schedule_status; + r = schedule_status.parse(out.str()); + if (r < 0) { + return r; + } + + if (formatter.get()) { + schedule_status.dump(formatter.get()); + formatter->flush(std::cout); + } else { + std::cout << schedule_status; + } + + return 0; +} + +Shell::SwitchArguments switched_arguments({"recursive", "R"}); + +Shell::Action add_action( + {"trash", "purge", "schedule", "add"}, {}, "Add trash purge schedule.", "", + &get_arguments_add, &execute_add); +Shell::Action remove_action( + {"trash", "purge", "schedule", "remove"}, + {"trash", "purge", "schedule", "rm"}, "Remove trash purge schedule.", + "", &get_arguments_remove, &execute_remove); +Shell::Action list_action( + {"trash", "purge", "schedule", "list"}, + {"trash", "purge", "schedule", "ls"}, "List trash purge schedule.", + "", &get_arguments_list, &execute_list); +Shell::Action status_action( + {"trash", "purge", "schedule", "status"}, {}, + "Show trash purge schedule status.", "", &get_arguments_status, + &execute_status); + +} // namespace trash_purge_schedule +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Watch.cc b/src/tools/rbd/action/Watch.cc new file mode 100644 index 000000000..98697bc28 --- /dev/null +++ b/src/tools/rbd/action/Watch.cc @@ -0,0 +1,149 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/rbd_types.h" +#include "librbd/WatchNotifyTypes.h" +#include "common/errno.h" +#include <iostream> +#include 
<boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace watch { + +namespace at = argument_types; +namespace po = boost::program_options; + +class RbdWatchCtx : public librados::WatchCtx2 { +public: + RbdWatchCtx(librados::IoCtx& io_ctx, const char *image_name, + const std::string &header_oid) + : m_io_ctx(io_ctx), m_image_name(image_name), m_header_oid(header_oid) + { + } + + ~RbdWatchCtx() override {} + + void handle_notify(uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) override { + using namespace librbd::watch_notify; + NotifyMessage notify_message; + if (bl.length() == 0) { + notify_message = NotifyMessage(new HeaderUpdatePayload()); + } else { + try { + auto iter = bl.cbegin(); + notify_message.decode(iter); + } catch (const buffer::error &err) { + std::cerr << "rbd: failed to decode image notification" << std::endl; + } + } + + std::cout << m_image_name << " received notification: notify_id=" + << notify_id << ", cookie=" << cookie << ", notifier_id=" + << notifier_id << ", bl.length=" << bl.length() << ", notify_op=" + << notify_message.get_notify_op() << std::endl; + bufferlist reply; + m_io_ctx.notify_ack(m_header_oid, notify_id, cookie, reply); + } + + void handle_error(uint64_t cookie, int err) override { + std::cerr << m_image_name << " received error: cookie=" << cookie << ", " + << "err=" << cpp_strerror(err) << std::endl; + } +private: + librados::IoCtx m_io_ctx; + const char *m_image_name; + std::string m_header_oid; +}; + +static int do_watch(librados::IoCtx& pp, librbd::Image &image, + const char *imgname) +{ + uint8_t old_format; + int r = image.old_format(&old_format); + if (r < 0) { + std::cerr << "failed to query format" << std::endl; + return r; + } + + std::string header_oid; + if (old_format != 0) { + header_oid = std::string(imgname) + RBD_SUFFIX; + } else { + std::string id; + r = image.get_id(&id); + if (r < 0) { + return r; + } + + header_oid = RBD_HEADER_PREFIX + id; + } + + 
uint64_t cookie; + RbdWatchCtx ctx(pp, imgname, header_oid); + r = pp.watch2(header_oid, &cookie, &ctx); + if (r < 0) { + std::cerr << "rbd: watch failed" << std::endl; + return r; + } + + std::cout << "press enter to exit..." << std::endl; + getchar(); + + r = pp.unwatch2(cookie); + if (r < 0) { + std::cerr << "rbd: unwatch failed" << std::endl; + return r; + } + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_watch(io_ctx, image, image_name.c_str()); + if (r < 0) { + std::cerr << "rbd: watch failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"watch"}, {}, "Watch events on image.", "", &get_arguments, &execute); + +} // namespace watch +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Wnbd.cc b/src/tools/rbd/action/Wnbd.cc new file mode 100644 index 000000000..85d2c7057 --- /dev/null +++ b/src/tools/rbd/action/Wnbd.cc @@ -0,0 +1,172 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include 
"include/stringify.h" +#include "common/SubProcess.h" +#include <iostream> +#include <boost/algorithm/string.hpp> +#include <boost/algorithm/string/predicate.hpp> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace wnbd { + +namespace at = argument_types; +namespace po = boost::program_options; + +#if defined(_WIN32) +static int call_wnbd_cmd(const po::variables_map &vm, + const std::vector<std::string> &args, + const std::vector<std::string> &ceph_global_init_args) { + char exe_path[PATH_MAX]; + ssize_t exe_path_bytes = get_self_exe_path(exe_path, PATH_MAX); + + if (exe_path_bytes > 4) { + // Drop .exe suffix as we're going to add the "-wnbd" suffix. + exe_path[strlen(exe_path) - 4] = '\0'; + exe_path_bytes -= 4; + } + + if (exe_path_bytes < 0) { + strcpy(exe_path, "rbd-wnbd"); + } else { + if (snprintf(exe_path + exe_path_bytes, + sizeof(exe_path) - exe_path_bytes, + "-wnbd") < 0) { + return -EOVERFLOW; + } + } + + SubProcess process(exe_path, SubProcess::KEEP, SubProcess::KEEP, SubProcess::KEEP); + + for (auto &arg : ceph_global_init_args) { + process.add_cmd_arg(arg.c_str()); + } + + for (auto &arg : args) { + process.add_cmd_arg(arg.c_str()); + } + + if (process.spawn()) { + std::cerr << "rbd: failed to run rbd-wnbd: " << process.err() << std::endl; + return -EINVAL; + } + int exit_code = process.join(); + if (exit_code) { + std::cerr << "rbd: rbd-wnbd failed with error: " << process.err() << std::endl; + return exit_code; + } + + return 0; +} +#endif + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(_WIN32) + std::cerr << "rbd: wnbd is only supported on Windows" << std::endl; + return -EOPNOTSUPP; +#else + std::vector<std::string> args; + + args.push_back("list"); + + if (vm.count("format")) { + args.push_back("--format"); + args.push_back(vm["format"].as<at::Format>().value); + } + if (vm["pretty-format"].as<bool>()) { + 
args.push_back("--pretty-format"); + } + + return call_wnbd_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_map(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(_WIN32) + std::cerr << "rbd: wnbd is only supported on Windows" << std::endl; + return -EOPNOTSUPP; +#else + std::vector<std::string> args; + + args.push_back("map"); + std::string img; + int r = utils::get_image_or_snap_spec(vm, &img); + if (r < 0) { + return r; + } + args.push_back(img); + + if (vm["read-only"].as<bool>()) { + args.push_back("--read-only"); + } + + if (vm["exclusive"].as<bool>()) { + args.push_back("--exclusive"); + } + + if (vm.count("options")) { + utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(), + &args); + } + + return call_wnbd_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_unmap(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(_WIN32) + std::cerr << "rbd: wnbd is only supported on Windows" << std::endl; + return -EOPNOTSUPP; +#else + std::string image_name; + + int r = utils::get_image_or_snap_spec(vm, &image_name); + if (r < 0) { + return r; + } + + std::vector<std::string> args; + + args.push_back("unmap"); + args.push_back(image_name); + + if (vm.count("options")) { + utils::append_options_as_args(vm["options"].as<std::vector<std::string>>(), + &args); + } + + return call_wnbd_cmd(vm, args, ceph_global_init_args); +#endif +} + +int execute_attach(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(_WIN32) + std::cerr << "rbd: wnbd is only supported on Windows" << std::endl; +#else + std::cerr << "rbd: wnbd attach command not supported" << std::endl; +#endif + return -EOPNOTSUPP; +} + +int execute_detach(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(_WIN32) + std::cerr << "rbd: wnbd is only supported on Windows" 
<< std::endl; +#else + std::cerr << "rbd: wnbd detach command not supported" << std::endl; +#endif + return -EOPNOTSUPP; +} + +} // namespace wnbd +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/rbd.cc b/src/tools/rbd/rbd.cc new file mode 100644 index 000000000..a8c59d575 --- /dev/null +++ b/src/tools/rbd/rbd.cc @@ -0,0 +1,10 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/Shell.h" + +int main(int argc, const char **argv) +{ + rbd::Shell shell; + return shell.execute(argc, argv); +} diff --git a/src/tools/rbd_ggate/CMakeLists.txt b/src/tools/rbd_ggate/CMakeLists.txt new file mode 100644 index 000000000..5c5572c48 --- /dev/null +++ b/src/tools/rbd_ggate/CMakeLists.txt @@ -0,0 +1,9 @@ +add_executable(rbd-ggate + Driver.cc + Server.cc + Watcher.cc + debug.cc + ggate_drv.c + main.cc) +target_link_libraries(rbd-ggate geom librbd librados global) +install(TARGETS rbd-ggate DESTINATION bin) diff --git a/src/tools/rbd_ggate/Driver.cc b/src/tools/rbd_ggate/Driver.cc new file mode 100644 index 000000000..80acfe00c --- /dev/null +++ b/src/tools/rbd_ggate/Driver.cc @@ -0,0 +1,165 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <stdlib.h> + +#include "common/debug.h" +#include "common/errno.h" +#include "Driver.h" +#include "Request.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate::Driver: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace ggate { + +int Driver::load() { + + return ggate_drv_load(); +} + +int Driver::kill(const std::string &devname) { + + int r = ggate_drv_kill(devname.c_str()); + + return r; +} + +int Driver::list(std::map<std::string, DevInfo> *devices) { + size_t size = 1024; + ggate_drv_info *devs = nullptr; + int r; + + while (size <= 1024 * 1024) { + devs = 
static_cast<ggate_drv_info *>( + realloc(static_cast<void *>(devs), size * sizeof(*devs))); + r = ggate_drv_list(devs, &size); + if (r != -ERANGE) { + break; + } + } + if (r < 0) { + goto free; + } + + devices->clear(); + for (size_t i = 0; i < size; i++) { + auto &dev = devs[i]; + (*devices)[dev.id] = {dev.name, dev.info}; + } + +free: + free(devs); + + return r; +} + +Driver::Driver(const std::string &devname, size_t sectorsize, size_t mediasize, + bool readonly, const std::string &info) + : m_devname(devname), m_sectorsize(sectorsize), m_mediasize(mediasize), + m_readonly(readonly), m_info(info) { +} + +int Driver::init() { + dout(20) << dendl; + + char name[PATH_MAX]; + size_t namelen; + + if (m_devname.empty()) { + name[0] = '\0'; + namelen = PATH_MAX; + } else { + namelen = m_devname.size(); + if (namelen >= PATH_MAX) { + return -ENAMETOOLONG; + } + strncpy(name, m_devname.c_str(), namelen + 1); + } + + int r = ggate_drv_create(name, namelen, m_sectorsize, m_mediasize, m_readonly, + m_info.c_str(), &m_drv); + if (r < 0) { + return r; + } + + if (m_devname.empty()) { + m_devname = name; + } + + return 0; +} + +std::string Driver::get_devname() const { + dout(30) << m_devname << dendl; + + return m_devname; +} + +void Driver::shut_down() { + dout(20) << dendl; + + ggate_drv_destroy(m_drv); +} + +int Driver::resize(size_t newsize) { + dout(20) << "newsize=" << newsize << dendl; + + int r = ggate_drv_resize(m_drv, newsize); + if (r < 0) { + return r; + } + + m_mediasize = newsize; + return 0; +} + +int Driver::recv(Request **req) { + dout(20) << dendl; + + ggate_drv_req_t req_; + + int r = ggate_drv_recv(m_drv, &req_); + if (r < 0) { + return r; + } + + *req = new Request(req_); + + dout(20) << "req=" << *req << dendl; + + if (ggate_drv_req_cmd(req_) == GGATE_DRV_CMD_WRITE) { + bufferptr ptr(buffer::claim_malloc( + ggate_drv_req_length(req_), + static_cast<char *>(ggate_drv_req_release_buf(req_)))); + (*req)->bl.push_back(ptr); + } + + return 0; +} + +int 
Driver::send(Request *req) { + dout(20) << "req=" << req << dendl; + + if (ggate_drv_req_cmd(req->req) == GGATE_DRV_CMD_READ && + ggate_drv_req_error(req->req) == 0) { + ceph_assert(req->bl.length() == ggate_drv_req_length(req->req)); + // TODO: avoid copying? + req->bl.begin().copy(ggate_drv_req_length(req->req), + static_cast<char *>(ggate_drv_req_buf(req->req))); + dout(20) << "copied resulting " << req->bl.length() << " bytes to " + << ggate_drv_req_buf(req->req) << dendl; + } + + int r = ggate_drv_send(m_drv, req->req); + + delete req; + return r; +} + +} // namespace ggate +} // namespace rbd diff --git a/src/tools/rbd_ggate/Driver.h b/src/tools/rbd_ggate/Driver.h new file mode 100644 index 000000000..50be72b9c --- /dev/null +++ b/src/tools/rbd_ggate/Driver.h @@ -0,0 +1,50 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_DRIVER_H +#define CEPH_RBD_GGATE_DRIVER_H + +#include <map> +#include <string> + +#include "ggate_drv.h" + +namespace rbd { +namespace ggate { + +struct Request; + +class Driver { +public: + typedef std::pair<std::string, std::string> DevInfo; + static int load(); + static int kill(const std::string &devname); + static int list(std::map<std::string, DevInfo> *devices); + + Driver(const std::string &devname, size_t sectorsize, size_t mediasize, + bool readonly, const std::string &info); + + int init(); + void shut_down(); + + std::string get_devname() const; + + int recv(Request **req); + int send(Request *req); + + int resize(size_t newsize); + +private: + std::string m_devname; + size_t m_sectorsize; + size_t m_mediasize; + bool m_readonly; + std::string m_info; + ggate_drv_t m_drv = 0; +}; + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_DRIVER_H + diff --git a/src/tools/rbd_ggate/Request.h b/src/tools/rbd_ggate/Request.h new file mode 100644 index 000000000..66f219858 --- /dev/null +++ b/src/tools/rbd_ggate/Request.h @@ -0,0 +1,55 @@ +// 
-*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_REQUEST_H +#define CEPH_RBD_GGATE_REQUEST_H + +#include "ggate_drv.h" + +namespace rbd { +namespace ggate { + +struct Request { + enum Command { + Unknown = 0, + Write = 1, + Read = 2, + Flush = 3, + Discard = 4, + }; + + ggate_drv_req_t req; + bufferlist bl; + + Request(ggate_drv_req_t req) : req(req) { + } + + uint64_t get_id() { + return ggate_drv_req_id(req); + } + + Command get_cmd() { + return static_cast<Command>(ggate_drv_req_cmd(req)); + } + + size_t get_length() { + return ggate_drv_req_length(req); + } + + uint64_t get_offset() { + return ggate_drv_req_offset(req); + } + + uint64_t get_error() { + return ggate_drv_req_error(req); + } + + void set_error(int error) { + ggate_drv_req_set_error(req, error); + } +}; + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_REQUEST_H diff --git a/src/tools/rbd_ggate/Server.cc b/src/tools/rbd_ggate/Server.cc new file mode 100644 index 000000000..2565ba10f --- /dev/null +++ b/src/tools/rbd_ggate/Server.cc @@ -0,0 +1,262 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "Driver.h" +#include "Server.h" +#include "Request.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate::Server: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace ggate { + +Server::Server(Driver *drv, librbd::Image& image) + : m_drv(drv), m_image(image), + m_reader_thread(this, &Server::reader_entry), + m_writer_thread(this, &Server::writer_entry) { +} + +void Server::run() { + dout(10) << dendl; + + int r = start(); + ceph_assert(r == 0); + + dout(20) << "entering run loop" << dendl; + + { + std::unique_lock locker{m_lock}; + m_cond.wait(locker, [this] { return m_stopping;}); + } + + 
dout(20) << "exiting run loop" << dendl; + + stop(); +} + +int Server::start() { + dout(10) << dendl; + + m_reader_thread.create("rbd_reader"); + m_writer_thread.create("rbd_writer"); + return 0; +} + +void Server::stop() { + dout(10) << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_stopping); + } + + m_reader_thread.join(); + m_writer_thread.join(); + + wait_clean(); +} + +void Server::io_start(IOContext *ctx) { + dout(20) << ctx << dendl; + + std::lock_guard locker{m_lock}; + m_io_pending.push_back(&ctx->item); +} + +void Server::io_finish(IOContext *ctx) { + dout(20) << ctx << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(ctx->item.is_on_list()); + + ctx->item.remove_myself(); + m_io_finished.push_back(&ctx->item); + m_cond.notify_all(); +} + +Server::IOContext *Server::wait_io_finish() { + dout(20) << dendl; + + std::unique_lock locker{m_lock}; + m_cond.wait(locker, [this] { return !m_io_finished.empty() || m_stopping;}); + + if (m_io_finished.empty()) { + return nullptr; + } + + IOContext *ret = m_io_finished.front(); + m_io_finished.pop_front(); + + return ret; +} + +void Server::wait_clean() { + dout(20) << dendl; + + ceph_assert(!m_reader_thread.is_started()); + + std::unique_lock locker{m_lock}; + m_cond.wait(locker, [this] { return m_io_pending.empty();}); + + while (!m_io_finished.empty()) { + std::unique_ptr<IOContext> free_ctx(m_io_finished.front()); + m_io_finished.pop_front(); + } +} + +void Server::aio_callback(librbd::completion_t cb, void *arg) { + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(cb); + + IOContext *ctx = reinterpret_cast<IOContext *>(arg); + int r = aio_completion->get_return_value(); + + ctx->server->handle_aio(ctx, r); + aio_completion->release(); +} + +void Server::handle_aio(IOContext *ctx, int r) { + dout(20) << ctx << ": r=" << r << dendl; + + if (r == -EINVAL) { + // if shrinking an image, a pagecache writeback might reference + // extents outside of 
the range of the new image extents + dout(5) << "masking IO out-of-bounds error" << dendl; + ctx->req->bl.clear(); + r = 0; + } + + if (r < 0) { + ctx->req->set_error(-r); + } else if ((ctx->req->get_cmd() == Request::Read) && + r != static_cast<int>(ctx->req->get_length())) { + int pad_byte_count = static_cast<int> (ctx->req->get_length()) - r; + ctx->req->bl.append_zero(pad_byte_count); + dout(20) << ctx << ": pad byte count: " << pad_byte_count << dendl; + ctx->req->set_error(0); + } else { + ctx->req->set_error(0); + } + io_finish(ctx); +} + +void Server::reader_entry() { + dout(20) << dendl; + + while (!m_stopping) { + std::unique_ptr<IOContext> ctx(new IOContext(this)); + + dout(20) << "waiting for ggate request" << dendl; + + int r = m_drv->recv(&ctx->req); + if (r < 0) { + if (r != -ECANCELED) { + derr << "recv: " << cpp_strerror(r) << dendl; + } + std::lock_guard locker{m_lock}; + m_stopping = true; + m_cond.notify_all(); + return; + } + + IOContext *pctx = ctx.release(); + + dout(20) << pctx << ": start: " << *pctx << dendl; + + io_start(pctx); + librbd::RBD::AioCompletion *c = + new librbd::RBD::AioCompletion(pctx, aio_callback); + switch (pctx->req->get_cmd()) + { + case rbd::ggate::Request::Write: + m_image.aio_write(pctx->req->get_offset(), pctx->req->get_length(), + pctx->req->bl, c); + break; + case rbd::ggate::Request::Read: + m_image.aio_read(pctx->req->get_offset(), pctx->req->get_length(), + pctx->req->bl, c); + break; + case rbd::ggate::Request::Flush: + m_image.aio_flush(c); + break; + case rbd::ggate::Request::Discard: + m_image.aio_discard(pctx->req->get_offset(), pctx->req->get_length(), c); + break; + default: + derr << pctx << ": invalid request command: " << pctx->req->get_cmd() + << dendl; + c->release(); + std::lock_guard locker{m_lock}; + m_stopping = true; + m_cond.notify_all(); + return; + } + } + dout(20) << "terminated" << dendl; +} + +void Server::writer_entry() { + dout(20) << dendl; + + while (!m_stopping) { + dout(20) << 
"waiting for io request" << dendl; + + std::unique_ptr<IOContext> ctx(wait_io_finish()); + if (!ctx) { + dout(20) << "no io requests, terminating" << dendl; + return; + } + + dout(20) << ctx.get() << ": got: " << *ctx << dendl; + + int r = m_drv->send(ctx->req); + if (r < 0) { + derr << ctx.get() << ": send: " << cpp_strerror(r) << dendl; + std::lock_guard locker{m_lock}; + m_stopping = true; + m_cond.notify_all(); + return; + } + dout(20) << ctx.get() << " finish" << dendl; + } + dout(20) << "terminated" << dendl; +} + +std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx) { + + os << "[" << ctx.req->get_id(); + + switch (ctx.req->get_cmd()) + { + case rbd::ggate::Request::Write: + os << " Write "; + break; + case rbd::ggate::Request::Read: + os << " Read "; + break; + case rbd::ggate::Request::Flush: + os << " Flush "; + break; + case rbd::ggate::Request::Discard: + os << " Discard "; + break; + default: + os << " Unknow(" << ctx.req->get_cmd() << ") "; + break; + } + + os << ctx.req->get_offset() << "~" << ctx.req->get_length() << " " + << ctx.req->get_error() << "]"; + + return os; +} + +} // namespace ggate +} // namespace rbd + diff --git a/src/tools/rbd_ggate/Server.h b/src/tools/rbd_ggate/Server.h new file mode 100644 index 000000000..bb31b89f7 --- /dev/null +++ b/src/tools/rbd_ggate/Server.h @@ -0,0 +1,88 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_SERVER_H +#define CEPH_RBD_GGATE_SERVER_H + +#include "include/rbd/librbd.hpp" +#include "include/xlist.h" +#include "common/ceph_mutex.h" +#include "common/Thread.h" + +namespace rbd { +namespace ggate { + +class Driver; +struct Request; + +class Server { +public: + Server(Driver *drv, librbd::Image& image); + + void run(); + +private: + struct IOContext { + xlist<IOContext*>::item item; + Server *server; + Request *req = nullptr; + + IOContext(Server *server) : item(this), server(server) { + } + }; + + class 
ThreadHelper : public Thread { + public: + typedef void (Server::*entry_func)(); + + ThreadHelper(Server *server, entry_func func) + : server(server), func(func) { + } + + protected: + virtual void* entry() { + (server->*func)(); + return nullptr; + } + + private: + Server *server; + entry_func func; + }; + + friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); + + Driver *m_drv; + librbd::Image &m_image; + + mutable ceph::mutex m_lock = + ceph::make_mutex("rbd::ggate::Server::m_lock"); + ceph::condition_variable m_cond; + bool m_stopping = false; + ThreadHelper m_reader_thread, m_writer_thread; + xlist<IOContext*> m_io_pending; + xlist<IOContext*> m_io_finished; + + static void aio_callback(librbd::completion_t cb, void *arg); + + int start(); + void stop(); + + void reader_entry(); + void writer_entry(); + + void io_start(IOContext *ctx); + void io_finish(IOContext *ctx); + + IOContext *wait_io_finish(); + void wait_clean(); + + void handle_aio(IOContext *ctx, int r); +}; + +std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx); + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_SERVER_H diff --git a/src/tools/rbd_ggate/Watcher.cc b/src/tools/rbd_ggate/Watcher.cc new file mode 100644 index 000000000..57b3f960e --- /dev/null +++ b/src/tools/rbd_ggate/Watcher.cc @@ -0,0 +1,48 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "Driver.h" +#include "Watcher.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate::Watcher: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace ggate { + +Watcher::Watcher(Driver *drv, librados::IoCtx &ioctx, librbd::Image &image, + size_t size) + : m_drv(drv), m_ioctx(ioctx), m_image(image), m_size(size) { +} + +void Watcher::handle_notify() { + dout(20) << 
dendl; + + librbd::image_info_t info; + + if (m_image.stat(info, sizeof(info)) == 0) { + size_t new_size = info.size; + + if (new_size != m_size) { + int r = m_drv->resize(new_size); + if (r < 0) { + derr << "resize failed: " << cpp_strerror(r) << dendl; + m_drv->shut_down(); + } + r = m_image.invalidate_cache(); + if (r < 0) { + derr << "invalidate rbd cache failed: " << cpp_strerror(r) << dendl; + m_drv->shut_down(); + } + m_size = new_size; + } + } +} + +} // namespace ggate +} // namespace rbd diff --git a/src/tools/rbd_ggate/Watcher.h b/src/tools/rbd_ggate/Watcher.h new file mode 100644 index 000000000..8f524b43f --- /dev/null +++ b/src/tools/rbd_ggate/Watcher.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_WATCHER_H +#define CEPH_RBD_GGATE_WATCHER_H + +#include "include/rbd/librbd.hpp" + +namespace rbd { +namespace ggate { + +class Driver; + +class Watcher : public librbd::UpdateWatchCtx +{ +public: + Watcher(Driver *m_drv, librados::IoCtx &ioctx, librbd::Image &image, + size_t size); + + void handle_notify() override; + +private: + Driver *m_drv; + librados::IoCtx &m_ioctx; + librbd::Image &m_image; + size_t m_size; +}; + + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_WATCHER_H + diff --git a/src/tools/rbd_ggate/debug.cc b/src/tools/rbd_ggate/debug.cc new file mode 100644 index 000000000..b675ba5b3 --- /dev/null +++ b/src/tools/rbd_ggate/debug.cc @@ -0,0 +1,55 @@ +#include "common/debug.h" +#include "common/errno.h" +#include "debug.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate: " + +extern "C" void debugv(int level, const char *fmt, va_list ap) { + char *msg; + int saved_errno = errno; + + if (g_ceph_context == nullptr) { + return; + } + + vasprintf(&msg, fmt, ap); + + dout(ceph::dout::need_dynamic(level)) << msg << dendl; + + free(msg); + 
errno = saved_errno; +} + +extern "C" void debug(int level, const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + debugv(level, fmt, ap); + va_end(ap); +} + +extern "C" void errx(const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + debugv(-1, fmt, ap); + va_end(ap); +} + +extern "C" void err(const char *fmt, ...) { + va_list ap; + char *msg; + int saved_errno = errno; + + va_start(ap, fmt); + vasprintf(&msg, fmt, ap); + va_end(ap); + errno = saved_errno; + + errx("%s: %s", msg, cpp_strerror(errno).c_str()); + + free(msg); +} diff --git a/src/tools/rbd_ggate/debug.h b/src/tools/rbd_ggate/debug.h new file mode 100644 index 000000000..da9b46a38 --- /dev/null +++ b/src/tools/rbd_ggate/debug.h @@ -0,0 +1,17 @@ +#ifndef CEPH_RBD_GGATE_DEBUG_H +#define CEPH_RBD_GGATE_DEBUG_H + +#ifdef __cplusplus +extern "C" { +#endif + +void debug(int level, const char *fmt, ...) __printflike(2, 3); +void debugv(int level, const char *fmt, va_list ap) __printflike(2, 0); +void err(const char *fmt, ...) __printflike(1, 2); +void errx(const char *fmt, ...) 
__printflike(1, 2); + +#ifdef __cplusplus +} +#endif + +#endif // CEPH_RBD_GGATE_DEBUG_H diff --git a/src/tools/rbd_ggate/ggate_drv.c b/src/tools/rbd_ggate/ggate_drv.c new file mode 100644 index 000000000..11f6cf0a4 --- /dev/null +++ b/src/tools/rbd_ggate/ggate_drv.c @@ -0,0 +1,379 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <sys/param.h> +#include <sys/bio.h> +#include <sys/disk.h> +#include <sys/linker.h> +#include <sys/queue.h> +#include <sys/stat.h> + +#include <geom/gate/g_gate.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libgeom.h> + +#include "debug.h" +#include "ggate_drv.h" + +uint64_t ggate_drv_req_id(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_seq; +} + +int ggate_drv_req_cmd(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + switch (ggio->gctl_cmd) { + case BIO_WRITE: + return GGATE_DRV_CMD_WRITE; + case BIO_READ: + return GGATE_DRV_CMD_READ; + case BIO_FLUSH: + return GGATE_DRV_CMD_FLUSH; + case BIO_DELETE: + return GGATE_DRV_CMD_DISCARD; + default: + return GGATE_DRV_CMD_UNKNOWN; + } +} + +uint64_t ggate_drv_req_offset(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_offset; +} + +size_t ggate_drv_req_length(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_length; +} + +void *ggate_drv_req_buf(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_data; +} + +int ggate_drv_req_error(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_error; +} + +void ggate_drv_req_set_error(ggate_drv_req_t req, int error) { + struct g_gate_ctl_io *ggio = 
(struct g_gate_ctl_io *)req; + + ggio->gctl_error = error; +} + +void *ggate_drv_req_release_buf(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + void *data = ggio->gctl_data; + ggio->gctl_data = NULL; + + return data; +} + +struct ggate_drv { + int fd; + int unit; +}; + +int ggate_drv_load() { + if (modfind("g_gate") != -1) { + /* Present in kernel. */ + return 0; + } + + if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { + if (errno != EEXIST) { + err("failed to load geom_gate module"); + return -errno; + } + } + return 0; +} + +int ggate_drv_create(char *name, size_t namelen, size_t sectorsize, + size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv_) { + struct ggate_drv *drv; + struct g_gate_ctl_create ggiocreate; + + debug(20, "%s: name=%s, sectorsize=%zd, mediasize=%zd, readonly=%d, info=%s", + __func__, name, sectorsize, mediasize, (int)readonly, info); + + if (*name != '\0') { + if (namelen > sizeof(ggiocreate.gctl_name) - 1) { + return -ENAMETOOLONG; + } + } + + /* + * We communicate with ggate via /dev/ggctl. Open it. + */ + int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR); + if (fd == -1) { + err("failed to open /dev/" G_GATE_CTL_NAME); + return -errno; + } + + drv = calloc(1, sizeof(*drv)); + if (drv == NULL) { + errno = -ENOMEM; + goto fail_close; + } + + /* + * Create provider. + */ + memset(&ggiocreate, 0, sizeof(ggiocreate)); + ggiocreate.gctl_version = G_GATE_VERSION; + ggiocreate.gctl_mediasize = mediasize; + ggiocreate.gctl_sectorsize = sectorsize; + ggiocreate.gctl_flags = readonly ? 
G_GATE_FLAG_READONLY : 0; + ggiocreate.gctl_maxcount = 0; + ggiocreate.gctl_timeout = 0; + if (*name != '\0') { + ggiocreate.gctl_unit = G_GATE_NAME_GIVEN; + strlcpy(ggiocreate.gctl_name, name, sizeof(ggiocreate.gctl_name)); + } else { + ggiocreate.gctl_unit = G_GATE_UNIT_AUTO; + } + strlcpy(ggiocreate.gctl_info, info, sizeof(ggiocreate.gctl_info)); + if (ioctl(fd, G_GATE_CMD_CREATE, &ggiocreate) == -1) { + err("failed to create " G_GATE_PROVIDER_NAME " device"); + goto fail; + } + + debug(20, "%s: created, unit: %d, name: %s", __func__, ggiocreate.gctl_unit, + ggiocreate.gctl_name); + + drv->fd = fd; + drv->unit = ggiocreate.gctl_unit; + *drv_ = drv; + + if (*name == '\0') { + snprintf(name, namelen, "%s%d", G_GATE_PROVIDER_NAME, drv->unit); + } + + return 0; + +fail: + free(drv); +fail_close: + close(fd); + return -errno; +} + +void ggate_drv_destroy(ggate_drv_t drv_) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + struct g_gate_ctl_destroy ggiodestroy; + + debug(20, "%s %p", __func__, drv); + + memset(&ggiodestroy, 0, sizeof(ggiodestroy)); + ggiodestroy.gctl_version = G_GATE_VERSION; + ggiodestroy.gctl_unit = drv->unit; + ggiodestroy.gctl_force = 1; + + // Remember errno. + int rerrno = errno; + + int r = ioctl(drv->fd, G_GATE_CMD_DESTROY, &ggiodestroy); + if (r == -1) { + err("failed to destroy /dev/%s%d device", G_GATE_PROVIDER_NAME, + drv->unit); + } + // Restore errno. 
+ errno = rerrno; + + free(drv); +} + +int ggate_drv_resize(ggate_drv_t drv_, size_t newsize) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + + debug(20, "%s %p: newsize=%zd", __func__, drv, newsize); + + struct g_gate_ctl_modify ggiomodify; + + memset(&ggiomodify, 0, sizeof(ggiomodify)); + ggiomodify.gctl_version = G_GATE_VERSION; + ggiomodify.gctl_unit = drv->unit; + ggiomodify.gctl_modify = GG_MODIFY_MEDIASIZE; + ggiomodify.gctl_mediasize = newsize; + + int r = ioctl(drv->fd, G_GATE_CMD_MODIFY, &ggiomodify); + if (r == -1) { + r = -errno; + err("failed to resize /dev/%s%d device", G_GATE_PROVIDER_NAME, drv->unit); + } + return r; +} + +int ggate_drv_kill(const char *devname) { + debug(20, "%s %s", __func__, devname); + + int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR); + if (fd == -1) { + err("failed to open /dev/" G_GATE_CTL_NAME); + return -errno; + } + + struct g_gate_ctl_destroy ggiodestroy; + memset(&ggiodestroy, 0, sizeof(ggiodestroy)); + ggiodestroy.gctl_version = G_GATE_VERSION; + ggiodestroy.gctl_unit = G_GATE_NAME_GIVEN; + ggiodestroy.gctl_force = 1; + + strlcpy(ggiodestroy.gctl_name, devname, sizeof(ggiodestroy.gctl_name)); + + int r = ioctl(fd, G_GATE_CMD_DESTROY, &ggiodestroy); + if (r == -1) { + r = -errno; + err("failed to destroy %s device", devname); + } + + close(fd); + return r; +} + +int ggate_drv_recv(ggate_drv_t drv_, ggate_drv_req_t *req) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + struct g_gate_ctl_io *ggio; + int error, r; + + debug(20, "%s", __func__); + + ggio = calloc(1, sizeof(*ggio)); + if (ggio == NULL) { + return -ENOMEM; + } + + ggio->gctl_version = G_GATE_VERSION; + ggio->gctl_unit = drv->unit; + ggio->gctl_data = malloc(MAXPHYS); + ggio->gctl_length = MAXPHYS; + + debug(20, "%s: waiting for request from kernel", __func__); + if (ioctl(drv->fd, G_GATE_CMD_START, ggio) == -1) { + err("%s: G_GATE_CMD_START failed", __func__); + return -errno; + } + + debug(20, "%s: got request from kernel: " + "unit=%d, seq=%ju, 
cmd=%u, offset=%ju, length=%ju, error=%d, data=%p", + __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd, + (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length, + ggio->gctl_error, ggio->gctl_data); + + error = ggio->gctl_error; + switch (error) { + case 0: + break; + case ECANCELED: + debug(10, "%s: canceled: exit gracefully", __func__); + r = -error; + goto fail; + case ENOMEM: + /* + * Buffer too small? Impossible, we allocate MAXPHYS + * bytes - request can't be bigger than that. + */ + /* FALLTHROUGH */ + case ENXIO: + default: + errno = error; + err("%s: G_GATE_CMD_START failed", __func__); + r = -error; + goto fail; + } + + *req = ggio; + return 0; + +fail: + free(ggio->gctl_data); + free(ggio); + return r; +} + +int ggate_drv_send(ggate_drv_t drv_, ggate_drv_req_t req) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + int r = 0; + + debug(20, "%s: send request to kernel: " + "unit=%d, seq=%ju, cmd=%u, offset=%ju, length=%ju, error=%d, data=%p", + __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd, + (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length, + ggio->gctl_error, ggio->gctl_data); + + if (ioctl(drv->fd, G_GATE_CMD_DONE, ggio) == -1) { + err("%s: G_GATE_CMD_DONE failed", __func__); + r = -errno; + } + + free(ggio->gctl_data); + free(ggio); + return r; +} + +static const char * get_conf(struct ggeom *gp, const char *name) { + struct gconfig *conf; + + LIST_FOREACH(conf, &gp->lg_config, lg_config) { + if (strcmp(conf->lg_name, name) == 0) + return (conf->lg_val); + } + return ""; +} + +int ggate_drv_list(struct ggate_drv_info *info, size_t *size) { + struct gmesh mesh; + struct gclass *class; + struct ggeom *gp; + int r; + size_t max_size; + + r = geom_gettree(&mesh); + if (r != 0) { + return -errno; + } + + max_size = *size; + *size = 0; + + LIST_FOREACH(class, &mesh.lg_class, lg_class) { + if (strcmp(class->lg_name, G_GATE_CLASS_NAME) 
== 0) { + LIST_FOREACH(gp, &class->lg_geom, lg_geom) { + (*size)++; + } + if (*size > max_size) { + r = -ERANGE; + goto done; + } + LIST_FOREACH(gp, &class->lg_geom, lg_geom) { + strlcpy(info->id, get_conf(gp, "unit"), sizeof(info->id)); + strlcpy(info->name, gp->lg_name, sizeof(info->name)); + strlcpy(info->info, get_conf(gp, "info"), sizeof(info->info)); + info++; + } + } + } + +done: + geom_deletetree(&mesh); + return r; +} diff --git a/src/tools/rbd_ggate/ggate_drv.h b/src/tools/rbd_ggate/ggate_drv.h new file mode 100644 index 000000000..69268ebd4 --- /dev/null +++ b/src/tools/rbd_ggate/ggate_drv.h @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_GGATE_DRV_H +#define CEPH_RBD_GGATE_GGATE_DRV_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/param.h> + +#include <stdbool.h> +#include <stdint.h> + +typedef void *ggate_drv_t; +typedef void *ggate_drv_req_t; + +/* + * GGATE driver commands. They are mapped to GgateReq::Command. 
+ */ +enum { + GGATE_DRV_CMD_UNKNOWN = 0, + GGATE_DRV_CMD_WRITE = 1, + GGATE_DRV_CMD_READ = 2, + GGATE_DRV_CMD_FLUSH = 3, + GGATE_DRV_CMD_DISCARD = 4, +}; + +struct ggate_drv_info { + char id[16]; + char name[NAME_MAX]; + char info[2048]; /* G_GATE_INFOSIZE */ +}; + +uint64_t ggate_drv_req_id(ggate_drv_req_t req); +int ggate_drv_req_cmd(ggate_drv_req_t req); +void *ggate_drv_req_buf(ggate_drv_req_t req); +size_t ggate_drv_req_length(ggate_drv_req_t req); +uint64_t ggate_drv_req_offset(ggate_drv_req_t req); +int ggate_drv_req_error(ggate_drv_req_t req); + +void ggate_drv_req_set_error(ggate_drv_req_t req, int error); +void *ggate_drv_req_release_buf(ggate_drv_req_t req); + +int ggate_drv_load(); + +int ggate_drv_create(char *name, size_t namelen, size_t sectorsize, + size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv); +void ggate_drv_destroy(ggate_drv_t drv); + +int ggate_drv_recv(ggate_drv_t drv, ggate_drv_req_t *req); +int ggate_drv_send(ggate_drv_t drv, ggate_drv_req_t req); + +int ggate_drv_resize(ggate_drv_t drv, size_t newsize); + +int ggate_drv_kill(const char *devname); +int ggate_drv_list(struct ggate_drv_info *info, size_t *size); + +#ifdef __cplusplus +} +#endif + +#endif // CEPH_RBD_GGATE_GGATE_DRV_H diff --git a/src/tools/rbd_ggate/main.cc b/src/tools/rbd_ggate/main.cc new file mode 100644 index 000000000..0942f5689 --- /dev/null +++ b/src/tools/rbd_ggate/main.cc @@ -0,0 +1,516 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/int_types.h" + +#include <sys/types.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <string.h> +#include <assert.h> + +#include <iostream> +#include <memory> +#include <boost/algorithm/string/predicate.hpp> +#include <regex> + +#include "common/Formatter.h" +#include "common/Preforker.h" +#include "common/TextTable.h" +#include "common/ceph_argparse.h" +#include "common/config_proxy.h" 
+#include "common/debug.h" +#include "common/errno.h" +#include "global/global_init.h" +#include "global/signal_handler.h" + +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" +#include "include/stringify.h" + +#include "Driver.h" +#include "Server.h" +#include "Watcher.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd-ggate: " << __func__ << ": " + +static void usage() { + std::cout << "Usage: rbd-ggate [options] map <image-or-snap-spec> Map an image to ggate device\n" + << " unmap <device path> Unmap ggate device\n" + << " list List mapped ggate devices\n" + << "\n" + << "Map options:\n" + << " --device <device path> Specify ggate device path\n" + << " --read-only Map readonly\n" + << " --exclusive Forbid writes by other clients\n" + << "\n" + << "List options:\n" + << " --format plain|json|xml Output format (default: plain)\n" + << " --pretty-format Pretty formatting (json and xml)\n" + << std::endl; + generic_server_usage(); +} + +static std::string devpath, poolname, nsname, imgname, snapname; +static bool readonly = false; +static bool exclusive = false; + +static std::unique_ptr<rbd::ggate::Driver> drv; + +static void handle_signal(int signum) +{ + derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; + + ceph_assert(signum == SIGINT || signum == SIGTERM); + ceph_assert(drv); + + drv->shut_down(); +} + +static int do_map(int argc, const char *argv[]) +{ + int r; + + librados::Rados rados; + librbd::RBD rbd; + librados::IoCtx io_ctx; + librbd::Image image; + + librbd::image_info_t info; + std::string desc; + + Preforker forker; + + auto args = argv_to_vec(argc, argv); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + g_ceph_context->_conf.set_val_or_die("pid_file", ""); + + if (global_init_prefork(g_ceph_context) >= 0) { + std::string err; + r = 
forker.prefork(err); + if (r < 0) { + std::cerr << err << std::endl; + return r; + } + if (forker.is_parent()) { + if (forker.parent_wait(err) != 0) { + return -ENXIO; + } + return 0; + } + global_init_postfork_start(g_ceph_context); + } + + common_init_finish(g_ceph_context); + global_init_chdir(g_ceph_context); + + if (poolname.empty()) { + poolname = g_ceph_context->_conf.get_val<std::string>("rbd_default_pool"); + } + + std::string devname = boost::starts_with(devpath, "/dev/") ? + devpath.substr(5) : devpath; + std::unique_ptr<rbd::ggate::Watcher> watcher; + uint64_t handle; + + r = rados.init_with_context(g_ceph_context); + if (r < 0) { + goto done; + } + + r = rados.connect(); + if (r < 0) { + std::cerr << "rbd-ggate: failed to connect to cluster: " << cpp_strerror(r) + << std::endl; + goto done; + } + + r = rados.ioctx_create(poolname.c_str(), io_ctx); + if (r < 0) { + std::cerr << "rbd-ggate: failed to acces pool " << poolname << ": " + << cpp_strerror(r) << std::endl; + goto done; + } + + io_ctx.set_namespace(nsname); + + r = rbd.open(io_ctx, image, imgname.c_str()); + if (r < 0) { + std::cerr << "rbd-ggate: failed to open image " << imgname << ": " + << cpp_strerror(r) << std::endl; + goto done; + } + + if (exclusive) { + r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); + if (r < 0) { + std::cerr << "rbd-ggate: failed to acquire exclusive lock: " + << cpp_strerror(r) << std::endl; + goto done; + } + } + + desc = "RBD " + poolname + "/" + (nsname.empty() ? 
"" : nsname + "/") + + imgname; + + if (!snapname.empty()) { + r = image.snap_set(snapname.c_str()); + if (r < 0) { + std::cerr << "rbd-ggate: failed to set snapshot " << snapname << ": " + << cpp_strerror(r) << std::endl; + goto done; + } + readonly = true; + desc += "@" + snapname; + } + + r = image.stat(info, sizeof(info)); + if (r < 0) { + std::cerr << "rbd-ggate: image stat failed: " << cpp_strerror(r) + << std::endl; + goto done; + } + + rbd::ggate::Driver::load(); + drv.reset(new rbd::ggate::Driver(devname, 512, info.size, readonly, desc)); + r = drv->init(); + if (r < 0) { + r = -errno; + std::cerr << "rbd-ggate: failed to create ggate device: " << cpp_strerror(r) + << std::endl; + goto done; + } + + watcher.reset(new rbd::ggate::Watcher(drv.get(), io_ctx, image, info.size)); + r = image.update_watch(watcher.get(), &handle); + if (r < 0) { + std::cerr << "rbd-ggate: failed to set watcher: " << cpp_strerror(r) + << std::endl; + drv->shut_down(); + goto done; + } + + std::cout << "/dev/" << drv->get_devname() << std::endl; + + if (g_conf()->daemonize) { + global_init_postfork_finish(g_ceph_context); + forker.daemonize(); + } + + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + rbd::ggate::Server(drv.get(), image).run(); + + unregister_async_signal_handler(SIGHUP, sighup_handler); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); + + r = image.update_unwatch(handle); + ceph_assert(r == 0); + +done: + image.close(); + io_ctx.close(); + rados.shutdown(); + + if (r < 0) { + std::cerr << "rbd-ggate: failed to map: " << cpp_strerror(r) << std::endl; + } + + forker.exit(r < 0 ? 
EXIT_FAILURE : 0); + // Unreachable; + return r; +} + +static int do_unmap() +{ + std::string devname = boost::starts_with(devpath, "/dev/") ? + devpath.substr(5) : devpath; + + int r = rbd::ggate::Driver::kill(devname); + if (r < 0) { + cerr << "rbd-ggate: failed to destroy " << devname << ": " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +static int parse_imgpath(const std::string &imgpath, std::string *poolname, + std::string *nsname, std::string *imgname, + std::string *snapname) { + std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$"); + std::smatch match; + if (!std::regex_match(imgpath, match, pattern)) { + std::cerr << "rbd-ggate: invalid spec '" << imgpath << "'" << std::endl; + return -EINVAL; + } + + if (match[1].matched) { + *poolname = match[1]; + } + + if (match[2].matched) { + *nsname = match[2]; + } + + *imgname = match[3]; + + if (match[4].matched) { + *snapname = match[4]; + } + + return 0; +} + +static bool find_mapped_dev_by_spec(const std::string &spec, + std::string *devname) { + std::string poolname, nsname, imgname, snapname; + int r = parse_imgpath(spec, &poolname, &nsname, &imgname, &snapname); + if (r < 0) { + return false; + } + if (poolname.empty()) { + // We could use rbd_default_pool config to set pool name but then + // we would need to initialize the global context. So right now it + // is mandatory for the user to specify a pool. Fortunately the + // preferred way for users to call rbd-ggate is via rbd, which + // cares to set the pool name. 
+ return false; + } + + std::map<std::string, rbd::ggate::Driver::DevInfo> devs; + r = rbd::ggate::Driver::list(&devs); + if (r < 0) { + return false; + } + + for (auto &it : devs) { + auto &name = it.second.first; + auto &info = it.second.second; + if (!boost::starts_with(info, "RBD ")) { + continue; + } + + std::string p, n, i, s; + parse_imgpath(info.substr(4), &p, &n, &i, &s); + if (p == poolname && n == nsname && i == imgname && s == snapname) { + *devname = name; + return true; + } + } + + return false; +} + +static int do_list(const std::string &format, bool pretty_format) +{ + rbd::ggate::Driver::load(); + + std::map<std::string, rbd::ggate::Driver::DevInfo> devs; + int r = rbd::ggate::Driver::list(&devs); + if (r < 0) { + return -r; + } + + std::unique_ptr<ceph::Formatter> f; + TextTable tbl; + + if (format == "json") { + f.reset(new JSONFormatter(pretty_format)); + } else if (format == "xml") { + f.reset(new XMLFormatter(pretty_format)); + } else if (!format.empty() && format != "plain") { + std::cerr << "rbd-ggate: invalid output format: " << format << std::endl; + return -EINVAL; + } + + if (f) { + f->open_array_section("devices"); + } else { + tbl.define_column("id", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("image", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("device", TextTable::LEFT, TextTable::LEFT); + } + + int count = 0; + + for (auto &it : devs) { + auto &id = it.first; + auto &name = it.second.first; + auto &info = it.second.second; + if (!boost::starts_with(info, "RBD ")) { + continue; + } + + std::string poolname; + std::string nsname; + std::string imgname; + std::string snapname(f ? 
"" : "-"); + parse_imgpath(info.substr(4), &poolname, &nsname, &imgname, &snapname); + + if (f) { + f->open_object_section("device"); + f->dump_string("id", id); + f->dump_string("pool", poolname); + f->dump_string("namespace", nsname); + f->dump_string("image", imgname); + f->dump_string("snap", snapname); + f->dump_string("device", "/dev/" + name); + f->close_section(); + } else { + tbl << id << poolname << nsname << imgname << snapname << "/dev/" + name + << TextTable::endrow; + } + count++; + } + + if (f) { + f->close_section(); // devices + f->flush(std::cout); + } else if (count > 0) { + std::cout << tbl; + } + + return 0; +} + +int main(int argc, const char *argv[]) { + int r; + enum { + None, + Connect, + Disconnect, + List + } cmd = None; + + auto args = argv_to_vec(argc, argv); + if (args.empty()) { + cerr << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + if (ceph_argparse_need_usage(args)) { + usage(); + exit(0); + } + // filter out ceph config options + ConfigProxy{false}.parse_argv(args); + + std::string format; + bool pretty_format = false; + + for (auto i = args.begin(); i != args.end(); ) { + if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { + usage(); + return 0; + } else if (ceph_argparse_witharg(args, i, &devpath, "--device", + (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { + readonly = true; + } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { + exclusive = true; + } else if (ceph_argparse_witharg(args, i, &format, "--format", + (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) { + pretty_format = true; + } else { + ++i; + } + } + + if (args.begin() != args.end()) { + if (strcmp(*args.begin(), "map") == 0) { + cmd = Connect; + } else if (strcmp(*args.begin(), "unmap") == 0) { + cmd = Disconnect; + } else if (strcmp(*args.begin(), "list") == 0) { + cmd = List; + } else { + cerr << "rbd-ggate: unknown 
command: " << *args.begin() << std::endl; + return EXIT_FAILURE; + } + args.erase(args.begin()); + } + + if (cmd == None) { + cerr << "rbd-ggate: must specify command" << std::endl; + return EXIT_FAILURE; + } + + switch (cmd) { + case Connect: + if (args.begin() == args.end()) { + cerr << "rbd-ggate: must specify image-or-snap-spec" << std::endl; + return EXIT_FAILURE; + } + if (parse_imgpath(*args.begin(), &poolname, &nsname, &imgname, + &snapname) < 0) { + return EXIT_FAILURE; + } + args.erase(args.begin()); + break; + case Disconnect: + if (args.begin() == args.end()) { + std::cerr << "rbd-ggate: must specify ggate device or image-or-snap-spec" + << std::endl; + return EXIT_FAILURE; + } + if (boost::starts_with(*args.begin(), "/dev/") || + !find_mapped_dev_by_spec(*args.begin(), &devpath)) { + devpath = *args.begin(); + } + args.erase(args.begin()); + break; + default: + break; + } + + if (args.begin() != args.end()) { + cerr << "rbd-ggate: unknown args: " << *args.begin() << std::endl; + return EXIT_FAILURE; + } + + switch (cmd) { + case Connect: + if (imgname.empty()) { + cerr << "rbd-ggate: image name was not specified" << std::endl; + return EXIT_FAILURE; + } + + r = do_map(argc, argv); + if (r < 0) + return EXIT_FAILURE; + break; + case Disconnect: + r = do_unmap(); + if (r < 0) + return EXIT_FAILURE; + break; + case List: + r = do_list(format, pretty_format); + if (r < 0) + return EXIT_FAILURE; + break; + default: + usage(); + return EXIT_FAILURE; + } + + return 0; +} diff --git a/src/tools/rbd_mirror/BaseRequest.h b/src/tools/rbd_mirror/BaseRequest.h new file mode 100644 index 000000000..0da98651d --- /dev/null +++ b/src/tools/rbd_mirror/BaseRequest.h @@ -0,0 +1,33 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_BASE_REQUEST_H +#define CEPH_RBD_MIRROR_BASE_REQUEST_H + +#include "include/Context.h" + +namespace rbd { +namespace mirror { + +class BaseRequest { +public: + 
BaseRequest(Context *on_finish) : m_on_finish(on_finish) { + } + virtual ~BaseRequest() {} + + virtual void send() = 0; + +protected: + virtual void finish(int r) { + m_on_finish->complete(r); + delete this; + } + +private: + Context *m_on_finish; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_BASE_REQUEST_H diff --git a/src/tools/rbd_mirror/CMakeLists.txt b/src/tools/rbd_mirror/CMakeLists.txt new file mode 100644 index 000000000..43a6f03fe --- /dev/null +++ b/src/tools/rbd_mirror/CMakeLists.txt @@ -0,0 +1,91 @@ +add_library(rbd_mirror_types STATIC + image_map/Types.cc + instance_watcher/Types.cc + leader_watcher/Types.cc) + +set(rbd_mirror_internal + ClusterWatcher.cc + ImageDeleter.cc + ImageMap.cc + ImageReplayer.cc + ImageSync.cc + InstanceReplayer.cc + InstanceWatcher.cc + Instances.cc + LeaderWatcher.cc + Mirror.cc + MirrorStatusUpdater.cc + MirrorStatusWatcher.cc + NamespaceReplayer.cc + PoolMetaCache.cc + PoolReplayer.cc + PoolWatcher.cc + RemotePoolPoller.cc + ServiceDaemon.cc + Threads.cc + Throttler.cc + Types.cc + image_deleter/SnapshotPurgeRequest.cc + image_deleter/TrashMoveRequest.cc + image_deleter/TrashRemoveRequest.cc + image_deleter/TrashWatcher.cc + image_map/LoadRequest.cc + image_map/Policy.cc + image_map/SimplePolicy.cc + image_map/StateTransition.cc + image_map/UpdateRequest.cc + image_replayer/BootstrapRequest.cc + image_replayer/CloseImageRequest.cc + image_replayer/CreateImageRequest.cc + image_replayer/GetMirrorImageIdRequest.cc + image_replayer/OpenImageRequest.cc + image_replayer/OpenLocalImageRequest.cc + image_replayer/PrepareLocalImageRequest.cc + image_replayer/PrepareRemoteImageRequest.cc + image_replayer/StateBuilder.cc + image_replayer/TimeRollingMean.cc + image_replayer/Utils.cc + image_replayer/journal/CreateLocalImageRequest.cc + image_replayer/journal/EventPreprocessor.cc + image_replayer/journal/PrepareReplayRequest.cc + image_replayer/journal/Replayer.cc + 
image_replayer/journal/ReplayStatusFormatter.cc + image_replayer/journal/StateBuilder.cc + image_replayer/journal/SyncPointHandler.cc + image_replayer/snapshot/ApplyImageStateRequest.cc + image_replayer/snapshot/CreateLocalImageRequest.cc + image_replayer/snapshot/PrepareReplayRequest.cc + image_replayer/snapshot/Replayer.cc + image_replayer/snapshot/StateBuilder.cc + image_replayer/snapshot/Utils.cc + image_sync/SyncPointCreateRequest.cc + image_sync/SyncPointPruneRequest.cc + image_sync/Utils.cc + pool_watcher/RefreshImagesRequest.cc + service_daemon/Types.cc) + +add_library(rbd_mirror_internal STATIC + ${rbd_mirror_internal} + $<TARGET_OBJECTS:common_prioritycache_obj>) + +add_executable(rbd-mirror + main.cc) +target_link_libraries(rbd-mirror + rbd_mirror_internal + rbd_mirror_types + rbd_api + rbd_internal + rbd_types + journal + libneorados + librados + osdc + cls_rbd_client + cls_lock_client + cls_journal_client + global + heap_profiler + ${ALLOC_LIBS} + OpenSSL::SSL) +install(TARGETS rbd-mirror + DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/src/tools/rbd_mirror/CancelableRequest.h b/src/tools/rbd_mirror/CancelableRequest.h new file mode 100644 index 000000000..26e8dcb5b --- /dev/null +++ b/src/tools/rbd_mirror/CancelableRequest.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H +#define CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H + +#include "common/RefCountedObj.h" +#include "include/Context.h" + +namespace rbd { +namespace mirror { + +class CancelableRequest : public RefCountedObject { +public: + CancelableRequest(const std::string& name, CephContext *cct, + Context *on_finish) + : RefCountedObject(cct), m_name(name), m_cct(cct), + m_on_finish(on_finish) { + } + + virtual void send() = 0; + virtual void cancel() {} + +protected: + virtual void finish(int r) { + if (m_cct) { + lsubdout(m_cct, rbd_mirror, 20) << m_name << "::finish: r=" << r << 
dendl; + } + if (m_on_finish) { + m_on_finish->complete(r); + } + put(); + } + +private: + const std::string m_name; + CephContext *m_cct; + Context *m_on_finish; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_CANCELABLE_REQUEST_H diff --git a/src/tools/rbd_mirror/ClusterWatcher.cc b/src/tools/rbd_mirror/ClusterWatcher.cc new file mode 100644 index 000000000..8bafb336e --- /dev/null +++ b/src/tools/rbd_mirror/ClusterWatcher.cc @@ -0,0 +1,252 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ClusterWatcher.h" +#include "include/stringify.h" +#include "common/ceph_json.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/internal.h" +#include "librbd/api/Mirror.h" +#include "tools/rbd_mirror/ServiceDaemon.h" +#include "json_spirit/json_spirit.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ClusterWatcher:" << this << " " \ + << __func__ << ": " + +using std::list; +using std::map; +using std::pair; +using std::set; +using std::string; +using std::unique_ptr; +using std::vector; + +using librados::Rados; +using librados::IoCtx; + +namespace rbd { +namespace mirror { + +ClusterWatcher::ClusterWatcher(RadosRef cluster, ceph::mutex &lock, + ServiceDaemon<librbd::ImageCtx>* service_daemon) + : m_cluster(cluster), m_lock(lock), m_service_daemon(service_daemon) +{ +} + +const ClusterWatcher::PoolPeers& ClusterWatcher::get_pool_peers() const +{ + ceph_assert(ceph_mutex_is_locked(m_lock)); + return m_pool_peers; +} + +std::string ClusterWatcher::get_site_name() const { + ceph_assert(ceph_mutex_is_locked(m_lock)); + return m_site_name; +} + +void ClusterWatcher::refresh_pools() +{ + dout(20) << "enter" << dendl; + + PoolPeers pool_peers; + read_pool_peers(&pool_peers); + + std::string site_name; + int r = 
read_site_name(&site_name); + + std::lock_guard l{m_lock}; + m_pool_peers = pool_peers; + + if (r >= 0) { + m_site_name = site_name; + } + + // TODO: perhaps use a workqueue instead, once we get notifications + // about config changes for existing pools +} + +void ClusterWatcher::read_pool_peers(PoolPeers *pool_peers) +{ + int r = m_cluster->wait_for_latest_osdmap(); + if (r < 0) { + derr << "error waiting for OSD map: " << cpp_strerror(r) << dendl; + return; + } + + list<pair<int64_t, string> > pools; + r = m_cluster->pool_list2(pools); + if (r < 0) { + derr << "error listing pools: " << cpp_strerror(r) << dendl; + return; + } + + std::set<int64_t> service_pool_ids; + for (auto& kv : pools) { + int64_t pool_id = kv.first; + auto& pool_name = kv.second; + int64_t base_tier; + r = m_cluster->pool_get_base_tier(pool_id, &base_tier); + if (r == -ENOENT) { + dout(10) << "pool " << pool_name << " no longer exists" << dendl; + continue; + } else if (r < 0) { + derr << "Error retrieving base tier for pool " << pool_name << dendl; + continue; + } + if (pool_id != base_tier) { + // pool is a cache; skip it + continue; + } + + IoCtx ioctx; + r = m_cluster->ioctx_create2(pool_id, ioctx); + if (r == -ENOENT) { + dout(10) << "pool " << pool_id << " no longer exists" << dendl; + continue; + } else if (r < 0) { + derr << "Error accessing pool " << pool_name << cpp_strerror(r) << dendl; + continue; + } + + cls::rbd::MirrorMode mirror_mode_internal; + r = librbd::cls_client::mirror_mode_get(&ioctx, &mirror_mode_internal); + if (r == 0 && mirror_mode_internal == cls::rbd::MIRROR_MODE_DISABLED) { + dout(10) << "mirroring is disabled for pool " << pool_name << dendl; + continue; + } + + service_pool_ids.insert(pool_id); + if (m_service_pools.find(pool_id) == m_service_pools.end()) { + m_service_pools[pool_id] = {}; + m_service_daemon->add_pool(pool_id, pool_name); + } + + if (r == -EPERM) { + dout(10) << "access denied querying pool " << pool_name << dendl; + m_service_pools[pool_id] 
= m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, "access denied"); + continue; + } else if (r < 0) { + derr << "could not tell whether mirroring was enabled for " << pool_name + << " : " << cpp_strerror(r) << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, "mirroring mode query failed"); + continue; + } + + vector<librbd::mirror_peer_site_t> configs; + r = librbd::api::Mirror<>::peer_site_list(ioctx, &configs); + if (r < 0) { + derr << "error reading mirroring config for pool " << pool_name + << cpp_strerror(r) << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_ERROR, "mirroring peer list failed"); + continue; + } + + std::vector<PeerSpec> peers; + peers.reserve(configs.size()); + for (auto& peer : configs) { + if (peer.direction != RBD_MIRROR_PEER_DIRECTION_TX) { + peers.push_back(peer); + } + } + + for (auto& peer : peers) { + r = resolve_peer_site_config_keys(pool_id, pool_name, &peer); + if (r < 0) { + break; + } + } + + if (m_service_pools[pool_id] != service_daemon::CALLOUT_ID_NONE) { + m_service_daemon->remove_callout(pool_id, m_service_pools[pool_id]); + m_service_pools[pool_id] = service_daemon::CALLOUT_ID_NONE; + } + + pool_peers->emplace(pool_id, Peers{peers.begin(), peers.end()}); + } + + for (auto it = m_service_pools.begin(); it != m_service_pools.end(); ) { + auto current_it(it++); + if (service_pool_ids.find(current_it->first) == service_pool_ids.end()) { + m_service_daemon->remove_pool(current_it->first); + m_service_pools.erase(current_it->first); + } + } +} + +int ClusterWatcher::read_site_name(std::string* site_name) { + dout(10) << dendl; + + librbd::RBD rbd; + return rbd.mirror_site_name_get(*m_cluster, site_name); +} + +int 
ClusterWatcher::resolve_peer_site_config_keys(int64_t pool_id, + const std::string& pool_name, + PeerSpec* peer) { + dout(10) << "retrieving config-key: pool_id=" << pool_id << ", " + << "pool_name=" << pool_name << ", " + << "peer_uuid=" << peer->uuid << dendl; + + std::string cmd = + "{" + "\"prefix\": \"config-key get\", " + "\"key\": \"" RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) + + "/" + peer->uuid + "\"" + "}"; + + bufferlist in_bl; + bufferlist out_bl; + int r = m_cluster->mon_command(cmd, in_bl, &out_bl, nullptr); + if (r == -ENOENT || out_bl.length() == 0) { + return 0; + } else if (r < 0) { + derr << "error reading mirroring peer config for pool " << pool_name << ": " + << cpp_strerror(r) << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, + "mirroring peer config-key query failed"); + return r; + } + + bool json_valid = false; + json_spirit::mValue json_root; + if(json_spirit::read(out_bl.to_str(), json_root)) { + try { + auto& json_obj = json_root.get_obj(); + if (json_obj.count("mon_host")) { + peer->mon_host = json_obj["mon_host"].get_str(); + } + if (json_obj.count("key")) { + peer->key = json_obj["key"].get_str(); + } + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + derr << "error parsing mirroring peer config for pool " << pool_name << ", " + << "peer " << peer->uuid << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, + "mirroring peer config-key decode failed"); + } + + return 0; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/ClusterWatcher.h b/src/tools/rbd_mirror/ClusterWatcher.h new file mode 100644 index 000000000..93356fec6 --- /dev/null +++ b/src/tools/rbd_mirror/ClusterWatcher.h @@ -0,0 +1,73 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; 
indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_CLUSTER_WATCHER_H +#define CEPH_RBD_MIRROR_CLUSTER_WATCHER_H + +#include <map> +#include <memory> +#include <set> + +#include "common/ceph_context.h" +#include "common/ceph_mutex.h" +#include "common/Timer.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <unordered_map> + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ServiceDaemon; + +/** + * Tracks mirroring configuration for pools in a single + * cluster. + */ +class ClusterWatcher { +public: + struct PeerSpecCompare { + bool operator()(const PeerSpec& lhs, const PeerSpec& rhs) const { + return (lhs.uuid < rhs.uuid); + } + }; + typedef std::set<PeerSpec, PeerSpecCompare> Peers; + typedef std::map<int64_t, Peers> PoolPeers; + + ClusterWatcher(RadosRef cluster, ceph::mutex &lock, + ServiceDaemon<librbd::ImageCtx>* service_daemon); + ~ClusterWatcher() = default; + ClusterWatcher(const ClusterWatcher&) = delete; + ClusterWatcher& operator=(const ClusterWatcher&) = delete; + + // Caller controls frequency of calls + void refresh_pools(); + const PoolPeers& get_pool_peers() const; + std::string get_site_name() const; + +private: + typedef std::unordered_map<int64_t, service_daemon::CalloutId> ServicePools; + + RadosRef m_cluster; + ceph::mutex &m_lock; + ServiceDaemon<librbd::ImageCtx>* m_service_daemon; + + ServicePools m_service_pools; + PoolPeers m_pool_peers; + std::string m_site_name; + + void read_pool_peers(PoolPeers *pool_peers); + + int read_site_name(std::string* site_name); + + int resolve_peer_site_config_keys( + int64_t pool_id, const std::string& pool_name, PeerSpec* peer); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_CLUSTER_WATCHER_H diff --git a/src/tools/rbd_mirror/ImageDeleter.cc b/src/tools/rbd_mirror/ImageDeleter.cc new file mode 100644 
index 000000000..ba137e6fd --- /dev/null +++ b/src/tools/rbd_mirror/ImageDeleter.cc @@ -0,0 +1,549 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "include/rados/librados.hpp" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "global/global_context.h" +#include "librbd/internal.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/asio/ContextWQ.h" +#include "cls/rbd/cls_rbd_client.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/Utils.h" +#include "ImageDeleter.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/Throttler.h" +#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h" +#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h" +#include "tools/rbd_mirror/image_deleter/TrashWatcher.h" +#include <map> +#include <sstream> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +using std::string; +using std::stringstream; +using std::vector; +using std::pair; +using std::make_pair; + +using librados::IoCtx; +using namespace librbd; + +namespace rbd { +namespace mirror { + +using librbd::util::create_async_context_callback; + +namespace { + +class ImageDeleterAdminSocketCommand { +public: + virtual ~ImageDeleterAdminSocketCommand() {} + virtual int call(Formatter *f) = 0; +}; + +template <typename I> +class StatusCommand : public ImageDeleterAdminSocketCommand { +public: + explicit StatusCommand(ImageDeleter<I> *image_del) : image_del(image_del) {} + 
+ int call(Formatter *f) override { + image_del->print_status(f); + return 0; + } + +private: + ImageDeleter<I> *image_del; +}; + +} // anonymous namespace + +template <typename I> +class ImageDeleterAdminSocketHook : public AdminSocketHook { +public: + ImageDeleterAdminSocketHook(CephContext *cct, const std::string& pool_name, + ImageDeleter<I> *image_del) : + admin_socket(cct->get_admin_socket()) { + + std::string command; + int r; + + command = "rbd mirror deletion status " + pool_name; + r = admin_socket->register_command(command, this, + "get status for image deleter"); + if (r == 0) { + commands[command] = new StatusCommand<I>(image_del); + } + + } + + ~ImageDeleterAdminSocketHook() override { + (void)admin_socket->unregister_commands(this); + for (Commands::const_iterator i = commands.begin(); i != commands.end(); + ++i) { + delete i->second; + } + } + + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& errss, + bufferlist& out) override { + Commands::const_iterator i = commands.find(command); + ceph_assert(i != commands.end()); + return i->second->call(f); + } + +private: + typedef std::map<std::string, ImageDeleterAdminSocketCommand*, + std::less<>> Commands; + AdminSocket *admin_socket; + Commands commands; +}; + +template <typename I> +ImageDeleter<I>::ImageDeleter( + librados::IoCtx& local_io_ctx, Threads<librbd::ImageCtx>* threads, + Throttler<librbd::ImageCtx>* image_deletion_throttler, + ServiceDaemon<librbd::ImageCtx>* service_daemon) + : m_local_io_ctx(local_io_ctx), m_threads(threads), + m_image_deletion_throttler(image_deletion_throttler), + m_service_daemon(service_daemon), m_trash_listener(this), + m_lock(ceph::make_mutex( + librbd::util::unique_lock_name("rbd::mirror::ImageDeleter::m_lock", + this))) { +} + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << " " \ + << __func__ << ": " + +template <typename I> +void 
ImageDeleter<I>::trash_move(librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + bool resync, + librbd::asio::ContextWQ* work_queue, + Context* on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "resync=" << resync << dendl; + + auto req = rbd::mirror::image_deleter::TrashMoveRequest<>::create( + local_io_ctx, global_image_id, resync, work_queue, on_finish); + req->send(); +} + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << this << " " \ + << __func__ << ": " + +template <typename I> +void ImageDeleter<I>::init(Context* on_finish) { + dout(10) << dendl; + + m_asok_hook = new ImageDeleterAdminSocketHook<I>( + g_ceph_context, m_local_io_ctx.get_pool_name(), this); + + m_trash_watcher = image_deleter::TrashWatcher<I>::create(m_local_io_ctx, + m_threads, + m_trash_listener); + m_trash_watcher->init(on_finish); +} + +template <typename I> +void ImageDeleter<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + delete m_asok_hook; + m_asok_hook = nullptr; + + m_image_deletion_throttler->drain(m_local_io_ctx.get_namespace(), + -ESTALE); + + shut_down_trash_watcher(on_finish); +} + +template <typename I> +void ImageDeleter<I>::shut_down_trash_watcher(Context* on_finish) { + dout(10) << dendl; + ceph_assert(m_trash_watcher); + auto ctx = new LambdaContext([this, on_finish](int r) { + delete m_trash_watcher; + m_trash_watcher = nullptr; + + wait_for_ops(on_finish); + }); + m_trash_watcher->shut_down(ctx); +} + +template <typename I> +void ImageDeleter<I>::wait_for_ops(Context* on_finish) { + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + m_running = false; + cancel_retry_timer(); + } + + auto ctx = new LambdaContext([this, on_finish](int) { + cancel_all_deletions(on_finish); + }); + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void ImageDeleter<I>::cancel_all_deletions(Context* on_finish) { + 
m_image_deletion_throttler->drain(m_local_io_ctx.get_namespace(), + -ECANCELED); + { + std::lock_guard locker{m_lock}; + // wake up any external state machines waiting on deletions + ceph_assert(m_in_flight_delete_queue.empty()); + for (auto& queue : {&m_delete_queue, &m_retry_delete_queue}) { + for (auto& info : *queue) { + notify_on_delete(info->image_id, -ECANCELED); + } + queue->clear(); + } + } + on_finish->complete(0); +} + +template <typename I> +void ImageDeleter<I>::wait_for_deletion(const std::string& image_id, + bool scheduled_only, + Context* on_finish) { + dout(5) << "image_id=" << image_id << dendl; + + on_finish = new LambdaContext([this, on_finish](int r) { + m_threads->work_queue->queue(on_finish, r); + }); + + std::lock_guard locker{m_lock}; + auto del_info = find_delete_info(image_id); + if (!del_info && scheduled_only) { + // image not scheduled for deletion + on_finish->complete(0); + return; + } + + notify_on_delete(image_id, -ESTALE); + m_on_delete_contexts[image_id] = on_finish; +} + +template <typename I> +void ImageDeleter<I>::complete_active_delete(DeleteInfoRef* delete_info, + int r) { + dout(20) << "info=" << *delete_info << ", r=" << r << dendl; + std::lock_guard locker{m_lock}; + notify_on_delete((*delete_info)->image_id, r); + delete_info->reset(); +} + +template <typename I> +void ImageDeleter<I>::enqueue_failed_delete(DeleteInfoRef* delete_info, + int error_code, + double retry_delay) { + dout(20) << "info=" << *delete_info << ", r=" << error_code << dendl; + if (error_code == -EBLOCKLISTED) { + std::lock_guard locker{m_lock}; + derr << "blocklisted while deleting local image" << dendl; + complete_active_delete(delete_info, error_code); + return; + } + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + auto& delete_info_ref = *delete_info; + notify_on_delete(delete_info_ref->image_id, error_code); + delete_info_ref->error_code = error_code; + ++delete_info_ref->retries; + delete_info_ref->retry_time = (clock_t::now() + + 
ceph::make_timespan(retry_delay)); + m_retry_delete_queue.push_back(delete_info_ref); + + schedule_retry_timer(); +} + +template <typename I> +typename ImageDeleter<I>::DeleteInfoRef +ImageDeleter<I>::find_delete_info(const std::string &image_id) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + DeleteQueue delete_queues[] = {m_in_flight_delete_queue, + m_retry_delete_queue, + m_delete_queue}; + + DeleteInfo delete_info{image_id}; + for (auto& queue : delete_queues) { + auto it = std::find_if(queue.begin(), queue.end(), + [&delete_info](const DeleteInfoRef& ref) { + return delete_info == *ref; + }); + if (it != queue.end()) { + return *it; + } + } + return {}; +} + +template <typename I> +void ImageDeleter<I>::print_status(Formatter *f) { + dout(20) << dendl; + + f->open_object_section("image_deleter_status"); + f->open_array_section("delete_images_queue"); + + std::lock_guard l{m_lock}; + for (const auto& image : m_delete_queue) { + image->print_status(f); + } + + f->close_section(); + f->open_array_section("failed_deletes_queue"); + for (const auto& image : m_retry_delete_queue) { + image->print_status(f, true); + } + + f->close_section(); + f->close_section(); +} + +template <typename I> +vector<string> ImageDeleter<I>::get_delete_queue_items() { + vector<string> items; + + std::lock_guard l{m_lock}; + for (const auto& del_info : m_delete_queue) { + items.push_back(del_info->image_id); + } + + return items; +} + +template <typename I> +vector<pair<string, int> > ImageDeleter<I>::get_failed_queue_items() { + vector<pair<string, int> > items; + + std::lock_guard l{m_lock}; + for (const auto& del_info : m_retry_delete_queue) { + items.push_back(make_pair(del_info->image_id, + del_info->error_code)); + } + + return items; +} + +template <typename I> +void ImageDeleter<I>::remove_images() { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + while (m_running && !m_delete_queue.empty()) { + + DeleteInfoRef delete_info = m_delete_queue.front(); + 
m_delete_queue.pop_front(); + + ceph_assert(delete_info); + + auto on_start = create_async_context_callback( + m_threads->work_queue, new LambdaContext( + [this, delete_info](int r) { + if (r < 0) { + notify_on_delete(delete_info->image_id, r); + return; + } + remove_image(delete_info); + })); + + m_image_deletion_throttler->start_op(m_local_io_ctx.get_namespace(), + delete_info->image_id, on_start); + } +} + +template <typename I> +void ImageDeleter<I>::remove_image(DeleteInfoRef delete_info) { + dout(10) << "info=" << *delete_info << dendl; + + std::lock_guard locker{m_lock}; + + m_in_flight_delete_queue.push_back(delete_info); + m_async_op_tracker.start_op(); + + auto ctx = new LambdaContext([this, delete_info](int r) { + handle_remove_image(delete_info, r); + m_async_op_tracker.finish_op(); + }); + + auto req = image_deleter::TrashRemoveRequest<I>::create( + m_local_io_ctx, delete_info->image_id, &delete_info->error_result, + m_threads->work_queue, ctx); + req->send(); +} + +template <typename I> +void ImageDeleter<I>::handle_remove_image(DeleteInfoRef delete_info, + int r) { + dout(10) << "info=" << *delete_info << ", r=" << r << dendl; + + m_image_deletion_throttler->finish_op(m_local_io_ctx.get_namespace(), + delete_info->image_id); + { + std::lock_guard locker{m_lock}; + ceph_assert(ceph_mutex_is_locked(m_lock)); + auto it = std::find(m_in_flight_delete_queue.begin(), + m_in_flight_delete_queue.end(), delete_info); + ceph_assert(it != m_in_flight_delete_queue.end()); + m_in_flight_delete_queue.erase(it); + } + + if (r < 0) { + if (delete_info->error_result == image_deleter::ERROR_RESULT_COMPLETE) { + complete_active_delete(&delete_info, r); + } else if (delete_info->error_result == + image_deleter::ERROR_RESULT_RETRY_IMMEDIATELY) { + enqueue_failed_delete(&delete_info, r, m_busy_interval); + } else { + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + double failed_interval = cct->_conf.get_val<double>( + 
"rbd_mirror_delete_retry_interval"); + enqueue_failed_delete(&delete_info, r, failed_interval); + } + } else { + complete_active_delete(&delete_info, 0); + } + + // process the next queued image to delete + remove_images(); +} + +template <typename I> +void ImageDeleter<I>::schedule_retry_timer() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + if (!m_running || m_timer_ctx != nullptr || m_retry_delete_queue.empty()) { + return; + } + + dout(10) << dendl; + auto &delete_info = m_retry_delete_queue.front(); + m_timer_ctx = new LambdaContext([this](int r) { + handle_retry_timer(); + }); + m_threads->timer->add_event_at(delete_info->retry_time, m_timer_ctx); +} + +template <typename I> +void ImageDeleter<I>::cancel_retry_timer() { + dout(10) << dendl; + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + if (m_timer_ctx != nullptr) { + bool canceled = m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + ceph_assert(canceled); + } +} + +template <typename I> +void ImageDeleter<I>::handle_retry_timer() { + dout(10) << dendl; + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + std::lock_guard locker{m_lock}; + + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + ceph_assert(m_running); + ceph_assert(!m_retry_delete_queue.empty()); + + // move all ready-to-ready items back to main queue + auto now = clock_t::now(); + while (!m_retry_delete_queue.empty()) { + auto &delete_info = m_retry_delete_queue.front(); + if (delete_info->retry_time > now) { + break; + } + + m_delete_queue.push_back(delete_info); + m_retry_delete_queue.pop_front(); + } + + // schedule wake up for any future retries + schedule_retry_timer(); + + // start (concurrent) removal of images + m_async_op_tracker.start_op(); + auto ctx = new LambdaContext([this](int r) { + remove_images(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> 
+void ImageDeleter<I>::handle_trash_image(const std::string& image_id, + const ImageDeleter<I>::clock_t::time_point& deferment_end_time) { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + auto del_info = find_delete_info(image_id); + if (del_info != nullptr) { + dout(20) << "image " << image_id << " " + << "was already scheduled for deletion" << dendl; + return; + } + + dout(10) << "image_id=" << image_id << ", " + << "deferment_end_time=" << utime_t{deferment_end_time} << dendl; + + del_info.reset(new DeleteInfo(image_id)); + del_info->retry_time = deferment_end_time; + m_retry_delete_queue.push_back(del_info); + + schedule_retry_timer(); +} + +template <typename I> +void ImageDeleter<I>::notify_on_delete(const std::string& image_id, + int r) { + dout(10) << "image_id=" << image_id << ", r=" << r << dendl; + auto it = m_on_delete_contexts.find(image_id); + if (it == m_on_delete_contexts.end()) { + return; + } + + it->second->complete(r); + m_on_delete_contexts.erase(it); +} + +template <typename I> +void ImageDeleter<I>::DeleteInfo::print_status(Formatter *f, + bool print_failure_info) { + f->open_object_section("delete_info"); + f->dump_string("image_id", image_id); + if (print_failure_info) { + f->dump_string("error_code", cpp_strerror(error_code)); + f->dump_int("retries", retries); + } + f->close_section(); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageDeleter<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageDeleter.h b/src/tools/rbd_mirror/ImageDeleter.h new file mode 100644 index 000000000..5fe79496b --- /dev/null +++ b/src/tools/rbd_mirror/ImageDeleter.h @@ -0,0 +1,189 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License 
version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_H + +#include "include/utime.h" +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "common/Timer.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_deleter/Types.h" +#include <atomic> +#include <deque> +#include <iosfwd> +#include <map> +#include <memory> +#include <vector> + +class AdminSocketHook; +class Context; +namespace librbd { +struct ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> class ServiceDaemon; +template <typename> class Threads; +template <typename> class Throttler; + +namespace image_deleter { template <typename> struct TrashWatcher; } + +/** + * Manage deletion of non-primary images. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class ImageDeleter { +public: + static ImageDeleter* create( + librados::IoCtx& local_io_ctx, Threads<librbd::ImageCtx>* threads, + Throttler<librbd::ImageCtx>* image_deletion_throttler, + ServiceDaemon<librbd::ImageCtx>* service_daemon) { + return new ImageDeleter(local_io_ctx, threads, image_deletion_throttler, + service_daemon); + } + + ImageDeleter(librados::IoCtx& local_io_ctx, + Threads<librbd::ImageCtx>* threads, + Throttler<librbd::ImageCtx>* image_deletion_throttler, + ServiceDaemon<librbd::ImageCtx>* service_daemon); + + ImageDeleter(const ImageDeleter&) = delete; + ImageDeleter& operator=(const ImageDeleter&) = delete; + + static void trash_move(librados::IoCtx& local_io_ctx, + const std::string& global_image_id, bool resync, + librbd::asio::ContextWQ* work_queue, + Context* on_finish); + + void init(Context* on_finish); + void shut_down(Context* on_finish); + + void print_status(Formatter *f); + + // for testing purposes + void wait_for_deletion(const std::string &image_id, + bool scheduled_only, 
Context* on_finish); + + std::vector<std::string> get_delete_queue_items(); + std::vector<std::pair<std::string, int> > get_failed_queue_items(); + + inline void set_busy_timer_interval(double interval) { + m_busy_interval = interval; + } + +private: + using clock_t = ceph::real_clock; + struct TrashListener : public image_deleter::TrashListener { + ImageDeleter *image_deleter; + + TrashListener(ImageDeleter *image_deleter) : image_deleter(image_deleter) { + } + + void handle_trash_image(const std::string& image_id, + const ceph::real_clock::time_point& deferment_end_time) override { + image_deleter->handle_trash_image(image_id, deferment_end_time); + } + }; + + struct DeleteInfo { + std::string image_id; + + image_deleter::ErrorResult error_result = {}; + int error_code = 0; + clock_t::time_point retry_time; + int retries = 0; + + DeleteInfo(const std::string& image_id) + : image_id(image_id) { + } + + inline bool operator==(const DeleteInfo& delete_info) const { + return (image_id == delete_info.image_id); + } + + friend std::ostream& operator<<(std::ostream& os, DeleteInfo& delete_info) { + os << "[image_id=" << delete_info.image_id << "]"; + return os; + } + + void print_status(Formatter *f, + bool print_failure_info=false); + }; + typedef std::shared_ptr<DeleteInfo> DeleteInfoRef; + typedef std::deque<DeleteInfoRef> DeleteQueue; + typedef std::map<std::string, Context*> OnDeleteContexts; + + librados::IoCtx& m_local_io_ctx; + Threads<librbd::ImageCtx>* m_threads; + Throttler<librbd::ImageCtx>* m_image_deletion_throttler; + ServiceDaemon<librbd::ImageCtx>* m_service_daemon; + + image_deleter::TrashWatcher<ImageCtxT>* m_trash_watcher = nullptr; + TrashListener m_trash_listener; + + std::atomic<unsigned> m_running { 1 }; + + double m_busy_interval = 1; + + AsyncOpTracker m_async_op_tracker; + + ceph::mutex m_lock; + DeleteQueue m_delete_queue; + DeleteQueue m_retry_delete_queue; + DeleteQueue m_in_flight_delete_queue; + + OnDeleteContexts m_on_delete_contexts; + 
+ AdminSocketHook *m_asok_hook = nullptr; + + Context *m_timer_ctx = nullptr; + + bool process_image_delete(); + + void complete_active_delete(DeleteInfoRef* delete_info, int r); + void enqueue_failed_delete(DeleteInfoRef* delete_info, int error_code, + double retry_delay); + + DeleteInfoRef find_delete_info(const std::string &image_id); + + void remove_images(); + void remove_image(DeleteInfoRef delete_info); + void handle_remove_image(DeleteInfoRef delete_info, int r); + + void schedule_retry_timer(); + void cancel_retry_timer(); + void handle_retry_timer(); + + void handle_trash_image(const std::string& image_id, + const clock_t::time_point& deferment_end_time); + + void shut_down_trash_watcher(Context* on_finish); + void wait_for_ops(Context* on_finish); + void cancel_all_deletions(Context* on_finish); + + void notify_on_delete(const std::string& image_id, int r); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageDeleter<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_H diff --git a/src/tools/rbd_mirror/ImageMap.cc b/src/tools/rbd_mirror/ImageMap.cc new file mode 100644 index 000000000..bd005b466 --- /dev/null +++ b/src/tools/rbd_mirror/ImageMap.cc @@ -0,0 +1,604 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" + +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "tools/rbd_mirror/Threads.h" + +#include "ImageMap.h" +#include "image_map/LoadRequest.h" +#include "image_map/SimplePolicy.h" +#include "image_map/UpdateRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageMap: " << this << " " \ + << __func__ << ": " + +using namespace std; + +namespace rbd { +namespace mirror { + +using ::operator<<; +using image_map::Policy; + +using 
librbd::util::unique_lock_name; +using librbd::util::create_async_context_callback; + +template <typename I> +struct ImageMap<I>::C_NotifyInstance : public Context { + ImageMap* image_map; + std::string global_image_id; + bool acquire_release; + + C_NotifyInstance(ImageMap* image_map, const std::string& global_image_id, + bool acquire_release) + : image_map(image_map), global_image_id(global_image_id), + acquire_release(acquire_release) { + image_map->start_async_op(); + } + + void finish(int r) override { + if (acquire_release) { + image_map->handle_peer_ack(global_image_id, r); + } else { + image_map->handle_peer_ack_remove(global_image_id, r); + } + image_map->finish_async_op(); + } +}; + +template <typename I> +ImageMap<I>::ImageMap(librados::IoCtx &ioctx, Threads<I> *threads, + const std::string& instance_id, + image_map::Listener &listener) + : m_ioctx(ioctx), m_threads(threads), m_instance_id(instance_id), + m_listener(listener), + m_lock(ceph::make_mutex( + unique_lock_name("rbd::mirror::ImageMap::m_lock", this))) { +} + +template <typename I> +ImageMap<I>::~ImageMap() { + ceph_assert(m_async_op_tracker.empty()); + ceph_assert(m_timer_task == nullptr); + ceph_assert(m_rebalance_task == nullptr); +} + +template <typename I> +void ImageMap<I>::continue_action(const std::set<std::string> &global_image_ids, + int r) { + dout(20) << dendl; + + { + std::lock_guard locker{m_lock}; + if (m_shutting_down) { + return; + } + + for (auto const &global_image_id : global_image_ids) { + bool schedule = m_policy->finish_action(global_image_id, r); + if (schedule) { + schedule_action(global_image_id); + } + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_update_request( + const Updates &updates, + const std::set<std::string> &remove_global_image_ids, int r) { + dout(20) << "r=" << r << dendl; + + std::set<std::string> global_image_ids; + + global_image_ids.insert(remove_global_image_ids.begin(), + remove_global_image_ids.end()); + for 
(auto const &update : updates) { + global_image_ids.insert(update.global_image_id); + } + + continue_action(global_image_ids, r); +} + +template <typename I> +void ImageMap<I>::update_image_mapping(Updates&& map_updates, + std::set<std::string>&& map_removals) { + if (map_updates.empty() && map_removals.empty()) { + return; + } + + dout(5) << "updates=[" << map_updates << "], " + << "removes=[" << map_removals << "]" << dendl; + + Context *on_finish = new LambdaContext( + [this, map_updates, map_removals](int r) { + handle_update_request(map_updates, map_removals, r); + finish_async_op(); + }); + on_finish = create_async_context_callback(m_threads->work_queue, on_finish); + + // empty meta policy for now.. + image_map::PolicyMetaNone policy_meta; + + bufferlist bl; + encode(image_map::PolicyData(policy_meta), bl); + + // prepare update map + std::map<std::string, cls::rbd::MirrorImageMap> update_mapping; + for (auto const &update : map_updates) { + update_mapping.emplace( + update.global_image_id, cls::rbd::MirrorImageMap(update.instance_id, + update.mapped_time, bl)); + } + + start_async_op(); + image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create( + m_ioctx, std::move(update_mapping), std::move(map_removals), on_finish); + req->send(); +} + +template <typename I> +void ImageMap<I>::process_updates() { + dout(20) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_timer_task == nullptr); + + Updates map_updates; + std::set<std::string> map_removals; + Updates acquire_updates; + Updates release_updates; + + // gather updates by advancing the state machine + m_lock.lock(); + for (auto const &global_image_id : m_global_image_ids) { + image_map::ActionType action_type = + m_policy->start_action(global_image_id); + image_map::LookupInfo info = m_policy->lookup(global_image_id); + + dout(15) << "global_image_id=" << global_image_id << ", " + << "action=" << action_type << ", " + << "instance=" << info.instance_id << 
dendl; + switch (action_type) { + case image_map::ACTION_TYPE_NONE: + continue; + case image_map::ACTION_TYPE_MAP_UPDATE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + map_updates.emplace_back(global_image_id, info.instance_id, + info.mapped_time); + break; + case image_map::ACTION_TYPE_MAP_REMOVE: + map_removals.emplace(global_image_id); + break; + case image_map::ACTION_TYPE_ACQUIRE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + acquire_updates.emplace_back(global_image_id, info.instance_id); + break; + case image_map::ACTION_TYPE_RELEASE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + release_updates.emplace_back(global_image_id, info.instance_id); + break; + } + } + m_global_image_ids.clear(); + m_lock.unlock(); + + // notify listener (acquire, release) and update on-disk map. note + // that its safe to process this outside m_lock as we still hold + // timer lock. + notify_listener_acquire_release_images(acquire_updates, release_updates); + update_image_mapping(std::move(map_updates), std::move(map_removals)); +} + +template <typename I> +void ImageMap<I>::schedule_update_task() { + std::lock_guard timer_lock{m_threads->timer_lock}; + schedule_update_task(m_threads->timer_lock); +} + +template <typename I> +void ImageMap<I>::schedule_update_task(const ceph::mutex &timer_lock) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + + schedule_rebalance_task(); + + if (m_timer_task != nullptr) { + return; + } + + { + std::lock_guard locker{m_lock}; + if (m_global_image_ids.empty()) { + return; + } + } + + m_timer_task = new LambdaContext([this](int r) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + m_timer_task = nullptr; + + process_updates(); + }); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + double after = cct->_conf.get_val<double>("rbd_mirror_image_policy_update_throttle_interval"); + + dout(20) << "scheduling image check update (" << 
m_timer_task << ")" + << " after " << after << " second(s)" << dendl; + m_threads->timer->add_event_after(after, m_timer_task); +} + +template <typename I> +void ImageMap<I>::rebalance() { + ceph_assert(m_rebalance_task == nullptr); + + { + std::lock_guard locker{m_lock}; + if (m_async_op_tracker.empty() && m_global_image_ids.empty()){ + dout(20) << "starting rebalance" << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->add_instances({}, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + } + + schedule_update_task(m_threads->timer_lock); +} + +template <typename I> +void ImageMap<I>::schedule_rebalance_task() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + + // fetch the updated value of idle timeout for (re)scheduling + double resched_after = cct->_conf.get_val<double>( + "rbd_mirror_image_policy_rebalance_timeout"); + if (!resched_after) { + return; + } + + // cancel existing rebalance task if any before scheduling + if (m_rebalance_task != nullptr) { + m_threads->timer->cancel_event(m_rebalance_task); + } + + m_rebalance_task = new LambdaContext([this](int _) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + m_rebalance_task = nullptr; + + rebalance(); + }); + + dout(20) << "scheduling rebalance (" << m_rebalance_task << ")" + << " after " << resched_after << " second(s)" << dendl; + m_threads->timer->add_event_after(resched_after, m_rebalance_task); +} + +template <typename I> +void ImageMap<I>::schedule_action(const std::string &global_image_id) { + dout(20) << "global_image_id=" << global_image_id << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + m_global_image_ids.emplace(global_image_id); +} + +template <typename I> +void ImageMap<I>::notify_listener_acquire_release_images( + const Updates &acquire, const Updates &release) { + if 
(acquire.empty() && release.empty()) { + return; + } + + dout(5) << "acquire=[" << acquire << "], " + << "release=[" << release << "]" << dendl; + + for (auto const &update : acquire) { + m_listener.acquire_image( + update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, true))); + } + + for (auto const &update : release) { + m_listener.release_image( + update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, true))); + } +} + +template <typename I> +void ImageMap<I>::notify_listener_remove_images(const std::string &peer_uuid, + const Updates &remove) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "remove=[" << remove << "]" << dendl; + + for (auto const &update : remove) { + m_listener.remove_image( + peer_uuid, update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, false))); + } +} + +template <typename I> +void ImageMap<I>::handle_load(const std::map<std::string, + cls::rbd::MirrorImageMap> &image_mapping) { + dout(20) << dendl; + + { + std::lock_guard locker{m_lock}; + m_policy->init(image_mapping); + + for (auto& pair : image_mapping) { + schedule_action(pair.first); + } + } + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_peer_ack_remove(const std::string &global_image_id, + int r) { + std::lock_guard locker{m_lock}; + dout(5) << "global_image_id=" << global_image_id << dendl; + + if (r < 0) { + derr << "failed to remove global_image_id=" << global_image_id << dendl; + } + + auto peer_it = m_peer_map.find(global_image_id); + if (peer_it == m_peer_map.end()) { + return; + } + + m_peer_map.erase(peer_it); +} + +template <typename I> +void ImageMap<I>::update_images_added( + const std::string &peer_uuid, + const std::set<std::string> 
&global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "global_image_ids=[" << global_image_ids << "]" << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + for (auto const &global_image_id : global_image_ids) { + auto result = m_peer_map[global_image_id].insert(peer_uuid); + if (result.second && m_peer_map[global_image_id].size() == 1) { + if (m_policy->add_image(global_image_id)) { + schedule_action(global_image_id); + } + } + } +} + +template <typename I> +void ImageMap<I>::update_images_removed( + const std::string &peer_uuid, + const std::set<std::string> &global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "global_image_ids=[" << global_image_ids << "]" << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Updates to_remove; + for (auto const &global_image_id : global_image_ids) { + image_map::LookupInfo info = m_policy->lookup(global_image_id); + bool image_mapped = (info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + + bool image_removed = image_mapped; + bool peer_removed = false; + auto peer_it = m_peer_map.find(global_image_id); + if (peer_it != m_peer_map.end()) { + auto& peer_set = peer_it->second; + peer_removed = peer_set.erase(peer_uuid); + image_removed = peer_removed && peer_set.empty(); + } + + if (image_mapped && peer_removed && !peer_uuid.empty()) { + // peer image has been deleted + to_remove.emplace_back(global_image_id, info.instance_id); + } + + if (image_removed) { + // local and peer images have been deleted + if (m_policy->remove_image(global_image_id)) { + schedule_action(global_image_id); + } + } + } + + if (!to_remove.empty()) { + // removal notification will be notified instantly. 
this is safe + // even after scheduling action for images as we still hold m_lock + notify_listener_remove_images(peer_uuid, to_remove); + } +} + +template <typename I> +void ImageMap<I>::update_instances_added( + const std::vector<std::string> &instance_ids) { + { + std::lock_guard locker{m_lock}; + if (m_shutting_down) { + return; + } + + std::vector<std::string> filtered_instance_ids; + filter_instance_ids(instance_ids, &filtered_instance_ids, false); + if (filtered_instance_ids.empty()) { + return; + } + + dout(20) << "instance_ids=" << filtered_instance_ids << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->add_instances(filtered_instance_ids, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::update_instances_removed( + const std::vector<std::string> &instance_ids) { + { + std::lock_guard locker{m_lock}; + if (m_shutting_down) { + return; + } + + std::vector<std::string> filtered_instance_ids; + filter_instance_ids(instance_ids, &filtered_instance_ids, true); + if (filtered_instance_ids.empty()) { + return; + } + + dout(20) << "instance_ids=" << filtered_instance_ids << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->remove_instances(filtered_instance_ids, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::update_images(const std::string &peer_uuid, + std::set<std::string> &&added_global_image_ids, + std::set<std::string> &&removed_global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " << "added_count=" + << added_global_image_ids.size() << ", " << "removed_count=" + << removed_global_image_ids.size() << dendl; + + { + std::lock_guard locker{m_lock}; + if (m_shutting_down) { + return; + } + + 
if (!removed_global_image_ids.empty()) { + update_images_removed(peer_uuid, removed_global_image_ids); + } + if (!added_global_image_ids.empty()) { + update_images_added(peer_uuid, added_global_image_ids); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_peer_ack(const std::string &global_image_id, int r) { + dout (20) << "global_image_id=" << global_image_id << ", r=" << r + << dendl; + + continue_action({global_image_id}, r); +} + +template <typename I> +void ImageMap<I>::init(Context *on_finish) { + dout(20) << dendl; + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type"); + + if (policy_type == "none" || policy_type == "simple") { + m_policy.reset(image_map::SimplePolicy::create(m_ioctx)); + } else { + ceph_abort(); // not really needed as such, but catch it. + } + + dout(20) << "mapping policy=" << policy_type << dendl; + + start_async_op(); + C_LoadMap *ctx = new C_LoadMap(this, on_finish); + image_map::LoadRequest<I> *req = image_map::LoadRequest<I>::create( + m_ioctx, &ctx->image_mapping, ctx); + req->send(); +} + +template <typename I> +void ImageMap<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + { + std::lock_guard timer_lock{m_threads->timer_lock}; + + { + std::lock_guard locker{m_lock}; + ceph_assert(!m_shutting_down); + + m_shutting_down = true; + m_policy.reset(); + } + + if (m_timer_task != nullptr) { + m_threads->timer->cancel_event(m_timer_task); + m_timer_task = nullptr; + } + if (m_rebalance_task != nullptr) { + m_threads->timer->cancel_event(m_rebalance_task); + m_rebalance_task = nullptr; + } + } + + wait_for_async_ops(on_finish); +} + +template <typename I> +void ImageMap<I>::filter_instance_ids( + const std::vector<std::string> &instance_ids, + std::vector<std::string> *filtered_instance_ids, bool removal) const { + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + 
std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type"); + + if (policy_type != "none") { + *filtered_instance_ids = instance_ids; + return; + } + + if (removal) { + // propagate removals for external instances + for (auto& instance_id : instance_ids) { + if (instance_id != m_instance_id) { + filtered_instance_ids->push_back(instance_id); + } + } + } else if (std::find(instance_ids.begin(), instance_ids.end(), + m_instance_id) != instance_ids.end()) { + // propagate addition only for local instance + filtered_instance_ids->push_back(m_instance_id); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageMap<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageMap.h b/src/tools/rbd_mirror/ImageMap.h new file mode 100644 index 000000000..9dd61ee0d --- /dev/null +++ b/src/tools/rbd_mirror/ImageMap.h @@ -0,0 +1,175 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_H + +#include <vector> + +#include "common/ceph_mutex.h" +#include "include/Context.h" +#include "common/AsyncOpTracker.h" +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +#include "image_map/Policy.h" +#include "image_map/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class ImageMap { +public: + static ImageMap *create(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads, + const std::string& instance_id, + image_map::Listener &listener) { + return new ImageMap(ioctx, threads, instance_id, listener); + } + + ~ImageMap(); + + // init (load) the instance map from disk + void init(Context *on_finish); + + // shut down map operations + void shut_down(Context *on_finish); + + // update (add/remove) images + void update_images(const std::string &peer_uuid, + 
std::set<std::string> &&added_global_image_ids, + std::set<std::string> &&removed_global_image_ids); + + // add/remove instances + void update_instances_added(const std::vector<std::string> &instances); + void update_instances_removed(const std::vector<std::string> &instances); + +private: + struct C_NotifyInstance; + + ImageMap(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads, + const std::string& instance_id, image_map::Listener &listener); + + struct Update { + std::string global_image_id; + std::string instance_id; + utime_t mapped_time; + + Update(const std::string &global_image_id, const std::string &instance_id, + utime_t mapped_time) + : global_image_id(global_image_id), + instance_id(instance_id), + mapped_time(mapped_time) { + } + Update(const std::string &global_image_id, const std::string &instance_id) + : Update(global_image_id, instance_id, ceph_clock_now()) { + } + + friend std::ostream& operator<<(std::ostream& os, + const Update& update) { + os << "{global_image_id=" << update.global_image_id << ", " + << "instance_id=" << update.instance_id << "}"; + return os; + } + + }; + typedef std::list<Update> Updates; + + // Lock ordering: m_threads->timer_lock, m_lock + + librados::IoCtx &m_ioctx; + Threads<ImageCtxT> *m_threads; + std::string m_instance_id; + image_map::Listener &m_listener; + + std::unique_ptr<image_map::Policy> m_policy; // our mapping policy + + Context *m_timer_task = nullptr; + ceph::mutex m_lock; + bool m_shutting_down = false; + AsyncOpTracker m_async_op_tracker; + + // global_image_id -> registered peers ("" == local, remote otherwise) + std::map<std::string, std::set<std::string> > m_peer_map; + + std::set<std::string> m_global_image_ids; + + Context *m_rebalance_task = nullptr; + + struct C_LoadMap : Context { + ImageMap *image_map; + Context *on_finish; + + std::map<std::string, cls::rbd::MirrorImageMap> image_mapping; + + C_LoadMap(ImageMap *image_map, Context *on_finish) + : image_map(image_map), + on_finish(on_finish) { + 
} + + void finish(int r) override { + if (r == 0) { + image_map->handle_load(image_mapping); + } + + image_map->finish_async_op(); + on_finish->complete(r); + } + }; + + // async op-tracker helper routines + void start_async_op() { + m_async_op_tracker.start_op(); + } + void finish_async_op() { + m_async_op_tracker.finish_op(); + } + void wait_for_async_ops(Context *on_finish) { + m_async_op_tracker.wait_for_ops(on_finish); + } + + void handle_peer_ack(const std::string &global_image_id, int r); + void handle_peer_ack_remove(const std::string &global_image_id, int r); + + void handle_load(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping); + void handle_update_request(const Updates &updates, + const std::set<std::string> &remove_global_image_ids, int r); + + // continue (retry or resume depending on state machine) processing + // current action. + void continue_action(const std::set<std::string> &global_image_ids, int r); + + // schedule an image for update + void schedule_action(const std::string &global_image_id); + + void schedule_update_task(); + void schedule_update_task(const ceph::mutex &timer_lock); + void process_updates(); + void update_image_mapping(Updates&& map_updates, + std::set<std::string>&& map_removals); + + void rebalance(); + void schedule_rebalance_task(); + + void notify_listener_acquire_release_images(const Updates &acquire, const Updates &release); + void notify_listener_remove_images(const std::string &peer_uuid, const Updates &remove); + + void update_images_added(const std::string &peer_uuid, + const std::set<std::string> &global_image_ids); + void update_images_removed(const std::string &peer_uuid, + const std::set<std::string> &global_image_ids); + + void filter_instance_ids(const std::vector<std::string> &instance_ids, + std::vector<std::string> *filtered_instance_ids, + bool removal) const; + +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_H diff --git 
a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc new file mode 100644 index 000000000..1e88c3262 --- /dev/null +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -0,0 +1,1201 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/Timer.h" +#include "global/global_context.h" +#include "journal/Journaler.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "ImageDeleter.h" +#include "ImageReplayer.h" +#include "MirrorStatusUpdater.h" +#include "Threads.h" +#include "tools/rbd_mirror/image_replayer/BootstrapRequest.h" +#include "tools/rbd_mirror/image_replayer/ReplayerListener.h" +#include "tools/rbd_mirror/image_replayer/StateBuilder.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_replayer/journal/Replayer.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" +#include <map> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::" << *this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_context_callback; + +template <typename I> +std::ostream &operator<<(std::ostream &os, + const typename ImageReplayer<I>::State &state); + +namespace { + +template <typename I> +class ImageReplayerAdminSocketCommand { +public: + ImageReplayerAdminSocketCommand(const std::string &desc, + ImageReplayer<I> *replayer) + : desc(desc), replayer(replayer) { + } + virtual ~ImageReplayerAdminSocketCommand() {} + 
virtual int call(Formatter *f) = 0; + + std::string desc; + ImageReplayer<I> *replayer; + bool registered = false; +}; + +template <typename I> +class StatusCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StatusCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->print_status(f); + return 0; + } +}; + +template <typename I> +class StartCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StartCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->start(nullptr, true); + return 0; + } +}; + +template <typename I> +class StopCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StopCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->stop(nullptr, true); + return 0; + } +}; + +template <typename I> +class RestartCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit RestartCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->restart(); + return 0; + } +}; + +template <typename I> +class FlushCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit FlushCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + int call(Formatter *f) override { + this->replayer->flush(); + return 0; + } +}; + +template <typename I> +class ImageReplayerAdminSocketHook : public AdminSocketHook { +public: + ImageReplayerAdminSocketHook(CephContext *cct, const std::string &name, + ImageReplayer<I> *replayer) + : 
admin_socket(cct->get_admin_socket()), + commands{{"rbd mirror flush " + name, + new FlushCommand<I>("flush rbd mirror " + name, replayer)}, + {"rbd mirror restart " + name, + new RestartCommand<I>("restart rbd mirror " + name, replayer)}, + {"rbd mirror start " + name, + new StartCommand<I>("start rbd mirror " + name, replayer)}, + {"rbd mirror status " + name, + new StatusCommand<I>("get status for rbd mirror " + name, replayer)}, + {"rbd mirror stop " + name, + new StopCommand<I>("stop rbd mirror " + name, replayer)}} { + } + + int register_commands() { + for (auto &it : commands) { + int r = admin_socket->register_command(it.first, this, + it.second->desc); + if (r < 0) { + return r; + } + it.second->registered = true; + } + return 0; + } + + ~ImageReplayerAdminSocketHook() override { + admin_socket->unregister_commands(this); + for (auto &it : commands) { + delete it.second; + } + commands.clear(); + } + + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& errss, + bufferlist& out) override { + auto i = commands.find(command); + ceph_assert(i != commands.end()); + return i->second->call(f); + } + +private: + typedef std::map<std::string, ImageReplayerAdminSocketCommand<I>*, + std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +} // anonymous namespace + +template <typename I> +void ImageReplayer<I>::BootstrapProgressContext::update_progress( + const std::string &description, bool flush) +{ + const std::string desc = "bootstrapping, " + description; + replayer->set_state_description(0, desc); + if (flush) { + replayer->update_mirror_image_status(false, boost::none); + } +} + +template <typename I> +struct ImageReplayer<I>::ReplayerListener + : public image_replayer::ReplayerListener { + ImageReplayer<I>* image_replayer; + + ReplayerListener(ImageReplayer<I>* image_replayer) + : image_replayer(image_replayer) { + } + + void handle_notification() override { + 
image_replayer->handle_replayer_notification(); + } +}; + +template <typename I> +ImageReplayer<I>::ImageReplayer( + librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid, + const std::string &global_image_id, Threads<I> *threads, + InstanceWatcher<I> *instance_watcher, + MirrorStatusUpdater<I>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) : + m_local_io_ctx(local_io_ctx), m_local_mirror_uuid(local_mirror_uuid), + m_global_image_id(global_image_id), m_threads(threads), + m_instance_watcher(instance_watcher), + m_local_status_updater(local_status_updater), + m_cache_manager_handler(cache_manager_handler), + m_pool_meta_cache(pool_meta_cache), + m_local_image_name(global_image_id), + m_lock(ceph::make_mutex("rbd::mirror::ImageReplayer " + + stringify(local_io_ctx.get_id()) + " " + global_image_id)), + m_progress_cxt(this), + m_replayer_listener(new ReplayerListener(this)) +{ + // Register asok commands using a temporary "remote_pool_name/global_image_id" + // name. When the image name becomes known on start the asok commands will be + // re-registered using "remote_pool_name/remote_image_name" name. 
  m_image_spec = image_replayer::util::compute_image_spec(
    local_io_ctx, global_image_id);
  register_admin_socket_hook();
}

template <typename I>
ImageReplayer<I>::~ImageReplayer()
{
  unregister_admin_socket_hook();
  // all transient state must already have been torn down by shut_down()
  ceph_assert(m_state_builder == nullptr);
  ceph_assert(m_on_start_finish == nullptr);
  ceph_assert(m_on_stop_contexts.empty());
  ceph_assert(m_bootstrap_request == nullptr);
  ceph_assert(m_update_status_task == nullptr);
  delete m_replayer_listener;
}

// Map the last published mirror-image status to a coarse health state.
template <typename I>
image_replayer::HealthState ImageReplayer<I>::get_health_state() const {
  std::lock_guard locker{m_lock};

  if (!m_mirror_image_status_state) {
    return image_replayer::HEALTH_STATE_OK;
  } else if (*m_mirror_image_status_state ==
               cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING ||
             *m_mirror_image_status_state ==
               cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN) {
    return image_replayer::HEALTH_STATE_WARNING;
  }
  return image_replayer::HEALTH_STATE_ERROR;
}

// Add a remote peer to replay from (idempotent).
template <typename I>
void ImageReplayer<I>::add_peer(const Peer<I>& peer) {
  dout(10) << "peer=" << peer << dendl;

  std::lock_guard locker{m_lock};
  auto it = m_peers.find(peer);
  if (it == m_peers.end()) {
    m_peers.insert(peer);
  }
}

template <typename I>
void ImageReplayer<I>::set_state_description(int r, const std::string &desc) {
  dout(10) << "r=" << r << ", desc=" << desc << dendl;

  std::lock_guard l{m_lock};
  m_last_r = r;
  m_state_desc = desc;
}

// Begin replaying this image.  Validates the current state under the
// lock and, on success, kicks off bootstrap; on_finish (if any) is
// completed once start-up succeeds or fails.
template <typename I>
void ImageReplayer<I>::start(Context *on_finish, bool manual, bool restart)
{
  dout(10) << "on_finish=" << on_finish << dendl;

  int r = 0;
  {
    std::lock_guard locker{m_lock};
    if (!is_stopped_()) {
      derr << "already running" << dendl;
      r = -EINVAL;
    } else if (m_manual_stop && !manual) {
      // a manual stop pins the image down until an explicit manual start
      dout(5) << "stopped manually, ignoring start without manual flag"
              << dendl;
      r = -EPERM;
    } else if (restart && !m_restart_requested) {
      dout(10) << "canceled restart" << dendl;
      r = -ECANCELED;
    } else {
      // reset per-run flags before entering STARTING
      m_state = STATE_STARTING;
      m_last_r = 0;
      m_state_desc.clear();
      m_manual_stop = false;
      m_delete_requested = false;
      m_restart_requested = false;
      m_status_removed = false;

      if (on_finish != nullptr) {
        ceph_assert(m_on_start_finish == nullptr);
        m_on_start_finish = on_finish;
      }
      ceph_assert(m_on_stop_contexts.empty());
    }
  }

  if (r < 0) {
    // complete the callback outside the lock
    if (on_finish) {
      on_finish->complete(r);
    }
    return;
  }

  bootstrap();
}

// Resolve the remote image and prepare local state via BootstrapRequest.
template <typename I>
void ImageReplayer<I>::bootstrap() {
  dout(10) << dendl;

  std::unique_lock locker{m_lock};
  if (m_peers.empty()) {
    locker.unlock();

    dout(5) << "no peer clusters" << dendl;
    on_start_fail(-ENOENT, "no peer clusters");
    return;
  }

  // TODO need to support multiple remote images
  ceph_assert(!m_peers.empty());
  m_remote_image_peer = *m_peers.begin();

  ceph_assert(m_state_builder == nullptr);
  auto ctx = create_context_callback<
      ImageReplayer, &ImageReplayer<I>::handle_bootstrap>(this);
  auto request = image_replayer::BootstrapRequest<I>::create(
      m_threads, m_local_io_ctx, m_remote_image_peer.io_ctx, m_instance_watcher,
      m_global_image_id, m_local_mirror_uuid,
      m_remote_image_peer.remote_pool_meta, m_cache_manager_handler,
      m_pool_meta_cache, &m_progress_cxt, &m_state_builder, &m_resync_requested,
      ctx);

  request->get();
  m_bootstrap_request = request;

  // proceed even if stop was requested to allow for m_delete_requested
  // to get set; cancel() would prevent BootstrapRequest from going into
  // image sync
  if (m_stop_requested) {
    request->cancel();
  }
  locker.unlock();

  update_mirror_image_status(false, boost::none);
  request->send();
}

template <typename I>
void ImageReplayer<I>::handle_bootstrap(int r) {
  dout(10) << "r=" << r << dendl;
  {
    std::lock_guard locker{m_lock};
    m_bootstrap_request->put();
    m_bootstrap_request = nullptr;
  }

  // set m_delete_requested early to ensure that in case remote
  // image no longer exists local image
  // gets deleted even if start
  // is interrupted
  if (r == -ENOLINK) {
    dout(5) << "remote image no longer exists" << dendl;
    m_delete_requested = true;
  }

  // note: the first branch that matches returns; ordering matters
  if (on_start_interrupted()) {
    return;
  } else if (r == -ENOMSG) {
    dout(5) << "local image is primary" << dendl;
    on_start_fail(0, "local image is primary");
    return;
  } else if (r == -EREMOTEIO) {
    dout(5) << "remote image is not primary" << dendl;
    on_start_fail(-EREMOTEIO, "remote image is not primary");
    return;
  } else if (r == -EEXIST) {
    on_start_fail(r, "split-brain detected");
    return;
  } else if (r == -ENOLINK) {
    // treated as a clean stop; deletion was flagged above
    on_start_fail(0, "remote image no longer exists");
    return;
  } else if (r == -ERESTART) {
    on_start_fail(r, "image in transient state, try again");
    return;
  } else if (r < 0) {
    on_start_fail(r, "error bootstrapping replay");
    return;
  } else if (m_resync_requested) {
    on_start_fail(0, "resync requested");
    return;
  }

  start_replay();
}

// Create the replayer from the bootstrapped state and initialize it.
template <typename I>
void ImageReplayer<I>::start_replay() {
  dout(10) << dendl;

  std::unique_lock locker{m_lock};
  ceph_assert(m_replayer == nullptr);
  m_replayer = m_state_builder->create_replayer(m_threads, m_instance_watcher,
                                                m_local_mirror_uuid,
                                                m_pool_meta_cache,
                                                m_replayer_listener);

  auto ctx = create_context_callback<
    ImageReplayer<I>, &ImageReplayer<I>::handle_start_replay>(this);
  m_replayer->init(ctx);
}

template <typename I>
void ImageReplayer<I>::handle_start_replay(int r) {
  dout(10) << "r=" << r << dendl;

  if (on_start_interrupted()) {
    return;
  } else if (r < 0) {
    std::string error_description = m_replayer->get_error_description();
    if (r == -ENOTCONN && m_replayer->is_resync_requested()) {
      std::unique_lock locker{m_lock};
      m_resync_requested = true;
    }

    // shut down not required if init failed
    m_replayer->destroy();
    m_replayer = nullptr;

    derr << "error starting replay: " << cpp_strerror(r) << dendl;
    on_start_fail(r, error_description);
    return;
  }

  Context *on_finish = nullptr;
  {
    std::unique_lock locker{m_lock};
    ceph_assert(m_state == STATE_STARTING);
    m_state = STATE_REPLAYING;
    std::swap(m_on_start_finish, on_finish);

    // lock order: m_lock before timer_lock
    std::unique_lock timer_locker{m_threads->timer_lock};
    schedule_update_mirror_image_replay_status();
  }

  update_mirror_image_status(true, boost::none);
  if (on_replay_interrupted()) {
    // a stop raced with start-up; still complete the start callback
    if (on_finish != nullptr) {
      on_finish->complete(r);
    }
    return;
  }

  dout(10) << "start succeeded" << dendl;
  if (on_finish != nullptr) {
    dout(10) << "on finish complete, r=" << r << dendl;
    on_finish->complete(r);
  }
}

// Fail a start attempt: transitions STARTING -> STOPPING on the work
// queue and enters the shut-down sequence.
template <typename I>
void ImageReplayer<I>::on_start_fail(int r, const std::string &desc)
{
  dout(10) << "r=" << r << ", desc=" << desc << dendl;
  Context *ctx = new LambdaContext([this, r, desc](int _r) {
      {
        std::lock_guard locker{m_lock};
        ceph_assert(m_state == STATE_STARTING);
        m_state = STATE_STOPPING;
        if (r < 0 && r != -ECANCELED && r != -EREMOTEIO && r != -ENOENT) {
          derr << "start failed: " << cpp_strerror(r) << dendl;
        } else {
          dout(10) << "start canceled" << dendl;
        }
      }

      set_state_description(r, desc);
      update_mirror_image_status(false, boost::none);
      shut_down(r);
    });
  m_threads->work_queue->queue(ctx, 0);
}

template <typename I>
bool ImageReplayer<I>::on_start_interrupted() {
  std::lock_guard locker{m_lock};
  return on_start_interrupted(m_lock);
}

// Caller must hold m_lock; fails the start if a stop was requested.
template <typename I>
bool ImageReplayer<I>::on_start_interrupted(ceph::mutex& lock) {
  ceph_assert(ceph_mutex_is_locked(m_lock));
  ceph_assert(m_state == STATE_STARTING);
  if (!m_stop_requested) {
    return false;
  }

  on_start_fail(-ECANCELED, "");
  return true;
}

// Stop replaying: cancels an in-flight start, interrupts replay, or
// joins/records intent if a stop is already in progress.
template <typename I>
void ImageReplayer<I>::stop(Context *on_finish, bool manual, bool restart)
{
  dout(10) << "on_finish=" << on_finish << ", manual=" << manual
           << ", restart=" << restart << dendl;

  image_replayer::BootstrapRequest<I> *bootstrap_request = nullptr;
  bool shut_down_replay
    = false;
  bool is_stopped = false;
  {
    std::lock_guard locker{m_lock};

    if (!is_running_()) {
      // not running: record manual/restart intent or join an in-flight stop
      if (manual && !m_manual_stop) {
        dout(10) << "marking manual" << dendl;
        m_manual_stop = true;
      }
      if (!restart && m_restart_requested) {
        dout(10) << "canceling restart" << dendl;
        m_restart_requested = false;
      }
      if (is_stopped_()) {
        dout(10) << "already stopped" << dendl;
        is_stopped = true;
      } else {
        dout(10) << "joining in-flight stop" << dendl;
        if (on_finish != nullptr) {
          m_on_stop_contexts.push_back(on_finish);
        }
      }
    } else {
      if (m_state == STATE_STARTING) {
        dout(10) << "canceling start" << dendl;
        if (m_bootstrap_request != nullptr) {
          // take a ref so the request stays alive for the cancel below
          bootstrap_request = m_bootstrap_request;
          bootstrap_request->get();
        }
      } else {
        dout(10) << "interrupting replay" << dendl;
        shut_down_replay = true;
      }

      ceph_assert(m_on_stop_contexts.empty());
      if (on_finish != nullptr) {
        m_on_stop_contexts.push_back(on_finish);
      }
      m_stop_requested = true;
      m_manual_stop = manual;
    }
  }

  if (is_stopped) {
    if (on_finish) {
      on_finish->complete(-EINVAL);
    }
    return;
  }

  // avoid holding lock since bootstrap request will update status
  if (bootstrap_request != nullptr) {
    dout(10) << "canceling bootstrap" << dendl;
    bootstrap_request->cancel();
    bootstrap_request->put();
  }

  if (shut_down_replay) {
    on_stop_journal_replay();
  }
}

// Stop the replay loop and enter STOPPING; safe to invoke repeatedly.
template <typename I>
void ImageReplayer<I>::on_stop_journal_replay(int r, const std::string &desc)
{
  dout(10) << dendl;

  {
    std::lock_guard locker{m_lock};
    if (m_state != STATE_REPLAYING) {
      // might be invoked multiple times while stopping
      return;
    }

    m_stop_requested = true;
    m_state = STATE_STOPPING;
  }

  cancel_update_mirror_image_replay_status();
  set_state_description(r, desc);
  update_mirror_image_status(true, boost::none);
  shut_down(0);
}

// Stop, then start again; on_finish fires after the restart attempt.
template <typename I>
void ImageReplayer<I>::restart(Context *on_finish)
{
  {
    std::lock_guard locker{m_lock};
    m_restart_requested = true;
  }

  auto ctx = new LambdaContext(
    [this, on_finish](int r) {
      if (r < 0) {
        // Try start anyway.
      }
      start(on_finish, true, true);
    });
  stop(ctx, false, true);
}

// Synchronously flush queued replay events (no-op unless replaying),
// then refresh the mirror status on success.
template <typename I>
void ImageReplayer<I>::flush()
{
  C_SaferCond ctx;

  {
    std::unique_lock locker{m_lock};
    if (m_state != STATE_REPLAYING) {
      return;
    }

    dout(10) << dendl;
    ceph_assert(m_replayer != nullptr);
    m_replayer->flush(&ctx);
  }

  int r = ctx.wait();
  if (r >= 0) {
    update_mirror_image_status(false, boost::none);
  }
}

// If a stop was requested, kick off the stop sequence; returns whether
// replay was interrupted.
template <typename I>
bool ImageReplayer<I>::on_replay_interrupted()
{
  bool shut_down;
  {
    std::lock_guard locker{m_lock};
    shut_down = m_stop_requested;
  }

  if (shut_down) {
    on_stop_journal_replay();
  }
  return shut_down;
}

template <typename I>
void ImageReplayer<I>::print_status(Formatter *f)
{
  dout(10) << dendl;

  std::lock_guard l{m_lock};

  f->open_object_section("image_replayer");
  f->dump_string("name", m_image_spec);
  f->dump_string("state", to_string(m_state));
  f->close_section();
}

// Arm the periodic (10s) replay-status refresh; requires m_lock and
// timer_lock to both be held by the caller.
template <typename I>
void ImageReplayer<I>::schedule_update_mirror_image_replay_status() {
  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
  ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));
  if (m_state != STATE_REPLAYING) {
    return;
  }

  dout(10) << dendl;

  // periodically update the replaying status even if nothing changes
  // so that we can adjust our performance stats
  ceph_assert(m_update_status_task == nullptr);
  m_update_status_task = create_context_callback<
    ImageReplayer<I>,
    &ImageReplayer<I>::handle_update_mirror_image_replay_status>(this);
  m_threads->timer->add_event_after(10, m_update_status_task);
}

// Timer callback: pushes a status update onto the work queue and
// re-arms the timer (runs with timer_lock held).
template <typename I>
void ImageReplayer<I>::handle_update_mirror_image_replay_status(int r) {
  dout(10) << dendl;

  ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock));

  ceph_assert(m_update_status_task != nullptr);
  m_update_status_task = nullptr;

  auto ctx = new LambdaContext([this](int) {
      update_mirror_image_status(false, boost::none);

      // lock order: m_lock before timer_lock
      std::unique_lock locker{m_lock};
      std::unique_lock timer_locker{m_threads->timer_lock};

      schedule_update_mirror_image_replay_status();
      m_in_flight_op_tracker.finish_op();
    });

  m_in_flight_op_tracker.start_op();
  m_threads->work_queue->queue(ctx, 0);
}

template <typename I>
void ImageReplayer<I>::cancel_update_mirror_image_replay_status() {
  std::unique_lock timer_locker{m_threads->timer_lock};
  if (m_update_status_task != nullptr) {
    dout(10) << dendl;

    if (m_threads->timer->cancel_event(m_update_status_task)) {
      // only clear on successful cancel; otherwise the task is already
      // running and clears the pointer itself
      m_update_status_task = nullptr;
    }
  }
}

// Queue an asynchronous mirror-status update on the work queue.
template <typename I>
void ImageReplayer<I>::update_mirror_image_status(
    bool force, const OptionalState &opt_state) {
  dout(15) << "force=" << force << ", "
           << "state=" << opt_state << dendl;

  {
    std::lock_guard locker{m_lock};
    if (!force && !is_stopped_() && !is_running_()) {
      dout(15) << "shut down in-progress: ignoring update" << dendl;
      return;
    }
  }

  m_in_flight_op_tracker.start_op();
  auto ctx = new LambdaContext(
    [this, force, opt_state](int r) {
      set_mirror_image_status_update(force, opt_state);
    });
  m_threads->work_queue->queue(ctx, 0);
}

// Compute the cls::rbd mirror-image status from the current state and
// publish it via the local (and, if configured, remote) status updater.
// Balances the start_op() taken by update_mirror_image_status().
template <typename I>
void ImageReplayer<I>::set_mirror_image_status_update(
    bool force, const OptionalState &opt_state) {
  dout(15) << "force=" << force << ", "
           << "state=" << opt_state << dendl;

  reregister_admin_socket_hook();

  State state;
  std::string state_desc;
  int last_r;
  bool stopping_replay;

  // make_optional(false, ...) yields an engaged=false optional of the
  // correct type
  auto mirror_image_status_state = boost::make_optional(
    false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
  image_replayer::BootstrapRequest<I>* bootstrap_request = nullptr;
  {
    // snapshot all state under the lock
    std::lock_guard locker{m_lock};
    state = m_state;
    state_desc = m_state_desc;
    mirror_image_status_state = m_mirror_image_status_state;
    last_r = m_last_r;
    stopping_replay = (m_replayer != nullptr);

    if (m_bootstrap_request != nullptr) {
      bootstrap_request = m_bootstrap_request;
      bootstrap_request->get();
    }
  }

  bool syncing = false;
  if (bootstrap_request != nullptr) {
    syncing = bootstrap_request->is_syncing();
    bootstrap_request->put();
    bootstrap_request = nullptr;
  }

  if (opt_state) {
    state = *opt_state;
  }

  cls::rbd::MirrorImageSiteStatus status;
  status.up = true;
  switch (state) {
  case STATE_STARTING:
    if (syncing) {
      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING;
      status.description = state_desc.empty() ? "syncing" : state_desc;
      mirror_image_status_state = status.state;
    } else {
      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY;
      status.description = "starting replay";
    }
    break;
  case STATE_REPLAYING:
    status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING;
    {
      std::string desc;
      // if the replay status isn't immediately available this method is
      // re-invoked from the callback once it is ready
      auto on_req_finish = new LambdaContext(
        [this, force](int r) {
          dout(15) << "replay status ready: r=" << r << dendl;
          if (r >= 0) {
            set_mirror_image_status_update(force, boost::none);
          } else if (r == -EAGAIN) {
            m_in_flight_op_tracker.finish_op();
          }
        });

      ceph_assert(m_replayer != nullptr);
      if (!m_replayer->get_replay_status(&desc, on_req_finish)) {
        dout(15) << "waiting for replay status" << dendl;
        return;
      }

      status.description = "replaying, " + desc;
      mirror_image_status_state = boost::make_optional(
        false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
    }
    break;
  case STATE_STOPPING:
    if (stopping_replay) {
      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY;
      status.description = state_desc.empty() ? "stopping replay" : state_desc;
      break;
    }
    // FALLTHROUGH
  case STATE_STOPPED:
    if (last_r == -EREMOTEIO) {
      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN;
      status.description = state_desc;
      mirror_image_status_state = status.state;
    } else if (last_r < 0 && last_r != -ECANCELED) {
      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR;
      status.description = state_desc;
      mirror_image_status_state = status.state;
    } else {
      status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPED;
      status.description = state_desc.empty() ? "stopped" : state_desc;
      mirror_image_status_state = boost::none;
    }
    break;
  default:
    ceph_assert(!"invalid state");
  }

  {
    std::lock_guard locker{m_lock};
    m_mirror_image_status_state = mirror_image_status_state;
  }

  // prevent the status from ping-ponging when failed replays are restarted
  if (mirror_image_status_state &&
      *mirror_image_status_state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR) {
    status.state = *mirror_image_status_state;
  }

  dout(15) << "status=" << status << dendl;
  m_local_status_updater->set_mirror_image_status(m_global_image_id, status,
                                                  force);
  if (m_remote_image_peer.mirror_status_updater != nullptr) {
    m_remote_image_peer.mirror_status_updater->set_mirror_image_status(
      m_global_image_id, status, force);
  }

  m_in_flight_op_tracker.finish_op();
}

// Tear down the replayer and state builder (as a reverse-ordered chain
// of contexts), then finish via handle_shut_down().
template <typename I>
void ImageReplayer<I>::shut_down(int r) {
  dout(10) << "r=" << r << dendl;

  {
    std::lock_guard locker{m_lock};
    ceph_assert(m_state == STATE_STOPPING);
  }

  // defer until all queued status updates have drained
  if (!m_in_flight_op_tracker.empty()) {
    dout(15) << "waiting for in-flight operations to complete" << dendl;
    m_in_flight_op_tracker.wait_for_ops(new LambdaContext([this, r](int) {
        shut_down(r);
      }));
    return;
  }

  // chain the shut down sequence (reverse order)
  Context *ctx = new LambdaContext(
    [this, r](int _r) {
      update_mirror_image_status(true, STATE_STOPPED);
      handle_shut_down(r);
    });

  // destruct
  // the state builder
  if (m_state_builder != nullptr) {
    ctx = new LambdaContext([this, ctx](int r) {
        m_state_builder->close(ctx);
      });
  }

  // close the replayer
  if (m_replayer != nullptr) {
    ctx = new LambdaContext([this, ctx](int r) {
        m_replayer->destroy();
        m_replayer = nullptr;
        ctx->complete(0);
      });
    ctx = new LambdaContext([this, ctx](int r) {
        m_replayer->shut_down(ctx);
      });
  }

  m_threads->work_queue->queue(ctx, 0);
}

// Final stage of shutdown: handles trash-move for delete/resync,
// status removal, and completion of pending start/stop callbacks.
// Re-enters itself via callbacks until every stage has finished.
template <typename I>
void ImageReplayer<I>::handle_shut_down(int r) {
  bool resync_requested = false;
  bool delete_requested = false;
  bool unregister_asok_hook = false;
  {
    std::lock_guard locker{m_lock};

    if (m_delete_requested && m_state_builder != nullptr &&
        !m_state_builder->local_image_id.empty()) {
      ceph_assert(m_state_builder->remote_image_id.empty());
      dout(0) << "remote image no longer exists: scheduling deletion" << dendl;
      unregister_asok_hook = true;
      std::swap(delete_requested, m_delete_requested);
      m_delete_in_progress = true;
    }

    std::swap(resync_requested, m_resync_requested);
    if (!delete_requested && !resync_requested && m_last_r == -ENOENT &&
        ((m_state_builder == nullptr) ||
         (m_state_builder->local_image_id.empty() &&
          m_state_builder->remote_image_id.empty()))) {
      dout(0) << "mirror image no longer exists" << dendl;
      unregister_asok_hook = true;
      m_finished = true;
    }
  }

  if (unregister_asok_hook) {
    unregister_admin_socket_hook();
  }

  if (delete_requested || resync_requested) {
    dout(5) << "moving image to trash" << dendl;
    auto ctx = new LambdaContext([this, r](int) {
      handle_shut_down(r);
    });
    ImageDeleter<I>::trash_move(m_local_io_ctx, m_global_image_id,
                                resync_requested, m_threads->work_queue, ctx);
    return;
  }

  if (!m_in_flight_op_tracker.empty()) {
    dout(15) << "waiting for in-flight operations to complete" << dendl;
    m_in_flight_op_tracker.wait_for_ops(new LambdaContext([this, r](int) {
        handle_shut_down(r);
      }));
    return;
  }

  if (!m_status_removed) {
    auto ctx = new LambdaContext([this, r](int) {
        m_status_removed = true;
        handle_shut_down(r);
      });
    remove_image_status(m_delete_in_progress, ctx);
    return;
  }

  if (m_state_builder != nullptr) {
    m_state_builder->destroy();
    m_state_builder = nullptr;
  }

  dout(10) << "stop complete" << dendl;
  Context *on_start = nullptr;
  Contexts on_stop_contexts;
  {
    std::lock_guard locker{m_lock};
    std::swap(on_start, m_on_start_finish);
    on_stop_contexts = std::move(m_on_stop_contexts);
    m_stop_requested = false;
    ceph_assert(m_state == STATE_STOPPING);
    m_state = STATE_STOPPED;
  }

  if (on_start != nullptr) {
    dout(10) << "on start finish complete, r=" << r << dendl;
    on_start->complete(r);
    // stop callbacks after a failed start still see success
    r = 0;
  }
  for (auto ctx : on_stop_contexts) {
    dout(10) << "on stop finish " << ctx << " complete, r=" << r << dendl;
    ctx->complete(r);
  }
}

// React to replayer events: a local image rename, a resync request, or
// an interrupted replay.
template <typename I>
void ImageReplayer<I>::handle_replayer_notification() {
  dout(10) << dendl;

  std::unique_lock locker{m_lock};
  if (m_state != STATE_REPLAYING) {
    // might be attempting to shut down
    return;
  }

  {
    // detect a rename of the local image
    ceph_assert(m_state_builder != nullptr &&
                m_state_builder->local_image_ctx != nullptr);
    std::shared_lock image_locker{m_state_builder->local_image_ctx->image_lock};
    if (m_local_image_name != m_state_builder->local_image_ctx->name) {
      // will re-register with new name after next status update
      dout(10) << "image renamed" << dendl;
      m_local_image_name = m_state_builder->local_image_ctx->name;
    }
  }

  // replayer cannot be shut down while notification is in-flight
  ceph_assert(m_replayer != nullptr);
  locker.unlock();

  if (m_replayer->is_resync_requested()) {
    dout(10) << "resync requested" << dendl;
    m_resync_requested = true;
    on_stop_journal_replay(0, "resync requested");
    return;
  }

  if (!m_replayer->is_replaying()) {
    auto error_code = m_replayer->get_error_code();
    auto
    error_description = m_replayer->get_error_description();
    dout(10) << "replay interrupted: "
             << "r=" << error_code << ", "
             << "error=" << error_description << dendl;
    on_stop_journal_replay(error_code, error_description);
    return;
  }

  update_mirror_image_status(false, {});
}

// Human-readable name for an internal state (used in status dumps).
template <typename I>
std::string ImageReplayer<I>::to_string(const State state) {
  switch (state) {
  case ImageReplayer<I>::STATE_STARTING:
    return "Starting";
  case ImageReplayer<I>::STATE_REPLAYING:
    return "Replaying";
  case ImageReplayer<I>::STATE_STOPPING:
    return "Stopping";
  case ImageReplayer<I>::STATE_STOPPED:
    return "Stopped";
  default:
    break;
  }
  return "Unknown(" + stringify(state) + ")";
}

// Register the per-image asok commands under the current image spec
// (no-op if a hook is already registered).
template <typename I>
void ImageReplayer<I>::register_admin_socket_hook() {
  ImageReplayerAdminSocketHook<I> *asok_hook;
  {
    std::lock_guard locker{m_lock};
    if (m_asok_hook != nullptr) {
      return;
    }

    dout(15) << "registered asok hook: " << m_image_spec << dendl;
    asok_hook = new ImageReplayerAdminSocketHook<I>(
      g_ceph_context, m_image_spec, this);
    int r = asok_hook->register_commands();
    if (r == 0) {
      m_asok_hook = asok_hook;
      return;
    }
    derr << "error registering admin socket commands" << dendl;
  }
  // registration failed: delete outside the lock
  delete asok_hook;
}

template <typename I>
void ImageReplayer<I>::unregister_admin_socket_hook() {
  dout(15) << dendl;

  AdminSocketHook *asok_hook = nullptr;
  {
    std::lock_guard locker{m_lock};
    std::swap(asok_hook, m_asok_hook);
  }
  delete asok_hook;
}

// Re-register the asok hook when the image spec (pool/image name) has
// changed, e.g. after the local image name is resolved or renamed.
template <typename I>
void ImageReplayer<I>::reregister_admin_socket_hook() {
  std::unique_lock locker{m_lock};
  if (m_state == STATE_STARTING && m_bootstrap_request != nullptr) {
    m_local_image_name = m_bootstrap_request->get_local_image_name();
  }

  auto image_spec = image_replayer::util::compute_image_spec(
    m_local_io_ctx, m_local_image_name);
  if (m_asok_hook != nullptr && m_image_spec == image_spec) {
    return;
  }

  dout(15) << "old_image_spec=" << m_image_spec << ", "
           << "new_image_spec=" << image_spec << dendl;
  m_image_spec = image_spec;

  if (m_state == STATE_STOPPING || m_state == STATE_STOPPED) {
    // no need to re-register if stopping
    return;
  }
  locker.unlock();

  unregister_admin_socket_hook();
  register_admin_socket_hook();
}

// Remove this image's status from the local updater, then chain to the
// remote updater.
template <typename I>
void ImageReplayer<I>::remove_image_status(bool force, Context *on_finish)
{
  auto ctx = new LambdaContext([this, force, on_finish](int) {
      remove_image_status_remote(force, on_finish);
    });

  if (m_local_status_updater->exists(m_global_image_id)) {
    dout(15) << "removing local mirror image status" << dendl;
    if (force) {
      m_local_status_updater->remove_mirror_image_status(
        m_global_image_id, true, ctx);
    } else {
      m_local_status_updater->remove_refresh_mirror_image_status(
        m_global_image_id, ctx);
    }
    return;
  }

  ctx->complete(0);
}

template <typename I>
void ImageReplayer<I>::remove_image_status_remote(bool force, Context *on_finish)
{
  if (m_remote_image_peer.mirror_status_updater != nullptr &&
      m_remote_image_peer.mirror_status_updater->exists(m_global_image_id)) {
    dout(15) << "removing remote mirror image status" << dendl;
    if (force) {
      m_remote_image_peer.mirror_status_updater->remove_mirror_image_status(
        m_global_image_id, true, on_finish);
    } else {
      m_remote_image_peer.mirror_status_updater->remove_refresh_mirror_image_status(
        m_global_image_id, on_finish);
    }
    return;
  }
  if (on_finish) {
    on_finish->complete(0);
  }
}

template <typename I>
std::ostream &operator<<(std::ostream &os, const ImageReplayer<I> &replayer)
{
  os << "ImageReplayer: " << &replayer << " [" << replayer.get_local_pool_id()
     << "/" << replayer.get_global_image_id() << "]";
  return os;
}

} // namespace mirror
} // namespace rbd

template class rbd::mirror::ImageReplayer<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h
new file mode 100644
index 000000000..432fdf225
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageReplayer.h
@@ -0,0 +1,273 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_H

#include "common/AsyncOpTracker.h"
#include "common/ceph_mutex.h"
#include "include/rados/librados.hpp"
#include "cls/rbd/cls_rbd_types.h"
#include "ProgressContext.h"
#include "tools/rbd_mirror/Types.h"
#include "tools/rbd_mirror/image_replayer/Types.h"
#include <boost/optional.hpp>
#include <string>

class AdminSocketHook;

namespace journal { struct CacheManagerHandler; }
namespace librbd { class ImageCtx; }

namespace rbd {
namespace mirror {

template <typename> struct InstanceWatcher;
template <typename> struct MirrorStatusUpdater;
struct PoolMetaCache;
template <typename> struct Threads;

namespace image_replayer {

class Replayer;
template <typename> class BootstrapRequest;
template <typename> class StateBuilder;

} // namespace image_replayer

/**
 * Replays changes from a remote cluster for a single image.
 */
template <typename ImageCtxT = librbd::ImageCtx>
class ImageReplayer {
public:
  static ImageReplayer *create(
      librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid,
      const std::string &global_image_id, Threads<ImageCtxT> *threads,
      InstanceWatcher<ImageCtxT> *instance_watcher,
      MirrorStatusUpdater<ImageCtxT>* local_status_updater,
      journal::CacheManagerHandler *cache_manager_handler,
      PoolMetaCache* pool_meta_cache) {
    return new ImageReplayer(local_io_ctx, local_mirror_uuid, global_image_id,
                             threads, instance_watcher, local_status_updater,
                             cache_manager_handler, pool_meta_cache);
  }
  void destroy() {
    delete this;
  }

  ImageReplayer(librados::IoCtx &local_io_ctx,
                const std::string &local_mirror_uuid,
                const std::string &global_image_id,
                Threads<ImageCtxT> *threads,
                InstanceWatcher<ImageCtxT> *instance_watcher,
                MirrorStatusUpdater<ImageCtxT>* local_status_updater,
                journal::CacheManagerHandler *cache_manager_handler,
                PoolMetaCache* pool_meta_cache);
  virtual ~ImageReplayer();
  ImageReplayer(const ImageReplayer&) = delete;
  ImageReplayer& operator=(const ImageReplayer&) = delete;

  // state queries (each takes m_lock)
  bool is_stopped() { std::lock_guard l{m_lock}; return is_stopped_(); }
  bool is_running() { std::lock_guard l{m_lock}; return is_running_(); }
  bool is_replaying() { std::lock_guard l{m_lock}; return is_replaying_(); }

  std::string get_name() { std::lock_guard l{m_lock}; return m_image_spec; };
  void set_state_description(int r, const std::string &desc);

  // TODO temporary until policy handles release of image replayers
  inline bool is_finished() const {
    std::lock_guard locker{m_lock};
    return m_finished;
  }
  inline void set_finished(bool finished) {
    std::lock_guard locker{m_lock};
    m_finished = finished;
  }

  inline bool is_blocklisted() const {
    std::lock_guard locker{m_lock};
    return (m_last_r == -EBLOCKLISTED);
  }

  image_replayer::HealthState get_health_state() const;

  void add_peer(const Peer<ImageCtxT>& peer);

  inline int64_t get_local_pool_id() const {
    return m_local_io_ctx.get_id();
  }
  inline const std::string& get_global_image_id() const {
    return m_global_image_id;
  }

  void start(Context *on_finish, bool manual = false, bool restart = false);
  void stop(Context *on_finish, bool manual = false, bool restart = false);
  void restart(Context *on_finish = nullptr);
  void flush();

  void print_status(Formatter *f);

protected:
  /**
   * @verbatim
   *                   (error)
   * <uninitialized> <------------------------------------ FAIL
   *    |                                                   ^
   *    v                                                   *
   * <starting>                                             *
   *    |                                                   *
   *    v                                          (error)  *
   * BOOTSTRAP_IMAGE  * * * * * * * * * * * * * * * * * * * *
   *    |                                                   *
   *    v                                          (error)  *
   * START_REPLAY * * * * * * * * * * * * * * * * * * * * * *
   *    |
   *    v
   * REPLAYING
   *    |
   *    v
   * JOURNAL_REPLAY_SHUT_DOWN
   *    |
   *    v
   * LOCAL_IMAGE_CLOSE
   *    |
   *    v
   * <stopped>
   *
   * @endverbatim
   */

  void on_start_fail(int r, const std::string &desc);
  bool on_start_interrupted();
  bool on_start_interrupted(ceph::mutex& lock);

  void on_stop_journal_replay(int r = 0, const std::string &desc = "");

  bool on_replay_interrupted();

private:
  typedef std::set<Peer<ImageCtxT>> Peers;
  typedef std::list<Context *> Contexts;

  enum State {
    STATE_UNKNOWN,
    STATE_STARTING,
    STATE_REPLAYING,
    STATE_STOPPING,
    STATE_STOPPED,
  };

  struct ReplayerListener;

  typedef boost::optional<State> OptionalState;
  typedef boost::optional<cls::rbd::MirrorImageStatusState>
      OptionalMirrorImageStatusState;

  // forwards bootstrap/sync progress into the replayer's status
  class BootstrapProgressContext : public ProgressContext {
  public:
    BootstrapProgressContext(ImageReplayer<ImageCtxT> *replayer) :
      replayer(replayer) {
    }

    void update_progress(const std::string &description,
                         bool flush = true) override;

  private:
    ImageReplayer<ImageCtxT> *replayer;
  };

  librados::IoCtx &m_local_io_ctx;
  std::string m_local_mirror_uuid;
  std::string m_global_image_id;
  Threads<ImageCtxT> *m_threads;
  InstanceWatcher<ImageCtxT> *m_instance_watcher;
  MirrorStatusUpdater<ImageCtxT>* m_local_status_updater;
  journal::CacheManagerHandler *m_cache_manager_handler;
  PoolMetaCache* m_pool_meta_cache;

  Peers m_peers;
  Peer<ImageCtxT> m_remote_image_peer;

  std::string m_local_image_name;
  std::string m_image_spec;

  // protects all mutable state below
  mutable ceph::mutex m_lock;
  State m_state = STATE_STOPPED;
  std::string m_state_desc;

  OptionalMirrorImageStatusState m_mirror_image_status_state =
    boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN);
  int m_last_r = 0;

  BootstrapProgressContext m_progress_cxt;

  bool m_finished = false;
  bool m_delete_in_progress = false;
  bool m_delete_requested = false;
  bool m_resync_requested = false;
  bool m_restart_requested = false;

  bool m_status_removed = false;

  image_replayer::StateBuilder<ImageCtxT>* m_state_builder = nullptr;
  image_replayer::Replayer* m_replayer = nullptr;
  ReplayerListener* m_replayer_listener = nullptr;

  Context *m_on_start_finish = nullptr;
  Contexts m_on_stop_contexts;
  bool m_stop_requested = false;
  bool m_manual_stop = false;

  AdminSocketHook *m_asok_hook = nullptr;

  image_replayer::BootstrapRequest<ImageCtxT> *m_bootstrap_request = nullptr;

  AsyncOpTracker m_in_flight_op_tracker;

  Context* m_update_status_task = nullptr;

  static std::string to_string(const State state);

  bool is_stopped_() const {
    return m_state == STATE_STOPPED;
  }
  bool is_running_() const {
    return !is_stopped_() && m_state != STATE_STOPPING && !m_stop_requested;
  }
  bool is_replaying_() const {
    return (m_state == STATE_REPLAYING);
  }

  void schedule_update_mirror_image_replay_status();
  void handle_update_mirror_image_replay_status(int r);
  void cancel_update_mirror_image_replay_status();

  void update_mirror_image_status(bool force, const OptionalState &state);
  void set_mirror_image_status_update(bool force, const OptionalState &state);

  void shut_down(int r);
  void handle_shut_down(int r);

  void bootstrap();
  void handle_bootstrap(int r);

  void start_replay();
  void handle_start_replay(int r);

  void handle_replayer_notification();

  void register_admin_socket_hook();
  void unregister_admin_socket_hook();
  void reregister_admin_socket_hook();
  void remove_image_status(bool force, Context *on_finish);
  void remove_image_status_remote(bool force, Context *on_finish);

};

} // namespace mirror
} // namespace rbd

extern template class rbd::mirror::ImageReplayer<librbd::ImageCtx>;

#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_H
diff --git a/src/tools/rbd_mirror/ImageSync.cc b/src/tools/rbd_mirror/ImageSync.cc
new file mode 100644
index 000000000..43d0c6663
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSync.cc
@@ -0,0 +1,469 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "ImageSync.h"
#include "InstanceWatcher.h"
#include "ProgressContext.h"
#include "common/debug.h"
#include "common/Timer.h"
#include "common/errno.h"
#include "librbd/DeepCopyRequest.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageState.h"
#include "librbd/Utils.h"
#include "librbd/internal.h"
#include "librbd/asio/ContextWQ.h"
#include "librbd/deep_copy/Handler.h"
#include "tools/rbd_mirror/Threads.h"
#include "tools/rbd_mirror/image_sync/SyncPointCreateRequest.h"
#include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.h"
#include "tools/rbd_mirror/image_sync/Types.h"

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rbd_mirror
#undef dout_prefix
#define dout_prefix *_dout << "rbd::mirror::ImageSync: " \
                           << this << " " << __func__

namespace rbd {
namespace mirror {

using namespace image_sync;
using librbd::util::create_async_context_callback;
using librbd::util::create_context_callback;
using librbd::util::unique_lock_name;

// Deep-copy progress handler that relays per-object progress back to
// the owning ImageSync.
template <typename I>
class ImageSync<I>::ImageCopyProgressHandler
  : public
librbd::deep_copy::NoOpHandler { +public: + ImageCopyProgressHandler(ImageSync *image_sync) : image_sync(image_sync) { + } + + int update_progress(uint64_t object_no, uint64_t object_count) override { + image_sync->handle_copy_image_update_progress(object_no, object_count); + return 0; + } + + ImageSync *image_sync; +}; + +template <typename I> +ImageSync<I>::ImageSync( + Threads<I>* threads, + I *local_image_ctx, + I *remote_image_ctx, + const std::string &local_mirror_uuid, + image_sync::SyncPointHandler* sync_point_handler, + InstanceWatcher<I> *instance_watcher, + ProgressContext *progress_ctx, + Context *on_finish) + : CancelableRequest("rbd::mirror::ImageSync", local_image_ctx->cct, + on_finish), + m_threads(threads), + m_local_image_ctx(local_image_ctx), + m_remote_image_ctx(remote_image_ctx), + m_local_mirror_uuid(local_mirror_uuid), + m_sync_point_handler(sync_point_handler), + m_instance_watcher(instance_watcher), + m_progress_ctx(progress_ctx), + m_lock(ceph::make_mutex(unique_lock_name("ImageSync::m_lock", this))), + m_update_sync_point_interval( + m_local_image_ctx->cct->_conf.template get_val<double>( + "rbd_mirror_sync_point_update_age")) { +} + +template <typename I> +ImageSync<I>::~ImageSync() { + ceph_assert(m_image_copy_request == nullptr); + ceph_assert(m_image_copy_prog_handler == nullptr); + ceph_assert(m_update_sync_ctx == nullptr); +} + +template <typename I> +void ImageSync<I>::send() { + send_notify_sync_request(); +} + +template <typename I> +void ImageSync<I>::cancel() { + std::lock_guard locker{m_lock}; + + dout(10) << dendl; + + m_canceled = true; + + if (m_instance_watcher->cancel_sync_request(m_local_image_ctx->id)) { + return; + } + + if (m_image_copy_request != nullptr) { + m_image_copy_request->cancel(); + } +} + +template <typename I> +void ImageSync<I>::send_notify_sync_request() { + update_progress("NOTIFY_SYNC_REQUEST"); + + dout(10) << dendl; + + m_lock.lock(); + if (m_canceled) { + m_lock.unlock(); + 
CancelableRequest::finish(-ECANCELED); + return; + } + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_notify_sync_request>(this)); + m_instance_watcher->notify_sync_request(m_local_image_ctx->id, ctx); + m_lock.unlock(); +} + +template <typename I> +void ImageSync<I>::handle_notify_sync_request(int r) { + dout(10) << ": r=" << r << dendl; + + m_lock.lock(); + if (r == 0 && m_canceled) { + r = -ECANCELED; + } + m_lock.unlock(); + + if (r < 0) { + CancelableRequest::finish(r); + return; + } + + send_prune_catch_up_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_prune_catch_up_sync_point() { + update_progress("PRUNE_CATCH_UP_SYNC_POINT"); + + if (m_sync_point_handler->get_sync_points().empty()) { + send_create_sync_point(); + return; + } + + dout(10) << dendl; + + // prune will remove sync points with missing snapshots and + // ensure we have a maximum of one sync point (in case we + // restarted) + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_prune_catch_up_sync_point>(this); + SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create( + m_remote_image_ctx, false, m_sync_point_handler, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_prune_catch_up_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to prune catch-up sync point: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_create_sync_point() { + update_progress("CREATE_SYNC_POINT"); + + // TODO: when support for disconnecting laggy clients is added, + // re-connect and create catch-up sync point + if (!m_sync_point_handler->get_sync_points().empty()) { + send_copy_image(); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageSync<I>, 
&ImageSync<I>::handle_create_sync_point>(this); + SyncPointCreateRequest<I> *request = SyncPointCreateRequest<I>::create( + m_remote_image_ctx, m_local_mirror_uuid, m_sync_point_handler, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_create_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to create sync point: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_copy_image(); +} + +template <typename I> +void ImageSync<I>::send_copy_image() { + librados::snap_t snap_id_start = 0; + librados::snap_t snap_id_end; + librbd::deep_copy::ObjectNumber object_number; + int r = 0; + + m_snap_seqs_copy = m_sync_point_handler->get_snap_seqs(); + m_sync_points_copy = m_sync_point_handler->get_sync_points(); + ceph_assert(!m_sync_points_copy.empty()); + auto &sync_point = m_sync_points_copy.front(); + + { + std::shared_lock image_locker{m_remote_image_ctx->image_lock}; + snap_id_end = m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name); + if (snap_id_end == CEPH_NOSNAP) { + derr << ": failed to locate snapshot: " << sync_point.snap_name << dendl; + r = -ENOENT; + } else if (!sync_point.from_snap_name.empty()) { + snap_id_start = m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.from_snap_name); + if (snap_id_start == CEPH_NOSNAP) { + derr << ": failed to locate from snapshot: " + << sync_point.from_snap_name << dendl; + r = -ENOENT; + } + } + object_number = sync_point.object_number; + } + if (r < 0) { + finish(r); + return; + } + + m_lock.lock(); + if (m_canceled) { + m_lock.unlock(); + finish(-ECANCELED); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_copy_image>(this); + m_image_copy_prog_handler = new ImageCopyProgressHandler(this); + m_image_copy_request = librbd::DeepCopyRequest<I>::create( + m_remote_image_ctx, m_local_image_ctx, 
snap_id_start, snap_id_end, + 0, false, object_number, m_threads->work_queue, &m_snap_seqs_copy, + m_image_copy_prog_handler, ctx); + m_image_copy_request->get(); + m_lock.unlock(); + + update_progress("COPY_IMAGE"); + + m_image_copy_request->send(); +} + +template <typename I> +void ImageSync<I>::handle_copy_image(int r) { + dout(10) << ": r=" << r << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + m_image_copy_request->put(); + m_image_copy_request = nullptr; + delete m_image_copy_prog_handler; + m_image_copy_prog_handler = nullptr; + if (r == 0 && m_canceled) { + r = -ECANCELED; + } + + if (m_update_sync_ctx != nullptr) { + m_threads->timer->cancel_event(m_update_sync_ctx); + m_update_sync_ctx = nullptr; + } + + if (m_updating_sync_point) { + m_ret_val = r; + return; + } + } + + if (r == -ECANCELED) { + dout(10) << ": image copy canceled" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << ": failed to copy image: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_flush_sync_point(); +} + +template <typename I> +void ImageSync<I>::handle_copy_image_update_progress(uint64_t object_no, + uint64_t object_count) { + int percent = 100 * object_no / object_count; + update_progress("COPY_IMAGE " + stringify(percent) + "%"); + + std::lock_guard locker{m_lock}; + m_image_copy_object_no = object_no; + m_image_copy_object_count = object_count; + + if (m_update_sync_ctx == nullptr && !m_updating_sync_point) { + send_update_sync_point(); + } +} + +template <typename I> +void ImageSync<I>::send_update_sync_point() { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + m_update_sync_ctx = nullptr; + + if (m_canceled) { + return; + } + + ceph_assert(!m_sync_points_copy.empty()); + auto sync_point = &m_sync_points_copy.front(); + + if (sync_point->object_number && + (m_image_copy_object_no - 1) == sync_point->object_number.get()) { + // update sync point did not progress since last sync + return; + } + + m_updating_sync_point = 
true; + + if (m_image_copy_object_no > 0) { + sync_point->object_number = m_image_copy_object_no - 1; + } + + auto ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_update_sync_point>(this); + m_sync_point_handler->update_sync_points(m_snap_seqs_copy, + m_sync_points_copy, false, ctx); +} + +template <typename I> +void ImageSync<I>::handle_update_sync_point(int r) { + CephContext *cct = m_local_image_ctx->cct; + ldout(cct, 20) << ": r=" << r << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + m_updating_sync_point = false; + + if (m_image_copy_request != nullptr) { + m_update_sync_ctx = new LambdaContext( + [this](int r) { + std::lock_guard locker{m_lock}; + this->send_update_sync_point(); + }); + m_threads->timer->add_event_after( + m_update_sync_point_interval, m_update_sync_ctx); + return; + } + } + + send_flush_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_flush_sync_point() { + if (m_ret_val < 0) { + finish(m_ret_val); + return; + } + + update_progress("FLUSH_SYNC_POINT"); + + ceph_assert(!m_sync_points_copy.empty()); + auto sync_point = &m_sync_points_copy.front(); + + if (m_image_copy_object_no > 0) { + sync_point->object_number = m_image_copy_object_no - 1; + } else { + sync_point->object_number = boost::none; + } + + auto ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_flush_sync_point>(this); + m_sync_point_handler->update_sync_points(m_snap_seqs_copy, + m_sync_points_copy, false, ctx); +} + +template <typename I> +void ImageSync<I>::handle_flush_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_prune_sync_points(); +} + +template <typename I> +void ImageSync<I>::send_prune_sync_points() { + dout(10) << dendl; + + update_progress("PRUNE_SYNC_POINTS"); + + Context *ctx = create_context_callback< + ImageSync<I>, 
&ImageSync<I>::handle_prune_sync_points>(this); + SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create( + m_remote_image_ctx, true, m_sync_point_handler, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_prune_sync_points(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to prune sync point: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (!m_sync_point_handler->get_sync_points().empty()) { + send_copy_image(); + return; + } + + finish(0); +} + +template <typename I> +void ImageSync<I>::update_progress(const std::string &description) { + dout(20) << ": " << description << dendl; + + if (m_progress_ctx) { + m_progress_ctx->update_progress("IMAGE_SYNC/" + description); + } +} + +template <typename I> +void ImageSync<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_instance_watcher->notify_sync_complete(m_local_image_ctx->id); + CancelableRequest::finish(r); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageSync<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageSync.h b/src/tools/rbd_mirror/ImageSync.h new file mode 100644 index 000000000..b3389ce18 --- /dev/null +++ b/src/tools/rbd_mirror/ImageSync.h @@ -0,0 +1,151 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_H +#define RBD_MIRROR_IMAGE_SYNC_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/Types.h" +#include "common/ceph_mutex.h" +#include "tools/rbd_mirror/CancelableRequest.h" +#include "tools/rbd_mirror/image_sync/Types.h" + +class Context; +namespace journal { class Journaler; } +namespace librbd { template <typename> class DeepCopyRequest; } + +namespace rbd { +namespace mirror { + +class ProgressContext; +template <typename> class InstanceWatcher; +template <typename> class Threads; + +namespace image_sync { struct SyncPointHandler; } + 
+template <typename ImageCtxT = librbd::ImageCtx> +class ImageSync : public CancelableRequest { +public: + static ImageSync* create( + Threads<ImageCtxT>* threads, + ImageCtxT *local_image_ctx, + ImageCtxT *remote_image_ctx, + const std::string &local_mirror_uuid, + image_sync::SyncPointHandler* sync_point_handler, + InstanceWatcher<ImageCtxT> *instance_watcher, + ProgressContext *progress_ctx, + Context *on_finish) { + return new ImageSync(threads, local_image_ctx, remote_image_ctx, + local_mirror_uuid, sync_point_handler, + instance_watcher, progress_ctx, on_finish); + } + + ImageSync( + Threads<ImageCtxT>* threads, + ImageCtxT *local_image_ctx, + ImageCtxT *remote_image_ctx, + const std::string &local_mirror_uuid, + image_sync::SyncPointHandler* sync_point_handler, + InstanceWatcher<ImageCtxT> *instance_watcher, + ProgressContext *progress_ctx, + Context *on_finish); + ~ImageSync() override; + + void send() override; + void cancel() override; + +protected: + void finish(int r) override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * NOTIFY_SYNC_REQUEST + * | + * v + * PRUNE_CATCH_UP_SYNC_POINT + * | + * v + * CREATE_SYNC_POINT (skip if already exists and + * | not disconnected) + * v + * COPY_IMAGE . . . . . . . . . . . . . . + * | . + * v . + * FLUSH_SYNC_POINT . + * | . (image sync canceled) + * v . + * PRUNE_SYNC_POINTS . + * | . + * v . + * <finish> < . . . . . . . . . . . . . . 
+ * + * @endverbatim + */ + + class ImageCopyProgressHandler; + + Threads<ImageCtxT>* m_threads; + ImageCtxT *m_local_image_ctx; + ImageCtxT *m_remote_image_ctx; + std::string m_local_mirror_uuid; + image_sync::SyncPointHandler* m_sync_point_handler; + InstanceWatcher<ImageCtxT> *m_instance_watcher; + ProgressContext *m_progress_ctx; + + ceph::mutex m_lock; + bool m_canceled = false; + + librbd::DeepCopyRequest<ImageCtxT> *m_image_copy_request = nullptr; + ImageCopyProgressHandler *m_image_copy_prog_handler = nullptr; + + bool m_updating_sync_point = false; + Context *m_update_sync_ctx = nullptr; + double m_update_sync_point_interval; + uint64_t m_image_copy_object_no = 0; + uint64_t m_image_copy_object_count = 0; + + librbd::SnapSeqs m_snap_seqs_copy; + image_sync::SyncPoints m_sync_points_copy; + + int m_ret_val = 0; + + void send_notify_sync_request(); + void handle_notify_sync_request(int r); + + void send_prune_catch_up_sync_point(); + void handle_prune_catch_up_sync_point(int r); + + void send_create_sync_point(); + void handle_create_sync_point(int r); + + void send_update_max_object_count(); + void handle_update_max_object_count(int r); + + void send_copy_image(); + void handle_copy_image(int r); + void handle_copy_image_update_progress(uint64_t object_no, + uint64_t object_count); + void send_update_sync_point(); + void handle_update_sync_point(int r); + + void send_flush_sync_point(); + void handle_flush_sync_point(int r); + + void send_prune_sync_points(); + void handle_prune_sync_points(int r); + + void update_progress(const std::string &description); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageSync<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_H diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc new file mode 100644 index 000000000..e625bf365 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceReplayer.cc @@ -0,0 +1,543 @@ +// -*- mode:C++; tab-width:8; 
c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/stringify.h" +#include "common/Cond.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "ImageReplayer.h" +#include "InstanceReplayer.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceReplayer: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +namespace { + +const std::string SERVICE_DAEMON_ASSIGNED_COUNT_KEY("image_assigned_count"); +const std::string SERVICE_DAEMON_WARNING_COUNT_KEY("image_warning_count"); +const std::string SERVICE_DAEMON_ERROR_COUNT_KEY("image_error_count"); + +} // anonymous namespace + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +template <typename I> +InstanceReplayer<I>::InstanceReplayer( + librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid, + Threads<I> *threads, ServiceDaemon<I>* service_daemon, + MirrorStatusUpdater<I>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) + : m_local_io_ctx(local_io_ctx), m_local_mirror_uuid(local_mirror_uuid), + m_threads(threads), m_service_daemon(service_daemon), + m_local_status_updater(local_status_updater), + m_cache_manager_handler(cache_manager_handler), + m_pool_meta_cache(pool_meta_cache), + m_lock(ceph::make_mutex("rbd::mirror::InstanceReplayer " + + stringify(local_io_ctx.get_id()))) { +} + +template <typename I> +InstanceReplayer<I>::~InstanceReplayer() { + ceph_assert(m_image_state_check_task == nullptr); + ceph_assert(m_async_op_tracker.empty()); + ceph_assert(m_image_replayers.empty()); +} + +template <typename I> +bool InstanceReplayer<I>::is_blocklisted() const { + 
std::lock_guard locker{m_lock}; + return m_blocklisted; +} + +template <typename I> +int InstanceReplayer<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void InstanceReplayer<I>::init(Context *on_finish) { + dout(10) << dendl; + + Context *ctx = new LambdaContext( + [this, on_finish] (int r) { + { + std::lock_guard timer_locker{m_threads->timer_lock}; + schedule_image_state_check_task(); + } + on_finish->complete(0); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void InstanceReplayer<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_shut_down == nullptr); + m_on_shut_down = on_finish; + + Context *ctx = new LambdaContext( + [this] (int r) { + cancel_image_state_check_task(); + wait_for_ops(); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::add_peer(const Peer<I>& peer) { + dout(10) << "peer=" << peer << dendl; + + std::lock_guard locker{m_lock}; + auto result = m_peers.insert(peer).second; + ceph_assert(result); +} + +template <typename I> +void InstanceReplayer<I>::release_all(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + C_Gather *gather_ctx = new C_Gather(g_ceph_context, on_finish); + for (auto it = m_image_replayers.begin(); it != m_image_replayers.end(); + it = m_image_replayers.erase(it)) { + auto image_replayer = it->second; + auto ctx = gather_ctx->new_sub(); + ctx = new LambdaContext( + [image_replayer, ctx] (int r) { + image_replayer->destroy(); + ctx->complete(0); + }); + stop_image_replayer(image_replayer, ctx); + } + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> 
*instance_watcher, + const std::string &global_image_id, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it == m_image_replayers.end()) { + auto image_replayer = ImageReplayer<I>::create( + m_local_io_ctx, m_local_mirror_uuid, global_image_id, + m_threads, instance_watcher, m_local_status_updater, + m_cache_manager_handler, m_pool_meta_cache); + + dout(10) << global_image_id << ": creating replayer " << image_replayer + << dendl; + + it = m_image_replayers.insert(std::make_pair(global_image_id, + image_replayer)).first; + + // TODO only a single peer is currently supported + ceph_assert(m_peers.size() == 1); + auto peer = *m_peers.begin(); + image_replayer->add_peer(peer); + start_image_replayer(image_replayer); + } else { + // A duplicate acquire notification implies (1) connection hiccup or + // (2) new leader election. 
For the second case, restart the replayer to + // detect if the image has been deleted while the leader was offline + auto& image_replayer = it->second; + image_replayer->set_finished(false); + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); + } + + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void InstanceReplayer<I>::release_image(const std::string &global_image_id, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it == m_image_replayers.end()) { + dout(5) << global_image_id << ": not found" << dendl; + m_threads->work_queue->queue(on_finish, 0); + return; + } + + auto image_replayer = it->second; + m_image_replayers.erase(it); + + on_finish = new LambdaContext( + [image_replayer, on_finish] (int r) { + image_replayer->destroy(); + on_finish->complete(0); + }); + stop_image_replayer(image_replayer, on_finish); +} + +template <typename I> +void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it != m_image_replayers.end()) { + // TODO only a single peer is currently supported, therefore + // we can just interrupt the current image replayer and + // it will eventually detect that the peer image is missing and + // determine if a delete propagation is required. 
+ auto image_replayer = it->second; + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr)); + } + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void InstanceReplayer<I>::print_status(Formatter *f) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + f->open_array_section("image_replayers"); + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->print_status(f); + } + f->close_section(); +} + +template <typename I> +void InstanceReplayer<I>::start() +{ + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + m_manual_stop = false; + + auto cct = static_cast<CephContext *>(m_local_io_ctx.cct()); + auto gather_ctx = new C_Gather( + cct, new C_TrackedOp(m_async_op_tracker, nullptr)); + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->start(gather_ctx->new_sub(), true); + } + + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::stop() +{ + stop(nullptr); +} + +template <typename I> +void InstanceReplayer<I>::stop(Context *on_finish) +{ + dout(10) << dendl; + + if (on_finish == nullptr) { + on_finish = new C_TrackedOp(m_async_op_tracker, on_finish); + } else { + on_finish = new LambdaContext( + [this, on_finish] (int r) { + m_async_op_tracker.wait_for_ops(on_finish); + }); + } + + auto cct = static_cast<CephContext *>(m_local_io_ctx.cct()); + auto gather_ctx = new C_Gather(cct, on_finish); + { + std::lock_guard locker{m_lock}; + + m_manual_stop = true; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->stop(gather_ctx->new_sub(), true); + } + } + + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::restart() +{ + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + m_manual_stop = false; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->restart(new C_TrackedOp(m_async_op_tracker, 
nullptr)); + } +} + +template <typename I> +void InstanceReplayer<I>::flush() +{ + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->flush(); + } +} + +template <typename I> +void InstanceReplayer<I>::start_image_replayer( + ImageReplayer<I> *image_replayer) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + std::string global_image_id = image_replayer->get_global_image_id(); + if (!image_replayer->is_stopped()) { + return; + } else if (image_replayer->is_blocklisted()) { + derr << "global_image_id=" << global_image_id << ": blocklisted detected " + << "during image replay" << dendl; + m_blocklisted = true; + return; + } else if (image_replayer->is_finished()) { + // TODO temporary until policy integrated + dout(5) << "removing image replayer for global_image_id=" + << global_image_id << dendl; + m_image_replayers.erase(image_replayer->get_global_image_id()); + image_replayer->destroy(); + return; + } else if (m_manual_stop) { + return; + } + + dout(10) << "global_image_id=" << global_image_id << dendl; + image_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr), false); +} + +template <typename I> +void InstanceReplayer<I>::queue_start_image_replayers() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + InstanceReplayer, &InstanceReplayer<I>::start_image_replayers>(this); + m_async_op_tracker.start_op(); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::start_image_replayers(int r) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + if (m_on_shut_down != nullptr) { + m_async_op_tracker.finish_op(); + return; + } + + uint64_t image_count = 0; + uint64_t warning_count = 0; + uint64_t error_count = 0; + for (auto it = m_image_replayers.begin(); + it != m_image_replayers.end();) { + auto current_it(it); + ++it; + + ++image_count; + auto health_state = current_it->second->get_health_state(); 
+ if (health_state == image_replayer::HEALTH_STATE_WARNING) { + ++warning_count; + } else if (health_state == image_replayer::HEALTH_STATE_ERROR) { + ++error_count; + } + + start_image_replayer(current_it->second); + } + + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_ASSIGNED_COUNT_KEY, image_count); + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_WARNING_COUNT_KEY, warning_count); + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_ERROR_COUNT_KEY, error_count); + + m_async_op_tracker.finish_op(); +} + +template <typename I> +void InstanceReplayer<I>::stop_image_replayer(ImageReplayer<I> *image_replayer, + Context *on_finish) { + dout(10) << image_replayer << " global_image_id=" + << image_replayer->get_global_image_id() << ", on_finish=" + << on_finish << dendl; + + if (image_replayer->is_stopped()) { + m_threads->work_queue->queue(on_finish, 0); + return; + } + + m_async_op_tracker.start_op(); + Context *ctx = create_async_context_callback( + m_threads->work_queue, new LambdaContext( + [this, image_replayer, on_finish] (int r) { + stop_image_replayer(image_replayer, on_finish); + m_async_op_tracker.finish_op(); + })); + + if (image_replayer->is_running()) { + image_replayer->stop(ctx, false); + } else { + int after = 1; + dout(10) << "scheduling image replayer " << image_replayer << " stop after " + << after << " sec (task " << ctx << ")" << dendl; + ctx = new LambdaContext( + [this, after, ctx] (int r) { + std::lock_guard timer_locker{m_threads->timer_lock}; + m_threads->timer->add_event_after(after, ctx); + }); + m_threads->work_queue->queue(ctx, 0); + } +} + +template <typename I> +void InstanceReplayer<I>::wait_for_ops() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + InstanceReplayer, 
&InstanceReplayer<I>::handle_wait_for_ops>(this); + + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void InstanceReplayer<I>::handle_wait_for_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + std::lock_guard locker{m_lock}; + stop_image_replayers(); +} + +template <typename I> +void InstanceReplayer<I>::stop_image_replayers() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<InstanceReplayer<I>, + &InstanceReplayer<I>::handle_stop_image_replayers>(this)); + + C_Gather *gather_ctx = new C_Gather(g_ceph_context, ctx); + for (auto &it : m_image_replayers) { + stop_image_replayer(it.second, gather_ctx->new_sub()); + } + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::handle_stop_image_replayers(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + + for (auto &it : m_image_replayers) { + ceph_assert(it.second->is_stopped()); + it.second->destroy(); + } + m_image_replayers.clear(); + + ceph_assert(m_on_shut_down != nullptr); + std::swap(on_finish, m_on_shut_down); + } + on_finish->complete(r); +} + +template <typename I> +void InstanceReplayer<I>::cancel_image_state_check_task() { + std::lock_guard timer_locker{m_threads->timer_lock}; + + if (m_image_state_check_task == nullptr) { + return; + } + + dout(10) << m_image_state_check_task << dendl; + bool canceled = m_threads->timer->cancel_event(m_image_state_check_task); + ceph_assert(canceled); + m_image_state_check_task = nullptr; +} + +template <typename I> +void InstanceReplayer<I>::schedule_image_state_check_task() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_image_state_check_task == nullptr); + + m_image_state_check_task = new LambdaContext( + [this](int r) { + 
ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + m_image_state_check_task = nullptr; + schedule_image_state_check_task(); + queue_start_image_replayers(); + }); + + auto cct = static_cast<CephContext *>(m_local_io_ctx.cct()); + int after = cct->_conf.get_val<uint64_t>( + "rbd_mirror_image_state_check_interval"); + + dout(10) << "scheduling image state check after " << after << " sec (task " + << m_image_state_check_task << ")" << dendl; + m_threads->timer->add_event_after(after, m_image_state_check_task); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/InstanceReplayer.h b/src/tools/rbd_mirror/InstanceReplayer.h new file mode 100644 index 000000000..7a5c79723 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceReplayer.h @@ -0,0 +1,138 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_INSTANCE_REPLAYER_H +#define RBD_MIRROR_INSTANCE_REPLAYER_H + +#include <map> +#include <sstream> + +#include "common/AsyncOpTracker.h" +#include "common/Formatter.h" +#include "common/ceph_mutex.h" +#include "tools/rbd_mirror/Types.h" + +namespace journal { struct CacheManagerHandler; } + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ImageReplayer; +template <typename> class InstanceWatcher; +template <typename> class MirrorStatusUpdater; +struct PoolMetaCache; +template <typename> class ServiceDaemon; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class InstanceReplayer { +public: + static InstanceReplayer* create( + librados::IoCtx &local_io_ctx, const std::string &local_mirror_uuid, + Threads<ImageCtxT> *threads, ServiceDaemon<ImageCtxT> *service_daemon, + MirrorStatusUpdater<ImageCtxT>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) { 
+ return new InstanceReplayer(local_io_ctx, local_mirror_uuid, threads, + service_daemon, local_status_updater, + cache_manager_handler, pool_meta_cache); + } + void destroy() { + delete this; + } + + InstanceReplayer(librados::IoCtx &local_io_ctx, + const std::string &local_mirror_uuid, + Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT> *service_daemon, + MirrorStatusUpdater<ImageCtxT>* local_status_updater, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache); + ~InstanceReplayer(); + + bool is_blocklisted() const; + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void add_peer(const Peer<ImageCtxT>& peer); + + void acquire_image(InstanceWatcher<ImageCtxT> *instance_watcher, + const std::string &global_image_id, Context *on_finish); + void release_image(const std::string &global_image_id, Context *on_finish); + void remove_peer_image(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish); + + void release_all(Context *on_finish); + + void print_status(Formatter *f); + void start(); + void stop(); + void restart(); + void flush(); + + void stop(Context *on_finish); + +private: + /** + * @verbatim + * + * <uninitialized> <-------------------\ + * | (init) | (repeat for each + * v STOP_IMAGE_REPLAYER ---\ image replayer) + * SCHEDULE_IMAGE_STATE_CHECK_TASK ^ ^ | + * | | | | + * v (shut_down) | \---------/ + * <initialized> -----------------> WAIT_FOR_OPS + * + * @endverbatim + */ + + typedef std::set<Peer<ImageCtxT>> Peers; + + librados::IoCtx &m_local_io_ctx; + std::string m_local_mirror_uuid; + Threads<ImageCtxT> *m_threads; + ServiceDaemon<ImageCtxT> *m_service_daemon; + MirrorStatusUpdater<ImageCtxT>* m_local_status_updater; + journal::CacheManagerHandler *m_cache_manager_handler; + PoolMetaCache* m_pool_meta_cache; + + mutable ceph::mutex m_lock; + AsyncOpTracker m_async_op_tracker; + std::map<std::string, 
ImageReplayer<ImageCtxT> *> m_image_replayers; + Peers m_peers; + Context *m_image_state_check_task = nullptr; + Context *m_on_shut_down = nullptr; + bool m_manual_stop = false; + bool m_blocklisted = false; + + void wait_for_ops(); + void handle_wait_for_ops(int r); + + void start_image_replayer(ImageReplayer<ImageCtxT> *image_replayer); + void queue_start_image_replayers(); + void start_image_replayers(int r); + + void stop_image_replayer(ImageReplayer<ImageCtxT> *image_replayer, + Context *on_finish); + + void stop_image_replayers(); + void handle_stop_image_replayers(int r); + + void schedule_image_state_check_task(); + void cancel_image_state_check_task(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>; + +#endif // RBD_MIRROR_INSTANCE_REPLAYER_H diff --git a/src/tools/rbd_mirror/InstanceWatcher.cc b/src/tools/rbd_mirror/InstanceWatcher.cc new file mode 100644 index 000000000..7b531064d --- /dev/null +++ b/src/tools/rbd_mirror/InstanceWatcher.cc @@ -0,0 +1,1290 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "InstanceWatcher.h" +#include "include/stringify.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/AsioEngine.h" +#include "librbd/ManagedLock.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "InstanceReplayer.h" +#include "Throttler.h" +#include "common/Cond.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " + +namespace rbd { +namespace mirror { + +using namespace instance_watcher; + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; +using librbd::util::unique_lock_name; + +namespace { + +struct C_GetInstances : public 
Context { + std::vector<std::string> *instance_ids; + Context *on_finish; + bufferlist out_bl; + + C_GetInstances(std::vector<std::string> *instance_ids, Context *on_finish) + : instance_ids(instance_ids), on_finish(on_finish) { + } + + void finish(int r) override { + dout(10) << "C_GetInstances: " << this << " " << __func__ << ": r=" << r + << dendl; + + if (r == 0) { + auto it = out_bl.cbegin(); + r = librbd::cls_client::mirror_instances_list_finish(&it, instance_ids); + } else if (r == -ENOENT) { + r = 0; + } + on_finish->complete(r); + } +}; + +template <typename I> +struct C_RemoveInstanceRequest : public Context { + InstanceWatcher<I> instance_watcher; + Context *on_finish; + + C_RemoveInstanceRequest(librados::IoCtx &io_ctx, + librbd::AsioEngine& asio_engine, + const std::string &instance_id, Context *on_finish) + : instance_watcher(io_ctx, asio_engine, nullptr, nullptr, instance_id), + on_finish(on_finish) { + } + + void send() { + dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << dendl; + + instance_watcher.remove(this); + } + + void finish(int r) override { + dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + ceph_assert(r == 0); + + on_finish->complete(r); + } +}; + +} // anonymous namespace + +template <typename I> +struct InstanceWatcher<I>::C_NotifyInstanceRequest : public Context { + InstanceWatcher<I> *instance_watcher; + std::string instance_id; + uint64_t request_id; + bufferlist bl; + Context *on_finish; + bool send_to_leader; + std::unique_ptr<librbd::watcher::Notifier> notifier; + librbd::watcher::NotifyResponse response; + bool canceling = false; + + C_NotifyInstanceRequest(InstanceWatcher<I> *instance_watcher, + const std::string &instance_id, uint64_t request_id, + bufferlist &&bl, Context *on_finish) + : instance_watcher(instance_watcher), instance_id(instance_id), + request_id(request_id), bl(bl), on_finish(on_finish), + send_to_leader(instance_id.empty()) { + dout(10) << 
"C_NotifyInstanceRequest: " << this << " " << __func__ + << ": instance_watcher=" << instance_watcher << ", instance_id=" + << instance_id << ", request_id=" << request_id << dendl; + + ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock)); + + if (!send_to_leader) { + ceph_assert((!instance_id.empty())); + notifier.reset(new librbd::watcher::Notifier( + instance_watcher->m_work_queue, + instance_watcher->m_ioctx, + RBD_MIRROR_INSTANCE_PREFIX + instance_id)); + } + + instance_watcher->m_notify_op_tracker.start_op(); + auto result = instance_watcher->m_notify_ops.insert( + std::make_pair(instance_id, this)).second; + ceph_assert(result); + } + + void send() { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl; + + ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock)); + + if (canceling) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": canceling" << dendl; + instance_watcher->m_work_queue->queue(this, -ECANCELED); + return; + } + + if (send_to_leader) { + if (instance_watcher->m_leader_instance_id.empty()) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": suspending" << dendl; + instance_watcher->suspend_notify_request(this); + return; + } + + if (instance_watcher->m_leader_instance_id != instance_id) { + auto count = instance_watcher->m_notify_ops.erase( + std::make_pair(instance_id, this)); + ceph_assert(count > 0); + + instance_id = instance_watcher->m_leader_instance_id; + + auto result = instance_watcher->m_notify_ops.insert( + std::make_pair(instance_id, this)).second; + ceph_assert(result); + + notifier.reset(new librbd::watcher::Notifier( + instance_watcher->m_work_queue, + instance_watcher->m_ioctx, + RBD_MIRROR_INSTANCE_PREFIX + instance_id)); + } + } + + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": sending to " << instance_id << dendl; + notifier->notify(bl, &response, this); + } + + void cancel() { + dout(10) << 
"C_NotifyInstanceRequest: " << this << " " << __func__ << dendl; + + ceph_assert(ceph_mutex_is_locked(instance_watcher->m_lock)); + + canceling = true; + instance_watcher->unsuspend_notify_request(this); + } + + void finish(int r) override { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + + if (r == 0 || r == -ETIMEDOUT) { + bool found = false; + for (auto &it : response.acks) { + auto &bl = it.second; + if (it.second.length() == 0) { + dout(5) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": no payload in ack, ignoring" << dendl; + continue; + } + try { + auto iter = bl.cbegin(); + NotifyAckPayload ack; + decode(ack, iter); + if (ack.instance_id != instance_watcher->get_instance_id()) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": ack instance_id (" << ack.instance_id << ") " + << "does not match, ignoring" << dendl; + continue; + } + if (ack.request_id != request_id) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": ack request_id (" << ack.request_id << ") " + << "does not match, ignoring" << dendl; + continue; + } + r = ack.ret_val; + found = true; + break; + } catch (const buffer::error &err) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": failed to decode ack: " << err.what() << dendl; + continue; + } + } + + if (!found) { + if (r == -ETIMEDOUT) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": resending after timeout" << dendl; + std::lock_guard locker{instance_watcher->m_lock}; + send(); + return; + } else { + r = -EINVAL; + } + } else { + if (r == -ESTALE && send_to_leader) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": resending due to leader change" << dendl; + std::lock_guard locker{instance_watcher->m_lock}; + send(); + return; + } + } + } + + on_finish->complete(r); + + { + std::lock_guard locker{instance_watcher->m_lock}; + auto result = 
instance_watcher->m_notify_ops.erase( + std::make_pair(instance_id, this)); + ceph_assert(result > 0); + instance_watcher->m_notify_op_tracker.finish_op(); + } + + delete this; + } + + void complete(int r) override { + finish(r); + } +}; + +template <typename I> +struct InstanceWatcher<I>::C_SyncRequest : public Context { + InstanceWatcher<I> *instance_watcher; + std::string sync_id; + Context *on_start; + Context *on_complete = nullptr; + C_NotifyInstanceRequest *req = nullptr; + + C_SyncRequest(InstanceWatcher<I> *instance_watcher, + const std::string &sync_id, Context *on_start) + : instance_watcher(instance_watcher), sync_id(sync_id), + on_start(on_start) { + dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": sync_id=" + << sync_id << dendl; + } + + void finish(int r) override { + dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + + if (on_start != nullptr) { + instance_watcher->handle_notify_sync_request(this, r); + } else { + instance_watcher->handle_notify_sync_complete(this, r); + delete this; + } + } + + // called twice + void complete(int r) override { + finish(r); + } +}; + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " \ + << this << " " << __func__ << ": " +template <typename I> +void InstanceWatcher<I>::get_instances(librados::IoCtx &io_ctx, + std::vector<std::string> *instance_ids, + Context *on_finish) { + librados::ObjectReadOperation op; + librbd::cls_client::mirror_instances_list_start(&op); + C_GetInstances *ctx = new C_GetInstances(instance_ids, on_finish); + librados::AioCompletion *aio_comp = create_rados_callback(ctx); + + int r = io_ctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &ctx->out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::remove_instance(librados::IoCtx &io_ctx, + librbd::AsioEngine& asio_engine, + const std::string &instance_id, + Context *on_finish) { + auto req = new 
C_RemoveInstanceRequest<I>(io_ctx, asio_engine, instance_id, + on_finish); + req->send(); +} + +template <typename I> +InstanceWatcher<I> *InstanceWatcher<I>::create( + librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine, + InstanceReplayer<I> *instance_replayer, + Throttler<I> *image_sync_throttler) { + return new InstanceWatcher<I>(io_ctx, asio_engine, instance_replayer, + image_sync_throttler, + stringify(io_ctx.get_instance_id())); +} + +template <typename I> +InstanceWatcher<I>::InstanceWatcher(librados::IoCtx &io_ctx, + librbd::AsioEngine& asio_engine, + InstanceReplayer<I> *instance_replayer, + Throttler<I> *image_sync_throttler, + const std::string &instance_id) + : Watcher(io_ctx, asio_engine.get_work_queue(), + RBD_MIRROR_INSTANCE_PREFIX + instance_id), + m_instance_replayer(instance_replayer), + m_image_sync_throttler(image_sync_throttler), m_instance_id(instance_id), + m_lock(ceph::make_mutex( + unique_lock_name("rbd::mirror::InstanceWatcher::m_lock", this))), + m_instance_lock(librbd::ManagedLock<I>::create( + m_ioctx, asio_engine, m_oid, this, librbd::managed_lock::EXCLUSIVE, true, + m_cct->_conf.get_val<uint64_t>("rbd_blocklist_expire_seconds"))) { +} + +template <typename I> +InstanceWatcher<I>::~InstanceWatcher() { + ceph_assert(m_requests.empty()); + ceph_assert(m_notify_ops.empty()); + ceph_assert(m_notify_op_tracker.empty()); + ceph_assert(m_suspended_ops.empty()); + ceph_assert(m_inflight_sync_reqs.empty()); + m_instance_lock->destroy(); +} + +template <typename I> +int InstanceWatcher<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void InstanceWatcher<I>::init(Context *on_finish) { + dout(10) << "instance_id=" << m_instance_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + register_instance(); +} + +template <typename I> +void InstanceWatcher<I>::shut_down() { + C_SaferCond shut_down_ctx; 
+ shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void InstanceWatcher<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + release_lock(); +} + +template <typename I> +void InstanceWatcher<I>::remove(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + get_instance_locker(); +} + +template <typename I> +void InstanceWatcher<I>::notify_image_acquire( + const std::string &instance_id, const std::string &global_image_id, + Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", global_image_id=" + << global_image_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{ImageAcquirePayload{request_id, global_image_id}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_image_release( + const std::string &instance_id, const std::string &global_image_id, + Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", global_image_id=" + << global_image_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{ImageReleasePayload{request_id, global_image_id}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_peer_image_removed( + const std::string &instance_id, const std::string &global_image_id, + const std::string &peer_mirror_uuid, Context 
*on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", " + << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{PeerImageRemovedPayload{request_id, global_image_id, + peer_mirror_uuid}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_request(const std::string &sync_id, + Context *on_sync_start) { + dout(10) << "sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_inflight_sync_reqs.count(sync_id) == 0); + + uint64_t request_id = ++m_request_seq; + + bufferlist bl; + encode(NotifyMessage{SyncRequestPayload{request_id, sync_id}}, bl); + + auto sync_ctx = new C_SyncRequest(this, sync_id, on_sync_start); + sync_ctx->req = new C_NotifyInstanceRequest(this, "", request_id, + std::move(bl), sync_ctx); + + m_inflight_sync_reqs[sync_id] = sync_ctx; + sync_ctx->req->send(); +} + +template <typename I> +bool InstanceWatcher<I>::cancel_sync_request(const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + auto it = m_inflight_sync_reqs.find(sync_id); + if (it == m_inflight_sync_reqs.end()) { + return false; + } + + auto sync_ctx = it->second; + + if (sync_ctx->on_start == nullptr) { + return false; + } + + ceph_assert(sync_ctx->req != nullptr); + sync_ctx->req->cancel(); + return true; +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_start(const std::string &instance_id, + const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + uint64_t request_id = ++m_request_seq; + + bufferlist bl; + encode(NotifyMessage{SyncStartPayload{request_id, sync_id}}, bl); + 
+ auto ctx = new LambdaContext( + [this, sync_id] (int r) { + dout(10) << "finish: sync_id=" << sync_id << ", r=" << r << dendl; + std::lock_guard locker{m_lock}; + if (r != -ESTALE && is_leader()) { + m_image_sync_throttler->finish_op(m_ioctx.get_namespace(), sync_id); + } + }); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), ctx); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_complete(const std::string &sync_id) { + std::lock_guard locker{m_lock}; + notify_sync_complete(m_lock, sync_id); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_complete(const ceph::mutex&, + const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + auto it = m_inflight_sync_reqs.find(sync_id); + ceph_assert(it != m_inflight_sync_reqs.end()); + + auto sync_ctx = it->second; + ceph_assert(sync_ctx->req == nullptr); + + m_inflight_sync_reqs.erase(it); + m_work_queue->queue(sync_ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify_sync_request(C_SyncRequest *sync_ctx, + int r) { + dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl; + + Context *on_start = nullptr; + { + std::lock_guard locker{m_lock}; + ceph_assert(sync_ctx->req != nullptr); + ceph_assert(sync_ctx->on_start != nullptr); + + if (sync_ctx->req->canceling) { + r = -ECANCELED; + } + + std::swap(sync_ctx->on_start, on_start); + sync_ctx->req = nullptr; + + if (r == -ECANCELED) { + notify_sync_complete(m_lock, sync_ctx->sync_id); + } + } + + on_start->complete(r == -ECANCELED ? 
r : 0);
+}
+
+// Sync has finished (or the request was canceled before starting); forward
+// the result to the waiter registered via handle_sync_start(), if any.
+template <typename I>
+void InstanceWatcher<I>::handle_notify_sync_complete(C_SyncRequest *sync_ctx,
+                                                     int r) {
+  dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl;
+
+  if (sync_ctx->on_complete != nullptr) {
+    sync_ctx->on_complete->complete(r);
+  }
+}
+
+// This instance became the leader: record our own id as leader and release
+// any notify requests that were suspended while no leader was known.
+template <typename I>
+void InstanceWatcher<I>::handle_acquire_leader() {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_leader_instance_id = m_instance_id;
+  unsuspend_notify_requests();
+}
+
+// Leadership lost: clear the leader id and fail all queued sync throttler
+// ops with -ESTALE so callers re-route to the new leader.
+template <typename I>
+void InstanceWatcher<I>::handle_release_leader() {
+  dout(10) << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_leader_instance_id.clear();
+
+  m_image_sync_throttler->drain(m_ioctx.get_namespace(), -ESTALE);
+}
+
+// A (possibly different) peer became leader.  Suspended leader-bound
+// requests can proceed once a non-empty leader id is known.
+template <typename I>
+void InstanceWatcher<I>::handle_update_leader(
+    const std::string &leader_instance_id) {
+  dout(10) << "leader_instance_id=" << leader_instance_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  m_leader_instance_id = leader_instance_id;
+
+  if (!m_leader_instance_id.empty()) {
+    unsuspend_notify_requests();
+  }
+}
+
+// Cancel in-flight notifications addressed directly to instance_id.
+// Leader-routed ops (send_to_leader) are skipped: they are re-targeted on
+// leader change instead of being canceled.
+template <typename I>
+void InstanceWatcher<I>::cancel_notify_requests(
+    const std::string &instance_id) {
+  dout(10) << "instance_id=" << instance_id << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  // iterate by const reference: the set holds std::pair<std::string, ...>
+  // and iterating by value would copy the instance-id string per element
+  for (const auto &op : m_notify_ops) {
+    if (op.first == instance_id && !op.second->send_to_leader) {
+      op.second->cancel();
+    }
+  }
+}
+
+// First init step: asynchronously add this instance id to the
+// mirror_instances registry on the RBD_MIRROR_LEADER object.
+template <typename I>
+void InstanceWatcher<I>::register_instance() {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  dout(10) << dendl;
+
+  librados::ObjectWriteOperation op;
+  librbd::cls_client::mirror_instances_add(&op, m_instance_id);
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_instance>(this);
+
+  int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op);
+  ceph_assert(r == 0);
+  aio_comp->release();
+}
+
+// On success continue to create_instance_object(); on error complete the
+// pending init context (swapped out under m_lock) with the failure code.
+template <typename I>
+void InstanceWatcher<I>::handle_register_instance(int r) {
+  
dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + + if (r == 0) { + create_instance_object(); + return; + } + + derr << "error registering instance: " << cpp_strerror(r) << dendl; + + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + + +template <typename I> +void InstanceWatcher<I>::create_instance_object() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + librados::ObjectWriteOperation op; + op.create(true); + + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, + &InstanceWatcher<I>::handle_create_instance_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_create_instance_object(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error creating " << m_oid << " object: " << cpp_strerror(r) + << dendl; + + m_ret_val = r; + unregister_instance(); + return; + } + + register_watch(); +} + +template <typename I> +void InstanceWatcher<I>::register_watch() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_watch>(this)); + + librbd::Watcher::register_watch(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_register_watch(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error registering instance watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + + m_ret_val = r; + remove_instance_object(); + return; + } + + acquire_lock(); +} + +template <typename I> +void InstanceWatcher<I>::acquire_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( 
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_acquire_lock>(this));
+
+  m_instance_lock->acquire_lock(ctx);
+}
+
+// Final init step.  On failure, unwind via unregister_watch(); on success,
+// complete the pending init context (swapped out under m_lock so it runs
+// without the lock held).
+template <typename I>
+void InstanceWatcher<I>::handle_acquire_lock(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  Context *on_finish = nullptr;
+  {
+    std::lock_guard locker{m_lock};
+
+    if (r < 0) {
+
+      derr << "error acquiring instance lock: " << cpp_strerror(r) << dendl;
+
+      m_ret_val = r;
+      unregister_watch();
+      return;
+    }
+
+    std::swap(on_finish, m_on_finish);
+  }
+
+  on_finish->complete(r);
+}
+
+// First shut_down step: shut down the managed instance lock (releases it if
+// held).  Continues to handle_release_lock().  Caller holds m_lock.
+template <typename I>
+void InstanceWatcher<I>::release_lock() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_release_lock>(this));
+
+  m_instance_lock->shut_down(ctx);
+}
+
+// Errors releasing the lock are logged but do not abort shut down -- the
+// teardown chain always proceeds to unregister_watch().
+template <typename I>
+void InstanceWatcher<I>::handle_release_lock(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  std::lock_guard locker{m_lock};
+
+  if (r < 0) {
+    derr << "error releasing instance lock: " << cpp_strerror(r) << dendl;
+  }
+
+  unregister_watch();
+}
+
+// Unregister the rados watch on this instance's object.  Used both on the
+// shut_down path and when init fails after register_watch().
+template <typename I>
+void InstanceWatcher<I>::unregister_watch() {
+  dout(10) << dendl;
+
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  Context *ctx = create_async_context_callback(
+    m_work_queue, create_context_callback<
+      InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_watch>(this));
+
+  librbd::Watcher::unregister_watch(ctx);
+}
+
+// Best-effort: log unregister failures and continue removing the instance
+// object regardless.
+template <typename I>
+void InstanceWatcher<I>::handle_unregister_watch(int r) {
+  dout(10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "error unregistering instance watcher for " << m_oid << " object: "
+         << cpp_strerror(r) << dendl;
+  }
+
+  std::lock_guard locker{m_lock};
+  remove_instance_object();
+}
+
+// Remove this instance's rados object (the reverse of
+// create_instance_object()).  Caller holds m_lock.
+template <typename I>
+void InstanceWatcher<I>::remove_instance_object() {
+  ceph_assert(ceph_mutex_is_locked(m_lock));
+
+  dout(10) << dendl;
+
+  
librados::ObjectWriteOperation op; + op.remove(); + + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, + &InstanceWatcher<I>::handle_remove_instance_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_remove_instance_object(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + r = 0; + } + + if (r < 0) { + derr << "error removing " << m_oid << " object: " << cpp_strerror(r) + << dendl; + } + + std::lock_guard locker{m_lock}; + unregister_instance(); +} + +template <typename I> +void InstanceWatcher<I>::unregister_instance() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_instances_remove(&op, m_instance_id); + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_instance>(this); + + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_unregister_instance(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering instance: " << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + wait_for_notify_ops(); +} + +template <typename I> +void InstanceWatcher<I>::wait_for_notify_ops() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + for (auto op : m_notify_ops) { + op.second->cancel(); + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_wait_for_notify_ops>(this)); + + m_notify_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_wait_for_notify_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish 
= nullptr; + { + std::lock_guard locker{m_lock}; + + ceph_assert(m_notify_ops.empty()); + + std::swap(on_finish, m_on_finish); + r = m_ret_val; + } + on_finish->complete(r); +} + +template <typename I> +void InstanceWatcher<I>::get_instance_locker() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_get_instance_locker>(this)); + + m_instance_lock->get_locker(&m_instance_locker, ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_get_instance_locker(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + if (r != -ENOENT) { + derr << "error retrieving instance locker: " << cpp_strerror(r) << dendl; + } + remove_instance_object(); + return; + } + + break_instance_lock(); +} + +template <typename I> +void InstanceWatcher<I>::break_instance_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_break_instance_lock>(this)); + + m_instance_lock->break_lock(m_instance_locker, true, ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_break_instance_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + if (r != -ENOENT) { + derr << "error breaking instance lock: " << cpp_strerror(r) << dendl; + } + remove_instance_object(); + return; + } + + remove_instance_object(); +} + +template <typename I> +void InstanceWatcher<I>::suspend_notify_request(C_NotifyInstanceRequest *req) { + dout(10) << req << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + auto result = m_suspended_ops.insert(req).second; + ceph_assert(result); +} + +template <typename I> +bool InstanceWatcher<I>::unsuspend_notify_request( + C_NotifyInstanceRequest *req) { + 
dout(10) << req << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + auto result = m_suspended_ops.erase(req); + if (result == 0) { + return false; + } + + req->send(); + return true; +} + +template <typename I> +void InstanceWatcher<I>::unsuspend_notify_requests() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + std::set<C_NotifyInstanceRequest *> suspended_ops; + std::swap(m_suspended_ops, suspended_ops); + + for (auto op : suspended_ops) { + op->send(); + } +} + +template <typename I> +Context *InstanceWatcher<I>::prepare_request(const std::string &instance_id, + uint64_t request_id, + C_NotifyAck *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id + << dendl; + + std::lock_guard locker{m_lock}; + + Context *ctx = nullptr; + Request request(instance_id, request_id); + auto it = m_requests.find(request); + + if (it != m_requests.end()) { + dout(10) << "duplicate for in-progress request" << dendl; + delete it->on_notify_ack; + m_requests.erase(it); + } else { + ctx = create_async_context_callback( + m_work_queue, new LambdaContext( + [this, instance_id, request_id] (int r) { + complete_request(instance_id, request_id, r); + })); + } + + request.on_notify_ack = on_notify_ack; + m_requests.insert(request); + return ctx; +} + +template <typename I> +void InstanceWatcher<I>::complete_request(const std::string &instance_id, + uint64_t request_id, int r) { + dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id + << dendl; + + C_NotifyAck *on_notify_ack; + { + std::lock_guard locker{m_lock}; + Request request(instance_id, request_id); + auto it = m_requests.find(request); + ceph_assert(it != m_requests.end()); + on_notify_ack = it->on_notify_ack; + m_requests.erase(it); + } + + encode(NotifyAckPayload(instance_id, request_id, r), on_notify_ack->out); + on_notify_ack->complete(0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify(uint64_t notify_id, uint64_t 
handle, + uint64_t notifier_id, bufferlist &bl) { + dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", " + << "notifier_id=" << notifier_id << dendl; + + auto ctx = new C_NotifyAck(this, notify_id, handle); + + NotifyMessage notify_message; + try { + auto iter = bl.cbegin(); + decode(notify_message, iter); + } catch (const buffer::error &err) { + derr << "error decoding image notification: " << err.what() << dendl; + ctx->complete(0); + return; + } + + apply_visitor(HandlePayloadVisitor(this, stringify(notifier_id), ctx), + notify_message.payload); +} + +template <typename I> +void InstanceWatcher<I>::handle_image_acquire( + const std::string &global_image_id, Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + auto ctx = new LambdaContext( + [this, global_image_id, on_finish] (int r) { + m_instance_replayer->acquire_image(this, global_image_id, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_image_release( + const std::string &global_image_id, Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + auto ctx = new LambdaContext( + [this, global_image_id, on_finish] (int r) { + m_instance_replayer->release_image(global_image_id, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_peer_image_removed( + const std::string &global_image_id, const std::string &peer_mirror_uuid, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + auto ctx = new LambdaContext( + [this, peer_mirror_uuid, global_image_id, on_finish] (int r) { + m_instance_replayer->remove_peer_image(global_image_id, + peer_mirror_uuid, on_finish); + 
m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_sync_request(const std::string &instance_id, + const std::string &sync_id, + Context *on_finish) { + dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + if (!is_leader()) { + dout(10) << "sync request for non-leader" << dendl; + m_work_queue->queue(on_finish, -ESTALE); + return; + } + + Context *on_start = create_async_context_callback( + m_work_queue, new LambdaContext( + [this, instance_id, sync_id, on_finish] (int r) { + dout(10) << "handle_sync_request: finish: instance_id=" << instance_id + << ", sync_id=" << sync_id << ", r=" << r << dendl; + if (r == 0) { + notify_sync_start(instance_id, sync_id); + } + if (r == -ENOENT) { + r = 0; + } + on_finish->complete(r); + })); + m_image_sync_throttler->start_op(m_ioctx.get_namespace(), sync_id, on_start); +} + +template <typename I> +void InstanceWatcher<I>::handle_sync_start(const std::string &instance_id, + const std::string &sync_id, + Context *on_finish) { + dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl; + + std::lock_guard locker{m_lock}; + + auto it = m_inflight_sync_reqs.find(sync_id); + if (it == m_inflight_sync_reqs.end()) { + dout(5) << "not found" << dendl; + m_work_queue->queue(on_finish, 0); + return; + } + + auto sync_ctx = it->second; + + if (sync_ctx->on_complete != nullptr) { + dout(5) << "duplicate request" << dendl; + m_work_queue->queue(sync_ctx->on_complete, -ESTALE); + } + + sync_ctx->on_complete = on_finish; +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const ImageAcquirePayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "image_acquire: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = 
prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_image_acquire(payload.global_image_id, on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const ImageReleasePayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "image_release: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_image_release(payload.global_image_id, on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const PeerImageRemovedPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "remove_peer_image: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_peer_image_removed(payload.global_image_id, payload.peer_mirror_uuid, + on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const SyncRequestPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "sync_request: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish == nullptr) { + return; + } + + handle_sync_request(instance_id, payload.sync_id, on_finish); +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const SyncStartPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "sync_start: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); 
+ if (on_finish == nullptr) { + return; + } + + handle_sync_start(instance_id, payload.sync_id, on_finish); +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const UnknownPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(5) << "unknown: instance_id=" << instance_id << dendl; + + on_notify_ack->complete(0); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::InstanceWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/InstanceWatcher.h b/src/tools/rbd_mirror/InstanceWatcher.h new file mode 100644 index 000000000..08e40b40b --- /dev/null +++ b/src/tools/rbd_mirror/InstanceWatcher.h @@ -0,0 +1,269 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCE_WATCHER_H +#define CEPH_RBD_MIRROR_INSTANCE_WATCHER_H + +#include <map> +#include <memory> +#include <set> +#include <string> +#include <vector> + +#include "common/AsyncOpTracker.h" +#include "librbd/Watcher.h" +#include "librbd/managed_lock/Types.h" +#include "tools/rbd_mirror/instance_watcher/Types.h" + +namespace librbd { + +class AsioEngine; +class ImageCtx; +template <typename> class ManagedLock; + +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> class InstanceReplayer; +template <typename> class Throttler; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class InstanceWatcher : protected librbd::Watcher { + using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning +public: + static void get_instances(librados::IoCtx &io_ctx, + std::vector<std::string> *instance_ids, + Context *on_finish); + static void remove_instance(librados::IoCtx &io_ctx, + librbd::AsioEngine& asio_engine, + const std::string &instance_id, + Context *on_finish); + + static InstanceWatcher *create( + librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine, + 
InstanceReplayer<ImageCtxT> *instance_replayer, + Throttler<ImageCtxT> *image_sync_throttler); + void destroy() { + delete this; + } + + InstanceWatcher(librados::IoCtx &io_ctx, librbd::AsioEngine& asio_engine, + InstanceReplayer<ImageCtxT> *instance_replayer, + Throttler<ImageCtxT> *image_sync_throttler, + const std::string &instance_id); + ~InstanceWatcher() override; + + inline std::string &get_instance_id() { + return m_instance_id; + } + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + void remove(Context *on_finish); + + void notify_image_acquire(const std::string &instance_id, + const std::string &global_image_id, + Context *on_notify_ack); + void notify_image_release(const std::string &instance_id, + const std::string &global_image_id, + Context *on_notify_ack); + void notify_peer_image_removed(const std::string &instance_id, + const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_notify_ack); + + void notify_sync_request(const std::string &sync_id, Context *on_sync_start); + bool cancel_sync_request(const std::string &sync_id); + void notify_sync_complete(const std::string &sync_id); + + void cancel_notify_requests(const std::string &instance_id); + + void handle_acquire_leader(); + void handle_release_leader(); + void handle_update_leader(const std::string &leader_instance_id); + +private: + /** + * @verbatim + * + * BREAK_INSTANCE_LOCK -------\ + * ^ | + * | (error) | + * GET_INSTANCE_LOCKER * * *>| + * ^ (remove) | + * | | + * <uninitialized> <----------------+---- WAIT_FOR_NOTIFY_OPS + * | (init) ^ | ^ + * v (error) * | | + * REGISTER_INSTANCE * * * * * *|* *> UNREGISTER_INSTANCE + * | * | ^ + * v (error) * v | + * CREATE_INSTANCE_OBJECT * * * * * *> REMOVE_INSTANCE_OBJECT + * | * ^ + * v (error) * | + * REGISTER_WATCH * * * * * * * * * *> UNREGISTER_WATCH + * | * ^ + * v (error) * | + * ACQUIRE_LOCK * * * * * * * * * * * RELEASE_LOCK + * | ^ + * v 
(shut_down) | + * <watching> -------------------------------/ + * + * @endverbatim + */ + + struct C_NotifyInstanceRequest; + struct C_SyncRequest; + + typedef std::pair<std::string, std::string> Id; + + struct HandlePayloadVisitor : public boost::static_visitor<void> { + InstanceWatcher *instance_watcher; + std::string instance_id; + C_NotifyAck *on_notify_ack; + + HandlePayloadVisitor(InstanceWatcher *instance_watcher, + const std::string &instance_id, + C_NotifyAck *on_notify_ack) + : instance_watcher(instance_watcher), instance_id(instance_id), + on_notify_ack(on_notify_ack) { + } + + template <typename Payload> + inline void operator()(const Payload &payload) const { + instance_watcher->handle_payload(instance_id, payload, on_notify_ack); + } + }; + + struct Request { + std::string instance_id; + uint64_t request_id; + C_NotifyAck *on_notify_ack = nullptr; + + Request(const std::string &instance_id, uint64_t request_id) + : instance_id(instance_id), request_id(request_id) { + } + + inline bool operator<(const Request &rhs) const { + return instance_id < rhs.instance_id || + (instance_id == rhs.instance_id && request_id < rhs.request_id); + } + }; + + Threads<ImageCtxT> *m_threads; + InstanceReplayer<ImageCtxT> *m_instance_replayer; + Throttler<ImageCtxT> *m_image_sync_throttler; + std::string m_instance_id; + + mutable ceph::mutex m_lock; + librbd::ManagedLock<ImageCtxT> *m_instance_lock; + Context *m_on_finish = nullptr; + int m_ret_val = 0; + std::string m_leader_instance_id; + librbd::managed_lock::Locker m_instance_locker; + std::set<std::pair<std::string, C_NotifyInstanceRequest *>> m_notify_ops; + AsyncOpTracker m_notify_op_tracker; + uint64_t m_request_seq = 0; + std::set<Request> m_requests; + std::set<C_NotifyInstanceRequest *> m_suspended_ops; + std::map<std::string, C_SyncRequest *> m_inflight_sync_reqs; + + inline bool is_leader() const { + return m_leader_instance_id == m_instance_id; + } + + void register_instance(); + void 
handle_register_instance(int r); + + void create_instance_object(); + void handle_create_instance_object(int r); + + void register_watch(); + void handle_register_watch(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void release_lock(); + void handle_release_lock(int r); + + void unregister_watch(); + void handle_unregister_watch(int r); + + void remove_instance_object(); + void handle_remove_instance_object(int r); + + void unregister_instance(); + void handle_unregister_instance(int r); + + void wait_for_notify_ops(); + void handle_wait_for_notify_ops(int r); + + void get_instance_locker(); + void handle_get_instance_locker(int r); + + void break_instance_lock(); + void handle_break_instance_lock(int r); + + void suspend_notify_request(C_NotifyInstanceRequest *req); + bool unsuspend_notify_request(C_NotifyInstanceRequest *req); + void unsuspend_notify_requests(); + + void notify_sync_complete(const ceph::mutex& lock, const std::string &sync_id); + void handle_notify_sync_request(C_SyncRequest *sync_ctx, int r); + void handle_notify_sync_complete(C_SyncRequest *sync_ctx, int r); + + void notify_sync_start(const std::string &instance_id, + const std::string &sync_id); + + Context *prepare_request(const std::string &instance_id, uint64_t request_id, + C_NotifyAck *on_notify_ack); + void complete_request(const std::string &instance_id, uint64_t request_id, + int r); + + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; + + void handle_image_acquire(const std::string &global_image_id, + Context *on_finish); + void handle_image_release(const std::string &global_image_id, + Context *on_finish); + void handle_peer_image_removed(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish); + + void handle_sync_request(const std::string &instance_id, + const std::string &sync_id, Context *on_finish); + void handle_sync_start(const std::string &instance_id, + 
const std::string &sync_id, Context *on_finish); + + void handle_payload(const std::string &instance_id, + const instance_watcher::ImageAcquirePayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::ImageReleasePayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::PeerImageRemovedPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::SyncRequestPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::SyncStartPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::UnknownPayload &payload, + C_NotifyAck *on_notify_ack); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCE_WATCHER_H diff --git a/src/tools/rbd_mirror/Instances.cc b/src/tools/rbd_mirror/Instances.cc new file mode 100644 index 000000000..ca291bb5f --- /dev/null +++ b/src/tools/rbd_mirror/Instances.cc @@ -0,0 +1,356 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/stringify.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "InstanceWatcher.h" +#include "Instances.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Instances: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +Instances<I>::Instances(Threads<I> *threads, 
librados::IoCtx &ioctx, + const std::string& instance_id, + instances::Listener& listener) : + m_threads(threads), m_ioctx(ioctx), m_instance_id(instance_id), + m_listener(listener), m_cct(reinterpret_cast<CephContext *>(ioctx.cct())), + m_lock(ceph::make_mutex("rbd::mirror::Instances " + ioctx.get_pool_name())) { +} + +template <typename I> +Instances<I>::~Instances() { +} + +template <typename I> +void Instances<I>::init(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + get_instances(); +} + +template <typename I> +void Instances<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + Context *ctx = new LambdaContext( + [this](int r) { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + cancel_remove_task(); + wait_for_ops(); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Instances<I>::unblock_listener() { + dout(5) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_listener_blocked); + m_listener_blocked = false; + + InstanceIds added_instance_ids; + for (auto& pair : m_instances) { + if (pair.second.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(pair.first); + } + } + + if (!added_instance_ids.empty()) { + m_threads->work_queue->queue( + new C_NotifyInstancesAdded(this, added_instance_ids), 0); + } +} + +template <typename I> +void Instances<I>::acked(const InstanceIds& instance_ids) { + dout(10) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + if (m_on_finish != nullptr) { + dout(5) << "received on shut down, ignoring" << dendl; + return; + } + + Context *ctx = new C_HandleAcked(this, instance_ids); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Instances<I>::handle_acked(const InstanceIds& instance_ids) { + dout(5) << 
"instance_ids=" << instance_ids << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (m_on_finish != nullptr) { + dout(5) << "handled on shut down, ignoring" << dendl; + return; + } + + InstanceIds added_instance_ids; + auto time = clock_t::now(); + for (auto& instance_id : instance_ids) { + auto &instance = m_instances.insert( + std::make_pair(instance_id, Instance{})).first->second; + instance.acked_time = time; + if (instance.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(instance_id); + } + } + + schedule_remove_task(time); + if (!m_listener_blocked && !added_instance_ids.empty()) { + m_threads->work_queue->queue( + new C_NotifyInstancesAdded(this, added_instance_ids), 0); + } +} + +template <typename I> +void Instances<I>::notify_instances_added(const InstanceIds& instance_ids) { + std::unique_lock locker{m_lock}; + InstanceIds added_instance_ids; + for (auto& instance_id : instance_ids) { + auto it = m_instances.find(instance_id); + if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(instance_id); + } + } + + if (added_instance_ids.empty()) { + return; + } + + dout(5) << "instance_ids=" << added_instance_ids << dendl; + locker.unlock(); + m_listener.handle_added(added_instance_ids); + locker.lock(); + + for (auto& instance_id : added_instance_ids) { + auto it = m_instances.find(instance_id); + if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) { + it->second.state = INSTANCE_STATE_IDLE; + } + } +} + +template <typename I> +void Instances<I>::notify_instances_removed(const InstanceIds& instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + m_listener.handle_removed(instance_ids); + + std::lock_guard locker{m_lock}; + for (auto& instance_id : instance_ids) { + m_instances.erase(instance_id); + } +} + +template <typename I> +void Instances<I>::list(std::vector<std::string> *instance_ids) { + dout(20) << dendl; + + 
std::lock_guard locker{m_lock}; + + for (auto it : m_instances) { + instance_ids->push_back(it.first); + } +} + + +template <typename I> +void Instances<I>::get_instances() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_context_callback< + Instances, &Instances<I>::handle_get_instances>(this); + + InstanceWatcher<I>::get_instances(m_ioctx, &m_instance_ids, ctx); +} + +template <typename I> +void Instances<I>::handle_get_instances(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + std::swap(on_finish, m_on_finish); + } + + if (r < 0) { + derr << "error retrieving instances: " << cpp_strerror(r) << dendl; + } else { + handle_acked(m_instance_ids); + } + on_finish->complete(r); +} + +template <typename I> +void Instances<I>::wait_for_ops() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Instances, &Instances<I>::handle_wait_for_ops>(this)); + + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Instances<I>::handle_wait_for_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void Instances<I>::remove_instances(const Instances<I>::clock_t::time_point& time) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + InstanceIds instance_ids; + for (auto& instance_pair : m_instances) { + if (instance_pair.first == m_instance_id) { + continue; + } + auto& instance = instance_pair.second; + if (instance.state != INSTANCE_STATE_REMOVING && + instance.acked_time <= time) { + instance.state = INSTANCE_STATE_REMOVING; + instance_ids.push_back(instance_pair.first); + } + } + ceph_assert(!instance_ids.empty()); + + dout(10) << "instance_ids=" << 
instance_ids << dendl; + Context* ctx = new LambdaContext([this, instance_ids](int r) { + handle_remove_instances(r, instance_ids); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + auto gather_ctx = new C_Gather(m_cct, ctx); + for (auto& instance_id : instance_ids) { + InstanceWatcher<I>::remove_instance(m_ioctx, *m_threads->asio_engine, + instance_id, gather_ctx->new_sub()); + } + + m_async_op_tracker.start_op(); + gather_ctx->activate(); +} + +template <typename I> +void Instances<I>::handle_remove_instances( + int r, const InstanceIds& instance_ids) { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + dout(10) << "r=" << r << ", instance_ids=" << instance_ids << dendl; + ceph_assert(r == 0); + + // fire removed notification now that instances have been blocklisted + m_threads->work_queue->queue( + new C_NotifyInstancesRemoved(this, instance_ids), 0); + + // reschedule the timer for the next batch + schedule_remove_task(clock_t::now()); + m_async_op_tracker.finish_op(); +} + +template <typename I> +void Instances<I>::cancel_remove_task() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + if (m_timer_task == nullptr) { + return; + } + + dout(10) << dendl; + + bool canceled = m_threads->timer->cancel_event(m_timer_task); + ceph_assert(canceled); + m_timer_task = nullptr; +} + +template <typename I> +void Instances<I>::schedule_remove_task(const Instances<I>::clock_t::time_point& time) { + cancel_remove_task(); + if (m_on_finish != nullptr) { + dout(10) << "received on shut down, ignoring" << dendl; + return; + } + + int after = m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_heartbeat_interval") * + (1 + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats") + + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_acquire_attempts_before_break")); + + bool schedule = false; + auto oldest_time = time; + for (auto& instance : m_instances) { + if 
(instance.first == m_instance_id) { + continue; + } + if (instance.second.state == INSTANCE_STATE_REMOVING) { + // removal is already in-flight + continue; + } + + oldest_time = std::min(oldest_time, instance.second.acked_time); + schedule = true; + } + + if (!schedule) { + return; + } + + dout(10) << dendl; + + // schedule a time to fire when the oldest instance should be removed + m_timer_task = new LambdaContext( + [this, oldest_time](int r) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + std::lock_guard locker{m_lock}; + m_timer_task = nullptr; + + remove_instances(oldest_time); + }); + + oldest_time += ceph::make_timespan(after); + m_threads->timer->add_event_at(oldest_time, m_timer_task); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Instances<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Instances.h b/src/tools/rbd_mirror/Instances.h new file mode 100644 index 000000000..e6e104b73 --- /dev/null +++ b/src/tools/rbd_mirror/Instances.h @@ -0,0 +1,168 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCES_H +#define CEPH_RBD_MIRROR_INSTANCES_H + +#include <map> +#include <vector> + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "librbd/Watcher.h" +#include "tools/rbd_mirror/instances/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class Instances { +public: + typedef std::vector<std::string> InstanceIds; + + static Instances *create(Threads<ImageCtxT> *threads, + librados::IoCtx &ioctx, + const std::string& instance_id, + instances::Listener& listener) { + return new Instances(threads, ioctx, instance_id, listener); + } + void destroy() { + delete this; + } + + Instances(Threads<ImageCtxT> 
*threads, librados::IoCtx &ioctx, + const std::string& instance_id, instances::Listener& listener); + virtual ~Instances(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void unblock_listener(); + + void acked(const InstanceIds& instance_ids); + + void list(std::vector<std::string> *instance_ids); + +private: + /** + * @verbatim + * + * <uninitialized> <---------------------\ + * | (init) ^ | + * v (error) * | + * GET_INSTANCES * * * * * WAIT_FOR_OPS + * | ^ + * v (shut_down) | + * <initialized> ------------------------/ + * . + * . (remove_instance) + * v + * REMOVE_INSTANCE + * + * @endverbatim + */ + + enum InstanceState { + INSTANCE_STATE_ADDING, + INSTANCE_STATE_IDLE, + INSTANCE_STATE_REMOVING + }; + + using clock_t = ceph::real_clock; + struct Instance { + clock_t::time_point acked_time{}; + InstanceState state = INSTANCE_STATE_ADDING; + }; + + struct C_NotifyBase : public Context { + Instances *instances; + InstanceIds instance_ids; + + C_NotifyBase(Instances *instances, const InstanceIds& instance_ids) + : instances(instances), instance_ids(instance_ids) { + instances->m_async_op_tracker.start_op(); + } + + void finish(int r) override { + execute(); + instances->m_async_op_tracker.finish_op(); + } + + virtual void execute() = 0; + }; + + struct C_HandleAcked : public C_NotifyBase { + C_HandleAcked(Instances *instances, const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->handle_acked(this->instance_ids); + } + }; + + struct C_NotifyInstancesAdded : public C_NotifyBase { + C_NotifyInstancesAdded(Instances *instances, + const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->notify_instances_added(this->instance_ids); + } + }; + + struct C_NotifyInstancesRemoved : public C_NotifyBase { + C_NotifyInstancesRemoved(Instances *instances, + const InstanceIds& instance_ids) + : 
C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->notify_instances_removed(this->instance_ids); + } + }; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_ioctx; + std::string m_instance_id; + instances::Listener& m_listener; + CephContext *m_cct; + + ceph::mutex m_lock; + InstanceIds m_instance_ids; + std::map<std::string, Instance> m_instances; + Context *m_on_finish = nullptr; + AsyncOpTracker m_async_op_tracker; + + Context *m_timer_task = nullptr; + + bool m_listener_blocked = true; + + void handle_acked(const InstanceIds& instance_ids); + void notify_instances_added(const InstanceIds& instance_ids); + void notify_instances_removed(const InstanceIds& instance_ids); + + void get_instances(); + void handle_get_instances(int r); + + void wait_for_ops(); + void handle_wait_for_ops(int r); + + void remove_instances(const clock_t::time_point& time); + void handle_remove_instances(int r, const InstanceIds& instance_ids); + + void cancel_remove_task(); + void schedule_remove_task(const clock_t::time_point& time); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCES_H diff --git a/src/tools/rbd_mirror/LeaderWatcher.cc b/src/tools/rbd_mirror/LeaderWatcher.cc new file mode 100644 index 000000000..8f12af14c --- /dev/null +++ b/src/tools/rbd_mirror/LeaderWatcher.cc @@ -0,0 +1,1069 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "LeaderWatcher.h" +#include "common/Cond.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "include/stringify.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/watcher/Types.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::LeaderWatcher: " \ + << this << " " << 
__func__ << ": " +namespace rbd { +namespace mirror { + +using namespace leader_watcher; + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +LeaderWatcher<I>::LeaderWatcher(Threads<I> *threads, librados::IoCtx &io_ctx, + leader_watcher::Listener *listener) + : Watcher(io_ctx, threads->work_queue, RBD_MIRROR_LEADER), + m_threads(threads), m_listener(listener), m_instances_listener(this), + m_lock(ceph::make_mutex("rbd::mirror::LeaderWatcher " + + io_ctx.get_pool_name())), + m_notifier_id(librados::Rados(io_ctx).get_instance_id()), + m_instance_id(stringify(m_notifier_id)), + m_leader_lock(new LeaderLock(m_ioctx, *m_threads->asio_engine, m_oid, this, + true, m_cct->_conf.get_val<uint64_t>( + "rbd_blocklist_expire_seconds"))) { +} + +template <typename I> +LeaderWatcher<I>::~LeaderWatcher() { + ceph_assert(m_instances == nullptr); + ceph_assert(m_timer_task == nullptr); + + delete m_leader_lock; +} + +template <typename I> +std::string LeaderWatcher<I>::get_instance_id() { + return m_instance_id; +} + +template <typename I> +int LeaderWatcher<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void LeaderWatcher<I>::init(Context *on_finish) { + dout(10) << "notifier_id=" << m_notifier_id << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + create_leader_object(); +} + +template <typename I> +void LeaderWatcher<I>::create_leader_object() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + librados::ObjectWriteOperation op; + op.create(false); + + librados::AioCompletion *aio_comp = create_rados_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_create_leader_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void 
LeaderWatcher<I>::handle_create_leader_object(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + + if (r == 0) { + register_watch(); + return; + } + + derr << "error creating " << m_oid << " object: " << cpp_strerror(r) + << dendl; + + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::register_watch() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_register_watch>(this)); + + librbd::Watcher::register_watch(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_register_watch(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard timer_locker(m_threads->timer_lock); + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error registering leader watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + } else { + schedule_acquire_leader_lock(0); + } + + ceph_assert(m_on_finish != nullptr); + std::swap(on_finish, m_on_finish); + } + + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void LeaderWatcher<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + ceph_assert(m_on_shut_down_finish == nullptr); + m_on_shut_down_finish = on_finish; + cancel_timer_task(); + shut_down_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::shut_down_leader_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, 
&LeaderWatcher<I>::handle_shut_down_leader_lock>(this)); + + m_leader_lock->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error shutting down leader lock: " << cpp_strerror(r) << dendl; + } + + unregister_watch(); +} + +template <typename I> +void LeaderWatcher<I>::unregister_watch() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_unregister_watch>(this)); + + librbd::Watcher::unregister_watch(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_unregister_watch(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering leader watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + } + wait_for_tasks(); +} + +template <typename I> +void LeaderWatcher<I>::wait_for_tasks() { + dout(10) << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + schedule_timer_task("wait for tasks", 0, false, + &LeaderWatcher<I>::handle_wait_for_tasks, true); +} + +template <typename I> +void LeaderWatcher<I>::handle_wait_for_tasks() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_on_shut_down_finish != nullptr); + + ceph_assert(!m_timer_op_tracker.empty()); + m_timer_op_tracker.finish_op(); + + auto ctx = new LambdaContext([this](int r) { + Context *on_finish; + { + // ensure lock isn't held when completing shut down + std::lock_guard locker{m_lock}; + ceph_assert(m_on_shut_down_finish != nullptr); + on_finish = m_on_shut_down_finish; + } + on_finish->complete(0); + }); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +bool LeaderWatcher<I>::is_blocklisted() const { + std::lock_guard 
locker{m_lock}; + return m_blocklisted; +} + +template <typename I> +bool LeaderWatcher<I>::is_leader() const { + std::lock_guard locker{m_lock}; + return is_leader(m_lock); +} + +template <typename I> +bool LeaderWatcher<I>::is_leader(ceph::mutex &lock) const { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + bool leader = m_leader_lock->is_leader(); + dout(10) << leader << dendl; + return leader; +} + +template <typename I> +bool LeaderWatcher<I>::is_releasing_leader() const { + std::lock_guard locker{m_lock}; + return is_releasing_leader(m_lock); +} + +template <typename I> +bool LeaderWatcher<I>::is_releasing_leader(ceph::mutex &lock) const { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + bool releasing = m_leader_lock->is_releasing_leader(); + dout(10) << releasing << dendl; + return releasing; +} + +template <typename I> +bool LeaderWatcher<I>::get_leader_instance_id(std::string *instance_id) const { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + if (is_leader(m_lock) || is_releasing_leader(m_lock)) { + *instance_id = m_instance_id; + return true; + } + + if (!m_locker.cookie.empty()) { + *instance_id = stringify(m_locker.entity.num()); + return true; + } + + return false; +} + +template <typename I> +void LeaderWatcher<I>::release_leader() { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + if (!is_leader(m_lock)) { + return; + } + + release_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::list_instances(std::vector<std::string> *instance_ids) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + + instance_ids->clear(); + if (m_instances != nullptr) { + m_instances->list(instance_ids); + } +} + +template <typename I> +void LeaderWatcher<I>::cancel_timer_task() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + if (m_timer_task == nullptr) { + return; + } + + dout(10) << m_timer_task << dendl; + bool canceled = 
m_threads->timer->cancel_event(m_timer_task); + ceph_assert(canceled); + m_timer_task = nullptr; +} + +template <typename I> +void LeaderWatcher<I>::schedule_timer_task(const std::string &name, + int delay_factor, bool leader, + TimerCallback timer_callback, + bool shutting_down) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + if (!shutting_down && m_on_shut_down_finish != nullptr) { + return; + } + + cancel_timer_task(); + + m_timer_task = new LambdaContext( + [this, leader, timer_callback](int r) { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + m_timer_task = nullptr; + + if (m_timer_op_tracker.empty()) { + std::lock_guard locker{m_lock}; + execute_timer_task(leader, timer_callback); + return; + } + + // old timer task is still running -- do not start next + // task until the previous task completes + if (m_timer_gate == nullptr) { + m_timer_gate = new C_TimerGate(this); + m_timer_op_tracker.wait_for_ops(m_timer_gate); + } + m_timer_gate->leader = leader; + m_timer_gate->timer_callback = timer_callback; + }); + + int after = delay_factor * m_cct->_conf.get_val<uint64_t>( + "rbd_mirror_leader_heartbeat_interval"); + + dout(10) << "scheduling " << name << " after " << after << " sec (task " + << m_timer_task << ")" << dendl; + m_threads->timer->add_event_after(after, m_timer_task); +} + +template <typename I> +void LeaderWatcher<I>::execute_timer_task(bool leader, + TimerCallback timer_callback) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_timer_op_tracker.empty()); + + if (is_leader(m_lock) != leader) { + return; + } + + m_timer_op_tracker.start_op(); + (this->*timer_callback)(); +} + +template <typename I> +void LeaderWatcher<I>::handle_post_acquire_leader_lock(int r, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + if (r == -EAGAIN) { + dout(10) << "already 
locked" << dendl; + } else { + derr << "error acquiring leader lock: " << cpp_strerror(r) << dendl; + } + on_finish->complete(r); + return; + } + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + init_instances(); +} + +template <typename I> +void LeaderWatcher<I>::handle_pre_release_leader_lock(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + notify_listener(); +} + +template <typename I> +void LeaderWatcher<I>::handle_post_release_leader_lock(int r, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + on_finish->complete(r); + return; + } + + std::lock_guard locker{m_lock}; + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + notify_lock_released(); +} + +template <typename I> +void LeaderWatcher<I>::break_leader_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_locker.cookie.empty()) { + get_locker(); + return; + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_break_leader_lock>(this)); + + m_leader_lock->break_lock(m_locker, true, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_break_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (r < 0 && r != -ENOENT) { + derr << "error breaking leader lock: " << cpp_strerror(r) << dendl; + schedule_acquire_leader_lock(1); + m_timer_op_tracker.finish_op(); + return; + } + + m_locker = {}; + 
m_acquire_attempts = 0; + acquire_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::schedule_get_locker(bool reset_leader, + uint32_t delay_factor) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + if (reset_leader) { + m_locker = {}; + m_acquire_attempts = 0; + } + + schedule_timer_task("get locker", delay_factor, false, + &LeaderWatcher<I>::get_locker, false); +} + +template <typename I> +void LeaderWatcher<I>::get_locker() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_timer_op_tracker.empty()); + + C_GetLocker *get_locker_ctx = new C_GetLocker(this); + Context *ctx = create_async_context_callback(m_work_queue, get_locker_ctx); + + m_leader_lock->get_locker(&get_locker_ctx->locker, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_get_locker(int r, + librbd::managed_lock::Locker& locker) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock l{m_threads->timer_lock, m_lock}; + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (is_leader(m_lock)) { + m_locker = {}; + m_timer_op_tracker.finish_op(); + return; + } + + if (r == -ENOENT) { + m_locker = {}; + m_acquire_attempts = 0; + acquire_leader_lock(); + return; + } else if (r < 0) { + derr << "error retrieving leader locker: " << cpp_strerror(r) << dendl; + schedule_get_locker(true, 1); + m_timer_op_tracker.finish_op(); + return; + } + + bool notify_listener = false; + if (m_locker != locker) { + m_locker = locker; + notify_listener = true; + if (m_acquire_attempts > 1) { + dout(10) << "new lock owner detected -- resetting heartbeat counter" + << dendl; + m_acquire_attempts = 0; + } + } + + if (m_acquire_attempts >= m_cct->_conf.get_val<uint64_t>( + 
"rbd_mirror_leader_max_acquire_attempts_before_break")) { + dout(0) << "breaking leader lock after " << m_acquire_attempts << " " + << "failed attempts to acquire" << dendl; + break_leader_lock(); + return; + } + + schedule_acquire_leader_lock(1); + + if (!notify_listener) { + m_timer_op_tracker.finish_op(); + return; + } + + auto ctx = new LambdaContext( + [this](int r) { + std::string instance_id; + if (get_leader_instance_id(&instance_id)) { + m_listener->update_leader_handler(instance_id); + } + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + m_timer_op_tracker.finish_op(); + }); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void LeaderWatcher<I>::schedule_acquire_leader_lock(uint32_t delay_factor) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + + schedule_timer_task("acquire leader lock", + delay_factor * + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats"), + false, &LeaderWatcher<I>::acquire_leader_lock, false); +} + +template <typename I> +void LeaderWatcher<I>::acquire_leader_lock() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_timer_op_tracker.empty()); + + ++m_acquire_attempts; + dout(10) << "acquire_attempts=" << m_acquire_attempts << dendl; + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_acquire_leader_lock>(this)); + m_leader_lock->try_acquire_lock(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_acquire_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (r < 0) { + if (r == -EAGAIN) { + 
dout(10) << "already locked" << dendl; + } else { + derr << "error acquiring lock: " << cpp_strerror(r) << dendl; + } + + get_locker(); + return; + } + + m_locker = {}; + m_acquire_attempts = 0; + + if (m_ret_val) { + dout(5) << "releasing due to error on notify" << dendl; + release_leader_lock(); + m_timer_op_tracker.finish_op(); + return; + } + + notify_heartbeat(); +} + +template <typename I> +void LeaderWatcher<I>::release_leader_lock() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_release_leader_lock>(this)); + + m_leader_lock->release_lock(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_release_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + if (r < 0) { + derr << "error releasing lock: " << cpp_strerror(r) << dendl; + return; + } + + schedule_acquire_leader_lock(1); +} + +template <typename I> +void LeaderWatcher<I>::init_instances() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_instances == nullptr); + + m_instances = Instances<I>::create(m_threads, m_ioctx, m_instance_id, + m_instances_listener); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_instances>(this); + + m_instances->init(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_init_instances(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + if (r < 0) { + std::lock_guard locker{m_lock}; + derr << "error initializing instances: " << cpp_strerror(r) << dendl; + m_instances->destroy(); + m_instances = nullptr; + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } else { + std::lock_guard locker{m_lock}; + notify_listener(); + return; + } + + on_finish->complete(r); +} + +template <typename I> +void 
LeaderWatcher<I>::shut_down_instances() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_instances != nullptr); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback<LeaderWatcher<I>, + &LeaderWatcher<I>::handle_shut_down_instances>(this)); + + m_instances->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_instances(int r) { + dout(10) << "r=" << r << dendl; + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + + m_instances->destroy(); + m_instances = nullptr; + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::notify_listener() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_listener>(this)); + + if (is_leader(m_lock)) { + ctx = new LambdaContext( + [this, ctx](int r) { + m_listener->post_acquire_handler(ctx); + }); + } else { + ctx = new LambdaContext( + [this, ctx](int r) { + m_listener->pre_release_handler(ctx); + }); + } + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_listener(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error notifying listener: " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + + if (is_leader(m_lock)) { + notify_lock_acquired(); + } else { + shut_down_instances(); + } +} + +template <typename I> +void LeaderWatcher<I>::notify_lock_acquired() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_acquired>(this); + + bufferlist bl; + encode(NotifyMessage{LockAcquiredPayload{}}, bl); + + 
send_notify(bl, nullptr, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_lock_acquired(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying leader lock acquired: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + } + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + + if (m_ret_val == 0) { + // listener should be ready for instance add/remove events now + m_instances->unblock_listener(); + } + } + on_finish->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::notify_lock_released() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_released>(this); + + bufferlist bl; + encode(NotifyMessage{LockReleasedPayload{}}, bl); + + send_notify(bl, nullptr, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_lock_released(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + std::lock_guard locker{m_lock}; + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying leader lock released: " << cpp_strerror(r) + << dendl; + } + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::notify_heartbeat() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_timer_op_tracker.empty()); + + if (!is_leader(m_lock)) { + dout(5) << "not leader, canceling" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_heartbeat>(this); + + bufferlist bl; + encode(NotifyMessage{HeartbeatPayload{}}, bl); + + m_heartbeat_response.acks.clear(); + 
send_notify(bl, &m_heartbeat_response, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_heartbeat(int r) { + dout(10) << "r=" << r << dendl; + + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + ceph_assert(!m_timer_op_tracker.empty()); + + m_timer_op_tracker.finish_op(); + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + return; + } else if (!is_leader(m_lock)) { + return; + } + + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying heartbeat: " << cpp_strerror(r) + << ", releasing leader" << dendl; + release_leader_lock(); + return; + } + + dout(10) << m_heartbeat_response.acks.size() << " acks received, " + << m_heartbeat_response.timeouts.size() << " timed out" << dendl; + + std::vector<std::string> instance_ids; + for (auto &it: m_heartbeat_response.acks) { + uint64_t notifier_id = it.first.gid; + instance_ids.push_back(stringify(notifier_id)); + } + if (!instance_ids.empty()) { + m_instances->acked(instance_ids); + } + + schedule_timer_task("heartbeat", 1, true, + &LeaderWatcher<I>::notify_heartbeat, false); +} + +template <typename I> +void LeaderWatcher<I>::handle_heartbeat(Context *on_notify_ack) { + dout(10) << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (is_leader(m_lock)) { + dout(5) << "got another leader heartbeat, ignoring" << dendl; + } else if (!m_locker.cookie.empty()) { + cancel_timer_task(); + m_acquire_attempts = 0; + schedule_acquire_leader_lock(1); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_lock_acquired(Context *on_notify_ack) { + dout(10) << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (is_leader(m_lock)) { + dout(5) << "got another leader lock_acquired, ignoring" << dendl; + } else { + cancel_timer_task(); + schedule_get_locker(true, 0); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void 
LeaderWatcher<I>::handle_lock_released(Context *on_notify_ack) { + dout(10) << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (is_leader(m_lock)) { + dout(5) << "got another leader lock_released, ignoring" << dendl; + } else { + cancel_timer_task(); + schedule_get_locker(true, 0); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) { + dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", " + << "notifier_id=" << notifier_id << dendl; + + Context *ctx = new C_NotifyAck(this, notify_id, handle); + + if (notifier_id == m_notifier_id) { + dout(10) << "our own notification, ignoring" << dendl; + ctx->complete(0); + return; + } + + NotifyMessage notify_message; + try { + auto iter = bl.cbegin(); + decode(notify_message, iter); + } catch (const buffer::error &err) { + derr << "error decoding image notification: " << err.what() << dendl; + ctx->complete(0); + return; + } + + apply_visitor(HandlePayloadVisitor(this, ctx), notify_message.payload); +} + +template <typename I> +void LeaderWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLOCKLISTED) { + dout(1) << "blocklisted detected" << dendl; + m_blocklisted = true; + return; + } + + m_leader_lock->reacquire_lock(nullptr); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const HeartbeatPayload &payload, + Context *on_notify_ack) { + dout(10) << "heartbeat" << dendl; + + handle_heartbeat(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const LockAcquiredPayload &payload, + Context *on_notify_ack) { + dout(10) << "lock_acquired" << dendl; + + handle_lock_acquired(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const LockReleasedPayload &payload, + Context *on_notify_ack) { + dout(10) << "lock_released" << dendl; + + 
handle_lock_released(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const UnknownPayload &payload, + Context *on_notify_ack) { + dout(10) << "unknown" << dendl; + + on_notify_ack->complete(0); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::LeaderWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/LeaderWatcher.h b/src/tools/rbd_mirror/LeaderWatcher.h new file mode 100644 index 000000000..58f23148f --- /dev/null +++ b/src/tools/rbd_mirror/LeaderWatcher.h @@ -0,0 +1,313 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_LEADER_WATCHER_H +#define CEPH_RBD_MIRROR_LEADER_WATCHER_H + +#include <list> +#include <memory> +#include <string> + +#include "common/AsyncOpTracker.h" +#include "librbd/ManagedLock.h" +#include "librbd/Watcher.h" +#include "librbd/managed_lock/Types.h" +#include "librbd/watcher/Types.h" +#include "Instances.h" +#include "tools/rbd_mirror/instances/Types.h" +#include "tools/rbd_mirror/leader_watcher/Types.h" + +namespace librbd { +class ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class LeaderWatcher : protected librbd::Watcher { + using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning +public: + static LeaderWatcher* create(Threads<ImageCtxT> *threads, + librados::IoCtx &io_ctx, + leader_watcher::Listener *listener) { + return new LeaderWatcher(threads, io_ctx, listener); + } + + LeaderWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &io_ctx, + leader_watcher::Listener *listener); + ~LeaderWatcher() override; + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + bool is_blocklisted() const; + bool is_leader() const; + bool is_releasing_leader() const; + 
bool get_leader_instance_id(std::string *instance_id) const; + void release_leader(); + void list_instances(std::vector<std::string> *instance_ids); + + std::string get_instance_id(); + +private: + /** + * @verbatim + * + * <uninitialized> <------------------------------ WAIT_FOR_TASKS + * | (init) ^ ^ + * v * | + * CREATE_OBJECT * * * * * (error) UNREGISTER_WATCH + * | * ^ + * v * | + * REGISTER_WATCH * * * * * SHUT_DOWN_LEADER_LOCK + * | ^ + * | (no leader heartbeat and acquire failed) | + * | BREAK_LOCK <-------------------------------------\ | + * | | (no leader heartbeat) | | (shut down) + * | | /----------------------------------------\ | | + * | | | (lock_released received) | | + * | | | /-------------------------------------\ | | + * | | | | (lock_acquired or | | | + * | | | | heartbeat received) | | | + * | | | | (ENOENT) /-----------\ | | | + * | | | | * * * * * * * * * * | | | | | + * v v v v v (error) * v | | | | + * ACQUIRE_LEADER_LOCK * * * * *> GET_LOCKER ---> <secondary> + * | * ^ + * ....|...................*.................... .....|..................... + * . v * . . | post_release . + * .INIT_INSTANCES * * * * * . .NOTIFY_LOCK_RELEASED . + * . | . .....^..................... + * . v . | + * .NOTIFY_LISTENER . RELEASE_LEADER_LOCK + * . | . ^ + * . v . .....|..................... + * .NOTIFY_LOCK_ACQUIRED . . | . + * . | post_acquire . .SHUT_DOWN_INSTANCES . + * ....|........................................ . ^ . + * v . | . + * <leader> -----------------------------------> .NOTIFY_LISTENER . + * (shut_down, release_leader, . pre_release . + * notify error) ........................... 
+ * @endverbatim + */ + + struct InstancesListener : public instances::Listener { + LeaderWatcher* leader_watcher; + + InstancesListener(LeaderWatcher* leader_watcher) + : leader_watcher(leader_watcher) { + } + + void handle_added(const InstanceIds& instance_ids) override { + leader_watcher->m_listener->handle_instances_added(instance_ids); + } + + void handle_removed(const InstanceIds& instance_ids) override { + leader_watcher->m_listener->handle_instances_removed(instance_ids); + } + }; + + class LeaderLock : public librbd::ManagedLock<ImageCtxT> { + public: + typedef librbd::ManagedLock<ImageCtxT> Parent; + + LeaderLock(librados::IoCtx& ioctx, librbd::AsioEngine& asio_engine, + const std::string& oid, LeaderWatcher *watcher, + bool blocklist_on_break_lock, + uint32_t blocklist_expire_seconds) + : Parent(ioctx, asio_engine, oid, watcher, + librbd::managed_lock::EXCLUSIVE, blocklist_on_break_lock, + blocklist_expire_seconds), + watcher(watcher) { + } + + bool is_leader() const { + std::lock_guard locker{Parent::m_lock}; + return Parent::is_state_post_acquiring() || Parent::is_state_locked(); + } + + bool is_releasing_leader() const { + std::lock_guard locker{Parent::m_lock}; + return Parent::is_state_pre_releasing(); + } + + protected: + void post_acquire_lock_handler(int r, Context *on_finish) { + if (r == 0) { + // lock is owned at this point + std::lock_guard locker{Parent::m_lock}; + Parent::set_state_post_acquiring(); + } + watcher->handle_post_acquire_leader_lock(r, on_finish); + } + void pre_release_lock_handler(bool shutting_down, + Context *on_finish) { + watcher->handle_pre_release_leader_lock(on_finish); + } + void post_release_lock_handler(bool shutting_down, int r, + Context *on_finish) { + watcher->handle_post_release_leader_lock(r, on_finish); + } + private: + LeaderWatcher *watcher; + }; + + struct HandlePayloadVisitor : public boost::static_visitor<void> { + LeaderWatcher *leader_watcher; + Context *on_notify_ack; + + 
HandlePayloadVisitor(LeaderWatcher *leader_watcher, Context *on_notify_ack) + : leader_watcher(leader_watcher), on_notify_ack(on_notify_ack) { + } + + template <typename Payload> + inline void operator()(const Payload &payload) const { + leader_watcher->handle_payload(payload, on_notify_ack); + } + }; + + struct C_GetLocker : public Context { + LeaderWatcher *leader_watcher; + librbd::managed_lock::Locker locker; + + C_GetLocker(LeaderWatcher *leader_watcher) + : leader_watcher(leader_watcher) { + } + + void finish(int r) override { + leader_watcher->handle_get_locker(r, locker); + } + }; + + typedef void (LeaderWatcher<ImageCtxT>::*TimerCallback)(); + + struct C_TimerGate : public Context { + LeaderWatcher *leader_watcher; + + bool leader = false; + TimerCallback timer_callback = nullptr; + + C_TimerGate(LeaderWatcher *leader_watcher) + : leader_watcher(leader_watcher) { + } + + void finish(int r) override { + leader_watcher->m_timer_gate = nullptr; + leader_watcher->execute_timer_task(leader, timer_callback); + } + }; + + Threads<ImageCtxT> *m_threads; + leader_watcher::Listener *m_listener; + + InstancesListener m_instances_listener; + mutable ceph::mutex m_lock; + uint64_t m_notifier_id; + std::string m_instance_id; + LeaderLock *m_leader_lock; + Context *m_on_finish = nullptr; + Context *m_on_shut_down_finish = nullptr; + uint64_t m_acquire_attempts = 0; + int m_ret_val = 0; + Instances<ImageCtxT> *m_instances = nullptr; + librbd::managed_lock::Locker m_locker; + + bool m_blocklisted = false; + + AsyncOpTracker m_timer_op_tracker; + Context *m_timer_task = nullptr; + C_TimerGate *m_timer_gate = nullptr; + + librbd::watcher::NotifyResponse m_heartbeat_response; + + bool is_leader(ceph::mutex &m_lock) const; + bool is_releasing_leader(ceph::mutex &m_lock) const; + + void cancel_timer_task(); + void schedule_timer_task(const std::string &name, + int delay_factor, bool leader, + TimerCallback callback, bool shutting_down); + void execute_timer_task(bool leader, 
TimerCallback timer_callback); + + void create_leader_object(); + void handle_create_leader_object(int r); + + void register_watch(); + void handle_register_watch(int r); + + void shut_down_leader_lock(); + void handle_shut_down_leader_lock(int r); + + void unregister_watch(); + void handle_unregister_watch(int r); + + void wait_for_tasks(); + void handle_wait_for_tasks(); + + void break_leader_lock(); + void handle_break_leader_lock(int r); + + void schedule_get_locker(bool reset_leader, uint32_t delay_factor); + void get_locker(); + void handle_get_locker(int r, librbd::managed_lock::Locker& locker); + + void schedule_acquire_leader_lock(uint32_t delay_factor); + void acquire_leader_lock(); + void handle_acquire_leader_lock(int r); + + void release_leader_lock(); + void handle_release_leader_lock(int r); + + void init_instances(); + void handle_init_instances(int r); + + void shut_down_instances(); + void handle_shut_down_instances(int r); + + void notify_listener(); + void handle_notify_listener(int r); + + void notify_lock_acquired(); + void handle_notify_lock_acquired(int r); + + void notify_lock_released(); + void handle_notify_lock_released(int r); + + void notify_heartbeat(); + void handle_notify_heartbeat(int r); + + void handle_post_acquire_leader_lock(int r, Context *on_finish); + void handle_pre_release_leader_lock(Context *on_finish); + void handle_post_release_leader_lock(int r, Context *on_finish); + + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; + + void handle_rewatch_complete(int r) override; + + void handle_heartbeat(Context *on_ack); + void handle_lock_acquired(Context *on_ack); + void handle_lock_released(Context *on_ack); + + void handle_payload(const leader_watcher::HeartbeatPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::LockAcquiredPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::LockReleasedPayload 
&payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::UnknownPayload &payload, + Context *on_notify_ack); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_LEADER_WATCHER_H diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc new file mode 100644 index 000000000..e87009281 --- /dev/null +++ b/src/tools/rbd_mirror/Mirror.cc @@ -0,0 +1,763 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <signal.h> + +#include <boost/range/adaptor/map.hpp> + +#include "common/Formatter.h" +#include "common/PriorityCache.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Types.h" +#include "librbd/ImageCtx.h" +#include "perfglue/heap_profiler.h" +#include "Mirror.h" +#include "PoolMetaCache.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +using std::list; +using std::map; +using std::set; +using std::string; +using std::unique_ptr; +using std::vector; + +using librados::Rados; +using librados::IoCtx; +using librbd::mirror_peer_t; + +namespace rbd { +namespace mirror { + +namespace { + +class MirrorAdminSocketCommand { +public: + virtual ~MirrorAdminSocketCommand() {} + virtual int call(Formatter *f) = 0; +}; + +class StatusCommand : public MirrorAdminSocketCommand { +public: + explicit StatusCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->print_status(f); + return 0; + } + +private: + Mirror *mirror; +}; + +class StartCommand : public MirrorAdminSocketCommand { +public: + explicit StartCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->start(); + return 0; + } + +private: + Mirror *mirror; +}; + +class StopCommand : public MirrorAdminSocketCommand { +public: + explicit StopCommand(Mirror *mirror) : mirror(mirror) 
{} + + int call(Formatter *f) override { + mirror->stop(); + return 0; + } + +private: + Mirror *mirror; +}; + +class RestartCommand : public MirrorAdminSocketCommand { +public: + explicit RestartCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->restart(); + return 0; + } + +private: + Mirror *mirror; +}; + +class FlushCommand : public MirrorAdminSocketCommand { +public: + explicit FlushCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->flush(); + return 0; + } + +private: + Mirror *mirror; +}; + +class LeaderReleaseCommand : public MirrorAdminSocketCommand { +public: + explicit LeaderReleaseCommand(Mirror *mirror) : mirror(mirror) {} + + int call(Formatter *f) override { + mirror->release_leader(); + return 0; + } + +private: + Mirror *mirror; +}; + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PriCache: " << this << " " \ + << m_name << " " << __func__ << ": " + +struct PriCache : public PriorityCache::PriCache { + std::string m_name; + int64_t m_base_cache_max_size; + int64_t m_extra_cache_max_size; + + PriorityCache::Priority m_base_cache_pri = PriorityCache::Priority::PRI10; + PriorityCache::Priority m_extra_cache_pri = PriorityCache::Priority::PRI10; + int64_t m_base_cache_bytes = 0; + int64_t m_extra_cache_bytes = 0; + int64_t m_committed_bytes = 0; + double m_cache_ratio = 0; + + PriCache(const std::string &name, uint64_t min_size, uint64_t max_size) + : m_name(name), m_base_cache_max_size(min_size), + m_extra_cache_max_size(max_size - min_size) { + ceph_assert(max_size >= min_size); + } + + void prioritize() { + if (m_base_cache_pri == PriorityCache::Priority::PRI0) { + return; + } + auto pri = static_cast<uint8_t>(m_base_cache_pri); + m_base_cache_pri = static_cast<PriorityCache::Priority>(--pri); + + dout(30) << m_base_cache_pri << dendl; + } + + int64_t request_cache_bytes(PriorityCache::Priority pri, + uint64_t total_cache) const override { + int64_t 
cache_bytes = 0; + + if (pri == m_base_cache_pri) { + cache_bytes += m_base_cache_max_size; + } + if (pri == m_extra_cache_pri) { + cache_bytes += m_extra_cache_max_size; + } + + dout(30) << cache_bytes << dendl; + + return cache_bytes; + } + + int64_t get_cache_bytes(PriorityCache::Priority pri) const override { + int64_t cache_bytes = 0; + + if (pri == m_base_cache_pri) { + cache_bytes += m_base_cache_bytes; + } + if (pri == m_extra_cache_pri) { + cache_bytes += m_extra_cache_bytes; + } + + dout(30) << "pri=" << pri << " " << cache_bytes << dendl; + + return cache_bytes; + } + + int64_t get_cache_bytes() const override { + auto cache_bytes = m_base_cache_bytes + m_extra_cache_bytes; + + dout(30) << m_base_cache_bytes << "+" << m_extra_cache_bytes << "=" + << cache_bytes << dendl; + + return cache_bytes; + } + + void set_cache_bytes(PriorityCache::Priority pri, int64_t bytes) override { + ceph_assert(bytes >= 0); + ceph_assert(pri == m_base_cache_pri || pri == m_extra_cache_pri || + bytes == 0); + + dout(30) << "pri=" << pri << " " << bytes << dendl; + + if (pri == m_base_cache_pri) { + m_base_cache_bytes = std::min(m_base_cache_max_size, bytes); + bytes -= std::min(m_base_cache_bytes, bytes); + } + + if (pri == m_extra_cache_pri) { + m_extra_cache_bytes = bytes; + } + } + + void add_cache_bytes(PriorityCache::Priority pri, int64_t bytes) override { + ceph_assert(bytes >= 0); + ceph_assert(pri == m_base_cache_pri || pri == m_extra_cache_pri); + + dout(30) << "pri=" << pri << " " << bytes << dendl; + + if (pri == m_base_cache_pri) { + ceph_assert(m_base_cache_bytes <= m_base_cache_max_size); + + auto chunk = std::min(m_base_cache_max_size - m_base_cache_bytes, bytes); + m_base_cache_bytes += chunk; + bytes -= chunk; + } + + if (pri == m_extra_cache_pri) { + m_extra_cache_bytes += bytes; + } + } + + int64_t commit_cache_size(uint64_t total_cache) override { + m_committed_bytes = p2roundup<int64_t>(get_cache_bytes(), 4096); + + dout(30) << m_committed_bytes << dendl; 
+ + return m_committed_bytes; + } + + int64_t get_committed_size() const override { + dout(30) << m_committed_bytes << dendl; + + return m_committed_bytes; + } + + double get_cache_ratio() const override { + dout(30) << m_cache_ratio << dendl; + + return m_cache_ratio; + } + + void set_cache_ratio(double ratio) override { + dout(30) << m_cache_ratio << dendl; + + m_cache_ratio = ratio; + } + + void shift_bins() override { + } + + void import_bins(const std::vector<uint64_t> &intervals) override { + } + + void set_bins(PriorityCache::Priority pri, uint64_t end_interval) override { + } + + uint64_t get_bins(PriorityCache::Priority pri) const override { + return 0; + } + + std::string get_cache_name() const override { + return m_name; + } +}; + +} // anonymous namespace + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Mirror: " << this << " " \ + << __func__ << ": " + +class MirrorAdminSocketHook : public AdminSocketHook { +public: + MirrorAdminSocketHook(CephContext *cct, Mirror *mirror) : + admin_socket(cct->get_admin_socket()) { + std::string command; + int r; + + command = "rbd mirror status"; + r = admin_socket->register_command(command, this, + "get status for rbd mirror"); + if (r == 0) { + commands[command] = new StatusCommand(mirror); + } + + command = "rbd mirror start"; + r = admin_socket->register_command(command, this, + "start rbd mirror"); + if (r == 0) { + commands[command] = new StartCommand(mirror); + } + + command = "rbd mirror stop"; + r = admin_socket->register_command(command, this, + "stop rbd mirror"); + if (r == 0) { + commands[command] = new StopCommand(mirror); + } + + command = "rbd mirror restart"; + r = admin_socket->register_command(command, this, + "restart rbd mirror"); + if (r == 0) { + commands[command] = new RestartCommand(mirror); + } + + command = "rbd mirror flush"; + r = admin_socket->register_command(command, this, + "flush rbd mirror"); + if (r == 0) { + commands[command] = new FlushCommand(mirror); + } + + 
command = "rbd mirror leader release"; + r = admin_socket->register_command(command, this, + "release rbd mirror leader"); + if (r == 0) { + commands[command] = new LeaderReleaseCommand(mirror); + } + } + + ~MirrorAdminSocketHook() override { + (void)admin_socket->unregister_commands(this); + for (Commands::const_iterator i = commands.begin(); i != commands.end(); + ++i) { + delete i->second; + } + } + + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& errss, + bufferlist& out) override { + Commands::const_iterator i = commands.find(command); + ceph_assert(i != commands.end()); + return i->second->call(f); + } + +private: + typedef std::map<std::string, MirrorAdminSocketCommand*, std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +class CacheManagerHandler : public journal::CacheManagerHandler { +public: + CacheManagerHandler(CephContext *cct) + : m_cct(cct) { + + if (!m_cct->_conf.get_val<bool>("rbd_mirror_memory_autotune")) { + return; + } + + uint64_t base = m_cct->_conf.get_val<Option::size_t>( + "rbd_mirror_memory_base"); + double fragmentation = m_cct->_conf.get_val<double>( + "rbd_mirror_memory_expected_fragmentation"); + uint64_t target = m_cct->_conf.get_val<Option::size_t>( + "rbd_mirror_memory_target"); + uint64_t min = m_cct->_conf.get_val<Option::size_t>( + "rbd_mirror_memory_cache_min"); + uint64_t max = min; + + // When setting the maximum amount of memory to use for cache, first + // assume some base amount of memory for the daemon and then fudge in + // some overhead for fragmentation that scales with cache usage. 
+ uint64_t ltarget = (1.0 - fragmentation) * target; + if (ltarget > base + min) { + max = ltarget - base; + } + + m_next_balance = ceph_clock_now(); + m_next_resize = ceph_clock_now(); + + m_cache_manager = std::make_unique<PriorityCache::Manager>( + m_cct, min, max, target, false); + } + + ~CacheManagerHandler() { + std::lock_guard locker{m_lock}; + + ceph_assert(m_caches.empty()); + } + + void register_cache(const std::string &cache_name, + uint64_t min_size, uint64_t max_size, + journal::CacheRebalanceHandler* handler) override { + if (!m_cache_manager) { + handler->handle_cache_rebalanced(max_size); + return; + } + + dout(20) << cache_name << " min_size=" << min_size << " max_size=" + << max_size << " handler=" << handler << dendl; + + std::lock_guard locker{m_lock}; + + auto p = m_caches.insert( + {cache_name, {cache_name, min_size, max_size, handler}}); + ceph_assert(p.second == true); + + m_cache_manager->insert(cache_name, p.first->second.pri_cache, false); + m_next_balance = ceph_clock_now(); + } + + void unregister_cache(const std::string &cache_name) override { + if (!m_cache_manager) { + return; + } + + dout(20) << cache_name << dendl; + + std::lock_guard locker{m_lock}; + + auto it = m_caches.find(cache_name); + ceph_assert(it != m_caches.end()); + + m_cache_manager->erase(cache_name); + m_caches.erase(it); + m_next_balance = ceph_clock_now(); + } + + void run_cache_manager() { + if (!m_cache_manager) { + return; + } + + std::lock_guard locker{m_lock}; + + // Before we trim, check and see if it's time to rebalance/resize. 
+ auto autotune_interval = m_cct->_conf.get_val<double>( + "rbd_mirror_memory_cache_autotune_interval"); + auto resize_interval = m_cct->_conf.get_val<double>( + "rbd_mirror_memory_cache_resize_interval"); + + utime_t now = ceph_clock_now(); + + if (autotune_interval > 0 && m_next_balance <= now) { + dout(20) << "balance" << dendl; + m_cache_manager->balance(); + + for (auto &it : m_caches) { + auto pri_cache = static_cast<PriCache *>(it.second.pri_cache.get()); + auto new_cache_bytes = pri_cache->get_cache_bytes(); + it.second.handler->handle_cache_rebalanced(new_cache_bytes); + pri_cache->prioritize(); + } + + m_next_balance = ceph_clock_now(); + m_next_balance += autotune_interval; + } + + if (resize_interval > 0 && m_next_resize < now) { + if (ceph_using_tcmalloc()) { + dout(20) << "tune memory" << dendl; + m_cache_manager->tune_memory(); + } + + m_next_resize = ceph_clock_now(); + m_next_resize += resize_interval; + } + } + +private: + struct Cache { + std::shared_ptr<PriorityCache::PriCache> pri_cache; + journal::CacheRebalanceHandler *handler; + + Cache(const std::string name, uint64_t min_size, uint64_t max_size, + journal::CacheRebalanceHandler *handler) + : pri_cache(new PriCache(name, min_size, max_size)), handler(handler) { + } + }; + + CephContext *m_cct; + + mutable ceph::mutex m_lock = + ceph::make_mutex("rbd::mirror::CacheManagerHandler"); + std::unique_ptr<PriorityCache::Manager> m_cache_manager; + std::map<std::string, Cache> m_caches; + + utime_t m_next_balance; + utime_t m_next_resize; +}; + +Mirror::Mirror(CephContext *cct, const std::vector<const char*> &args) : + m_cct(cct), + m_args(args), + m_local(new librados::Rados()), + m_cache_manager_handler(new CacheManagerHandler(cct)), + m_pool_meta_cache(new PoolMetaCache(cct)), + m_asok_hook(new MirrorAdminSocketHook(cct, this)) { +} + +Mirror::~Mirror() +{ + delete m_asok_hook; +} + +void Mirror::handle_signal(int signum) +{ + dout(20) << signum << dendl; + + std::lock_guard l{m_lock}; + + 
switch (signum) { + case SIGHUP: + for (auto &it : m_pool_replayers) { + it.second->reopen_logs(); + } + g_ceph_context->reopen_logs(); + break; + + case SIGINT: + case SIGTERM: + m_stopping = true; + m_cond.notify_all(); + break; + + default: + ceph_abort_msgf("unexpected signal %d", signum); + } +} + +int Mirror::init() +{ + int r = m_local->init_with_context(m_cct); + if (r < 0) { + derr << "could not initialize rados handle" << dendl; + return r; + } + + r = m_local->connect(); + if (r < 0) { + derr << "error connecting to local cluster" << dendl; + return r; + } + + m_threads = &(m_cct->lookup_or_create_singleton_object< + Threads<librbd::ImageCtx>>("rbd_mirror::threads", false, m_local)); + m_service_daemon.reset(new ServiceDaemon<>(m_cct, m_local, m_threads)); + + r = m_service_daemon->init(); + if (r < 0) { + derr << "error registering service daemon: " << cpp_strerror(r) << dendl; + return r; + } + + m_local_cluster_watcher.reset(new ClusterWatcher(m_local, m_lock, + m_service_daemon.get())); + return r; +} + +void Mirror::run() +{ + dout(20) << "enter" << dendl; + + using namespace std::chrono_literals; + utime_t next_refresh_pools = ceph_clock_now(); + + while (!m_stopping) { + utime_t now = ceph_clock_now(); + bool refresh_pools = next_refresh_pools <= now; + if (refresh_pools) { + m_local_cluster_watcher->refresh_pools(); + next_refresh_pools = ceph_clock_now(); + next_refresh_pools += m_cct->_conf.get_val<uint64_t>( + "rbd_mirror_pool_replayers_refresh_interval"); + } + std::unique_lock l{m_lock}; + if (!m_manual_stop) { + if (refresh_pools) { + update_pool_replayers(m_local_cluster_watcher->get_pool_peers(), + m_local_cluster_watcher->get_site_name()); + } + m_cache_manager_handler->run_cache_manager(); + } + m_cond.wait_for(l, 1s); + } + + // stop all pool replayers in parallel + std::lock_guard locker{m_lock}; + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->stop(false); + } + dout(20) << "return" << dendl; +} + +void 
Mirror::print_status(Formatter *f) +{ + dout(20) << "enter" << dendl; + + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + f->open_object_section("mirror_status"); + f->open_array_section("pool_replayers"); + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->print_status(f); + } + f->close_section(); + f->close_section(); +} + +void Mirror::start() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->start(); + } +} + +void Mirror::stop() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + m_manual_stop = true; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->stop(true); + } +} + +void Mirror::restart() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->restart(); + } +} + +void Mirror::flush() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping || m_manual_stop) { + return; + } + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->flush(); + } +} + +void Mirror::release_leader() +{ + dout(20) << "enter" << dendl; + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->release_leader(); + } +} + +void Mirror::update_pool_replayers(const PoolPeers &pool_peers, + const std::string& site_name) +{ + dout(20) << "enter" << dendl; + ceph_assert(ceph_mutex_is_locked(m_lock)); + + // remove stale pool replayers before creating new pool replayers + for (auto it = m_pool_replayers.begin(); it != m_pool_replayers.end();) { + auto &peer = it->first.second; + auto pool_peer_it = pool_peers.find(it->first.first); + if (pool_peer_it == 
pool_peers.end() || + pool_peer_it->second.find(peer) == pool_peer_it->second.end()) { + dout(20) << "removing pool replayer for " << peer << dendl; + // TODO: make async + it->second->shut_down(); + it = m_pool_replayers.erase(it); + } else { + ++it; + } + } + + for (auto &kv : pool_peers) { + for (auto &peer : kv.second) { + PoolPeer pool_peer(kv.first, peer); + + auto pool_replayers_it = m_pool_replayers.find(pool_peer); + if (pool_replayers_it != m_pool_replayers.end()) { + auto& pool_replayer = pool_replayers_it->second; + if (!m_site_name.empty() && !site_name.empty() && + m_site_name != site_name) { + dout(0) << "restarting pool replayer for " << peer << " due to " + << "updated site name" << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(site_name); + } else if (pool_replayer->is_blocklisted()) { + derr << "restarting blocklisted pool replayer for " << peer << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(site_name); + } else if (!pool_replayer->is_running()) { + derr << "restarting failed pool replayer for " << peer << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(site_name); + } + } else { + dout(20) << "starting pool replayer for " << peer << dendl; + unique_ptr<PoolReplayer<>> pool_replayer( + new PoolReplayer<>(m_threads, m_service_daemon.get(), + m_cache_manager_handler.get(), + m_pool_meta_cache.get(), kv.first, peer, + m_args)); + + // TODO: make async + pool_replayer->init(site_name); + m_pool_replayers.emplace(pool_peer, std::move(pool_replayer)); + } + } + + // TODO currently only support a single peer + } + + m_site_name = site_name; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/Mirror.h b/src/tools/rbd_mirror/Mirror.h new file mode 100644 index 000000000..f92a63b68 --- /dev/null +++ b/src/tools/rbd_mirror/Mirror.h @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// 
vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_H +#define CEPH_RBD_MIRROR_H + +#include "common/ceph_context.h" +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" +#include "include/utime.h" +#include "ClusterWatcher.h" +#include "PoolReplayer.h" +#include "tools/rbd_mirror/Types.h" + +#include <set> +#include <map> +#include <memory> +#include <atomic> + +namespace journal { class CacheManagerHandler; } + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct ServiceDaemon; +template <typename> struct Threads; +class CacheManagerHandler; +class MirrorAdminSocketHook; +class PoolMetaCache; + +/** + * Contains the main loop and overall state for rbd-mirror. + * + * Sets up mirroring, and coordinates between noticing config + * changes and applying them. + */ +class Mirror { +public: + Mirror(CephContext *cct, const std::vector<const char*> &args); + Mirror(const Mirror&) = delete; + Mirror& operator=(const Mirror&) = delete; + ~Mirror(); + + int init(); + void run(); + void handle_signal(int signum); + + void print_status(Formatter *f); + void start(); + void stop(); + void restart(); + void flush(); + void release_leader(); + +private: + typedef ClusterWatcher::PoolPeers PoolPeers; + typedef std::pair<int64_t, PeerSpec> PoolPeer; + + void update_pool_replayers(const PoolPeers &pool_peers, + const std::string& site_name); + + void create_cache_manager(); + void run_cache_manager(utime_t *next_run_interval); + + CephContext *m_cct; + std::vector<const char*> m_args; + Threads<librbd::ImageCtx> *m_threads = nullptr; + ceph::mutex m_lock = ceph::make_mutex("rbd::mirror::Mirror"); + ceph::condition_variable m_cond; + RadosRef m_local; + std::unique_ptr<ServiceDaemon<librbd::ImageCtx>> m_service_daemon; + + // monitor local cluster for config changes in peers + std::unique_ptr<ClusterWatcher> m_local_cluster_watcher; + std::unique_ptr<CacheManagerHandler> m_cache_manager_handler; + 
std::unique_ptr<PoolMetaCache> m_pool_meta_cache; + std::map<PoolPeer, std::unique_ptr<PoolReplayer<>>> m_pool_replayers; + std::atomic<bool> m_stopping = { false }; + bool m_manual_stop = false; + MirrorAdminSocketHook *m_asok_hook; + std::string m_site_name; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_H diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.cc b/src/tools/rbd_mirror/MirrorStatusUpdater.cc new file mode 100644 index 000000000..257cb1df2 --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusUpdater.cc @@ -0,0 +1,397 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/MirrorStatusUpdater.h" +#include "include/Context.h" +#include "include/stringify.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "tools/rbd_mirror/MirrorStatusWatcher.h" +#include "tools/rbd_mirror/Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::MirrorStatusUpdater " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +static const double UPDATE_INTERVAL_SECONDS = 30; +static const uint32_t MAX_UPDATES_PER_OP = 100; + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +MirrorStatusUpdater<I>::MirrorStatusUpdater( + librados::IoCtx& io_ctx, Threads<I> *threads, + const std::string& local_mirror_uuid) + : m_io_ctx(io_ctx), m_threads(threads), + m_local_mirror_uuid(local_mirror_uuid), + m_lock(ceph::make_mutex("rbd::mirror::MirrorStatusUpdater " + + stringify(m_io_ctx.get_id()))) { + dout(10) << "local_mirror_uuid=" << local_mirror_uuid << ", " + << "pool_id=" << m_io_ctx.get_id() << dendl; +} + +template <typename I> 
+MirrorStatusUpdater<I>::~MirrorStatusUpdater() { + ceph_assert(!m_initialized); + delete m_mirror_status_watcher; +} + +template <typename I> +void MirrorStatusUpdater<I>::init(Context* on_finish) { + dout(10) << dendl; + + ceph_assert(!m_initialized); + m_initialized = true; + + { + std::lock_guard timer_locker{m_threads->timer_lock}; + schedule_timer_task(); + } + + init_mirror_status_watcher(on_finish); +} + +template <typename I> +void MirrorStatusUpdater<I>::init_mirror_status_watcher(Context* on_finish) { + dout(10) << dendl; + + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_init_mirror_status_watcher(r, on_finish); + }); + m_mirror_status_watcher = MirrorStatusWatcher<I>::create( + m_io_ctx, m_threads->work_queue); + m_mirror_status_watcher->init(ctx); +} + +template <typename I> +void MirrorStatusUpdater<I>::handle_init_mirror_status_watcher( + int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to init mirror status watcher: " << cpp_strerror(r) + << dendl; + + delete m_mirror_status_watcher; + m_mirror_status_watcher = nullptr; + + on_finish = new LambdaContext([r, on_finish](int) { + on_finish->complete(r); + }); + shut_down(on_finish); + return; + } + + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void MirrorStatusUpdater<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + { + std::lock_guard timer_locker{m_threads->timer_lock}; + ceph_assert(m_timer_task != nullptr); + m_threads->timer->cancel_event(m_timer_task); + } + + { + std::unique_lock locker(m_lock); + ceph_assert(m_initialized); + m_initialized = false; + } + + shut_down_mirror_status_watcher(on_finish); +} + +template <typename I> +void MirrorStatusUpdater<I>::shut_down_mirror_status_watcher( + Context* on_finish) { + if (m_mirror_status_watcher == nullptr) { + finalize_shutdown(0, on_finish); + return; + } + + dout(10) << dendl; + + auto ctx = new LambdaContext([this, on_finish](int r) { + 
handle_shut_down_mirror_status_watcher(r, on_finish); + }); + m_mirror_status_watcher->shut_down(ctx); +} + +template <typename I> +void MirrorStatusUpdater<I>::handle_shut_down_mirror_status_watcher( + int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to shut down mirror status watcher: " << cpp_strerror(r) + << dendl; + } + + finalize_shutdown(r, on_finish); +} + +template <typename I> +void MirrorStatusUpdater<I>::finalize_shutdown(int r, Context* on_finish) { + dout(10) << dendl; + + { + std::unique_lock locker(m_lock); + if (m_update_in_progress) { + if (r < 0) { + on_finish = new LambdaContext([r, on_finish](int) { + on_finish->complete(r); + }); + } + + m_update_on_finish_ctxs.push_back(on_finish); + return; + } + } + + m_threads->work_queue->queue(on_finish, r); +} + +template <typename I> +bool MirrorStatusUpdater<I>::exists(const std::string& global_image_id) { + dout(15) << "global_image_id=" << global_image_id << dendl; + + std::unique_lock locker(m_lock); + return (m_global_image_status.count(global_image_id) > 0); +} + +template <typename I> +void MirrorStatusUpdater<I>::set_mirror_image_status( + const std::string& global_image_id, + const cls::rbd::MirrorImageSiteStatus& mirror_image_site_status, + bool immediate_update) { + dout(15) << "global_image_id=" << global_image_id << ", " + << "mirror_image_site_status=" << mirror_image_site_status << dendl; + + std::unique_lock locker(m_lock); + + m_global_image_status[global_image_id] = mirror_image_site_status; + if (immediate_update) { + m_update_global_image_ids.insert(global_image_id); + queue_update_task(std::move(locker)); + } +} + +template <typename I> +void MirrorStatusUpdater<I>::remove_refresh_mirror_image_status( + const std::string& global_image_id, + Context* on_finish) { + if (try_remove_mirror_image_status(global_image_id, false, false, + on_finish)) { + m_threads->work_queue->queue(on_finish, 0); + } +} + +template <typename I> +void 
MirrorStatusUpdater<I>::remove_mirror_image_status( + const std::string& global_image_id, bool immediate_update, + Context* on_finish) { + if (try_remove_mirror_image_status(global_image_id, true, immediate_update, + on_finish)) { + m_threads->work_queue->queue(on_finish, 0); + } +} + +template <typename I> +bool MirrorStatusUpdater<I>::try_remove_mirror_image_status( + const std::string& global_image_id, bool queue_update, + bool immediate_update, Context* on_finish) { + dout(15) << "global_image_id=" << global_image_id << ", " + << "queue_update=" << queue_update << ", " + << "immediate_update=" << immediate_update << dendl; + + std::unique_lock locker(m_lock); + if ((m_update_in_flight && + m_updating_global_image_ids.count(global_image_id) > 0) || + ((m_update_in_progress || m_update_requested) && + m_update_global_image_ids.count(global_image_id) > 0)) { + // if update is scheduled/in-progress, wait for it to complete + on_finish = new LambdaContext( + [this, global_image_id, queue_update, immediate_update, + on_finish](int r) { + if (try_remove_mirror_image_status(global_image_id, queue_update, + immediate_update, on_finish)) { + on_finish->complete(0); + } + }); + m_update_on_finish_ctxs.push_back(on_finish); + return false; + } + + m_global_image_status.erase(global_image_id); + if (queue_update) { + m_update_global_image_ids.insert(global_image_id); + if (immediate_update) { + queue_update_task(std::move(locker)); + } + } + + return true; +} + +template <typename I> +void MirrorStatusUpdater<I>::schedule_timer_task() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_timer_task == nullptr); + m_timer_task = create_context_callback< + MirrorStatusUpdater<I>, + &MirrorStatusUpdater<I>::handle_timer_task>(this); + m_threads->timer->add_event_after(UPDATE_INTERVAL_SECONDS, m_timer_task); +} + +template <typename I> +void MirrorStatusUpdater<I>::handle_timer_task(int r) { + dout(10) << dendl; + + 
ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_timer_task != nullptr); + m_timer_task = nullptr; + schedule_timer_task(); + + std::unique_lock locker(m_lock); + for (auto& pair : m_global_image_status) { + m_update_global_image_ids.insert(pair.first); + } + + queue_update_task(std::move(locker)); +} + +template <typename I> +void MirrorStatusUpdater<I>::queue_update_task( + std::unique_lock<ceph::mutex>&& locker) { + if (!m_initialized) { + return; + } + + if (m_update_in_progress) { + if (m_update_in_flight) { + dout(10) << "deferring update due to in-flight ops" << dendl; + m_update_requested = true; + } + return; + } + + m_update_in_progress = true; + ceph_assert(!m_update_in_flight); + ceph_assert(!m_update_requested); + locker.unlock(); + + dout(10) << dendl; + auto ctx = create_context_callback< + MirrorStatusUpdater<I>, + &MirrorStatusUpdater<I>::update_task>(this); + m_threads->work_queue->queue(ctx); +} + +template <typename I> +void MirrorStatusUpdater<I>::update_task(int r) { + dout(10) << dendl; + + std::unique_lock locker(m_lock); + ceph_assert(m_update_in_progress); + ceph_assert(!m_update_in_flight); + m_update_in_flight = true; + + std::swap(m_updating_global_image_ids, m_update_global_image_ids); + auto updating_global_image_ids = m_updating_global_image_ids; + auto global_image_status = m_global_image_status; + locker.unlock(); + + Context* ctx = create_context_callback< + MirrorStatusUpdater<I>, + &MirrorStatusUpdater<I>::handle_update_task>(this); + if (updating_global_image_ids.empty()) { + ctx->complete(0); + return; + } + + auto gather = new C_Gather(g_ceph_context, ctx); + + auto it = updating_global_image_ids.begin(); + while (it != updating_global_image_ids.end()) { + librados::ObjectWriteOperation op; + uint32_t op_count = 0; + + while (it != updating_global_image_ids.end() && + op_count < MAX_UPDATES_PER_OP) { + auto& global_image_id = *it; + ++it; + + auto status_it = global_image_status.find(global_image_id); 
+ if (status_it == global_image_status.end()) { + librbd::cls_client::mirror_image_status_remove(&op, global_image_id); + ++op_count; + continue; + } + + status_it->second.mirror_uuid = m_local_mirror_uuid; + librbd::cls_client::mirror_image_status_set(&op, global_image_id, + status_it->second); + ++op_count; + } + + auto aio_comp = create_rados_callback(gather->new_sub()); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); + } + + gather->activate(); +} + +template <typename I> +void MirrorStatusUpdater<I>::handle_update_task(int r) { + dout(10) << dendl; + if (r < 0) { + derr << "failed to update mirror image statuses: " << cpp_strerror(r) + << dendl; + } + + std::unique_lock locker(m_lock); + + Contexts on_finish_ctxs; + std::swap(on_finish_ctxs, m_update_on_finish_ctxs); + + ceph_assert(m_update_in_progress); + m_update_in_progress = false; + + ceph_assert(m_update_in_flight); + m_update_in_flight = false; + + m_updating_global_image_ids.clear(); + + if (m_update_requested) { + m_update_requested = false; + queue_update_task(std::move(locker)); + } else { + locker.unlock(); + } + + for (auto on_finish : on_finish_ctxs) { + on_finish->complete(0); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::MirrorStatusUpdater<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.h b/src/tools/rbd_mirror/MirrorStatusUpdater.h new file mode 100644 index 000000000..783b818fc --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusUpdater.h @@ -0,0 +1,119 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H +#define CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H + +#include "include/rados/librados.hpp" +#include "common/ceph_mutex.h" +#include "cls/rbd/cls_rbd_types.h" +#include <list> +#include <map> +#include <set> +#include <string> + +struct Context; +namespace librbd { class 
ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct MirrorStatusWatcher; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class MirrorStatusUpdater { +public: + + static MirrorStatusUpdater* create(librados::IoCtx& io_ctx, + Threads<ImageCtxT> *threads, + const std::string& local_mirror_uuid) { + return new MirrorStatusUpdater(io_ctx, threads, local_mirror_uuid); + } + + MirrorStatusUpdater(librados::IoCtx& io_ctx, Threads<ImageCtxT> *threads, + const std::string& local_mirror_uuid); + ~MirrorStatusUpdater(); + + void init(Context* on_finish); + void shut_down(Context* on_finish); + + bool exists(const std::string& global_image_id); + void set_mirror_image_status( + const std::string& global_image_id, + const cls::rbd::MirrorImageSiteStatus& mirror_image_site_status, + bool immediate_update); + void remove_mirror_image_status(const std::string& global_image_id, + bool immediate_update, Context* on_finish); + void remove_refresh_mirror_image_status(const std::string& global_image_id, + Context* on_finish); + +private: + /** + * @verbatim + * + * <uninitialized> <----------------------\ + * | (init) ^ (error) | + * v * | + * INIT_STATUS_WATCHER * * * * * | + * | | + * | SHUT_DOWN_STATUS_WATCHER + * | ^ + * | | + * | (shutdown) | + * <initialized> -------------------------/ + * + * @endverbatim + */ + typedef std::list<Context*> Contexts; + typedef std::set<std::string> GlobalImageIds; + typedef std::map<std::string, cls::rbd::MirrorImageSiteStatus> + GlobalImageStatus; + + librados::IoCtx m_io_ctx; + Threads<ImageCtxT>* m_threads; + std::string m_local_mirror_uuid; + + Context* m_timer_task = nullptr; + + ceph::mutex m_lock; + + bool m_initialized = false; + + MirrorStatusWatcher<ImageCtxT>* m_mirror_status_watcher = nullptr; + + GlobalImageIds m_update_global_image_ids; + GlobalImageStatus m_global_image_status; + + bool m_update_in_progress = false; + bool m_update_in_flight = false; + bool 
m_update_requested = false; + Contexts m_update_on_finish_ctxs; + GlobalImageIds m_updating_global_image_ids; + + bool try_remove_mirror_image_status(const std::string& global_image_id, + bool queue_update, bool immediate_update, + Context* on_finish); + + void init_mirror_status_watcher(Context* on_finish); + void handle_init_mirror_status_watcher(int r, Context* on_finish); + + void shut_down_mirror_status_watcher(Context* on_finish); + void handle_shut_down_mirror_status_watcher(int r, Context* on_finish); + void finalize_shutdown(int r, Context* on_finish); + + void schedule_timer_task(); + void handle_timer_task(int r); + + void queue_update_task(std::unique_lock<ceph::mutex>&& locker); + void update_task(int r); + void handle_update_task(int r); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::MirrorStatusUpdater<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_UPDATER_H diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.cc b/src/tools/rbd_mirror/MirrorStatusWatcher.cc new file mode 100644 index 000000000..3e1564c5b --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusWatcher.cc @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "MirrorStatusWatcher.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::MirrorStatusWatcher: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_rados_callback; + +template <typename I> +MirrorStatusWatcher<I>::MirrorStatusWatcher(librados::IoCtx &io_ctx, + librbd::asio::ContextWQ *work_queue) + : Watcher(io_ctx, work_queue, RBD_MIRRORING) { +} + +template <typename I> +MirrorStatusWatcher<I>::~MirrorStatusWatcher() { +} + +template 
<typename I> +void MirrorStatusWatcher<I>::init(Context *on_finish) { + dout(20) << dendl; + + on_finish = new LambdaContext( + [this, on_finish] (int r) { + if (r < 0) { + derr << "error removing down statuses: " << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + register_watch(on_finish); + }); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_status_remove_down(&op); + librados::AioCompletion *aio_comp = create_rados_callback(on_finish); + + int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void MirrorStatusWatcher<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + unregister_watch(on_finish); +} + +template <typename I> +void MirrorStatusWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, + bufferlist &bl) { + dout(20) << dendl; + + bufferlist out; + acknowledge_notify(notify_id, handle, out); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::MirrorStatusWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.h b/src/tools/rbd_mirror/MirrorStatusWatcher.h new file mode 100644 index 000000000..3335e9e63 --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusWatcher.h @@ -0,0 +1,43 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H +#define CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H + +#include "librbd/Watcher.h" + +namespace librbd { +class ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class MirrorStatusWatcher : protected librbd::Watcher { +public: + static MirrorStatusWatcher *create(librados::IoCtx &io_ctx, + librbd::asio::ContextWQ *work_queue) { + return new MirrorStatusWatcher(io_ctx, work_queue); + } + void destroy() { + 
delete this; + } + + MirrorStatusWatcher(librados::IoCtx &io_ctx, + librbd::asio::ContextWQ *work_queue); + ~MirrorStatusWatcher() override; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + +protected: + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H diff --git a/src/tools/rbd_mirror/NamespaceReplayer.cc b/src/tools/rbd_mirror/NamespaceReplayer.cc new file mode 100644 index 000000000..d305d8472 --- /dev/null +++ b/src/tools/rbd_mirror/NamespaceReplayer.cc @@ -0,0 +1,862 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "NamespaceReplayer.h" +#include "common/Formatter.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" +#include "librbd/api/Config.h" +#include "librbd/api/Mirror.h" +#include "librbd/asio/ContextWQ.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::NamespaceReplayer: " \ + << this << " " << __func__ << ": " + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +namespace rbd { +namespace mirror { + +using ::operator<<; + +namespace { + +const std::string SERVICE_DAEMON_LOCAL_COUNT_KEY("image_local_count"); +const std::string SERVICE_DAEMON_REMOTE_COUNT_KEY("image_remote_count"); + +} // anonymous namespace + +template <typename I> +NamespaceReplayer<I>::NamespaceReplayer( + const std::string &name, + librados::IoCtx &local_io_ctx, librados::IoCtx &remote_io_ctx, + const std::string &local_mirror_uuid, + const std::string& local_mirror_peer_uuid, + const RemotePoolMeta& remote_pool_meta, + Threads<I> *threads, + Throttler<I> 
*image_sync_throttler, + Throttler<I> *image_deletion_throttler, + ServiceDaemon<I> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) : + m_namespace_name(name), + m_local_mirror_uuid(local_mirror_uuid), + m_local_mirror_peer_uuid(local_mirror_peer_uuid), + m_remote_pool_meta(remote_pool_meta), + m_threads(threads), m_image_sync_throttler(image_sync_throttler), + m_image_deletion_throttler(image_deletion_throttler), + m_service_daemon(service_daemon), + m_cache_manager_handler(cache_manager_handler), + m_pool_meta_cache(pool_meta_cache), + m_lock(ceph::make_mutex(librbd::util::unique_lock_name( + "rbd::mirror::NamespaceReplayer " + name, this))), + m_local_pool_watcher_listener(this, true), + m_remote_pool_watcher_listener(this, false), + m_image_map_listener(this) { + dout(10) << name << dendl; + + m_local_io_ctx.dup(local_io_ctx); + m_local_io_ctx.set_namespace(name); + m_remote_io_ctx.dup(remote_io_ctx); + m_remote_io_ctx.set_namespace(name); +} + +template <typename I> +bool NamespaceReplayer<I>::is_blocklisted() const { + std::lock_guard locker{m_lock}; + return m_instance_replayer->is_blocklisted() || + (m_local_pool_watcher && + m_local_pool_watcher->is_blocklisted()) || + (m_remote_pool_watcher && + m_remote_pool_watcher->is_blocklisted()); +} + +template <typename I> +void NamespaceReplayer<I>::init(Context *on_finish) { + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + init_local_status_updater(); +} + + +template <typename I> +void NamespaceReplayer<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + { + std::lock_guard locker{m_lock}; + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + if (!m_image_map) { + stop_instance_replayer(); + return; + } + } + + auto ctx = new LambdaContext( + [this] (int r) { + std::lock_guard locker{m_lock}; + stop_instance_replayer(); + }); + 
handle_release_leader(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::print_status(Formatter *f) +{ + dout(20) << dendl; + + ceph_assert(f); + + std::lock_guard locker{m_lock}; + + m_instance_replayer->print_status(f); + + if (m_image_deleter) { + f->open_object_section("image_deleter"); + m_image_deleter->print_status(f); + f->close_section(); + } +} + +template <typename I> +void NamespaceReplayer<I>::start() +{ + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + m_instance_replayer->start(); +} + +template <typename I> +void NamespaceReplayer<I>::stop() +{ + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + m_instance_replayer->stop(); +} + +template <typename I> +void NamespaceReplayer<I>::restart() +{ + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + m_instance_replayer->restart(); +} + +template <typename I> +void NamespaceReplayer<I>::flush() +{ + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + m_instance_replayer->flush(); +} + +template <typename I> +void NamespaceReplayer<I>::handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) { + std::lock_guard locker{m_lock}; + + if (!m_image_map) { + dout(20) << "not leader" << dendl; + return; + } + + dout(10) << "mirror_uuid=" << mirror_uuid << ", " + << "added_count=" << added_image_ids.size() << ", " + << "removed_count=" << removed_image_ids.size() << dendl; + + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_LOCAL_COUNT_KEY, m_local_pool_watcher->get_image_count()); + if (m_remote_pool_watcher) { + m_service_daemon->add_or_update_namespace_attribute( + m_local_io_ctx.get_id(), m_local_io_ctx.get_namespace(), + SERVICE_DAEMON_REMOTE_COUNT_KEY, + m_remote_pool_watcher->get_image_count()); + } + + std::set<std::string> added_global_image_ids; + for (auto& image_id : added_image_ids) { + 
added_global_image_ids.insert(image_id.global_id); + } + + std::set<std::string> removed_global_image_ids; + for (auto& image_id : removed_image_ids) { + removed_global_image_ids.insert(image_id.global_id); + } + + m_image_map->update_images(mirror_uuid, + std::move(added_global_image_ids), + std::move(removed_global_image_ids)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_acquire_leader(Context *on_finish) { + dout(10) << dendl; + + m_instance_watcher->handle_acquire_leader(); + + init_image_map(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_release_leader(Context *on_finish) { + dout(10) << dendl; + + m_instance_watcher->handle_release_leader(); + shut_down_image_deleter(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_update_leader( + const std::string &leader_instance_id) { + dout(10) << "leader_instance_id=" << leader_instance_id << dendl; + + m_instance_watcher->handle_update_leader(leader_instance_id); +} + +template <typename I> +void NamespaceReplayer<I>::handle_instances_added( + const std::vector<std::string> &instance_ids) { + dout(10) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + + if (!m_image_map) { + return; + } + + m_image_map->update_instances_added(instance_ids); +} + +template <typename I> +void NamespaceReplayer<I>::handle_instances_removed( + const std::vector<std::string> &instance_ids) { + dout(10) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + + if (!m_image_map) { + return; + } + + m_image_map->update_instances_removed(instance_ids); +} + +template <typename I> +void NamespaceReplayer<I>::init_local_status_updater() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_local_status_updater); + + m_local_status_updater.reset(MirrorStatusUpdater<I>::create( + m_local_io_ctx, m_threads, "")); + auto ctx = create_context_callback< + NamespaceReplayer<I>, + 
&NamespaceReplayer<I>::handle_init_local_status_updater>(this); + + m_local_status_updater->init(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_local_status_updater(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error initializing local mirror status updater: " + << cpp_strerror(r) << dendl; + + m_local_status_updater.reset(); + ceph_assert(m_on_finish != nullptr); + m_threads->work_queue->queue(m_on_finish, r); + m_on_finish = nullptr; + return; + } + + init_remote_status_updater(); +} + +template <typename I> +void NamespaceReplayer<I>::init_remote_status_updater() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_remote_status_updater); + + m_remote_status_updater.reset(MirrorStatusUpdater<I>::create( + m_remote_io_ctx, m_threads, m_local_mirror_uuid)); + auto ctx = create_context_callback< + NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_init_remote_status_updater>(this); + m_remote_status_updater->init(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_remote_status_updater(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error initializing remote mirror status updater: " + << cpp_strerror(r) << dendl; + + m_remote_status_updater.reset(); + m_ret_val = r; + shut_down_local_status_updater(); + return; + } + + init_instance_replayer(); +} + +template <typename I> +void NamespaceReplayer<I>::init_instance_replayer() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_instance_replayer); + + m_instance_replayer.reset(InstanceReplayer<I>::create( + m_local_io_ctx, m_local_mirror_uuid, m_threads, m_service_daemon, + m_local_status_updater.get(), m_cache_manager_handler, + m_pool_meta_cache)); + auto ctx = create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_init_instance_replayer>(this); + + 
m_instance_replayer->init(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_instance_replayer(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error initializing instance replayer: " << cpp_strerror(r) + << dendl; + + m_instance_replayer.reset(); + m_ret_val = r; + shut_down_remote_status_updater(); + return; + } + + m_instance_replayer->add_peer({m_local_mirror_peer_uuid, m_remote_io_ctx, + m_remote_pool_meta, + m_remote_status_updater.get()}); + + init_instance_watcher(); +} + +template <typename I> +void NamespaceReplayer<I>::init_instance_watcher() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(!m_instance_watcher); + + m_instance_watcher.reset(InstanceWatcher<I>::create( + m_local_io_ctx, *m_threads->asio_engine, m_instance_replayer.get(), + m_image_sync_throttler)); + auto ctx = create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_init_instance_watcher>(this); + + m_instance_watcher->init(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_instance_watcher(int r) { + dout(10) << "r=" << r << dendl; + + std::lock_guard locker{m_lock}; + + if (r < 0) { + derr << "error initializing instance watcher: " << cpp_strerror(r) + << dendl; + + m_instance_watcher.reset(); + m_ret_val = r; + shut_down_instance_replayer(); + return; + } + + ceph_assert(m_on_finish != nullptr); + m_threads->work_queue->queue(m_on_finish); + m_on_finish = nullptr; +} + +template <typename I> +void NamespaceReplayer<I>::stop_instance_replayer() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_stop_instance_replayer>(this)); + + m_instance_replayer->stop(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_stop_instance_replayer(int r) { + 
dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error stopping instance replayer: " << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + + shut_down_instance_watcher(); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_instance_watcher() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_instance_watcher); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_shut_down_instance_watcher>(this)); + + m_instance_watcher->shut_down(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_instance_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error shutting instance watcher down: " << cpp_strerror(r) + << dendl; + } + + std::lock_guard locker{m_lock}; + + m_instance_watcher.reset(); + + shut_down_instance_replayer(); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_instance_replayer() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_instance_replayer); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_shut_down_instance_replayer>(this)); + + m_instance_replayer->shut_down(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_instance_replayer(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error shutting instance replayer down: " << cpp_strerror(r) + << dendl; + } + + std::lock_guard locker{m_lock}; + + m_instance_replayer.reset(); + + shut_down_remote_status_updater(); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_remote_status_updater() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_remote_status_updater); + + auto ctx = create_async_context_callback( + m_threads->work_queue, 
create_context_callback< + NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_shut_down_remote_status_updater>(this)); + m_remote_status_updater->shut_down(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_remote_status_updater(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error shutting remote mirror status updater down: " + << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + m_remote_status_updater.reset(); + + shut_down_local_status_updater(); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_local_status_updater() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + ceph_assert(m_local_status_updater); + + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + NamespaceReplayer<I>, + &NamespaceReplayer<I>::handle_shut_down_local_status_updater>(this)); + + m_local_status_updater->shut_down(ctx); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_local_status_updater(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error shutting local mirror status updater down: " + << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + + m_local_status_updater.reset(); + + ceph_assert(!m_image_map); + ceph_assert(!m_image_deleter); + ceph_assert(!m_local_pool_watcher); + ceph_assert(!m_remote_pool_watcher); + ceph_assert(!m_instance_watcher); + ceph_assert(!m_instance_replayer); + + ceph_assert(m_on_finish != nullptr); + m_threads->work_queue->queue(m_on_finish, m_ret_val); + m_on_finish = nullptr; + m_ret_val = 0; +} + +template <typename I> +void NamespaceReplayer<I>::init_image_map(Context *on_finish) { + dout(10) << dendl; + + auto image_map = ImageMap<I>::create(m_local_io_ctx, m_threads, + m_instance_watcher->get_instance_id(), + m_image_map_listener); + + auto ctx = new LambdaContext( + [this, image_map, on_finish](int r) { + handle_init_image_map(r, image_map, 
on_finish); + }); + image_map->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_image_map(int r, ImageMap<I> *image_map, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to init image map: " << cpp_strerror(r) << dendl; + on_finish = new LambdaContext([image_map, on_finish, r](int) { + delete image_map; + on_finish->complete(r); + }); + image_map->shut_down(on_finish); + return; + } + + ceph_assert(!m_image_map); + m_image_map.reset(image_map); + + init_local_pool_watcher(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::init_local_pool_watcher(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(!m_local_pool_watcher); + m_local_pool_watcher.reset(PoolWatcher<I>::create( + m_threads, m_local_io_ctx, m_local_mirror_uuid, + m_local_pool_watcher_listener)); + + // ensure the initial set of local images is up-to-date + // after acquiring the leader role + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_init_local_pool_watcher(r, on_finish); + }); + m_local_pool_watcher->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_local_pool_watcher( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to retrieve local images: " << cpp_strerror(r) << dendl; + on_finish = new LambdaContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_pool_watchers(on_finish); + return; + } + + init_remote_pool_watcher(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::init_remote_pool_watcher(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(!m_remote_pool_watcher); + m_remote_pool_watcher.reset(PoolWatcher<I>::create( + m_threads, m_remote_io_ctx, m_remote_pool_meta.mirror_uuid, + 
m_remote_pool_watcher_listener)); + + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_init_remote_pool_watcher(r, on_finish); + }); + m_remote_pool_watcher->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_remote_pool_watcher( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r == -ENOENT) { + // Technically nothing to do since the other side doesn't + // have mirroring enabled. Eventually the remote pool watcher will + // detect images (if mirroring is enabled), so no point propagating + // an error which would just busy-spin the state machines. + dout(0) << "remote peer does not have mirroring configured" << dendl; + } else if (r < 0) { + derr << "failed to retrieve remote images: " << cpp_strerror(r) << dendl; + on_finish = new LambdaContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_pool_watchers(on_finish); + return; + } + + init_image_deleter(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::init_image_deleter(Context *on_finish) { + dout(10) << dendl; + + std::lock_guard locker{m_lock}; + ceph_assert(!m_image_deleter); + + on_finish = new LambdaContext([this, on_finish](int r) { + handle_init_image_deleter(r, on_finish); + }); + m_image_deleter.reset(ImageDeleter<I>::create(m_local_io_ctx, m_threads, + m_image_deletion_throttler, + m_service_daemon)); + m_image_deleter->init(create_async_context_callback( + m_threads->work_queue, on_finish)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_init_image_deleter( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to init image deleter: " << cpp_strerror(r) << dendl; + on_finish = new LambdaContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_image_deleter(on_finish); + return; + } + + on_finish->complete(0); +} + +template <typename I> +void 
NamespaceReplayer<I>::shut_down_image_deleter(Context* on_finish) { + dout(10) << dendl; + { + std::lock_guard locker{m_lock}; + if (m_image_deleter) { + Context *ctx = new LambdaContext([this, on_finish](int r) { + handle_shut_down_image_deleter(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + m_image_deleter->shut_down(ctx); + return; + } + } + shut_down_pool_watchers(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_image_deleter( + int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_deleter); + m_image_deleter.reset(); + } + + shut_down_pool_watchers(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_pool_watchers(Context *on_finish) { + dout(10) << dendl; + + { + std::lock_guard locker{m_lock}; + if (m_local_pool_watcher) { + Context *ctx = new LambdaContext([this, on_finish](int r) { + handle_shut_down_pool_watchers(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + auto gather_ctx = new C_Gather(g_ceph_context, ctx); + m_local_pool_watcher->shut_down(gather_ctx->new_sub()); + if (m_remote_pool_watcher) { + m_remote_pool_watcher->shut_down(gather_ctx->new_sub()); + } + gather_ctx->activate(); + return; + } + } + + on_finish->complete(0); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_pool_watchers( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_local_pool_watcher); + m_local_pool_watcher.reset(); + + if (m_remote_pool_watcher) { + m_remote_pool_watcher.reset(); + } + } + shut_down_image_map(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::shut_down_image_map(Context *on_finish) { + dout(5) << dendl; + + std::lock_guard locker{m_lock}; + if (m_image_map) { + on_finish = new LambdaContext( + [this, on_finish](int r) { + 
handle_shut_down_image_map(r, on_finish); + }); + m_image_map->shut_down(create_async_context_callback( + m_threads->work_queue, on_finish)); + return; + } + + m_threads->work_queue->queue(on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_shut_down_image_map(int r, Context *on_finish) { + dout(5) << "r=" << r << dendl; + if (r < 0 && r != -EBLOCKLISTED) { + derr << "failed to shut down image map: " << cpp_strerror(r) << dendl; + } + + std::lock_guard locker{m_lock}; + ceph_assert(m_image_map); + m_image_map.reset(); + + m_instance_replayer->release_all(create_async_context_callback( + m_threads->work_queue, on_finish)); +} + +template <typename I> +void NamespaceReplayer<I>::handle_acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_image_acquire(instance_id, global_image_id, + on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_image_release(instance_id, global_image_id, + on_finish); +} + +template <typename I> +void NamespaceReplayer<I>::handle_remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + ceph_assert(!mirror_uuid.empty()); + dout(5) << "mirror_uuid=" << mirror_uuid << ", " + << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_peer_image_removed(instance_id, global_image_id, + mirror_uuid, on_finish); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::NamespaceReplayer<librbd::ImageCtx>; diff 
--git a/src/tools/rbd_mirror/NamespaceReplayer.h b/src/tools/rbd_mirror/NamespaceReplayer.h new file mode 100644 index 000000000..e304b8253 --- /dev/null +++ b/src/tools/rbd_mirror/NamespaceReplayer.h @@ -0,0 +1,308 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H +#define CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H + +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" + +#include "tools/rbd_mirror/ImageDeleter.h" +#include "tools/rbd_mirror/ImageMap.h" +#include "tools/rbd_mirror/InstanceReplayer.h" +#include "tools/rbd_mirror/InstanceWatcher.h" +#include "tools/rbd_mirror/MirrorStatusUpdater.h" +#include "tools/rbd_mirror/PoolWatcher.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_map/Types.h" +#include "tools/rbd_mirror/pool_watcher/Types.h" + +#include <memory> +#include <string> +#include <vector> + +class AdminSocketHook; + +namespace journal { struct CacheManagerHandler; } + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +struct PoolMetaCache; +template <typename> class ServiceDaemon; +template <typename> class Throttler; +template <typename> struct Threads; + +/** + * Controls mirroring for a single remote cluster. 
+ */ +template <typename ImageCtxT = librbd::ImageCtx> +class NamespaceReplayer { +public: + static NamespaceReplayer *create( + const std::string &name, + librados::IoCtx &local_ioctx, + librados::IoCtx &remote_ioctx, + const std::string &local_mirror_uuid, + const std::string &local_mirror_peer_uuid, + const RemotePoolMeta& remote_pool_meta, + Threads<ImageCtxT> *threads, + Throttler<ImageCtxT> *image_sync_throttler, + Throttler<ImageCtxT> *image_deletion_throttler, + ServiceDaemon<ImageCtxT> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache) { + return new NamespaceReplayer(name, local_ioctx, remote_ioctx, + local_mirror_uuid, local_mirror_peer_uuid, + remote_pool_meta, threads, + image_sync_throttler, image_deletion_throttler, + service_daemon, cache_manager_handler, + pool_meta_cache); + } + + NamespaceReplayer(const std::string &name, + librados::IoCtx &local_ioctx, + librados::IoCtx &remote_ioctx, + const std::string &local_mirror_uuid, + const std::string& local_mirror_peer_uuid, + const RemotePoolMeta& remote_pool_meta, + Threads<ImageCtxT> *threads, + Throttler<ImageCtxT> *image_sync_throttler, + Throttler<ImageCtxT> *image_deletion_throttler, + ServiceDaemon<ImageCtxT> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache); + NamespaceReplayer(const NamespaceReplayer&) = delete; + NamespaceReplayer& operator=(const NamespaceReplayer&) = delete; + + bool is_blocklisted() const; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void handle_acquire_leader(Context *on_finish); + void handle_release_leader(Context *on_finish); + void handle_update_leader(const std::string &leader_instance_id); + void handle_instances_added(const std::vector<std::string> &instance_ids); + void handle_instances_removed(const std::vector<std::string> &instance_ids); + + void print_status(Formatter *f); + void start(); + void stop(); + void 
restart(); + void flush(); + +private: + /** + * @verbatim + * + * <uninitialized> <------------------------------------\ + * | (init) ^ (error) | + * v * | + * INIT_LOCAL_STATUS_UPDATER * * * * * * * * > SHUT_DOWN_LOCAL_STATUS_UPDATER + * | * (error) ^ + * v * | + * INIT_REMOTE_STATUS_UPDATER * * * * * * * > SHUT_DOWN_REMOTE_STATUS_UPDATER + * | * (error) ^ + * v * | + * INIT_INSTANCE_REPLAYER * * * * * * * * * > SHUT_DOWN_INSTANCE_REPLAYER + * | * ^ + * v * | + * INIT_INSTANCE_WATCHER * * * * * * * * * * SHUT_DOWN_INSTANCE_WATCHER + * | (error) ^ + * | | + * v STOP_INSTANCE_REPLAYER + * | ^ + * | (shut down) | + * | /----------------------------------------------/ + * v | + * <follower> <---------------------------\ + * . | + * . | + * v (leader acquired) | + * INIT_IMAGE_MAP | + * | | + * v | + * INIT_LOCAL_POOL_WATCHER SHUT_DOWN_IMAGE_MAP + * | ^ + * v | + * INIT_REMOTE_POOL_WATCHER SHUT_DOWN_POOL_WATCHERS + * | ^ + * v | + * INIT_IMAGE_DELETER SHUT_DOWN_IMAGE_DELETER + * | ^ + * v . + * <leader> <-----------\ . + * . | . + * . (image update) | . + * . . > NOTIFY_INSTANCE_WATCHER . + * . . + * . (leader lost / shut down) . + * . . . . . . . . . . . . . . . . . . . + * + * @endverbatim + */ + + struct PoolWatcherListener : public pool_watcher::Listener { + NamespaceReplayer *namespace_replayer; + bool local; + + PoolWatcherListener(NamespaceReplayer *namespace_replayer, bool local) + : namespace_replayer(namespace_replayer), local(local) { + } + + void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) override { + namespace_replayer->handle_update((local ? 
"" : mirror_uuid), + std::move(added_image_ids), + std::move(removed_image_ids)); + } + }; + + struct ImageMapListener : public image_map::Listener { + NamespaceReplayer *namespace_replayer; + + ImageMapListener(NamespaceReplayer *namespace_replayer) + : namespace_replayer(namespace_replayer) { + } + + void acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + namespace_replayer->handle_acquire_image(global_image_id, instance_id, + on_finish); + } + + void release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + namespace_replayer->handle_release_image(global_image_id, instance_id, + on_finish); + } + + void remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + namespace_replayer->handle_remove_image(mirror_uuid, global_image_id, + instance_id, on_finish); + } + }; + + void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids); + + int init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, RadosRef *rados_ref, + bool strip_cluster_overrides); + + void init_local_status_updater(); + void handle_init_local_status_updater(int r); + + void init_remote_status_updater(); + void handle_init_remote_status_updater(int r); + + void init_instance_replayer(); + void handle_init_instance_replayer(int r); + + void init_instance_watcher(); + void handle_init_instance_watcher(int r); + + void stop_instance_replayer(); + void handle_stop_instance_replayer(int r); + + void shut_down_instance_watcher(); + void handle_shut_down_instance_watcher(int r); + + void shut_down_instance_replayer(); + void handle_shut_down_instance_replayer(int r); + + void shut_down_remote_status_updater(); + void 
handle_shut_down_remote_status_updater(int r); + + void shut_down_local_status_updater(); + void handle_shut_down_local_status_updater(int r); + + void init_image_map(Context *on_finish); + void handle_init_image_map(int r, ImageMap<ImageCtxT> *image_map, + Context *on_finish); + + void init_local_pool_watcher(Context *on_finish); + void handle_init_local_pool_watcher(int r, Context *on_finish); + + void init_remote_pool_watcher(Context *on_finish); + void handle_init_remote_pool_watcher(int r, Context *on_finish); + + void init_image_deleter(Context* on_finish); + void handle_init_image_deleter(int r, Context* on_finish); + + void shut_down_image_deleter(Context* on_finish); + void handle_shut_down_image_deleter(int r, Context* on_finish); + + void shut_down_pool_watchers(Context *on_finish); + void handle_shut_down_pool_watchers(int r, Context *on_finish); + + void shut_down_image_map(Context *on_finish); + void handle_shut_down_image_map(int r, Context *on_finish); + + void handle_acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + void handle_release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + void handle_remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + + std::string m_namespace_name; + librados::IoCtx m_local_io_ctx; + librados::IoCtx m_remote_io_ctx; + std::string m_local_mirror_uuid; + std::string m_local_mirror_peer_uuid; + RemotePoolMeta m_remote_pool_meta; + Threads<ImageCtxT> *m_threads; + Throttler<ImageCtxT> *m_image_sync_throttler; + Throttler<ImageCtxT> *m_image_deletion_throttler; + ServiceDaemon<ImageCtxT> *m_service_daemon; + journal::CacheManagerHandler *m_cache_manager_handler; + PoolMetaCache* m_pool_meta_cache; + + mutable ceph::mutex m_lock; + + int m_ret_val = 0; + Context *m_on_finish = nullptr; + + 
std::unique_ptr<MirrorStatusUpdater<ImageCtxT>> m_local_status_updater; + std::unique_ptr<MirrorStatusUpdater<ImageCtxT>> m_remote_status_updater; + + PoolWatcherListener m_local_pool_watcher_listener; + std::unique_ptr<PoolWatcher<ImageCtxT>> m_local_pool_watcher; + + PoolWatcherListener m_remote_pool_watcher_listener; + std::unique_ptr<PoolWatcher<ImageCtxT>> m_remote_pool_watcher; + + std::unique_ptr<InstanceReplayer<ImageCtxT>> m_instance_replayer; + std::unique_ptr<ImageDeleter<ImageCtxT>> m_image_deleter; + + ImageMapListener m_image_map_listener; + std::unique_ptr<ImageMap<ImageCtxT>> m_image_map; + + std::unique_ptr<InstanceWatcher<ImageCtxT>> m_instance_watcher; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::NamespaceReplayer<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_NAMESPACE_REPLAYER_H diff --git a/src/tools/rbd_mirror/PoolMetaCache.cc b/src/tools/rbd_mirror/PoolMetaCache.cc new file mode 100644 index 000000000..261802a55 --- /dev/null +++ b/src/tools/rbd_mirror/PoolMetaCache.cc @@ -0,0 +1,83 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/dout.h" +#include "tools/rbd_mirror/PoolMetaCache.h" +#include <shared_mutex> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolMetaCache: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +int PoolMetaCache::get_local_pool_meta( + int64_t pool_id, + LocalPoolMeta* local_pool_meta) const { + dout(15) << "pool_id=" << pool_id << dendl; + + std::shared_lock locker{m_lock}; + auto it = m_local_pool_metas.find(pool_id); + if (it == m_local_pool_metas.end()) { + return -ENOENT; + } + + *local_pool_meta = it->second; + return 0; +} + +void PoolMetaCache::set_local_pool_meta( + int64_t pool_id, + const LocalPoolMeta& local_pool_meta) { + dout(15) << 
"pool_id=" << pool_id << ", " + << "local_pool_meta=" << local_pool_meta << dendl; + + std::unique_lock locker(m_lock); + m_local_pool_metas[pool_id] = local_pool_meta; +} + +void PoolMetaCache::remove_local_pool_meta(int64_t pool_id) { + dout(15) << "pool_id=" << pool_id << dendl; + + std::unique_lock locker(m_lock); + m_local_pool_metas.erase(pool_id); +} + +int PoolMetaCache::get_remote_pool_meta( + int64_t pool_id, + RemotePoolMeta* remote_pool_meta) const { + dout(15) << "pool_id=" << pool_id << dendl; + + std::shared_lock locker{m_lock}; + auto it = m_remote_pool_metas.find(pool_id); + if (it == m_remote_pool_metas.end()) { + return -ENOENT; + } + + *remote_pool_meta = it->second; + return 0; +} + +void PoolMetaCache::set_remote_pool_meta( + int64_t pool_id, + const RemotePoolMeta& remote_pool_meta) { + dout(15) << "pool_id=" << pool_id << ", " + << "remote_pool_meta=" << remote_pool_meta << dendl; + + std::unique_lock locker(m_lock); + m_remote_pool_metas[pool_id] = remote_pool_meta; +} + +void PoolMetaCache::remove_remote_pool_meta(int64_t pool_id) { + dout(15) << "pool_id=" << pool_id << dendl; + + std::unique_lock locker(m_lock); + m_remote_pool_metas.erase(pool_id); +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/PoolMetaCache.h b/src/tools/rbd_mirror/PoolMetaCache.h new file mode 100644 index 000000000..f0440120f --- /dev/null +++ b/src/tools/rbd_mirror/PoolMetaCache.h @@ -0,0 +1,47 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_META_CACHE_H +#define CEPH_RBD_MIRROR_POOL_META_CACHE_H + +#include "include/int_types.h" +#include "common/ceph_mutex.h" +#include "tools/rbd_mirror/Types.h" +#include <map> + +namespace rbd { +namespace mirror { + +class PoolMetaCache { +public: + PoolMetaCache(CephContext* cct) + : m_cct(cct) { + } + PoolMetaCache(const PoolMetaCache&) = delete; + PoolMetaCache& operator=(const PoolMetaCache&) = delete; + 
+ int get_local_pool_meta(int64_t pool_id, + LocalPoolMeta* local_pool_meta) const; + void set_local_pool_meta(int64_t pool_id, + const LocalPoolMeta& local_pool_meta); + void remove_local_pool_meta(int64_t pool_id); + + int get_remote_pool_meta(int64_t pool_id, + RemotePoolMeta* remote_pool_meta) const; + void set_remote_pool_meta(int64_t pool_id, + const RemotePoolMeta& remote_pool_meta); + void remove_remote_pool_meta(int64_t pool_id); + +private: + CephContext* m_cct; + + mutable ceph::shared_mutex m_lock = + ceph::make_shared_mutex("rbd::mirror::PoolMetaCache::m_lock"); + std::map<int64_t, LocalPoolMeta> m_local_pool_metas; + std::map<int64_t, RemotePoolMeta> m_remote_pool_metas; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_POOL_META_CACHE_H diff --git a/src/tools/rbd_mirror/PoolReplayer.cc b/src/tools/rbd_mirror/PoolReplayer.cc new file mode 100644 index 000000000..8a04219da --- /dev/null +++ b/src/tools/rbd_mirror/PoolReplayer.cc @@ -0,0 +1,1110 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "PoolReplayer.h" +#include "common/Cond.h" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/ceph_argparse.h" +#include "common/code_environment.h" +#include "common/common_init.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "global/global_context.h" +#include "librbd/api/Config.h" +#include "librbd/api/Namespace.h" +#include "PoolMetaCache.h" +#include "RemotePoolPoller.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolReplayer: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using ::operator<<; + +namespace { + +const std::string SERVICE_DAEMON_INSTANCE_ID_KEY("instance_id"); +const std::string 
SERVICE_DAEMON_LEADER_KEY("leader"); + +const std::vector<std::string> UNIQUE_PEER_CONFIG_KEYS { + {"monmap", "mon_host", "mon_dns_srv_name", "key", "keyfile", "keyring"}}; + +template <typename I> +class PoolReplayerAdminSocketCommand { +public: + PoolReplayerAdminSocketCommand(PoolReplayer<I> *pool_replayer) + : pool_replayer(pool_replayer) { + } + virtual ~PoolReplayerAdminSocketCommand() {} + virtual int call(Formatter *f) = 0; +protected: + PoolReplayer<I> *pool_replayer; +}; + +template <typename I> +class StatusCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StatusCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->print_status(f); + return 0; + } +}; + +template <typename I> +class StartCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StartCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->start(); + return 0; + } +}; + +template <typename I> +class StopCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StopCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->stop(true); + return 0; + } +}; + +template <typename I> +class RestartCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit RestartCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->restart(); + return 0; + } +}; + +template <typename I> +class FlushCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit FlushCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + 
this->pool_replayer->flush(); + return 0; + } +}; + +template <typename I> +class LeaderReleaseCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit LeaderReleaseCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + int call(Formatter *f) override { + this->pool_replayer->release_leader(); + return 0; + } +}; + +template <typename I> +class PoolReplayerAdminSocketHook : public AdminSocketHook { +public: + PoolReplayerAdminSocketHook(CephContext *cct, const std::string &name, + PoolReplayer<I> *pool_replayer) + : admin_socket(cct->get_admin_socket()) { + std::string command; + int r; + + command = "rbd mirror status " + name; + r = admin_socket->register_command(command, this, + "get status for rbd mirror " + name); + if (r == 0) { + commands[command] = new StatusCommand<I>(pool_replayer); + } + + command = "rbd mirror start " + name; + r = admin_socket->register_command(command, this, + "start rbd mirror " + name); + if (r == 0) { + commands[command] = new StartCommand<I>(pool_replayer); + } + + command = "rbd mirror stop " + name; + r = admin_socket->register_command(command, this, + "stop rbd mirror " + name); + if (r == 0) { + commands[command] = new StopCommand<I>(pool_replayer); + } + + command = "rbd mirror restart " + name; + r = admin_socket->register_command(command, this, + "restart rbd mirror " + name); + if (r == 0) { + commands[command] = new RestartCommand<I>(pool_replayer); + } + + command = "rbd mirror flush " + name; + r = admin_socket->register_command(command, this, + "flush rbd mirror " + name); + if (r == 0) { + commands[command] = new FlushCommand<I>(pool_replayer); + } + + command = "rbd mirror leader release " + name; + r = admin_socket->register_command(command, this, + "release rbd mirror leader " + name); + if (r == 0) { + commands[command] = new LeaderReleaseCommand<I>(pool_replayer); + } + } + + ~PoolReplayerAdminSocketHook() override { + 
(void)admin_socket->unregister_commands(this); + for (auto i = commands.begin(); i != commands.end(); ++i) { + delete i->second; + } + } + + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& ss, + bufferlist& out) override { + auto i = commands.find(command); + ceph_assert(i != commands.end()); + return i->second->call(f); + } + +private: + typedef std::map<std::string, PoolReplayerAdminSocketCommand<I>*, + std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +} // anonymous namespace + +template <typename I> +struct PoolReplayer<I>::RemotePoolPollerListener + : public remote_pool_poller::Listener { + + PoolReplayer<I>* m_pool_replayer; + + RemotePoolPollerListener(PoolReplayer<I>* pool_replayer) + : m_pool_replayer(pool_replayer) { + } + + void handle_updated(const RemotePoolMeta& remote_pool_meta) override { + m_pool_replayer->handle_remote_pool_meta_updated(remote_pool_meta); + } +}; + +template <typename I> +PoolReplayer<I>::PoolReplayer( + Threads<I> *threads, ServiceDaemon<I> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache, int64_t local_pool_id, + const PeerSpec &peer, const std::vector<const char*> &args) : + m_threads(threads), + m_service_daemon(service_daemon), + m_cache_manager_handler(cache_manager_handler), + m_pool_meta_cache(pool_meta_cache), + m_local_pool_id(local_pool_id), + m_peer(peer), + m_args(args), + m_lock(ceph::make_mutex("rbd::mirror::PoolReplayer " + stringify(peer))), + m_pool_replayer_thread(this), + m_leader_listener(this) { +} + +template <typename I> +PoolReplayer<I>::~PoolReplayer() +{ + shut_down(); + + ceph_assert(m_asok_hook == nullptr); +} + +template <typename I> +bool PoolReplayer<I>::is_blocklisted() const { + std::lock_guard locker{m_lock}; + return m_blocklisted; +} + +template <typename I> +bool PoolReplayer<I>::is_leader() const { + std::lock_guard locker{m_lock}; + return 
m_leader_watcher && m_leader_watcher->is_leader(); +} + +template <typename I> +bool PoolReplayer<I>::is_running() const { + return m_pool_replayer_thread.is_started() && !m_stopping; +} + +template <typename I> +void PoolReplayer<I>::init(const std::string& site_name) { + std::lock_guard locker{m_lock}; + + ceph_assert(!m_pool_replayer_thread.is_started()); + + // reset state + m_stopping = false; + m_blocklisted = false; + m_site_name = site_name; + + dout(10) << "replaying for " << m_peer << dendl; + int r = init_rados(g_ceph_context->_conf->cluster, + g_ceph_context->_conf->name.to_str(), + "", "", "local cluster", &m_local_rados, false); + if (r < 0) { + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to connect to local cluster"); + return; + } + + r = init_rados(m_peer.cluster_name, m_peer.client_name, + m_peer.mon_host, m_peer.key, + std::string("remote peer ") + stringify(m_peer), + &m_remote_rados, true); + if (r < 0) { + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to connect to remote cluster"); + return; + } + + r = m_local_rados->ioctx_create2(m_local_pool_id, m_local_io_ctx); + if (r < 0) { + derr << "error accessing local pool " << m_local_pool_id << ": " + << cpp_strerror(r) << dendl; + return; + } + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + librbd::api::Config<I>::apply_pool_overrides(m_local_io_ctx, &cct->_conf); + + r = librbd::cls_client::mirror_uuid_get(&m_local_io_ctx, + &m_local_mirror_uuid); + if (r < 0) { + derr << "failed to retrieve local mirror uuid from pool " + << m_local_io_ctx.get_pool_name() << ": " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to query local mirror uuid"); + return; + } + + r = 
m_remote_rados->ioctx_create(m_local_io_ctx.get_pool_name().c_str(), + m_remote_io_ctx); + if (r < 0) { + derr << "error accessing remote pool " << m_local_io_ctx.get_pool_name() + << ": " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_WARNING, + "unable to access remote pool"); + return; + } + + dout(10) << "connected to " << m_peer << dendl; + + m_image_sync_throttler.reset( + Throttler<I>::create(cct, "rbd_mirror_concurrent_image_syncs")); + + m_image_deletion_throttler.reset( + Throttler<I>::create(cct, "rbd_mirror_concurrent_image_deletions")); + + m_remote_pool_poller_listener.reset(new RemotePoolPollerListener(this)); + m_remote_pool_poller.reset(RemotePoolPoller<I>::create( + m_threads, m_remote_io_ctx, m_site_name, m_local_mirror_uuid, + *m_remote_pool_poller_listener)); + + C_SaferCond on_pool_poller_init; + m_remote_pool_poller->init(&on_pool_poller_init); + r = on_pool_poller_init.wait(); + if (r < 0) { + derr << "failed to initialize remote pool poller: " << cpp_strerror(r) + << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to initialize remote pool poller"); + m_remote_pool_poller.reset(); + return; + } + ceph_assert(!m_remote_pool_meta.mirror_uuid.empty()); + m_pool_meta_cache->set_remote_pool_meta( + m_remote_io_ctx.get_id(), m_remote_pool_meta); + m_pool_meta_cache->set_local_pool_meta( + m_local_io_ctx.get_id(), {m_local_mirror_uuid}); + + m_default_namespace_replayer.reset(NamespaceReplayer<I>::create( + "", m_local_io_ctx, m_remote_io_ctx, m_local_mirror_uuid, m_peer.uuid, + m_remote_pool_meta, m_threads, m_image_sync_throttler.get(), + m_image_deletion_throttler.get(), m_service_daemon, + m_cache_manager_handler, m_pool_meta_cache)); + + C_SaferCond on_init; + m_default_namespace_replayer->init(&on_init); + r = on_init.wait(); + if (r < 0) { + 
derr << "error initializing default namespace replayer: " << cpp_strerror(r) + << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to initialize default namespace replayer"); + m_default_namespace_replayer.reset(); + return; + } + + m_leader_watcher.reset(LeaderWatcher<I>::create(m_threads, m_local_io_ctx, + &m_leader_listener)); + r = m_leader_watcher->init(); + if (r < 0) { + derr << "error initializing leader watcher: " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to initialize leader messenger object"); + m_leader_watcher.reset(); + return; + } + + if (m_callout_id != service_daemon::CALLOUT_ID_NONE) { + m_service_daemon->remove_callout(m_local_pool_id, m_callout_id); + m_callout_id = service_daemon::CALLOUT_ID_NONE; + } + + m_service_daemon->add_or_update_attribute( + m_local_io_ctx.get_id(), SERVICE_DAEMON_INSTANCE_ID_KEY, + stringify(m_local_io_ctx.get_instance_id())); + + m_pool_replayer_thread.create("pool replayer"); +} + +template <typename I> +void PoolReplayer<I>::shut_down() { + { + std::lock_guard l{m_lock}; + m_stopping = true; + m_cond.notify_all(); + } + if (m_pool_replayer_thread.is_started()) { + m_pool_replayer_thread.join(); + } + + if (m_leader_watcher) { + m_leader_watcher->shut_down(); + } + m_leader_watcher.reset(); + + if (m_default_namespace_replayer) { + C_SaferCond on_shut_down; + m_default_namespace_replayer->shut_down(&on_shut_down); + on_shut_down.wait(); + } + m_default_namespace_replayer.reset(); + + if (m_remote_pool_poller) { + C_SaferCond ctx; + m_remote_pool_poller->shut_down(&ctx); + ctx.wait(); + + m_pool_meta_cache->remove_remote_pool_meta(m_remote_io_ctx.get_id()); + m_pool_meta_cache->remove_local_pool_meta(m_local_io_ctx.get_id()); + } + m_remote_pool_poller.reset(); + m_remote_pool_poller_listener.reset(); 
+ + m_image_sync_throttler.reset(); + m_image_deletion_throttler.reset(); + + m_local_rados.reset(); + m_remote_rados.reset(); +} + +template <typename I> +int PoolReplayer<I>::init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, + RadosRef *rados_ref, + bool strip_cluster_overrides) { + // NOTE: manually bootstrap a CephContext here instead of via + // the librados API to avoid mixing global singletons between + // the librados shared library and the daemon + // TODO: eliminate intermingling of global singletons within Ceph APIs + CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT); + if (client_name.empty() || !iparams.name.from_str(client_name)) { + derr << "error initializing cluster handle for " << description << dendl; + return -EINVAL; + } + + CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + cct->_conf->cluster = cluster_name; + + // librados::Rados::conf_read_file + int r = cct->_conf.parse_config_files(nullptr, nullptr, 0); + if (r < 0 && r != -ENOENT) { + // do not treat this as fatal, it might still be able to connect + derr << "could not read ceph conf for " << description << ": " + << cpp_strerror(r) << dendl; + } + + // preserve cluster-specific config settings before applying environment/cli + // overrides + std::map<std::string, std::string> config_values; + if (strip_cluster_overrides) { + // remote peer connections shouldn't apply cluster-specific + // configuration settings + for (auto& key : UNIQUE_PEER_CONFIG_KEYS) { + config_values[key] = cct->_conf.get_val<std::string>(key); + } + } + + cct->_conf.parse_env(cct->get_module_type()); + + // librados::Rados::conf_parse_env + std::vector<const char*> args; + r = cct->_conf.parse_argv(args); + if (r < 0) { + derr << "could not parse environment for " << description << ":" + << cpp_strerror(r) << dendl; + cct->put(); + 
return r; + } + cct->_conf.parse_env(cct->get_module_type()); + + if (!m_args.empty()) { + // librados::Rados::conf_parse_argv + args = m_args; + r = cct->_conf.parse_argv(args); + if (r < 0) { + derr << "could not parse command line args for " << description << ": " + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + } + + if (strip_cluster_overrides) { + // remote peer connections shouldn't apply cluster-specific + // configuration settings + for (auto& pair : config_values) { + auto value = cct->_conf.get_val<std::string>(pair.first); + if (pair.second != value) { + dout(0) << "reverting global config option override: " + << pair.first << ": " << value << " -> " << pair.second + << dendl; + cct->_conf.set_val_or_die(pair.first, pair.second); + } + } + } + + if (!g_ceph_context->_conf->admin_socket.empty()) { + cct->_conf.set_val_or_die("admin_socket", + "$run_dir/$name.$pid.$cluster.$cctid.asok"); + } + + if (!mon_host.empty()) { + r = cct->_conf.set_val("mon_host", mon_host); + if (r < 0) { + derr << "failed to set mon_host config for " << description << ": " + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + } + + if (!key.empty()) { + r = cct->_conf.set_val("key", key); + if (r < 0) { + derr << "failed to set key config for " << description << ": " + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + } + + // disable unnecessary librbd cache + cct->_conf.set_val_or_die("rbd_cache", "false"); + cct->_conf.apply_changes(nullptr); + cct->_conf.complain_about_parse_error(cct); + + rados_ref->reset(new librados::Rados()); + + r = (*rados_ref)->init_with_context(cct); + ceph_assert(r == 0); + cct->put(); + + r = (*rados_ref)->connect(); + if (r < 0) { + derr << "error connecting to " << description << ": " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +void PoolReplayer<I>::run() { + dout(20) << dendl; + + while (true) { + std::string asok_hook_name = m_local_io_ctx.get_pool_name() + " " + 
+ m_peer.cluster_name; + if (m_asok_hook_name != asok_hook_name || m_asok_hook == nullptr) { + m_asok_hook_name = asok_hook_name; + delete m_asok_hook; + + m_asok_hook = new PoolReplayerAdminSocketHook<I>(g_ceph_context, + m_asok_hook_name, this); + } + + with_namespace_replayers([this]() { update_namespace_replayers(); }); + + std::unique_lock locker{m_lock}; + + if (m_leader_watcher->is_blocklisted() || + m_default_namespace_replayer->is_blocklisted()) { + m_blocklisted = true; + m_stopping = true; + } + + for (auto &it : m_namespace_replayers) { + if (it.second->is_blocklisted()) { + m_blocklisted = true; + m_stopping = true; + break; + } + } + + if (m_stopping) { + break; + } + + auto seconds = g_ceph_context->_conf.get_val<uint64_t>( + "rbd_mirror_pool_replayers_refresh_interval"); + m_cond.wait_for(locker, ceph::make_timespan(seconds)); + } + + // shut down namespace replayers + with_namespace_replayers([this]() { update_namespace_replayers(); }); + + delete m_asok_hook; + m_asok_hook = nullptr; +} + +template <typename I> +void PoolReplayer<I>::update_namespace_replayers() { + dout(20) << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + std::set<std::string> mirroring_namespaces; + if (!m_stopping) { + int r = list_mirroring_namespaces(&mirroring_namespaces); + if (r < 0) { + return; + } + } + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + C_SaferCond cond; + auto gather_ctx = new C_Gather(cct, &cond); + for (auto it = m_namespace_replayers.begin(); + it != m_namespace_replayers.end(); ) { + auto iter = mirroring_namespaces.find(it->first); + if (iter == mirroring_namespaces.end()) { + auto namespace_replayer = it->second; + auto on_shut_down = new LambdaContext( + [namespace_replayer, ctx=gather_ctx->new_sub()](int r) { + delete namespace_replayer; + ctx->complete(r); + }); + m_service_daemon->remove_namespace(m_local_pool_id, it->first); + namespace_replayer->shut_down(on_shut_down); + it = m_namespace_replayers.erase(it); + 
} else { + mirroring_namespaces.erase(iter); + it++; + } + } + + for (auto &name : mirroring_namespaces) { + auto namespace_replayer = NamespaceReplayer<I>::create( + name, m_local_io_ctx, m_remote_io_ctx, m_local_mirror_uuid, m_peer.uuid, + m_remote_pool_meta, m_threads, m_image_sync_throttler.get(), + m_image_deletion_throttler.get(), m_service_daemon, + m_cache_manager_handler, m_pool_meta_cache); + auto on_init = new LambdaContext( + [this, namespace_replayer, name, &mirroring_namespaces, + ctx=gather_ctx->new_sub()](int r) { + std::lock_guard locker{m_lock}; + if (r < 0) { + derr << "failed to initialize namespace replayer for namespace " + << name << ": " << cpp_strerror(r) << dendl; + delete namespace_replayer; + mirroring_namespaces.erase(name); + } else { + m_namespace_replayers[name] = namespace_replayer; + m_service_daemon->add_namespace(m_local_pool_id, name); + } + ctx->complete(r); + }); + namespace_replayer->init(on_init); + } + + gather_ctx->activate(); + + m_lock.unlock(); + cond.wait(); + m_lock.lock(); + + if (m_leader) { + C_SaferCond acquire_cond; + auto acquire_gather_ctx = new C_Gather(cct, &acquire_cond); + + for (auto &name : mirroring_namespaces) { + namespace_replayer_acquire_leader(name, acquire_gather_ctx->new_sub()); + } + acquire_gather_ctx->activate(); + + m_lock.unlock(); + acquire_cond.wait(); + m_lock.lock(); + + std::vector<std::string> instance_ids; + m_leader_watcher->list_instances(&instance_ids); + + for (auto &name : mirroring_namespaces) { + auto it = m_namespace_replayers.find(name); + if (it == m_namespace_replayers.end()) { + // acuire leader for this namespace replayer failed + continue; + } + it->second->handle_instances_added(instance_ids); + } + } else { + std::string leader_instance_id; + if (m_leader_watcher->get_leader_instance_id(&leader_instance_id)) { + for (auto &name : mirroring_namespaces) { + m_namespace_replayers[name]->handle_update_leader(leader_instance_id); + } + } + } +} + +template <typename I> +int 
PoolReplayer<I>::list_mirroring_namespaces( + std::set<std::string> *namespaces) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + std::vector<std::string> names; + + int r = librbd::api::Namespace<I>::list(m_local_io_ctx, &names); + if (r < 0) { + derr << "failed to list namespaces: " << cpp_strerror(r) << dendl; + return r; + } + + for (auto &name : names) { + cls::rbd::MirrorMode mirror_mode = cls::rbd::MIRROR_MODE_DISABLED; + int r = librbd::cls_client::mirror_mode_get(&m_local_io_ctx, &mirror_mode); + if (r < 0 && r != -ENOENT) { + derr << "failed to get namespace mirror mode: " << cpp_strerror(r) + << dendl; + if (m_namespace_replayers.count(name) == 0) { + continue; + } + } else if (mirror_mode == cls::rbd::MIRROR_MODE_DISABLED) { + dout(10) << "mirroring is disabled for namespace " << name << dendl; + continue; + } + + namespaces->insert(name); + } + + return 0; +} + +template <typename I> +void PoolReplayer<I>::reopen_logs() +{ + std::lock_guard locker{m_lock}; + + if (m_local_rados) { + reinterpret_cast<CephContext *>(m_local_rados->cct())->reopen_logs(); + } + if (m_remote_rados) { + reinterpret_cast<CephContext *>(m_remote_rados->cct())->reopen_logs(); + } +} + +template <typename I> +void PoolReplayer<I>::namespace_replayer_acquire_leader(const std::string &name, + Context *on_finish) { + ceph_assert(ceph_mutex_is_locked(m_lock)); + + auto it = m_namespace_replayers.find(name); + ceph_assert(it != m_namespace_replayers.end()); + + on_finish = new LambdaContext( + [this, name, on_finish](int r) { + if (r < 0) { + derr << "failed to handle acquire leader for namespace: " + << name << ": " << cpp_strerror(r) << dendl; + + // remove the namespace replayer -- update_namespace_replayers will + // retry to create it and acquire leader. 
+ + std::lock_guard locker{m_lock}; + + auto namespace_replayer = m_namespace_replayers[name]; + m_namespace_replayers.erase(name); + auto on_shut_down = new LambdaContext( + [namespace_replayer, on_finish](int r) { + delete namespace_replayer; + on_finish->complete(r); + }); + m_service_daemon->remove_namespace(m_local_pool_id, name); + namespace_replayer->shut_down(on_shut_down); + return; + } + on_finish->complete(0); + }); + + it->second->handle_acquire_leader(on_finish); +} + +template <typename I> +void PoolReplayer<I>::print_status(Formatter *f) { + dout(20) << dendl; + + assert(f); + + std::lock_guard l{m_lock}; + + f->open_object_section("pool_replayer_status"); + f->dump_stream("peer") << m_peer; + if (m_local_io_ctx.is_valid()) { + f->dump_string("pool", m_local_io_ctx.get_pool_name()); + f->dump_stream("instance_id") << m_local_io_ctx.get_instance_id(); + } + + std::string state("running"); + if (m_manual_stop) { + state = "stopped (manual)"; + } else if (m_stopping) { + state = "stopped"; + } else if (!is_running()) { + state = "error"; + } + f->dump_string("state", state); + + if (m_leader_watcher) { + std::string leader_instance_id; + m_leader_watcher->get_leader_instance_id(&leader_instance_id); + f->dump_string("leader_instance_id", leader_instance_id); + + bool leader = m_leader_watcher->is_leader(); + f->dump_bool("leader", leader); + if (leader) { + std::vector<std::string> instance_ids; + m_leader_watcher->list_instances(&instance_ids); + f->open_array_section("instances"); + for (auto instance_id : instance_ids) { + f->dump_string("instance_id", instance_id); + } + f->close_section(); // instances + } + } + + if (m_local_rados) { + auto cct = reinterpret_cast<CephContext *>(m_local_rados->cct()); + f->dump_string("local_cluster_admin_socket", + cct->_conf.get_val<std::string>("admin_socket")); + } + if (m_remote_rados) { + auto cct = reinterpret_cast<CephContext *>(m_remote_rados->cct()); + f->dump_string("remote_cluster_admin_socket", + 
cct->_conf.get_val<std::string>("admin_socket")); + } + + if (m_image_sync_throttler) { + f->open_object_section("sync_throttler"); + m_image_sync_throttler->print_status(f); + f->close_section(); // sync_throttler + } + + if (m_image_deletion_throttler) { + f->open_object_section("deletion_throttler"); + m_image_deletion_throttler->print_status(f); + f->close_section(); // deletion_throttler + } + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->print_status(f); + } + + f->open_array_section("namespaces"); + for (auto &it : m_namespace_replayers) { + f->open_object_section("namespace"); + f->dump_string("name", it.first); + it.second->print_status(f); + f->close_section(); // namespace + } + f->close_section(); // namespaces + + f->close_section(); // pool_replayer_status +} + +template <typename I> +void PoolReplayer<I>::start() { + dout(20) << dendl; + + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->start(); + } + for (auto &it : m_namespace_replayers) { + it.second->start(); + } +} + +template <typename I> +void PoolReplayer<I>::stop(bool manual) { + dout(20) << "enter: manual=" << manual << dendl; + + std::lock_guard l{m_lock}; + if (!manual) { + m_stopping = true; + m_cond.notify_all(); + return; + } else if (m_stopping) { + return; + } + + m_manual_stop = true; + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->stop(); + } + for (auto &it : m_namespace_replayers) { + it.second->stop(); + } +} + +template <typename I> +void PoolReplayer<I>::restart() { + dout(20) << dendl; + + std::lock_guard l{m_lock}; + + if (m_stopping) { + return; + } + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->restart(); + } + for (auto &it : m_namespace_replayers) { + it.second->restart(); + } +} + +template <typename I> +void PoolReplayer<I>::flush() { + dout(20) << dendl; + + std::lock_guard l{m_lock}; 
+ + if (m_stopping || m_manual_stop) { + return; + } + + if (m_default_namespace_replayer) { + m_default_namespace_replayer->flush(); + } + for (auto &it : m_namespace_replayers) { + it.second->flush(); + } +} + +template <typename I> +void PoolReplayer<I>::release_leader() { + dout(20) << dendl; + + std::lock_guard l{m_lock}; + + if (m_stopping || !m_leader_watcher) { + return; + } + + m_leader_watcher->release_leader(); +} + +template <typename I> +void PoolReplayer<I>::handle_post_acquire_leader(Context *on_finish) { + dout(20) << dendl; + + with_namespace_replayers( + [this](Context *on_finish) { + dout(10) << "handle_post_acquire_leader" << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + m_service_daemon->add_or_update_attribute(m_local_pool_id, + SERVICE_DAEMON_LEADER_KEY, + true); + auto ctx = new LambdaContext( + [this, on_finish](int r) { + if (r == 0) { + std::lock_guard locker{m_lock}; + m_leader = true; + } + on_finish->complete(r); + }); + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + auto gather_ctx = new C_Gather(cct, ctx); + + m_default_namespace_replayer->handle_acquire_leader( + gather_ctx->new_sub()); + + for (auto &it : m_namespace_replayers) { + namespace_replayer_acquire_leader(it.first, gather_ctx->new_sub()); + } + + gather_ctx->activate(); + }, on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_pre_release_leader(Context *on_finish) { + dout(20) << dendl; + + with_namespace_replayers( + [this](Context *on_finish) { + dout(10) << "handle_pre_release_leader" << dendl; + + ceph_assert(ceph_mutex_is_locked(m_lock)); + + m_leader = false; + m_service_daemon->remove_attribute(m_local_pool_id, + SERVICE_DAEMON_LEADER_KEY); + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + auto gather_ctx = new C_Gather(cct, on_finish); + + m_default_namespace_replayer->handle_release_leader( + gather_ctx->new_sub()); + + for (auto &it : m_namespace_replayers) { + 
it.second->handle_release_leader(gather_ctx->new_sub()); + } + + gather_ctx->activate(); + }, on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_update_leader( + const std::string &leader_instance_id) { + dout(10) << "leader_instance_id=" << leader_instance_id << dendl; + + std::lock_guard locker{m_lock}; + + m_default_namespace_replayer->handle_update_leader(leader_instance_id); + + for (auto &it : m_namespace_replayers) { + it.second->handle_update_leader(leader_instance_id); + } +} + +template <typename I> +void PoolReplayer<I>::handle_instances_added( + const std::vector<std::string> &instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + if (!m_leader_watcher->is_leader()) { + return; + } + + m_default_namespace_replayer->handle_instances_added(instance_ids); + + for (auto &it : m_namespace_replayers) { + it.second->handle_instances_added(instance_ids); + } +} + +template <typename I> +void PoolReplayer<I>::handle_instances_removed( + const std::vector<std::string> &instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + + std::lock_guard locker{m_lock}; + if (!m_leader_watcher->is_leader()) { + return; + } + + m_default_namespace_replayer->handle_instances_removed(instance_ids); + + for (auto &it : m_namespace_replayers) { + it.second->handle_instances_removed(instance_ids); + } +} + +template <typename I> +void PoolReplayer<I>::handle_remote_pool_meta_updated( + const RemotePoolMeta& remote_pool_meta) { + dout(5) << "remote_pool_meta=" << remote_pool_meta << dendl; + + if (!m_default_namespace_replayer) { + m_remote_pool_meta = remote_pool_meta; + return; + } + + derr << "remote pool metadata updated unexpectedly" << dendl; + std::unique_lock locker{m_lock}; + m_stopping = true; + m_cond.notify_all(); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::PoolReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/PoolReplayer.h 
b/src/tools/rbd_mirror/PoolReplayer.h new file mode 100644 index 000000000..e0fd75377 --- /dev/null +++ b/src/tools/rbd_mirror/PoolReplayer.h @@ -0,0 +1,288 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_REPLAYER_H +#define CEPH_RBD_MIRROR_POOL_REPLAYER_H + +#include "common/Cond.h" +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" + +#include "tools/rbd_mirror/LeaderWatcher.h" +#include "tools/rbd_mirror/NamespaceReplayer.h" +#include "tools/rbd_mirror/Throttler.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/leader_watcher/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" + +#include <map> +#include <memory> +#include <string> +#include <vector> + +class AdminSocketHook; + +namespace journal { struct CacheManagerHandler; } + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class RemotePoolPoller; +namespace remote_pool_poller { struct Listener; } + +struct PoolMetaCache; +template <typename> class ServiceDaemon; +template <typename> struct Threads; + + +/** + * Controls mirroring for a single remote cluster. 
+ */ +template <typename ImageCtxT = librbd::ImageCtx> +class PoolReplayer { +public: + PoolReplayer(Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT> *service_daemon, + journal::CacheManagerHandler *cache_manager_handler, + PoolMetaCache* pool_meta_cache, + int64_t local_pool_id, const PeerSpec &peer, + const std::vector<const char*> &args); + ~PoolReplayer(); + PoolReplayer(const PoolReplayer&) = delete; + PoolReplayer& operator=(const PoolReplayer&) = delete; + + bool is_blocklisted() const; + bool is_leader() const; + bool is_running() const; + + void init(const std::string& site_name); + void shut_down(); + + void run(); + + void print_status(Formatter *f); + void start(); + void stop(bool manual); + void restart(); + void flush(); + void release_leader(); + void reopen_logs(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * <follower> <---------------------\ + * . | + * . (leader acquired) | + * v | + * NOTIFY_NAMESPACE_WATCHERS NOTIFY_NAMESPACE_WATCHERS + * | ^ + * v . + * <leader> . + * . . + * . (leader lost / shut down) . + * . . . . . . . . . . . . . . . . 
+ * + * @endverbatim + */ + + struct RemotePoolPollerListener; + + int init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, RadosRef *rados_ref, + bool strip_cluster_overrides); + + void update_namespace_replayers(); + int list_mirroring_namespaces(std::set<std::string> *namespaces); + + void namespace_replayer_acquire_leader(const std::string &name, + Context *on_finish); + + void handle_post_acquire_leader(Context *on_finish); + void handle_pre_release_leader(Context *on_finish); + + void handle_update_leader(const std::string &leader_instance_id); + + void handle_instances_added(const std::vector<std::string> &instance_ids); + void handle_instances_removed(const std::vector<std::string> &instance_ids); + + // sync version, executed in the caller thread + template <typename L> + void with_namespace_replayers(L &&callback) { + std::lock_guard locker{m_lock}; + + if (m_namespace_replayers_locked) { + ceph_assert(m_on_namespace_replayers_unlocked == nullptr); + C_SaferCond cond; + m_on_namespace_replayers_unlocked = &cond; + m_lock.unlock(); + cond.wait(); + m_lock.lock(); + } else { + m_namespace_replayers_locked = true; + } + + ceph_assert(m_namespace_replayers_locked); + callback(); // may temporary release the lock + ceph_assert(m_namespace_replayers_locked); + + if (m_on_namespace_replayers_unlocked == nullptr) { + m_namespace_replayers_locked = false; + return; + } + + m_threads->work_queue->queue(m_on_namespace_replayers_unlocked); + m_on_namespace_replayers_unlocked = nullptr; + } + + // async version + template <typename L> + void with_namespace_replayers(L &&callback, Context *on_finish) { + std::lock_guard locker{m_lock}; + + on_finish = librbd::util::create_async_context_callback( + m_threads->work_queue, new LambdaContext( + [this, on_finish](int r) { + { + std::lock_guard locker{m_lock}; + ceph_assert(m_namespace_replayers_locked); + + 
m_namespace_replayers_locked = false; + + if (m_on_namespace_replayers_unlocked != nullptr) { + m_namespace_replayers_locked = true; + m_threads->work_queue->queue(m_on_namespace_replayers_unlocked); + m_on_namespace_replayers_unlocked = nullptr; + } + } + on_finish->complete(r); + })); + + auto on_lock = new LambdaContext( + [this, callback, on_finish](int) { + std::lock_guard locker{m_lock}; + ceph_assert(m_namespace_replayers_locked); + + callback(on_finish); + }); + + if (m_namespace_replayers_locked) { + ceph_assert(m_on_namespace_replayers_unlocked == nullptr); + m_on_namespace_replayers_unlocked = on_lock; + return; + } + + m_namespace_replayers_locked = true; + m_threads->work_queue->queue(on_lock); + } + + void handle_remote_pool_meta_updated(const RemotePoolMeta& remote_pool_meta); + + Threads<ImageCtxT> *m_threads; + ServiceDaemon<ImageCtxT> *m_service_daemon; + journal::CacheManagerHandler *m_cache_manager_handler; + PoolMetaCache* m_pool_meta_cache; + int64_t m_local_pool_id = -1; + PeerSpec m_peer; + std::vector<const char*> m_args; + + mutable ceph::mutex m_lock; + ceph::condition_variable m_cond; + std::string m_site_name; + bool m_stopping = false; + bool m_manual_stop = false; + bool m_blocklisted = false; + + RadosRef m_local_rados; + RadosRef m_remote_rados; + + librados::IoCtx m_local_io_ctx; + librados::IoCtx m_remote_io_ctx; + + std::string m_local_mirror_uuid; + + RemotePoolMeta m_remote_pool_meta; + std::unique_ptr<remote_pool_poller::Listener> m_remote_pool_poller_listener; + std::unique_ptr<RemotePoolPoller<ImageCtxT>> m_remote_pool_poller; + + std::unique_ptr<NamespaceReplayer<ImageCtxT>> m_default_namespace_replayer; + std::map<std::string, NamespaceReplayer<ImageCtxT> *> m_namespace_replayers; + + std::string m_asok_hook_name; + AdminSocketHook *m_asok_hook = nullptr; + + service_daemon::CalloutId m_callout_id = service_daemon::CALLOUT_ID_NONE; + + bool m_leader = false; + bool m_namespace_replayers_locked = false; + Context 
*m_on_namespace_replayers_unlocked = nullptr; + + class PoolReplayerThread : public Thread { + PoolReplayer *m_pool_replayer; + public: + PoolReplayerThread(PoolReplayer *pool_replayer) + : m_pool_replayer(pool_replayer) { + } + void *entry() override { + m_pool_replayer->run(); + return 0; + } + } m_pool_replayer_thread; + + class LeaderListener : public leader_watcher::Listener { + public: + LeaderListener(PoolReplayer *pool_replayer) + : m_pool_replayer(pool_replayer) { + } + + protected: + void post_acquire_handler(Context *on_finish) override { + m_pool_replayer->handle_post_acquire_leader(on_finish); + } + + void pre_release_handler(Context *on_finish) override { + m_pool_replayer->handle_pre_release_leader(on_finish); + } + + void update_leader_handler( + const std::string &leader_instance_id) override { + m_pool_replayer->handle_update_leader(leader_instance_id); + } + + void handle_instances_added(const InstanceIds& instance_ids) override { + m_pool_replayer->handle_instances_added(instance_ids); + } + + void handle_instances_removed(const InstanceIds& instance_ids) override { + m_pool_replayer->handle_instances_removed(instance_ids); + } + + private: + PoolReplayer *m_pool_replayer; + } m_leader_listener; + + std::unique_ptr<LeaderWatcher<ImageCtxT>> m_leader_watcher; + std::unique_ptr<Throttler<ImageCtxT>> m_image_sync_throttler; + std::unique_ptr<Throttler<ImageCtxT>> m_image_deletion_throttler; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::PoolReplayer<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_REPLAYER_H diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc new file mode 100644 index 000000000..bec931cf3 --- /dev/null +++ b/src/tools/rbd_mirror/PoolWatcher.cc @@ -0,0 +1,473 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/PoolWatcher.h" +#include "include/rbd_types.h" +#include 
"cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/MirroringWatcher.h" +#include "librbd/Utils.h" +#include "librbd/api/Image.h" +#include "librbd/api/Mirror.h" +#include "librbd/asio/ContextWQ.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolWatcher: " << this << " " \ + << __func__ << ": " + +using std::list; +using std::string; +using std::unique_ptr; +using std::vector; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { + +template <typename I> +class PoolWatcher<I>::MirroringWatcher : public librbd::MirroringWatcher<I> { +public: + using ContextWQ = typename std::decay< + typename std::remove_pointer< + decltype(Threads<I>::work_queue)>::type>::type; + + MirroringWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue, + PoolWatcher *pool_watcher) + : librbd::MirroringWatcher<I>(io_ctx, work_queue), + m_pool_watcher(pool_watcher) { + } + + void handle_rewatch_complete(int r) override { + m_pool_watcher->handle_rewatch_complete(r); + } + + void handle_mode_updated(cls::rbd::MirrorMode mirror_mode) override { + // invalidate all image state and refresh the pool contents + m_pool_watcher->schedule_refresh_images(5); + } + + void handle_image_updated(cls::rbd::MirrorImageState state, + const std::string &image_id, + const std::string &global_image_id) override { + bool enabled = (state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED); + m_pool_watcher->handle_image_updated(image_id, global_image_id, + enabled); + } + +private: + PoolWatcher *m_pool_watcher; +}; + +template <typename I> +PoolWatcher<I>::PoolWatcher(Threads<I> *threads, + librados::IoCtx 
&io_ctx, + const std::string& mirror_uuid, + pool_watcher::Listener &listener) + : m_threads(threads), + m_io_ctx(io_ctx), + m_mirror_uuid(mirror_uuid), + m_listener(listener), + m_lock(ceph::make_mutex(librbd::util::unique_lock_name( + "rbd::mirror::PoolWatcher", this))) { + m_mirroring_watcher = new MirroringWatcher(m_io_ctx, + m_threads->work_queue, this); +} + +template <typename I> +PoolWatcher<I>::~PoolWatcher() { + delete m_mirroring_watcher; +} + +template <typename I> +bool PoolWatcher<I>::is_blocklisted() const { + std::lock_guard locker{m_lock}; + return m_blocklisted; +} + +template <typename I> +void PoolWatcher<I>::init(Context *on_finish) { + dout(5) << dendl; + + { + std::lock_guard locker{m_lock}; + m_on_init_finish = on_finish; + + ceph_assert(!m_refresh_in_progress); + m_refresh_in_progress = true; + } + + // start async updates for mirror image directory + register_watcher(); +} + +template <typename I> +void PoolWatcher<I>::shut_down(Context *on_finish) { + dout(5) << dendl; + + { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + + ceph_assert(!m_shutting_down); + m_shutting_down = true; + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + } + } + + // in-progress unregister tracked as async op + unregister_watcher(); + + m_async_op_tracker.wait_for_ops(on_finish); +} + +template <typename I> +void PoolWatcher<I>::register_watcher() { + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + } + + // if the watch registration is in-flight, let the watcher + // handle the transition -- only (re-)register if it's not registered + if (!m_mirroring_watcher->is_unregistered()) { + refresh_images(); + return; + } + + // first time registering or the watch failed + dout(5) << dendl; + m_async_op_tracker.start_op(); + + Context *ctx = create_context_callback< + PoolWatcher, &PoolWatcher<I>::handle_register_watcher>(this); + 
m_mirroring_watcher->register_watch(ctx); +} + +template <typename I> +void PoolWatcher<I>::handle_register_watcher(int r) { + dout(5) << "r=" << r << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + if (r < 0) { + m_refresh_in_progress = false; + } + } + + Context *on_init_finish = nullptr; + if (r >= 0) { + refresh_images(); + } else if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted" << dendl; + + std::lock_guard locker{m_lock}; + m_blocklisted = true; + std::swap(on_init_finish, m_on_init_finish); + } else if (r == -ENOENT) { + dout(5) << "mirroring directory does not exist" << dendl; + { + std::lock_guard locker{m_lock}; + std::swap(on_init_finish, m_on_init_finish); + } + + schedule_refresh_images(30); + } else { + derr << "unexpected error registering mirroring directory watch: " + << cpp_strerror(r) << dendl; + schedule_refresh_images(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::unregister_watcher() { + dout(5) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new LambdaContext([this](int r) { + dout(5) << "unregister_watcher: r=" << r << dendl; + if (r < 0) { + derr << "error unregistering watcher for " + << m_mirroring_watcher->get_oid() << " object: " << cpp_strerror(r) + << dendl; + } + m_async_op_tracker.finish_op(); + }); + + m_mirroring_watcher->unregister_watch(ctx); +} + +template <typename I> +void PoolWatcher<I>::refresh_images() { + dout(5) << dendl; + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + + // clear all pending notification events since we need to perform + // a full image list refresh + m_pending_added_image_ids.clear(); + m_pending_removed_image_ids.clear(); + } + + m_async_op_tracker.start_op(); + m_refresh_image_ids.clear(); + Context *ctx = 
create_context_callback< + PoolWatcher, &PoolWatcher<I>::handle_refresh_images>(this); + auto req = pool_watcher::RefreshImagesRequest<I>::create(m_io_ctx, + &m_refresh_image_ids, + ctx); + req->send(); +} + +template <typename I> +void PoolWatcher<I>::handle_refresh_images(int r) { + dout(5) << "r=" << r << dendl; + + bool deferred_refresh = false; + bool retry_refresh = false; + Context *on_init_finish = nullptr; + { + std::lock_guard locker{m_lock}; + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + m_refresh_in_progress = false; + + if (r == -ENOENT) { + dout(5) << "mirroring directory not found" << dendl; + r = 0; + m_refresh_image_ids.clear(); + } + + if (m_deferred_refresh) { + // need to refresh -- skip the notification + deferred_refresh = true; + } else if (r >= 0) { + m_pending_image_ids = std::move(m_refresh_image_ids); + m_image_ids_invalid = false; + std::swap(on_init_finish, m_on_init_finish); + + schedule_listener(); + } else if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted during image refresh" << dendl; + + m_blocklisted = true; + std::swap(on_init_finish, m_on_init_finish); + } else { + retry_refresh = true; + } + } + + if (deferred_refresh) { + dout(5) << "scheduling deferred refresh" << dendl; + schedule_refresh_images(0); + } else if (retry_refresh) { + derr << "failed to retrieve mirroring directory: " << cpp_strerror(r) + << dendl; + schedule_refresh_images(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::schedule_refresh_images(double interval) { + std::scoped_lock locker{m_threads->timer_lock, m_lock}; + if (m_shutting_down || m_refresh_in_progress || m_timer_ctx != nullptr) { + if (m_refresh_in_progress && !m_deferred_refresh) { + dout(5) << "deferring refresh until in-flight refresh completes" << dendl; + m_deferred_refresh = true; + } + return; + } + + m_image_ids_invalid = 
true; + m_timer_ctx = m_threads->timer->add_event_after( + interval, + new LambdaContext([this](int r) { + process_refresh_images(); + })); +} + +template <typename I> +void PoolWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLOCKLISTED) { + dout(0) << "detected client is blocklisted" << dendl; + + std::lock_guard locker{m_lock}; + m_blocklisted = true; + return; + } else if (r == -ENOENT) { + dout(5) << "mirroring directory deleted" << dendl; + } else if (r < 0) { + derr << "unexpected error re-registering mirroring directory watch: " + << cpp_strerror(r) << dendl; + } + + schedule_refresh_images(5); +} + +template <typename I> +void PoolWatcher<I>::handle_image_updated(const std::string &id, + const std::string &global_image_id, + bool enabled) { + dout(10) << "image_id=" << id << ", " + << "global_image_id=" << global_image_id << ", " + << "enabled=" << enabled << dendl; + + std::lock_guard locker{m_lock}; + ImageId image_id(global_image_id, id); + m_pending_added_image_ids.erase(image_id); + m_pending_removed_image_ids.erase(image_id); + + if (enabled) { + m_pending_added_image_ids.insert(image_id); + schedule_listener(); + } else { + m_pending_removed_image_ids.insert(image_id); + schedule_listener(); + } +} + +template <typename I> +void PoolWatcher<I>::process_refresh_images() { + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + { + std::lock_guard locker{m_lock}; + ceph_assert(!m_refresh_in_progress); + m_refresh_in_progress = true; + m_deferred_refresh = false; + } + + // execute outside of the timer's lock + m_async_op_tracker.start_op(); + Context *ctx = new LambdaContext([this](int r) { + register_watcher(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void PoolWatcher<I>::schedule_listener() { + ceph_assert(ceph_mutex_is_locked(m_lock)); + m_pending_updates = true; + 
if (m_shutting_down || m_image_ids_invalid || m_notify_listener_in_progress) { + return; + } + + dout(20) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new LambdaContext([this](int r) { + notify_listener(); + m_async_op_tracker.finish_op(); + }); + + m_notify_listener_in_progress = true; + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void PoolWatcher<I>::notify_listener() { + dout(10) << dendl; + + std::string mirror_uuid; + ImageIds added_image_ids; + ImageIds removed_image_ids; + { + std::lock_guard locker{m_lock}; + ceph_assert(m_notify_listener_in_progress); + } + + if (!removed_image_ids.empty()) { + m_listener.handle_update(mirror_uuid, {}, std::move(removed_image_ids)); + removed_image_ids.clear(); + } + + { + std::lock_guard locker{m_lock}; + ceph_assert(m_notify_listener_in_progress); + + // if the watch failed while we didn't own the lock, we are going + // to need to perform a full refresh + if (m_image_ids_invalid) { + m_notify_listener_in_progress = false; + return; + } + + // merge add/remove notifications into pending set (a given image + // can only be in one set or another) + for (auto &image_id : m_pending_removed_image_ids) { + dout(20) << "image_id=" << image_id << dendl; + m_pending_image_ids.erase(image_id); + } + + for (auto &image_id : m_pending_added_image_ids) { + dout(20) << "image_id=" << image_id << dendl; + m_pending_image_ids.erase(image_id); + m_pending_image_ids.insert(image_id); + } + m_pending_added_image_ids.clear(); + + // compute added/removed images + for (auto &image_id : m_image_ids) { + auto it = m_pending_image_ids.find(image_id); + if (it == m_pending_image_ids.end() || it->id != image_id.id) { + removed_image_ids.insert(image_id); + } + } + for (auto &image_id : m_pending_image_ids) { + auto it = m_image_ids.find(image_id); + if (it == m_image_ids.end() || it->id != image_id.id) { + added_image_ids.insert(image_id); + } + } + + m_pending_updates = false; + m_image_ids = 
m_pending_image_ids; + } + + m_listener.handle_update(m_mirror_uuid, std::move(added_image_ids), + std::move(removed_image_ids)); + + { + std::lock_guard locker{m_lock}; + m_notify_listener_in_progress = false; + if (m_pending_updates) { + schedule_listener(); + } + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::PoolWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h new file mode 100644 index 000000000..2905de15f --- /dev/null +++ b/src/tools/rbd_mirror/PoolWatcher.h @@ -0,0 +1,161 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_H + +#include <map> +#include <memory> +#include <set> +#include <string> + +#include "common/AsyncOpTracker.h" +#include "common/ceph_context.h" +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include <boost/functional/hash.hpp> +#include <boost/optional.hpp> +#include "include/ceph_assert.h" +#include "tools/rbd_mirror/pool_watcher/Types.h" + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +/** + * Keeps track of images that have mirroring enabled within all + * pools. 
+ */ +template <typename ImageCtxT = librbd::ImageCtx> +class PoolWatcher { +public: + static PoolWatcher* create(Threads<ImageCtxT> *threads, + librados::IoCtx &io_ctx, + const std::string& mirror_uuid, + pool_watcher::Listener &listener) { + return new PoolWatcher(threads, io_ctx, mirror_uuid, listener); + } + + PoolWatcher(Threads<ImageCtxT> *threads, + librados::IoCtx &io_ctx, + const std::string& mirror_uuid, + pool_watcher::Listener &listener); + ~PoolWatcher(); + PoolWatcher(const PoolWatcher&) = delete; + PoolWatcher& operator=(const PoolWatcher&) = delete; + + bool is_blocklisted() const; + + void init(Context *on_finish = nullptr); + void shut_down(Context *on_finish); + + inline uint64_t get_image_count() const { + std::lock_guard locker{m_lock}; + return m_image_ids.size(); + } + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * REGISTER_WATCHER + * | + * |/--------------------------------\ + * | | + * v | + * REFRESH_IMAGES | + * | | + * |/----------------------------\ | + * | | | + * v | | + * NOTIFY_LISTENER | | + * | | | + * v | | + * IDLE ---\ | | + * | | | | + * | |\---> IMAGE_UPDATED | | + * | | | | | + * | | v | | + * | | GET_IMAGE_NAME --/ | + * | | | + * | \----> WATCH_ERROR ---------/ + * v + * SHUT_DOWN + * | + * v + * UNREGISTER_WATCHER + * | + * v + * <finish> + * + * @endverbatim + */ + class MirroringWatcher; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx m_io_ctx; + std::string m_mirror_uuid; + pool_watcher::Listener &m_listener; + + ImageIds m_refresh_image_ids; + bufferlist m_out_bl; + + mutable ceph::mutex m_lock; + + Context *m_on_init_finish = nullptr; + + ImageIds m_image_ids; + + bool m_pending_updates = false; + bool m_notify_listener_in_progress = false; + ImageIds m_pending_image_ids; + ImageIds m_pending_added_image_ids; + ImageIds m_pending_removed_image_ids; + + MirroringWatcher *m_mirroring_watcher; + + Context *m_timer_ctx = nullptr; + + AsyncOpTracker m_async_op_tracker; + bool 
m_blocklisted = false; + bool m_shutting_down = false; + bool m_image_ids_invalid = true; + bool m_refresh_in_progress = false; + bool m_deferred_refresh = false; + + void register_watcher(); + void handle_register_watcher(int r); + void unregister_watcher(); + + void refresh_images(); + void handle_refresh_images(int r); + + void schedule_refresh_images(double interval); + void process_refresh_images(); + + void handle_rewatch_complete(int r); + void handle_image_updated(const std::string &image_id, + const std::string &global_image_id, + bool enabled); + + void schedule_listener(); + void notify_listener(); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::PoolWatcher<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_H diff --git a/src/tools/rbd_mirror/ProgressContext.h b/src/tools/rbd_mirror/ProgressContext.h new file mode 100644 index 000000000..e4430ee6a --- /dev/null +++ b/src/tools/rbd_mirror/ProgressContext.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_PROGRESS_CONTEXT_H +#define RBD_MIRROR_PROGRESS_CONTEXT_H + +namespace rbd { +namespace mirror { + +class ProgressContext +{ +public: + virtual ~ProgressContext() {} + virtual void update_progress(const std::string &description, + bool flush = true) = 0; +}; + +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_PROGRESS_CONTEXT_H diff --git a/src/tools/rbd_mirror/RemotePoolPoller.cc b/src/tools/rbd_mirror/RemotePoolPoller.cc new file mode 100644 index 000000000..8bfb35d4a --- /dev/null +++ b/src/tools/rbd_mirror/RemotePoolPoller.cc @@ -0,0 +1,267 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "RemotePoolPoller.h" +#include "include/ceph_assert.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "cls/rbd/cls_rbd_client.h" +#include 
"librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::RemotePoolPoller: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +static const double POLL_INTERVAL_SECONDS = 30; + +using librbd::util::create_rados_callback; + +template <typename I> +RemotePoolPoller<I>::~RemotePoolPoller() { + ceph_assert(m_timer_task == nullptr); +} + +template <typename I> +void RemotePoolPoller<I>::init(Context* on_finish) { + dout(10) << dendl; + + ceph_assert(m_state == STATE_INITIALIZING); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + get_mirror_uuid(); +} + +template <typename I> +void RemotePoolPoller<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + std::unique_lock locker(m_threads->timer_lock); + ceph_assert(m_state == STATE_POLLING); + m_state = STATE_SHUTTING_DOWN; + + if (m_timer_task == nullptr) { + // currently executing a poll + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + return; + } + + m_threads->timer->cancel_event(m_timer_task); + m_timer_task = nullptr; + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void RemotePoolPoller<I>::get_mirror_uuid() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_uuid_get_start(&op); + + auto aio_comp = create_rados_callback< + RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_get_mirror_uuid>(this); + m_out_bl.clear(); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void RemotePoolPoller<I>::handle_get_mirror_uuid(int r) { + dout(10) << "r=" << r << dendl; + std::string remote_mirror_uuid; + if (r >= 0) { + auto it = 
m_out_bl.cbegin(); + r = librbd::cls_client::mirror_uuid_get_finish(&it, &remote_mirror_uuid); + if (r >= 0 && remote_mirror_uuid.empty()) { + r = -ENOENT; + } + } + + if (r < 0) { + if (r == -ENOENT) { + dout(5) << "remote mirror uuid missing" << dendl; + } else { + derr << "failed to retrieve remote mirror uuid: " << cpp_strerror(r) + << dendl; + } + + m_remote_pool_meta.mirror_uuid = ""; + } + + // if we have the mirror uuid, we will poll until shut down + if (m_state == STATE_INITIALIZING) { + if (r < 0) { + schedule_task(r); + return; + } + + m_state = STATE_POLLING; + } + + dout(10) << "remote_mirror_uuid=" << remote_mirror_uuid << dendl; + if (m_remote_pool_meta.mirror_uuid != remote_mirror_uuid) { + m_remote_pool_meta.mirror_uuid = remote_mirror_uuid; + m_updated = true; + } + + mirror_peer_ping(); +} + +template <typename I> +void RemotePoolPoller<I>::mirror_peer_ping() { + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_peer_ping(&op, m_site_name, m_local_mirror_uuid); + + auto aio_comp = create_rados_callback< + RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_mirror_peer_ping>(this); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void RemotePoolPoller<I>::handle_mirror_peer_ping(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EOPNOTSUPP) { + // older OSD that doesn't support snaphot-based mirroring, so no need + // to query remote peers + dout(10) << "remote peer does not support snapshot-based mirroring" + << dendl; + notify_listener(); + return; + } else if (r < 0) { + // we can still see if we can perform a peer list and find outselves + derr << "failed to ping remote mirror peer: " << cpp_strerror(r) << dendl; + } + + mirror_peer_list(); +} + +template <typename I> +void RemotePoolPoller<I>::mirror_peer_list() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + 
librbd::cls_client::mirror_peer_list_start(&op); + + auto aio_comp = create_rados_callback< + RemotePoolPoller<I>, &RemotePoolPoller<I>::handle_mirror_peer_list>(this); + m_out_bl.clear(); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void RemotePoolPoller<I>::handle_mirror_peer_list(int r) { + dout(10) << "r=" << r << dendl; + + std::vector<cls::rbd::MirrorPeer> peers; + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_peer_list_finish(&iter, &peers); + } + + if (r < 0) { + derr << "failed to retrieve mirror peers: " << cpp_strerror(r) << dendl; + } + + cls::rbd::MirrorPeer* matched_peer = nullptr; + for (auto& peer : peers) { + if (peer.mirror_peer_direction == cls::rbd::MIRROR_PEER_DIRECTION_RX) { + continue; + } + + if (peer.mirror_uuid == m_local_mirror_uuid) { + matched_peer = &peer; + break; + } else if (peer.site_name == m_site_name) { + // keep searching in case we hit an exact match by fsid + matched_peer = &peer; + } + } + + // older OSDs don't support peer ping so we might fail to find a match, + // which will prevent snapshot mirroring from functioning + std::string remote_mirror_peer_uuid; + if (matched_peer != nullptr) { + remote_mirror_peer_uuid = matched_peer->uuid; + } + + dout(10) << "remote_mirror_peer_uuid=" << remote_mirror_peer_uuid << dendl; + if (m_remote_pool_meta.mirror_peer_uuid != remote_mirror_peer_uuid) { + m_remote_pool_meta.mirror_peer_uuid = remote_mirror_peer_uuid; + m_updated = true; + } + + notify_listener(); +} + +template <typename I> +void RemotePoolPoller<I>::notify_listener() { + bool updated = false; + std::swap(updated, m_updated); + if (updated) { + dout(10) << dendl; + m_listener.handle_updated(m_remote_pool_meta); + } + + schedule_task(0); +} + +template <typename I> +void RemotePoolPoller<I>::schedule_task(int r) { + std::unique_lock locker{m_threads->timer_lock}; + + if (m_state 
== STATE_POLLING) { + dout(10) << dendl; + + ceph_assert(m_timer_task == nullptr); + m_timer_task = new LambdaContext([this](int) { + handle_task(); + }); + + m_threads->timer->add_event_after(POLL_INTERVAL_SECONDS, m_timer_task); + } + + // finish init or shut down callback + if (m_on_finish != nullptr) { + locker.unlock(); + Context* on_finish = nullptr; + std::swap(on_finish, m_on_finish); + on_finish->complete(m_state == STATE_SHUTTING_DOWN ? 0 : r); + } +} + +template <typename I> +void RemotePoolPoller<I>::handle_task() { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock)); + m_timer_task = nullptr; + + auto ctx = new LambdaContext([this](int) { + get_mirror_uuid(); + }); + m_threads->work_queue->queue(ctx); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::RemotePoolPoller<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/RemotePoolPoller.h b/src/tools/rbd_mirror/RemotePoolPoller.h new file mode 100644 index 000000000..19d803ca1 --- /dev/null +++ b/src/tools/rbd_mirror/RemotePoolPoller.h @@ -0,0 +1,133 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H +#define CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H + +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace remote_pool_poller { + +struct Listener { + virtual ~Listener() {} + + virtual void handle_updated(const RemotePoolMeta& remote_pool_meta) = 0; +}; + +}; // namespace remote_pool_poller + +template <typename ImageCtxT> +class RemotePoolPoller { +public: + static RemotePoolPoller* create( + Threads<ImageCtxT>* threads, + librados::IoCtx& remote_io_ctx, + const std::string& site_name, + const std::string& local_mirror_uuid, + remote_pool_poller::Listener& 
listener) { + return new RemotePoolPoller(threads, remote_io_ctx, site_name, + local_mirror_uuid, listener); + } + + RemotePoolPoller( + Threads<ImageCtxT>* threads, + librados::IoCtx& remote_io_ctx, + const std::string& site_name, + const std::string& local_mirror_uuid, + remote_pool_poller::Listener& listener) + : m_threads(threads), + m_remote_io_ctx(remote_io_ctx), + m_site_name(site_name), + m_local_mirror_uuid(local_mirror_uuid), + m_listener(listener) { + } + ~RemotePoolPoller(); + + void init(Context* on_finish); + void shut_down(Context* on_finish); + +private: + /** + * @verbatim + * + * <start> + * | + * |/----------------------------\ + * | | + * v | + * MIRROR_UUID_GET | + * | | + * v | + * MIRROR_PEER_PING | + * | | + * v | + * MIRROR_PEER_LIST | + * | | + * v | + * MIRROR_UUID_GET | + * | | + * v (skip if no changes) | + * NOTIFY_LISTENER | + * | | + * | (repeat periodically) | + * |\----------------------------/ + * | + * v + * <finish> + * + * @endverbatim + */ + + enum State { + STATE_INITIALIZING, + STATE_POLLING, + STATE_SHUTTING_DOWN + }; + + Threads<ImageCtxT>* m_threads; + librados::IoCtx& m_remote_io_ctx; + std::string m_site_name; + std::string m_local_mirror_uuid; + remote_pool_poller::Listener& m_listener; + + bufferlist m_out_bl; + + RemotePoolMeta m_remote_pool_meta; + bool m_updated = false; + + State m_state = STATE_INITIALIZING; + Context* m_timer_task = nullptr; + Context* m_on_finish = nullptr; + + void get_mirror_uuid(); + void handle_get_mirror_uuid(int r); + + void mirror_peer_ping(); + void handle_mirror_peer_ping(int r); + + void mirror_peer_list(); + void handle_mirror_peer_list(int r); + + void notify_listener(); + + void schedule_task(int r); + void handle_task(); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::RemotePoolPoller<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_REMOTE_POOL_POLLER_H diff --git a/src/tools/rbd_mirror/ServiceDaemon.cc 
b/src/tools/rbd_mirror/ServiceDaemon.cc new file mode 100644 index 000000000..f3cabcc87 --- /dev/null +++ b/src/tools/rbd_mirror/ServiceDaemon.cc @@ -0,0 +1,327 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/ServiceDaemon.h" +#include "include/Context.h" +#include "include/stringify.h" +#include "common/ceph_context.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/Timer.h" +#include "tools/rbd_mirror/Threads.h" +#include <sstream> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ServiceDaemon: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +namespace { + +const std::string RBD_MIRROR_AUTH_ID_PREFIX("rbd-mirror."); + +struct AttributeDumpVisitor : public boost::static_visitor<void> { + ceph::Formatter *f; + const std::string& name; + + AttributeDumpVisitor(ceph::Formatter *f, const std::string& name) + : f(f), name(name) { + } + + void operator()(bool val) const { + f->dump_bool(name.c_str(), val); + } + void operator()(uint64_t val) const { + f->dump_unsigned(name.c_str(), val); + } + void operator()(const std::string& val) const { + f->dump_string(name.c_str(), val); + } +}; + +} // anonymous namespace + +using namespace service_daemon; + +template <typename I> +ServiceDaemon<I>::ServiceDaemon(CephContext *cct, RadosRef rados, + Threads<I>* threads) + : m_cct(cct), m_rados(rados), m_threads(threads) { + dout(20) << dendl; +} + +template <typename I> +ServiceDaemon<I>::~ServiceDaemon() { + dout(20) << dendl; + std::lock_guard timer_locker{m_threads->timer_lock}; + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + update_status(); + } +} + +template <typename I> +int ServiceDaemon<I>::init() { + dout(20) << dendl; + + std::string id = 
m_cct->_conf->name.get_id(); + if (id.find(RBD_MIRROR_AUTH_ID_PREFIX) == 0) { + id = id.substr(RBD_MIRROR_AUTH_ID_PREFIX.size()); + } + + std::string instance_id = stringify(m_rados->get_instance_id()); + std::map<std::string, std::string> service_metadata = { + {"id", id}, {"instance_id", instance_id}}; + int r = m_rados->service_daemon_register("rbd-mirror", instance_id, + service_metadata); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +void ServiceDaemon<I>::add_pool(int64_t pool_id, const std::string& pool_name) { + dout(20) << "pool_id=" << pool_id << ", pool_name=" << pool_name << dendl; + + { + std::lock_guard locker{m_lock}; + m_pools.insert({pool_id, {pool_name}}); + } + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::remove_pool(int64_t pool_id) { + dout(20) << "pool_id=" << pool_id << dendl; + { + std::lock_guard locker{m_lock}; + m_pools.erase(pool_id); + } + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::add_namespace(int64_t pool_id, + const std::string& namespace_name) { + dout(20) << "pool_id=" << pool_id << ", namespace=" << namespace_name + << dendl; + + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.ns_attributes[namespace_name]; + + // don't schedule update status as the namespace attributes are empty yet +} + +template <typename I> +void ServiceDaemon<I>::remove_namespace(int64_t pool_id, + const std::string& namespace_name) { + dout(20) << "pool_id=" << pool_id << ", namespace=" << namespace_name + << dendl; + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.ns_attributes.erase(namespace_name); + } + schedule_update_status(); +} + +template <typename I> +uint64_t ServiceDaemon<I>::add_or_update_callout(int64_t pool_id, + uint64_t callout_id, + CalloutLevel callout_level, + 
const std::string& text) { + dout(20) << "pool_id=" << pool_id << ", " + << "callout_id=" << callout_id << ", " + << "callout_level=" << callout_level << ", " + << "text=" << text << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return CALLOUT_ID_NONE; + } + + if (callout_id == CALLOUT_ID_NONE) { + callout_id = ++m_callout_id; + } + pool_it->second.callouts[callout_id] = {callout_level, text}; + } + + schedule_update_status(); + return callout_id; +} + +template <typename I> +void ServiceDaemon<I>::remove_callout(int64_t pool_id, uint64_t callout_id) { + dout(20) << "pool_id=" << pool_id << ", " + << "callout_id=" << callout_id << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.callouts.erase(callout_id); + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::add_or_update_attribute(int64_t pool_id, + const std::string& key, + const AttributeValue& value) { + dout(20) << "pool_id=" << pool_id << ", " + << "key=" << key << ", " + << "value=" << value << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.attributes[key] = value; + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::add_or_update_namespace_attribute( + int64_t pool_id, const std::string& namespace_name, const std::string& key, + const AttributeValue& value) { + if (namespace_name.empty()) { + add_or_update_attribute(pool_id, key, value); + return; + } + + dout(20) << "pool_id=" << pool_id << ", " + << "namespace=" << namespace_name << ", " + << "key=" << key << ", " + << "value=" << value << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + + auto ns_it = 
pool_it->second.ns_attributes.find(namespace_name); + if (ns_it == pool_it->second.ns_attributes.end()) { + return; + } + + ns_it->second[key] = value; + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::remove_attribute(int64_t pool_id, + const std::string& key) { + dout(20) << "pool_id=" << pool_id << ", " + << "key=" << key << dendl; + + { + std::lock_guard locker{m_lock}; + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.attributes.erase(key); + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::schedule_update_status() { + std::lock_guard timer_locker{m_threads->timer_lock}; + if (m_timer_ctx != nullptr) { + return; + } + + m_timer_ctx = new LambdaContext([this](int) { + m_timer_ctx = nullptr; + update_status(); + }); + m_threads->timer->add_event_after(1, m_timer_ctx); +} + +template <typename I> +void ServiceDaemon<I>::update_status() { + dout(20) << dendl; + ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock)); + + ceph::JSONFormatter f; + { + std::lock_guard locker{m_lock}; + f.open_object_section("pools"); + for (auto& pool_pair : m_pools) { + f.open_object_section(stringify(pool_pair.first).c_str()); + f.dump_string("name", pool_pair.second.name); + f.open_object_section("callouts"); + for (auto& callout : pool_pair.second.callouts) { + f.open_object_section(stringify(callout.first).c_str()); + f.dump_string("level", stringify(callout.second.level).c_str()); + f.dump_string("text", callout.second.text.c_str()); + f.close_section(); + } + f.close_section(); // callouts + + for (auto& attribute : pool_pair.second.attributes) { + AttributeDumpVisitor attribute_dump_visitor(&f, attribute.first); + boost::apply_visitor(attribute_dump_visitor, attribute.second); + } + + if (!pool_pair.second.ns_attributes.empty()) { + f.open_object_section("namespaces"); + for (auto& [ns, attributes] : pool_pair.second.ns_attributes) { + 
f.open_object_section(ns.c_str()); + for (auto& [key, value] : attributes) { + AttributeDumpVisitor attribute_dump_visitor(&f, key); + boost::apply_visitor(attribute_dump_visitor, value); + } + f.close_section(); // namespace + } + f.close_section(); // namespaces + } + f.close_section(); // pool + } + f.close_section(); // pools + } + + std::stringstream ss; + f.flush(ss); + + int r = m_rados->service_daemon_update_status({{"json", ss.str()}}); + if (r < 0) { + derr << "failed to update service daemon status: " << cpp_strerror(r) + << dendl; + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ServiceDaemon.h b/src/tools/rbd_mirror/ServiceDaemon.h new file mode 100644 index 000000000..8b1e0f584 --- /dev/null +++ b/src/tools/rbd_mirror/ServiceDaemon.h @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_H +#define CEPH_RBD_MIRROR_SERVICE_DAEMON_H + +#include "common/ceph_mutex.h" +#include "include/common_fwd.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <map> +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class ServiceDaemon { +public: + ServiceDaemon(CephContext *cct, RadosRef rados, Threads<ImageCtxT>* threads); + ~ServiceDaemon(); + + int init(); + + void add_pool(int64_t pool_id, const std::string& pool_name); + void remove_pool(int64_t pool_id); + + void add_namespace(int64_t pool_id, const std::string& namespace_name); + void remove_namespace(int64_t pool_id, const std::string& namespace_name); + + uint64_t add_or_update_callout(int64_t pool_id, uint64_t callout_id, + service_daemon::CalloutLevel callout_level, + const std::string& text); + void 
remove_callout(int64_t pool_id, uint64_t callout_id); + + void add_or_update_attribute(int64_t pool_id, const std::string& key, + const service_daemon::AttributeValue& value); + void add_or_update_namespace_attribute( + int64_t pool_id, const std::string& namespace_name, + const std::string& key, const service_daemon::AttributeValue& value); + void remove_attribute(int64_t pool_id, const std::string& key); + +private: + struct Callout { + service_daemon::CalloutLevel level; + std::string text; + + Callout() : level(service_daemon::CALLOUT_LEVEL_INFO) { + } + Callout(service_daemon::CalloutLevel level, const std::string& text) + : level(level), text(text) { + } + }; + typedef std::map<uint64_t, Callout> Callouts; + typedef std::map<std::string, service_daemon::AttributeValue> Attributes; + typedef std::map<std::string, Attributes> NamespaceAttributes; + + struct Pool { + std::string name; + Callouts callouts; + Attributes attributes; + NamespaceAttributes ns_attributes; + + Pool(const std::string& name) : name(name) { + } + }; + + typedef std::map<int64_t, Pool> Pools; + + CephContext *m_cct; + RadosRef m_rados; + Threads<ImageCtxT>* m_threads; + + ceph::mutex m_lock = ceph::make_mutex("rbd::mirror::ServiceDaemon"); + Pools m_pools; + uint64_t m_callout_id = service_daemon::CALLOUT_ID_NONE; + + Context* m_timer_ctx = nullptr; + + void schedule_update_status(); + void update_status(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_H diff --git a/src/tools/rbd_mirror/Threads.cc b/src/tools/rbd_mirror/Threads.cc new file mode 100644 index 000000000..b0c762641 --- /dev/null +++ b/src/tools/rbd_mirror/Threads.cc @@ -0,0 +1,38 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/Threads.h" +#include "common/Timer.h" +#include "librbd/AsioEngine.h" +#include "librbd/ImageCtx.h" 
+#include "librbd/asio/ContextWQ.h" + +namespace rbd { +namespace mirror { + +template <typename I> +Threads<I>::Threads(std::shared_ptr<librados::Rados>& rados) { + auto cct = static_cast<CephContext*>(rados->cct()); + asio_engine = new librbd::AsioEngine(rados); + work_queue = asio_engine->get_work_queue(); + + timer = new SafeTimer(cct, timer_lock, true); + timer->init(); +} + +template <typename I> +Threads<I>::~Threads() { + { + std::lock_guard timer_locker{timer_lock}; + timer->shutdown(); + } + delete timer; + + work_queue->drain(); + delete asio_engine; +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Threads<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Threads.h b/src/tools/rbd_mirror/Threads.h new file mode 100644 index 000000000..35c0b0f1c --- /dev/null +++ b/src/tools/rbd_mirror/Threads.h @@ -0,0 +1,45 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_THREADS_H +#define CEPH_RBD_MIRROR_THREADS_H + +#include "include/common_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "common/ceph_mutex.h" +#include "common/Timer.h" +#include <memory> + +class ThreadPool; + +namespace librbd { +struct AsioEngine; +struct ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class Threads { +public: + librbd::AsioEngine* asio_engine = nullptr; + librbd::asio::ContextWQ* work_queue = nullptr; + + SafeTimer *timer = nullptr; + ceph::mutex timer_lock = ceph::make_mutex("Threads::timer_lock"); + + explicit Threads(std::shared_ptr<librados::Rados>& rados); + Threads(const Threads&) = delete; + Threads& operator=(const Threads&) = delete; + + ~Threads(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::Threads<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_THREADS_H diff --git 
a/src/tools/rbd_mirror/Throttler.cc b/src/tools/rbd_mirror/Throttler.cc new file mode 100644 index 000000000..07d6e397e --- /dev/null +++ b/src/tools/rbd_mirror/Throttler.cc @@ -0,0 +1,240 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "Throttler.h" +#include "common/Formatter.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Throttler:: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +template <typename I> +Throttler<I>::Throttler(CephContext *cct, const std::string &config_key) + : m_cct(cct), m_config_key(config_key), + m_config_keys{m_config_key.c_str(), nullptr}, + m_lock(ceph::make_mutex( + librbd::util::unique_lock_name("rbd::mirror::Throttler", this))), + m_max_concurrent_ops(cct->_conf.get_val<uint64_t>(m_config_key)) { + dout(20) << m_config_key << "=" << m_max_concurrent_ops << dendl; + m_cct->_conf.add_observer(this); +} + +template <typename I> +Throttler<I>::~Throttler() { + m_cct->_conf.remove_observer(this); + + std::lock_guard locker{m_lock}; + ceph_assert(m_inflight_ops.empty()); + ceph_assert(m_queue.empty()); +} + +template <typename I> +void Throttler<I>::start_op(const std::string &ns, + const std::string &id_, + Context *on_start) { + Id id{ns, id_}; + + dout(20) << "id=" << id << dendl; + + int r = 0; + { + std::lock_guard locker{m_lock}; + + if (m_inflight_ops.count(id) > 0) { + dout(20) << "duplicate for already started op " << id << dendl; + } else 
if (m_queued_ops.count(id) > 0) { + dout(20) << "duplicate for already queued op " << id << dendl; + std::swap(m_queued_ops[id], on_start); + r = -ENOENT; + } else if (m_max_concurrent_ops == 0 || + m_inflight_ops.size() < m_max_concurrent_ops) { + ceph_assert(m_queue.empty()); + m_inflight_ops.insert(id); + dout(20) << "ready to start op for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]" + << dendl; + } else { + m_queue.push_back(id); + std::swap(m_queued_ops[id], on_start); + dout(20) << "op for " << id << " has been queued" << dendl; + } + } + + if (on_start != nullptr) { + on_start->complete(r); + } +} + +template <typename I> +bool Throttler<I>::cancel_op(const std::string &ns, + const std::string &id_) { + Id id{ns, id_}; + + dout(20) << "id=" << id << dendl; + + Context *on_start = nullptr; + { + std::lock_guard locker{m_lock}; + auto it = m_queued_ops.find(id); + if (it != m_queued_ops.end()) { + dout(20) << "canceled queued op for " << id << dendl; + m_queue.remove(id); + on_start = it->second; + m_queued_ops.erase(it); + } + } + + if (on_start == nullptr) { + return false; + } + + on_start->complete(-ECANCELED); + return true; +} + +template <typename I> +void Throttler<I>::finish_op(const std::string &ns, + const std::string &id_) { + Id id{ns, id_}; + + dout(20) << "id=" << id << dendl; + + if (cancel_op(ns, id_)) { + return; + } + + Context *on_start = nullptr; + { + std::lock_guard locker{m_lock}; + + m_inflight_ops.erase(id); + + if (m_inflight_ops.size() < m_max_concurrent_ops && !m_queue.empty()) { + auto id = m_queue.front(); + auto it = m_queued_ops.find(id); + ceph_assert(it != m_queued_ops.end()); + m_inflight_ops.insert(id); + dout(20) << "ready to start op for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]" + << dendl; + on_start = it->second; + m_queued_ops.erase(it); + m_queue.pop_front(); + } + } + + if (on_start != nullptr) { + on_start->complete(0); + } +} + +template 
<typename I> +void Throttler<I>::drain(const std::string &ns, int r) { + dout(20) << "ns=" << ns << dendl; + + std::map<Id, Context *> queued_ops; + { + std::lock_guard locker{m_lock}; + for (auto it = m_queued_ops.begin(); it != m_queued_ops.end(); ) { + if (it->first.first == ns) { + queued_ops[it->first] = it->second; + m_queue.remove(it->first); + it = m_queued_ops.erase(it); + } else { + it++; + } + } + for (auto it = m_inflight_ops.begin(); it != m_inflight_ops.end(); ) { + if (it->first == ns) { + dout(20) << "inflight_op " << *it << dendl; + it = m_inflight_ops.erase(it); + } else { + it++; + } + } + } + + for (auto &it : queued_ops) { + dout(20) << "queued_op " << it.first << dendl; + it.second->complete(r); + } +} + +template <typename I> +void Throttler<I>::set_max_concurrent_ops(uint32_t max) { + dout(20) << "max=" << max << dendl; + + std::list<Context *> ops; + { + std::lock_guard locker{m_lock}; + m_max_concurrent_ops = max; + + // Start waiting ops in the case of available free slots + while ((m_max_concurrent_ops == 0 || + m_inflight_ops.size() < m_max_concurrent_ops) && + !m_queue.empty()) { + auto id = m_queue.front(); + m_inflight_ops.insert(id); + dout(20) << "ready to start op for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_ops << "]" + << dendl; + auto it = m_queued_ops.find(id); + ceph_assert(it != m_queued_ops.end()); + ops.push_back(it->second); + m_queued_ops.erase(it); + m_queue.pop_front(); + } + } + + for (const auto& ctx : ops) { + ctx->complete(0); + } +} + +template <typename I> +void Throttler<I>::print_status(ceph::Formatter *f) { + dout(20) << dendl; + + std::lock_guard locker{m_lock}; + + f->dump_int("max_parallel_requests", m_max_concurrent_ops); + f->dump_int("running_requests", m_inflight_ops.size()); + f->dump_int("waiting_requests", m_queue.size()); +} + +template <typename I> +const char** Throttler<I>::get_tracked_conf_keys() const { + return m_config_keys; +} + +template <typename I> +void 
Throttler<I>::handle_conf_change(const ConfigProxy& conf, + const std::set<std::string> &changed) { + if (changed.count(m_config_key)) { + set_max_concurrent_ops(conf.get_val<uint64_t>(m_config_key)); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Throttler<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Throttler.h b/src/tools/rbd_mirror/Throttler.h new file mode 100644 index 000000000..32080238a --- /dev/null +++ b/src/tools/rbd_mirror/Throttler.h @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_THROTTLER_H +#define RBD_MIRROR_THROTTLER_H + +#include <list> +#include <map> +#include <set> +#include <sstream> +#include <string> +#include <utility> + +#include "common/ceph_mutex.h" +#include "common/config_obs.h" +#include "include/common_fwd.h" + +class Context; + +namespace ceph { class Formatter; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class Throttler : public md_config_obs_t { +public: + static Throttler *create( + CephContext *cct, + const std::string &config_key) { + return new Throttler(cct, config_key); + } + void destroy() { + delete this; + } + + Throttler(CephContext *cct, + const std::string &config_key); + ~Throttler() override; + + void set_max_concurrent_ops(uint32_t max); + void start_op(const std::string &ns, const std::string &id, + Context *on_start); + bool cancel_op(const std::string &ns, const std::string &id); + void finish_op(const std::string &ns, const std::string &id); + void drain(const std::string &ns, int r); + + void print_status(ceph::Formatter *f); + +private: + typedef std::pair<std::string, std::string> Id; + + CephContext *m_cct; + const std::string m_config_key; + mutable const char* m_config_keys[2]; + + ceph::mutex m_lock; + uint32_t m_max_concurrent_ops; + std::list<Id> m_queue; + std::map<Id, Context *> 
m_queued_ops; + std::set<Id> m_inflight_ops; + + const char **get_tracked_conf_keys() const override; + void handle_conf_change(const ConfigProxy& conf, + const std::set<std::string> &changed) override; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::Throttler<librbd::ImageCtx>; + +#endif // RBD_MIRROR_THROTTLER_H diff --git a/src/tools/rbd_mirror/Types.cc b/src/tools/rbd_mirror/Types.cc new file mode 100644 index 000000000..cd71c73b1 --- /dev/null +++ b/src/tools/rbd_mirror/Types.cc @@ -0,0 +1,32 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/Types.h" + +namespace rbd { +namespace mirror { + +std::ostream &operator<<(std::ostream &os, const ImageId &image_id) { + return os << "global id=" << image_id.global_id << ", " + << "id=" << image_id.id; +} + +std::ostream& operator<<(std::ostream& lhs, + const LocalPoolMeta& rhs) { + return lhs << "mirror_uuid=" << rhs.mirror_uuid; +} + +std::ostream& operator<<(std::ostream& lhs, + const RemotePoolMeta& rhs) { + return lhs << "mirror_uuid=" << rhs.mirror_uuid << ", " + "mirror_peer_uuid=" << rhs.mirror_peer_uuid; +} + +std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer) { + return lhs << "uuid: " << peer.uuid + << " cluster: " << peer.cluster_name + << " client: " << peer.client_name; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h new file mode 100644 index 000000000..9bba58fb1 --- /dev/null +++ b/src/tools/rbd_mirror/Types.h @@ -0,0 +1,171 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_TYPES_H +#define CEPH_RBD_MIRROR_TYPES_H + +#include <iostream> +#include <memory> +#include <set> +#include <string> +#include <vector> + +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" + +namespace rbd { +namespace mirror 
{ + +template <typename> struct MirrorStatusUpdater; + +// Performance counters +enum { + l_rbd_mirror_journal_first = 27000, + l_rbd_mirror_journal_entries, + l_rbd_mirror_journal_replay_bytes, + l_rbd_mirror_journal_replay_latency, + l_rbd_mirror_journal_last, + l_rbd_mirror_snapshot_first, + l_rbd_mirror_snapshot_snapshots, + l_rbd_mirror_snapshot_sync_time, + l_rbd_mirror_snapshot_sync_bytes, + // per-image only counters below + l_rbd_mirror_snapshot_remote_timestamp, + l_rbd_mirror_snapshot_local_timestamp, + l_rbd_mirror_snapshot_last_sync_time, + l_rbd_mirror_snapshot_last_sync_bytes, + l_rbd_mirror_snapshot_last, +}; + +typedef std::shared_ptr<librados::Rados> RadosRef; +typedef std::shared_ptr<librados::IoCtx> IoCtxRef; +typedef std::shared_ptr<librbd::Image> ImageRef; + +struct ImageId { + std::string global_id; + std::string id; + + explicit ImageId(const std::string &global_id) : global_id(global_id) { + } + ImageId(const std::string &global_id, const std::string &id) + : global_id(global_id), id(id) { + } + + inline bool operator==(const ImageId &rhs) const { + return (global_id == rhs.global_id && id == rhs.id); + } + inline bool operator<(const ImageId &rhs) const { + return global_id < rhs.global_id; + } +}; + +std::ostream &operator<<(std::ostream &, const ImageId &image_id); + +typedef std::set<ImageId> ImageIds; + +struct LocalPoolMeta { + LocalPoolMeta() {} + LocalPoolMeta(const std::string& mirror_uuid) + : mirror_uuid(mirror_uuid) { + } + + std::string mirror_uuid; +}; + +std::ostream& operator<<(std::ostream& lhs, + const LocalPoolMeta& local_pool_meta); + +struct RemotePoolMeta { + RemotePoolMeta() {} + RemotePoolMeta(const std::string& mirror_uuid, + const std::string& mirror_peer_uuid) + : mirror_uuid(mirror_uuid), + mirror_peer_uuid(mirror_peer_uuid) { + } + + std::string mirror_uuid; + std::string mirror_peer_uuid; +}; + +std::ostream& operator<<(std::ostream& lhs, + const RemotePoolMeta& remote_pool_meta); + +template <typename I> 
+struct Peer { + std::string uuid; + mutable librados::IoCtx io_ctx; + RemotePoolMeta remote_pool_meta; + MirrorStatusUpdater<I>* mirror_status_updater = nullptr; + + Peer() { + } + Peer(const std::string& uuid, + librados::IoCtx& io_ctx, + const RemotePoolMeta& remote_pool_meta, + MirrorStatusUpdater<I>* mirror_status_updater) + : io_ctx(io_ctx), + remote_pool_meta(remote_pool_meta), + mirror_status_updater(mirror_status_updater) { + } + + inline bool operator<(const Peer &rhs) const { + return uuid < rhs.uuid; + } +}; + +template <typename I> +std::ostream& operator<<(std::ostream& lhs, const Peer<I>& peer) { + return lhs << peer.remote_pool_meta; +} + +struct PeerSpec { + PeerSpec() = default; + PeerSpec(const std::string &uuid, const std::string &cluster_name, + const std::string &client_name) + : uuid(uuid), cluster_name(cluster_name), client_name(client_name) + { + } + PeerSpec(const librbd::mirror_peer_site_t &peer) : + uuid(peer.uuid), + cluster_name(peer.site_name), + client_name(peer.client_name) + { + } + + std::string uuid; + std::string cluster_name; + std::string client_name; + + /// optional config properties + std::string mon_host; + std::string key; + + bool operator==(const PeerSpec& rhs) const { + return (uuid == rhs.uuid && + cluster_name == rhs.cluster_name && + client_name == rhs.client_name && + mon_host == rhs.mon_host && + key == rhs.key); + } + bool operator<(const PeerSpec& rhs) const { + if (uuid != rhs.uuid) { + return uuid < rhs.uuid; + } else if (cluster_name != rhs.cluster_name) { + return cluster_name < rhs.cluster_name; + } else if (client_name != rhs.client_name) { + return client_name < rhs.client_name; + } else if (mon_host < rhs.mon_host) { + return mon_host < rhs.mon_host; + } else { + return key < rhs.key; + } + } +}; + +std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer); + +} // namespace mirror +} // namespace rbd + + +#endif // CEPH_RBD_MIRROR_TYPES_H diff --git 
a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc new file mode 100644 index 000000000..19a98804c --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc @@ -0,0 +1,299 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/journal/Policy.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::SnapshotPurgeRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; + +template <typename I> +void SnapshotPurgeRequest<I>::send() { + open_image(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::open_image() { + dout(10) << dendl; + m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false); + + // ensure non-primary images can be modified + m_image_ctx->read_only_mask &= ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + + { + std::unique_lock image_locker{m_image_ctx->image_lock}; + m_image_ctx->set_journal_policy(new JournalPolicy()); + } + + Context *ctx = create_context_callback< + SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_open_image>( + this); + m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_open_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to open image '" << m_image_id << "': " << cpp_strerror(r) + << dendl; + 
m_image_ctx = nullptr; + + finish(r); + return; + } + + acquire_lock(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::acquire_lock() { + dout(10) << dendl; + + m_image_ctx->owner_lock.lock_shared(); + if (m_image_ctx->exclusive_lock == nullptr) { + m_image_ctx->owner_lock.unlock_shared(); + + start_snap_unprotect(); + return; + } + + m_image_ctx->exclusive_lock->acquire_lock(create_context_callback< + SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_acquire_lock>( + this)); + m_image_ctx->owner_lock.unlock_shared(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + start_snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::start_snap_unprotect() { + dout(10) << dendl; + + { + std::shared_lock image_locker{m_image_ctx->image_lock}; + m_snaps = m_image_ctx->snaps; + } + snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::snap_unprotect() { + if (m_snaps.empty()) { + close_image(); + return; + } + + librados::snap_t snap_id = m_snaps.back(); + m_image_ctx->image_lock.lock_shared(); + int r = m_image_ctx->get_snap_namespace(snap_id, &m_snap_namespace); + if (r < 0) { + m_image_ctx->image_lock.unlock_shared(); + + derr << "failed to get snap namespace: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + r = m_image_ctx->get_snap_name(snap_id, &m_snap_name); + if (r < 0) { + m_image_ctx->image_lock.unlock_shared(); + + derr << "failed to get snap name: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + bool is_protected; + r = m_image_ctx->is_snap_protected(snap_id, &is_protected); + if (r < 0) { + m_image_ctx->image_lock.unlock_shared(); + + derr << "failed to get snap protection status: " << cpp_strerror(r) + << dendl; + 
m_ret_val = r; + close_image(); + return; + } + m_image_ctx->image_lock.unlock_shared(); + + if (!is_protected) { + snap_remove(); + return; + } + + dout(10) << "snap_id=" << snap_id << ", " + << "snap_namespace=" << m_snap_namespace << ", " + << "snap_name=" << m_snap_name << dendl; + + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + derr << "lost exclusive lock" << dendl; + m_ret_val = r; + close_image(); + return; + } + + auto ctx = new LambdaContext([this, finish_op_ctx](int r) { + handle_snap_unprotect(r); + finish_op_ctx->complete(0); + }); + std::shared_lock owner_locker{m_image_ctx->owner_lock}; + m_image_ctx->operations->execute_snap_unprotect( + m_snap_namespace, m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_snap_unprotect(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshot in-use" << dendl; + m_ret_val = r; + close_image(); + return; + } else if (r < 0) { + derr << "failed to unprotect snapshot: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + { + // avoid the need to refresh to delete the newly unprotected snapshot + std::shared_lock image_locker{m_image_ctx->image_lock}; + librados::snap_t snap_id = m_snaps.back(); + auto snap_info_it = m_image_ctx->snap_info.find(snap_id); + if (snap_info_it != m_image_ctx->snap_info.end()) { + snap_info_it->second.protection_status = + RBD_PROTECTION_STATUS_UNPROTECTED; + } + } + + snap_remove(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::snap_remove() { + librados::snap_t snap_id = m_snaps.back(); + dout(10) << "snap_id=" << snap_id << ", " + << "snap_namespace=" << m_snap_namespace << ", " + << "snap_name=" << m_snap_name << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + derr << "lost exclusive lock" << dendl; + m_ret_val = r; + close_image(); + return; + } + + auto ctx = new LambdaContext([this, 
finish_op_ctx](int r) { + handle_snap_remove(r); + finish_op_ctx->complete(0); + }); + std::shared_lock owner_locker{m_image_ctx->owner_lock}; + m_image_ctx->operations->execute_snap_remove( + m_snap_namespace, m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_snap_remove(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshot in-use" << dendl; + m_ret_val = r; + close_image(); + return; + } else if (r < 0) { + derr << "failed to remove snapshot: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + m_snaps.pop_back(); + snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::close_image() { + dout(10) << dendl; + + m_image_ctx->state->close(create_context_callback< + SnapshotPurgeRequest<I>, + &SnapshotPurgeRequest<I>::handle_close_image>(this)); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_close_image(int r) { + dout(10) << "r=" << r << dendl; + + m_image_ctx = nullptr; + + if (r < 0) { + derr << "failed to close: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + finish(0); +} + +template <typename I> +void SnapshotPurgeRequest<I>::finish(int r) { + if (m_ret_val < 0) { + r = m_ret_val; + } + + m_on_finish->complete(r); + delete this; +} + +template <typename I> +Context *SnapshotPurgeRequest<I>::start_lock_op(int* r) { + std::shared_lock owner_locker{m_image_ctx->owner_lock}; + if (m_image_ctx->exclusive_lock == nullptr) { + return new LambdaContext([](int r) {}); + } + return m_image_ctx->exclusive_lock->start_op(r); +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h new file mode 100644 index 000000000..70cae8518 --- /dev/null +++ 
b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h @@ -0,0 +1,105 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H + +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include <string> +#include <vector> + +class Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class SnapshotPurgeRequest { +public: + static SnapshotPurgeRequest* create(librados::IoCtx &io_ctx, + const std::string &image_id, + Context *on_finish) { + return new SnapshotPurgeRequest(io_ctx, image_id, on_finish); + } + + SnapshotPurgeRequest(librados::IoCtx &io_ctx, const std::string &image_id, + Context *on_finish) + : m_io_ctx(io_ctx), m_image_id(image_id), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE + * | + * v + * ACQUIRE_LOCK + * | + * | (repeat for each snapshot) + * |/------------------------\ + * | | + * v (skip if not needed) | + * SNAP_UNPROTECT | + * | | + * v (skip if not needed) | + * SNAP_REMOVE -----------------/ + * | + * v + * CLOSE_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_image_id; + Context *m_on_finish; + + ImageCtxT *m_image_ctx = nullptr; + int m_ret_val = 0; + + std::vector<librados::snap_t> m_snaps; + cls::rbd::SnapshotNamespace m_snap_namespace; + std::string m_snap_name; + + void open_image(); + void handle_open_image(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void start_snap_unprotect(); + void snap_unprotect(); + void handle_snap_unprotect(int r); + + void snap_remove(); + void handle_snap_remove(int r); + + void close_image(); + void handle_close_image(int r); + + void 
finish(int r); + + Context *start_lock_op(int* r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H + diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc new file mode 100644 index 000000000..e53923ef3 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc @@ -0,0 +1,419 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/journal/ResetRequest.h" +#include "librbd/mirror/ImageRemoveRequest.h" +#include "librbd/mirror/GetInfoRequest.h" +#include "librbd/trash/MoveRequest.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashMoveRequest: " \ + << this << " " << __func__ << ": " +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void TrashMoveRequest<I>::send() { + get_mirror_image_id(); +} + +template <typename I> +void TrashMoveRequest<I>::get_mirror_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start(&op, 
m_global_image_id); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_get_mirror_image_id>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_get_mirror_image_id(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish(&bl_it, + &m_image_id); + } + if (r == -ENOENT) { + dout(10) << "image " << m_global_image_id << " is not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "error retrieving local id for image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_mirror_info(); +} + +template <typename I> +void TrashMoveRequest<I>::get_mirror_info() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_get_mirror_info>(this); + auto req = librbd::mirror::GetInfoRequest<I>::create( + m_io_ctx, m_op_work_queue, m_image_id, &m_mirror_image, &m_promotion_state, + &m_primary_mirror_uuid, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_get_mirror_info(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(5) << "image " << m_global_image_id << " is not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "error retrieving image primary info for image " + << m_global_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (m_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) { + dout(10) << "image " << m_global_image_id << " is local primary" << dendl; + finish(-EPERM); + return; + } else if (m_promotion_state == librbd::mirror::PROMOTION_STATE_ORPHAN && + !m_resync) { + dout(10) << "image " << m_global_image_id << " is orphaned" << dendl; + 
finish(-EPERM); + return; + } + + disable_mirror_image(); +} + +template <typename I> +void TrashMoveRequest<I>::disable_mirror_image() { + dout(10) << dendl; + + m_mirror_image.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_set(&op, m_image_id, m_mirror_image); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_disable_mirror_image>(this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_disable_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "local image is not mirrored, aborting deletion." << dendl; + finish(r); + return; + } else if (r == -EEXIST || r == -EINVAL) { + derr << "cannot disable mirroring for image " << m_global_image_id + << ": global_image_id has changed/reused: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "cannot disable mirroring for image " << m_global_image_id + << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + open_image(); +} + +template <typename I> +void TrashMoveRequest<I>::open_image() { + dout(10) << dendl; + + m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false); + + // ensure non-primary images can be modified + m_image_ctx->read_only_mask &= ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + + { + // don't attempt to open the journal + std::unique_lock image_locker{m_image_ctx->image_lock}; + m_image_ctx->set_journal_policy(new JournalPolicy()); + } + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_open_image>(this); + m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_open_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(5) << 
"mirror image does not exist, removing orphaned metadata" << dendl; + m_image_ctx = nullptr; + remove_mirror_image(); + return; + } + + if (r < 0) { + derr << "failed to open image: " << cpp_strerror(r) << dendl; + m_image_ctx = nullptr; + finish(r); + return; + } + + if (m_image_ctx->old_format) { + derr << "cannot move v1 image to trash" << dendl; + m_ret_val = -EINVAL; + close_image(); + return; + } + + reset_journal(); +} + +template <typename I> +void TrashMoveRequest<I>::reset_journal() { + if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + // snapshot-based mirroring doesn't require journal feature + acquire_lock(); + return; + } + + dout(10) << dendl; + + // TODO use Journal thread pool for journal ops until converted to ASIO + ContextWQ* context_wq; + librbd::Journal<>::get_work_queue( + reinterpret_cast<CephContext*>(m_io_ctx.cct()), &context_wq); + + // ensure that if the image is recovered any peers will split-brain + auto ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_reset_journal>(this); + auto req = librbd::journal::ResetRequest<I>::create( + m_io_ctx, m_image_id, librbd::Journal<>::IMAGE_CLIENT_ID, + librbd::Journal<>::LOCAL_MIRROR_UUID, context_wq, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_reset_journal(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to reset journal: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + acquire_lock(); +} + +template <typename I> +void TrashMoveRequest<I>::acquire_lock() { + m_image_ctx->owner_lock.lock_shared(); + if (m_image_ctx->exclusive_lock == nullptr) { + m_image_ctx->owner_lock.unlock_shared(); + + if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + // snapshot-based mirroring doesn't require exclusive-lock + trash_move(); + } else { + derr << "exclusive lock feature not enabled" << dendl; + m_ret_val = -EINVAL; + 
close_image(); + } + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_acquire_lock>(this); + m_image_ctx->exclusive_lock->block_requests(0); + m_image_ctx->exclusive_lock->acquire_lock(ctx); + m_image_ctx->owner_lock.unlock_shared(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + trash_move(); +} + +template <typename I> +void TrashMoveRequest<I>::trash_move() { + dout(10) << dendl; + + utime_t delete_time{ceph_clock_now()}; + utime_t deferment_end_time{delete_time}; + deferment_end_time += + m_image_ctx->config.template get_val<uint64_t>("rbd_mirroring_delete_delay"); + + m_trash_image_spec = { + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING, m_image_ctx->name, delete_time, + deferment_end_time}; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_trash_move>(this); + auto req = librbd::trash::MoveRequest<I>::create( + m_io_ctx, m_image_id, m_trash_image_spec, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_trash_move(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to move image to trash: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + m_moved_to_trash = true; + remove_mirror_image(); +} + +template <typename I> +void TrashMoveRequest<I>::remove_mirror_image() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_remove_mirror_image>(this); + auto req = librbd::mirror::ImageRemoveRequest<I>::create( + m_io_ctx, m_global_image_id, m_image_id, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_remove_mirror_image(int r) { + dout(10) << "r=" << r 
<< dendl; + + if (r == -ENOENT) { + dout(10) << "local image is not mirrored" << dendl; + } else if (r < 0) { + derr << "failed to remove mirror image state for " << m_global_image_id + << ": " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + + close_image(); +} + +template <typename I> +void TrashMoveRequest<I>::close_image() { + dout(10) << dendl; + + if (m_image_ctx == nullptr) { + handle_close_image(0); + return; + } + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_close_image>(this); + m_image_ctx->state->close(ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_close_image(int r) { + dout(10) << "r=" << r << dendl; + + m_image_ctx = nullptr; + + if (r < 0) { + derr << "failed to close image: " << cpp_strerror(r) << dendl; + } + + // don't send notification if we failed + if (!m_moved_to_trash) { + finish(0); + return; + } + + notify_trash_add(); +} + +template <typename I> +void TrashMoveRequest<I>::notify_trash_add() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_notify_trash_add>(this); + librbd::TrashWatcher<I>::notify_image_added(m_io_ctx, m_image_id, + m_trash_image_spec, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_notify_trash_add(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl; + } + + finish(0); +} + +template <typename I> +void TrashMoveRequest<I>::finish(int r) { + if (m_ret_val < 0) { + r = m_ret_val; + } + + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>; + diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h new file mode 100644 index 
000000000..5b3f02519 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h @@ -0,0 +1,142 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/mirror/Types.h" +#include <string> + +struct Context; +namespace librbd { +struct ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashMoveRequest { +public: + static TrashMoveRequest* create(librados::IoCtx& io_ctx, + const std::string& global_image_id, + bool resync, + librbd::asio::ContextWQ* op_work_queue, + Context* on_finish) { + return new TrashMoveRequest(io_ctx, global_image_id, resync, op_work_queue, + on_finish); + } + + TrashMoveRequest(librados::IoCtx& io_ctx, const std::string& global_image_id, + bool resync, librbd::asio::ContextWQ* op_work_queue, + Context* on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), m_resync(resync), + m_op_work_queue(op_work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * GET_MIRROR_IMAGE_ID + * | + * v + * GET_MIRROR_INFO + * | + * v + * DISABLE_MIRROR_IMAGE + * | + * v + * OPEN_IMAGE + * | + * v (skip if not needed) + * RESET_JOURNAL + * | + * v (skip if not needed) + * ACQUIRE_LOCK + * | + * v + * TRASH_MOVE + * | + * v + * REMOVE_MIRROR_IMAGE + * | + * v + * CLOSE_IMAGE + * | + * v + * NOTIFY_TRASH_ADD + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + bool m_resync; + librbd::asio::ContextWQ *m_op_work_queue; + Context *m_on_finish; + + ceph::bufferlist m_out_bl; + 
std::string m_image_id; + cls::rbd::MirrorImage m_mirror_image; + librbd::mirror::PromotionState m_promotion_state; + std::string m_primary_mirror_uuid; + cls::rbd::TrashImageSpec m_trash_image_spec; + ImageCtxT *m_image_ctx = nullptr;; + int m_ret_val = 0; + bool m_moved_to_trash = false; + + void get_mirror_image_id(); + void handle_get_mirror_image_id(int r); + + void get_mirror_info(); + void handle_get_mirror_info(int r); + + void disable_mirror_image(); + void handle_disable_mirror_image(int r); + + void open_image(); + void handle_open_image(int r); + + void reset_journal(); + void handle_reset_journal(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void trash_move(); + void handle_trash_move(int r); + + void remove_mirror_image(); + void handle_remove_mirror_image(int r); + + void close_image(); + void handle_close_image(int r); + + void notify_trash_add(); + void handle_notify_trash_add(int r); + + void finish(int r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc new file mode 100644 index 000000000..4d7c1c9df --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc @@ -0,0 +1,265 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h" +#include "include/ceph_assert.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/trash/RemoveRequest.h" +#include 
"tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashRemoveRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void TrashRemoveRequest<I>::send() { + *m_error_result = ERROR_RESULT_RETRY; + + get_trash_image_spec(); +} + +template <typename I> +void TrashRemoveRequest<I>::get_trash_image_spec() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::trash_get_start(&op, m_image_id); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_get_trash_image_spec>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_get_trash_image_spec(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::trash_get_finish(&bl_it, &m_trash_image_spec); + } + + if (r == -ENOENT || (r >= 0 && m_trash_image_spec.source != + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING)) { + dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl; + finish(0); + return; + } else if (r < 0) { + derr << "error getting image id " << m_image_id << " info from trash: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL && + m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + dout(10) << "image " << m_image_id << " is not in an expected trash state: " + << m_trash_image_spec.state << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(-EBUSY); + return; + } 
+ + set_trash_state(); +} + +template <typename I> +void TrashRemoveRequest<I>::set_trash_state() { + if (m_trash_image_spec.state == cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + get_snap_context(); + return; + } + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::trash_state_set(&op, m_image_id, + cls::rbd::TRASH_IMAGE_STATE_REMOVING, + cls::rbd::TRASH_IMAGE_STATE_NORMAL); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_set_trash_state>(this); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_set_trash_state(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl; + finish(0); + return; + } else if (r < 0 && r != -EOPNOTSUPP) { + derr << "error setting trash image state for image id " << m_image_id + << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_snap_context(); +} + +template <typename I> +void TrashRemoveRequest<I>::get_snap_context() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::get_snapcontext_start(&op); + + std::string header_oid = librbd::util::header_name(m_image_id); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_get_snap_context>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(header_oid, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_get_snap_context(int r) { + dout(10) << "r=" << r << dendl; + + ::SnapContext snapc; + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::get_snapcontext_finish(&bl_it, &snapc); + } + if (r < 0 && r != -ENOENT) { + derr << "error retrieving snapshot context for image " + << m_image_id << ": " << 
cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_has_snapshots = (!snapc.empty()); + purge_snapshots(); +} + +template <typename I> +void TrashRemoveRequest<I>::purge_snapshots() { + if (!m_has_snapshots) { + remove_image(); + return; + } + + dout(10) << dendl; + auto ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_purge_snapshots>(this); + auto req = SnapshotPurgeRequest<I>::create(m_io_ctx, m_image_id, ctx); + req->send(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_purge_snapshots(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshots still in-use" << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(r); + return; + } else if (r < 0) { + derr << "failed to purge image snapshots: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + remove_image(); +} + +template <typename I> +void TrashRemoveRequest<I>::remove_image() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_remove_image>(this); + auto req = librbd::trash::RemoveRequest<I>::create( + m_io_ctx, m_image_id, m_op_work_queue, true, m_progress_ctx, + ctx); + req->send(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_remove_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -ENOTEMPTY) { + // image must have clone v2 snapshot still associated to child + dout(10) << "snapshots still in-use" << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(-EBUSY); + return; + } + + if (r < 0 && r != -ENOENT) { + derr << "error removing image " << m_image_id << " " + << "(" << m_image_id << ") from local pool: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + notify_trash_removed(); +} + +template <typename I> +void TrashRemoveRequest<I>::notify_trash_removed() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashRemoveRequest<I>, + 
    &TrashRemoveRequest<I>::handle_notify_trash_removed>(this);
  librbd::TrashWatcher<I>::notify_image_removed(m_io_ctx, m_image_id, ctx);
}

// Notification failures are logged but never fail the overall request --
// the image itself has already been removed.
template <typename I>
void TrashRemoveRequest<I>::handle_notify_trash_removed(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl;
  }

  finish(0);
}

// Complete the caller's context and self-destruct (heap-allocated,
// single-shot request object).
template <typename I>
void TrashRemoveRequest<I>::finish(int r) {
  dout(10) << "r=" << r << dendl;

  m_on_finish->complete(r);
  delete this;
}

} // namespace image_deleter
} // namespace mirror
} // namespace rbd

template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h
new file mode 100644
index 000000000..b99736b33
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h
@@ -0,0 +1,117 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
#define CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H

#include "include/rados/librados.hpp"
#include "include/buffer.h"
#include "cls/rbd/cls_rbd_types.h"
#include "librbd/internal.h"
#include "tools/rbd_mirror/image_deleter/Types.h"
#include <string>
#include <vector>

class Context;
class ContextWQ;
namespace librbd {
struct ImageCtx;
namespace asio { struct ContextWQ; }
} // namespace librbd

namespace rbd {
namespace mirror {
namespace image_deleter {

// Single-shot async state machine that removes one image from the RBD
// trash: marks the trash entry REMOVING, purges snapshots, deletes the
// image and notifies trash watchers.  The object deletes itself once
// on_finish has been completed.
template <typename ImageCtxT = librbd::ImageCtx>
class TrashRemoveRequest {
public:
  // Factory helper; the returned request is owned by itself and freed in
  // finish().
  static TrashRemoveRequest* create(librados::IoCtx &io_ctx,
                                    const std::string &image_id,
                                    ErrorResult *error_result,
                                    librbd::asio::ContextWQ *op_work_queue,
                                    Context *on_finish) {
    return new TrashRemoveRequest(io_ctx, image_id, error_result, op_work_queue,
                                  on_finish);
  }

  // error_result: out-parameter used to ask the caller to retry (e.g. when
  // snapshots are still in use).
  TrashRemoveRequest(librados::IoCtx &io_ctx, const std::string &image_id,
                     ErrorResult *error_result,
                     librbd::asio::ContextWQ *op_work_queue,
                     Context *on_finish)
    : m_io_ctx(io_ctx), m_image_id(image_id), m_error_result(error_result),
      m_op_work_queue(op_work_queue), m_on_finish(on_finish) {
  }

  void send();

private:
  /*
   * @verbatim
   *
   * <start>
   *    |
   *    v
   * GET_TRASH_IMAGE_SPEC
   *    |
   *    v
   * SET_TRASH_STATE
   *    |
   *    v
   * GET_SNAP_CONTEXT
   *    |
   *    v
   * PURGE_SNAPSHOTS
   *    |
   *    v
   * TRASH_REMOVE
   *    |
   *    v
   * NOTIFY_TRASH_REMOVE
   *    |
   *    v
   * <finish>
   *
   * @endverbatim
   */

  librados::IoCtx &m_io_ctx;
  std::string m_image_id;
  ErrorResult *m_error_result;
  librbd::asio::ContextWQ *m_op_work_queue;
  Context *m_on_finish;

  ceph::bufferlist m_out_bl;                      // scratch buffer for reads
  cls::rbd::TrashImageSpec m_trash_image_spec;    // fetched trash entry
  bool m_has_snapshots = false;                   // snapc non-empty
  librbd::NoOpProgressContext m_progress_ctx;     // removal progress sink

  void get_trash_image_spec();
  void handle_get_trash_image_spec(int r);

  void set_trash_state();
  void handle_set_trash_state(int r);

  void get_snap_context();
  void handle_get_snap_context(int r);

  void purge_snapshots();
  void handle_purge_snapshots(int r);

  void remove_image();
  void handle_remove_image(int r);

  void notify_trash_removed();
  void handle_notify_trash_removed(int r);

  void finish(int r);

};

} // namespace image_deleter
} // namespace mirror
} // namespace rbd

extern template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>;

#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc
new file mode 100644
index 000000000..552d77e0e
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc
@@ -0,0 +1,384 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "tools/rbd_mirror/image_deleter/TrashWatcher.h"
#include "include/rbd_types.h"
#include "cls/rbd/cls_rbd_client.h"
#include "common/debug.h"
#include "common/errno.h"
#include "common/Timer.h"
#include "librbd/ImageCtx.h"
#include "librbd/Utils.h"
#include "librbd/asio/ContextWQ.h"
#include "tools/rbd_mirror/Threads.h"
#include "tools/rbd_mirror/image_deleter/Types.h"

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rbd_mirror
#undef dout_prefix
#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashWatcher: " \
                           << this << " " << __func__ << ": "

using librbd::util::create_context_callback;
using librbd::util::create_rados_callback;

namespace rbd {
namespace mirror {
namespace image_deleter {

namespace {

// page size for paginated trash directory listings
const size_t MAX_RETURN = 1024;

} // anonymous namespace

template <typename I>
TrashWatcher<I>::TrashWatcher(librados::IoCtx &io_ctx, Threads<I> *threads,
                              TrashListener& trash_listener)
  : librbd::TrashWatcher<I>(io_ctx, threads->work_queue),
    m_io_ctx(io_ctx), m_threads(threads), m_trash_listener(trash_listener),
    m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
      "rbd::mirror::image_deleter::TrashWatcher", this))) {
}

// Kick off the initial create-trash-object / register-watch / list cycle;
// on_finish fires once the first listing settles (or fails).
template <typename I>
void TrashWatcher<I>::init(Context *on_finish) {
  dout(5) << dendl;

  {
    std::lock_guard locker{m_lock};
    m_on_init_finish = on_finish;

    ceph_assert(!m_trash_list_in_progress);
    m_trash_list_in_progress = true;
  }

  create_trash();
}

// Cancel any pending re-list timer, wait for in-flight async ops, then
// unregister the watch.
template <typename I>
void TrashWatcher<I>::shut_down(Context *on_finish) {
  dout(5) << dendl;

  {
    std::scoped_lock locker{m_threads->timer_lock, m_lock};

    ceph_assert(!m_shutting_down);
    m_shutting_down = true;
    if (m_timer_ctx != nullptr) {
      m_threads->timer->cancel_event(m_timer_ctx);
      m_timer_ctx = nullptr;
    }
  }

  auto ctx = new LambdaContext([this, on_finish](int r) {
      unregister_watcher(on_finish);
    });
  m_async_op_tracker.wait_for_ops(ctx);
}

template <typename I>
void TrashWatcher<I>::handle_image_added(const std::string &image_id,
                                         const cls::rbd::TrashImageSpec& spec) {
  dout(10) << "image_id=" << image_id << dendl;

  std::lock_guard locker{m_lock};
  add_image(image_id, spec);
}

template <typename I>
void TrashWatcher<I>::handle_image_removed(const std::string &image_id) {
  // ignore removals -- the image deleter will ignore -ENOENTs
}

// Invoked after the watch is (re-)established; schedule a full re-list to
// catch notifications lost while the watch was down.
template <typename I>
void TrashWatcher<I>::handle_rewatch_complete(int r) {
  dout(5) << "r=" << r << dendl;

  if (r == -EBLOCKLISTED) {
    dout(0) << "detected client is blocklisted" << dendl;
    return;
  } else if (r == -ENOENT) {
    dout(5) << "trash directory deleted" << dendl;
  } else if (r < 0) {
    derr << "unexpected error re-registering trash directory watch: "
         << cpp_strerror(r) << dendl;
  }
  schedule_trash_list(30);
}

// Ensure the RBD_TRASH object exists (create is non-exclusive) so the watch
// below has something to attach to.
template <typename I>
void TrashWatcher<I>::create_trash() {
  dout(20) << dendl;
  {
    std::lock_guard locker{m_lock};
    ceph_assert(m_trash_list_in_progress);
  }

  librados::ObjectWriteOperation op;
  op.create(false);

  m_async_op_tracker.start_op();
  auto aio_comp = create_rados_callback<
    TrashWatcher<I>, &TrashWatcher<I>::handle_create_trash>(this);
  int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op);
  ceph_assert(r == 0);
  aio_comp->release();
}

// Blocklisting / pool deletion are terminal; other failures are retried on
// a timer; -EEXIST is success.
template <typename I>
void TrashWatcher<I>::handle_create_trash(int r) {
  dout(20) << "r=" << r << dendl;
  {
    std::lock_guard locker{m_lock};
    ceph_assert(m_trash_list_in_progress);
  }

  Context* on_init_finish = nullptr;
  if (r == -EBLOCKLISTED || r == -ENOENT) {
    if (r == -EBLOCKLISTED) {
      dout(0) << "detected client is blocklisted" << dendl;
    } else {
      dout(0) << "detected pool no longer exists" << dendl;
    }

    std::lock_guard locker{m_lock};
    std::swap(on_init_finish, m_on_init_finish);
    m_trash_list_in_progress = false;
  } else if (r < 0 && r != -EEXIST) {
    derr << "failed to create trash object: " << cpp_strerror(r) << dendl;
    {
      std::lock_guard locker{m_lock};
      m_trash_list_in_progress = false;
    }

    schedule_trash_list(30);
  } else {
    register_watcher();
  }

  m_async_op_tracker.finish_op();
  if (on_init_finish != nullptr) {
    on_init_finish->complete(r);
  }
}

template <typename I>
void TrashWatcher<I>::register_watcher() {
  {
    std::lock_guard locker{m_lock};
    ceph_assert(m_trash_list_in_progress);
  }

  // if the watch registration is in-flight, let the watcher
  // handle the transition -- only (re-)register if it's not registered
  if (!this->is_unregistered()) {
    trash_list(true);
    return;
  }

  // first time registering or the watch failed
  dout(5) << dendl;
  m_async_op_tracker.start_op();

  Context *ctx = create_context_callback<
    TrashWatcher, &TrashWatcher<I>::handle_register_watcher>(this);
  this->register_watch(ctx);
}

// On success start the initial listing; on blocklist give up; otherwise
// retry the whole cycle on a timer.
template <typename I>
void TrashWatcher<I>::handle_register_watcher(int r) {
  dout(5) << "r=" << r << dendl;

  {
    std::lock_guard locker{m_lock};
    ceph_assert(m_trash_list_in_progress);
    if (r < 0) {
      m_trash_list_in_progress = false;
    }
  }

  Context *on_init_finish = nullptr;
  if (r >= 0) {
    trash_list(true);
  } else if (r == -EBLOCKLISTED) {
    dout(0) << "detected client is blocklisted" << dendl;

    std::lock_guard locker{m_lock};
    std::swap(on_init_finish, m_on_init_finish);
  } else {
    derr << "unexpected error registering trash directory watch: "
         << cpp_strerror(r) << dendl;
    schedule_trash_list(10);
  }

  m_async_op_tracker.finish_op();
  if (on_init_finish != nullptr) {
    on_init_finish->complete(r);
  }
}

template <typename I>
void TrashWatcher<I>::unregister_watcher(Context* on_finish) {
  dout(5) << dendl;

  m_async_op_tracker.start_op();
  Context *ctx = new LambdaContext([this, on_finish](int r) {
      handle_unregister_watcher(r, on_finish);
    });
  this->unregister_watch(ctx);
}

// Unregister failures are logged only; shutdown always completes with 0.
template <typename I>
void TrashWatcher<I>::handle_unregister_watcher(int r, Context* on_finish) {
  dout(5) <<
    "unregister_watcher: r=" << r << dendl;
  if (r < 0) {
    derr << "error unregistering watcher for trash directory: "
         << cpp_strerror(r) << dendl;
  }
  m_async_op_tracker.finish_op();
  on_finish->complete(0);
}

// Paginated listing of the trash directory; initial_request=true resets the
// cursor and pins an async op for the whole pagination run.
template <typename I>
void TrashWatcher<I>::trash_list(bool initial_request) {
  if (initial_request) {
    m_async_op_tracker.start_op();
    m_last_image_id = "";
  }

  dout(5) << "last_image_id=" << m_last_image_id << dendl;

  {
    std::lock_guard locker{m_lock};
    ceph_assert(m_trash_list_in_progress);
  }

  librados::ObjectReadOperation op;
  librbd::cls_client::trash_list_start(&op, m_last_image_id, MAX_RETURN);

  librados::AioCompletion *aio_comp = create_rados_callback<
    TrashWatcher<I>, &TrashWatcher<I>::handle_trash_list>(this);
  m_out_bl.clear();
  int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl);
  ceph_assert(r == 0);
  aio_comp->release();
}

// Feed each returned entry to add_image(); a full page (MAX_RETURN entries)
// triggers the next page, a short page ends the run and completes the init
// context.  -ENOENT (no trash object) is treated as an empty listing.
template <typename I>
void TrashWatcher<I>::handle_trash_list(int r) {
  dout(5) << "r=" << r << dendl;

  std::map<std::string, cls::rbd::TrashImageSpec> images;
  if (r >= 0) {
    auto bl_it = m_out_bl.cbegin();
    r = librbd::cls_client::trash_list_finish(&bl_it, &images);
  }

  Context *on_init_finish = nullptr;
  {
    std::lock_guard locker{m_lock};
    ceph_assert(m_trash_list_in_progress);
    if (r >= 0) {
      for (auto& image : images) {
        add_image(image.first, image.second);
      }
    } else if (r == -ENOENT) {
      r = 0;
    }

    if (r == -EBLOCKLISTED) {
      dout(0) << "detected client is blocklisted during trash refresh" << dendl;
      m_trash_list_in_progress = false;
      std::swap(on_init_finish, m_on_init_finish);
    } else if (r >= 0 && images.size() < MAX_RETURN) {
      m_trash_list_in_progress = false;
      std::swap(on_init_finish, m_on_init_finish);
    } else if (r < 0) {
      m_trash_list_in_progress = false;
    }
  }

  if (r >= 0 && images.size() == MAX_RETURN) {
    m_last_image_id = images.rbegin()->first;
    trash_list(false);
    return;
  } else if (r < 0 && r != -EBLOCKLISTED) {
    derr << "failed to retrieve trash directory: " << cpp_strerror(r) << dendl;
    schedule_trash_list(10);
  }

  m_async_op_tracker.finish_op();
  if (on_init_finish != nullptr) {
    on_init_finish->complete(r);
  }
}

// Arm a one-shot timer for a future re-list; a refresh already in flight is
// remembered via m_deferred_trash_list instead of being double-scheduled.
template <typename I>
void TrashWatcher<I>::schedule_trash_list(double interval) {
  std::scoped_lock locker{m_threads->timer_lock, m_lock};
  if (m_shutting_down || m_trash_list_in_progress || m_timer_ctx != nullptr) {
    if (m_trash_list_in_progress && !m_deferred_trash_list) {
      dout(5) << "deferring refresh until in-flight refresh completes" << dendl;
      m_deferred_trash_list = true;
    }
    return;
  }

  dout(5) << dendl;
  m_timer_ctx = m_threads->timer->add_event_after(
    interval,
    new LambdaContext([this](int r) {
        process_trash_list();
      }));
}

// Timer callback (runs under timer_lock): restart the
// create-trash/register/list cycle on the work queue.
template <typename I>
void TrashWatcher<I>::process_trash_list() {
  dout(5) << dendl;

  ceph_assert(ceph_mutex_is_locked(m_threads->timer_lock));
  ceph_assert(m_timer_ctx != nullptr);
  m_timer_ctx = nullptr;

  {
    std::lock_guard locker{m_lock};
    ceph_assert(!m_trash_list_in_progress);
    m_trash_list_in_progress = true;
  }

  // execute outside of the timer's lock
  m_async_op_tracker.start_op();
  Context *ctx = new LambdaContext([this](int r) {
      create_trash();
      m_async_op_tracker.finish_op();
    });
  m_threads->work_queue->queue(ctx, 0);
}

// Forward a MIRRORING-sourced trash entry to the listener on the work queue
// (listener callbacks must not run under m_lock).
template <typename I>
void TrashWatcher<I>::add_image(const std::string& image_id,
                                const cls::rbd::TrashImageSpec& spec) {
  if (spec.source != cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING) {
    return;
  }

  ceph_assert(ceph_mutex_is_locked(m_lock));
  auto& deferment_end_time = spec.deferment_end_time;
  dout(10) << "image_id=" << image_id << ", "
           << "deferment_end_time=" << deferment_end_time << dendl;

  m_async_op_tracker.start_op();
  auto ctx = new LambdaContext([this, image_id, deferment_end_time](int r) {
      m_trash_listener.handle_trash_image(image_id,
                                          deferment_end_time.to_real_time());
      m_async_op_tracker.finish_op();
}); + m_threads->work_queue->queue(ctx, 0); +} + +} // namespace image_deleter; +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.h b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h new file mode 100644 index 000000000..e818a102c --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h @@ -0,0 +1,139 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H +#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H + +#include "include/rados/librados.hpp" +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "librbd/TrashWatcher.h" +#include <set> +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_deleter { + +struct TrashListener; + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashWatcher : public librbd::TrashWatcher<ImageCtxT> { +public: + static TrashWatcher* create(librados::IoCtx &io_ctx, + Threads<ImageCtxT> *threads, + TrashListener& trash_listener) { + return new TrashWatcher(io_ctx, threads, trash_listener); + } + + TrashWatcher(librados::IoCtx &io_ctx, Threads<ImageCtxT> *threads, + TrashListener& trash_listener); + TrashWatcher(const TrashWatcher&) = delete; + TrashWatcher& operator=(const TrashWatcher&) = delete; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + +protected: + void handle_image_added(const std::string &image_id, + const cls::rbd::TrashImageSpec& spec) override; + + void handle_image_removed(const std::string &image_id) override; + + void handle_rewatch_complete(int r) override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * CREATE_TRASH + * | + * v + * REGISTER_WATCHER + * | + * 
   *    |/--------------------------------\
   *    |                                 |
   *    |/---------\                      |
   *    |          |                      |
   *    v          | (more images)        |
   *  TRASH_LIST ---/                     |
   *    |                                 |
   *    |/----------------------------\   |
   *    |                             |   |
   *    v                             |   |
   *  <idle> --\                      |   |
   *    |      |                      |   |
   *    |      |\---> IMAGE_ADDED ----/   |
   *    |      |                          |
   *    |      \----> WATCH_ERROR --------/
   *    v
   *  SHUT_DOWN
   *    |
   *    v
   *  UNREGISTER_WATCHER
   *    |
   *    v
   *  <finish>
   *
   * @endverbatim
   */

  librados::IoCtx m_io_ctx;
  Threads<ImageCtxT> *m_threads;
  TrashListener& m_trash_listener;

  std::string m_last_image_id;      // pagination cursor for trash_list
  bufferlist m_out_bl;              // scratch buffer for listing replies

  mutable ceph::mutex m_lock;

  Context *m_on_init_finish = nullptr;
  Context *m_timer_ctx = nullptr;   // pending re-list timer event

  AsyncOpTracker m_async_op_tracker;
  bool m_trash_list_in_progress = false;
  bool m_deferred_trash_list = false;
  bool m_shutting_down = false;

  void register_watcher();
  void handle_register_watcher(int r);

  void create_trash();
  void handle_create_trash(int r);

  void unregister_watcher(Context* on_finish);
  void handle_unregister_watcher(int r, Context* on_finish);

  void trash_list(bool initial_request);
  void handle_trash_list(int r);

  void schedule_trash_list(double interval);
  void process_trash_list();

  // NOTE(review): declared but no definition appears in TrashWatcher.cc --
  // likely leftover declarations; confirm before relying on them.
  void get_mirror_uuid();
  void handle_get_mirror_uuid(int r);

  void add_image(const std::string& image_id,
                 const cls::rbd::TrashImageSpec& spec);

};

} // namespace image_deleter
} // namespace mirror
} // namespace rbd

extern template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>;

#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H
diff --git a/src/tools/rbd_mirror/image_deleter/Types.h b/src/tools/rbd_mirror/image_deleter/Types.h
new file mode 100644
index 000000000..1c70b7e14
--- /dev/null
+++ b/src/tools/rbd_mirror/image_deleter/Types.h
@@ -0,0 +1,54 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
#define CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H

#include "include/Context.h"
#include "librbd/journal/Policy.h"
#include <string>

struct utime_t;

namespace rbd {
namespace mirror {
namespace image_deleter {

// Outcome hint a deletion request reports back to its caller.
enum ErrorResult {
  ERROR_RESULT_COMPLETE,
  ERROR_RESULT_RETRY,
  ERROR_RESULT_RETRY_IMMEDIATELY
};

// Callback interface implemented by the image deleter to receive trash
// entries discovered by the TrashWatcher.
struct TrashListener {
  TrashListener() {
  }
  TrashListener(const TrashListener&) = delete;
  TrashListener& operator=(const TrashListener&) = delete;

  virtual ~TrashListener() {
  }

  virtual void handle_trash_image(const std::string& image_id,
				  const ceph::real_clock::time_point& deferment_end_time) = 0;

};

// Journal policy that disables journaling/appends during deletion so no
// journal events are generated while an image is being torn down.
struct JournalPolicy : public librbd::journal::Policy {
  bool append_disabled() const override {
    return true;
  }
  bool journal_disabled() const override {
    return true;
  }

  void allocate_tag_on_lock(Context *on_finish) override {
    on_finish->complete(0);
  }
};

} // namespace image_deleter
} // namespace mirror
} // namespace rbd

#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H
diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.cc b/src/tools/rbd_mirror/image_map/LoadRequest.cc
new file mode 100644
index 000000000..46564a160
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/LoadRequest.cc
@@ -0,0 +1,174 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "common/debug.h"
#include "common/errno.h"

#include "librbd/Utils.h"
#include "include/rbd_types.h"
#include "cls/rbd/cls_rbd_client.h"

#include "UpdateRequest.h"
#include "LoadRequest.h"

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rbd_mirror
#undef dout_prefix
#define dout_prefix *_dout << "rbd::mirror::image_map::LoadRequest: " \
                           << this << " " << __func__

namespace rbd {
namespace mirror {
namespace image_map {

// page size for paginated omap listings
static const uint32_t MAX_RETURN = 1024;

using librbd::util::create_rados_callback;
using librbd::util::create_context_callback;

template<typename I>
LoadRequest<I>::LoadRequest(librados::IoCtx &ioctx,
                            std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
                            Context *on_finish)
  : m_ioctx(ioctx),
    m_image_mapping(image_mapping),
    m_on_finish(on_finish) {
}

template<typename I>
void LoadRequest<I>::send() {
  dout(20) << dendl;

  image_map_list();
}

// Page through the persisted image->instance map stored on the
// RBD_MIRROR_LEADER object.
template<typename I>
void LoadRequest<I>::image_map_list() {
  dout(20) << dendl;

  librados::ObjectReadOperation op;
  librbd::cls_client::mirror_image_map_list_start(&op, m_start_after, MAX_RETURN);

  librados::AioCompletion *aio_comp = create_rados_callback<
    LoadRequest, &LoadRequest::handle_image_map_list>(this);

  m_out_bl.clear();
  int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &m_out_bl);
  ceph_assert(r == 0);
  aio_comp->release();
}

// Accumulate pages into *m_image_mapping; a full page continues pagination,
// a short page moves on to listing the currently mirrored images.
template<typename I>
void LoadRequest<I>::handle_image_map_list(int r) {
  dout(20) << ": r=" << r << dendl;

  std::map<std::string, cls::rbd::MirrorImageMap> image_mapping;
  if (r == 0) {
    auto it = m_out_bl.cbegin();
    r = librbd::cls_client::mirror_image_map_list_finish(&it, &image_mapping);
  }

  if (r < 0) {
    derr << ": failed to get image map: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  m_image_mapping->insert(image_mapping.begin(), image_mapping.end());

  if (image_mapping.size() == MAX_RETURN) {
    m_start_after = image_mapping.rbegin()->first;
    image_map_list();
    return;
  }

  mirror_image_list();
}

// List mirror-enabled images so stale map entries can be pruned.
// NOTE(review): m_start_after is not reset between the image-map listing and
// this listing; if the map listing ended on an exact MAX_RETURN boundary the
// cursor carries over -- confirm this is intentional.
template<typename I>
void LoadRequest<I>::mirror_image_list() {
  dout(20) << dendl;

  librados::ObjectReadOperation op;
  librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN);

  m_out_bl.clear();
  librados::AioCompletion *aio_comp = create_rados_callback<
    LoadRequest<I>,
    &LoadRequest<I>::handle_mirror_image_list>(this);
  int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
  ceph_assert(r == 0);
  aio_comp->release();
}

// Collect global image ids (map values) of mirrored images; -ENOENT (no
// mirroring object) is treated as an empty set.
template<typename I>
void LoadRequest<I>::handle_mirror_image_list(int r) {
  dout(20) << ": r=" << r << dendl;

  std::map<std::string, std::string> ids;
  if (r == 0) {
    auto it = m_out_bl.cbegin();
    r = librbd::cls_client::mirror_image_list_finish(&it, &ids);
  }

  if (r < 0 && r != -ENOENT) {
    derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  for (auto &id : ids) {
    m_global_image_ids.emplace(id.second);
  }

  if (ids.size() == MAX_RETURN) {
    m_start_after = ids.rbegin()->first;
    mirror_image_list();
    return;
  }

  cleanup_image_map();
}

// Drop map entries whose global image id is no longer mirrored and persist
// the removals via UpdateRequest.
template<typename I>
void LoadRequest<I>::cleanup_image_map() {
  dout(20) << dendl;

  std::set<std::string> map_removals;

  auto it = m_image_mapping->begin();
  while (it != m_image_mapping->end()) {
    if (m_global_image_ids.count(it->first) > 0) {
      ++it;
      continue;
    }
    map_removals.emplace(it->first);
    it = m_image_mapping->erase(it);
  }

  if (map_removals.size() == 0) {
    finish(0);
    return;
  }

  auto ctx = create_context_callback<
    LoadRequest<I>,
    &LoadRequest<I>::finish>(this);
  image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create(
    m_ioctx, {}, std::move(map_removals), ctx);
  req->send();
}

// Complete the caller and self-destruct.
template<typename I>
void LoadRequest<I>::finish(int r) {
  dout(20) << ": r=" << r << dendl;

  m_on_finish->complete(r);
  delete this;
}

} // namespace image_map
} // namespace mirror
} // namespace rbd

template class rbd::mirror::image_map::LoadRequest<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.h b/src/tools/rbd_mirror/image_map/LoadRequest.h
new file mode 100644
index 000000000..9b1be9685
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/LoadRequest.h
@@ -0,0 +1,77 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
#define CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H

#include "cls/rbd/cls_rbd_types.h"
#include "include/rados/librados.hpp"

class Context;

namespace librbd { class ImageCtx; }

namespace rbd {
namespace mirror {
namespace image_map {

// Loads the persisted image->instance mapping, prunes entries for images
// that are no longer mirrored, and returns the result via *image_mapping.
// Self-deleting single-shot request.
template<typename ImageCtxT = librbd::ImageCtx>
class LoadRequest {
public:
  static LoadRequest *create(librados::IoCtx &ioctx,
                             std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
                             Context *on_finish) {
    return new LoadRequest(ioctx, image_mapping, on_finish);
  }

  void send();

private:
  /**
   * @verbatim
   *
   * <start>
   *    |     . . . . . . . .
   *    v     v             .  MAX_RETURN
   *  IMAGE_MAP_LIST. . . . . . .
   *    |
   *    v
   *  MIRROR_IMAGE_LIST
   *    |
   *    v
   *  CLEANUP_IMAGE_MAP
   *    |
   *    v
   * <finish>
   *
   * @endverbatim
   */
  LoadRequest(librados::IoCtx &ioctx,
              std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping,
              Context *on_finish);

  librados::IoCtx &m_ioctx;
  std::map<std::string, cls::rbd::MirrorImageMap> *m_image_mapping;
  Context *m_on_finish;

  std::set<std::string> m_global_image_ids;  // currently mirrored images

  bufferlist m_out_bl;          // scratch buffer for listing replies
  std::string m_start_after;    // pagination cursor

  void image_map_list();
  void handle_image_map_list(int r);

  void mirror_image_list();
  void handle_mirror_image_list(int r);

  void cleanup_image_map();

  void finish(int r);
};

} // namespace image_map
} // namespace mirror
} // namespace rbd

#endif // CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc
new file mode 100644
index 000000000..62fbd12dc
--- /dev/null
+++ b/src/tools/rbd_mirror/image_map/Policy.cc
@@ -0,0 +1,407 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "common/debug.h"
#include "common/errno.h"

#include "librbd/Utils.h"
#include "Policy.h"

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rbd_mirror
#undef dout_prefix
#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << \
this \
                           << " " << __func__ << ": "

namespace rbd {
namespace mirror {
namespace image_map {

namespace {

// true for actions delivered as RPC messages to a remote instance (those
// may legitimately fail when the instance is dead)
bool is_instance_action(ActionType action_type) {
  switch (action_type) {
  case ACTION_TYPE_ACQUIRE:
  case ACTION_TYPE_RELEASE:
    return true;
  case ACTION_TYPE_NONE:
  case ACTION_TYPE_MAP_UPDATE:
  case ACTION_TYPE_MAP_REMOVE:
    break;
  }
  return false;
}

} // anonymous namespace

using ::operator<<;
using librbd::util::unique_lock_name;

Policy::Policy(librados::IoCtx &ioctx)
  : m_ioctx(ioctx),
    m_map_lock(ceph::make_shared_mutex(
      unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", this))) {

  // map should at least have one instance (ourselves)
  std::string instance_id = stringify(ioctx.get_instance_id());
  m_map.emplace(instance_id, std::set<std::string>{});
}

// Seed the policy from the persisted image->instance mapping loaded at
// startup; every seeded image is forced through INITIALIZING so acquire
// actions are (re)sent to its instance.
void Policy::init(
    const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) {
  dout(20) << dendl;

  std::unique_lock map_lock{m_map_lock};
  for (auto& it : image_mapping) {
    ceph_assert(!it.second.instance_id.empty());
    auto map_result = m_map[it.second.instance_id].emplace(it.first);
    ceph_assert(map_result.second);

    auto image_state_result = m_image_states.emplace(
      it.first, ImageState{it.second.instance_id, it.second.mapped_time});
    ceph_assert(image_state_result.second);

    // ensure we (re)send image acquire actions to the instance
    auto& image_state = image_state_result.first->second;
    auto start_action = set_state(&image_state,
                                  StateTransition::STATE_INITIALIZING, false);
    ceph_assert(start_action);
  }
}

// Read-only lookup of an image's current owner and map time (shared lock).
LookupInfo Policy::lookup(const std::string &global_image_id) {
  dout(20) << "global_image_id=" << global_image_id << dendl;

  std::shared_lock map_lock{m_map_lock};
  LookupInfo info;

  auto it = m_image_states.find(global_image_id);
  if (it != m_image_states.end()) {
    info.instance_id = it->second.instance_id;
    info.mapped_time = it->second.mapped_time;
  }
  return info;
}

// Schedule an ASSOCIATING transition for a (possibly new) image; returns
// false if no action needs to be started.
bool Policy::add_image(const std::string &global_image_id) {
  dout(5) << "global_image_id=" << global_image_id << dendl;

  std::unique_lock map_lock{m_map_lock};
  auto image_state_result = m_image_states.emplace(global_image_id,
                                                   ImageState{});
  auto& image_state = image_state_result.first->second;
  if (image_state.state == StateTransition::STATE_INITIALIZING) {
    // avoid duplicate acquire notifications upon leader startup
    return false;
  }

  return set_state(&image_state, StateTransition::STATE_ASSOCIATING, false);
}

// Schedule a DISSOCIATING transition; false if the image is unknown or no
// action needs to be started.
bool Policy::remove_image(const std::string &global_image_id) {
  dout(5) << "global_image_id=" << global_image_id << dendl;

  std::unique_lock map_lock{m_map_lock};
  auto it = m_image_states.find(global_image_id);
  if (it == m_image_states.end()) {
    return false;
  }

  auto& image_state = it->second;
  return set_state(&image_state, StateTransition::STATE_DISSOCIATING, false);
}

// Register newly-seen instances and rebalance.  On the very first update
// after a leader failover, instances present in the persisted map but not
// alive are treated as dead and their images re-shuffled.
void Policy::add_instances(const InstanceIds &instance_ids,
                           GlobalImageIds* global_image_ids) {
  dout(5) << "instance_ids=" << instance_ids << dendl;

  std::unique_lock map_lock{m_map_lock};
  for (auto& instance : instance_ids) {
    ceph_assert(!instance.empty());
    m_map.emplace(instance, std::set<std::string>{});
  }

  // post-failover, remove any dead instances and re-shuffle their images
  if (m_initial_update) {
    dout(5) << "initial instance update" << dendl;
    m_initial_update = false;

    std::set<std::string> alive_instances(instance_ids.begin(),
                                          instance_ids.end());
    InstanceIds dead_instances;
    for (auto& map_pair : m_map) {
      if (alive_instances.find(map_pair.first) == alive_instances.end()) {
        dead_instances.push_back(map_pair.first);
      }
    }

    if (!dead_instances.empty()) {
      remove_instances(m_map_lock, dead_instances, global_image_ids);
    }
  }

  GlobalImageIds shuffle_global_image_ids;
  do_shuffle_add_instances(m_map, m_image_states.size(), &shuffle_global_image_ids);
  dout(5) << "shuffling global_image_ids=[" << shuffle_global_image_ids <<
"]" << dendl; + for (auto& global_image_id : shuffle_global_image_ids) { + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + if (set_state(&image_state, StateTransition::STATE_SHUFFLING, false)) { + global_image_ids->emplace(global_image_id); + } + } +} + +void Policy::remove_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + std::unique_lock map_lock{m_map_lock}; + remove_instances(m_map_lock, instance_ids, global_image_ids); +} + +void Policy::remove_instances(const ceph::shared_mutex& lock, + const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + ceph_assert(ceph_mutex_is_wlocked(m_map_lock)); + dout(5) << "instance_ids=" << instance_ids << dendl; + + for (auto& instance_id : instance_ids) { + auto map_it = m_map.find(instance_id); + if (map_it == m_map.end()) { + continue; + } + + auto& instance_global_image_ids = map_it->second; + if (instance_global_image_ids.empty()) { + m_map.erase(map_it); + continue; + } + + m_dead_instances.insert(instance_id); + dout(5) << "force shuffling: instance_id=" << instance_id << ", " + << "global_image_ids=[" << instance_global_image_ids << "]"<< dendl; + for (auto& global_image_id : instance_global_image_ids) { + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + if (is_state_scheduled(image_state, + StateTransition::STATE_DISSOCIATING)) { + // don't shuffle images that no longer exist + continue; + } + + if (set_state(&image_state, StateTransition::STATE_SHUFFLING, true)) { + global_image_ids->emplace(global_image_id); + } + } + } +} + +ActionType Policy::start_action(const std::string &global_image_id) { + std::unique_lock map_lock{m_map_lock}; + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + auto& transition = image_state.transition; + 
ceph_assert(transition.action_type != ACTION_TYPE_NONE); + + dout(5) << "global_image_id=" << global_image_id << ", " + << "state=" << image_state.state << ", " + << "action_type=" << transition.action_type << dendl; + if (transition.start_policy_action) { + execute_policy_action(global_image_id, &image_state, + *transition.start_policy_action); + transition.start_policy_action = boost::none; + } + return transition.action_type; +} + +bool Policy::finish_action(const std::string &global_image_id, int r) { + std::unique_lock map_lock{m_map_lock}; + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + auto& transition = image_state.transition; + dout(5) << "global_image_id=" << global_image_id << ", " + << "state=" << image_state.state << ", " + << "action_type=" << transition.action_type << ", " + << "r=" << r << dendl; + + // retry on failure unless it's an RPC message to an instance that is dead + if (r < 0 && + (!is_instance_action(image_state.transition.action_type) || + image_state.instance_id == UNMAPPED_INSTANCE_ID || + m_dead_instances.find(image_state.instance_id) == + m_dead_instances.end())) { + return true; + } + + auto finish_policy_action = transition.finish_policy_action; + StateTransition::transit(image_state.state, &image_state.transition); + if (transition.finish_state) { + // in-progress state machine complete + ceph_assert(StateTransition::is_idle(*transition.finish_state)); + image_state.state = *transition.finish_state; + image_state.transition = {}; + } + + if (StateTransition::is_idle(image_state.state) && image_state.next_state) { + // advance to pending state machine + bool start_action = set_state(&image_state, *image_state.next_state, false); + ceph_assert(start_action); + } + + // image state may get purged in execute_policy_action() + bool pending_action = image_state.transition.action_type != ACTION_TYPE_NONE; + if (finish_policy_action) { + 
execute_policy_action(global_image_id, &image_state, *finish_policy_action); + } + + return pending_action; +} + +void Policy::execute_policy_action( + const std::string& global_image_id, ImageState* image_state, + StateTransition::PolicyAction policy_action) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "policy_action=" << policy_action << dendl; + + switch (policy_action) { + case StateTransition::POLICY_ACTION_MAP: + map(global_image_id, image_state); + break; + case StateTransition::POLICY_ACTION_UNMAP: + unmap(global_image_id, image_state); + break; + case StateTransition::POLICY_ACTION_REMOVE: + if (image_state->state == StateTransition::STATE_UNASSOCIATED) { + ceph_assert(image_state->instance_id == UNMAPPED_INSTANCE_ID); + ceph_assert(!image_state->next_state); + m_image_states.erase(global_image_id); + } + break; + } +} + +void Policy::map(const std::string& global_image_id, ImageState* image_state) { + ceph_assert(ceph_mutex_is_wlocked(m_map_lock)); + + std::string instance_id = image_state->instance_id; + if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) { + return; + } + if (is_dead_instance(instance_id)) { + unmap(global_image_id, image_state); + } + + instance_id = do_map(m_map, global_image_id); + ceph_assert(!instance_id.empty()); + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + image_state->instance_id = instance_id; + image_state->mapped_time = ceph_clock_now(); + + auto ins = m_map[instance_id].emplace(global_image_id); + ceph_assert(ins.second); +} + +void Policy::unmap(const std::string &global_image_id, + ImageState* image_state) { + ceph_assert(ceph_mutex_is_wlocked(m_map_lock)); + + std::string instance_id = image_state->instance_id; + if (instance_id == UNMAPPED_INSTANCE_ID) { + return; + } + + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + ceph_assert(!instance_id.empty()); + 
m_map[instance_id].erase(global_image_id); + image_state->instance_id = UNMAPPED_INSTANCE_ID; + image_state->mapped_time = {}; + + if (is_dead_instance(instance_id) && m_map[instance_id].empty()) { + dout(5) << "removing dead instance_id=" << instance_id << dendl; + m_map.erase(instance_id); + m_dead_instances.erase(instance_id); + } +} + +bool Policy::is_image_shuffling(const std::string &global_image_id) { + ceph_assert(ceph_mutex_is_locked(m_map_lock)); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + auto& image_state = it->second; + + // avoid attempting to re-shuffle a pending shuffle + auto result = is_state_scheduled(image_state, + StateTransition::STATE_SHUFFLING); + dout(20) << "global_image_id=" << global_image_id << ", " + << "result=" << result << dendl; + return result; +} + +bool Policy::can_shuffle_image(const std::string &global_image_id) { + ceph_assert(ceph_mutex_is_locked(m_map_lock)); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + int migration_throttle = cct->_conf.get_val<uint64_t>( + "rbd_mirror_image_policy_migration_throttle"); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + auto& image_state = it->second; + + utime_t last_shuffled_time = image_state.mapped_time; + + // idle images that haven't been recently remapped can shuffle + utime_t now = ceph_clock_now(); + auto result = (StateTransition::is_idle(image_state.state) && + ((migration_throttle <= 0) || + (now - last_shuffled_time >= migration_throttle))); + dout(10) << "global_image_id=" << global_image_id << ", " + << "migration_throttle=" << migration_throttle << ", " + << "last_shuffled_time=" << last_shuffled_time << ", " + << "result=" << result << dendl; + return result; +} + +bool Policy::set_state(ImageState* image_state, StateTransition::State state, + bool ignore_current_state) { + if (!ignore_current_state && image_state->state == state) { + 
image_state->next_state = boost::none; + return false; + } else if (StateTransition::is_idle(image_state->state)) { + image_state->state = state; + image_state->next_state = boost::none; + + StateTransition::transit(image_state->state, &image_state->transition); + ceph_assert(image_state->transition.action_type != ACTION_TYPE_NONE); + ceph_assert(!image_state->transition.finish_state); + return true; + } + + image_state->next_state = state; + return false; +} + +bool Policy::is_state_scheduled(const ImageState& image_state, + StateTransition::State state) const { + return (image_state.state == state || + (image_state.next_state && *image_state.next_state == state)); +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/Policy.h b/src/tools/rbd_mirror/image_map/Policy.h new file mode 100644 index 000000000..b256e2f1d --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Policy.h @@ -0,0 +1,122 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H + +#include <map> +#include <tuple> +#include <boost/optional.hpp> + +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/image_map/StateTransition.h" +#include "tools/rbd_mirror/image_map/Types.h" + +class Context; + +namespace rbd { +namespace mirror { +namespace image_map { + +class Policy { +public: + Policy(librados::IoCtx &ioctx); + + virtual ~Policy() { + } + + // init -- called during initialization + void init( + const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping); + + // lookup an image from the map + LookupInfo lookup(const std::string &global_image_id); + + // add, remove + bool add_image(const std::string &global_image_id); + bool remove_image(const std::string &global_image_id); + + // shuffle images when instances are added/removed + void 
add_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + void remove_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + + ActionType start_action(const std::string &global_image_id); + bool finish_action(const std::string &global_image_id, int r); + +protected: + typedef std::map<std::string, std::set<std::string> > InstanceToImageMap; + + bool is_dead_instance(const std::string instance_id) { + ceph_assert(ceph_mutex_is_locked(m_map_lock)); + return m_dead_instances.find(instance_id) != m_dead_instances.end(); + } + + bool is_image_shuffling(const std::string &global_image_id); + bool can_shuffle_image(const std::string &global_image_id); + + // map an image (global image id) to an instance + virtual std::string do_map(const InstanceToImageMap& map, + const std::string &global_image_id) = 0; + + // shuffle images when instances are added/removed + virtual void do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) = 0; + +private: + struct ImageState { + std::string instance_id = UNMAPPED_INSTANCE_ID; + utime_t mapped_time; + + ImageState() {} + ImageState(const std::string& instance_id, const utime_t& mapped_time) + : instance_id(instance_id), mapped_time(mapped_time) { + } + + // active state and action + StateTransition::State state = StateTransition::STATE_UNASSOCIATED; + StateTransition::Transition transition; + + // next scheduled state + boost::optional<StateTransition::State> next_state = boost::none; + }; + + typedef std::map<std::string, ImageState> ImageStates; + + librados::IoCtx &m_ioctx; + + ceph::shared_mutex m_map_lock; // protects m_map + InstanceToImageMap m_map; // instance_id -> global_id map + + ImageStates m_image_states; + std::set<std::string> m_dead_instances; + + bool m_initial_update = true; + + void remove_instances(const ceph::shared_mutex& lock, + const InstanceIds &instance_ids, + GlobalImageIds* 
global_image_ids); + + bool set_state(ImageState* image_state, StateTransition::State state, + bool ignore_current_state); + + void execute_policy_action(const std::string& global_image_id, + ImageState* image_state, + StateTransition::PolicyAction policy_action); + + void map(const std::string& global_image_id, ImageState* image_state); + void unmap(const std::string &global_image_id, ImageState* image_state); + + bool is_state_scheduled(const ImageState& image_state, + StateTransition::State state) const; + +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.cc b/src/tools/rbd_mirror/image_map/SimplePolicy.cc new file mode 100644 index 000000000..f26805819 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/SimplePolicy.cc @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "SimplePolicy.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::SimplePolicy: " << this \ + << " " << __func__ << ": " +namespace rbd { +namespace mirror { +namespace image_map { + +SimplePolicy::SimplePolicy(librados::IoCtx &ioctx) + : Policy(ioctx) { +} + +size_t SimplePolicy::calc_images_per_instance(const InstanceToImageMap& map, + size_t image_count) { + size_t nr_instances = 0; + for (auto const &it : map) { + if (!Policy::is_dead_instance(it.first)) { + ++nr_instances; + } + } + ceph_assert(nr_instances > 0); + + size_t images_per_instance = image_count / nr_instances; + if (images_per_instance == 0) { + ++images_per_instance; + } + + return images_per_instance; +} + +void SimplePolicy::do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) { + 
uint64_t images_per_instance = calc_images_per_instance(map, image_count); + dout(5) << "images per instance=" << images_per_instance << dendl; + + for (auto const &instance : map) { + if (instance.second.size() <= images_per_instance) { + continue; + } + + auto it = instance.second.begin(); + uint64_t cut_off = instance.second.size() - images_per_instance; + + while (it != instance.second.end() && cut_off > 0) { + if (Policy::is_image_shuffling(*it)) { + --cut_off; + } else if (Policy::can_shuffle_image(*it)) { + --cut_off; + remap_global_image_ids->emplace(*it); + } + + ++it; + } + } +} + +std::string SimplePolicy::do_map(const InstanceToImageMap& map, + const std::string &global_image_id) { + auto min_it = map.end(); + for (auto it = map.begin(); it != map.end(); ++it) { + ceph_assert(it->second.find(global_image_id) == it->second.end()); + if (Policy::is_dead_instance(it->first)) { + continue; + } else if (min_it == map.end()) { + min_it = it; + } else if (it->second.size() < min_it->second.size()) { + min_it = it; + } + } + + ceph_assert(min_it != map.end()); + dout(20) << "global_image_id=" << global_image_id << " maps to instance_id=" + << min_it->first << dendl; + return min_it->first; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.h b/src/tools/rbd_mirror/image_map/SimplePolicy.h new file mode 100644 index 000000000..ad2071b2c --- /dev/null +++ b/src/tools/rbd_mirror/image_map/SimplePolicy.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H + +#include "Policy.h" + +namespace rbd { +namespace mirror { +namespace image_map { + +class SimplePolicy : public Policy { +public: + static SimplePolicy *create(librados::IoCtx &ioctx) { + return new SimplePolicy(ioctx); + } + +protected: + 
SimplePolicy(librados::IoCtx &ioctx); + + std::string do_map(const InstanceToImageMap& map, + const std::string &global_image_id) override; + + void do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) override; + +private: + size_t calc_images_per_instance(const InstanceToImageMap& map, + size_t image_count); + +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H diff --git a/src/tools/rbd_mirror/image_map/StateTransition.cc b/src/tools/rbd_mirror/image_map/StateTransition.cc new file mode 100644 index 000000000..ec5f07ff9 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/StateTransition.cc @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <ostream> +#include "include/ceph_assert.h" +#include "StateTransition.h" + +namespace rbd { +namespace mirror { +namespace image_map { + +std::ostream &operator<<(std::ostream &os, + const StateTransition::State &state) { + switch(state) { + case StateTransition::STATE_INITIALIZING: + os << "INITIALIZING"; + break; + case StateTransition::STATE_ASSOCIATING: + os << "ASSOCIATING"; + break; + case StateTransition::STATE_ASSOCIATED: + os << "ASSOCIATED"; + break; + case StateTransition::STATE_SHUFFLING: + os << "SHUFFLING"; + break; + case StateTransition::STATE_DISSOCIATING: + os << "DISSOCIATING"; + break; + case StateTransition::STATE_UNASSOCIATED: + os << "UNASSOCIATED"; + break; + } + return os; +} + +std::ostream &operator<<(std::ostream &os, + const StateTransition::PolicyAction &policy_action) { + switch(policy_action) { + case StateTransition::POLICY_ACTION_MAP: + os << "MAP"; + break; + case StateTransition::POLICY_ACTION_UNMAP: + os << "UNMAP"; + break; + case StateTransition::POLICY_ACTION_REMOVE: + os << "REMOVE"; + break; + } + return os; +} + +const StateTransition::TransitionTable 
StateTransition::s_transition_table { + // state current_action Transition + // --------------------------------------------------------------------------- + {{STATE_INITIALIZING, ACTION_TYPE_NONE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_INITIALIZING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}}, + + {{STATE_ASSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_MAP_UPDATE, + {POLICY_ACTION_MAP}, {}, {}}}, + {{STATE_ASSOCIATING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_ASSOCIATING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}}, + + {{STATE_DISSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {}, + {POLICY_ACTION_UNMAP}, {}}}, + {{STATE_DISSOCIATING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_REMOVE, {}, + {POLICY_ACTION_REMOVE}, {}}}, + {{STATE_DISSOCIATING, ACTION_TYPE_MAP_REMOVE}, {ACTION_TYPE_NONE, {}, + {}, {STATE_UNASSOCIATED}}}, + + {{STATE_SHUFFLING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {}, + {POLICY_ACTION_UNMAP}, {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_UPDATE, + {POLICY_ACTION_MAP}, {}, {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}} +}; + +void StateTransition::transit(State state, Transition* transition) { + auto it = s_transition_table.find({state, transition->action_type}); + ceph_assert(it != s_transition_table.end()); + + *transition = it->second; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/StateTransition.h b/src/tools/rbd_mirror/image_map/StateTransition.h new file mode 100644 index 000000000..02a5ce4e9 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/StateTransition.h @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef 
CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
#define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H

#include "tools/rbd_mirror/image_map/Types.h"
#include <boost/optional.hpp>
#include <map>

namespace rbd {
namespace mirror {
namespace image_map {

// Table-driven state machine describing how an image's mapping to an
// instance evolves: each {state, completed action} pair maps to the next
// Transition to execute (see s_transition_table in the .cc).
class StateTransition {
public:
  enum State {
    STATE_UNASSOCIATED,  // not mapped to any instance (idle)
    STATE_INITIALIZING,  // acquiring from a previously persisted mapping
    STATE_ASSOCIATING,   // mapping to an instance
    STATE_ASSOCIATED,    // mapped and acquired (idle)
    STATE_SHUFFLING,     // moving between instances
    STATE_DISSOCIATING   // releasing/unmapping the image
  };

  // policy-internal bookkeeping actions executed around image map actions
  enum PolicyAction {
    POLICY_ACTION_MAP,
    POLICY_ACTION_UNMAP,
    POLICY_ACTION_REMOVE
  };

  // a single edge in the state machine table
  struct Transition {
    // image map action
    ActionType action_type = ACTION_TYPE_NONE;

    // policy internal action
    boost::optional<PolicyAction> start_policy_action;
    boost::optional<PolicyAction> finish_policy_action;

    // state machine complete
    boost::optional<State> finish_state;

    Transition() {
    }
    Transition(ActionType action_type,
               const boost::optional<PolicyAction>& start_policy_action,
               const boost::optional<PolicyAction>& finish_policy_action,
               const boost::optional<State>& finish_state)
      : action_type(action_type), start_policy_action(start_policy_action),
        finish_policy_action(finish_policy_action), finish_state(finish_state) {
    }
  };

  // idle states have no in-flight action/state machine
  static bool is_idle(State state) {
    return (state == STATE_UNASSOCIATED || state == STATE_ASSOCIATED);
  }

  // advance *transition to the next edge for {state, current action};
  // asserts if no such edge exists
  static void transit(State state, Transition* transition);

private:
  typedef std::pair<State, ActionType> TransitionKey;
  typedef std::map<TransitionKey, Transition> TransitionTable;

  // image transition table
  static const TransitionTable s_transition_table;
};

std::ostream &operator<<(std::ostream &os, const StateTransition::State &state);
std::ostream &operator<<(std::ostream &os,
                         const StateTransition::PolicyAction &policy_action);

} // namespace image_map
} // namespace mirror
} // namespace rbd

#endif // CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H
diff --git a/src/tools/rbd_mirror/image_map/Types.cc
b/src/tools/rbd_mirror/image_map/Types.cc new file mode 100644 index 000000000..47de9c3cf --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Types.cc @@ -0,0 +1,138 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Types.h" +#include "include/ceph_assert.h" +#include "include/stringify.h" +#include "common/Formatter.h" +#include <iostream> + +namespace rbd { +namespace mirror { +namespace image_map { + +const std::string UNMAPPED_INSTANCE_ID(""); + +namespace { + +template <typename E> +class GetTypeVisitor : public boost::static_visitor<E> { +public: + template <typename T> + inline E operator()(const T&) const { + return T::TYPE; + } +}; + +class EncodeVisitor : public boost::static_visitor<void> { +public: + explicit EncodeVisitor(bufferlist &bl) : m_bl(bl) { + } + + template <typename T> + inline void operator()(const T& t) const { + using ceph::encode; + encode(static_cast<uint32_t>(T::TYPE), m_bl); + t.encode(m_bl); + } +private: + bufferlist &m_bl; +}; + +class DecodeVisitor : public boost::static_visitor<void> { +public: + DecodeVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) { + } + + template <typename T> + inline void operator()(T& t) const { + t.decode(m_version, m_iter); + } +private: + __u8 m_version; + bufferlist::const_iterator &m_iter; +}; + +class DumpVisitor : public boost::static_visitor<void> { +public: + explicit DumpVisitor(Formatter *formatter, const std::string &key) + : m_formatter(formatter), m_key(key) {} + + template <typename T> + inline void operator()(const T& t) const { + auto type = T::TYPE; + m_formatter->dump_string(m_key.c_str(), stringify(type)); + t.dump(m_formatter); + } +private: + ceph::Formatter *m_formatter; + std::string m_key; +}; + +} // anonymous namespace + +PolicyMetaType PolicyData::get_policy_meta_type() const { + return boost::apply_visitor(GetTypeVisitor<PolicyMetaType>(), policy_meta); +} + +void 
PolicyData::encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + boost::apply_visitor(EncodeVisitor(bl), policy_meta); + ENCODE_FINISH(bl); +} + +void PolicyData::decode(bufferlist::const_iterator& it) { + DECODE_START(1, it); + + uint32_t policy_meta_type; + decode(policy_meta_type, it); + + switch (policy_meta_type) { + case POLICY_META_TYPE_NONE: + policy_meta = PolicyMetaNone(); + break; + default: + policy_meta = PolicyMetaUnknown(); + break; + } + + boost::apply_visitor(DecodeVisitor(struct_v, it), policy_meta); + DECODE_FINISH(it); +} + +void PolicyData::dump(Formatter *f) const { + boost::apply_visitor(DumpVisitor(f, "policy_meta_type"), policy_meta); +} + +void PolicyData::generate_test_instances(std::list<PolicyData *> &o) { + o.push_back(new PolicyData(PolicyMetaNone())); +} + +std::ostream &operator<<(std::ostream &os, const ActionType& action_type) { + switch (action_type) { + case ACTION_TYPE_NONE: + os << "NONE"; + break; + case ACTION_TYPE_MAP_UPDATE: + os << "MAP_UPDATE"; + break; + case ACTION_TYPE_MAP_REMOVE: + os << "MAP_REMOVE"; + break; + case ACTION_TYPE_ACQUIRE: + os << "ACQUIRE"; + break; + case ACTION_TYPE_RELEASE: + os << "RELEASE"; + break; + default: + os << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")"; + break; + } + return os; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/Types.h b/src/tools/rbd_mirror/image_map/Types.h new file mode 100644 index 000000000..5a97430f3 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Types.h @@ -0,0 +1,130 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H + +#include <iosfwd> +#include <map> +#include <set> +#include <string> +#include <boost/variant.hpp> + +#include "include/buffer.h" +#include "include/encoding.h" +#include "include/utime.h" +#include 
"tools/rbd_mirror/Types.h"

struct Context;

namespace ceph {
class Formatter;
}

namespace rbd {
namespace mirror {
namespace image_map {

// sentinel instance id for an image that is not mapped anywhere
extern const std::string UNMAPPED_INSTANCE_ID;

// callbacks invoked by the image map to drive acquire/release/remove of
// images against instances
struct Listener {
  virtual ~Listener() {
  }

  virtual void acquire_image(const std::string &global_image_id,
                             const std::string &instance_id,
                             Context* on_finish) = 0;
  virtual void release_image(const std::string &global_image_id,
                             const std::string &instance_id,
                             Context* on_finish) = 0;
  virtual void remove_image(const std::string &mirror_uuid,
                            const std::string &global_image_id,
                            const std::string &instance_id,
                            Context* on_finish) = 0;
};

// result of an image map lookup: owning instance plus when it was mapped
struct LookupInfo {
  std::string instance_id = UNMAPPED_INSTANCE_ID;
  utime_t mapped_time;
};

// actions dispatched by the image map state machine (see StateTransition)
enum ActionType {
  ACTION_TYPE_NONE,
  ACTION_TYPE_MAP_UPDATE,
  ACTION_TYPE_MAP_REMOVE,
  ACTION_TYPE_ACQUIRE,
  ACTION_TYPE_RELEASE
};

typedef std::vector<std::string> InstanceIds;
typedef std::set<std::string> GlobalImageIds;
typedef std::map<std::string, ActionType> ImageActionTypes;

enum PolicyMetaType {
  POLICY_META_TYPE_NONE = 0,
};

// policy metadata payload carrying no content
struct PolicyMetaNone {
  static const PolicyMetaType TYPE = POLICY_META_TYPE_NONE;

  PolicyMetaNone() {
  }

  void encode(bufferlist& bl) const {
  }

  void decode(__u8 version, bufferlist::const_iterator& it) {
  }

  void dump(Formatter *f) const {
  }
};

// placeholder for an unrecognized policy meta type; encoding it aborts
struct PolicyMetaUnknown {
  static const PolicyMetaType TYPE = static_cast<PolicyMetaType>(-1);

  PolicyMetaUnknown() {
  }

  void encode(bufferlist& bl) const {
    ceph_abort();
  }

  void decode(__u8 version, bufferlist::const_iterator& it) {
  }

  void dump(Formatter *f) const {
  }
};

typedef boost::variant<PolicyMetaNone,
                       PolicyMetaUnknown> PolicyMeta;

// encodable wrapper around a tagged variant of policy metadata
struct PolicyData {
  PolicyData()
    : policy_meta(PolicyMetaUnknown()) {
  }
  PolicyData(const PolicyMeta &policy_meta)
    : policy_meta(policy_meta) {
  }

  PolicyMeta policy_meta;

  // returns the TYPE tag of the currently held variant alternative
  PolicyMetaType get_policy_meta_type()
const; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; + + static void generate_test_instances(std::list<PolicyData *> &o); +}; + +WRITE_CLASS_ENCODER(PolicyData); + +std::ostream &operator<<(std::ostream &os, const ActionType &action_type); + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.cc b/src/tools/rbd_mirror/image_map/UpdateRequest.cc new file mode 100644 index 000000000..799c5670f --- /dev/null +++ b/src/tools/rbd_mirror/image_map/UpdateRequest.cc @@ -0,0 +1,100 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" + +#include "UpdateRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::UpdateRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_map { + +using librbd::util::create_rados_callback; + +static const uint32_t MAX_UPDATE = 256; + +template <typename I> +UpdateRequest<I>::UpdateRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish) + : m_ioctx(ioctx), + m_update_mapping(update_mapping), + m_remove_global_image_ids(remove_global_image_ids), + m_on_finish(on_finish) { +} + +template <typename I> +void UpdateRequest<I>::send() { + dout(20) << dendl; + + update_image_map(); +} + +template <typename I> +void UpdateRequest<I>::update_image_map() { + dout(20) << dendl; + + if (m_update_mapping.empty() && m_remove_global_image_ids.empty()) { + finish(0); + return; + } + + uint32_t nr_updates 
= 0; + librados::ObjectWriteOperation op; + + auto it1 = m_update_mapping.begin(); + while (it1 != m_update_mapping.end() && nr_updates++ < MAX_UPDATE) { + librbd::cls_client::mirror_image_map_update(&op, it1->first, it1->second); + it1 = m_update_mapping.erase(it1); + } + + auto it2 = m_remove_global_image_ids.begin(); + while (it2 != m_remove_global_image_ids.end() && nr_updates++ < MAX_UPDATE) { + librbd::cls_client::mirror_image_map_remove(&op, *it2); + it2 = m_remove_global_image_ids.erase(it2); + } + + librados::AioCompletion *aio_comp = create_rados_callback< + UpdateRequest, &UpdateRequest::handle_update_image_map>(this); + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void UpdateRequest<I>::handle_update_image_map(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update image map: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + update_image_map(); +} + +template <typename I> +void UpdateRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_map::UpdateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.h b/src/tools/rbd_mirror/image_map/UpdateRequest.h new file mode 100644 index 000000000..841cc6f9b --- /dev/null +++ b/src/tools/rbd_mirror/image_map/UpdateRequest.h @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H + +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +class Context; + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_map { + +template<typename ImageCtxT = 
librbd::ImageCtx> +class UpdateRequest { +public: + // accepts an image map for updation and a collection of + // global image ids to purge. + static UpdateRequest *create(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish) { + return new UpdateRequest(ioctx, std::move(update_mapping), std::move(remove_global_image_ids), + on_finish); + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | . . . . . . . . + * v v . MAX_UPDATE + * UPDATE_IMAGE_MAP. . . . . . . + * | + * v + * <finish> + * + * @endverbatim + */ + UpdateRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish); + + librados::IoCtx &m_ioctx; + std::map<std::string, cls::rbd::MirrorImageMap> m_update_mapping; + std::set<std::string> m_remove_global_image_ids; + Context *m_on_finish; + + void update_image_map(); + void handle_update_image_map(int r); + + void finish(int r); +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc new file mode 100644 index 000000000..bda5b5f9b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc @@ -0,0 +1,485 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "BootstrapRequest.h" +#include "CreateImageRequest.h" +#include "OpenImageRequest.h" +#include "OpenLocalImageRequest.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "journal/Journaler.h" +#include "journal/Settings.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" 
// ---- src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc (continued) ----
// Bootstraps a mirrored image for replay: prepares local and remote image
// metadata, opens both images, creates the local image if it is missing,
// optionally performs a full image sync, and finally closes the remote image
// when the replayer no longer needs it.  The request self-deletes when
// finish() runs (CancelableRequest semantics).
// NOTE(review): this chunk starts mid-include-list; BootstrapRequest.h,
// OpenImageRequest.h and OpenLocalImageRequest.h are included above this view.
#include "librbd/internal.h"
#include "librbd/Journal.h"
#include "librbd/Utils.h"
#include "librbd/asio/ContextWQ.h"
#include "librbd/journal/Types.h"
#include "tools/rbd_mirror/BaseRequest.h"
#include "tools/rbd_mirror/ImageSync.h"
#include "tools/rbd_mirror/ProgressContext.h"
#include "tools/rbd_mirror/Threads.h"
#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
#include "tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h"

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rbd_mirror
#undef dout_prefix
#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
                           << "BootstrapRequest: " << this << " " \
                           << __func__ << ": "

namespace rbd {
namespace mirror {
namespace image_replayer {

using librbd::util::create_context_callback;
using librbd::util::unique_lock_name;

// All state transitions run on librados/librbd callback threads; m_lock only
// guards the members that other threads may read concurrently (m_canceled,
// m_image_sync, m_local_image_name).
template <typename I>
BootstrapRequest<I>::BootstrapRequest(
    Threads<I>* threads,
    librados::IoCtx& local_io_ctx,
    librados::IoCtx& remote_io_ctx,
    InstanceWatcher<I>* instance_watcher,
    const std::string& global_image_id,
    const std::string& local_mirror_uuid,
    const RemotePoolMeta& remote_pool_meta,
    ::journal::CacheManagerHandler* cache_manager_handler,
    PoolMetaCache* pool_meta_cache,
    ProgressContext* progress_ctx,
    StateBuilder<I>** state_builder,
    bool* do_resync,
    Context* on_finish)
  : CancelableRequest("rbd::mirror::image_replayer::BootstrapRequest",
                      reinterpret_cast<CephContext*>(local_io_ctx.cct()),
                      on_finish),
    m_threads(threads),
    m_local_io_ctx(local_io_ctx),
    m_remote_io_ctx(remote_io_ctx),
    m_instance_watcher(instance_watcher),
    m_global_image_id(global_image_id),
    m_local_mirror_uuid(local_mirror_uuid),
    m_remote_pool_meta(remote_pool_meta),
    m_cache_manager_handler(cache_manager_handler),
    m_pool_meta_cache(pool_meta_cache),
    m_progress_ctx(progress_ctx),
    m_state_builder(state_builder),
    m_do_resync(do_resync),
    m_lock(ceph::make_mutex(unique_lock_name("BootstrapRequest::m_lock",
                                             this))) {
  dout(10) << dendl;
}

// True while an ImageSync sub-request is in flight.
template <typename I>
bool BootstrapRequest<I>::is_syncing() const {
  std::lock_guard locker{m_lock};
  return (m_image_sync != nullptr);
}

// Entry point: reset the resync flag and start the state machine.
template <typename I>
void BootstrapRequest<I>::send() {
  *m_do_resync = false;

  prepare_local_image();
}

// Flag cancellation; an in-flight image sync is canceled immediately, other
// states observe m_canceled at their next checkpoint (see image_sync()).
template <typename I>
void BootstrapRequest<I>::cancel() {
  dout(10) << dendl;

  std::lock_guard locker{m_lock};
  m_canceled = true;

  if (m_image_sync != nullptr) {
    m_image_sync->cancel();
  }
}

template <typename I>
std::string BootstrapRequest<I>::get_local_image_name() const {
  std::unique_lock locker{m_lock};
  return m_local_image_name;
}

// Step 1: resolve the local image id/name for the global image id.
template <typename I>
void BootstrapRequest<I>::prepare_local_image() {
  dout(10) << dendl;
  update_progress("PREPARE_LOCAL_IMAGE");

  {
    // placeholder until the real local name is known (used by status reports)
    std::unique_lock locker{m_lock};
    m_local_image_name = m_global_image_id;
  }

  ceph_assert(*m_state_builder == nullptr);
  auto ctx = create_context_callback<
    BootstrapRequest, &BootstrapRequest<I>::handle_prepare_local_image>(this);
  auto req = image_replayer::PrepareLocalImageRequest<I>::create(
    m_local_io_ctx, m_global_image_id, &m_prepare_local_image_name,
    m_state_builder, m_threads->work_queue, ctx);
  req->send();
}

// -ENOENT (no local image yet) is not fatal; any other error finishes the
// bootstrap.
template <typename I>
void BootstrapRequest<I>::handle_prepare_local_image(int r) {
  dout(10) << "r=" << r << dendl;

  ceph_assert(r < 0 || *m_state_builder != nullptr);
  if (r == -ENOENT) {
    dout(10) << "local image does not exist" << dendl;
  } else if (r < 0) {
    derr << "error preparing local image for replay: " << cpp_strerror(r)
         << dendl;
    finish(r);
    return;
  }

  // image replayer will detect the name change (if any) at next
  // status update
  if (r >= 0 && !m_prepare_local_image_name.empty()) {
    std::unique_lock locker{m_lock};
    m_local_image_name = m_prepare_local_image_name;
  }

  prepare_remote_image();
}

// Step 2: resolve the remote image id / mirror state for the global image id.
template <typename I>
void BootstrapRequest<I>::prepare_remote_image() {
  dout(10) << dendl;
  update_progress("PREPARE_REMOTE_IMAGE");

  Context *ctx = create_context_callback<
    BootstrapRequest, &BootstrapRequest<I>::handle_prepare_remote_image>(this);
  auto req = image_replayer::PrepareRemoteImageRequest<I>::create(
    m_threads, m_local_io_ctx, m_remote_io_ctx, m_global_image_id,
    m_local_mirror_uuid, m_remote_pool_meta, m_cache_manager_handler,
    m_state_builder, ctx);
  req->send();
}

// Classify the prepare result: local-primary (-ENOMSG), remote image gone
// (-ENOENT / -ENOLINK), hard error, or "remote not primary" special cases.
template <typename I>
void BootstrapRequest<I>::handle_prepare_remote_image(int r) {
  dout(10) << "r=" << r << dendl;

  auto state_builder = *m_state_builder;
  ceph_assert(state_builder == nullptr ||
              !state_builder->remote_mirror_uuid.empty());

  if (state_builder != nullptr && state_builder->is_local_primary()) {
    dout(5) << "local image is primary" << dendl;
    finish(-ENOMSG);
    return;
  } else if (r == -ENOENT || state_builder == nullptr) {
    // the dout is deliberately split: extra detail is streamed onto the open
    // log line only when a state builder exists, then terminated with dendl
    dout(10) << "remote image does not exist";
    if (state_builder != nullptr) {
      *_dout << ": "
             << "local_image_id=" << state_builder->local_image_id << ", "
             << "remote_image_id=" << state_builder->remote_image_id << ", "
             << "is_linked=" << state_builder->is_linked();
    }
    *_dout << dendl;

    // TODO need to support multiple remote images
    if (state_builder != nullptr &&
        state_builder->remote_image_id.empty() &&
        (state_builder->local_image_id.empty() ||
         state_builder->is_linked())) {
      // both images doesn't exist or local image exists and is non-primary
      // and linked to the missing remote image
      finish(-ENOLINK);
    } else {
      finish(-ENOENT);
    }
    return;
  } else if (r < 0) {
    derr << "error preparing remote image for replay: " << cpp_strerror(r)
         << dendl;
    finish(r);
    return;
  }

  if (!state_builder->is_remote_primary()) {
    ceph_assert(!state_builder->remote_image_id.empty());
    if (state_builder->local_image_id.empty()) {
      dout(10) << "local image does not exist and remote image is not primary"
               << dendl;
      finish(-EREMOTEIO);
      return;
    } else if (!state_builder->is_linked()) {
      dout(10) << "local image is unlinked and remote image is not primary"
               << dendl;
      finish(-EREMOTEIO);
      return;
    }
    // if the local image is linked to the remote image, we ignore that
    // the remote image is not primary so that we can replay demotion
  }

  open_remote_image();
}

// Step 3: open the remote image (read-only bootstrap access).
template <typename I>
void BootstrapRequest<I>::open_remote_image() {
  ceph_assert(*m_state_builder != nullptr);
  auto remote_image_id = (*m_state_builder)->remote_image_id;
  dout(15) << "remote_image_id=" << remote_image_id << dendl;

  update_progress("OPEN_REMOTE_IMAGE");

  auto ctx = create_context_callback<
    BootstrapRequest<I>,
    &BootstrapRequest<I>::handle_open_remote_image>(this);
  ceph_assert(*m_state_builder != nullptr);
  OpenImageRequest<I> *request = OpenImageRequest<I>::create(
    m_remote_io_ctx, &(*m_state_builder)->remote_image_ctx, remote_image_id,
    false, ctx);
  request->send();
}

template <typename I>
void BootstrapRequest<I>::handle_open_remote_image(int r) {
  dout(15) << "r=" << r << dendl;

  ceph_assert(*m_state_builder != nullptr);
  if (r < 0) {
    derr << "failed to open remote image: " << cpp_strerror(r) << dendl;
    ceph_assert((*m_state_builder)->remote_image_ctx == nullptr);
    finish(r);
    return;
  }

  // no local image id yet -> create the local copy first
  if ((*m_state_builder)->local_image_id.empty()) {
    create_local_image();
    return;
  }

  open_local_image();
}

// Step 4: open the (existing) local image.
template <typename I>
void BootstrapRequest<I>::open_local_image() {
  ceph_assert(*m_state_builder != nullptr);
  auto local_image_id = (*m_state_builder)->local_image_id;

  dout(15) << "local_image_id=" << local_image_id << dendl;

  update_progress("OPEN_LOCAL_IMAGE");

  Context *ctx = create_context_callback<
    BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_local_image>(
      this);
  OpenLocalImageRequest<I> *request = OpenLocalImageRequest<I>::create(
    m_local_io_ctx, &(*m_state_builder)->local_image_ctx, local_image_id,
    m_threads->work_queue, ctx);
  request->send();
}

// -ENOENT: image vanished since prepare -> recreate.  -EREMOTEIO: local image
// is primary -> abort replay.  From here on errors must unwind through
// close_remote_image() (m_ret_val carries the result).
template <typename I>
void BootstrapRequest<I>::handle_open_local_image(int r) {
  dout(15) << "r=" << r << dendl;

  ceph_assert(*m_state_builder != nullptr);
  auto local_image_ctx = (*m_state_builder)->local_image_ctx;
  ceph_assert((r >= 0 && local_image_ctx != nullptr) ||
              (r < 0 && local_image_ctx == nullptr));

  if (r == -ENOENT) {
    dout(10) << "local image missing" << dendl;
    create_local_image();
    return;
  } else if (r == -EREMOTEIO) {
    dout(10) << "local image is primary -- skipping image replay" << dendl;
    m_ret_val = r;
    close_remote_image();
    return;
  } else if (r < 0) {
    derr << "failed to open local image: " << cpp_strerror(r) << dendl;
    m_ret_val = r;
    close_remote_image();
    return;
  }

  prepare_replay();
}

// Step 5: let the (journal/snapshot) state builder decide whether a resync or
// an image sync is required before replay can start.
template <typename I>
void BootstrapRequest<I>::prepare_replay() {
  dout(10) << dendl;
  update_progress("PREPARE_REPLAY");

  ceph_assert(*m_state_builder != nullptr);
  auto ctx = create_context_callback<
    BootstrapRequest<I>, &BootstrapRequest<I>::handle_prepare_replay>(this);
  auto request = (*m_state_builder)->create_prepare_replay_request(
    m_local_mirror_uuid, m_progress_ctx, m_do_resync, &m_syncing, ctx);
  request->send();
}

template <typename I>
void BootstrapRequest<I>::handle_prepare_replay(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to prepare local replay: " << cpp_strerror(r) << dendl;
    m_ret_val = r;
    close_remote_image();
    return;
  } else if (*m_do_resync) {
    dout(10) << "local image resync requested" << dendl;
    close_remote_image();
    return;
  } else if ((*m_state_builder)->is_disconnected()) {
    dout(10) << "client flagged disconnected -- skipping bootstrap" << dendl;
    // The caller is expected to detect disconnect initializing remote journal.
    m_ret_val = 0;
    close_remote_image();
    return;
  } else if (m_syncing) {
    dout(10) << "local image still syncing to remote image" << dendl;
    image_sync();
    return;
  }

  close_remote_image();
}

// Create the local image (fresh create or clone, decided by the builder).
template <typename I>
void BootstrapRequest<I>::create_local_image() {
  dout(10) << dendl;
  update_progress("CREATE_LOCAL_IMAGE");

  ceph_assert(*m_state_builder != nullptr);
  auto ctx = create_context_callback<
    BootstrapRequest<I>,
    &BootstrapRequest<I>::handle_create_local_image>(this);
  auto request = (*m_state_builder)->create_local_image_request(
    m_threads, m_local_io_ctx, m_global_image_id, m_pool_meta_cache,
    m_progress_ctx, ctx);
  request->send();
}

template <typename I>
void BootstrapRequest<I>::handle_create_local_image(int r) {
  dout(15) << "r=" << r << dendl;

  if (r < 0) {
    if (r == -ENOENT) {
      // clone parent not mirrored/synced yet -- retried by the caller
      dout(10) << "parent image does not exist" << dendl;
    } else {
      derr << "failed to create local image: " << cpp_strerror(r) << dendl;
    }
    m_ret_val = r;
    close_remote_image();
    return;
  }

  open_local_image();
}

// Step 6 (optional): full image sync from the remote to the local image.
// Checks m_canceled under the lock before starting so a cancel() that raced
// prepare_replay still wins.
template <typename I>
void BootstrapRequest<I>::image_sync() {
  std::unique_lock locker{m_lock};
  if (m_canceled) {
    locker.unlock();

    m_ret_val = -ECANCELED;
    dout(10) << "request canceled" << dendl;
    close_remote_image();
    return;
  }

  dout(15) << dendl;
  ceph_assert(m_image_sync == nullptr);

  auto state_builder = *m_state_builder;
  auto sync_point_handler = state_builder->create_sync_point_handler();

  Context *ctx = create_context_callback<
    BootstrapRequest<I>, &BootstrapRequest<I>::handle_image_sync>(this);
  m_image_sync = ImageSync<I>::create(
    m_threads, state_builder->local_image_ctx, state_builder->remote_image_ctx,
    m_local_mirror_uuid, sync_point_handler, m_instance_watcher,
    m_progress_ctx, ctx);
  // extra ref so cancel() can safely poke m_image_sync while it runs
  m_image_sync->get();
  locker.unlock();

  update_progress("IMAGE_SYNC");
  m_image_sync->send();
}

template <typename I>
void BootstrapRequest<I>::handle_image_sync(int r) {
  dout(15) << "r=" << r << dendl;

  {
    std::lock_guard locker{m_lock};
    m_image_sync->put();
    m_image_sync = nullptr;

    (*m_state_builder)->destroy_sync_point_handler();
  }

  if (r < 0) {
    if (r == -ECANCELED) {
      dout(10) << "request canceled" << dendl;
    } else {
      derr << "failed to sync remote image: " << cpp_strerror(r) << dendl;
    }
    m_ret_val = r;
  }

  close_remote_image();
}

// Final step: close the remote image unless the replayer still needs it
// (e.g. journal-based replay keeps the remote image open).
template <typename I>
void BootstrapRequest<I>::close_remote_image() {
  if ((*m_state_builder)->replay_requires_remote_image()) {
    finish(m_ret_val);
    return;
  }

  dout(15) << dendl;

  update_progress("CLOSE_REMOTE_IMAGE");

  auto ctx = create_context_callback<
    BootstrapRequest<I>,
    &BootstrapRequest<I>::handle_close_remote_image>(this);
  ceph_assert(*m_state_builder != nullptr);
  (*m_state_builder)->close_remote_image(ctx);
}

template <typename I>
void BootstrapRequest<I>::handle_close_remote_image(int r) {
  dout(15) << "r=" << r << dendl;

  if (r < 0) {
    // close failures are logged but do not override the bootstrap result
    derr << "error encountered closing remote image: " << cpp_strerror(r)
         << dendl;
  }

  finish(m_ret_val);
}

// Forward human-readable progress to the optional ProgressContext.
template <typename I>
void BootstrapRequest<I>::update_progress(const std::string &description) {
  dout(15) << description << dendl;

  if (m_progress_ctx) {
    m_progress_ctx->update_progress(description);
  }
}

} // namespace image_replayer
} // namespace mirror
} // namespace rbd

template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>;

// ---- src/tools/rbd_mirror/image_replayer/BootstrapRequest.h ----
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H
#define RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H

#include "include/int_types.h"
#include "include/rados/librados.hpp"
#include "common/ceph_mutex.h"
#include "common/Timer.h"
#include "cls/rbd/cls_rbd_types.h"
#include "librbd/mirror/Types.h"
#include "tools/rbd_mirror/CancelableRequest.h"
#include "tools/rbd_mirror/Types.h"
#include <string>

class Context;

namespace journal { class CacheManagerHandler; }
namespace librbd { class ImageCtx; }

namespace rbd {
namespace mirror {

class ProgressContext;

template <typename> class ImageSync;
template <typename> class InstanceWatcher;
struct PoolMetaCache;
template <typename> struct Threads;

namespace image_replayer {

template <typename> class StateBuilder;

// Async, cancelable request that bootstraps a mirrored image for replay.
// Construct via create(); the request deletes itself on completion.  On
// success *state_builder holds the prepared replay state and *do_resync
// reports whether the caller must trigger a resync instead of replaying.
template <typename ImageCtxT = librbd::ImageCtx>
class BootstrapRequest : public CancelableRequest {
public:
  typedef rbd::mirror::ProgressContext ProgressContext;

  static BootstrapRequest* create(
      Threads<ImageCtxT>* threads,
      librados::IoCtx& local_io_ctx,
      librados::IoCtx& remote_io_ctx,
      InstanceWatcher<ImageCtxT>* instance_watcher,
      const std::string& global_image_id,
      const std::string& local_mirror_uuid,
      const RemotePoolMeta& remote_pool_meta,
      ::journal::CacheManagerHandler* cache_manager_handler,
      PoolMetaCache* pool_meta_cache,
      ProgressContext* progress_ctx,
      StateBuilder<ImageCtxT>** state_builder,
      bool* do_resync,
      Context* on_finish) {
    return new BootstrapRequest(
      threads, local_io_ctx, remote_io_ctx, instance_watcher, global_image_id,
      local_mirror_uuid, remote_pool_meta, cache_manager_handler,
      pool_meta_cache, progress_ctx, state_builder, do_resync, on_finish);
  }

  BootstrapRequest(
      Threads<ImageCtxT>* threads,
      librados::IoCtx& local_io_ctx,
      librados::IoCtx& remote_io_ctx,
      InstanceWatcher<ImageCtxT>* instance_watcher,
      const std::string& global_image_id,
      const std::string& local_mirror_uuid,
      const RemotePoolMeta& remote_pool_meta,
      ::journal::CacheManagerHandler* cache_manager_handler,
      PoolMetaCache* pool_meta_cache,
      ProgressContext* progress_ctx,
      StateBuilder<ImageCtxT>** state_builder,
      bool* do_resync,
      Context* on_finish);

  // true while an ImageSync sub-request is in flight
  bool is_syncing() const;

  void send() override;
  void cancel() override;

  std::string get_local_image_name() const;

private:
  /**
   * @verbatim
   *
   * <start>
   *    |
   *    v                                               (error)
   * PREPARE_LOCAL_IMAGE  * * * * * * * * * * * * * * * * * *
   *    |                                                   *
   *    v                                          (error)  *
   * PREPARE_REMOTE_IMAGE * * * * * * * * * * * * * * * * * *
   *    |                                                   *
   *    v                                          (error)  *
   * OPEN_REMOTE_IMAGE  * * * * * * * * * * * * * * * * * * *
   *    |                                                   *
   *    |                                                   *
   *    \----> CREATE_LOCAL_IMAGE * * * * * * * * * *   *   *
   *    |         |         ^                       *   *   *
   *    |         |         . (image DNE)           *   *   *
   *    |         v         .                       *   *   *
   *    \----> OPEN_LOCAL_IMAGE * * * * * * * * * * *   *   *
   *              |                                 *   *   *
   *              v                                 *   *   *
   *         PREPARE_REPLAY * * * * * * * * * * * * *   *   *
   *              |                                 *   *   *
   *              v (skip if not needed)            *   *   *
   *         IMAGE_SYNC * * * * * * * * * * * * * * *   *   *
   *              |                                 *   *   *
   *              /---------/                       *   *   *
   *              |                                 *   *   *
   *              v                                 *   *   *
   * CLOSE_REMOTE_IMAGE < * * * * * * * * * * * * * * * *   *
   *    |                                                   *
   *    v                                                   *
   * <finish> < * * * * * * * * * * * * * * * * * * * * * * *
   *
   * @endverbatim
   */
  Threads<ImageCtxT>* m_threads;
  librados::IoCtx &m_local_io_ctx;
  librados::IoCtx &m_remote_io_ctx;
  InstanceWatcher<ImageCtxT> *m_instance_watcher;
  std::string m_global_image_id;
  std::string m_local_mirror_uuid;
  RemotePoolMeta m_remote_pool_meta;
  ::journal::CacheManagerHandler *m_cache_manager_handler;
  PoolMetaCache* m_pool_meta_cache;
  ProgressContext *m_progress_ctx;
  StateBuilder<ImageCtxT>** m_state_builder;
  bool *m_do_resync;

  // guards m_canceled, m_image_sync, m_local_image_name (cross-thread reads)
  mutable ceph::mutex m_lock;
  bool m_canceled = false;

  // deferred result carried through the CLOSE_REMOTE_IMAGE unwind path
  int m_ret_val = 0;

  std::string m_local_image_name;
  std::string m_prepare_local_image_name;

  bool m_syncing = false;
  ImageSync<ImageCtxT> *m_image_sync = nullptr;

  void prepare_local_image();
  void handle_prepare_local_image(int r);

  void prepare_remote_image();
  void handle_prepare_remote_image(int r);

  void open_remote_image();
  void handle_open_remote_image(int r);

  void open_local_image();
  void handle_open_local_image(int r);

  void create_local_image();
  void handle_create_local_image(int r);

  void prepare_replay();
  void handle_prepare_replay(int r);

  void image_sync();
  void handle_image_sync(int r);

  void close_remote_image();
  void handle_close_remote_image(int r);

  void update_progress(const std::string &description);
};

} // namespace image_replayer
} // namespace mirror
} // namespace rbd

extern template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>;

#endif // RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H

// ---- src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc ----
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "CloseImageRequest.h"
#include "common/debug.h"
#include "common/errno.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageState.h"
#include "librbd/Utils.h"

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rbd_mirror
#undef dout_prefix
#define dout_prefix *_dout << "rbd::mirror::image_replayer::CloseImageRequest: " \
                           << this << " " << __func__

namespace rbd {
namespace mirror {
namespace image_replayer {

using librbd::util::create_context_callback;

// Closes *image_ctx via its ImageState and nulls the caller's pointer.
// Self-deleting: the request frees itself after completing on_finish.
template <typename I>
CloseImageRequest<I>::CloseImageRequest(I **image_ctx, Context *on_finish)
  : m_image_ctx(image_ctx), m_on_finish(on_finish) {
}

template <typename I>
void CloseImageRequest<I>::send() {
  close_image();
}

template <typename I>
void CloseImageRequest<I>::close_image() {
  dout(20) << dendl;

  Context *ctx = create_context_callback<
    CloseImageRequest<I>, &CloseImageRequest<I>::handle_close_image>(this);
  (*m_image_ctx)->state->close(ctx);
}

template <typename I>
void CloseImageRequest<I>::handle_close_image(int r) {
  dout(20) << ": r=" << r << dendl;

  if (r < 0) {
    derr << ": error encountered while closing image: " << cpp_strerror(r)
         << dendl;
  }

  *m_image_ctx = nullptr;

  // NOTE(review): completes with 0 even when close failed -- presumably
  // deliberate (close errors are logged and ignored); confirm with callers
  m_on_finish->complete(0);
  delete this;
}

} // namespace image_replayer
} // namespace mirror
} // namespace rbd

template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;

// ---- src/tools/rbd_mirror/image_replayer/CloseImageRequest.h ----
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
#define RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H

#include "include/int_types.h"
#include "librbd/ImageCtx.h"
#include <string>

class Context;
namespace librbd { class ImageCtx; }

namespace rbd {
namespace mirror {
namespace image_replayer {

// One-shot, self-deleting request that closes an image context and resets
// the owner's pointer to nullptr; always completes on_finish with 0.
template <typename ImageCtxT = librbd::ImageCtx>
class CloseImageRequest {
public:
  static CloseImageRequest* create(ImageCtxT **image_ctx, Context *on_finish) {
    return new CloseImageRequest(image_ctx, on_finish);
  }

  CloseImageRequest(ImageCtxT **image_ctx, Context *on_finish);

  void send();

private:
  /**
   * @verbatim
   *
   * <start>
   *    |
   *    v
   * CLOSE_IMAGE
   *    |
   *    v
   * <finish>
   *
   * @endverbatim
   */
  ImageCtxT **m_image_ctx;
  Context *m_on_finish;

  void close_image();
  void handle_close_image(int r);
};

} // namespace image_replayer
} // namespace mirror
} // namespace rbd

extern template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>;

#endif // RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H
// ---- src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc ----
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "CreateImageRequest.h"
#include "CloseImageRequest.h"
#include "OpenImageRequest.h"
#include "common/debug.h"
#include "common/errno.h"
#include "cls/rbd/cls_rbd_client.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageState.h"
#include "librbd/internal.h"
#include "librbd/Utils.h"
#include "librbd/asio/ContextWQ.h"
#include "librbd/image/CreateRequest.h"
#include "librbd/image/CloneRequest.h"
#include "tools/rbd_mirror/PoolMetaCache.h"
#include "tools/rbd_mirror/Types.h"
#include "tools/rbd_mirror/Threads.h"
#include "tools/rbd_mirror/image_replayer/Utils.h"
#include "tools/rbd_mirror/image_sync/Utils.h"
#include <boost/algorithm/string/predicate.hpp>

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rbd_mirror
#undef dout_prefix
#define dout_prefix *_dout << "rbd::mirror::image_replayer::CreateImageRequest: " \
                           << this << " " << __func__ << ": "

using librbd::util::create_async_context_callback;
using librbd::util::create_context_callback;
using librbd::util::create_rados_callback;

namespace rbd {
namespace mirror {
namespace image_replayer {

// Creates the local copy of a remote mirrored image: a plain create for
// non-clones, or a clone of the already-mirrored parent for cloned images.
// Self-deleting; completes m_on_finish with the result.
template <typename I>
CreateImageRequest<I>::CreateImageRequest(
    Threads<I>* threads,
    librados::IoCtx &local_io_ctx,
    const std::string &global_image_id,
    const std::string &remote_mirror_uuid,
    const std::string &local_image_name,
    const std::string &local_image_id,
    I *remote_image_ctx,
    PoolMetaCache* pool_meta_cache,
    cls::rbd::MirrorImageMode mirror_image_mode,
    Context *on_finish)
  : m_threads(threads), m_local_io_ctx(local_io_ctx),
    m_global_image_id(global_image_id),
    m_remote_mirror_uuid(remote_mirror_uuid),
    m_local_image_name(local_image_name), m_local_image_id(local_image_id),
    m_remote_image_ctx(remote_image_ctx),
    m_pool_meta_cache(pool_meta_cache),
    m_mirror_image_mode(mirror_image_mode), m_on_finish(on_finish) {
}

// validate_parent() decides the path: pool_id == -1 means the remote image
// is not a clone.
template <typename I>
void CreateImageRequest<I>::send() {
  int r = validate_parent();
  if (r < 0) {
    error(r);
    return;
  }

  if (m_remote_parent_spec.pool_id == -1) {
    create_image();
  } else {
    get_parent_global_image_id();
  }
}

// Non-clone path: create a local image mirroring the remote one's geometry.
template <typename I>
void CreateImageRequest<I>::create_image() {
  dout(10) << dendl;

  using klass = CreateImageRequest<I>;
  Context *ctx = create_context_callback<
    klass, &klass::handle_create_image>(this);

  // hold image_lock so size/features read in populate_image_options are stable
  std::shared_lock image_locker{m_remote_image_ctx->image_lock};

  auto& config{
    reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf};

  librbd::ImageOptions image_options;
  populate_image_options(&image_options);

  auto req = librbd::image::CreateRequest<I>::create(
    config, m_local_io_ctx, m_local_image_name, m_local_image_id,
    m_remote_image_ctx->size, image_options, 0U, m_mirror_image_mode,
    m_global_image_id, m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue,
    ctx);
  req->send();
}

template <typename I>
void CreateImageRequest<I>::handle_create_image(int r) {
  dout(10) << "r=" << r << dendl;
  if (r == -EBADF) {
    // image id already claimed by another creation attempt
    dout(5) << "image id " << m_local_image_id << " already in-use" << dendl;
    finish(r);
    return;
  } else if (r < 0) {
    derr << "failed to create local image: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  finish(0);
}

// Clone path step 1: map the remote parent image id to its global image id.
template <typename I>
void CreateImageRequest<I>::get_parent_global_image_id() {
  dout(10) << dendl;

  librados::ObjectReadOperation op;
  librbd::cls_client::mirror_image_get_start(&op,
                                             m_remote_parent_spec.image_id);

  librados::AioCompletion *aio_comp = create_rados_callback<
    CreateImageRequest<I>,
    &CreateImageRequest<I>::handle_get_parent_global_image_id>(this);
  m_out_bl.clear();
  int r = m_remote_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
                                             &m_out_bl);
  ceph_assert(r == 0);
  aio_comp->release();
}

template <typename I>
void CreateImageRequest<I>::handle_get_parent_global_image_id(int r) {
  dout(10) << "r=" << r << dendl;
  if (r == 0) {
    cls::rbd::MirrorImage mirror_image;
    auto iter = m_out_bl.cbegin();
    r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image);
    if (r == 0) {
      m_parent_global_image_id = mirror_image.global_image_id;
      dout(15) << "parent_global_image_id=" << m_parent_global_image_id
               << dendl;
    }
  }

  if (r == -ENOENT) {
    // parent not mirrored: caller retries once the parent is replicated
    dout(10) << "parent image " << m_remote_parent_spec.image_id
             << " not mirrored" << dendl;
    finish(r);
    return;
  } else if (r < 0) {
    derr << "failed to retrieve global image id for parent image "
         << m_remote_parent_spec.image_id << ": " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  get_local_parent_image_id();
}

// Clone path step 2: resolve the local image id for the parent's global id.
template <typename I>
void CreateImageRequest<I>::get_local_parent_image_id() {
  dout(10) << dendl;

  librados::ObjectReadOperation op;
  librbd::cls_client::mirror_image_get_image_id_start(
    &op, m_parent_global_image_id);

  librados::AioCompletion *aio_comp = create_rados_callback<
    CreateImageRequest<I>,
    &CreateImageRequest<I>::handle_get_local_parent_image_id>(this);
  m_out_bl.clear();
  int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op,
                                            &m_out_bl);
  ceph_assert(r == 0);
  aio_comp->release();
}

template <typename I>
void CreateImageRequest<I>::handle_get_local_parent_image_id(int r) {
  dout(10) << "r=" << r << dendl;

  if (r == 0) {
    auto iter = m_out_bl.cbegin();
    r = librbd::cls_client::mirror_image_get_image_id_finish(
      &iter, &m_local_parent_spec.image_id);
  }

  if (r == -ENOENT) {
    dout(10) << "parent image " << m_parent_global_image_id << " not "
             << "registered locally" << dendl;
    finish(r);
    return;
  } else if (r < 0) {
    derr << "failed to retrieve local image id for parent image "
         << m_parent_global_image_id << ": " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  open_remote_parent_image();
}

// Clone path step 3: open the remote parent (read-only) to inspect its snaps.
template <typename I>
void CreateImageRequest<I>::open_remote_parent_image() {
  dout(10) << dendl;

  Context *ctx = create_context_callback<
    CreateImageRequest<I>,
    &CreateImageRequest<I>::handle_open_remote_parent_image>(this);
  OpenImageRequest<I> *request = OpenImageRequest<I>::create(
    m_remote_parent_io_ctx, &m_remote_parent_image_ctx,
    m_remote_parent_spec.image_id, true, ctx);
  request->send();
}

template <typename I>
void CreateImageRequest<I>::handle_open_remote_parent_image(int r) {
  dout(10) << "r=" << r << dendl;
  if (r < 0) {
    derr << "failed to open remote parent image " << m_parent_pool_name << "/"
         << m_remote_parent_spec.image_id << dendl;
    finish(r);
    return;
  }

  clone_image();
}

// Clone path step 4: verify the parent snapshot exists and the parent is
// fully synced to this cluster, then clone it locally.
template <typename I>
void CreateImageRequest<I>::clone_image() {
  dout(10) << dendl;

  LocalPoolMeta local_parent_pool_meta;
  int r = m_pool_meta_cache->get_local_pool_meta(
    m_local_parent_io_ctx.get_id(), &local_parent_pool_meta);
  if (r < 0) {
    derr << "failed to retrieve local parent mirror uuid for pool "
         << m_local_parent_io_ctx.get_id() << dendl;
    m_ret_val = r;
    close_remote_parent_image();
    return;
  }

  // ensure no image sync snapshots for the local cluster exist in the
  // remote image
  bool found_parent_snap = false;
  bool found_image_sync_snap = false;
  std::string snap_name;
  cls::rbd::SnapshotNamespace snap_namespace;
  {
    auto snap_prefix = image_sync::util::get_snapshot_name_prefix(
      local_parent_pool_meta.mirror_uuid);

    std::shared_lock remote_image_locker(m_remote_parent_image_ctx->image_lock);
    for (auto snap_info : m_remote_parent_image_ctx->snap_info) {
      if (snap_info.first == m_remote_parent_spec.snap_id) {
        found_parent_snap = true;
        snap_name = snap_info.second.name;
        snap_namespace = snap_info.second.snap_namespace;
      } else if (boost::starts_with(snap_info.second.name, snap_prefix)) {
        found_image_sync_snap = true;
      }
    }
  }

  if (!found_parent_snap) {
    dout(15) << "remote parent image snapshot not found" << dendl;
    m_ret_val = -ENOENT;
    close_remote_parent_image();
    return;
  } else if (found_image_sync_snap) {
    // a sync snapshot means the parent is still mid-sync to this cluster
    dout(15) << "parent image not synced to local cluster" << dendl;
    m_ret_val = -ENOENT;
    close_remote_parent_image();
    return;
  }

  librbd::ImageOptions opts;
  populate_image_options(&opts);

  auto& config{
    reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf};

  using klass = CreateImageRequest<I>;
  Context *ctx = create_context_callback<
    klass, &klass::handle_clone_image>(this);

  librbd::image::CloneRequest<I> *req = librbd::image::CloneRequest<I>::create(
    config, m_local_parent_io_ctx, m_local_parent_spec.image_id, snap_name,
    snap_namespace, CEPH_NOSNAP, m_local_io_ctx, m_local_image_name,
    m_local_image_id, opts, m_mirror_image_mode, m_global_image_id,
    m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue, ctx);
  req->send();
}

template <typename I>
void CreateImageRequest<I>::handle_clone_image(int r) {
  dout(10) << "r=" << r << dendl;
  if (r == -EBADF) {
    dout(5) << "image id " << m_local_image_id << " already in-use" << dendl;
    m_ret_val = r;
  } else if (r < 0) {
    derr << "failed to clone image " << m_parent_pool_name << "/"
         << m_remote_parent_spec.image_id << " to "
         << m_local_image_name << dendl;
    m_ret_val = r;
  }

  close_remote_parent_image();
}

// Unwind: always close the remote parent before finishing (m_ret_val carries
// the real result; close errors are only logged).
template <typename I>
void CreateImageRequest<I>::close_remote_parent_image() {
  dout(10) << dendl;
  Context *ctx = create_context_callback<
    CreateImageRequest<I>,
    &CreateImageRequest<I>::handle_close_remote_parent_image>(this);
  CloseImageRequest<I> *request = CloseImageRequest<I>::create(
    &m_remote_parent_image_ctx, ctx);
  request->send();
}

template <typename I>
void CreateImageRequest<I>::handle_close_remote_parent_image(int r) {
  dout(10) << "r=" << r << dendl;
  if (r < 0) {
    derr << "error encountered closing remote parent image: "
         << cpp_strerror(r) << dendl;
  }

  finish(m_ret_val);
}

// Defer an early failure through the work queue so on_finish never runs
// synchronously inside send().
template <typename I>
void CreateImageRequest<I>::error(int r) {
  dout(10) << "r=" << r << dendl;

  m_threads->work_queue->queue(create_context_callback<
    CreateImageRequest<I>, &CreateImageRequest<I>::finish>(this), r);
}

template <typename I>
void CreateImageRequest<I>::finish(int r) {
  dout(10) << "r=" << r << dendl;
  m_on_finish->complete(r);
  delete this;
}

// Determine the (single) parent of the remote image by scanning HEAD and all
// snapshots; opens matching remote/local parent pools.  Returns 0 with
// pool_id == -1 when the image is not a clone.
template <typename I>
int CreateImageRequest<I>::validate_parent() {
  std::shared_lock owner_locker{m_remote_image_ctx->owner_lock};
  std::shared_lock image_locker{m_remote_image_ctx->image_lock};

  m_remote_parent_spec = m_remote_image_ctx->parent_md.spec;

  // scan all remote snapshots for a linked parent
  for (auto &snap_info_pair : m_remote_image_ctx->snap_info) {
    auto &parent_spec = snap_info_pair.second.parent.spec;
    if (parent_spec.pool_id == -1) {
      continue;
    } else if (m_remote_parent_spec.pool_id == -1) {
      m_remote_parent_spec = parent_spec;
      continue;
    }

    // mirroring only supports a single parent across all snapshots
    if (m_remote_parent_spec != parent_spec) {
      derr << "remote image parent spec mismatch" << dendl;
      return -EINVAL;
    }
  }

  if (m_remote_parent_spec.pool_id == -1) {
    return 0;
  }

  // map remote parent pool to local parent pool
  int r = librbd::util::create_ioctx(
    m_remote_image_ctx->md_ctx, "remote parent pool",
    m_remote_parent_spec.pool_id, m_remote_parent_spec.pool_namespace,
    &m_remote_parent_io_ctx);
  if (r < 0) {
    derr << "failed to open remote parent pool " << m_remote_parent_spec.pool_id
         << ": " << cpp_strerror(r) << dendl;
    return r;
  }

  m_parent_pool_name = m_remote_parent_io_ctx.get_pool_name();

  // local pool is matched by name (pools may have different ids per cluster)
  librados::Rados local_rados(m_local_io_ctx);
  r = local_rados.ioctx_create(m_parent_pool_name.c_str(),
                               m_local_parent_io_ctx);
  if (r < 0) {
    derr << "failed to open local parent pool " << m_parent_pool_name << ": "
         << cpp_strerror(r) << dendl;
    return r;
  }
  m_local_parent_io_ctx.set_namespace(m_remote_parent_io_ctx.get_namespace());

  return 0;
}

// Copy the remote image's geometry/features into creation options and pick a
// data pool and clone format for the local image.
template <typename I>
void CreateImageRequest<I>::populate_image_options(
    librbd::ImageOptions* image_options) {
  image_options->set(RBD_IMAGE_OPTION_FEATURES,
                     m_remote_image_ctx->features);
  image_options->set(RBD_IMAGE_OPTION_ORDER, m_remote_image_ctx->order);
  image_options->set(RBD_IMAGE_OPTION_STRIPE_UNIT,
                     m_remote_image_ctx->stripe_unit);
  image_options->set(RBD_IMAGE_OPTION_STRIPE_COUNT,
                     m_remote_image_ctx->stripe_count);

  // Determine the data pool for the local image as follows:
  // 1. If the local pool has a default data pool, use it.
  // 2. If the remote image has a data pool different from its metadata pool and
  //    a pool with the same name exists locally, use it.
  // 3. Don't set the data pool explicitly.
  // NOTE(review): reads rbd_default_data_pool from g_ceph_context while
  // create_image()/clone_image() use the local io-ctx's conf -- presumably
  // equivalent for a single-cluster daemon; confirm for multi-conf setups.
  std::string data_pool;
  librados::Rados local_rados(m_local_io_ctx);
  auto default_data_pool = g_ceph_context->_conf.get_val<std::string>("rbd_default_data_pool");
  auto remote_md_pool = m_remote_image_ctx->md_ctx.get_pool_name();
  auto remote_data_pool = m_remote_image_ctx->data_ctx.get_pool_name();

  if (default_data_pool != "") {
    data_pool = default_data_pool;
  } else if (remote_data_pool != remote_md_pool) {
    if (local_rados.pool_lookup(remote_data_pool.c_str()) >= 0) {
      data_pool = remote_data_pool;
    }
  }

  if (data_pool != "") {
    image_options->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool);
  }

  if (m_remote_parent_spec.pool_id != -1) {
    // clone v2 only if the remote image was created as a v2 clone child
    uint64_t clone_format = 1;
    if (m_remote_image_ctx->test_op_features(
          RBD_OPERATION_FEATURE_CLONE_CHILD)) {
      clone_format = 2;
    }
    image_options->set(RBD_IMAGE_OPTION_CLONE_FORMAT, clone_format);
  }
}

} // namespace image_replayer
} // namespace mirror
} // namespace rbd

template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>;

// ---- src/tools/rbd_mirror/image_replayer/CreateImageRequest.h ----
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H
#define RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H

#include "include/int_types.h"
#include "include/types.h"
#include "include/rados/librados.hpp"
#include "cls/rbd/cls_rbd_types.h"
#include "librbd/Types.h"
#include <string>

class Context;
namespace librbd { class ImageCtx; }
namespace librbd { class ImageOptions; }

namespace rbd {
namespace mirror {

class PoolMetaCache;
template <typename> struct Threads;

namespace image_replayer {
+class CreateImageRequest { +public: + static CreateImageRequest *create( + Threads<ImageCtxT> *threads, + librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + ImageCtxT *remote_image_ctx, + PoolMetaCache* pool_meta_cache, + cls::rbd::MirrorImageMode mirror_image_mode, + Context *on_finish) { + return new CreateImageRequest(threads, local_io_ctx, global_image_id, + remote_mirror_uuid, local_image_name, + local_image_id, remote_image_ctx, + pool_meta_cache, mirror_image_mode, + on_finish); + } + + CreateImageRequest( + Threads<ImageCtxT> *threads, librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + ImageCtxT *remote_image_ctx, + PoolMetaCache* pool_meta_cache, + cls::rbd::MirrorImageMode mirror_image_mode, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * | * + * | (non-clone) * + * |\------------> CREATE_IMAGE ---------------------\ * (error) + * | | * + * | (clone) | * + * \-------------> GET_PARENT_GLOBAL_IMAGE_ID * * * | * * * * + * | | * * + * v | * + * GET_LOCAL_PARENT_IMAGE_ID * * * * | * * * * + * | | * * + * v | * + * OPEN_REMOTE_PARENT * * * * * * * | * * * * + * | | * * + * v | * + * CLONE_IMAGE | * + * | | * + * v | * + * CLOSE_REMOTE_PARENT | * + * | v * + * \------------------------> <finish> < * * + * @endverbatim + */ + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_local_io_ctx; + std::string m_global_image_id; + std::string m_remote_mirror_uuid; + std::string m_local_image_name; + std::string m_local_image_id; + ImageCtxT *m_remote_image_ctx; + PoolMetaCache* m_pool_meta_cache; + cls::rbd::MirrorImageMode m_mirror_image_mode; + Context *m_on_finish; + + librados::IoCtx 
m_remote_parent_io_ctx; + ImageCtxT *m_remote_parent_image_ctx = nullptr; + cls::rbd::ParentImageSpec m_remote_parent_spec; + + librados::IoCtx m_local_parent_io_ctx; + cls::rbd::ParentImageSpec m_local_parent_spec; + + bufferlist m_out_bl; + std::string m_parent_global_image_id; + std::string m_parent_pool_name; + int m_ret_val = 0; + + void create_image(); + void handle_create_image(int r); + + void get_parent_global_image_id(); + void handle_get_parent_global_image_id(int r); + + void get_local_parent_image_id(); + void handle_get_local_parent_image_id(int r); + + void open_remote_parent_image(); + void handle_open_remote_parent_image(int r); + + void clone_image(); + void handle_clone_image(int r); + + void close_remote_parent_image(); + void handle_close_remote_parent_image(int r); + + void error(int r); + void finish(int r); + + int validate_parent(); + + void populate_image_options(librbd::ImageOptions* image_options); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc new file mode 100644 index 000000000..74e975373 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc @@ -0,0 +1,85 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << 
"GetMirrorImageIdRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_rados_callback; + +template <typename I> +void GetMirrorImageIdRequest<I>::send() { + dout(20) << dendl; + get_image_id(); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::get_image_id() { + dout(20) << dendl; + + // attempt to cross-reference a image id by the global image id + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + GetMirrorImageIdRequest<I>, + &GetMirrorImageIdRequest<I>::handle_get_image_id>( + this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::handle_get_image_id(int r) { + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish( + &iter, m_image_id); + } + + dout(20) << "r=" << r << ", " + << "image_id=" << *m_image_id << dendl; + + if (r < 0) { + if (r == -ENOENT) { + dout(10) << "global image " << m_global_image_id << " not registered" + << dendl; + } else { + derr << "failed to retrieve image id: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::finish(int r) { + dout(20) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h new file mode 100644 index 000000000..b26645138 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h @@ -0,0 
+1,75 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados_fwd.hpp" +#include <string> + +namespace librbd { struct ImageCtx; } + +struct Context; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class GetMirrorImageIdRequest { +public: + static GetMirrorImageIdRequest *create(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *image_id, + Context *on_finish) { + return new GetMirrorImageIdRequest(io_ctx, global_image_id, image_id, + on_finish); + } + + GetMirrorImageIdRequest(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *image_id, + Context *on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), + m_image_id(image_id), m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_IMAGE_ID + * | + * v + * <finish> + + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + std::string *m_image_id; + Context *m_on_finish; + + bufferlist m_out_bl; + + void get_image_id(); + void handle_get_image_id(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc new file mode 100644 index 000000000..e6ab382be --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc @@ -0,0 +1,79 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t 
-*- +// vim: ts=8 sw=2 smarttab + +#include "OpenImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenImageRequest: " \ + << this << " " << __func__ << " " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +template <typename I> +OpenImageRequest<I>::OpenImageRequest(librados::IoCtx &io_ctx, I **image_ctx, + const std::string &image_id, + bool read_only, Context *on_finish) + : m_io_ctx(io_ctx), m_image_ctx(image_ctx), m_image_id(image_id), + m_read_only(read_only), m_on_finish(on_finish) { +} + +template <typename I> +void OpenImageRequest<I>::send() { + send_open_image(); +} + +template <typename I> +void OpenImageRequest<I>::send_open_image() { + dout(20) << dendl; + + *m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, m_read_only); + + if (!m_read_only) { + // ensure non-primary images can be modified + (*m_image_ctx)->read_only_mask = ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + } + + Context *ctx = create_context_callback< + OpenImageRequest<I>, &OpenImageRequest<I>::handle_open_image>( + this); + (*m_image_ctx)->state->open(0, ctx); +} + +template <typename I> +void OpenImageRequest<I>::handle_open_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to open image '" << m_image_id << "': " + << cpp_strerror(r) << dendl; + *m_image_ctx = nullptr; + } + + finish(r); +} + +template <typename I> +void OpenImageRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class 
rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h new file mode 100644 index 000000000..01ab31171 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h @@ -0,0 +1,71 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include <string> + +class Context; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class OpenImageRequest { +public: + static OpenImageRequest* create(librados::IoCtx &io_ctx, + ImageCtxT **image_ctx, + const std::string &image_id, + bool read_only, Context *on_finish) { + return new OpenImageRequest(io_ctx, image_ctx, image_id, read_only, + on_finish); + } + + OpenImageRequest(librados::IoCtx &io_ctx, ImageCtxT **image_ctx, + const std::string &image_id, bool read_only, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + librados::IoCtx &m_io_ctx; + ImageCtxT **m_image_ctx; + std::string m_image_id; + bool m_read_only; + Context *m_on_finish; + + void send_open_image(); + void handle_open_image(int r); + + void send_close_image(int r); + void handle_close_image(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc 
b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc new file mode 100644 index 000000000..7f8d9608e --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc @@ -0,0 +1,292 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "CloseImageRequest.h" +#include "OpenLocalImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/exclusive_lock/Policy.h" +#include "librbd/journal/Policy.h" +#include "librbd/mirror/GetInfoRequest.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenLocalImageRequest: " \ + << this << " " << __func__ << " " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +namespace { + +template <typename I> +struct MirrorExclusiveLockPolicy : public librbd::exclusive_lock::Policy { + I *image_ctx; + + MirrorExclusiveLockPolicy(I *image_ctx) : image_ctx(image_ctx) { + } + + bool may_auto_request_lock() override { + return false; + } + + int lock_requested(bool force) override { + int r = -EROFS; + { + std::shared_lock owner_locker{image_ctx->owner_lock}; + std::shared_lock image_locker{image_ctx->image_lock}; + if (image_ctx->journal == nullptr || image_ctx->journal->is_tag_owner()) { + r = 0; + } + } + + if (r == 0) { + // if the local image journal has been closed or if it was (force) + // promoted allow the lock to be released to another client + image_ctx->exclusive_lock->release_lock(nullptr); + } + return r; + } + + bool accept_blocked_request( + librbd::exclusive_lock::OperationRequestType request_type) 
override { + switch (request_type) { + case librbd::exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE: + case librbd::exclusive_lock::OPERATION_REQUEST_TYPE_FORCE_PROMOTION: + return true; + default: + return false; + } + } +}; + +struct MirrorJournalPolicy : public librbd::journal::Policy { + librbd::asio::ContextWQ *work_queue; + + MirrorJournalPolicy(librbd::asio::ContextWQ *work_queue) + : work_queue(work_queue) { + } + + bool append_disabled() const override { + // avoid recording any events to the local journal + return true; + } + bool journal_disabled() const override { + return false; + } + + void allocate_tag_on_lock(Context *on_finish) override { + // rbd-mirror will manually create tags by copying them from the peer + work_queue->queue(on_finish, 0); + } +}; + +} // anonymous namespace + +template <typename I> +OpenLocalImageRequest<I>::OpenLocalImageRequest( + librados::IoCtx &local_io_ctx, + I **local_image_ctx, + const std::string &local_image_id, + librbd::asio::ContextWQ *work_queue, + Context *on_finish) + : m_local_io_ctx(local_io_ctx), m_local_image_ctx(local_image_ctx), + m_local_image_id(local_image_id), m_work_queue(work_queue), + m_on_finish(on_finish) { +} + +template <typename I> +void OpenLocalImageRequest<I>::send() { + send_open_image(); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_open_image() { + dout(20) << dendl; + + *m_local_image_ctx = I::create("", m_local_image_id, nullptr, + m_local_io_ctx, false); + + // ensure non-primary images can be modified + (*m_local_image_ctx)->read_only_mask = + ~librbd::IMAGE_READ_ONLY_FLAG_NON_PRIMARY; + + { + std::scoped_lock locker{(*m_local_image_ctx)->owner_lock, + (*m_local_image_ctx)->image_lock}; + (*m_local_image_ctx)->set_exclusive_lock_policy( + new MirrorExclusiveLockPolicy<I>(*m_local_image_ctx)); + (*m_local_image_ctx)->set_journal_policy( + new MirrorJournalPolicy(m_work_queue)); + } + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, 
&OpenLocalImageRequest<I>::handle_open_image>( + this); + (*m_local_image_ctx)->state->open(0, ctx); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_open_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + if (r == -ENOENT) { + dout(10) << ": local image does not exist" << dendl; + } else { + derr << ": failed to open image '" << m_local_image_id << "': " + << cpp_strerror(r) << dendl; + } + *m_local_image_ctx = nullptr; + finish(r); + return; + } + + send_get_mirror_info(); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_get_mirror_info() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, + &OpenLocalImageRequest<I>::handle_get_mirror_info>( + this); + auto request = librbd::mirror::GetInfoRequest<I>::create( + **m_local_image_ctx, &m_mirror_image, &m_promotion_state, + &m_primary_mirror_uuid, ctx); + request->send(); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_get_mirror_info(int r) { + dout(20) << ": r=" << r << dendl; + + if (r == -ENOENT) { + dout(5) << ": local image is not mirrored" << dendl; + send_close_image(r); + return; + } else if (r < 0) { + derr << ": error querying local image primary status: " << cpp_strerror(r) + << dendl; + send_close_image(r); + return; + } + + if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) { + dout(5) << ": local image mirroring is being disabled" << dendl; + send_close_image(-ENOENT); + return; + } + + // if the local image owns the tag -- don't steal the lock since + // we aren't going to mirror peer data into this image anyway + if (m_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) { + dout(10) << ": local image is primary -- skipping image replay" << dendl; + send_close_image(-EREMOTEIO); + return; + } + + send_lock_image(); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_lock_image() { + std::shared_lock owner_locker{(*m_local_image_ctx)->owner_lock}; + 
if ((*m_local_image_ctx)->exclusive_lock == nullptr) { + owner_locker.unlock(); + if (m_mirror_image.mode == cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT) { + finish(0); + } else { + derr << ": image does not support exclusive lock" << dendl; + send_close_image(-EINVAL); + } + return; + } + + dout(20) << dendl; + + // disallow any proxied maintenance operations before grabbing lock + (*m_local_image_ctx)->exclusive_lock->block_requests(-EROFS); + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_lock_image>( + this); + + (*m_local_image_ctx)->exclusive_lock->acquire_lock(ctx); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_lock_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to lock image '" << m_local_image_id << "': " + << cpp_strerror(r) << dendl; + send_close_image(r); + return; + } + + { + std::shared_lock owner_locker{(*m_local_image_ctx)->owner_lock}; + if ((*m_local_image_ctx)->exclusive_lock == nullptr || + !(*m_local_image_ctx)->exclusive_lock->is_lock_owner()) { + derr << ": image is not locked" << dendl; + send_close_image(-EBUSY); + return; + } + } + + finish(0); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_close_image(int r) { + dout(20) << dendl; + + if (m_ret_val == 0 && r < 0) { + m_ret_val = r; + } + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_close_image>( + this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + m_local_image_ctx, ctx); + request->send(); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_close_image(int r) { + dout(20) << dendl; + + ceph_assert(r == 0); + finish(m_ret_val); +} + +template <typename I> +void OpenLocalImageRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + 
+template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h new file mode 100644 index 000000000..9a642bc39 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h @@ -0,0 +1,97 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/mirror/Types.h" +#include <string> + +class Context; +namespace librbd { +class ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class OpenLocalImageRequest { +public: + static OpenLocalImageRequest* create(librados::IoCtx &local_io_ctx, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + librbd::asio::ContextWQ *work_queue, + Context *on_finish) { + return new OpenLocalImageRequest(local_io_ctx, local_image_ctx, + local_image_id, work_queue, on_finish); + } + + OpenLocalImageRequest(librados::IoCtx &local_io_ctx, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + librbd::asio::ContextWQ *work_queue, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE * * * * * * * * + * | * + * v * + * GET_MIRROR_INFO * * * * * + * | * + * v (skip if primary) v + * LOCK_IMAGE * * * > CLOSE_IMAGE + * | | + * v | + * <finish> <---------------/ + * + * @endverbatim + */ + librados::IoCtx &m_local_io_ctx; + ImageCtxT **m_local_image_ctx; + std::string m_local_image_id; + librbd::asio::ContextWQ *m_work_queue; + Context *m_on_finish; + + 
cls::rbd::MirrorImage m_mirror_image; + librbd::mirror::PromotionState m_promotion_state = + librbd::mirror::PROMOTION_STATE_NON_PRIMARY; + std::string m_primary_mirror_uuid; + int m_ret_val = 0; + + void send_open_image(); + void handle_open_image(int r); + + void send_get_mirror_info(); + void handle_get_mirror_info(int r); + + void send_lock_image(); + void handle_lock_image(int r); + + void send_close_image(int r); + void handle_close_image(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc new file mode 100644 index 000000000..b1fef7254 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc @@ -0,0 +1,197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/mirror/GetInfoRequest.h" +#include "tools/rbd_mirror/ImageDeleter.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" +#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "PrepareLocalImageRequest: " << this << " " \ + << __func__ << ": 
" + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void PrepareLocalImageRequest<I>::send() { + dout(10) << dendl; + get_local_image_id(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_local_image_id() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_local_image_id>(this); + auto req = GetMirrorImageIdRequest<I>::create(m_io_ctx, m_global_image_id, + &m_local_image_id, ctx); + req->send(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_local_image_id(int r) { + dout(10) << "r=" << r << ", " + << "local_image_id=" << m_local_image_id << dendl; + + if (r < 0) { + finish(r); + return; + } + + get_local_image_name(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_local_image_name() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::dir_get_name_start(&op, m_local_image_id); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_local_image_name>(this); + int r = m_io_ctx.aio_operate(RBD_DIRECTORY, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_local_image_name(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::dir_get_name_finish(&it, m_local_image_name); + } + + if (r == -ENOENT) { + // proceed we should have a mirror image record if we got this far + dout(10) << "image does not exist for local image id " << m_local_image_id + << dendl; + *m_local_image_name = ""; + } else if (r < 0) { + derr << "failed to retrieve image name: " << cpp_strerror(r) << dendl; + finish(r); + return; + 
} + + get_mirror_info(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_mirror_info() { + dout(10) << dendl; + + auto ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_mirror_info>(this); + auto req = librbd::mirror::GetInfoRequest<I>::create( + m_io_ctx, m_work_queue, m_local_image_id, &m_mirror_image, + &m_promotion_state, &m_primary_mirror_uuid, ctx); + req->send(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_mirror_info(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << "failed to retrieve local mirror image info: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_CREATING) { + dout(5) << "local image is still in creating state, issuing a removal" + << dendl; + move_to_trash(); + return; + } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) { + dout(5) << "local image mirroring is in disabling state" << dendl; + finish(-ERESTART); + return; + } + + switch (m_mirror_image.mode) { + case cls::rbd::MIRROR_IMAGE_MODE_JOURNAL: + // journal-based local image exists + { + auto state_builder = journal::StateBuilder<I>::create(m_global_image_id); + state_builder->local_primary_mirror_uuid = m_primary_mirror_uuid; + *m_state_builder = state_builder; + } + break; + case cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT: + // snapshot-based local image exists + *m_state_builder = snapshot::StateBuilder<I>::create(m_global_image_id); + break; + default: + derr << "unsupported mirror image mode " << m_mirror_image.mode << " " + << "for image " << m_global_image_id << dendl; + finish(-EOPNOTSUPP); + break; + } + + dout(10) << "local_image_id=" << m_local_image_id << ", " + << "local_promotion_state=" << m_promotion_state << ", " + << "local_primary_mirror_uuid=" << m_primary_mirror_uuid << dendl; + (*m_state_builder)->local_image_id = m_local_image_id; + 
(*m_state_builder)->local_promotion_state = m_promotion_state; + finish(0); +} + +template <typename I> +void PrepareLocalImageRequest<I>::move_to_trash() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_move_to_trash>(this); + ImageDeleter<I>::trash_move(m_io_ctx, m_global_image_id, + false, m_work_queue, ctx); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_move_to_trash(int r) { + dout(10) << ": r=" << r << dendl; + + finish(-ENOENT); +} + +template <typename I> +void PrepareLocalImageRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h new file mode 100644 index 000000000..6372169ff --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h @@ -0,0 +1,115 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados_fwd.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/mirror/Types.h" +#include <string> + +struct Context; + +namespace librbd { +struct ImageCtx; +namespace asio { struct ContextWQ; } +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename> class StateBuilder; + +template <typename ImageCtxT = librbd::ImageCtx> +class PrepareLocalImageRequest { +public: + static PrepareLocalImageRequest *create( + librados::IoCtx &io_ctx, + const std::string &global_image_id, + 
std::string *local_image_name, + StateBuilder<ImageCtxT>** state_builder, + librbd::asio::ContextWQ *work_queue, + Context *on_finish) { + return new PrepareLocalImageRequest(io_ctx, global_image_id, + local_image_name, state_builder, + work_queue, on_finish); + } + + PrepareLocalImageRequest( + librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *local_image_name, + StateBuilder<ImageCtxT>** state_builder, + librbd::asio::ContextWQ *work_queue, + Context *on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), + m_local_image_name(local_image_name), m_state_builder(state_builder), + m_work_queue(work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_LOCAL_IMAGE_ID + * | + * v + * GET_LOCAL_IMAGE_NAME + * | + * v + * GET_MIRROR_INFO + * | + * | (if the image mirror state is CREATING) + * v + * TRASH_MOVE + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + std::string *m_local_image_name; + StateBuilder<ImageCtxT>** m_state_builder; + librbd::asio::ContextWQ *m_work_queue; + Context *m_on_finish; + + bufferlist m_out_bl; + std::string m_local_image_id; + cls::rbd::MirrorImage m_mirror_image; + librbd::mirror::PromotionState m_promotion_state; + std::string m_primary_mirror_uuid; + + void get_local_image_id(); + void handle_get_local_image_id(int r); + + void get_local_image_name(); + void handle_get_local_image_name(int r); + + void get_mirror_info(); + void handle_get_mirror_info(int r); + + void move_to_trash(); + void handle_move_to_trash(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc 
b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc new file mode 100644 index 000000000..45a44a300 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc @@ -0,0 +1,283 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "journal/Settings.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/mirror/GetInfoRequest.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" +#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "PrepareRemoteImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void PrepareRemoteImageRequest<I>::send() { + if (*m_state_builder != nullptr) { + (*m_state_builder)->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid; + auto state_builder = dynamic_cast<snapshot::StateBuilder<I>*>(*m_state_builder); + if (state_builder) { + state_builder->remote_mirror_peer_uuid = m_remote_pool_meta.mirror_peer_uuid; + } + } + + get_remote_image_id(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_remote_image_id() { + 
dout(10) << dendl; + + Context *ctx = create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_remote_image_id>(this); + auto req = GetMirrorImageIdRequest<I>::create(m_remote_io_ctx, + m_global_image_id, + &m_remote_image_id, ctx); + req->send(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_remote_image_id(int r) { + dout(10) << "r=" << r << ", " + << "remote_image_id=" << m_remote_image_id << dendl; + + if (r < 0) { + finish(r); + return; + } + + get_mirror_info(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_mirror_info() { + dout(10) << dendl; + + auto ctx = create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_mirror_info>(this); + auto req = librbd::mirror::GetInfoRequest<I>::create( + m_remote_io_ctx, m_threads->work_queue, m_remote_image_id, + &m_mirror_image, &m_promotion_state, &m_primary_mirror_uuid, + ctx); + req->send(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_mirror_info(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "image " << m_global_image_id << " not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve mirror image details for image " + << m_global_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + auto state_builder = *m_state_builder; + if (state_builder != nullptr && + state_builder->get_mirror_image_mode() != m_mirror_image.mode) { + derr << "local and remote mirror image using different mirroring modes " + << "for image " << m_global_image_id << ": split-brain" << dendl; + finish(-EEXIST); + return; + } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) { + dout(5) << "remote image mirroring is being disabled" << dendl; + finish(-ENOENT); + return; + } + + switch (m_mirror_image.mode) { + case cls::rbd::MIRROR_IMAGE_MODE_JOURNAL: + 
get_client(); + break; + case cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT: + finalize_snapshot_state_builder(); + finish(0); + break; + default: + derr << "unsupported mirror image mode " << m_mirror_image.mode << " " + << "for image " << m_global_image_id << dendl; + finish(-EOPNOTSUPP); + break; + } +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_client() { + dout(10) << dendl; + + auto cct = static_cast<CephContext *>(m_local_io_ctx.cct()); + ::journal::Settings journal_settings; + journal_settings.commit_interval = cct->_conf.get_val<double>( + "rbd_mirror_journal_commit_age"); + + // TODO use Journal thread pool for journal ops until converted to ASIO + ContextWQ* context_wq; + librbd::Journal<>::get_work_queue(cct, &context_wq); + + ceph_assert(m_remote_journaler == nullptr); + m_remote_journaler = new Journaler(context_wq, m_threads->timer, + &m_threads->timer_lock, m_remote_io_ctx, + m_remote_image_id, m_local_mirror_uuid, + journal_settings, m_cache_manager_handler); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_client>(this)); + m_remote_journaler->get_client(m_local_mirror_uuid, &m_client, ctx); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_client(int r) { + dout(10) << "r=" << r << dendl; + + MirrorPeerClientMeta client_meta; + if (r == -ENOENT) { + dout(10) << "client not registered" << dendl; + register_client(); + } else if (r < 0) { + derr << "failed to retrieve client: " << cpp_strerror(r) << dendl; + finish(r); + } else if (!util::decode_client_meta(m_client, &client_meta)) { + // require operator intervention since the data is corrupt + finish(-EBADMSG); + } else { + // skip registration if it already exists + finalize_journal_state_builder(m_client.state, client_meta); + finish(0); + } +} + +template <typename I> +void PrepareRemoteImageRequest<I>::register_client() { + 
dout(10) << dendl; + + auto state_builder = *m_state_builder; + librbd::journal::MirrorPeerClientMeta client_meta{ + (state_builder == nullptr ? "" : state_builder->local_image_id)}; + client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + librbd::journal::ClientData client_data{client_meta}; + bufferlist client_data_bl; + encode(client_data, client_data_bl); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_register_client>(this)); + m_remote_journaler->register_client(client_data_bl, ctx); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_register_client(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register with remote journal: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + auto state_builder = *m_state_builder; + librbd::journal::MirrorPeerClientMeta client_meta{ + (state_builder == nullptr ? 
"" : state_builder->local_image_id)}; + client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + finalize_journal_state_builder(cls::journal::CLIENT_STATE_CONNECTED, + client_meta); + finish(0); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::finalize_journal_state_builder( + cls::journal::ClientState client_state, + const MirrorPeerClientMeta& client_meta) { + journal::StateBuilder<I>* state_builder = nullptr; + if (*m_state_builder != nullptr) { + // already verified that it's a matching builder in + // 'handle_get_mirror_info' + state_builder = dynamic_cast<journal::StateBuilder<I>*>(*m_state_builder); + ceph_assert(state_builder != nullptr); + } else { + state_builder = journal::StateBuilder<I>::create(m_global_image_id); + *m_state_builder = state_builder; + } + + state_builder->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid; + state_builder->remote_image_id = m_remote_image_id; + state_builder->remote_promotion_state = m_promotion_state; + state_builder->remote_journaler = m_remote_journaler; + state_builder->remote_client_state = client_state; + state_builder->remote_client_meta = client_meta; +} + +template <typename I> +void PrepareRemoteImageRequest<I>::finalize_snapshot_state_builder() { + snapshot::StateBuilder<I>* state_builder = nullptr; + if (*m_state_builder != nullptr) { + state_builder = dynamic_cast<snapshot::StateBuilder<I>*>(*m_state_builder); + ceph_assert(state_builder != nullptr); + } else { + state_builder = snapshot::StateBuilder<I>::create(m_global_image_id); + *m_state_builder = state_builder; + } + + dout(10) << "remote_mirror_uuid=" << m_remote_pool_meta.mirror_uuid << ", " + << "remote_mirror_peer_uuid=" + << m_remote_pool_meta.mirror_peer_uuid << ", " + << "remote_image_id=" << m_remote_image_id << ", " + << "remote_promotion_state=" << m_promotion_state << dendl; + state_builder->remote_mirror_uuid = m_remote_pool_meta.mirror_uuid; + state_builder->remote_mirror_peer_uuid = 
m_remote_pool_meta.mirror_peer_uuid; + state_builder->remote_image_id = m_remote_image_id; + state_builder->remote_promotion_state = m_promotion_state; +} + +template <typename I> +void PrepareRemoteImageRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + delete m_remote_journaler; + m_remote_journaler = nullptr; + } + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h new file mode 100644 index 000000000..483cfc001 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h @@ -0,0 +1,153 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "cls/journal/cls_journal_types.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include "librbd/mirror/Types.h" +#include "tools/rbd_mirror/Types.h" +#include <string> + +namespace journal { class Journaler; } +namespace journal { struct CacheManagerHandler; } +namespace librbd { struct ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +struct Context; + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_replayer { + +template <typename> class StateBuilder; + +template <typename ImageCtxT = librbd::ImageCtx> +class PrepareRemoteImageRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + 
typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + + static PrepareRemoteImageRequest *create( + Threads<ImageCtxT> *threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const RemotePoolMeta& remote_pool_meta, + ::journal::CacheManagerHandler *cache_manager_handler, + StateBuilder<ImageCtxT>** state_builder, + Context *on_finish) { + return new PrepareRemoteImageRequest(threads, local_io_ctx, remote_io_ctx, + global_image_id, local_mirror_uuid, + remote_pool_meta, + cache_manager_handler, state_builder, + on_finish); + } + + PrepareRemoteImageRequest( + Threads<ImageCtxT> *threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const RemotePoolMeta& remote_pool_meta, + ::journal::CacheManagerHandler *cache_manager_handler, + StateBuilder<ImageCtxT>** state_builder, + Context *on_finish) + : m_threads(threads), + m_local_io_ctx(local_io_ctx), + m_remote_io_ctx(remote_io_ctx), + m_global_image_id(global_image_id), + m_local_mirror_uuid(local_mirror_uuid), + m_remote_pool_meta(remote_pool_meta), + m_cache_manager_handler(cache_manager_handler), + m_state_builder(state_builder), + m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_REMOTE_IMAGE_ID + * | + * v + * GET_REMOTE_MIRROR_INFO + * | + * | (journal) + * \-----------> GET_CLIENT + * | | + * | v (skip if not needed) + * | REGISTER_CLIENT + * | | + * | | + * |/----------------/ + * | + * v + * <finish> + * + * @endverbatim + */ + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_local_io_ctx; + librados::IoCtx &m_remote_io_ctx; + std::string m_global_image_id; + std::string m_local_mirror_uuid; + RemotePoolMeta m_remote_pool_meta; + ::journal::CacheManagerHandler *m_cache_manager_handler; + StateBuilder<ImageCtxT>** 
m_state_builder; + Context *m_on_finish; + + bufferlist m_out_bl; + std::string m_remote_image_id; + cls::rbd::MirrorImage m_mirror_image; + librbd::mirror::PromotionState m_promotion_state = + librbd::mirror::PROMOTION_STATE_UNKNOWN; + std::string m_primary_mirror_uuid; + + // journal-based mirroring + Journaler *m_remote_journaler = nullptr; + cls::journal::Client m_client; + + void get_remote_image_id(); + void handle_get_remote_image_id(int r); + + void get_mirror_info(); + void handle_get_mirror_info(int r); + + void get_client(); + void handle_get_client(int r); + + void register_client(); + void handle_register_client(int r); + + void finalize_journal_state_builder(cls::journal::ClientState client_state, + const MirrorPeerClientMeta& client_meta); + void finalize_snapshot_state_builder(); + + void finish(int r); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/Replayer.h b/src/tools/rbd_mirror/image_replayer/Replayer.h new file mode 100644 index 000000000..f3bfa4da0 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Replayer.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H +#define RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H + +#include <string> + +struct Context; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +struct Replayer { + virtual ~Replayer() {} + + virtual void destroy() = 0; + + virtual void init(Context* on_finish) = 0; + virtual void shut_down(Context* on_finish) = 0; + + virtual void flush(Context* on_finish) = 0; + + virtual bool get_replay_status(std::string* description, + Context* on_finish) = 0; + + virtual bool is_replaying() const = 0; + virtual bool 
is_resync_requested() const = 0; + + virtual int get_error_code() const = 0; + virtual std::string get_error_description() const = 0; +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_H diff --git a/src/tools/rbd_mirror/image_replayer/ReplayerListener.h b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h new file mode 100644 index 000000000..f17f401b1 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H +#define RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H + +namespace rbd { +namespace mirror { +namespace image_replayer { + +struct ReplayerListener { + virtual ~ReplayerListener() {} + + virtual void handle_notification() = 0; +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAYER_LISTENER_H diff --git a/src/tools/rbd_mirror/image_replayer/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/StateBuilder.cc new file mode 100644 index 000000000..55fb3509d --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/StateBuilder.cc @@ -0,0 +1,138 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "StateBuilder.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h" +#include "tools/rbd_mirror/image_sync/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "StateBuilder: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { 
+namespace image_replayer { + +template <typename I> +StateBuilder<I>::StateBuilder(const std::string& global_image_id) + : global_image_id(global_image_id) { + dout(10) << "global_image_id=" << global_image_id << dendl; +} + +template <typename I> +StateBuilder<I>::~StateBuilder() { + ceph_assert(local_image_ctx == nullptr); + ceph_assert(remote_image_ctx == nullptr); + ceph_assert(m_sync_point_handler == nullptr); +} + +template <typename I> +bool StateBuilder<I>::is_local_primary() const { + if (local_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) { + ceph_assert(!local_image_id.empty()); + return true; + } + return false; +} + +template <typename I> +bool StateBuilder<I>::is_remote_primary() const { + if (remote_promotion_state == librbd::mirror::PROMOTION_STATE_PRIMARY) { + ceph_assert(!remote_image_id.empty()); + return true; + } + return false; +} + +template <typename I> +bool StateBuilder<I>::is_linked() const { + if (local_promotion_state == librbd::mirror::PROMOTION_STATE_NON_PRIMARY) { + ceph_assert(!local_image_id.empty()); + return is_linked_impl(); + } + return false; +} + +template <typename I> +void StateBuilder<I>::close_local_image(Context* on_finish) { + if (local_image_ctx == nullptr) { + on_finish->complete(0); + return; + } + + dout(10) << dendl; + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_close_local_image(r, on_finish); + }); + auto request = image_replayer::CloseImageRequest<I>::create( + &local_image_ctx, ctx); + request->send(); +} + +template <typename I> +void StateBuilder<I>::handle_close_local_image(int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + ceph_assert(local_image_ctx == nullptr); + if (r < 0) { + derr << "failed to close local image for image " << global_image_id << ": " + << cpp_strerror(r) << dendl; + } + + on_finish->complete(r); +} + +template <typename I> +void StateBuilder<I>::close_remote_image(Context* on_finish) { + if (remote_image_ctx == nullptr) { + 
on_finish->complete(0); + return; + } + + dout(10) << dendl; + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_close_remote_image(r, on_finish); + }); + auto request = image_replayer::CloseImageRequest<I>::create( + &remote_image_ctx, ctx); + request->send(); +} + +template <typename I> +void StateBuilder<I>::handle_close_remote_image(int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + ceph_assert(remote_image_ctx == nullptr); + if (r < 0) { + derr << "failed to close remote image for image " << global_image_id << ": " + << cpp_strerror(r) << dendl; + } + + on_finish->complete(r); +} + +template <typename I> +void StateBuilder<I>::destroy_sync_point_handler() { + if (m_sync_point_handler == nullptr) { + return; + } + + dout(15) << dendl; + m_sync_point_handler->destroy(); + m_sync_point_handler = nullptr; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::StateBuilder<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/StateBuilder.h new file mode 100644 index 000000000..51cf8668c --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/StateBuilder.h @@ -0,0 +1,114 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H + +#include "include/rados/librados_fwd.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/mirror/Types.h" + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +struct BaseRequest; +template <typename> class InstanceWatcher; +struct PoolMetaCache; +struct ProgressContext; +template <typename> class Threads; + +namespace image_sync { struct SyncPointHandler; } + +namespace image_replayer { + +struct Replayer; +struct ReplayerListener; + +template <typename ImageCtxT> 
+class StateBuilder { +public: + StateBuilder(const StateBuilder&) = delete; + StateBuilder& operator=(const StateBuilder&) = delete; + + virtual ~StateBuilder(); + + virtual void destroy() { + delete this; + } + + virtual void close(Context* on_finish) = 0; + + virtual bool is_disconnected() const = 0; + + bool is_local_primary() const; + bool is_remote_primary() const; + bool is_linked() const; + + virtual cls::rbd::MirrorImageMode get_mirror_image_mode() const = 0; + + virtual image_sync::SyncPointHandler* create_sync_point_handler() = 0; + void destroy_sync_point_handler(); + + virtual bool replay_requires_remote_image() const = 0; + + void close_remote_image(Context* on_finish); + + virtual BaseRequest* create_local_image_request( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) = 0; + + virtual BaseRequest* create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) = 0; + + virtual Replayer* create_replayer( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) = 0; + + std::string global_image_id; + + std::string local_image_id; + librbd::mirror::PromotionState local_promotion_state = + librbd::mirror::PROMOTION_STATE_UNKNOWN; + ImageCtxT* local_image_ctx = nullptr; + + std::string remote_mirror_uuid; + std::string remote_image_id; + librbd::mirror::PromotionState remote_promotion_state = + librbd::mirror::PROMOTION_STATE_UNKNOWN; + ImageCtxT* remote_image_ctx = nullptr; + +protected: + image_sync::SyncPointHandler* m_sync_point_handler = nullptr; + + StateBuilder(const std::string& global_image_id); + + void close_local_image(Context* on_finish); + +private: + virtual bool 
is_linked_impl() const = 0; + + void handle_close_local_image(int r, Context* on_finish); + void handle_close_remote_image(int r, Context* on_finish); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::StateBuilder<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_STATE_BUILDER_H diff --git a/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc new file mode 100644 index 000000000..5d9c9aca1 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.cc @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h" +#include "common/Clock.h" + +namespace rbd { +namespace mirror { +namespace image_replayer { + +void TimeRollingMean::operator()(uint32_t value) { + auto time = ceph_clock_now(); + if (m_last_time.is_zero()) { + m_last_time = time; + } else if (m_last_time.sec() < time.sec()) { + auto sec = m_last_time.sec(); + while (sec++ < time.sec()) { + m_rolling_mean(m_sum); + m_sum = 0; + } + + m_last_time = time; + } + + m_sum += value; +} + +double TimeRollingMean::get_average() const { + return boost::accumulators::rolling_mean(m_rolling_mean); +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h new file mode 100644 index 000000000..139ef893f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/TimeRollingMean.h @@ -0,0 +1,40 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H +#define RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H + +#include "include/utime.h" +#include <boost/accumulators/accumulators.hpp> +#include 
<boost/accumulators/statistics/stats.hpp> +#include <boost/accumulators/statistics/rolling_mean.hpp> + +namespace rbd { +namespace mirror { +namespace image_replayer { + +class TimeRollingMean { +public: + + void operator()(uint32_t value); + + double get_average() const; + +private: + typedef boost::accumulators::accumulator_set< + uint64_t, boost::accumulators::stats< + boost::accumulators::tag::rolling_mean>> RollingMean; + + utime_t m_last_time; + uint64_t m_sum = 0; + + RollingMean m_rolling_mean{ + boost::accumulators::tag::rolling_window::window_size = 30}; + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_TIME_ROLLING_MEAN_H diff --git a/src/tools/rbd_mirror/image_replayer/Types.h b/src/tools/rbd_mirror/image_replayer/Types.h new file mode 100644 index 000000000..6ab988a76 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Types.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H + +namespace rbd { +namespace mirror { +namespace image_replayer { + +enum HealthState { + HEALTH_STATE_OK, + HEALTH_STATE_WARNING, + HEALTH_STATE_ERROR +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H diff --git a/src/tools/rbd_mirror/image_replayer/Utils.cc b/src/tools/rbd_mirror/image_replayer/Utils.cc new file mode 100644 index 000000000..55162a4e4 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Utils.cc @@ -0,0 +1,61 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "include/rados/librados.hpp" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" + +#define 
dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::util::" \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace util { + +std::string compute_image_spec(librados::IoCtx& io_ctx, + const std::string& image_name) { + std::string name = io_ctx.get_namespace(); + if (!name.empty()) { + name += "/"; + } + + return io_ctx.get_pool_name() + "/" + name + image_name; +} + +bool decode_client_meta(const cls::journal::Client& client, + librbd::journal::MirrorPeerClientMeta* client_meta) { + dout(15) << dendl; + + librbd::journal::ClientData client_data; + auto it = client.data.cbegin(); + try { + decode(client_data, it); + } catch (const buffer::error &err) { + derr << "failed to decode client meta data: " << err.what() << dendl; + return false; + } + + auto local_client_meta = boost::get<librbd::journal::MirrorPeerClientMeta>( + &client_data.client_meta); + if (local_client_meta == nullptr) { + derr << "unknown peer registration" << dendl; + return false; + } + + *client_meta = *local_client_meta; + dout(15) << "client found: client_meta=" << *client_meta << dendl; + return true; +} + +} // namespace util +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + diff --git a/src/tools/rbd_mirror/image_replayer/Utils.h b/src/tools/rbd_mirror/image_replayer/Utils.h new file mode 100644 index 000000000..6c5352cd1 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Utils.h @@ -0,0 +1,29 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_UTILS_H +#define RBD_MIRROR_IMAGE_REPLAYER_UTILS_H + +#include "include/rados/librados_fwd.hpp" +#include <string> + +namespace cls { namespace journal { struct Client; } } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { 
+namespace image_replayer { +namespace util { + +std::string compute_image_spec(librados::IoCtx& io_ctx, + const std::string& image_name); + +bool decode_client_meta(const cls::journal::Client& client, + librbd::journal::MirrorPeerClientMeta* client_meta); + +} // namespace util +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_UTILS_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc new file mode 100644 index 000000000..087cf4f5f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.cc @@ -0,0 +1,162 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "CreateLocalImageRequest.h" +#include "include/rados/librados.hpp" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/PoolMetaCache.h" +#include "tools/rbd_mirror/ProgressContext.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/CreateImageRequest.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "CreateLocalImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +template <typename I> +void CreateLocalImageRequest<I>::send() { + unregister_client(); +} + +template <typename I> +void 
CreateLocalImageRequest<I>::unregister_client() { + dout(10) << dendl; + update_progress("UNREGISTER_CLIENT"); + + auto ctx = create_context_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_unregister_client>(this); + m_state_builder->remote_journaler->unregister_client(ctx); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_unregister_client(int r) { + dout(10) << "r=" << r << dendl; + if (r < 0 && r != -ENOENT) { + derr << "failed to unregister with remote journal: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + m_state_builder->local_image_id = ""; + m_state_builder->remote_client_meta = {}; + register_client(); +} + +template <typename I> +void CreateLocalImageRequest<I>::register_client() { + ceph_assert(m_state_builder->local_image_id.empty()); + m_state_builder->local_image_id = + librbd::util::generate_image_id<I>(m_local_io_ctx); + dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl; + update_progress("REGISTER_CLIENT"); + + librbd::journal::MirrorPeerClientMeta client_meta{ + m_state_builder->local_image_id}; + client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING; + + librbd::journal::ClientData client_data{client_meta}; + bufferlist client_data_bl; + encode(client_data, client_data_bl); + + auto ctx = create_context_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_register_client>(this); + m_state_builder->remote_journaler->register_client(client_data_bl, ctx); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_register_client(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register with remote journal: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + m_state_builder->remote_client_state = cls::journal::CLIENT_STATE_CONNECTED; + m_state_builder->remote_client_meta = {m_state_builder->local_image_id}; + m_state_builder->remote_client_meta.state = + 
librbd::journal::MIRROR_PEER_STATE_SYNCING; + + create_local_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::create_local_image() { + dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl; + update_progress("CREATE_LOCAL_IMAGE"); + + m_remote_image_ctx->image_lock.lock_shared(); + std::string image_name = m_remote_image_ctx->name; + m_remote_image_ctx->image_lock.unlock_shared(); + + auto ctx = create_context_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_create_local_image>(this); + auto request = CreateImageRequest<I>::create( + m_threads, m_local_io_ctx, m_global_image_id, + m_state_builder->remote_mirror_uuid, image_name, + m_state_builder->local_image_id, m_remote_image_ctx, + m_pool_meta_cache, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, ctx); + request->send(); +} +template <typename I> +void CreateLocalImageRequest<I>::handle_create_local_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBADF) { + dout(5) << "image id " << m_state_builder->local_image_id << " " + << "already in-use" << dendl; + unregister_client(); + return; + } else if (r < 0) { + if (r == -ENOENT) { + dout(10) << "parent image does not exist" << dendl; + } else { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void CreateLocalImageRequest<I>::update_progress( + const std::string& description) { + dout(15) << description << dendl; + if (m_progress_ctx != nullptr) { + m_progress_ctx->update_progress(description); + } +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::CreateLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h new file mode 100644 index 000000000..fc776ecc3 
--- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h @@ -0,0 +1,116 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H + +#include "include/rados/librados_fwd.hpp" +#include "tools/rbd_mirror/BaseRequest.h" +#include <string> + +struct Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +class PoolMetaCache; +class ProgressContext; +template <typename> struct Threads; + +namespace image_replayer { +namespace journal { + +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class CreateLocalImageRequest : public BaseRequest { +public: + typedef rbd::mirror::ProgressContext ProgressContext; + + static CreateLocalImageRequest* create( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + ImageCtxT* remote_image_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + Context* on_finish) { + return new CreateLocalImageRequest(threads, local_io_ctx, remote_image_ctx, + global_image_id, pool_meta_cache, + progress_ctx, state_builder, on_finish); + } + + CreateLocalImageRequest( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + ImageCtxT* remote_image_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + Context* on_finish) + : BaseRequest(on_finish), + m_threads(threads), + m_local_io_ctx(local_io_ctx), + m_remote_image_ctx(remote_image_ctx), + m_global_image_id(global_image_id), + m_pool_meta_cache(pool_meta_cache), + m_progress_ctx(progress_ctx), + m_state_builder(state_builder) { + } + + void send(); + +private: + /** + * 
@verbatim + * + * <start> + * | + * v + * UNREGISTER_CLIENT < * * * * * * * * + * | * + * v * + * REGISTER_CLIENT * + * | * + * v (id exists) * + * CREATE_LOCAL_IMAGE * * * * * * * * * + * | + * v + * <finish> + * + * @endverbatim + */ + + Threads<ImageCtxT>* m_threads; + librados::IoCtx& m_local_io_ctx; + ImageCtxT* m_remote_image_ctx; + std::string m_global_image_id; + PoolMetaCache* m_pool_meta_cache; + ProgressContext* m_progress_ctx; + StateBuilder<ImageCtxT>* m_state_builder; + + void unregister_client(); + void handle_unregister_client(int r); + + void register_client(); + void handle_register_client(int r); + + void create_local_image(); + void handle_create_local_image(int r); + + void update_progress(const std::string& description); + +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::CreateLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_CREATE_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc new file mode 100644 index 000000000..f5d49048e --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/EventPreprocessor.cc @@ -0,0 +1,206 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "EventPreprocessor.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/journal/Types.h" +#include <boost/variant.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "EventPreprocessor: " << this << " " << 
namespace rbd {
namespace mirror {
namespace image_replayer {
namespace journal {

using librbd::util::create_context_callback;

// Rewrites remote journal events before they are replayed against the local
// image: remote snapshot ids are remapped to local snapshot ids via the
// mirror-peer client's snap_seqs map.
template <typename I>
EventPreprocessor<I>::EventPreprocessor(I &local_image_ctx,
                                        Journaler &remote_journaler,
                                        const std::string &local_mirror_uuid,
                                        MirrorPeerClientMeta *client_meta,
                                        librbd::asio::ContextWQ *work_queue)
  : m_local_image_ctx(local_image_ctx), m_remote_journaler(remote_journaler),
    m_local_mirror_uuid(local_mirror_uuid), m_client_meta(client_meta),
    m_work_queue(work_queue) {
}

template <typename I>
EventPreprocessor<I>::~EventPreprocessor() {
  // destroying the preprocessor while an async preprocess() is still in
  // flight would leak the pending on_finish context
  ceph_assert(!m_in_progress);
}

// Returns true when the event cannot be applied verbatim: either the cached
// snap_seqs map holds stale entries that need pruning, or the event is a
// snapshot rename whose snap id must be remapped.
template <typename I>
bool EventPreprocessor<I>::is_required(const EventEntry &event_entry) {
  SnapSeqs snap_seqs(m_client_meta->snap_seqs);
  return (prune_snap_map(&snap_seqs) ||
          event_entry.get_event_type() ==
            librbd::journal::EVENT_TYPE_SNAP_RENAME);
}

// Starts the async preprocess state machine.  Only one request may be in
// flight at a time; 'event_entry' is modified in place.
template <typename I>
void EventPreprocessor<I>::preprocess(EventEntry *event_entry,
                                      Context *on_finish) {
  ceph_assert(!m_in_progress);
  m_in_progress = true;
  m_event_entry = event_entry;
  m_on_finish = on_finish;

  refresh_image();
}

// Refresh the local image so the snapshot tables consulted below are current.
template <typename I>
void EventPreprocessor<I>::refresh_image() {
  dout(20) << dendl;

  Context *ctx = create_context_callback<
    EventPreprocessor<I>, &EventPreprocessor<I>::handle_refresh_image>(this);
  m_local_image_ctx.state->refresh(ctx);
}

template <typename I>
void EventPreprocessor<I>::handle_refresh_image(int r) {
  dout(20) << "r=" << r << dendl;

  if (r < 0) {
    derr << "error encountered during image refresh: " << cpp_strerror(r)
         << dendl;
    finish(r);
    return;
  }

  preprocess_event();
}

// Prune stale snap mappings, then dispatch the event to the visitor (only
// SnapRenameEvent currently requires rewriting).
template <typename I>
void EventPreprocessor<I>::preprocess_event() {
  dout(20) << dendl;

  m_snap_seqs = m_client_meta->snap_seqs;
  m_snap_seqs_updated = prune_snap_map(&m_snap_seqs);

  int r = boost::apply_visitor(PreprocessEventVisitor(this),
                               m_event_entry->event);
  if (r < 0) {
    finish(r);
    return;
  }

  update_client();
}

// Rewrite the remote snap id in a rename event to the matching local snap id.
// Falls back to a lookup by the pre-rename (source) snapshot name when the id
// is not already mapped.  Returns -ENOENT if no local mapping exists.
template <typename I>
int EventPreprocessor<I>::preprocess_snap_rename(
    librbd::journal::SnapRenameEvent &event) {
  dout(20) << "remote_snap_id=" << event.snap_id << ", "
           << "src_snap_name=" << event.src_snap_name << ", "
           << "dest_snap_name=" << event.dst_snap_name << dendl;

  auto snap_seq_it = m_snap_seqs.find(event.snap_id);
  if (snap_seq_it != m_snap_seqs.end()) {
    dout(20) << "remapping remote snap id " << snap_seq_it->first << " "
             << "to local snap id " << snap_seq_it->second << dendl;
    event.snap_id = snap_seq_it->second;
    return 0;
  }

  // only user snapshots participate in the name-based lookup
  auto snap_id_it = m_local_image_ctx.snap_ids.find({cls::rbd::UserSnapshotNamespace(),
                                                     event.src_snap_name});
  if (snap_id_it == m_local_image_ctx.snap_ids.end()) {
    dout(20) << "cannot map remote snapshot '" << event.src_snap_name << "' "
             << "to local snapshot" << dendl;
    event.snap_id = CEPH_NOSNAP;
    return -ENOENT;
  }

  dout(20) << "mapping remote snap id " << event.snap_id << " "
           << "to local snap id " << snap_id_it->second << dendl;
  m_snap_seqs_updated = true;
  m_snap_seqs[event.snap_id] = snap_id_it->second;
  event.snap_id = snap_id_it->second;
  return 0;
}

// Persist the (possibly updated) snap_seqs map into the mirror-peer client
// record of the remote journal; skipped when nothing changed.
template <typename I>
void EventPreprocessor<I>::update_client() {
  if (!m_snap_seqs_updated) {
    finish(0);
    return;
  }

  dout(20) << dendl;
  librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta);
  client_meta.snap_seqs = m_snap_seqs;

  librbd::journal::ClientData client_data(client_meta);
  bufferlist data_bl;
  encode(client_data, data_bl);

  Context *ctx = create_context_callback<
    EventPreprocessor<I>, &EventPreprocessor<I>::handle_update_client>(
      this);
  m_remote_journaler.update_client(data_bl, ctx);
}

template <typename I>
void EventPreprocessor<I>::handle_update_client(int r) {
  dout(20) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to update mirror peer journal client: "
         << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  // commit the new map to the cached client meta only after the remote
  // registry accepted it
  m_client_meta->snap_seqs = m_snap_seqs;
  finish(0);
}

// Drops mappings whose local snapshot no longer exists.  Returns true if any
// entry was removed.
template <typename I>
bool EventPreprocessor<I>::prune_snap_map(SnapSeqs *snap_seqs) {
  bool pruned = false;

  std::shared_lock image_locker{m_local_image_ctx.image_lock};
  for (auto it = snap_seqs->begin(); it != snap_seqs->end(); ) {
    // advance before a potential erase -- only the erased iterator is
    // invalidated
    auto current_it(it++);
    if (m_local_image_ctx.snap_info.count(current_it->second) == 0) {
      snap_seqs->erase(current_it);
      pruned = true;
    }
  }
  return pruned;
}

// Reset per-request state and complete the pending context via the work
// queue (not inline in the caller's context).
template <typename I>
void EventPreprocessor<I>::finish(int r) {
  dout(20) << "r=" << r << dendl;

  Context *on_finish = m_on_finish;
  m_on_finish = nullptr;
  m_event_entry = nullptr;
  m_in_progress = false;
  m_snap_seqs_updated = false;
  m_work_queue->queue(on_finish, r);
}

} // namespace journal
} // namespace image_replayer
} // namespace mirror
} // namespace rbd

template class rbd::mirror::image_replayer::journal::EventPreprocessor<librbd::ImageCtx>;
// Rewrites snapshot-referencing remote journal events so they can be applied
// to the local non-primary image (snap ids remapped via the mirror-peer
// client's snap_seqs map).
template <typename ImageCtxT = librbd::ImageCtx>
class EventPreprocessor {
public:
  using Journaler = typename librbd::journal::TypeTraits<ImageCtxT>::Journaler;
  using EventEntry = librbd::journal::EventEntry;
  using MirrorPeerClientMeta = librbd::journal::MirrorPeerClientMeta;

  static EventPreprocessor *create(ImageCtxT &local_image_ctx,
                                   Journaler &remote_journaler,
                                   const std::string &local_mirror_uuid,
                                   MirrorPeerClientMeta *client_meta,
                                   librbd::asio::ContextWQ *work_queue) {
    return new EventPreprocessor(local_image_ctx, remote_journaler,
                                 local_mirror_uuid, client_meta, work_queue);
  }

  static void destroy(EventPreprocessor* processor) {
    delete processor;
  }

  EventPreprocessor(ImageCtxT &local_image_ctx, Journaler &remote_journaler,
                    const std::string &local_mirror_uuid,
                    MirrorPeerClientMeta *client_meta,
                    librbd::asio::ContextWQ *work_queue);
  ~EventPreprocessor();

  // true if 'event_entry' needs preprocess() before it can be replayed
  bool is_required(const EventEntry &event_entry);
  // async; modifies 'event_entry' in place, completes 'on_finish' when done
  void preprocess(EventEntry *event_entry, Context *on_finish);

private:
  /**
   * @verbatim
   *
   * <start>
   *    |
   *    v (skip if not required)
   * REFRESH_IMAGE
   *    |
   *    v (skip if not required)
   * PREPROCESS_EVENT
   *    |
   *    v (skip if not required)
   * UPDATE_CLIENT
   *
   * @endverbatim
   */

  // remote snap id -> local snap id
  typedef std::map<uint64_t, uint64_t> SnapSeqs;

  // Dispatches an event to its type-specific preprocessing handler; only
  // SnapRenameEvent requires work, everything else is a no-op.
  class PreprocessEventVisitor : public boost::static_visitor<int> {
  public:
    EventPreprocessor *event_preprocessor;

    PreprocessEventVisitor(EventPreprocessor *event_preprocessor)
      : event_preprocessor(event_preprocessor) {
    }

    template <typename T>
    inline int operator()(T&) const {
      return 0;
    }
    inline int operator()(librbd::journal::SnapRenameEvent &event) const {
      return event_preprocessor->preprocess_snap_rename(event);
    }
  };

  ImageCtxT &m_local_image_ctx;
  Journaler &m_remote_journaler;
  std::string m_local_mirror_uuid;
  MirrorPeerClientMeta *m_client_meta;
  librbd::asio::ContextWQ *m_work_queue;

  // per-request state, reset by finish()
  bool m_in_progress = false;
  EventEntry *m_event_entry = nullptr;
  Context *m_on_finish = nullptr;

  SnapSeqs m_snap_seqs;
  bool m_snap_seqs_updated = false;

  bool prune_snap_map(SnapSeqs *snap_seqs);

  void refresh_image();
  void handle_refresh_image(int r);

  void preprocess_event();
  int preprocess_snap_rename(librbd::journal::SnapRenameEvent &event);

  void update_client();
  void handle_update_client(int r);

  void finish(int r);

};

} // namespace journal
} // namespace image_replayer
} // namespace mirror
} // namespace rbd

extern template class rbd::mirror::image_replayer::journal::EventPreprocessor<librbd::ImageCtx>;

#endif // RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
// Flip the remote mirror-peer client record from SYNCING to REPLAYING when
// the local image is no longer tied to the remote (see inline comment);
// otherwise proceed straight to fetching the remote tag class.
template <typename I>
void PrepareReplayRequest<I>::update_client_state() {
  if (m_state_builder->remote_client_meta.state !=
        librbd::journal::MIRROR_PEER_STATE_SYNCING ||
      m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
    get_remote_tag_class();
    return;
  }

  // our local image is not primary, is flagged as syncing on the remote side,
  // but is no longer tied to the remote -- this implies we were forced
  // promoted and then demoted at some point
  dout(15) << dendl;
  update_progress("UPDATE_CLIENT_STATE");

  auto client_meta = m_state_builder->remote_client_meta;
  client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;

  librbd::journal::ClientData client_data(client_meta);
  bufferlist data_bl;
  encode(client_data, data_bl);

  auto ctx = create_context_callback<
    PrepareReplayRequest<I>,
    &PrepareReplayRequest<I>::handle_update_client_state>(this);
  m_state_builder->remote_journaler->update_client(data_bl, ctx);
}

template <typename I>
void PrepareReplayRequest<I>::handle_update_client_state(int r) {
  dout(15) << "r=" << r << dendl;
  if (r < 0) {
    derr << "failed to update client: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  // mirror the remote-side state change in the cached client meta
  m_state_builder->remote_client_meta.state =
    librbd::journal::MIRROR_PEER_STATE_REPLAYING;
  get_remote_tag_class();
}

// Fetch the remote image client registration to learn which tag class the
// primary image's journal tags belong to.
template <typename I>
void PrepareReplayRequest<I>::get_remote_tag_class() {
  dout(10) << dendl;
  update_progress("GET_REMOTE_TAG_CLASS");

  auto ctx = create_context_callback<
    PrepareReplayRequest<I>,
    &PrepareReplayRequest<I>::handle_get_remote_tag_class>(this);
  m_state_builder->remote_journaler->get_client(
    librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx);
}

template <typename I>
void PrepareReplayRequest<I>::handle_get_remote_tag_class(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  librbd::journal::ClientData client_data;
  auto it = m_client.data.cbegin();
  try {
    decode(client_data, it);
  } catch (const buffer::error &err) {
    derr << "failed to decode remote client meta data: " << err.what()
         << dendl;
    finish(-EBADMSG);
    return;
  }

  // the image client must carry an ImageClientMeta; anything else means a
  // corrupt/unknown registration
  librbd::journal::ImageClientMeta *client_meta =
    boost::get<librbd::journal::ImageClientMeta>(&client_data.client_meta);
  if (client_meta == nullptr) {
    derr << "unknown remote client registration" << dendl;
    finish(-EINVAL);
    return;
  }

  m_remote_tag_class = client_meta->tag_class;
  dout(10) << "remote tag class=" << m_remote_tag_class << dendl;

  get_remote_tags();
}

// Fetch all remote journal tags of the primary's tag class so the local
// tag can be linked into the remote tag chain.
template <typename I>
void PrepareReplayRequest<I>::get_remote_tags() {
  dout(10) << dendl;
  update_progress("GET_REMOTE_TAGS");

  auto ctx = create_context_callback<
    PrepareReplayRequest<I>,
    &PrepareReplayRequest<I>::handle_get_remote_tags>(this);
  m_state_builder->remote_journaler->get_tags(m_remote_tag_class,
                                              &m_remote_tags, ctx);
}

// Walk the remote tag chain and decide whether the local image's most recent
// tag can be connected to it (clean replay or a matched demotion/promotion
// cycle); otherwise declare split-brain with -EEXIST.
template <typename I>
void PrepareReplayRequest<I>::handle_get_remote_tags(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  // At this point, the local image was existing, non-primary, and replaying;
  // and the remote image is primary.  Attempt to link the local image's most
  // recent tag to the remote image's tag chain.
  bool remote_tag_data_valid = false;
  librbd::journal::TagData remote_tag_data;
  boost::optional<uint64_t> remote_orphan_tag_tid =
    boost::make_optional<uint64_t>(false, 0U);
  bool reconnect_orphan = false;

  // decode the remote tags
  for (auto &remote_tag : m_remote_tags) {
    if (m_local_tag_data.predecessor.commit_valid &&
        m_local_tag_data.predecessor.mirror_uuid ==
          m_state_builder->remote_mirror_uuid &&
        m_local_tag_data.predecessor.tag_tid > remote_tag.tid) {
      dout(10) << "skipping processed predecessor remote tag "
               << remote_tag.tid << dendl;
      continue;
    }

    try {
      auto it = remote_tag.data.cbegin();
      decode(remote_tag_data, it);
      remote_tag_data_valid = true;
    } catch (const buffer::error &err) {
      derr << "failed to decode remote tag " << remote_tag.tid << ": "
           << err.what() << dendl;
      finish(-EBADMSG);
      return;
    }

    dout(10) << "decoded remote tag " << remote_tag.tid << ": "
             << remote_tag_data << dendl;

    if (!m_local_tag_data.predecessor.commit_valid) {
      // newly synced local image (no predecessor) replays from the first tag
      if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) {
        dout(10) << "skipping non-primary remote tag" << dendl;
        continue;
      }

      dout(10) << "using initial primary remote tag" << dendl;
      break;
    }

    if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
      // demotion last available local epoch

      if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid &&
          remote_tag_data.predecessor.commit_valid &&
          remote_tag_data.predecessor.tag_tid ==
            m_local_tag_data.predecessor.tag_tid) {
        // demotion matches remote epoch

        if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid &&
            m_local_tag_data.predecessor.mirror_uuid ==
              librbd::Journal<>::LOCAL_MIRROR_UUID) {
          // local demoted and remote has matching event
          dout(10) << "found matching local demotion tag" << dendl;
          remote_orphan_tag_tid = remote_tag.tid;
          continue;
        }

        if (m_local_tag_data.predecessor.mirror_uuid ==
              m_state_builder->remote_mirror_uuid &&
            remote_tag_data.predecessor.mirror_uuid ==
              librbd::Journal<>::LOCAL_MIRROR_UUID) {
          // remote demoted and local has matching event
          dout(10) << "found matching remote demotion tag" << dendl;
          remote_orphan_tag_tid = remote_tag.tid;
          continue;
        }
      }

      if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID &&
          remote_tag_data.predecessor.mirror_uuid ==
            librbd::Journal<>::ORPHAN_MIRROR_UUID &&
          remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid &&
          remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) {
        // remote promotion tag chained to remote/local demotion tag
        dout(10) << "found chained remote promotion tag" << dendl;
        reconnect_orphan = true;
        break;
      }

      // promotion must follow demotion
      remote_orphan_tag_tid = boost::none;
    }
  }

  if (remote_tag_data_valid &&
      m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
    dout(10) << "local image is in clean replay state" << dendl;
  } else if (reconnect_orphan) {
    dout(10) << "remote image was demoted/promoted" << dendl;
  } else {
    derr << "split-brain detected -- skipping image replay" << dendl;
    finish(-EEXIST);
    return;
  }

  finish(0);
}

// Forward a human-readable state description to the optional progress sink.
template <typename I>
void PrepareReplayRequest<I>::update_progress(const std::string &description) {
  dout(10) << description << dendl;

  if (m_progress_ctx != nullptr) {
    m_progress_ctx->update_progress(description);
  }
}

} // namespace journal
} // namespace image_replayer
} // namespace mirror
} // namespace rbd

template class rbd::mirror::image_replayer::journal::PrepareReplayRequest<librbd::ImageCtx>;
// Async request that validates the local image's journal position against
// the remote (primary) journal before replay starts.  Reports through the
// two out-params: *resync_requested when the local journal flags a resync,
// *syncing when the initial image sync has not yet completed.
template <typename ImageCtxT>
class PrepareReplayRequest : public BaseRequest {
public:
  static PrepareReplayRequest* create(
      const std::string& local_mirror_uuid,
      ProgressContext* progress_ctx,
      StateBuilder<ImageCtxT>* state_builder,
      bool* resync_requested,
      bool* syncing,
      Context* on_finish) {
    return new PrepareReplayRequest(
      local_mirror_uuid, progress_ctx, state_builder, resync_requested,
      syncing, on_finish);
  }

  PrepareReplayRequest(
      const std::string& local_mirror_uuid,
      ProgressContext* progress_ctx,
      StateBuilder<ImageCtxT>* state_builder,
      bool* resync_requested,
      bool* syncing,
      Context* on_finish)
    : BaseRequest(on_finish),
      m_local_mirror_uuid(local_mirror_uuid),
      m_progress_ctx(progress_ctx),
      m_state_builder(state_builder),
      m_resync_requested(resync_requested),
      m_syncing(syncing) {
  }

  void send() override;

private:
  /**
   * @verbatim
   *
   * <start>
   *    |
   *    v
   * UPDATE_CLIENT_STATE
   *    |
   *    v
   * GET_REMOTE_TAG_CLASS
   *    |
   *    v
   * GET_REMOTE_TAGS
   *    |
   *    v
   * <finish>
   *
   * @endverbatim
   */
  typedef std::list<cls::journal::Tag> Tags;

  std::string m_local_mirror_uuid;
  ProgressContext* m_progress_ctx;
  StateBuilder<ImageCtxT>* m_state_builder;
  bool* m_resync_requested;
  bool* m_syncing;

  // most recent tag of the local journal, captured under the image lock
  uint64_t m_local_tag_tid = 0;
  librbd::journal::TagData m_local_tag_data;

  uint64_t m_remote_tag_class = 0;
  Tags m_remote_tags;
  cls::journal::Client m_client;

  void update_client_state();
  void handle_update_client_state(int r);

  void get_remote_tag_class();
  void handle_get_remote_tag_class(int r);

  void get_remote_tags();
  void handle_get_remote_tags(int r);

  void update_progress(const std::string& description);

};

} // namespace journal
} // namespace image_replayer
} // namespace mirror
} // namespace rbd

extern template class rbd::mirror::image_replayer::journal::PrepareReplayRequest<librbd::ImageCtx>;

#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
namespace {

// Round a rate to two decimal places and return it as a non-negative value.
// std::round/std::fabs are qualified explicitly: the original unqualified
// abs() can silently bind to the C library's int abs(int) -- truncating the
// fractional part -- depending on which headers happen to be in scope.
double round_to_two_places(double value) {
  return std::fabs(std::round(value * 100) / 100);
}

} // anonymous namespace
// Returns true (and fills *description) when the replay status could be
// computed synchronously from cached journal clients/tags; otherwise kicks
// off an async tag-cache refresh and returns false.  'on_finish' is
// completed with -EAGAIN if a previous request is still outstanding and
// with -EEXIST on the synchronous-success path.
template <typename I>
bool ReplayStatusFormatter<I>::get_or_send_update(std::string *description,
                                                 Context *on_finish) {
  dout(20) << dendl;

  bool in_progress = false;
  {
    std::lock_guard locker{m_lock};
    if (m_on_finish) {
      in_progress = true;
    } else {
      m_on_finish = on_finish;
    }
  }

  if (in_progress) {
    dout(10) << "previous request is still in progress, ignoring" << dendl;
    on_finish->complete(-EAGAIN);
    return false;
  }

  m_master_position = cls::journal::ObjectPosition();
  m_mirror_position = cls::journal::ObjectPosition();

  cls::journal::Client master_client, mirror_client;
  int r;

  // lookup failures are logged but not fatal -- the positions simply stay
  // at their default (empty) values
  r = m_journaler->get_cached_client(librbd::Journal<>::IMAGE_CLIENT_ID,
                                     &master_client);
  if (r < 0) {
    derr << "error retrieving registered master client: "
         << cpp_strerror(r) << dendl;
  } else {
    r = m_journaler->get_cached_client(m_mirror_uuid, &mirror_client);
    if (r < 0) {
      derr << "error retrieving registered mirror client: "
           << cpp_strerror(r) << dendl;
    }
  }

  if (!master_client.commit_position.object_positions.empty()) {
    m_master_position =
      *(master_client.commit_position.object_positions.begin());
  }

  if (!mirror_client.commit_position.object_positions.empty()) {
    m_mirror_position =
      *(mirror_client.commit_position.object_positions.begin());
  }

  if (!calculate_behind_master_or_send_update()) {
    dout(20) << "need to update tag cache" << dendl;
    return false;
  }

  format(description);

  {
    std::lock_guard locker{m_lock};
    ceph_assert(m_on_finish == on_finish);
    m_on_finish = nullptr;
  }

  on_finish->complete(-EEXIST);
  return true;
}

// Computes m_entries_behind_master by walking the cached tag chain from the
// master position back to the mirror position.  Returns false (and starts an
// async tag fetch) when a needed tag is missing from the cache; true when
// the computation completed synchronously.
template <typename I>
bool ReplayStatusFormatter<I>::calculate_behind_master_or_send_update() {
  dout(20) << "m_master_position=" << m_master_position
           << ", m_mirror_position=" << m_mirror_position << dendl;

  m_entries_behind_master = 0;

  // nothing to compute when the master position is unknown or already
  // behind the mirror's tag
  if (m_master_position == cls::journal::ObjectPosition() ||
      m_master_position.tag_tid < m_mirror_position.tag_tid) {
    return true;
  }

  cls::journal::ObjectPosition master = m_master_position;
  uint64_t mirror_tag_tid = m_mirror_position.tag_tid;

  // accumulate entry counts per tag while stepping back through predecessors
  while (master.tag_tid > mirror_tag_tid) {
    auto tag_it = m_tag_cache.find(master.tag_tid);
    if (tag_it == m_tag_cache.end()) {
      send_update_tag_cache(master.tag_tid, mirror_tag_tid);
      return false;
    }
    librbd::journal::TagData &tag_data = tag_it->second;
    m_entries_behind_master += master.entry_tid;
    master = {0, tag_data.predecessor.tag_tid, tag_data.predecessor.entry_tid};
  }
  if (master.tag_tid == mirror_tag_tid &&
      master.entry_tid > m_mirror_position.entry_tid) {
    m_entries_behind_master += master.entry_tid - m_mirror_position.entry_tid;
  }

  dout(20) << "clearing tags not needed any more (below mirror position)"
           << dendl;

  uint64_t tag_tid = mirror_tag_tid;
  size_t old_size = m_tag_cache.size();
  while (tag_tid != 0) {
    auto tag_it = m_tag_cache.find(tag_tid);
    if (tag_it == m_tag_cache.end()) {
      break;
    }
    librbd::journal::TagData &tag_data = tag_it->second;

    dout(20) << "erasing tag " << tag_data << "for tag_tid " << tag_tid
             << dendl;

    tag_tid = tag_data.predecessor.tag_tid;
    m_tag_cache.erase(tag_it);
  }

  dout(20) << old_size - m_tag_cache.size() << " entries cleared" << dendl;

  return true;
}

// Fetch the next missing tag (walking backwards via predecessors) into the
// cache; completes the pending on_finish context once the walk reaches the
// mirror position or an already-cached tag.
template <typename I>
void ReplayStatusFormatter<I>::send_update_tag_cache(uint64_t master_tag_tid,
                                                     uint64_t mirror_tag_tid) {
  if (master_tag_tid <= mirror_tag_tid ||
      m_tag_cache.find(master_tag_tid) != m_tag_cache.end()) {
    Context *on_finish = nullptr;
    {
      std::lock_guard locker{m_lock};
      std::swap(m_on_finish, on_finish);
    }

    ceph_assert(on_finish);
    on_finish->complete(0);
    return;
  }

  dout(20) << "master_tag_tid=" << master_tag_tid << ", mirror_tag_tid="
           << mirror_tag_tid << dendl;

  auto ctx = new LambdaContext(
    [this, master_tag_tid, mirror_tag_tid](int r) {
      handle_update_tag_cache(master_tag_tid, mirror_tag_tid, r);
    });
  m_journaler->get_tag(master_tag_tid, &m_tag, ctx);
}
<< "error retrieving tag " << master_tag_tid << ": " << cpp_strerror(r) + << dendl; + } else { + dout(20) << "retrieved tag " << master_tag_tid << ": " << m_tag << dendl; + + auto it = m_tag.data.cbegin(); + try { + decode(tag_data, it); + } catch (const buffer::error &err) { + derr << "error decoding tag " << master_tag_tid << ": " << err.what() + << dendl; + } + } + + if (tag_data.predecessor.mirror_uuid != + librbd::Journal<>::LOCAL_MIRROR_UUID && + tag_data.predecessor.mirror_uuid != + librbd::Journal<>::ORPHAN_MIRROR_UUID) { + dout(20) << "hit remote image non-primary epoch" << dendl; + tag_data.predecessor = {}; + } + + dout(20) << "decoded tag " << master_tag_tid << ": " << tag_data << dendl; + + m_tag_cache[master_tag_tid] = tag_data; + send_update_tag_cache(tag_data.predecessor.tag_tid, mirror_tag_tid); +} + +template <typename I> +void ReplayStatusFormatter<I>::format(std::string *description) { + dout(20) << "m_master_position=" << m_master_position + << ", m_mirror_position=" << m_mirror_position + << ", m_entries_behind_master=" << m_entries_behind_master << dendl; + + json_spirit::mObject root_obj; + root_obj["primary_position"] = to_json_object(m_master_position); + root_obj["non_primary_position"] = to_json_object(m_mirror_position); + root_obj["entries_behind_primary"] = ( + m_entries_behind_master > 0 ? 
m_entries_behind_master : 0); + + m_bytes_per_second(0); + root_obj["bytes_per_second"] = round_to_two_places( + m_bytes_per_second.get_average()); + + m_entries_per_second(0); + auto entries_per_second = m_entries_per_second.get_average(); + root_obj["entries_per_second"] = round_to_two_places(entries_per_second); + + if (m_entries_behind_master > 0 && entries_per_second > 0) { + std::uint64_t seconds_until_synced = round_to_two_places( + m_entries_behind_master / entries_per_second); + if (seconds_until_synced >= std::numeric_limits<uint64_t>::max()) { + seconds_until_synced = std::numeric_limits<uint64_t>::max(); + } + + root_obj["seconds_until_synced"] = seconds_until_synced; + } + + *description = json_spirit::write( + root_obj, json_spirit::remove_trailing_zeros); +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::ReplayStatusFormatter<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h new file mode 100644 index 000000000..5dbbfe10d --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h @@ -0,0 +1,70 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H +#define RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H + +#include "include/Context.h" +#include "common/ceph_mutex.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h" + +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename ImageCtxT = librbd::ImageCtx> +class 
ReplayStatusFormatter { +public: + typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler; + + static ReplayStatusFormatter* create(Journaler *journaler, + const std::string &mirror_uuid) { + return new ReplayStatusFormatter(journaler, mirror_uuid); + } + + static void destroy(ReplayStatusFormatter* formatter) { + delete formatter; + } + + ReplayStatusFormatter(Journaler *journaler, const std::string &mirror_uuid); + + void handle_entry_processed(uint32_t bytes); + + bool get_or_send_update(std::string *description, Context *on_finish); + +private: + Journaler *m_journaler; + std::string m_mirror_uuid; + ceph::mutex m_lock; + Context *m_on_finish = nullptr; + cls::journal::ObjectPosition m_master_position; + cls::journal::ObjectPosition m_mirror_position; + int64_t m_entries_behind_master = 0; + cls::journal::Tag m_tag; + std::map<uint64_t, librbd::journal::TagData> m_tag_cache; + + TimeRollingMean m_bytes_per_second; + TimeRollingMean m_entries_per_second; + + bool calculate_behind_master_or_send_update(); + void send_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid); + void handle_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid, + int r); + void format(std::string *description); +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::ReplayStatusFormatter<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc new file mode 100644 index 000000000..20560038c --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc @@ -0,0 +1,1317 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Replayer.h" +#include "common/debug.h" +#include "common/errno.h" +#include 
"common/perf_counters.h" +#include "common/perf_counters_key.h" +#include "common/Timer.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/asio/ContextWQ.h" +#include "librbd/journal/Replay.h" +#include "journal/Journaler.h" +#include "journal/JournalMetadataListener.h" +#include "journal/ReplayHandler.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h" +#include "tools/rbd_mirror/image_replayer/ReplayerListener.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "tools/rbd_mirror/image_replayer/journal/EventPreprocessor.h" +#include "tools/rbd_mirror/image_replayer/journal/ReplayStatusFormatter.h" +#include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "Replayer: " << this << " " << __func__ << ": " + +extern PerfCounters *g_journal_perf_counters; + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +namespace { + +uint32_t calculate_replay_delay(const utime_t &event_time, + int mirroring_replay_delay) { + if (mirroring_replay_delay <= 0) { + return 0; + } + + utime_t now = ceph_clock_now(); + if (event_time + mirroring_replay_delay <= now) { + return 0; + } + + // ensure it is rounded up when converting to integer + return (event_time + mirroring_replay_delay - now) + 1; +} + +} // anonymous namespace + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +template <typename I> +struct Replayer<I>::C_ReplayCommitted : public Context { + Replayer* replayer; + ReplayEntry replay_entry; + uint64_t replay_bytes; + utime_t replay_start_time; + + C_ReplayCommitted(Replayer* replayer, ReplayEntry &&replay_entry, + uint64_t replay_bytes, const utime_t &replay_start_time) 
+ : replayer(replayer), replay_entry(std::move(replay_entry)), + replay_bytes(replay_bytes), replay_start_time(replay_start_time) { + } + + void finish(int r) override { + replayer->handle_process_entry_safe(replay_entry, replay_bytes, + replay_start_time, r); + } +}; + +template <typename I> +struct Replayer<I>::RemoteJournalerListener + : public ::journal::JournalMetadataListener { + Replayer* replayer; + + RemoteJournalerListener(Replayer* replayer) : replayer(replayer) {} + + void handle_update(::journal::JournalMetadata*) override { + auto ctx = new C_TrackedOp( + replayer->m_in_flight_op_tracker, + new LambdaContext([this](int r) { + replayer->handle_remote_journal_metadata_updated(); + })); + replayer->m_threads->work_queue->queue(ctx, 0); + } +}; + +template <typename I> +struct Replayer<I>::RemoteReplayHandler : public ::journal::ReplayHandler { + Replayer* replayer; + + RemoteReplayHandler(Replayer* replayer) : replayer(replayer) {} + ~RemoteReplayHandler() override {}; + + void handle_entries_available() override { + replayer->handle_replay_ready(); + } + + void handle_complete(int r) override { + std::string error; + if (r == -ENOMEM) { + error = "not enough memory in autotune cache"; + } else if (r < 0) { + error = "replay completed with error: " + cpp_strerror(r); + } + replayer->handle_replay_complete(r, error); + } +}; + +template <typename I> +struct Replayer<I>::LocalJournalListener + : public librbd::journal::Listener { + Replayer* replayer; + + LocalJournalListener(Replayer* replayer) : replayer(replayer) { + } + + void handle_close() override { + replayer->handle_replay_complete(0, ""); + } + + void handle_promoted() override { + replayer->handle_replay_complete(0, "force promoted"); + } + + void handle_resync() override { + replayer->handle_resync_image(); + } +}; + +template <typename I> +Replayer<I>::Replayer( + Threads<I>* threads, + const std::string& local_mirror_uuid, + StateBuilder<I>* state_builder, + ReplayerListener* 
replayer_listener) + : m_threads(threads), + m_local_mirror_uuid(local_mirror_uuid), + m_state_builder(state_builder), + m_replayer_listener(replayer_listener), + m_lock(ceph::make_mutex(librbd::util::unique_lock_name( + "rbd::mirror::image_replayer::journal::Replayer", this))) { + dout(10) << dendl; +} + +template <typename I> +Replayer<I>::~Replayer() { + dout(10) << dendl; + + { + std::unique_lock locker{m_lock}; + unregister_perf_counters(); + } + + ceph_assert(m_remote_listener == nullptr); + ceph_assert(m_local_journal_listener == nullptr); + ceph_assert(m_local_journal_replay == nullptr); + ceph_assert(m_remote_replay_handler == nullptr); + ceph_assert(m_event_preprocessor == nullptr); + ceph_assert(m_replay_status_formatter == nullptr); + ceph_assert(m_delayed_preprocess_task == nullptr); + ceph_assert(m_flush_local_replay_task == nullptr); + ceph_assert(m_state_builder->local_image_ctx == nullptr); +} + +template <typename I> +void Replayer<I>::init(Context* on_finish) { + dout(10) << dendl; + + { + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock image_locker{local_image_ctx->image_lock}; + m_image_spec = util::compute_image_spec(local_image_ctx->md_ctx, + local_image_ctx->name); + } + + { + std::unique_lock locker{m_lock}; + register_perf_counters(); + } + + ceph_assert(m_on_init_shutdown == nullptr); + m_on_init_shutdown = on_finish; + + init_remote_journaler(); +} + +template <typename I> +void Replayer<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + ceph_assert(m_on_init_shutdown == nullptr); + m_on_init_shutdown = on_finish; + + if (m_state == STATE_INIT) { + // raced with the last piece of the init state machine + return; + } else if (m_state == STATE_REPLAYING) { + m_state = STATE_COMPLETE; + } + + // if shutting down due to an error notification, we don't + // need to propagate the same error again + m_error_code = 0; + m_error_description = ""; + + 
cancel_delayed_preprocess_task(); + cancel_flush_local_replay_task(); + wait_for_flush(); +} + +template <typename I> +void Replayer<I>::flush(Context* on_finish) { + dout(10) << dendl; + + flush_local_replay(new C_TrackedOp(m_in_flight_op_tracker, on_finish)); +} + +template <typename I> +bool Replayer<I>::get_replay_status(std::string* description, + Context* on_finish) { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + if (m_replay_status_formatter == nullptr) { + derr << "replay not running" << dendl; + locker.unlock(); + + on_finish->complete(-EAGAIN); + return false; + } + + on_finish = new C_TrackedOp(m_in_flight_op_tracker, on_finish); + return m_replay_status_formatter->get_or_send_update(description, + on_finish); +} + +template <typename I> +void Replayer<I>::init_remote_journaler() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + Replayer, &Replayer<I>::handle_init_remote_journaler>(this); + m_state_builder->remote_journaler->init(ctx); +} + +template <typename I> +void Replayer<I>::handle_init_remote_journaler(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + derr << "failed to initialize remote journal: " << cpp_strerror(r) << dendl; + handle_replay_complete(locker, r, "error initializing remote journal"); + close_local_image(); + return; + } + + // listen for metadata updates to check for disconnect events + ceph_assert(m_remote_listener == nullptr); + m_remote_listener = new RemoteJournalerListener(this); + m_state_builder->remote_journaler->add_listener(m_remote_listener); + + cls::journal::Client remote_client; + r = m_state_builder->remote_journaler->get_cached_client(m_local_mirror_uuid, + &remote_client); + if (r < 0) { + derr << "error retrieving remote journal client: " << cpp_strerror(r) + << dendl; + handle_replay_complete(locker, r, "error retrieving remote journal client"); + close_local_image(); + return; + } + + std::string error; + r = 
validate_remote_client_state(remote_client, + &m_state_builder->remote_client_meta, + &m_resync_requested, &error); + if (r < 0) { + handle_replay_complete(locker, r, error); + close_local_image(); + return; + } + + start_external_replay(locker); +} + +template <typename I> +void Replayer<I>::start_external_replay(std::unique_lock<ceph::mutex>& locker) { + dout(10) << dendl; + + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock local_image_locker{local_image_ctx->image_lock}; + + ceph_assert(m_local_journal == nullptr); + m_local_journal = local_image_ctx->journal; + if (m_local_journal == nullptr) { + local_image_locker.unlock(); + + derr << "local image journal closed" << dendl; + handle_replay_complete(locker, -EINVAL, "error accessing local journal"); + close_local_image(); + return; + } + + // safe to hold pointer to journal after external playback starts + Context *start_ctx = create_context_callback< + Replayer, &Replayer<I>::handle_start_external_replay>(this); + m_local_journal->start_external_replay(&m_local_journal_replay, start_ctx); +} + +template <typename I> +void Replayer<I>::handle_start_external_replay(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + ceph_assert(m_local_journal_replay == nullptr); + derr << "error starting external replay on local image " + << m_state_builder->local_image_ctx->id << ": " + << cpp_strerror(r) << dendl; + + handle_replay_complete(locker, r, "error starting replay on local image"); + close_local_image(); + return; + } + + if (!notify_init_complete(locker)) { + return; + } + + m_state = STATE_REPLAYING; + + // check for resync/promotion state after adding listener + if (!add_local_journal_listener(locker)) { + return; + } + + // start remote journal replay + m_event_preprocessor = EventPreprocessor<I>::create( + *m_state_builder->local_image_ctx, *m_state_builder->remote_journaler, + m_local_mirror_uuid, &m_state_builder->remote_client_meta, + 
m_threads->work_queue); + m_replay_status_formatter = ReplayStatusFormatter<I>::create( + m_state_builder->remote_journaler, m_local_mirror_uuid); + + auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct); + double poll_seconds = cct->_conf.get_val<double>( + "rbd_mirror_journal_poll_age"); + m_remote_replay_handler = new RemoteReplayHandler(this); + m_state_builder->remote_journaler->start_live_replay(m_remote_replay_handler, + poll_seconds); + + notify_status_updated(); +} + +template <typename I> +bool Replayer<I>::add_local_journal_listener( + std::unique_lock<ceph::mutex>& locker) { + dout(10) << dendl; + + // listen for promotion and resync requests against local journal + ceph_assert(m_local_journal_listener == nullptr); + m_local_journal_listener = new LocalJournalListener(this); + m_local_journal->add_listener(m_local_journal_listener); + + // verify that the local image wasn't force-promoted and that a resync hasn't + // been requested now that we are listening for events + if (m_local_journal->is_tag_owner()) { + dout(10) << "local image force-promoted" << dendl; + handle_replay_complete(locker, 0, "force promoted"); + return false; + } + + bool resync_requested = false; + int r = m_local_journal->is_resync_requested(&resync_requested); + if (r < 0) { + dout(10) << "failed to determine resync state: " << cpp_strerror(r) + << dendl; + handle_replay_complete(locker, r, "error parsing resync state"); + return false; + } else if (resync_requested) { + dout(10) << "local image resync requested" << dendl; + handle_replay_complete(locker, 0, "resync requested"); + return false; + } + + return true; +} + +template <typename I> +bool Replayer<I>::notify_init_complete(std::unique_lock<ceph::mutex>& locker) { + dout(10) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + ceph_assert(m_state == STATE_INIT); + + // notify that init has completed + Context *on_finish = nullptr; + std::swap(m_on_init_shutdown, on_finish); + + 
locker.unlock(); + on_finish->complete(0); + locker.lock(); + + if (m_on_init_shutdown != nullptr) { + // shut down requested after we notified init complete but before we + // grabbed the lock + close_local_image(); + return false; + } + + return true; +} + +template <typename I> +void Replayer<I>::wait_for_flush() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + // ensure that we don't have two concurrent local journal replay shut downs + dout(10) << dendl; + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_wait_for_flush>(this)); + m_flush_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Replayer<I>::handle_wait_for_flush(int r) { + dout(10) << "r=" << r << dendl; + + shut_down_local_journal_replay(); +} + +template <typename I> +void Replayer<I>::shut_down_local_journal_replay() { + std::unique_lock locker{m_lock}; + + if (m_local_journal_replay == nullptr) { + wait_for_event_replay(); + return; + } + + // It's required to stop the local journal replay state machine prior to + // waiting for the events to complete. This is to ensure that IO is properly + // flushed (it might be batched), wait for any running ops to complete, and + // to cancel any ops waiting for their associated OnFinish events. 
+ dout(10) << dendl; + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_shut_down_local_journal_replay>(this); + m_local_journal_replay->shut_down(true, ctx); +} + +template <typename I> +void Replayer<I>::handle_shut_down_local_journal_replay(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + derr << "error shutting down journal replay: " << cpp_strerror(r) << dendl; + handle_replay_error(r, "failed to shut down local journal replay"); + } + + wait_for_event_replay(); +} + +template <typename I> +void Replayer<I>::wait_for_event_replay() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(10) << dendl; + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_wait_for_event_replay>(this)); + m_event_replay_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Replayer<I>::handle_wait_for_event_replay(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + close_local_image(); +} + +template <typename I> +void Replayer<I>::close_local_image() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + if (m_state_builder->local_image_ctx == nullptr) { + stop_remote_journaler_replay(); + return; + } + + dout(10) << dendl; + if (m_local_journal_listener != nullptr) { + // blocks if listener notification is in-progress + m_local_journal->remove_listener(m_local_journal_listener); + delete m_local_journal_listener; + m_local_journal_listener = nullptr; + } + + if (m_local_journal_replay != nullptr) { + m_local_journal->stop_external_replay(); + m_local_journal_replay = nullptr; + } + + if (m_event_preprocessor != nullptr) { + image_replayer::journal::EventPreprocessor<I>::destroy( + m_event_preprocessor); + m_event_preprocessor = nullptr; + } + + m_local_journal.reset(); + + // NOTE: it's important to ensure that the local image is fully + // closed before attempting to close the remote 
journal in + // case the remote cluster is unreachable + ceph_assert(m_state_builder->local_image_ctx != nullptr); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_close_local_image>(this); + auto request = image_replayer::CloseImageRequest<I>::create( + &m_state_builder->local_image_ctx, ctx); + request->send(); +} + + +template <typename I> +void Replayer<I>::handle_close_local_image(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + derr << "error closing local iamge: " << cpp_strerror(r) << dendl; + handle_replay_error(r, "failed to close local image"); + } + + ceph_assert(m_state_builder->local_image_ctx == nullptr); + stop_remote_journaler_replay(); +} + +template <typename I> +void Replayer<I>::stop_remote_journaler_replay() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + if (m_state_builder->remote_journaler == nullptr) { + wait_for_in_flight_ops(); + return; + } else if (m_remote_replay_handler == nullptr) { + wait_for_in_flight_ops(); + return; + } + + dout(10) << dendl; + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_stop_remote_journaler_replay>(this)); + m_state_builder->remote_journaler->stop_replay(ctx); +} + +template <typename I> +void Replayer<I>::handle_stop_remote_journaler_replay(int r) { + dout(10) << "r=" << r << dendl; + + std::unique_lock locker{m_lock}; + if (r < 0) { + derr << "failed to stop remote journaler replay : " << cpp_strerror(r) + << dendl; + handle_replay_error(r, "failed to stop remote journaler replay"); + } + + delete m_remote_replay_handler; + m_remote_replay_handler = nullptr; + + wait_for_in_flight_ops(); +} + +template <typename I> +void Replayer<I>::wait_for_in_flight_ops() { + dout(10) << dendl; + if (m_remote_listener != nullptr) { + m_state_builder->remote_journaler->remove_listener(m_remote_listener); + delete m_remote_listener; + m_remote_listener = 
nullptr; + } + + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_wait_for_in_flight_ops>(this)); + m_in_flight_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Replayer<I>::handle_wait_for_in_flight_ops(int r) { + dout(10) << "r=" << r << dendl; + + ReplayStatusFormatter<I>::destroy(m_replay_status_formatter); + m_replay_status_formatter = nullptr; + + Context* on_init_shutdown = nullptr; + { + std::unique_lock locker{m_lock}; + ceph_assert(m_on_init_shutdown != nullptr); + std::swap(m_on_init_shutdown, on_init_shutdown); + m_state = STATE_COMPLETE; + } + on_init_shutdown->complete(m_error_code); +} + +template <typename I> +void Replayer<I>::handle_remote_journal_metadata_updated() { + dout(20) << dendl; + + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING) { + return; + } + + cls::journal::Client remote_client; + int r = m_state_builder->remote_journaler->get_cached_client( + m_local_mirror_uuid, &remote_client); + if (r < 0) { + derr << "failed to retrieve client: " << cpp_strerror(r) << dendl; + return; + } + + librbd::journal::MirrorPeerClientMeta remote_client_meta; + std::string error; + r = validate_remote_client_state(remote_client, &remote_client_meta, + &m_resync_requested, &error); + if (r < 0) { + dout(0) << "client flagged disconnected, stopping image replay" << dendl; + handle_replay_complete(locker, r, error); + } +} + +template <typename I> +void Replayer<I>::schedule_flush_local_replay_task() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + std::unique_lock timer_locker{m_threads->timer_lock}; + if (m_state != STATE_REPLAYING || m_flush_local_replay_task != nullptr) { + return; + } + + dout(15) << dendl; + m_flush_local_replay_task = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_flush_local_replay_task>(this)); + m_threads->timer->add_event_after(30, 
m_flush_local_replay_task); +} + +template <typename I> +void Replayer<I>::cancel_flush_local_replay_task() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + std::unique_lock timer_locker{m_threads->timer_lock}; + if (m_flush_local_replay_task != nullptr) { + dout(10) << dendl; + m_threads->timer->cancel_event(m_flush_local_replay_task); + m_flush_local_replay_task = nullptr; + } +} + +template <typename I> +void Replayer<I>::handle_flush_local_replay_task(int) { + dout(15) << dendl; + + m_in_flight_op_tracker.start_op(); + auto on_finish = new LambdaContext([this](int) { + std::unique_lock locker{m_lock}; + + { + std::unique_lock timer_locker{m_threads->timer_lock}; + m_flush_local_replay_task = nullptr; + } + + notify_status_updated(); + m_in_flight_op_tracker.finish_op(); + }); + flush_local_replay(on_finish); +} + +template <typename I> +void Replayer<I>::flush_local_replay(Context* on_flush) { + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING) { + locker.unlock(); + on_flush->complete(0); + return; + } else if (m_local_journal_replay == nullptr) { + // raced w/ a tag creation stop/start, which implies that + // the replay is flushed + locker.unlock(); + flush_commit_position(on_flush); + return; + } + + dout(15) << dendl; + auto ctx = new LambdaContext( + [this, on_flush](int r) { + handle_flush_local_replay(on_flush, r); + }); + m_local_journal_replay->flush(ctx); +} + +template <typename I> +void Replayer<I>::handle_flush_local_replay(Context* on_flush, int r) { + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "error flushing local replay: " << cpp_strerror(r) << dendl; + on_flush->complete(r); + return; + } + + flush_commit_position(on_flush); +} + +template <typename I> +void Replayer<I>::flush_commit_position(Context* on_flush) { + std::unique_lock locker{m_lock}; + if (m_state != STATE_REPLAYING) { + locker.unlock(); + on_flush->complete(0); + return; + } + + dout(15) << dendl; + auto ctx = new LambdaContext( + [this, 
on_flush](int r) { + handle_flush_commit_position(on_flush, r); + }); + m_state_builder->remote_journaler->flush_commit_position(ctx); +} + +template <typename I> +void Replayer<I>::handle_flush_commit_position(Context* on_flush, int r) { + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "error flushing remote journal commit position: " + << cpp_strerror(r) << dendl; + } + + on_flush->complete(r); +} + +template <typename I> +void Replayer<I>::handle_replay_error(int r, const std::string &error) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + if (m_error_code == 0) { + m_error_code = r; + m_error_description = error; + } +} + +template <typename I> +bool Replayer<I>::is_replay_complete() const { + std::unique_lock locker{m_lock}; + return is_replay_complete(locker); +} + +template <typename I> +bool Replayer<I>::is_replay_complete( + const std::unique_lock<ceph::mutex>&) const { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + return (m_state == STATE_COMPLETE); +} + +template <typename I> +void Replayer<I>::handle_replay_complete(int r, const std::string &error) { + std::unique_lock locker{m_lock}; + handle_replay_complete(locker, r, error); +} + +template <typename I> +void Replayer<I>::handle_replay_complete( + const std::unique_lock<ceph::mutex>&, int r, const std::string &error) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(10) << "r=" << r << ", error=" << error << dendl; + if (r < 0) { + derr << "replay encountered an error: " << cpp_strerror(r) << dendl; + handle_replay_error(r, error); + } + + if (m_state != STATE_REPLAYING) { + return; + } + + m_state = STATE_COMPLETE; + notify_status_updated(); +} + +template <typename I> +void Replayer<I>::handle_replay_ready() { + std::unique_lock locker{m_lock}; + handle_replay_ready(locker); +} + +template <typename I> +void Replayer<I>::handle_replay_ready( + std::unique_lock<ceph::mutex>& locker) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(20) << dendl; + if 
(is_replay_complete(locker)) { + return; + } + + if (!m_state_builder->remote_journaler->try_pop_front(&m_replay_entry, + &m_replay_tag_tid)) { + dout(20) << "no entries ready for replay" << dendl; + return; + } + + // can safely drop lock once the entry is tracked + m_event_replay_tracker.start_op(); + locker.unlock(); + + dout(20) << "entry tid=" << m_replay_entry.get_commit_tid() + << "tag_tid=" << m_replay_tag_tid << dendl; + if (!m_replay_tag_valid || m_replay_tag.tid != m_replay_tag_tid) { + // must allocate a new local journal tag prior to processing + replay_flush(); + return; + } + + preprocess_entry(); +} + +template <typename I> +void Replayer<I>::replay_flush() { + dout(10) << dendl; + m_flush_tracker.start_op(); + + // shut down the replay to flush all IO and ops and create a new + // replayer to handle the new tag epoch + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_replay_flush_shut_down>(this); + ceph_assert(m_local_journal_replay != nullptr); + m_local_journal_replay->shut_down(false, ctx); +} + +template <typename I> +void Replayer<I>::handle_replay_flush_shut_down(int r) { + std::unique_lock locker{m_lock}; + dout(10) << "r=" << r << dendl; + + ceph_assert(m_local_journal != nullptr); + ceph_assert(m_local_journal_listener != nullptr); + + // blocks if listener notification is in-progress + m_local_journal->remove_listener(m_local_journal_listener); + delete m_local_journal_listener; + m_local_journal_listener = nullptr; + + m_local_journal->stop_external_replay(); + m_local_journal_replay = nullptr; + m_local_journal.reset(); + + if (r < 0) { + locker.unlock(); + + handle_replay_flush(r); + return; + } + + // journal might have been closed now that we stopped external replay + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock local_image_locker{local_image_ctx->image_lock}; + m_local_journal = local_image_ctx->journal; + if (m_local_journal == nullptr) { + local_image_locker.unlock(); + 
locker.unlock();

    derr << "local image journal closed" << dendl;
    handle_replay_flush(-EINVAL);
    return;
  }

  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_replay_flush>(this);
  m_local_journal->start_external_replay(&m_local_journal_replay, ctx);
}

// Completion of the replay flush / external-replay restart; on success,
// re-checks resync/promotion state and fetches the remote tag for the new
// epoch. The pending entry's tracker op is released on every error path.
template <typename I>
void Replayer<I>::handle_replay_flush(int r) {
  std::unique_lock locker{m_lock};
  dout(10) << "r=" << r << dendl;
  m_flush_tracker.finish_op();

  if (r < 0) {
    derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl;
    handle_replay_complete(locker, r, "replay flush encountered an error");
    m_event_replay_tracker.finish_op();
    return;
  } else if (is_replay_complete(locker)) {
    m_event_replay_tracker.finish_op();
    return;
  }

  // check for resync/promotion state after adding listener
  if (!add_local_journal_listener(locker)) {
    m_event_replay_tracker.finish_op();
    return;
  }
  locker.unlock();

  get_remote_tag();
}

// Fetch the remote journal tag for the tag tid of the popped entry.
template <typename I>
void Replayer<I>::get_remote_tag() {
  dout(15) << "tag_tid: " << m_replay_tag_tid << dendl;

  Context *ctx = create_context_callback<
    Replayer, &Replayer<I>::handle_get_remote_tag>(this);
  m_state_builder->remote_journaler->get_tag(m_replay_tag_tid, &m_replay_tag,
                                             ctx);
}

template <typename I>
void Replayer<I>::handle_get_remote_tag(int r) {
  dout(15) << "r=" << r << dendl;

  if (r == 0) {
    try {
      auto it = m_replay_tag.data.cbegin();
      decode(m_replay_tag_data, it);
    } catch (const buffer::error &err) {
      // undecodable tag data is treated the same as a fetch failure
      r = -EBADMSG;
    }
  }

  if (r < 0) {
    derr << "failed to retrieve remote tag " << m_replay_tag_tid << ": "
         << cpp_strerror(r) << dendl;
    handle_replay_complete(r, "failed to retrieve remote tag");
    m_event_replay_tracker.finish_op();
    return;
  }

  m_replay_tag_valid = true;
  dout(15) << "decoded remote tag " << m_replay_tag_tid << ": "
           << m_replay_tag_data << dendl;

  allocate_local_tag();
}

// Allocate a local journal tag mirroring the remote tag's ownership chain,
// translating remote/local mirror uuids into the local journal's namespace.
template <typename I>
void
Replayer<I>::allocate_local_tag() {
  dout(15) << dendl;

  std::string mirror_uuid = m_replay_tag_data.mirror_uuid;
  if (mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) {
    mirror_uuid = m_state_builder->remote_mirror_uuid;
  } else if (mirror_uuid == m_local_mirror_uuid) {
    mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID;
  } else if (mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
    // handle possible edge condition where daemon can failover and
    // the local image has already been promoted/demoted
    auto local_tag_data = m_local_journal->get_tag_data();
    if (local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID &&
        (local_tag_data.predecessor.commit_valid &&
         local_tag_data.predecessor.mirror_uuid ==
           librbd::Journal<>::LOCAL_MIRROR_UUID)) {
      dout(15) << "skipping stale demotion event" << dendl;
      handle_process_entry_safe(m_replay_entry, m_replay_bytes,
                                m_replay_start_time, 0);
      handle_replay_ready();
      return;
    } else {
      dout(5) << "encountered image demotion: stopping" << dendl;
      handle_replay_complete(0, "");
      // NOTE(review): no early return here — execution falls through and a
      // tag is still allocated after replay is flagged complete. Presumably
      // the demotion tag itself must still be mirrored; confirm intent.
    }
  }

  librbd::journal::TagPredecessor predecessor(m_replay_tag_data.predecessor);
  if (predecessor.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) {
    predecessor.mirror_uuid = m_state_builder->remote_mirror_uuid;
  } else if (predecessor.mirror_uuid == m_local_mirror_uuid) {
    predecessor.mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID;
  }

  dout(15) << "mirror_uuid=" << mirror_uuid << ", "
           << "predecessor=" << predecessor << ", "
           << "replay_tag_tid=" << m_replay_tag_tid << dendl;
  Context *ctx = create_context_callback<
    Replayer, &Replayer<I>::handle_allocate_local_tag>(this);
  m_local_journal->allocate_tag(mirror_uuid, predecessor, ctx);
}

template <typename I>
void Replayer<I>::handle_allocate_local_tag(int r) {
  dout(15) << "r=" << r << ", "
           << "tag_tid=" << m_local_journal->get_tag_tid() << dendl;
  if (r < 0) {
    derr << "failed to allocate journal tag: " <<
cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to allocate journal tag"); + m_event_replay_tracker.finish_op(); + return; + } + + preprocess_entry(); +} + +template <typename I> +void Replayer<I>::preprocess_entry() { + dout(20) << "preprocessing entry tid=" << m_replay_entry.get_commit_tid() + << dendl; + + bufferlist data = m_replay_entry.get_data(); + auto it = data.cbegin(); + int r = m_local_journal_replay->decode(&it, &m_event_entry); + if (r < 0) { + derr << "failed to decode journal event" << dendl; + handle_replay_complete(r, "failed to decode journal event"); + m_event_replay_tracker.finish_op(); + return; + } + + m_replay_bytes = data.length(); + uint32_t delay = calculate_replay_delay( + m_event_entry.timestamp, + m_state_builder->local_image_ctx->mirroring_replay_delay); + if (delay == 0) { + handle_preprocess_entry_ready(0); + return; + } + + std::unique_lock locker{m_lock}; + if (is_replay_complete(locker)) { + // don't schedule a delayed replay task if a shut-down is in-progress + m_event_replay_tracker.finish_op(); + return; + } + + dout(20) << "delaying replay by " << delay << " sec" << dendl; + std::unique_lock timer_locker{m_threads->timer_lock}; + ceph_assert(m_delayed_preprocess_task == nullptr); + m_delayed_preprocess_task = create_context_callback< + Replayer<I>, &Replayer<I>::handle_delayed_preprocess_task>(this); + m_threads->timer->add_event_after(delay, m_delayed_preprocess_task); +} + +template <typename I> +void Replayer<I>::handle_delayed_preprocess_task(int r) { + dout(20) << "r=" << r << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_threads->timer_lock)); + m_delayed_preprocess_task = nullptr; + + m_threads->work_queue->queue(create_context_callback< + Replayer, &Replayer<I>::handle_preprocess_entry_ready>(this), 0); +} + +template <typename I> +void Replayer<I>::handle_preprocess_entry_ready(int r) { + dout(20) << "r=" << r << dendl; + ceph_assert(r == 0); + + m_replay_start_time = ceph_clock_now(); + if 
(!m_event_preprocessor->is_required(m_event_entry)) { + process_entry(); + return; + } + + Context *ctx = create_context_callback< + Replayer, &Replayer<I>::handle_preprocess_entry_safe>(this); + m_event_preprocessor->preprocess(&m_event_entry, ctx); +} + +template <typename I> +void Replayer<I>::handle_preprocess_entry_safe(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + if (r == -ECANCELED) { + handle_replay_complete(0, "lost exclusive lock"); + } else { + derr << "failed to preprocess journal event" << dendl; + handle_replay_complete(r, "failed to preprocess journal event"); + } + + m_event_replay_tracker.finish_op(); + return; + } + + process_entry(); +} + +template <typename I> +void Replayer<I>::process_entry() { + dout(20) << "processing entry tid=" << m_replay_entry.get_commit_tid() + << dendl; + + Context *on_ready = create_context_callback< + Replayer, &Replayer<I>::handle_process_entry_ready>(this); + Context *on_commit = new C_ReplayCommitted(this, std::move(m_replay_entry), + m_replay_bytes, + m_replay_start_time); + + m_local_journal_replay->process(m_event_entry, on_ready, on_commit); +} + +template <typename I> +void Replayer<I>::handle_process_entry_ready(int r) { + std::unique_lock locker{m_lock}; + + dout(20) << dendl; + ceph_assert(r == 0); + + bool update_status = false; + { + auto local_image_ctx = m_state_builder->local_image_ctx; + std::shared_lock image_locker{local_image_ctx->image_lock}; + auto image_spec = util::compute_image_spec(local_image_ctx->md_ctx, + local_image_ctx->name); + if (m_image_spec != image_spec) { + m_image_spec = image_spec; + update_status = true; + } + } + + m_replay_status_formatter->handle_entry_processed(m_replay_bytes); + + if (update_status) { + unregister_perf_counters(); + register_perf_counters(); + notify_status_updated(); + } + + // attempt to process the next event + handle_replay_ready(locker); +} + +template <typename I> +void Replayer<I>::handle_process_entry_safe( + const ReplayEntry 
&replay_entry, uint64_t replay_bytes, + const utime_t &replay_start_time, int r) { + dout(20) << "commit_tid=" << replay_entry.get_commit_tid() << ", r=" << r + << dendl; + + if (r < 0) { + derr << "failed to commit journal event: " << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to commit journal event"); + } else { + ceph_assert(m_state_builder->remote_journaler != nullptr); + m_state_builder->remote_journaler->committed(replay_entry); + } + + auto latency = ceph_clock_now() - replay_start_time; + if (g_journal_perf_counters) { + g_journal_perf_counters->inc(l_rbd_mirror_journal_entries); + g_journal_perf_counters->inc(l_rbd_mirror_journal_replay_bytes, + replay_bytes); + g_journal_perf_counters->tinc(l_rbd_mirror_journal_replay_latency, + latency); + } + + auto ctx = new LambdaContext( + [this, replay_bytes, latency](int r) { + std::unique_lock locker{m_lock}; + schedule_flush_local_replay_task(); + + if (m_perf_counters) { + m_perf_counters->inc(l_rbd_mirror_journal_entries); + m_perf_counters->inc(l_rbd_mirror_journal_replay_bytes, replay_bytes); + m_perf_counters->tinc(l_rbd_mirror_journal_replay_latency, latency); + } + + m_event_replay_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Replayer<I>::handle_resync_image() { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + m_resync_requested = true; + handle_replay_complete(locker, 0, "resync requested"); +} + +template <typename I> +void Replayer<I>::notify_status_updated() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(10) << dendl; + + auto ctx = new C_TrackedOp(m_in_flight_op_tracker, new LambdaContext( + [this](int) { + m_replayer_listener->handle_notification(); + })); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Replayer<I>::cancel_delayed_preprocess_task() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + bool canceled_delayed_preprocess_task = false; + { + std::unique_lock 
timer_locker{m_threads->timer_lock}; + if (m_delayed_preprocess_task != nullptr) { + dout(10) << dendl; + canceled_delayed_preprocess_task = m_threads->timer->cancel_event( + m_delayed_preprocess_task); + ceph_assert(canceled_delayed_preprocess_task); + m_delayed_preprocess_task = nullptr; + } + } + + if (canceled_delayed_preprocess_task) { + // wake up sleeping replay + m_event_replay_tracker.finish_op(); + } +} + +template <typename I> +int Replayer<I>::validate_remote_client_state( + const cls::journal::Client& remote_client, + librbd::journal::MirrorPeerClientMeta* remote_client_meta, + bool* resync_requested, std::string* error) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + if (!util::decode_client_meta(remote_client, remote_client_meta)) { + // require operator intervention since the data is corrupt + *error = "error retrieving remote journal client"; + return -EBADMSG; + } + + auto local_image_ctx = m_state_builder->local_image_ctx; + dout(5) << "image_id=" << local_image_ctx->id << ", " + << "remote_client_meta.image_id=" + << remote_client_meta->image_id << ", " + << "remote_client.state=" << remote_client.state << dendl; + if (remote_client_meta->image_id == local_image_ctx->id && + remote_client.state != cls::journal::CLIENT_STATE_CONNECTED) { + dout(5) << "client flagged disconnected, stopping image replay" << dendl; + if (local_image_ctx->config.template get_val<bool>( + "rbd_mirroring_resync_after_disconnect")) { + dout(10) << "disconnected: automatic resync" << dendl; + *resync_requested = true; + *error = "disconnected: automatic resync"; + return -ENOTCONN; + } else { + dout(10) << "disconnected" << dendl; + *error = "disconnected"; + return -ENOTCONN; + } + } + + return 0; +} + +template <typename I> +void Replayer<I>::register_perf_counters() { + dout(5) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + ceph_assert(m_perf_counters == nullptr); + + auto cct = static_cast<CephContext 
*>(m_state_builder->local_image_ctx->cct); + auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio"); + + auto local_image_ctx = m_state_builder->local_image_ctx; + std::string labels = ceph::perf_counters::key_create( + "rbd_mirror_journal_image", + {{"pool", local_image_ctx->md_ctx.get_pool_name()}, + {"namespace", local_image_ctx->md_ctx.get_namespace()}, + {"image", local_image_ctx->name}}); + + PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_journal_first, + l_rbd_mirror_journal_last); + plb.add_u64_counter(l_rbd_mirror_journal_entries, "entries", + "Number of entries replayed", nullptr, prio); + plb.add_u64_counter(l_rbd_mirror_journal_replay_bytes, "replay_bytes", + "Total bytes replayed", nullptr, prio, + unit_t(UNIT_BYTES)); + plb.add_time_avg(l_rbd_mirror_journal_replay_latency, "replay_latency", + "Replay latency", nullptr, prio); + m_perf_counters = plb.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(m_perf_counters); +} + +template <typename I> +void Replayer<I>::unregister_perf_counters() { + dout(5) << dendl; + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + PerfCounters *perf_counters = nullptr; + std::swap(perf_counters, m_perf_counters); + + if (perf_counters != nullptr) { + g_ceph_context->get_perfcounters_collection()->remove(perf_counters); + delete perf_counters; + } +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::Replayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/journal/Replayer.h b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h new file mode 100644 index 000000000..6b1f36d9c --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.h @@ -0,0 +1,323 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H +#define 
RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H + +#include "tools/rbd_mirror/image_replayer/Replayer.h" +#include "include/utime.h" +#include "common/AsyncOpTracker.h" +#include "common/ceph_mutex.h" +#include "common/RefCountedObj.h" +#include "cls/journal/cls_journal_types.h" +#include "journal/ReplayEntry.h" +#include "librbd/ImageCtx.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <string> +#include <type_traits> + +namespace journal { class Journaler; } +namespace librbd { + +struct ImageCtx; +namespace journal { template <typename I> class Replay; } + +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_replayer { + +struct ReplayerListener; + +namespace journal { + +template <typename> class EventPreprocessor; +template <typename> class ReplayStatusFormatter; +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class Replayer : public image_replayer::Replayer { +public: + typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler; + + static Replayer* create( + Threads<ImageCtxT>* threads, + const std::string& local_mirror_uuid, + StateBuilder<ImageCtxT>* state_builder, + ReplayerListener* replayer_listener) { + return new Replayer(threads, local_mirror_uuid, state_builder, + replayer_listener); + } + + Replayer( + Threads<ImageCtxT>* threads, + const std::string& local_mirror_uuid, + StateBuilder<ImageCtxT>* state_builder, + ReplayerListener* replayer_listener); + ~Replayer(); + + void destroy() override { + delete this; + } + + void init(Context* on_finish) override; + void shut_down(Context* on_finish) override; + + void flush(Context* on_finish) override; + + bool get_replay_status(std::string* description, Context* on_finish) override; + + bool is_replaying() const override { + std::unique_lock locker{m_lock}; + return (m_state == STATE_REPLAYING); + } + + bool is_resync_requested() const override { + 
std::unique_lock locker(m_lock); + return m_resync_requested; + } + + int get_error_code() const override { + std::unique_lock locker(m_lock); + return m_error_code; + } + + std::string get_error_description() const override { + std::unique_lock locker(m_lock); + return m_error_description; + } + + std::string get_image_spec() const { + std::unique_lock locker(m_lock); + return m_image_spec; + } + +private: + /** + * @verbatim + * + * <init> + * | + * v (error) + * INIT_REMOTE_JOURNALER * * * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * START_EXTERNAL_REPLAY * * * * * * * * * * * * * * * * * * * + * | * + * | /--------------------------------------------\ * + * | | | * + * v v (asok flush) | * + * REPLAYING -------------> LOCAL_REPLAY_FLUSH | * + * | \ | | * + * | | v | * + * | | FLUSH_COMMIT_POSITION | * + * | | | | * + * | | \--------------------/| * + * | | | * + * | | (entries available) | * + * | \-----------> REPLAY_READY | * + * | | | * + * | | (skip if not | * + * | v needed) (error) * + * | REPLAY_FLUSH * * * * * * * * * * + * | | | * * + * | | (skip if not | * * + * | v needed) (error) * * + * | GET_REMOTE_TAG * * * * * * * * * + * | | | * * + * | | (skip if not | * * + * | v needed) (error) * * + * | ALLOCATE_LOCAL_TAG * * * * * * * + * | | | * * + * | v (error) * * + * | PREPROCESS_ENTRY * * * * * * * * + * | | | * * + * | v (error) * * + * | PROCESS_ENTRY * * * * * * * * * * + * | | | * * + * | \---------------------/ * * + * v (shutdown) * * + * REPLAY_COMPLETE < * * * * * * * * * * * * * * * * * * * * + * | * + * v * + * WAIT_FOR_FLUSH * + * | * + * v * + * SHUT_DOWN_LOCAL_JOURNAL_REPLAY * + * | * + * v * + * WAIT_FOR_REPLAY * + * | * + * v * + * CLOSE_LOCAL_IMAGE < * * * * * * * * * * * * * * * * * * * * + * | + * v (skip if not started) + * STOP_REMOTE_JOURNALER_REPLAY + * | + * v + * WAIT_FOR_IN_FLIGHT_OPS + * | + * v + * <shutdown> + * + * @endverbatim + */ + + typedef typename librbd::journal::TypeTraits<ImageCtxT>::ReplayEntry 
ReplayEntry; + + enum State { + STATE_INIT, + STATE_REPLAYING, + STATE_COMPLETE + }; + + struct C_ReplayCommitted; + struct RemoteJournalerListener; + struct RemoteReplayHandler; + struct LocalJournalListener; + + Threads<ImageCtxT>* m_threads; + std::string m_local_mirror_uuid; + StateBuilder<ImageCtxT>* m_state_builder; + ReplayerListener* m_replayer_listener; + + mutable ceph::mutex m_lock; + + std::string m_image_spec; + Context* m_on_init_shutdown = nullptr; + + State m_state = STATE_INIT; + int m_error_code = 0; + std::string m_error_description; + bool m_resync_requested = false; + + ceph::ref_t<typename std::remove_pointer<decltype(ImageCtxT::journal)>::type> + m_local_journal; + RemoteJournalerListener* m_remote_listener = nullptr; + + librbd::journal::Replay<ImageCtxT>* m_local_journal_replay = nullptr; + EventPreprocessor<ImageCtxT>* m_event_preprocessor = nullptr; + ReplayStatusFormatter<ImageCtxT>* m_replay_status_formatter = nullptr; + RemoteReplayHandler* m_remote_replay_handler = nullptr; + LocalJournalListener* m_local_journal_listener = nullptr; + + PerfCounters *m_perf_counters = nullptr; + + ReplayEntry m_replay_entry; + uint64_t m_replay_bytes = 0; + utime_t m_replay_start_time; + bool m_replay_tag_valid = false; + uint64_t m_replay_tag_tid = 0; + cls::journal::Tag m_replay_tag; + librbd::journal::TagData m_replay_tag_data; + librbd::journal::EventEntry m_event_entry; + + AsyncOpTracker m_flush_tracker; + + AsyncOpTracker m_event_replay_tracker; + Context *m_delayed_preprocess_task = nullptr; + + AsyncOpTracker m_in_flight_op_tracker; + Context *m_flush_local_replay_task = nullptr; + + void handle_remote_journal_metadata_updated(); + + void schedule_flush_local_replay_task(); + void cancel_flush_local_replay_task(); + void handle_flush_local_replay_task(int r); + + void flush_local_replay(Context* on_flush); + void handle_flush_local_replay(Context* on_flush, int r); + + void flush_commit_position(Context* on_flush); + void 
handle_flush_commit_position(Context* on_flush, int r); + + void init_remote_journaler(); + void handle_init_remote_journaler(int r); + + void start_external_replay(std::unique_lock<ceph::mutex>& locker); + void handle_start_external_replay(int r); + + bool add_local_journal_listener(std::unique_lock<ceph::mutex>& locker); + + bool notify_init_complete(std::unique_lock<ceph::mutex>& locker); + + void wait_for_flush(); + void handle_wait_for_flush(int r); + + void shut_down_local_journal_replay(); + void handle_shut_down_local_journal_replay(int r); + + void wait_for_event_replay(); + void handle_wait_for_event_replay(int r); + + void close_local_image(); + void handle_close_local_image(int r); + + void stop_remote_journaler_replay(); + void handle_stop_remote_journaler_replay(int r); + + void wait_for_in_flight_ops(); + void handle_wait_for_in_flight_ops(int r); + + void replay_flush(); + void handle_replay_flush_shut_down(int r); + void handle_replay_flush(int r); + + void get_remote_tag(); + void handle_get_remote_tag(int r); + + void allocate_local_tag(); + void handle_allocate_local_tag(int r); + + void handle_replay_error(int r, const std::string &error); + + bool is_replay_complete() const; + bool is_replay_complete(const std::unique_lock<ceph::mutex>& locker) const; + + void handle_replay_complete(int r, const std::string &error_desc); + void handle_replay_complete(const std::unique_lock<ceph::mutex>&, + int r, const std::string &error_desc); + void handle_replay_ready(); + void handle_replay_ready(std::unique_lock<ceph::mutex>& locker); + + void preprocess_entry(); + void handle_delayed_preprocess_task(int r); + void handle_preprocess_entry_ready(int r); + void handle_preprocess_entry_safe(int r); + + void process_entry(); + void handle_process_entry_ready(int r); + void handle_process_entry_safe(const ReplayEntry& replay_entry, + uint64_t relay_bytes, + const utime_t &replay_start_time, int r); + + void handle_resync_image(); + + void 
notify_status_updated(); + + void cancel_delayed_preprocess_task(); + + int validate_remote_client_state( + const cls::journal::Client& remote_client, + librbd::journal::MirrorPeerClientMeta* remote_client_meta, + bool* resync_requested, std::string* error); + + void register_perf_counters(); + void unregister_perf_counters(); + +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::Replayer<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_REPLAYER_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc new file mode 100644 index 000000000..5f1fb0e2f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.cc @@ -0,0 +1,149 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "StateBuilder.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "tools/rbd_mirror/image_replayer/journal/CreateLocalImageRequest.h" +#include "tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.h" +#include "tools/rbd_mirror/image_replayer/journal/Replayer.h" +#include "tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "StateBuilder: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename I> +StateBuilder<I>::StateBuilder(const std::string& global_image_id) + : image_replayer::StateBuilder<I>(global_image_id) { +} + +template <typename I> 
+StateBuilder<I>::~StateBuilder() { + ceph_assert(remote_journaler == nullptr); +} + +template <typename I> +void StateBuilder<I>::close(Context* on_finish) { + dout(10) << dendl; + + // close the remote journaler after closing the local image + // in case we have lost contact w/ the remote cluster and + // will block + on_finish = new LambdaContext([this, on_finish](int) { + shut_down_remote_journaler(on_finish); + }); + on_finish = new LambdaContext([this, on_finish](int) { + this->close_local_image(on_finish); + }); + this->close_remote_image(on_finish); +} + +template <typename I> +bool StateBuilder<I>::is_disconnected() const { + return (remote_client_state == cls::journal::CLIENT_STATE_DISCONNECTED); +} + +template <typename I> +bool StateBuilder<I>::is_linked_impl() const { + ceph_assert(!this->remote_mirror_uuid.empty()); + return (local_primary_mirror_uuid == this->remote_mirror_uuid); +} + +template <typename I> +cls::rbd::MirrorImageMode StateBuilder<I>::get_mirror_image_mode() const { + return cls::rbd::MIRROR_IMAGE_MODE_JOURNAL; +} + +template <typename I> +image_sync::SyncPointHandler* StateBuilder<I>::create_sync_point_handler() { + dout(10) << dendl; + + this->m_sync_point_handler = SyncPointHandler<I>::create(this); + return this->m_sync_point_handler; +} + +template <typename I> +BaseRequest* StateBuilder<I>::create_local_image_request( + Threads<I>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) { + return CreateLocalImageRequest<I>::create( + threads, local_io_ctx, this->remote_image_ctx, this->global_image_id, + pool_meta_cache, progress_ctx, this, on_finish); +} + +template <typename I> +BaseRequest* StateBuilder<I>::create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) { + return 
PrepareReplayRequest<I>::create( + local_mirror_uuid, progress_ctx, this, resync_requested, syncing, + on_finish); +} + +template <typename I> +image_replayer::Replayer* StateBuilder<I>::create_replayer( + Threads<I>* threads, + InstanceWatcher<I>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) { + return Replayer<I>::create( + threads, local_mirror_uuid, this, replayer_listener); +} + +template <typename I> +void StateBuilder<I>::shut_down_remote_journaler(Context* on_finish) { + if (remote_journaler == nullptr) { + on_finish->complete(0); + return; + } + + dout(10) << dendl; + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_shut_down_remote_journaler(r, on_finish); + }); + remote_journaler->shut_down(ctx); +} + +template <typename I> +void StateBuilder<I>::handle_shut_down_remote_journaler(int r, + Context* on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to shut down remote journaler: " << cpp_strerror(r) + << dendl; + } + + delete remote_journaler; + remote_journaler = nullptr; + on_finish->complete(r); +} + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::StateBuilder<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h new file mode 100644 index 000000000..790d1390b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/StateBuilder.h @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H + +#include "tools/rbd_mirror/image_replayer/StateBuilder.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" 
+#include "librbd/journal/TypeTraits.h" +#include <string> + +struct Context; + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename> class SyncPointHandler; + +template <typename ImageCtxT> +class StateBuilder : public image_replayer::StateBuilder<ImageCtxT> { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + + static StateBuilder* create(const std::string& global_image_id) { + return new StateBuilder(global_image_id); + } + + StateBuilder(const std::string& global_image_id); + ~StateBuilder() override; + + void close(Context* on_finish) override; + + bool is_disconnected() const override; + + cls::rbd::MirrorImageMode get_mirror_image_mode() const override; + + image_sync::SyncPointHandler* create_sync_point_handler() override; + + bool replay_requires_remote_image() const override { + return false; + } + + BaseRequest* create_local_image_request( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) override; + + BaseRequest* create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) override; + + image_replayer::Replayer* create_replayer( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) override; + + std::string local_primary_mirror_uuid; + + Journaler* remote_journaler = nullptr; + cls::journal::ClientState remote_client_state = + cls::journal::CLIENT_STATE_CONNECTED; + librbd::journal::MirrorPeerClientMeta remote_client_meta; + + SyncPointHandler<ImageCtxT>* sync_point_handler = nullptr; + +private: + bool 
is_linked_impl() const override; + + void shut_down_remote_journaler(Context* on_finish); + void handle_shut_down_remote_journaler(int r, Context* on_finish); +}; + +} // namespace journal +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::StateBuilder<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_STATE_BUILDER_H diff --git a/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc new file mode 100644 index 000000000..66d13e555 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.cc @@ -0,0 +1,109 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointHandler.h" +#include "StateBuilder.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ + << "SyncPointHandler: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename I> +SyncPointHandler<I>::SyncPointHandler(StateBuilder<I>* state_builder) + : m_state_builder(state_builder), + m_client_meta_copy(state_builder->remote_client_meta) { +} + +template <typename I> +typename SyncPointHandler<I>::SyncPoints +SyncPointHandler<I>::get_sync_points() const { + SyncPoints sync_points; + for (auto& sync_point : m_client_meta_copy.sync_points) { + sync_points.emplace_back( + sync_point.snap_namespace, + sync_point.snap_name, + sync_point.from_snap_name, + sync_point.object_number); + } + return sync_points; +} + +template <typename I> +librbd::SnapSeqs 
SyncPointHandler<I>::get_snap_seqs() const { + return m_client_meta_copy.snap_seqs; +} + +template <typename I> +void SyncPointHandler<I>::update_sync_points( + const librbd::SnapSeqs& snap_seqs, const SyncPoints& sync_points, + bool sync_complete, Context* on_finish) { + dout(10) << dendl; + + if (sync_complete && sync_points.empty()) { + m_client_meta_copy.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + } + + m_client_meta_copy.snap_seqs = snap_seqs; + m_client_meta_copy.sync_points.clear(); + for (auto& sync_point : sync_points) { + m_client_meta_copy.sync_points.emplace_back( + sync_point.snap_namespace, + sync_point.snap_name, + sync_point.from_snap_name, + sync_point.object_number); + + if (sync_point.object_number) { + m_client_meta_copy.sync_object_count = std::max( + m_client_meta_copy.sync_object_count, *sync_point.object_number + 1); + } + } + + dout(20) << "client_meta=" << m_client_meta_copy << dendl; + bufferlist client_data_bl; + librbd::journal::ClientData client_data{m_client_meta_copy}; + encode(client_data, client_data_bl); + + auto ctx = new LambdaContext([this, on_finish](int r) { + handle_update_sync_points(r, on_finish); + }); + m_state_builder->remote_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void SyncPointHandler<I>::handle_update_sync_points(int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + if (r >= 0) { + m_state_builder->remote_client_meta.snap_seqs = + m_client_meta_copy.snap_seqs; + m_state_builder->remote_client_meta.sync_points = + m_client_meta_copy.sync_points; + } else { + derr << "failed to update remote journal client meta for image " + << m_state_builder->global_image_id << ": " << cpp_strerror(r) + << dendl; + } + + on_finish->complete(r); +} + +} // namespace journal +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::journal::SyncPointHandler<librbd::ImageCtx>; diff --git 
a/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h new file mode 100644 index 000000000..b4f492c19 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/journal/SyncPointHandler.h @@ -0,0 +1,55 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H +#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H + +#include "tools/rbd_mirror/image_sync/Types.h" +#include "librbd/journal/Types.h" + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace journal { + +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class SyncPointHandler : public image_sync::SyncPointHandler { +public: + using SyncPoint = image_sync::SyncPoint; + using SyncPoints = image_sync::SyncPoints; + + static SyncPointHandler* create(StateBuilder<ImageCtxT>* state_builder) { + return new SyncPointHandler(state_builder); + } + SyncPointHandler(StateBuilder<ImageCtxT>* state_builder); + + SyncPoints get_sync_points() const override; + librbd::SnapSeqs get_snap_seqs() const override; + + void update_sync_points(const librbd::SnapSeqs& snap_seqs, + const SyncPoints& sync_points, + bool sync_complete, + Context* on_finish) override; + +private: + StateBuilder<ImageCtxT>* m_state_builder; + + librbd::journal::MirrorPeerClientMeta m_client_meta_copy; + + void handle_update_sync_points(int r, Context* on_finish); + +}; + +} // namespace journal +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::journal::SyncPointHandler<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_SYNC_POINT_HANDLER_H diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc 
b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc new file mode 100644 index 000000000..75881307c --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.cc @@ -0,0 +1,658 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ApplyImageStateRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/image/GetMetadataRequest.h" +#include "tools/rbd_mirror/image_replayer/snapshot/Utils.h" +#include <boost/algorithm/string/predicate.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ + << "ApplyImageStateRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +ApplyImageStateRequest<I>::ApplyImageStateRequest( + const std::string& local_mirror_uuid, + const std::string& remote_mirror_uuid, + I* local_image_ctx, + I* remote_image_ctx, + librbd::mirror::snapshot::ImageState image_state, + Context* on_finish) + : m_local_mirror_uuid(local_mirror_uuid), + m_remote_mirror_uuid(remote_mirror_uuid), + m_local_image_ctx(local_image_ctx), + m_remote_image_ctx(remote_image_ctx), + m_image_state(image_state), + m_on_finish(on_finish) { + dout(15) << "image_state=" << m_image_state << dendl; + + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + m_features = m_local_image_ctx->features & ~RBD_FEATURES_IMPLICIT_ENABLE; + compute_local_to_remote_snap_ids(); +} + +template <typename I> +void ApplyImageStateRequest<I>::send() { + rename_image(); +} + +template <typename I> +void 
ApplyImageStateRequest<I>::rename_image() { + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + if (m_local_image_ctx->name == m_image_state.name) { + image_locker.unlock(); + owner_locker.unlock(); + + update_features(); + return; + } + image_locker.unlock(); + + dout(15) << "local_image_name=" << m_local_image_ctx->name << ", " + << "remote_image_name=" << m_image_state.name << dendl; + + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_rename_image>(this); + m_local_image_ctx->operations->execute_rename(m_image_state.name, ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_rename_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to rename image to '" << m_image_state.name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + update_features(); +} + +template <typename I> +void ApplyImageStateRequest<I>::update_features() { + uint64_t feature_updates = 0UL; + bool enabled = false; + + auto image_state_features = + m_image_state.features & ~RBD_FEATURES_IMPLICIT_ENABLE; + feature_updates = (m_features & ~image_state_features); + if (feature_updates == 0UL) { + feature_updates = (image_state_features & ~m_features); + enabled = (feature_updates != 0UL); + } + + if (feature_updates == 0UL) { + get_image_meta(); + return; + } + + dout(15) << "image_features=" << m_features << ", " + << "state_features=" << image_state_features << ", " + << "feature_updates=" << feature_updates << ", " + << "enabled=" << enabled << dendl; + + if (enabled) { + m_features |= feature_updates; + } else { + m_features &= ~feature_updates; + } + + std::shared_lock owner_lock{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_update_features>(this); + m_local_image_ctx->operations->execute_update_features( 
+ feature_updates, enabled, ctx, 0U); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_update_features(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update image features: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + update_features(); +} + +template <typename I> +void ApplyImageStateRequest<I>::get_image_meta() { + dout(15) << dendl; + + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_get_image_meta>(this); + auto req = librbd::image::GetMetadataRequest<I>::create( + m_local_image_ctx->md_ctx, m_local_image_ctx->header_oid, true, "", "", 0U, + &m_metadata, ctx); + req->send(); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_get_image_meta(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to fetch local image metadata: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + update_image_meta(); +} + +template <typename I> +void ApplyImageStateRequest<I>::update_image_meta() { + std::set<std::string> keys_to_remove; + for (const auto& [key, value] : m_metadata) { + if (m_image_state.metadata.count(key) == 0) { + dout(15) << "removing image-meta key '" << key << "'" << dendl; + keys_to_remove.insert(key); + } + } + + std::map<std::string, bufferlist> metadata_to_update; + for (const auto& [key, value] : m_image_state.metadata) { + auto it = m_metadata.find(key); + if (it == m_metadata.end() || !it->second.contents_equal(value)) { + dout(15) << "updating image-meta key '" << key << "'" << dendl; + metadata_to_update.insert({key, value}); + } + } + + if (keys_to_remove.empty() && metadata_to_update.empty()) { + unprotect_snapshot(); + return; + } + + dout(15) << dendl; + + librados::ObjectWriteOperation op; + for (const auto& key : keys_to_remove) { + librbd::cls_client::metadata_remove(&op, key); + } + if (!metadata_to_update.empty()) { + librbd::cls_client::metadata_set(&op, 
metadata_to_update); + } + + auto aio_comp = create_rados_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_update_image_meta>(this); + int r = m_local_image_ctx->md_ctx.aio_operate(m_local_image_ctx->header_oid, aio_comp, + &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_update_image_meta(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update image metadata: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_metadata.clear(); + + m_prev_snap_id = CEPH_NOSNAP; + unprotect_snapshot(); +} + +template <typename I> +void ApplyImageStateRequest<I>::unprotect_snapshot() { + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + + auto snap_it = m_local_image_ctx->snap_info.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id); + } + + for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + const auto& snap_info = snap_it->second; + + auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>( + &snap_info.snap_namespace); + if (user_ns == nullptr) { + dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl; + continue; + } + + if (snap_info.protection_status == RBD_PROTECTION_STATUS_UNPROTECTED) { + dout(20) << "snapshot " << snap_id << " is already unprotected" << dendl; + continue; + } + + auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id); + if (snap_id_map_it == m_local_to_remote_snap_ids.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image" + << dendl; + break; + } + + auto remote_snap_id = snap_id_map_it->second; + auto snap_state_it = m_image_state.snapshots.find(remote_snap_id); + if (snap_state_it == m_image_state.snapshots.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image " + << "state" << dendl; + break; + } + + 
const auto& snap_state = snap_state_it->second; + if (snap_state.protection_status == RBD_PROTECTION_STATUS_UNPROTECTED) { + dout(15) << "snapshot " << snap_id << " is unprotected in remote image" + << dendl; + break; + } + } + + if (snap_it == m_local_image_ctx->snap_info.end()) { + image_locker.unlock(); + + // no local snapshots to unprotect + m_prev_snap_id = CEPH_NOSNAP; + remove_snapshot(); + return; + } + + m_prev_snap_id = snap_it->first; + m_snap_name = snap_it->second.name; + image_locker.unlock(); + + dout(15) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_unprotect_snapshot>(this); + m_local_image_ctx->operations->execute_snap_unprotect( + cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_unprotect_snapshot(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to unprotect snapshot " << m_snap_name << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + unprotect_snapshot(); +} + +template <typename I> +void ApplyImageStateRequest<I>::remove_snapshot() { + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + + auto snap_it = m_local_image_ctx->snap_info.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id); + } + + for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + const auto& snap_info = snap_it->second; + + auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>( + &snap_info.snap_namespace); + if (user_ns == nullptr) { + dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl; + continue; + } + + auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id); + if (snap_id_map_it == 
m_local_to_remote_snap_ids.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image" + << dendl; + break; + } + + auto remote_snap_id = snap_id_map_it->second; + auto snap_state_it = m_image_state.snapshots.find(remote_snap_id); + if (snap_state_it == m_image_state.snapshots.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image " + << "state" << dendl; + break; + } + } + + if (snap_it == m_local_image_ctx->snap_info.end()) { + image_locker.unlock(); + + // no local snapshots to remove + m_prev_snap_id = CEPH_NOSNAP; + protect_snapshot(); + return; + } + + m_prev_snap_id = snap_it->first; + m_snap_name = snap_it->second.name; + image_locker.unlock(); + + dout(15) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_remove_snapshot>(this); + m_local_image_ctx->operations->execute_snap_remove( + cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_remove_snapshot(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to remove snapshot " << m_snap_name << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + remove_snapshot(); +} + +template <typename I> +void ApplyImageStateRequest<I>::protect_snapshot() { + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + + auto snap_it = m_local_image_ctx->snap_info.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id); + } + + for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + const auto& snap_info = snap_it->second; + + auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>( + &snap_info.snap_namespace); + if (user_ns == nullptr) { + 
dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl; + continue; + } + + if (snap_info.protection_status == RBD_PROTECTION_STATUS_PROTECTED) { + dout(20) << "snapshot " << snap_id << " is already protected" << dendl; + continue; + } + + auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id); + if (snap_id_map_it == m_local_to_remote_snap_ids.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image" + << dendl; + continue; + } + + auto remote_snap_id = snap_id_map_it->second; + auto snap_state_it = m_image_state.snapshots.find(remote_snap_id); + if (snap_state_it == m_image_state.snapshots.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image " + << "state" << dendl; + continue; + } + + const auto& snap_state = snap_state_it->second; + if (snap_state.protection_status == RBD_PROTECTION_STATUS_PROTECTED) { + dout(15) << "snapshot " << snap_id << " is protected in remote image" + << dendl; + break; + } + } + + if (snap_it == m_local_image_ctx->snap_info.end()) { + image_locker.unlock(); + + // no local snapshots to protect + m_prev_snap_id = CEPH_NOSNAP; + rename_snapshot(); + return; + } + + m_prev_snap_id = snap_it->first; + m_snap_name = snap_it->second.name; + image_locker.unlock(); + + dout(15) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_protect_snapshot>(this); + m_local_image_ctx->operations->execute_snap_protect( + cls::rbd::UserSnapshotNamespace{}, m_snap_name.c_str(), ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_protect_snapshot(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to protect snapshot " << m_snap_name << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + protect_snapshot(); +} + +template 
<typename I> +void ApplyImageStateRequest<I>::rename_snapshot() { + std::shared_lock image_locker{m_local_image_ctx->image_lock}; + + auto snap_it = m_local_image_ctx->snap_info.begin(); + if (m_prev_snap_id != CEPH_NOSNAP) { + snap_it = m_local_image_ctx->snap_info.upper_bound(m_prev_snap_id); + } + + for (; snap_it != m_local_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + const auto& snap_info = snap_it->second; + + auto user_ns = std::get_if<cls::rbd::UserSnapshotNamespace>( + &snap_info.snap_namespace); + if (user_ns == nullptr) { + dout(20) << "snapshot " << snap_id << " is not a user snapshot" << dendl; + continue; + } + + auto snap_id_map_it = m_local_to_remote_snap_ids.find(snap_id); + if (snap_id_map_it == m_local_to_remote_snap_ids.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image" + << dendl; + continue; + } + + auto remote_snap_id = snap_id_map_it->second; + auto snap_state_it = m_image_state.snapshots.find(remote_snap_id); + if (snap_state_it == m_image_state.snapshots.end()) { + dout(15) << "snapshot " << snap_id << " does not exist in remote image " + << "state" << dendl; + continue; + } + + const auto& snap_state = snap_state_it->second; + if (snap_info.name != snap_state.name) { + dout(15) << "snapshot " << snap_id << " has been renamed from '" + << snap_info.name << "' to '" << snap_state.name << "'" + << dendl; + m_snap_name = snap_state.name; + break; + } + } + + if (snap_it == m_local_image_ctx->snap_info.end()) { + image_locker.unlock(); + + // no local snapshots to protect + m_prev_snap_id = CEPH_NOSNAP; + set_snapshot_limit(); + return; + } + + m_prev_snap_id = snap_it->first; + image_locker.unlock(); + + dout(15) << "snap_name=" << m_snap_name << ", " + << "snap_id=" << m_prev_snap_id << dendl; + + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + 
&ApplyImageStateRequest<I>::handle_rename_snapshot>(this); + m_local_image_ctx->operations->execute_snap_rename( + m_prev_snap_id, m_snap_name.c_str(), ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_rename_snapshot(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to protect snapshot " << m_snap_name << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + rename_snapshot(); +} + +template <typename I> +void ApplyImageStateRequest<I>::set_snapshot_limit() { + dout(15) << "snap_limit=" << m_image_state.snap_limit << dendl; + + // no need to even check the current limit -- just set it + std::shared_lock owner_locker{m_local_image_ctx->owner_lock}; + auto ctx = create_context_callback< + ApplyImageStateRequest<I>, + &ApplyImageStateRequest<I>::handle_set_snapshot_limit>(this); + m_local_image_ctx->operations->execute_snap_set_limit( + m_image_state.snap_limit, ctx); +} + +template <typename I> +void ApplyImageStateRequest<I>::handle_set_snapshot_limit(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update snapshot limit: " << cpp_strerror(r) + << dendl; + } + + finish(r); +} + +template <typename I> +void ApplyImageStateRequest<I>::finish(int r) { + dout(15) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +template <typename I> +uint64_t ApplyImageStateRequest<I>::compute_remote_snap_id( + uint64_t local_snap_id) { + ceph_assert(ceph_mutex_is_locked(m_local_image_ctx->image_lock)); + ceph_assert(ceph_mutex_is_locked(m_remote_image_ctx->image_lock)); + + // Search our local non-primary snapshots for a mapping to the remote + // snapshot. 
The non-primary mirror snapshot with the mappings will always + // come at or after the snapshot we are searching against + auto remote_snap_id = util::compute_remote_snap_id( + m_local_image_ctx->image_lock, m_local_image_ctx->snap_info, + local_snap_id, m_remote_mirror_uuid); + if (remote_snap_id != CEPH_NOSNAP) { + return remote_snap_id; + } + + // if we failed to find a match to a remote snapshot in our local non-primary + // snapshots, check the remote image for non-primary snapshot mappings back + // to our snapshot + for (auto snap_it = m_remote_image_ctx->snap_info.begin(); + snap_it != m_remote_image_ctx->snap_info.end(); ++snap_it) { + auto snap_id = snap_it->first; + auto mirror_ns = std::get_if<cls::rbd::MirrorSnapshotNamespace>( + &snap_it->second.snap_namespace); + if (mirror_ns == nullptr || !mirror_ns->is_non_primary()) { + continue; + } + + if (mirror_ns->primary_mirror_uuid != m_local_mirror_uuid) { + dout(20) << "remote snapshot " << snap_id << " not tied to local" + << dendl; + continue; + } else if (mirror_ns->primary_snap_id == local_snap_id) { + dout(15) << "local snapshot " << local_snap_id << " maps to " + << "remote snapshot " << snap_id << dendl; + return snap_id; + } + + const auto& snap_seqs = mirror_ns->snap_seqs; + for (auto [local_snap_id_seq, remote_snap_id_seq] : snap_seqs) { + if (local_snap_id_seq == local_snap_id) { + dout(15) << "local snapshot " << local_snap_id << " maps to " + << "remote snapshot " << remote_snap_id_seq << dendl; + return remote_snap_id_seq; + } + } + } + + return CEPH_NOSNAP; +} + +template <typename I> +void ApplyImageStateRequest<I>::compute_local_to_remote_snap_ids() { + ceph_assert(ceph_mutex_is_locked(m_local_image_ctx->image_lock)); + std::shared_lock remote_image_locker{m_remote_image_ctx->image_lock}; + + for (const auto& [snap_id, snap_info] : m_local_image_ctx->snap_info) { + m_local_to_remote_snap_ids[snap_id] = compute_remote_snap_id(snap_id); + } + + dout(15) << "local_to_remote_snap_ids=" << 
m_local_to_remote_snap_ids + << dendl; +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::ApplyImageStateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h new file mode 100644 index 000000000..0e2d09ddf --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h @@ -0,0 +1,155 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H + +#include "common/ceph_mutex.h" +#include "librbd/mirror/snapshot/Types.h" +#include <map> +#include <string> + +struct Context; + +namespace librbd { + +struct ImageCtx; + +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +template <typename> class EventPreprocessor; +template <typename> class ReplayStatusFormatter; +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class ApplyImageStateRequest { +public: + static ApplyImageStateRequest* create( + const std::string& local_mirror_uuid, + const std::string& remote_mirror_uuid, + ImageCtxT* local_image_ctx, + ImageCtxT* remote_image_ctx, + librbd::mirror::snapshot::ImageState image_state, + Context* on_finish) { + return new ApplyImageStateRequest(local_mirror_uuid, remote_mirror_uuid, + local_image_ctx, remote_image_ctx, + image_state, on_finish); + } + + ApplyImageStateRequest( + const std::string& local_mirror_uuid, + const std::string& remote_mirror_uuid, + ImageCtxT* local_image_ctx, + ImageCtxT* remote_image_ctx, + librbd::mirror::snapshot::ImageState image_state, + Context* on_finish); + + void send(); + +private: + /** + * @verbatim + * 
+ * <start> + * | + * v + * RENAME_IMAGE + * | + * | /---------\ + * | | | + * v v | + * UPDATE_FEATURES -----/ + * | + * v + * GET_IMAGE_META + * | + * | /---------\ + * | | | + * v v | + * UPDATE_IMAGE_META ---/ + * | + * | /---------\ + * | | | + * v v | + * UNPROTECT_SNAPSHOT | + * | | + * v | + * REMOVE_SNAPSHOT | + * | | + * v | + * PROTECT_SNAPSHOT | + * | | + * v | + * RENAME_SNAPSHOT -----/ + * | + * v + * SET_SNAPSHOT_LIMIT + * | + * v + * <finish> + * + * @endverbatim + */ + + std::string m_local_mirror_uuid; + std::string m_remote_mirror_uuid; + ImageCtxT* m_local_image_ctx; + ImageCtxT* m_remote_image_ctx; + librbd::mirror::snapshot::ImageState m_image_state; + Context* m_on_finish; + + std::map<uint64_t, uint64_t> m_local_to_remote_snap_ids; + + uint64_t m_features = 0; + + std::map<std::string, bufferlist> m_metadata; + + uint64_t m_prev_snap_id = 0; + std::string m_snap_name; + + void rename_image(); + void handle_rename_image(int r); + + void update_features(); + void handle_update_features(int r); + + void get_image_meta(); + void handle_get_image_meta(int r); + + void update_image_meta(); + void handle_update_image_meta(int r); + + void unprotect_snapshot(); + void handle_unprotect_snapshot(int r); + + void remove_snapshot(); + void handle_remove_snapshot(int r); + + void protect_snapshot(); + void handle_protect_snapshot(int r); + + void rename_snapshot(); + void handle_rename_snapshot(int r); + + void set_snapshot_limit(); + void handle_set_snapshot_limit(int r); + + void finish(int r); + + uint64_t compute_remote_snap_id(uint64_t snap_id); + void compute_local_to_remote_snap_ids(); +}; + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::snapshot::ApplyImageStateRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_APPLY_IMAGE_STATE_REQUEST_H diff --git 
a/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc new file mode 100644 index 000000000..c923395c9 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.cc @@ -0,0 +1,204 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "CreateLocalImageRequest.h" +#include "include/rados/librados.hpp" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "tools/rbd_mirror/ProgressContext.h" +#include "tools/rbd_mirror/image_replayer/CreateImageRequest.h" +#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ + << "CreateLocalImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void CreateLocalImageRequest<I>::send() { + disable_mirror_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::disable_mirror_image() { + if (m_state_builder->local_image_id.empty()) { + add_mirror_image(); + return; + } + + dout(10) << dendl; + update_progress("DISABLE_MIRROR_IMAGE"); + + // need to send 'disabling' since the cls methods will fail if we aren't + // in that state + cls::rbd::MirrorImage mirror_image{ + cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, m_global_image_id, + cls::rbd::MIRROR_IMAGE_STATE_DISABLING}; + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_set(&op, m_state_builder->local_image_id, + 
mirror_image); + + auto aio_comp = create_rados_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_disable_mirror_image>(this); + int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_disable_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to disable mirror image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + remove_mirror_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::remove_mirror_image() { + dout(10) << dendl; + update_progress("REMOVE_MIRROR_IMAGE"); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_remove(&op, m_state_builder->local_image_id); + + auto aio_comp = create_rados_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_remove_mirror_image>(this); + int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_remove_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to remove mirror image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_state_builder->local_image_id = ""; + add_mirror_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::add_mirror_image() { + ceph_assert(m_state_builder->local_image_id.empty()); + m_state_builder->local_image_id = + librbd::util::generate_image_id<I>(m_local_io_ctx); + + dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl; + update_progress("ADD_MIRROR_IMAGE"); + + // use 'creating' to track a partially constructed image. 
it will + // be switched to 'enabled' once the image is fully created + cls::rbd::MirrorImage mirror_image{ + cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, m_global_image_id, + cls::rbd::MIRROR_IMAGE_STATE_CREATING}; + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_set(&op, m_state_builder->local_image_id, + mirror_image); + + auto aio_comp = create_rados_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_add_mirror_image>(this); + int r = m_local_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateLocalImageRequest<I>::handle_add_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register mirror image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + this->finish(r); + return; + } + + create_local_image(); +} + +template <typename I> +void CreateLocalImageRequest<I>::create_local_image() { + dout(10) << "local_image_id=" << m_state_builder->local_image_id << dendl; + update_progress("CREATE_LOCAL_IMAGE"); + + m_remote_image_ctx->image_lock.lock_shared(); + std::string image_name = m_remote_image_ctx->name; + m_remote_image_ctx->image_lock.unlock_shared(); + + auto ctx = create_context_callback< + CreateLocalImageRequest<I>, + &CreateLocalImageRequest<I>::handle_create_local_image>(this); + auto request = CreateImageRequest<I>::create( + m_threads, m_local_io_ctx, m_global_image_id, + m_state_builder->remote_mirror_uuid, image_name, + m_state_builder->local_image_id, m_remote_image_ctx, + m_pool_meta_cache, cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, ctx); + request->send(); +} +template <typename I> +void CreateLocalImageRequest<I>::handle_create_local_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBADF) { + dout(5) << "image id " << m_state_builder->local_image_id << " " + << "already in-use" << dendl; + disable_mirror_image(); + return; + } else if (r < 0) { + if (r == 
-ENOENT) { + dout(10) << "parent image does not exist" << dendl; + } else { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void CreateLocalImageRequest<I>::update_progress( + const std::string& description) { + dout(15) << description << dendl; + if (m_progress_ctx != nullptr) { + m_progress_ctx->update_progress(description); + } +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::CreateLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h new file mode 100644 index 000000000..3345154b4 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h @@ -0,0 +1,121 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H + +#include "include/rados/librados_fwd.hpp" +#include "tools/rbd_mirror/BaseRequest.h" +#include <string> + +struct Context; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +class PoolMetaCache; +class ProgressContext; +template <typename> struct Threads; + +namespace image_replayer { +namespace snapshot { + +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class CreateLocalImageRequest : public BaseRequest { +public: + typedef rbd::mirror::ProgressContext ProgressContext; + + static CreateLocalImageRequest* create( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + ImageCtxT* remote_image_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + 
StateBuilder<ImageCtxT>* state_builder, + Context* on_finish) { + return new CreateLocalImageRequest(threads, local_io_ctx, remote_image_ctx, + global_image_id, pool_meta_cache, + progress_ctx, state_builder, on_finish); + } + + CreateLocalImageRequest( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + ImageCtxT* remote_image_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + Context* on_finish) + : BaseRequest(on_finish), + m_threads(threads), + m_local_io_ctx(local_io_ctx), + m_remote_image_ctx(remote_image_ctx), + m_global_image_id(global_image_id), + m_pool_meta_cache(pool_meta_cache), + m_progress_ctx(progress_ctx), + m_state_builder(state_builder) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * DISABLE_MIRROR_IMAGE < * * * * * * + * | * + * v * + * REMOVE_MIRROR_IMAGE * + * | * + * v * + * ADD_MIRROR_IMAGE * + * | * + * v (id exists) * + * CREATE_LOCAL_IMAGE * * * * * * * * + * | + * v + * <finish> + * + * @endverbatim + */ + + Threads<ImageCtxT>* m_threads; + librados::IoCtx& m_local_io_ctx; + ImageCtxT* m_remote_image_ctx; + std::string m_global_image_id; + PoolMetaCache* m_pool_meta_cache; + ProgressContext* m_progress_ctx; + StateBuilder<ImageCtxT>* m_state_builder; + + void disable_mirror_image(); + void handle_disable_mirror_image(int r); + + void remove_mirror_image(); + void handle_remove_mirror_image(int r); + + void add_mirror_image(); + void handle_add_mirror_image(int r); + + void create_local_image(); + void handle_create_local_image(int r); + + void update_progress(const std::string& description); + +}; + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::snapshot::CreateLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_CREATE_LOCAL_IMAGE_REQUEST_H diff 
--git a/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc new file mode 100644 index 000000000..575eb8534 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.cc @@ -0,0 +1,70 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "PrepareReplayRequest.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/mirror/snapshot/ImageMeta.h" +#include "tools/rbd_mirror/ProgressContext.h" +#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ + << "PrepareReplayRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +using librbd::util::create_context_callback; + +template <typename I> +void PrepareReplayRequest<I>::send() { + *m_resync_requested = false; + *m_syncing = false; + + load_local_image_meta(); +} + +template <typename I> +void PrepareReplayRequest<I>::load_local_image_meta() { + dout(15) << dendl; + + ceph_assert(m_state_builder->local_image_meta == nullptr); + m_state_builder->local_image_meta = + librbd::mirror::snapshot::ImageMeta<I>::create( + m_state_builder->local_image_ctx, m_local_mirror_uuid); + + auto ctx = create_context_callback< + PrepareReplayRequest<I>, + &PrepareReplayRequest<I>::handle_load_local_image_meta>(this); + m_state_builder->local_image_meta->load(ctx); +} + +template <typename I> +void PrepareReplayRequest<I>::handle_load_local_image_meta(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to load local image-meta: " << cpp_strerror(r) << 
dendl; + finish(r); + return; + } + + *m_resync_requested = m_state_builder->local_image_meta->resync_requested; + finish(0); +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::PrepareReplayRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h new file mode 100644 index 000000000..4e9246acd --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h @@ -0,0 +1,92 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H + +#include "include/int_types.h" +#include "librbd/mirror/Types.h" +#include "tools/rbd_mirror/BaseRequest.h" +#include <list> +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +class ProgressContext; + +namespace image_replayer { +namespace snapshot { + +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class PrepareReplayRequest : public BaseRequest { +public: + static PrepareReplayRequest* create( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + bool* resync_requested, + bool* syncing, + Context* on_finish) { + return new PrepareReplayRequest( + local_mirror_uuid, progress_ctx, state_builder, resync_requested, + syncing, on_finish); + } + + PrepareReplayRequest( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + StateBuilder<ImageCtxT>* state_builder, + bool* resync_requested, + bool* syncing, + Context* on_finish) + : BaseRequest(on_finish), + m_local_mirror_uuid(local_mirror_uuid), + m_progress_ctx(progress_ctx), + 
      m_state_builder(state_builder),
      m_resync_requested(resync_requested),
      m_syncing(syncing) {
  }

  void send() override;

private:
  // TODO
  /**
   * @verbatim
   *
   * <start>
   *    |
   *    v
   * LOAD_LOCAL_IMAGE_META
   *    |
   *    v
   * <finish>
   *
   * @endverbatim
   */

  std::string m_local_mirror_uuid;
  ProgressContext* m_progress_ctx;
  StateBuilder<ImageCtxT>* m_state_builder;
  // Caller-owned out-params populated before finish().
  bool* m_resync_requested;
  bool* m_syncing;

  void load_local_image_meta();
  void handle_load_local_image_meta(int r);

};

} // namespace snapshot
} // namespace image_replayer
} // namespace mirror
} // namespace rbd

extern template class rbd::mirror::image_replayer::snapshot::PrepareReplayRequest<librbd::ImageCtx>;

#endif // RBD_MIRROR_IMAGE_REPLAYER_JOURNAL_PREPARE_REPLAY_REQUEST_H
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
new file mode 100644
index 000000000..67eaa9777
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
@@ -0,0 +1,1633 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "Replayer.h"
#include "common/debug.h"
#include "common/errno.h"
#include "common/perf_counters.h"
#include "common/perf_counters_key.h"
#include "include/stringify.h"
#include "common/Timer.h"
#include "cls/rbd/cls_rbd_client.h"
#include "json_spirit/json_spirit.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageState.h"
#include "librbd/Operations.h"
#include "librbd/Utils.h"
#include "librbd/asio/ContextWQ.h"
#include "librbd/deep_copy/Handler.h"
#include "librbd/deep_copy/ImageCopyRequest.h"
#include "librbd/deep_copy/SnapshotCopyRequest.h"
#include "librbd/mirror/ImageStateUpdateRequest.h"
#include "librbd/mirror/snapshot/CreateNonPrimaryRequest.h"
#include "librbd/mirror/snapshot/GetImageStateRequest.h"
#include "librbd/mirror/snapshot/ImageMeta.h"
#include "librbd/mirror/snapshot/UnlinkPeerRequest.h"
#include "tools/rbd_mirror/InstanceWatcher.h"
#include "tools/rbd_mirror/PoolMetaCache.h"
#include "tools/rbd_mirror/Threads.h"
#include "tools/rbd_mirror/Types.h"
#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
#include "tools/rbd_mirror/image_replayer/ReplayerListener.h"
#include "tools/rbd_mirror/image_replayer/Utils.h"
#include "tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h"
#include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
#include "tools/rbd_mirror/image_replayer/snapshot/Utils.h"
#include <set>

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rbd_mirror
#undef dout_prefix
#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
                           << "Replayer: " << this << " " << __func__ << ": "

extern PerfCounters *g_snapshot_perf_counters;

namespace rbd {
namespace mirror {
namespace image_replayer {
namespace snapshot {

namespace {

// Round to two decimal places and take the absolute value.
// NOTE(review): unqualified abs() — presumably resolves to the floating
// point overload via <cmath>; verify it is not the int abs from <cstdlib>.
double round_to_two_places(double value) {
  return abs(round(value * 100) / 100);
}

// Walk the snap map newest-to-oldest and return the most recent
// *complete* mirror snapshot, or {CEPH_NOSNAP, nullptr} if none exists.
// Caller must hold the image lock of image_ctx.
template<typename I>
std::pair<uint64_t, librbd::SnapInfo*> get_newest_mirror_snapshot(
    I* image_ctx) {
  for (auto snap_info_it = image_ctx->snap_info.rbegin();
       snap_info_it != image_ctx->snap_info.rend(); ++snap_info_it) {
    const auto& snap_ns = snap_info_it->second.snap_namespace;
    auto mirror_ns = std::get_if<
      cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
    if (mirror_ns == nullptr || !mirror_ns->complete) {
      continue;
    }

    return {snap_info_it->first, &snap_info_it->second};
  }

  return {CEPH_NOSNAP, nullptr};
}

} // anonymous namespace

using librbd::util::create_async_context_callback;
using librbd::util::create_context_callback;
using librbd::util::create_rados_callback;

// Adapter that forwards image-update notifications into the replayer.
template <typename I>
struct Replayer<I>::C_UpdateWatchCtx : public librbd::UpdateWatchCtx {
  Replayer<I>* replayer;

  C_UpdateWatchCtx(Replayer<I>* replayer) : replayer(replayer) {
  }

  void handle_notify() override {
    replayer->handle_image_update_notify();
  }
};

// Adapter that forwards deep-copy progress callbacks into the replayer.
template <typename I>
struct Replayer<I>::DeepCopyHandler : public librbd::deep_copy::Handler {
  Replayer *replayer;

  DeepCopyHandler(Replayer* replayer) : replayer(replayer) {
  }

  void handle_read(uint64_t bytes_read) override {
    replayer->handle_copy_image_read(bytes_read);
  }

  int update_progress(uint64_t object_number, uint64_t object_count) override {
    replayer->handle_copy_image_progress(object_number, object_count);
    return 0;
  }
};

template <typename I>
Replayer<I>::Replayer(
    Threads<I>* threads,
    InstanceWatcher<I>* instance_watcher,
    const std::string& local_mirror_uuid,
    PoolMetaCache* pool_meta_cache,
    StateBuilder<I>* state_builder,
    ReplayerListener* replayer_listener)
  : m_threads(threads),
    m_instance_watcher(instance_watcher),
    m_local_mirror_uuid(local_mirror_uuid),
    m_pool_meta_cache(pool_meta_cache),
    m_state_builder(state_builder),
    m_replayer_listener(replayer_listener),
    m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
      "rbd::mirror::image_replayer::snapshot::Replayer", this))) {
  dout(10) << dendl;
}

// Destructor: replayer must already be fully shut down (asserted below).
template <typename I>
Replayer<I>::~Replayer() {
  dout(10) << dendl;

  {
    std::unique_lock locker{m_lock};
    unregister_perf_counters();
  }

  ceph_assert(m_state == STATE_COMPLETE);
  ceph_assert(m_update_watch_ctx == nullptr);
  ceph_assert(m_deep_copy_handler == nullptr);
}

// Initialize the replayer: resolve the remote mirror peer uuid from the
// pool meta cache, compute the image spec, register perf counters, and
// kick off registration of the local image update watcher.
template <typename I>
void Replayer<I>::init(Context* on_finish) {
  dout(10) << dendl;

  ceph_assert(m_state == STATE_INIT);

  RemotePoolMeta remote_pool_meta;
  int r = m_pool_meta_cache->get_remote_pool_meta(
    m_state_builder->remote_image_ctx->md_ctx.get_id(), &remote_pool_meta);
  if (r < 0 || remote_pool_meta.mirror_peer_uuid.empty()) {
    derr << "failed to retrieve mirror peer uuid from remote pool" << dendl;
    m_state = STATE_COMPLETE;
    m_threads->work_queue->queue(on_finish, r);
    return;
  }

  m_remote_mirror_peer_uuid = remote_pool_meta.mirror_peer_uuid;
  dout(10) << "remote_mirror_peer_uuid=" << m_remote_mirror_peer_uuid << dendl;

  {
    auto local_image_ctx = m_state_builder->local_image_ctx;
    std::shared_lock image_locker{local_image_ctx->image_lock};
    m_image_spec = image_replayer::util::compute_image_spec(
      local_image_ctx->md_ctx, local_image_ctx->name);
  }

  {
    std::unique_lock locker{m_lock};
    register_perf_counters();
  }

  ceph_assert(m_on_init_shutdown == nullptr);
  m_on_init_shutdown = on_finish;

  register_local_update_watcher();
}

// Request shutdown.  If a replay is currently in flight, shutdown is
// deferred until the replay completes (only a sync cancel is requested).
template <typename I>
void Replayer<I>::shut_down(Context* on_finish) {
  dout(10) << dendl;

  std::unique_lock locker{m_lock};
  ceph_assert(m_on_init_shutdown == nullptr);
  m_on_init_shutdown = on_finish;
  m_error_code = 0;
  m_error_description = "";

  ceph_assert(m_state != STATE_INIT);
  auto state = STATE_COMPLETE;
  std::swap(m_state, state);

  if (state == STATE_REPLAYING) {
    // if a sync request was pending, request a cancellation
    m_instance_watcher->cancel_sync_request(
      m_state_builder->local_image_ctx->id);

    // TODO interrupt snapshot copy and image copy state machines even if remote
    // cluster is unreachable
    dout(10) << "shut down pending on completion of snapshot replay" << dendl;
    return;
  }
  locker.unlock();

  unregister_remote_update_watcher();
}

// No-op flush: snapshot replay has nothing buffered to flush.
template <typename I>
void Replayer<I>::flush(Context* on_finish) {
  dout(10) << dendl;

  // TODO
  m_threads->work_queue->queue(on_finish, 0);
}

// Build a JSON status description for this replayer.  Returns false (and
// completes on_finish with -EAGAIN) when replay is not running; returns
// true and completes on_finish with -EEXIST when the description was
// produced synchronously.
template <typename I>
bool Replayer<I>::get_replay_status(std::string* description,
                                    Context* on_finish) {
  dout(10) << dendl;

  std::unique_lock locker{m_lock};
  if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) {
    locker.unlock();

    derr << "replay not running" << dendl;
    on_finish->complete(-EAGAIN);
    return false;
  }

  std::shared_lock local_image_locker{
    m_state_builder->local_image_ctx->image_lock};
  auto [local_snap_id,
       local_snap_info] = get_newest_mirror_snapshot(
    m_state_builder->local_image_ctx);

  std::shared_lock remote_image_locker{
    m_state_builder->remote_image_ctx->image_lock};
  auto [remote_snap_id, remote_snap_info] = get_newest_mirror_snapshot(
    m_state_builder->remote_image_ctx);

  if (remote_snap_info == nullptr) {
    remote_image_locker.unlock();
    local_image_locker.unlock();
    locker.unlock();

    derr << "remote image does not contain mirror snapshots" << dendl;
    on_finish->complete(-EAGAIN);
    return false;
  }

  // "syncing" whenever a remote end-snapshot has been selected
  std::string replay_state = "idle";
  if (m_remote_snap_id_end != CEPH_NOSNAP) {
    replay_state = "syncing";
  }

  json_spirit::mObject root_obj;
  root_obj["replay_state"] = replay_state;
  root_obj["remote_snapshot_timestamp"] = remote_snap_info->timestamp.sec();
  if (m_perf_counters) {
    m_perf_counters->tset(l_rbd_mirror_snapshot_remote_timestamp,
                          remote_snap_info->timestamp);
  }

  auto matching_remote_snap_id = util::compute_remote_snap_id(
    m_state_builder->local_image_ctx->image_lock,
    m_state_builder->local_image_ctx->snap_info,
    local_snap_id, m_state_builder->remote_mirror_uuid);
  auto matching_remote_snap_it =
    m_state_builder->remote_image_ctx->snap_info.find(matching_remote_snap_id);
  if (matching_remote_snap_id != CEPH_NOSNAP &&
      matching_remote_snap_it !=
        m_state_builder->remote_image_ctx->snap_info.end()) {
    // use the timestamp from the matching remote image since
    // the local snapshot would just be the time the snapshot was
    // synced and not the consistency point in time.
    root_obj["local_snapshot_timestamp"] =
      matching_remote_snap_it->second.timestamp.sec();
    if (m_perf_counters) {
      m_perf_counters->tset(l_rbd_mirror_snapshot_local_timestamp,
                            matching_remote_snap_it->second.timestamp);
    }
  }

  matching_remote_snap_it = m_state_builder->remote_image_ctx->snap_info.find(
    m_remote_snap_id_end);
  if (m_remote_snap_id_end != CEPH_NOSNAP &&
      matching_remote_snap_it !=
        m_state_builder->remote_image_ctx->snap_info.end()) {
    root_obj["syncing_snapshot_timestamp"] = remote_snap_info->timestamp.sec();

    if (m_local_object_count > 0) {
      root_obj["syncing_percent"] =
        100 * m_local_mirror_snap_ns.last_copied_object_number /
        m_local_object_count;
    } else {
      // Set syncing_percent to 0 if m_local_object_count has
      // not yet been set (last_copied_object_number may be > 0
      // if the sync is being resumed).
      root_obj["syncing_percent"] = 0;
    }
  }

  // feed a zero sample so the rolling average decays while idle
  m_bytes_per_second(0);
  auto bytes_per_second = m_bytes_per_second.get_average();
  root_obj["bytes_per_second"] = round_to_two_places(bytes_per_second);

  auto bytes_per_snapshot = boost::accumulators::rolling_mean(
    m_bytes_per_snapshot);
  root_obj["bytes_per_snapshot"] = round_to_two_places(bytes_per_snapshot);

  root_obj["last_snapshot_sync_seconds"] = m_last_snapshot_sync_seconds;
  root_obj["last_snapshot_bytes"] = m_last_snapshot_bytes;

  auto pending_bytes = bytes_per_snapshot * m_pending_snapshots;
  if (bytes_per_second > 0 && m_pending_snapshots > 0) {
    std::uint64_t seconds_until_synced = round_to_two_places(
      pending_bytes / bytes_per_second);
    if (seconds_until_synced >= std::numeric_limits<uint64_t>::max()) {
      seconds_until_synced = std::numeric_limits<uint64_t>::max();
    }

    root_obj["seconds_until_synced"] = seconds_until_synced;
  }

  *description = json_spirit::write(
    root_obj, json_spirit::remove_trailing_zeros);

  local_image_locker.unlock();
  remote_image_locker.unlock();
  locker.unlock();
  // -EEXIST signals the description was produced synchronously
  on_finish->complete(-EEXIST);
  return true;
}

// Start of a replay pass: clear the pending-update flag, refresh the
// cached image spec / perf counters if the image was renamed, then load
// the local image-meta.
template <typename I>
void Replayer<I>::load_local_image_meta() {
  dout(10) << dendl;

  {
    // reset state in case new snapshot is added while we are scanning
    std::unique_lock locker{m_lock};
    m_image_updated = false;
  }

  bool update_status = false;
  {
    auto local_image_ctx = m_state_builder->local_image_ctx;
    std::shared_lock image_locker{local_image_ctx->image_lock};
    auto image_spec = image_replayer::util::compute_image_spec(
      local_image_ctx->md_ctx, local_image_ctx->name);
    if (m_image_spec != image_spec) {
      m_image_spec = image_spec;
      update_status = true;
    }
  }
  if (update_status) {
    std::unique_lock locker{m_lock};
    unregister_perf_counters();
    register_perf_counters();
    notify_status_updated();
  }

  ceph_assert(m_state_builder->local_image_meta != nullptr);
  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_load_local_image_meta>(this);
  m_state_builder->local_image_meta->load(ctx);
}

// Completion for image-meta load; aborts replay when a resync was
// requested, otherwise continues with a local image refresh.
template <typename I>
void Replayer<I>::handle_load_local_image_meta(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0 && r != -ENOENT) {
    derr << "failed to load local image-meta: " << cpp_strerror(r) << dendl;
    handle_replay_complete(r, "failed to load local image-meta");
    return;
  }

  if (r >= 0 && m_state_builder->local_image_meta->resync_requested) {
    m_resync_requested = true;

    dout(10) << "local image resync requested" << dendl;
    handle_replay_complete(0, "resync requested");
    return;
  }

  refresh_local_image();
}

// Refresh the local image if required, else go straight to the remote.
template <typename I>
void Replayer<I>::refresh_local_image() {
  if (!m_state_builder->local_image_ctx->state->is_refresh_required()) {
    refresh_remote_image();
    return;
  }

  dout(10) << dendl;
  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_refresh_local_image>(this);
  m_state_builder->local_image_ctx->state->refresh(ctx);
}

template <typename I>
void Replayer<I>::handle_refresh_local_image(int r) {
  dout(10) << "r="
           << r << dendl;

  if (r < 0) {
    derr << "failed to refresh local image: " << cpp_strerror(r) << dendl;
    handle_replay_complete(r, "failed to refresh local image");
    return;
  }

  refresh_remote_image();
}

// Refresh the remote image if required, then scan local snapshots.
template <typename I>
void Replayer<I>::refresh_remote_image() {
  if (!m_state_builder->remote_image_ctx->state->is_refresh_required()) {
    std::unique_lock locker{m_lock};
    scan_local_mirror_snapshots(&locker);
    return;
  }

  dout(10) << dendl;
  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_refresh_remote_image>(this);
  m_state_builder->remote_image_ctx->state->refresh(ctx);
}

template <typename I>
void Replayer<I>::handle_refresh_remote_image(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to refresh remote image: " << cpp_strerror(r) << dendl;
    handle_replay_complete(r, "failed to refresh remote image");
    return;
  }

  std::unique_lock locker{m_lock};
  scan_local_mirror_snapshots(&locker);
}

// Scan the local image's mirror snapshots to establish the local sync
// window [m_local_snap_id_start, m_local_snap_id_end] and to collect
// non-primary snapshots that can be pruned.  Called with m_lock held
// via *locker; may unlock it before branching into async work.
template <typename I>
void Replayer<I>::scan_local_mirror_snapshots(
    std::unique_lock<ceph::mutex>* locker) {
  if (is_replay_interrupted(locker)) {
    return;
  }

  dout(10) << dendl;

  // reset the computed sync window state before re-scanning
  m_local_snap_id_start = 0;
  m_local_snap_id_end = CEPH_NOSNAP;
  m_local_mirror_snap_ns = {};
  m_local_object_count = 0;

  m_remote_snap_id_start = 0;
  m_remote_snap_id_end = CEPH_NOSNAP;
  m_remote_mirror_snap_ns = {};

  std::set<uint64_t> prune_snap_ids;

  auto local_image_ctx = m_state_builder->local_image_ctx;
  std::shared_lock image_locker{local_image_ctx->image_lock};
  for (auto snap_info_it = local_image_ctx->snap_info.begin();
       snap_info_it != local_image_ctx->snap_info.end(); ++snap_info_it) {
    const auto& snap_ns = snap_info_it->second.snap_namespace;
    auto mirror_ns = std::get_if<
      cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
    if (mirror_ns == nullptr) {
      continue;
    }

    dout(15) << "local mirror snapshot: id=" << snap_info_it->first << ", "
             << "mirror_ns=" << *mirror_ns << dendl;
    m_local_mirror_snap_ns = *mirror_ns;

    auto local_snap_id = snap_info_it->first;
    if (mirror_ns->is_non_primary()) {
      if (mirror_ns->complete) {
        // if remote has new snapshots, we would sync from here
        m_local_snap_id_start = local_snap_id;
        ceph_assert(m_local_snap_id_end == CEPH_NOSNAP);

        if (mirror_ns->mirror_peer_uuids.empty()) {
          // no other peer will attempt to sync to this snapshot so store as
          // a candidate for removal
          prune_snap_ids.insert(local_snap_id);
        }
      } else if (mirror_ns->last_copied_object_number == 0 &&
                 m_local_snap_id_start > 0) {
        // snapshot might be missing image state, object-map, etc, so just
        // delete and re-create it if we haven't started copying data
        // objects. Also only prune this snapshot since we will need the
        // previous mirror snapshot for syncing. Special case exception for
        // the first non-primary snapshot since we know its snapshot is
        // well-formed because otherwise the mirror-image-state would have
        // forced an image deletion.
        prune_snap_ids.clear();
        prune_snap_ids.insert(local_snap_id);
        break;
      } else {
        // start snap will be last complete mirror snapshot or initial
        // image revision
        m_local_snap_id_end = local_snap_id;
        break;
      }
    } else if (mirror_ns->is_primary()) {
      if (mirror_ns->complete) {
        m_local_snap_id_start = local_snap_id;
        ceph_assert(m_local_snap_id_end == CEPH_NOSNAP);
      } else {
        derr << "incomplete local primary snapshot" << dendl;
        handle_replay_complete(locker, -EINVAL,
                               "incomplete local primary snapshot");
        return;
      }
    } else {
      derr << "unknown local mirror snapshot state" << dendl;
      handle_replay_complete(locker, -EINVAL,
                             "invalid local mirror snapshot state");
      return;
    }
  }
  image_locker.unlock();

  if (m_local_snap_id_start > 0) {
    // remove candidate that is required for delta snapshot sync
    prune_snap_ids.erase(m_local_snap_id_start);
  }
  if (!prune_snap_ids.empty()) {
    locker->unlock();

    // prune one candidate at a time; the scan restarts afterwards
    auto prune_snap_id = *prune_snap_ids.begin();
    dout(5) << "pruning unused non-primary snapshot " << prune_snap_id << dendl;
    prune_non_primary_snapshot(prune_snap_id);
    return;
  }

  if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) {
    if (m_local_mirror_snap_ns.is_non_primary() &&
        m_local_mirror_snap_ns.primary_mirror_uuid !=
          m_state_builder->remote_mirror_uuid) {
      if (m_local_mirror_snap_ns.is_orphan()) {
        dout(5) << "local image being force promoted" << dendl;
        handle_replay_complete(locker, 0, "orphan (force promoting)");
        return;
      }
      // TODO support multiple peers
      derr << "local image linked to unknown peer: "
           << m_local_mirror_snap_ns.primary_mirror_uuid << dendl;
      handle_replay_complete(locker, -EEXIST,
                             "local image linked to unknown peer");
      return;
    } else if (m_local_mirror_snap_ns.state ==
                 cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
      dout(5) << "local image promoted" << dendl;
      handle_replay_complete(locker, 0, "force promoted");
      return;
    }

    dout(10) << "found local mirror snapshot: "
             << "local_snap_id_start=" << m_local_snap_id_start << ", "
             << "local_snap_id_end=" << m_local_snap_id_end << ", "
             << "local_snap_ns=" << m_local_mirror_snap_ns << dendl;
    if (!m_local_mirror_snap_ns.is_primary() &&
        m_local_mirror_snap_ns.complete) {
      // our remote sync should start after this completed snapshot
      m_remote_snap_id_start = m_local_mirror_snap_ns.primary_snap_id;
    }
  }

  // we don't have any mirror snapshots or only completed non-primary
  // mirror snapshots
  scan_remote_mirror_snapshots(locker);
}

// Scan the remote image's mirror snapshots: pick the next snapshot to
// sync (m_remote_snap_id_end), count pending ones, collect stale remote
// snapshots to unlink, and detect split-brain / demotion conditions.
template <typename I>
void Replayer<I>::scan_remote_mirror_snapshots(
    std::unique_lock<ceph::mutex>* locker) {
  dout(10) << dendl;

  m_pending_snapshots = 0;

  std::set<uint64_t> unlink_snap_ids;
  bool split_brain = false;
  bool remote_demoted = false;
  auto remote_image_ctx = m_state_builder->remote_image_ctx;
  std::shared_lock image_locker{remote_image_ctx->image_lock};
  for (auto snap_info_it = remote_image_ctx->snap_info.begin();
       snap_info_it != remote_image_ctx->snap_info.end(); ++snap_info_it) {
    const auto& snap_ns = snap_info_it->second.snap_namespace;
    auto mirror_ns = std::get_if<
      cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
    if (mirror_ns == nullptr) {
      continue;
    }

    dout(15) << "remote mirror snapshot: id=" << snap_info_it->first << ", "
             << "mirror_ns=" << *mirror_ns << dendl;
    // tracks the demotion state of the newest remote mirror snapshot
    remote_demoted = mirror_ns->is_demoted();
    if (!mirror_ns->is_primary() && !mirror_ns->is_non_primary()) {
      derr << "unknown remote mirror snapshot state" << dendl;
      handle_replay_complete(locker, -EINVAL,
                             "invalid remote mirror snapshot state");
      return;
    } else if (mirror_ns->mirror_peer_uuids.count(m_remote_mirror_peer_uuid) ==
                 0) {
      dout(15) << "skipping remote snapshot due to missing mirror peer"
               << dendl;
      continue;
    }

    auto remote_snap_id = snap_info_it->first;
    if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) {
      // we have a local mirror snapshot
      if
        (m_local_mirror_snap_ns.is_non_primary()) {
        // previously validated that it was linked to remote
        ceph_assert(m_local_mirror_snap_ns.primary_mirror_uuid ==
                      m_state_builder->remote_mirror_uuid);

        if (m_remote_snap_id_end == CEPH_NOSNAP) {
          // haven't found the end snap so treat this as a candidate for unlink
          unlink_snap_ids.insert(remote_snap_id);
        }
        if (m_local_mirror_snap_ns.complete &&
            m_local_mirror_snap_ns.primary_snap_id >= remote_snap_id) {
          // skip past completed remote snapshot
          m_remote_snap_id_start = remote_snap_id;
          m_remote_mirror_snap_ns = *mirror_ns;
          dout(15) << "skipping synced remote snapshot " << remote_snap_id
                   << dendl;
          continue;
        } else if (!m_local_mirror_snap_ns.complete &&
                   m_local_mirror_snap_ns.primary_snap_id > remote_snap_id) {
          // skip until we get to the in-progress remote snapshot
          dout(15) << "skipping synced remote snapshot " << remote_snap_id
                   << " while search for in-progress sync" << dendl;
          m_remote_snap_id_start = remote_snap_id;
          m_remote_mirror_snap_ns = *mirror_ns;
          continue;
        }
      } else if (m_local_mirror_snap_ns.state ==
                   cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) {
        // find the matching demotion snapshot in remote image
        ceph_assert(m_local_snap_id_start > 0);
        if (mirror_ns->state ==
              cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED &&
            mirror_ns->primary_mirror_uuid == m_local_mirror_uuid &&
            mirror_ns->primary_snap_id == m_local_snap_id_start) {
          dout(10) << "located matching demotion snapshot: "
                   << "remote_snap_id=" << remote_snap_id << ", "
                   << "local_snap_id=" << m_local_snap_id_start << dendl;
          m_remote_snap_id_start = remote_snap_id;
          split_brain = false;
          continue;
        } else if (m_remote_snap_id_start == 0) {
          // still looking for our matching demotion snapshot
          dout(15) << "skipping remote snapshot " << remote_snap_id << " "
                   << "while searching for demotion" << dendl;
          split_brain = true;
          continue;
        }
      } else {
        // should not have been able to reach this
        ceph_assert(false);
      }
    } else if (!mirror_ns->is_primary()) {
      dout(15) << "skipping non-primary remote snapshot" << dendl;
      continue;
    }

    // found candidate snapshot to sync
    ++m_pending_snapshots;
    if (m_remote_snap_id_end != CEPH_NOSNAP) {
      continue;
    }

    // first primary snapshot where we are listed as a peer
    m_remote_snap_id_end = remote_snap_id;
    m_remote_mirror_snap_ns = *mirror_ns;
  }

  if (m_remote_snap_id_start != 0 &&
      remote_image_ctx->snap_info.count(m_remote_snap_id_start) == 0) {
    // the remote start snapshot was deleted out from under us
    derr << "failed to locate remote start snapshot: "
         << "snap_id=" << m_remote_snap_id_start << dendl;
    split_brain = true;
  }

  image_locker.unlock();

  if (!split_brain) {
    unlink_snap_ids.erase(m_remote_snap_id_start);
    unlink_snap_ids.erase(m_remote_snap_id_end);
    if (!unlink_snap_ids.empty()) {
      locker->unlock();

      // retry the unlinking process for a remote snapshot that we do not
      // need anymore
      auto remote_snap_id = *unlink_snap_ids.begin();
      dout(10) << "unlinking from remote snapshot " << remote_snap_id << dendl;
      unlink_peer(remote_snap_id);
      return;
    }

    if (m_remote_snap_id_end != CEPH_NOSNAP) {
      dout(10) << "found remote mirror snapshot: "
               << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
               << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
               << "remote_snap_ns=" << m_remote_mirror_snap_ns << dendl;
      if (m_remote_mirror_snap_ns.complete) {
        locker->unlock();

        if (m_local_snap_id_end != CEPH_NOSNAP &&
            !m_local_mirror_snap_ns.complete) {
          // attempt to resume image-sync
          dout(10) << "local image contains in-progress mirror snapshot"
                   << dendl;
          get_local_image_state();
        } else {
          copy_snapshots();
        }
        return;
      } else {
        // might have raced with the creation of a remote mirror snapshot
        // so we will need to refresh and rescan once it completes
        dout(15) << "remote mirror snapshot not complete" << dendl;
      }
    }
  }

  if (m_image_updated) {
    // received update notification while scanning image, restart ...
    m_image_updated = false;
    locker->unlock();

    dout(10) << "restarting snapshot scan due to remote update notification"
             << dendl;
    load_local_image_meta();
    return;
  }

  if (is_replay_interrupted(locker)) {
    return;
  } else if (split_brain) {
    derr << "split-brain detected: failed to find matching non-primary "
         << "snapshot in remote image: "
         << "local_snap_id_start=" << m_local_snap_id_start << ", "
         << "local_snap_ns=" << m_local_mirror_snap_ns << dendl;
    handle_replay_complete(locker, -EEXIST, "split-brain");
    return;
  } else if (remote_demoted) {
    dout(10) << "remote image demoted" << dendl;
    handle_replay_complete(locker, -EREMOTEIO, "remote image demoted");
    return;
  }

  dout(10) << "all remote snapshots synced: idling waiting for new snapshot"
           << dendl;
  ceph_assert(m_state == STATE_REPLAYING);
  m_state = STATE_IDLE;

  notify_status_updated();
}

// Remove an unneeded local non-primary snapshot, then restart the scan.
// If the snapshot disappeared in the meantime, the scan restarts anyway.
template <typename I>
void Replayer<I>::prune_non_primary_snapshot(uint64_t snap_id) {
  dout(10) << "snap_id=" << snap_id << dendl;

  auto local_image_ctx = m_state_builder->local_image_ctx;
  bool snap_valid = false;
  cls::rbd::SnapshotNamespace snap_namespace;
  std::string snap_name;

  {
    std::shared_lock image_locker{local_image_ctx->image_lock};
    auto snap_info = local_image_ctx->get_snap_info(snap_id);
    if (snap_info != nullptr) {
      snap_valid = true;
      snap_namespace = snap_info->snap_namespace;
      snap_name = snap_info->name;

      ceph_assert(std::holds_alternative<cls::rbd::MirrorSnapshotNamespace>(
        snap_namespace));
    }
  }

  if (!snap_valid) {
    load_local_image_meta();
    return;
  }

  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_prune_non_primary_snapshot>(this);
  local_image_ctx->operations->snap_remove(snap_namespace, snap_name, ctx);
}

template <typename I>
void Replayer<I>::handle_prune_non_primary_snapshot(int r)
{
  dout(10) << "r=" << r << dendl;

  // -ENOENT is fine: the snapshot was already removed
  if (r < 0 && r != -ENOENT) {
    derr << "failed to prune non-primary snapshot: " << cpp_strerror(r)
         << dendl;
    handle_replay_complete(r, "failed to prune non-primary snapshot");
    return;
  }

  if (is_replay_interrupted()) {
    return;
  }

  load_local_image_meta();
}

// Copy the remote snapshot metadata (snap records) for the window
// (m_remote_snap_id_start, m_remote_snap_id_end] into the local image,
// recording the remote-to-local snap-seq mapping.
template <typename I>
void Replayer<I>::copy_snapshots() {
  dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
           << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
           << "local_snap_id_start=" << m_local_snap_id_start << dendl;

  ceph_assert(m_remote_snap_id_start != CEPH_NOSNAP);
  ceph_assert(m_remote_snap_id_end > 0 &&
              m_remote_snap_id_end != CEPH_NOSNAP);
  ceph_assert(m_local_snap_id_start != CEPH_NOSNAP);

  m_local_mirror_snap_ns = {};
  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_copy_snapshots>(this);
  auto req = librbd::deep_copy::SnapshotCopyRequest<I>::create(
    m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx,
    m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start,
    false, m_threads->work_queue, &m_local_mirror_snap_ns.snap_seqs,
    ctx);
  req->send();
}

template <typename I>
void Replayer<I>::handle_copy_snapshots(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to copy snapshots from remote to local image: "
         << cpp_strerror(r) << dendl;
    handle_replay_complete(
      r, "failed to copy snapshots from remote to local image");
    return;
  }

  dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
           << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
           << "local_snap_id_start=" << m_local_snap_id_start << ", "
           << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
  get_remote_image_state();
}

// Fetch the serialized image state attached to the remote end snapshot.
template <typename I>
void Replayer<I>::get_remote_image_state() {
  dout(10) << dendl;

  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_get_remote_image_state>(this);
  auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create(
    m_state_builder->remote_image_ctx, m_remote_snap_id_end,
    &m_image_state, ctx);
  req->send();
}

template <typename I>
void Replayer<I>::handle_get_remote_image_state(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to retrieve remote snapshot image state: "
         << cpp_strerror(r) << dendl;
    handle_replay_complete(r, "failed to retrieve remote snapshot image state");
    return;
  }

  create_non_primary_snapshot();
}

// Fetch the image state attached to the (in-progress) local end snapshot
// so an interrupted sync can be resumed without re-creating it.
template <typename I>
void Replayer<I>::get_local_image_state() {
  dout(10) << dendl;

  ceph_assert(m_local_snap_id_end != CEPH_NOSNAP);
  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_get_local_image_state>(this);
  auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create(
    m_state_builder->local_image_ctx, m_local_snap_id_end,
    &m_image_state, ctx);
  req->send();
}

template <typename I>
void Replayer<I>::handle_get_local_image_state(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to retrieve local snapshot image state: "
         << cpp_strerror(r) << dendl;
    handle_replay_complete(r, "failed to retrieve local snapshot image state");
    return;
  }

  request_sync();
}

// Create the local non-primary mirror snapshot that corresponds to the
// selected remote snapshot, first (re)building the remote-to-local user
// snapshot id mapping needed to apply image state later.
template <typename I>
void Replayer<I>::create_non_primary_snapshot() {
  auto local_image_ctx = m_state_builder->local_image_ctx;

  if (m_local_snap_id_start > 0) {
    std::shared_lock local_image_locker{local_image_ctx->image_lock};

    auto local_snap_info_it = local_image_ctx->snap_info.find(
      m_local_snap_id_start);
    if (local_snap_info_it == local_image_ctx->snap_info.end()) {
      local_image_locker.unlock();

      derr << "failed to locate local snapshot " << m_local_snap_id_start
           << dendl;
      handle_replay_complete(-ENOENT, "failed to locate local start snapshot");
      return;
    }

    auto mirror_ns =
      std::get_if<cls::rbd::MirrorSnapshotNamespace>(
        &local_snap_info_it->second.snap_namespace);
    ceph_assert(mirror_ns != nullptr);

    auto remote_image_ctx = m_state_builder->remote_image_ctx;
    std::shared_lock remote_image_locker{remote_image_ctx->image_lock};

    // (re)build a full mapping from remote to local snap ids for all user
    // snapshots to support applying image state in the future
    for (auto& [remote_snap_id, remote_snap_info] :
           remote_image_ctx->snap_info) {
      if (remote_snap_id >= m_remote_snap_id_end) {
        break;
      }

      // we can ignore all non-user snapshots since image state only includes
      // user snapshots
      if (!std::holds_alternative<cls::rbd::UserSnapshotNamespace>(
            remote_snap_info.snap_namespace)) {
        continue;
      }

      uint64_t local_snap_id = CEPH_NOSNAP;
      if (mirror_ns->is_demoted() && !m_remote_mirror_snap_ns.is_demoted()) {
        // if we are creating a non-primary snapshot following a demotion,
        // re-build the full snapshot sequence since we don't have a valid
        // snapshot mapping
        auto local_snap_id_it = local_image_ctx->snap_ids.find(
          {remote_snap_info.snap_namespace, remote_snap_info.name});
        if (local_snap_id_it != local_image_ctx->snap_ids.end()) {
          local_snap_id = local_snap_id_it->second;
        }
      } else {
        auto snap_seq_it = mirror_ns->snap_seqs.find(remote_snap_id);
        if (snap_seq_it != mirror_ns->snap_seqs.end()) {
          local_snap_id = snap_seq_it->second;
        }
      }

      if (m_local_mirror_snap_ns.snap_seqs.count(remote_snap_id) == 0 &&
          local_snap_id != CEPH_NOSNAP) {
        dout(15) << "mapping remote snapshot " << remote_snap_id << " to "
                 << "local snapshot " << local_snap_id << dendl;
        m_local_mirror_snap_ns.snap_seqs[remote_snap_id] = local_snap_id;
      }
    }
  }

  dout(10) << "demoted=" << m_remote_mirror_snap_ns.is_demoted() << ", "
           << "primary_mirror_uuid="
           << m_state_builder->remote_mirror_uuid << ", "
           << "primary_snap_id=" << m_remote_snap_id_end << ", "
           << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;

  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_create_non_primary_snapshot>(this);
  auto req = librbd::mirror::snapshot::CreateNonPrimaryRequest<I>::create(
    local_image_ctx, m_remote_mirror_snap_ns.is_demoted(),
    m_state_builder->remote_mirror_uuid, m_remote_snap_id_end,
    m_local_mirror_snap_ns.snap_seqs, m_image_state, &m_local_snap_id_end, ctx);
  req->send();
}

template <typename I>
void Replayer<I>::handle_create_non_primary_snapshot(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to create local mirror snapshot: " << cpp_strerror(r)
         << dendl;
    handle_replay_complete(r, "failed to create local mirror snapshot");
    return;
  }

  dout(15) << "local_snap_id_end=" << m_local_snap_id_end << dendl;

  update_mirror_image_state();
}

// Flip the local mirror image record from CREATING to ENABLED once the
// first non-primary snapshot has linked the two images together; skipped
// when the image already has a prior completed mirror snapshot.
template <typename I>
void Replayer<I>::update_mirror_image_state() {
  if (m_local_snap_id_start > 0) {
    request_sync();
    return;
  }

  // a newly created non-primary image has a local mirror state of CREATING
  // until this point so that we could avoid preserving the image until
  // the first non-primary snapshot linked the two images together.
  dout(10) << dendl;
  auto ctx = create_context_callback<
    Replayer<I>, &Replayer<I>::handle_update_mirror_image_state>(this);
  auto req = librbd::mirror::ImageStateUpdateRequest<I>::create(
    m_state_builder->local_image_ctx->md_ctx,
    m_state_builder->local_image_ctx->id,
    cls::rbd::MIRROR_IMAGE_STATE_ENABLED, {}, ctx);
  req->send();
}

template <typename I>
void Replayer<I>::handle_update_mirror_image_state(int r) {
  dout(10) << "r=" << r << dendl;

  if (r < 0) {
    derr << "failed to update local mirror image state: " << cpp_strerror(r)
         << dendl;
    handle_replay_complete(r, "failed to update local mirror image state");
    return;
  }

  request_sync();
}

// Ask the instance watcher for a sync-throttle slot before copying image
// data; skipped entirely when the remote snapshot is clean since our
// start snapshot (no data delta to copy).
template <typename I>
void Replayer<I>::request_sync() {
  if (m_remote_mirror_snap_ns.clean_since_snap_id == m_remote_snap_id_start) {
    dout(10) << "skipping unnecessary image copy: "
             << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
             << "remote_mirror_snap_ns=" << m_remote_mirror_snap_ns << dendl;
    apply_image_state();
    return;
  }

  dout(10) << dendl;
  std::unique_lock locker{m_lock};
  if (is_replay_interrupted(&locker)) {
    return;
  }

  auto ctx = create_async_context_callback(
    m_threads->work_queue, create_context_callback<
      Replayer<I>, &Replayer<I>::handle_request_sync>(this));
  m_instance_watcher->notify_sync_request(m_state_builder->local_image_ctx->id,
                                          ctx);
}

template <typename I>
void Replayer<I>::handle_request_sync(int r) {
  dout(10) << "r=" << r << dendl;

  std::unique_lock locker{m_lock};
  if (is_replay_interrupted(&locker)) {
    return;
  } else if (r == -ECANCELED) {
    dout(5) << "image-sync canceled" << dendl;
    handle_replay_complete(&locker, r, "image-sync canceled");
    return;
  } else if (r < 0) {
    derr << "failed to request image-sync: " << cpp_strerror(r) << dendl;
    handle_replay_complete(&locker, r, "failed to request image-sync");
    return;
  }

  m_sync_in_progress = true;
  locker.unlock();

  copy_image();
}

template
<typename I> +void Replayer<I>::copy_image() { + dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", " + << "remote_snap_id_end=" << m_remote_snap_id_end << ", " + << "local_snap_id_start=" << m_local_snap_id_start << ", " + << "last_copied_object_number=" + << m_local_mirror_snap_ns.last_copied_object_number << ", " + << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl; + + m_snapshot_bytes = 0; + m_snapshot_replay_start = ceph_clock_now(); + m_deep_copy_handler = new DeepCopyHandler(this); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_copy_image>(this); + auto req = librbd::deep_copy::ImageCopyRequest<I>::create( + m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx, + m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start, false, + (m_local_mirror_snap_ns.last_copied_object_number > 0 ? + librbd::deep_copy::ObjectNumber{ + m_local_mirror_snap_ns.last_copied_object_number} : + librbd::deep_copy::ObjectNumber{}), + m_local_mirror_snap_ns.snap_seqs, m_deep_copy_handler, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_copy_image(int r) { + dout(10) << "r=" << r << dendl; + + delete m_deep_copy_handler; + m_deep_copy_handler = nullptr; + + if (r < 0) { + derr << "failed to copy remote image to local image: " << cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to copy remote image"); + return; + } + + { + std::unique_lock locker{m_lock}; + m_last_snapshot_bytes = m_snapshot_bytes; + m_bytes_per_snapshot(m_snapshot_bytes); + utime_t duration = ceph_clock_now() - m_snapshot_replay_start; + m_last_snapshot_sync_seconds = duration.sec(); + + if (g_snapshot_perf_counters) { + g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes, + m_snapshot_bytes); + g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_snapshots); + g_snapshot_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time, + duration); + } + if (m_perf_counters) { + 
m_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes, m_snapshot_bytes); + m_perf_counters->inc(l_rbd_mirror_snapshot_snapshots); + m_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time, duration); + m_perf_counters->tset(l_rbd_mirror_snapshot_last_sync_time, duration); + m_perf_counters->set(l_rbd_mirror_snapshot_last_sync_bytes, + m_snapshot_bytes); + } + } + + apply_image_state(); +} + +template <typename I> +void Replayer<I>::handle_copy_image_progress(uint64_t object_number, + uint64_t object_count) { + dout(10) << "object_number=" << object_number << ", " + << "object_count=" << object_count << dendl; + + std::unique_lock locker{m_lock}; + m_local_mirror_snap_ns.last_copied_object_number = std::min( + object_number, object_count); + m_local_object_count = object_count; + + update_non_primary_snapshot(false); +} + +template <typename I> +void Replayer<I>::handle_copy_image_read(uint64_t bytes_read) { + dout(20) << "bytes_read=" << bytes_read << dendl; + + std::unique_lock locker{m_lock}; + m_bytes_per_second(bytes_read); + m_snapshot_bytes += bytes_read; +} + +template <typename I> +void Replayer<I>::apply_image_state() { + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_apply_image_state>(this); + auto req = ApplyImageStateRequest<I>::create( + m_local_mirror_uuid, + m_state_builder->remote_mirror_uuid, + m_state_builder->local_image_ctx, + m_state_builder->remote_image_ctx, + m_image_state, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_apply_image_state(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to apply remote image state to local image: " + << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to apply remote image state"); + return; + } + + std::unique_lock locker{m_lock}; + update_non_primary_snapshot(true); +} + +template <typename I> +void Replayer<I>::update_non_primary_snapshot(bool complete) { + 
ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + if (!complete) { + // disallow two in-flight updates if this isn't the completion of the sync + if (m_updating_sync_point) { + return; + } + m_updating_sync_point = true; + } else { + m_local_mirror_snap_ns.complete = true; + } + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_snapshot_set_copy_progress( + &op, m_local_snap_id_end, m_local_mirror_snap_ns.complete, + m_local_mirror_snap_ns.last_copied_object_number); + + auto ctx = new C_TrackedOp( + m_in_flight_op_tracker, new LambdaContext([this, complete](int r) { + handle_update_non_primary_snapshot(complete, r); + })); + auto aio_comp = create_rados_callback(ctx); + int r = m_state_builder->local_image_ctx->md_ctx.aio_operate( + m_state_builder->local_image_ctx->header_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void Replayer<I>::handle_update_non_primary_snapshot(bool complete, int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update local snapshot progress: " << cpp_strerror(r) + << dendl; + if (complete) { + // only fail if this was the final update + handle_replay_complete(r, "failed to update local snapshot progress"); + return; + } + } + + if (!complete) { + // periodic sync-point update -- do not advance state machine + std::unique_lock locker{m_lock}; + + ceph_assert(m_updating_sync_point); + m_updating_sync_point = false; + return; + } + + notify_image_update(); +} + +template <typename I> +void Replayer<I>::notify_image_update() { + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_notify_image_update>(this); + m_state_builder->local_image_ctx->notify_update(ctx); +} + +template <typename I> +void Replayer<I>::handle_notify_image_update(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to notify local image update: " << cpp_strerror(r) << dendl; + } + 
+ unlink_peer(m_remote_snap_id_start); +} + +template <typename I> +void Replayer<I>::unlink_peer(uint64_t remote_snap_id) { + if (remote_snap_id == 0) { + finish_sync(); + return; + } + + // local snapshot fully synced -- we no longer depend on the sync + // start snapshot in the remote image + dout(10) << "remote_snap_id=" << remote_snap_id << dendl; + + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_unlink_peer>(this); + auto req = librbd::mirror::snapshot::UnlinkPeerRequest<I>::create( + m_state_builder->remote_image_ctx, remote_snap_id, + m_remote_mirror_peer_uuid, false, ctx); + req->send(); +} + +template <typename I> +void Replayer<I>::handle_unlink_peer(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to unlink local peer from remote image: " << cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to unlink local peer from remote image"); + return; + } + + finish_sync(); +} + +template <typename I> +void Replayer<I>::finish_sync() { + dout(10) << dendl; + + { + std::unique_lock locker{m_lock}; + notify_status_updated(); + + if (m_sync_in_progress) { + m_sync_in_progress = false; + m_instance_watcher->notify_sync_complete( + m_state_builder->local_image_ctx->id); + } + } + + if (is_replay_interrupted()) { + return; + } + + load_local_image_meta(); +} + +template <typename I> +void Replayer<I>::register_local_update_watcher() { + dout(10) << dendl; + + m_update_watch_ctx = new C_UpdateWatchCtx(this); + + int r = m_state_builder->local_image_ctx->state->register_update_watcher( + m_update_watch_ctx, &m_local_update_watcher_handle); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_register_local_update_watcher>(this); + m_threads->work_queue->queue(ctx, r); +} + +template <typename I> +void Replayer<I>::handle_register_local_update_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register local update watcher: " << 
cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to register local image update watcher"); + m_state = STATE_COMPLETE; + + delete m_update_watch_ctx; + m_update_watch_ctx = nullptr; + + Context* on_init = nullptr; + std::swap(on_init, m_on_init_shutdown); + on_init->complete(r); + return; + } + + register_remote_update_watcher(); +} + +template <typename I> +void Replayer<I>::register_remote_update_watcher() { + dout(10) << dendl; + + int r = m_state_builder->remote_image_ctx->state->register_update_watcher( + m_update_watch_ctx, &m_remote_update_watcher_handle); + auto ctx = create_context_callback< + Replayer<I>, &Replayer<I>::handle_register_remote_update_watcher>(this); + m_threads->work_queue->queue(ctx, r); +} + +template <typename I> +void Replayer<I>::handle_register_remote_update_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register remote update watcher: " << cpp_strerror(r) + << dendl; + handle_replay_complete(r, "failed to register remote image update watcher"); + m_state = STATE_COMPLETE; + + unregister_local_update_watcher(); + return; + } + + m_state = STATE_REPLAYING; + + Context* on_init = nullptr; + std::swap(on_init, m_on_init_shutdown); + on_init->complete(0); + + // delay initial snapshot scan until after we have alerted + // image replayer that we have initialized in case an error + // occurs + { + std::unique_lock locker{m_lock}; + notify_status_updated(); + } + + load_local_image_meta(); +} + +template <typename I> +void Replayer<I>::unregister_remote_update_watcher() { + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer<I>, + &Replayer<I>::handle_unregister_remote_update_watcher>(this); + m_state_builder->remote_image_ctx->state->unregister_update_watcher( + m_remote_update_watcher_handle, ctx); +} + +template <typename I> +void Replayer<I>::handle_unregister_remote_update_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to 
unregister remote update watcher: " << cpp_strerror(r) + << dendl; + } + + unregister_local_update_watcher(); +} + +template <typename I> +void Replayer<I>::unregister_local_update_watcher() { + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer<I>, + &Replayer<I>::handle_unregister_local_update_watcher>(this); + m_state_builder->local_image_ctx->state->unregister_update_watcher( + m_local_update_watcher_handle, ctx); +} + +template <typename I> +void Replayer<I>::handle_unregister_local_update_watcher(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to unregister local update watcher: " << cpp_strerror(r) + << dendl; + } + + delete m_update_watch_ctx; + m_update_watch_ctx = nullptr; + + wait_for_in_flight_ops(); +} + +template <typename I> +void Replayer<I>::wait_for_in_flight_ops() { + dout(10) << dendl; + + auto ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Replayer<I>, &Replayer<I>::handle_wait_for_in_flight_ops>(this)); + m_in_flight_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Replayer<I>::handle_wait_for_in_flight_ops(int r) { + dout(10) << "r=" << r << dendl; + + Context* on_shutdown = nullptr; + { + std::unique_lock locker{m_lock}; + ceph_assert(m_on_init_shutdown != nullptr); + std::swap(on_shutdown, m_on_init_shutdown); + } + on_shutdown->complete(m_error_code); +} + +template <typename I> +void Replayer<I>::handle_image_update_notify() { + dout(10) << dendl; + + std::unique_lock locker{m_lock}; + if (m_state == STATE_REPLAYING) { + dout(15) << "flagging snapshot rescan required" << dendl; + m_image_updated = true; + } else if (m_state == STATE_IDLE) { + m_state = STATE_REPLAYING; + locker.unlock(); + + dout(15) << "restarting idle replayer" << dendl; + load_local_image_meta(); + } +} + +template <typename I> +void Replayer<I>::handle_replay_complete(int r, + const std::string& description) { + std::unique_lock locker{m_lock}; + 
handle_replay_complete(&locker, r, description); +} + +template <typename I> +void Replayer<I>::handle_replay_complete(std::unique_lock<ceph::mutex>* locker, + int r, + const std::string& description) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + if (m_sync_in_progress) { + m_sync_in_progress = false; + m_instance_watcher->notify_sync_complete( + m_state_builder->local_image_ctx->id); + } + + // don't set error code and description if resuming a pending + // shutdown + if (is_replay_interrupted(locker)) { + return; + } + + if (m_error_code == 0) { + m_error_code = r; + m_error_description = description; + } + + if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) { + return; + } + + m_state = STATE_COMPLETE; + notify_status_updated(); +} + +template <typename I> +void Replayer<I>::notify_status_updated() { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(10) << dendl; + auto ctx = new C_TrackedOp(m_in_flight_op_tracker, new LambdaContext( + [this](int) { + m_replayer_listener->handle_notification(); + })); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +bool Replayer<I>::is_replay_interrupted() { + std::unique_lock locker{m_lock}; + return is_replay_interrupted(&locker); +} + +template <typename I> +bool Replayer<I>::is_replay_interrupted(std::unique_lock<ceph::mutex>* locker) { + if (m_state == STATE_COMPLETE) { + locker->unlock(); + + dout(10) << "resuming pending shut down" << dendl; + unregister_remote_update_watcher(); + return true; + } + return false; +} + +template <typename I> +void Replayer<I>::register_perf_counters() { + dout(5) << dendl; + + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + ceph_assert(m_perf_counters == nullptr); + + auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct); + auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio"); + + auto local_image_ctx = m_state_builder->local_image_ctx; + std::string labels = ceph::perf_counters::key_create( + 
"rbd_mirror_snapshot_image", + {{"pool", local_image_ctx->md_ctx.get_pool_name()}, + {"namespace", local_image_ctx->md_ctx.get_namespace()}, + {"image", local_image_ctx->name}}); + + PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_snapshot_first, + l_rbd_mirror_snapshot_last); + plb.add_u64_counter(l_rbd_mirror_snapshot_snapshots, "snapshots", + "Number of snapshots synced", nullptr, prio); + plb.add_time_avg(l_rbd_mirror_snapshot_sync_time, "sync_time", + "Average sync time", nullptr, prio); + plb.add_u64_counter(l_rbd_mirror_snapshot_sync_bytes, "sync_bytes", + "Total bytes synced", nullptr, prio, unit_t(UNIT_BYTES)); + plb.add_time(l_rbd_mirror_snapshot_remote_timestamp, "remote_timestamp", + "Timestamp of the remote snapshot", nullptr, prio); + plb.add_time(l_rbd_mirror_snapshot_local_timestamp, "local_timestamp", + "Timestamp of the local snapshot", nullptr, prio); + plb.add_time(l_rbd_mirror_snapshot_last_sync_time, "last_sync_time", + "Time taken to sync the last snapshot", nullptr, prio); + plb.add_u64(l_rbd_mirror_snapshot_last_sync_bytes, "last_sync_bytes", + "Bytes synced for the last snapshot", nullptr, prio, + unit_t(UNIT_BYTES)); + + m_perf_counters = plb.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(m_perf_counters); +} + +template <typename I> +void Replayer<I>::unregister_perf_counters() { + dout(5) << dendl; + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + PerfCounters *perf_counters = nullptr; + std::swap(perf_counters, m_perf_counters); + + if (perf_counters != nullptr) { + g_ceph_context->get_perfcounters_collection()->remove(perf_counters); + delete perf_counters; + } +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::Replayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h new file mode 
100644 index 000000000..17d45f6bc --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h @@ -0,0 +1,349 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H +#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H + +#include "tools/rbd_mirror/image_replayer/Replayer.h" +#include "common/ceph_mutex.h" +#include "common/AsyncOpTracker.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/mirror/snapshot/Types.h" +#include "tools/rbd_mirror/image_replayer/TimeRollingMean.h" +#include <boost/accumulators/accumulators.hpp> +#include <boost/accumulators/statistics/stats.hpp> +#include <boost/accumulators/statistics/rolling_mean.hpp> +#include <string> +#include <type_traits> + +namespace librbd { + +struct ImageCtx; +namespace snapshot { template <typename I> class Replay; } + +} // namespace librbd + +namespace rbd { +namespace mirror { + +template <typename> struct InstanceWatcher; +class PoolMetaCache; +template <typename> struct Threads; + +namespace image_replayer { + +struct ReplayerListener; + +namespace snapshot { + +template <typename> class EventPreprocessor; +template <typename> class ReplayStatusFormatter; +template <typename> class StateBuilder; + +template <typename ImageCtxT> +class Replayer : public image_replayer::Replayer { +public: + static Replayer* create( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + StateBuilder<ImageCtxT>* state_builder, + ReplayerListener* replayer_listener) { + return new Replayer(threads, instance_watcher, local_mirror_uuid, + pool_meta_cache, state_builder, replayer_listener); + } + + Replayer( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + StateBuilder<ImageCtxT>* state_builder, + 
ReplayerListener* replayer_listener); + ~Replayer(); + + void destroy() override { + delete this; + } + + void init(Context* on_finish) override; + void shut_down(Context* on_finish) override; + + void flush(Context* on_finish) override; + + bool get_replay_status(std::string* description, Context* on_finish) override; + + bool is_replaying() const override { + std::unique_lock locker{m_lock}; + return (m_state == STATE_REPLAYING || m_state == STATE_IDLE); + } + + bool is_resync_requested() const override { + std::unique_lock locker{m_lock}; + return m_resync_requested; + } + + int get_error_code() const override { + std::unique_lock locker(m_lock); + return m_error_code; + } + + std::string get_error_description() const override { + std::unique_lock locker(m_lock); + return m_error_description; + } + + std::string get_image_spec() const { + std::unique_lock locker(m_lock); + return m_image_spec; + } + +private: + /** + * @verbatim + * + * <init> + * | + * v + * REGISTER_LOCAL_UPDATE_WATCHER + * | + * v + * REGISTER_REMOTE_UPDATE_WATCHER + * | + * v + * LOAD_LOCAL_IMAGE_META <----------------------------\ + * | | + * v (skip if not needed) | + * REFRESH_LOCAL_IMAGE | + * | | + * v (skip if not needed) | + * REFRESH_REMOTE_IMAGE | + * | | + * | (unused non-primary snapshot) | + * |\--------------> PRUNE_NON_PRIMARY_SNAPSHOT---/| + * | | + * | (interrupted sync) | + * |\--------------> GET_LOCAL_IMAGE_STATE ------\ | + * | | | + * | (new snapshot) | | + * |\--------------> COPY_SNAPSHOTS | | + * | | | | + * | v | | + * | GET_REMOTE_IMAGE_STATE | | + * | | | | + * | v | | + * | CREATE_NON_PRIMARY_SNAPSHOT | | + * | | | | + * | v (skip if not needed)| | + * | UPDATE_MIRROR_IMAGE_STATE | | + * | | | | + * | |/--------------------/ | + * | | | + * | v | + * | REQUEST_SYNC | + * | | | + * | v | + * | COPY_IMAGE | + * | | | + * | v | + * | APPLY_IMAGE_STATE | + * | | | + * | v | + * | UPDATE_NON_PRIMARY_SNAPSHOT | + * | | | + * | v | + * | NOTIFY_IMAGE_UPDATE | + * | | | + 
* | (interrupted unlink) v | + * |\--------------> UNLINK_PEER | + * | | | + * | v | + * | NOTIFY_LISTENER | + * | | | + * | \----------------------/| + * | | + * | (remote demoted) | + * \---------------> NOTIFY_LISTENER | + * | | | + * |/--------------------/ | + * | | + * | (update notification) | + * <idle> --------------------------------------------/ + * | + * v + * <shut down> + * | + * v + * UNREGISTER_REMOTE_UPDATE_WATCHER + * | + * v + * UNREGISTER_LOCAL_UPDATE_WATCHER + * | + * v + * WAIT_FOR_IN_FLIGHT_OPS + * | + * v + * <finish> + * + * @endverbatim + */ + + enum State { + STATE_INIT, + STATE_REPLAYING, + STATE_IDLE, + STATE_COMPLETE + }; + + struct C_UpdateWatchCtx; + struct DeepCopyHandler; + + Threads<ImageCtxT>* m_threads; + InstanceWatcher<ImageCtxT>* m_instance_watcher; + std::string m_local_mirror_uuid; + PoolMetaCache* m_pool_meta_cache; + StateBuilder<ImageCtxT>* m_state_builder; + ReplayerListener* m_replayer_listener; + + mutable ceph::mutex m_lock; + + State m_state = STATE_INIT; + + std::string m_image_spec; + Context* m_on_init_shutdown = nullptr; + + bool m_resync_requested = false; + int m_error_code = 0; + std::string m_error_description; + + C_UpdateWatchCtx* m_update_watch_ctx = nullptr; + uint64_t m_local_update_watcher_handle = 0; + uint64_t m_remote_update_watcher_handle = 0; + bool m_image_updated = false; + + AsyncOpTracker m_in_flight_op_tracker; + + uint64_t m_local_snap_id_start = 0; + uint64_t m_local_snap_id_end = CEPH_NOSNAP; + cls::rbd::MirrorSnapshotNamespace m_local_mirror_snap_ns; + uint64_t m_local_object_count = 0; + + std::string m_remote_mirror_peer_uuid; + uint64_t m_remote_snap_id_start = 0; + uint64_t m_remote_snap_id_end = CEPH_NOSNAP; + cls::rbd::MirrorSnapshotNamespace m_remote_mirror_snap_ns; + + librbd::mirror::snapshot::ImageState m_image_state; + DeepCopyHandler* m_deep_copy_handler = nullptr; + + TimeRollingMean m_bytes_per_second; + uint64_t m_last_snapshot_sync_seconds = 0; + + uint64_t 
m_snapshot_bytes = 0; + uint64_t m_last_snapshot_bytes = 0; + + boost::accumulators::accumulator_set< + uint64_t, boost::accumulators::stats< + boost::accumulators::tag::rolling_mean>> m_bytes_per_snapshot{ + boost::accumulators::tag::rolling_window::window_size = 2}; + utime_t m_snapshot_replay_start; + + uint32_t m_pending_snapshots = 0; + + bool m_remote_image_updated = false; + bool m_updating_sync_point = false; + bool m_sync_in_progress = false; + + PerfCounters *m_perf_counters = nullptr; + + void load_local_image_meta(); + void handle_load_local_image_meta(int r); + + void refresh_local_image(); + void handle_refresh_local_image(int r); + + void refresh_remote_image(); + void handle_refresh_remote_image(int r); + + void scan_local_mirror_snapshots(std::unique_lock<ceph::mutex>* locker); + void scan_remote_mirror_snapshots(std::unique_lock<ceph::mutex>* locker); + + void prune_non_primary_snapshot(uint64_t snap_id); + void handle_prune_non_primary_snapshot(int r); + + void copy_snapshots(); + void handle_copy_snapshots(int r); + + void get_remote_image_state(); + void handle_get_remote_image_state(int r); + + void get_local_image_state(); + void handle_get_local_image_state(int r); + + void create_non_primary_snapshot(); + void handle_create_non_primary_snapshot(int r); + + void update_mirror_image_state(); + void handle_update_mirror_image_state(int r); + + void request_sync(); + void handle_request_sync(int r); + + void copy_image(); + void handle_copy_image(int r); + void handle_copy_image_progress(uint64_t object_number, + uint64_t object_count); + void handle_copy_image_read(uint64_t bytes_read); + + void apply_image_state(); + void handle_apply_image_state(int r); + + void update_non_primary_snapshot(bool complete); + void handle_update_non_primary_snapshot(bool complete, int r); + + void notify_image_update(); + void handle_notify_image_update(int r); + + void unlink_peer(uint64_t remote_snap_id); + void handle_unlink_peer(int r); + + void 
finish_sync(); + + void register_local_update_watcher(); + void handle_register_local_update_watcher(int r); + + void register_remote_update_watcher(); + void handle_register_remote_update_watcher(int r); + + void unregister_remote_update_watcher(); + void handle_unregister_remote_update_watcher(int r); + + void unregister_local_update_watcher(); + void handle_unregister_local_update_watcher(int r); + + void wait_for_in_flight_ops(); + void handle_wait_for_in_flight_ops(int r); + + void handle_image_update_notify(); + + void handle_replay_complete(int r, const std::string& description); + void handle_replay_complete(std::unique_lock<ceph::mutex>* locker, + int r, const std::string& description); + void notify_status_updated(); + + bool is_replay_interrupted(); + bool is_replay_interrupted(std::unique_lock<ceph::mutex>* lock); + + void register_perf_counters(); + void unregister_perf_counters(); +}; + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::snapshot::Replayer<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_REPLAYER_H diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc new file mode 100644 index 000000000..ca3e6918b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.cc @@ -0,0 +1,120 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "StateBuilder.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/mirror/snapshot/ImageMeta.h" +#include "tools/rbd_mirror/image_replayer/snapshot/CreateLocalImageRequest.h" +#include "tools/rbd_mirror/image_replayer/snapshot/PrepareReplayRequest.h" +#include "tools/rbd_mirror/image_replayer/snapshot/Replayer.h" + 
+#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \ + << "StateBuilder: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +template <typename I> +StateBuilder<I>::StateBuilder(const std::string& global_image_id) + : image_replayer::StateBuilder<I>(global_image_id) { +} + +template <typename I> +StateBuilder<I>::~StateBuilder() { + ceph_assert(local_image_meta == nullptr); +} + +template <typename I> +void StateBuilder<I>::close(Context* on_finish) { + dout(10) << dendl; + + delete local_image_meta; + local_image_meta = nullptr; + + // close the remote image after closing the local + // image in case the remote cluster is unreachable and + // we cannot close it. + on_finish = new LambdaContext([this, on_finish](int) { + this->close_remote_image(on_finish); + }); + this->close_local_image(on_finish); +} + +template <typename I> +bool StateBuilder<I>::is_disconnected() const { + return false; +} + +template <typename I> +bool StateBuilder<I>::is_linked_impl() const { + // the remote has to have us registered as a peer + return !remote_mirror_peer_uuid.empty(); +} + +template <typename I> +cls::rbd::MirrorImageMode StateBuilder<I>::get_mirror_image_mode() const { + return cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT; +} + +template <typename I> +image_sync::SyncPointHandler* StateBuilder<I>::create_sync_point_handler() { + dout(10) << dendl; + + // TODO + ceph_assert(false); + return nullptr; +} + +template <typename I> +BaseRequest* StateBuilder<I>::create_local_image_request( + Threads<I>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) { + return CreateLocalImageRequest<I>::create( + threads, local_io_ctx, this->remote_image_ctx, global_image_id, + 
pool_meta_cache, progress_ctx, this, on_finish); +} + +template <typename I> +BaseRequest* StateBuilder<I>::create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) { + return PrepareReplayRequest<I>::create( + local_mirror_uuid, progress_ctx, this, resync_requested, syncing, + on_finish); +} + +template <typename I> +image_replayer::Replayer* StateBuilder<I>::create_replayer( + Threads<I>* threads, + InstanceWatcher<I>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) { + return Replayer<I>::create( + threads, instance_watcher, local_mirror_uuid, pool_meta_cache, this, + replayer_listener); +} + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::snapshot::StateBuilder<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h new file mode 100644 index 000000000..a4ab82982 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H + +#include "tools/rbd_mirror/image_replayer/StateBuilder.h" +#include <string> + +struct Context; + +namespace librbd { + +struct ImageCtx; + +namespace mirror { +namespace snapshot { + +template <typename> class ImageMeta; + +} // namespace snapshot +} // namespace mirror +} // namespace librbd + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { + +template <typename> class SyncPointHandler; + +template <typename ImageCtxT> +class StateBuilder : public 
image_replayer::StateBuilder<ImageCtxT> { +public: + static StateBuilder* create(const std::string& global_image_id) { + return new StateBuilder(global_image_id); + } + + StateBuilder(const std::string& global_image_id); + ~StateBuilder() override; + + void close(Context* on_finish) override; + + bool is_disconnected() const override; + + cls::rbd::MirrorImageMode get_mirror_image_mode() const override; + + image_sync::SyncPointHandler* create_sync_point_handler() override; + + bool replay_requires_remote_image() const override { + return true; + } + + BaseRequest* create_local_image_request( + Threads<ImageCtxT>* threads, + librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + PoolMetaCache* pool_meta_cache, + ProgressContext* progress_ctx, + Context* on_finish) override; + + BaseRequest* create_prepare_replay_request( + const std::string& local_mirror_uuid, + ProgressContext* progress_ctx, + bool* resync_requested, + bool* syncing, + Context* on_finish) override; + + image_replayer::Replayer* create_replayer( + Threads<ImageCtxT>* threads, + InstanceWatcher<ImageCtxT>* instance_watcher, + const std::string& local_mirror_uuid, + PoolMetaCache* pool_meta_cache, + ReplayerListener* replayer_listener) override; + + SyncPointHandler<ImageCtxT>* sync_point_handler = nullptr; + + std::string remote_mirror_peer_uuid; + + librbd::mirror::snapshot::ImageMeta<ImageCtxT>* local_image_meta = nullptr; + +private: + bool is_linked_impl() const override; +}; + +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::snapshot::StateBuilder<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_STATE_BUILDER_H diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc new file mode 100644 index 000000000..6df95d300 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.cc @@ 
-0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Utils.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::util::" \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { +namespace util { + +uint64_t compute_remote_snap_id( + const ceph::shared_mutex& local_image_lock, + const std::map<librados::snap_t, librbd::SnapInfo>& local_snap_infos, + uint64_t local_snap_id, const std::string& remote_mirror_uuid) { + ceph_assert(ceph_mutex_is_locked(local_image_lock)); + + // Search our local non-primary snapshots for a mapping to the remote + // snapshot. The non-primary mirror snapshot with the mappings will always + // come at or after the snapshot we are searching against + for (auto snap_it = local_snap_infos.lower_bound(local_snap_id); + snap_it != local_snap_infos.end(); ++snap_it) { + auto mirror_ns = std::get_if<cls::rbd::MirrorSnapshotNamespace>( + &snap_it->second.snap_namespace); + if (mirror_ns == nullptr || !mirror_ns->is_non_primary()) { + continue; + } + + if (mirror_ns->primary_mirror_uuid != remote_mirror_uuid) { + dout(20) << "local snapshot " << snap_it->first << " not tied to remote" + << dendl; + continue; + } else if (local_snap_id == snap_it->first) { + dout(15) << "local snapshot " << local_snap_id << " maps to " + << "remote snapshot " << mirror_ns->primary_snap_id << dendl; + return mirror_ns->primary_snap_id; + } + + const auto& snap_seqs = mirror_ns->snap_seqs; + for (auto [remote_snap_id_seq, local_snap_id_seq] : snap_seqs) { + if (local_snap_id_seq == local_snap_id) { + dout(15) << "local snapshot " << local_snap_id << " maps to " + << "remote snapshot " << remote_snap_id_seq << dendl; + return 
remote_snap_id_seq; + } + } + } + + return CEPH_NOSNAP; +} + +} // namespace util +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h new file mode 100644 index 000000000..8efc58685 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Utils.h @@ -0,0 +1,30 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H +#define RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "common/ceph_mutex.h" +#include "librbd/Types.h" +#include <map> + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace snapshot { +namespace util { + +uint64_t compute_remote_snap_id( + const ceph::shared_mutex& local_image_lock, + const std::map<librados::snap_t, librbd::SnapInfo>& local_snap_infos, + uint64_t local_snap_id, const std::string& remote_mirror_uuid); + +} // namespace util +} // namespace snapshot +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_SNAPSHOT_UTILS_H diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc new file mode 100644 index 000000000..1bd5d77f0 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc @@ -0,0 +1,172 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointCreateRequest.h" +#include "include/uuid.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "tools/rbd_mirror/image_sync/Types.h" +#include 
"tools/rbd_mirror/image_sync/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointCreateRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_sync { + +using librbd::util::create_context_callback; + +template <typename I> +SyncPointCreateRequest<I>::SyncPointCreateRequest( + I *remote_image_ctx, + const std::string &local_mirror_uuid, + SyncPointHandler* sync_point_handler, + Context *on_finish) + : m_remote_image_ctx(remote_image_ctx), + m_local_mirror_uuid(local_mirror_uuid), + m_sync_point_handler(sync_point_handler), + m_on_finish(on_finish) { + m_sync_points_copy = m_sync_point_handler->get_sync_points(); + ceph_assert(m_sync_points_copy.size() < 2); + + // initialize the updated client meta with the new sync point + m_sync_points_copy.emplace_back(); + if (m_sync_points_copy.size() > 1) { + m_sync_points_copy.back().from_snap_name = + m_sync_points_copy.front().snap_name; + } +} + +template <typename I> +void SyncPointCreateRequest<I>::send() { + send_update_sync_points(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_update_sync_points() { + uuid_d uuid_gen; + uuid_gen.generate_random(); + + auto& sync_point = m_sync_points_copy.back(); + sync_point.snap_name = util::get_snapshot_name_prefix(m_local_mirror_uuid) + + uuid_gen.to_string(); + + auto ctx = create_context_callback< + SyncPointCreateRequest<I>, + &SyncPointCreateRequest<I>::handle_update_sync_points>(this); + m_sync_point_handler->update_sync_points( + m_sync_point_handler->get_snap_seqs(), m_sync_points_copy, false, ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_update_sync_points(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_refresh_image(); +} + +template 
<typename I> +void SyncPointCreateRequest<I>::send_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_refresh_image>( + this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_snap(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_create_snap() { + dout(20) << dendl; + + auto& sync_point = m_sync_points_copy.back(); + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_create_snap>( + this); + m_remote_image_ctx->operations->snap_create( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name.c_str(), + librbd::SNAP_CREATE_FLAG_SKIP_NOTIFY_QUIESCE, m_prog_ctx, ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_create_snap(int r) { + dout(20) << ": r=" << r << dendl; + + if (r == -EEXIST) { + send_update_sync_points(); + return; + } else if (r < 0) { + derr << ": failed to create snapshot: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_final_refresh_image(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_final_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, + &SyncPointCreateRequest<I>::handle_final_refresh_image>(this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_final_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to refresh image for snapshot: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void SyncPointCreateRequest<I>::finish(int r) { + dout(20) << ": 
r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h new file mode 100644 index 000000000..9b52b8374 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H +#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H + +#include "librbd/internal.h" +#include "Types.h" +#include <string> + +class Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_sync { + +template <typename ImageCtxT = librbd::ImageCtx> +class SyncPointCreateRequest { +public: + static SyncPointCreateRequest* create( + ImageCtxT *remote_image_ctx, + const std::string &local_mirror_uuid, + SyncPointHandler* sync_point_handler, + Context *on_finish) { + return new SyncPointCreateRequest(remote_image_ctx, local_mirror_uuid, + sync_point_handler, on_finish); + } + + SyncPointCreateRequest( + ImageCtxT *remote_image_ctx, + const std::string &local_mirror_uuid, + SyncPointHandler* sync_point_handler, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * UPDATE_SYNC_POINTS < . . + * | . + * v . + * REFRESH_IMAGE . + * | . (repeat on EEXIST) + * v . + * CREATE_SNAP . . . . . . 
+ * | + * v + * REFRESH_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_remote_image_ctx; + std::string m_local_mirror_uuid; + SyncPointHandler* m_sync_point_handler; + Context *m_on_finish; + + SyncPoints m_sync_points_copy; + librbd::NoOpProgressContext m_prog_ctx; + + void send_update_sync_points(); + void handle_update_sync_points(int r); + + void send_refresh_image(); + void handle_refresh_image(int r); + + void send_create_snap(); + void handle_create_snap(int r); + + void send_final_refresh_image(); + void handle_final_refresh_image(int r); + + void finish(int r); +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc new file mode 100644 index 000000000..d1cd32b39 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc @@ -0,0 +1,213 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointPruneRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include <set> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointPruneRequest: " \ + << this << " " << __func__ +namespace rbd { +namespace mirror { +namespace image_sync { + +using librbd::util::create_context_callback; + +template <typename I> +SyncPointPruneRequest<I>::SyncPointPruneRequest( + I *remote_image_ctx, + bool sync_complete, + SyncPointHandler* sync_point_handler, + Context *on_finish) + : m_remote_image_ctx(remote_image_ctx), + 
m_sync_complete(sync_complete), + m_sync_point_handler(sync_point_handler), + m_on_finish(on_finish) { + m_sync_points_copy = m_sync_point_handler->get_sync_points(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send() { + if (m_sync_points_copy.empty()) { + send_remove_snap(); + return; + } + + if (m_sync_complete) { + // if sync is complete, we can remove the master sync point + auto it = m_sync_points_copy.begin(); + auto& sync_point = *it; + + ++it; + if (it == m_sync_points_copy.end() || + it->from_snap_name != sync_point.snap_name) { + m_snap_names.push_back(sync_point.snap_name); + } + + if (!sync_point.from_snap_name.empty()) { + m_snap_names.push_back(sync_point.from_snap_name); + } + } else { + // if we have more than one sync point or invalid sync points, + // trim them off + std::shared_lock image_locker{m_remote_image_ctx->image_lock}; + std::set<std::string> snap_names; + for (auto it = m_sync_points_copy.rbegin(); + it != m_sync_points_copy.rend(); ++it) { + auto& sync_point = *it; + if (&sync_point == &m_sync_points_copy.front()) { + if (m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name) == + CEPH_NOSNAP) { + derr << ": failed to locate sync point snapshot: " + << sync_point.snap_name << dendl; + } else if (!sync_point.from_snap_name.empty()) { + derr << ": unexpected from_snap_name in primary sync point: " + << sync_point.from_snap_name << dendl; + } else { + // first sync point is OK -- keep it + break; + } + m_invalid_master_sync_point = true; + } + + if (snap_names.count(sync_point.snap_name) == 0) { + snap_names.insert(sync_point.snap_name); + m_snap_names.push_back(sync_point.snap_name); + } + + auto& front_sync_point = m_sync_points_copy.front(); + if (!sync_point.from_snap_name.empty() && + snap_names.count(sync_point.from_snap_name) == 0 && + sync_point.from_snap_name != front_sync_point.snap_name) { + snap_names.insert(sync_point.from_snap_name); + 
m_snap_names.push_back(sync_point.from_snap_name); + } + } + } + + send_remove_snap(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_remove_snap() { + if (m_snap_names.empty()) { + send_refresh_image(); + return; + } + + const std::string &snap_name = m_snap_names.front(); + + dout(20) << ": snap_name=" << snap_name << dendl; + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_remove_snap>( + this); + m_remote_image_ctx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), + snap_name.c_str(), + ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_remove_snap(int r) { + dout(20) << ": r=" << r << dendl; + + ceph_assert(!m_snap_names.empty()); + std::string snap_name = m_snap_names.front(); + m_snap_names.pop_front(); + + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + derr << ": failed to remove snapshot '" << snap_name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_remove_snap(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_refresh_image>( + this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_update_sync_points(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_update_sync_points() { + dout(20) << dendl; + + if (m_sync_complete) { + m_sync_points_copy.pop_front(); + } else { + while (m_sync_points_copy.size() > 1) { + m_sync_points_copy.pop_back(); + } + if (m_invalid_master_sync_point) { + // all subsequent sync points would have been pruned + m_sync_points_copy.clear(); + } + } + + auto ctx = 
create_context_callback< + SyncPointPruneRequest<I>, + &SyncPointPruneRequest<I>::handle_update_sync_points>(this); + m_sync_point_handler->update_sync_points( + m_sync_point_handler->get_snap_seqs(), m_sync_points_copy, + m_sync_complete, ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_update_sync_points(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void SyncPointPruneRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h new file mode 100644 index 000000000..08bf840b1 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h @@ -0,0 +1,91 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H +#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H + +#include "tools/rbd_mirror/image_sync/Types.h" +#include <list> +#include <string> + +class Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_sync { + +template <typename ImageCtxT = librbd::ImageCtx> +class SyncPointPruneRequest { +public: + static SyncPointPruneRequest* create( + ImageCtxT *remote_image_ctx, + bool sync_complete, + SyncPointHandler* sync_point_handler, + Context *on_finish) { + return new SyncPointPruneRequest(remote_image_ctx, sync_complete, + sync_point_handler, on_finish); + } + + 
SyncPointPruneRequest( + ImageCtxT *remote_image_ctx, + bool sync_complete, + SyncPointHandler* sync_point_handler, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * | . . . . . + * | . . + * v v . (repeat if from snap + * REMOVE_SNAP . . . unused by other sync) + * | + * v + * REFRESH_IMAGE + * | + * v + * UPDATE_CLIENT + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_remote_image_ctx; + bool m_sync_complete; + SyncPointHandler* m_sync_point_handler; + Context *m_on_finish; + + SyncPoints m_sync_points_copy; + std::list<std::string> m_snap_names; + + bool m_invalid_master_sync_point = false; + + void send_remove_snap(); + void handle_remove_snap(int r); + + void send_refresh_image(); + void handle_refresh_image(int r); + + void send_update_sync_points(); + void handle_update_sync_points(int r); + + void finish(int r); +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_sync/Types.h b/src/tools/rbd_mirror/image_sync/Types.h new file mode 100644 index 000000000..d748dc93e --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/Types.h @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_TYPES_H +#define RBD_MIRROR_IMAGE_SYNC_TYPES_H + +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/Types.h" +#include <list> +#include <string> +#include <boost/optional.hpp> + +struct Context; + +namespace rbd { +namespace mirror { +namespace image_sync { + +struct SyncPoint { + typedef boost::optional<uint64_t> ObjectNumber; + + SyncPoint() { + } + SyncPoint(const cls::rbd::SnapshotNamespace& snap_namespace, + const std::string& snap_name, + const std::string& from_snap_name, + const ObjectNumber& 
object_number) + : snap_namespace(snap_namespace), snap_name(snap_name), + from_snap_name(from_snap_name), object_number(object_number) { + } + + cls::rbd::SnapshotNamespace snap_namespace = + {cls::rbd::UserSnapshotNamespace{}}; + std::string snap_name; + std::string from_snap_name; + ObjectNumber object_number = boost::none; + + bool operator==(const SyncPoint& rhs) const { + return (snap_namespace == rhs.snap_namespace && + snap_name == rhs.snap_name && + from_snap_name == rhs.from_snap_name && + object_number == rhs.object_number); + } +}; + +typedef std::list<SyncPoint> SyncPoints; + +struct SyncPointHandler { +public: + SyncPointHandler(const SyncPointHandler&) = delete; + SyncPointHandler& operator=(const SyncPointHandler&) = delete; + + virtual ~SyncPointHandler() {} + virtual void destroy() { + delete this; + } + + virtual SyncPoints get_sync_points() const = 0; + virtual librbd::SnapSeqs get_snap_seqs() const = 0; + + virtual void update_sync_points(const librbd::SnapSeqs& snap_seq, + const SyncPoints& sync_points, + bool sync_complete, + Context* on_finish) = 0; + +protected: + SyncPointHandler() {} +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_SYNC_TYPES_H diff --git a/src/tools/rbd_mirror/image_sync/Utils.cc b/src/tools/rbd_mirror/image_sync/Utils.cc new file mode 100644 index 000000000..6a3eae72d --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/Utils.cc @@ -0,0 +1,24 @@ +// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Utils.h" + +namespace rbd { +namespace mirror { +namespace image_sync { +namespace util { + +namespace { + +static const std::string SNAP_NAME_PREFIX(".rbd-mirror"); + +} // anonymous namespace + +std::string get_snapshot_name_prefix(const std::string& local_mirror_uuid) { + return SNAP_NAME_PREFIX + "." 
+ local_mirror_uuid + "."; +} + +} // namespace util +} // namespace image_sync +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_sync/Utils.h b/src/tools/rbd_mirror/image_sync/Utils.h new file mode 100644 index 000000000..139699daa --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/Utils.h @@ -0,0 +1,16 @@ +// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <string> + +namespace rbd { +namespace mirror { +namespace image_sync { +namespace util { + +std::string get_snapshot_name_prefix(const std::string& local_mirror_uuid); + +} // namespace util +} // namespace image_sync +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/instance_watcher/Types.cc b/src/tools/rbd_mirror/instance_watcher/Types.cc new file mode 100644 index 000000000..0e9922733 --- /dev/null +++ b/src/tools/rbd_mirror/instance_watcher/Types.cc @@ -0,0 +1,245 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Types.h" +#include "include/ceph_assert.h" +#include "include/stringify.h" +#include "common/Formatter.h" + +namespace rbd { +namespace mirror { +namespace instance_watcher { + +namespace { + +class EncodePayloadVisitor : public boost::static_visitor<void> { +public: + explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + using ceph::encode; + encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl); + payload.encode(m_bl); + } + +private: + bufferlist &m_bl; +}; + +class DecodePayloadVisitor : public boost::static_visitor<void> { +public: + DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) {} + + template <typename Payload> + inline void operator()(Payload &payload) const { + payload.decode(m_version, m_iter); + } + +private: + __u8 m_version; + 
bufferlist::const_iterator &m_iter; +}; + +class DumpPayloadVisitor : public boost::static_visitor<void> { +public: + explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + NotifyOp notify_op = Payload::NOTIFY_OP; + m_formatter->dump_string("notify_op", stringify(notify_op)); + payload.dump(m_formatter); + } + +private: + ceph::Formatter *m_formatter; +}; + +} // anonymous namespace + +void PayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + encode(request_id, bl); +} + +void PayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + decode(request_id, iter); +} + +void PayloadBase::dump(Formatter *f) const { + f->dump_unsigned("request_id", request_id); +} + +void ImagePayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(global_image_id, bl); +} + +void ImagePayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(global_image_id, iter); +} + +void ImagePayloadBase::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("global_image_id", global_image_id); +} + +void PeerImageRemovedPayload::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(global_image_id, bl); + encode(peer_mirror_uuid, bl); +} + +void PeerImageRemovedPayload::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(global_image_id, iter); + decode(peer_mirror_uuid, iter); +} + +void PeerImageRemovedPayload::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("global_image_id", global_image_id); + f->dump_string("peer_mirror_uuid", peer_mirror_uuid); +} + +void SyncPayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + 
encode(sync_id, bl); +} + +void SyncPayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(sync_id, iter); +} + +void SyncPayloadBase::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("sync_id", sync_id); +} + +void UnknownPayload::encode(bufferlist &bl) const { + ceph_abort(); +} + +void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void UnknownPayload::dump(Formatter *f) const { +} + +void NotifyMessage::encode(bufferlist& bl) const { + ENCODE_START(2, 2, bl); + boost::apply_visitor(EncodePayloadVisitor(bl), payload); + ENCODE_FINISH(bl); +} + +void NotifyMessage::decode(bufferlist::const_iterator& iter) { + DECODE_START(2, iter); + + uint32_t notify_op; + decode(notify_op, iter); + + // select the correct payload variant based upon the encoded op + switch (notify_op) { + case NOTIFY_OP_IMAGE_ACQUIRE: + payload = ImageAcquirePayload(); + break; + case NOTIFY_OP_IMAGE_RELEASE: + payload = ImageReleasePayload(); + break; + case NOTIFY_OP_PEER_IMAGE_REMOVED: + payload = PeerImageRemovedPayload(); + break; + case NOTIFY_OP_SYNC_REQUEST: + payload = SyncRequestPayload(); + break; + case NOTIFY_OP_SYNC_START: + payload = SyncStartPayload(); + break; + default: + payload = UnknownPayload(); + break; + } + + apply_visitor(DecodePayloadVisitor(struct_v, iter), payload); + DECODE_FINISH(iter); +} + +void NotifyMessage::dump(Formatter *f) const { + apply_visitor(DumpPayloadVisitor(f), payload); +} + +void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) { + o.push_back(new NotifyMessage(ImageAcquirePayload())); + o.push_back(new NotifyMessage(ImageAcquirePayload(1, "gid"))); + + o.push_back(new NotifyMessage(ImageReleasePayload())); + o.push_back(new NotifyMessage(ImageReleasePayload(1, "gid"))); + + o.push_back(new NotifyMessage(PeerImageRemovedPayload())); + o.push_back(new NotifyMessage(PeerImageRemovedPayload(1, 
"gid", "uuid"))); + + o.push_back(new NotifyMessage(SyncRequestPayload())); + o.push_back(new NotifyMessage(SyncRequestPayload(1, "sync_id"))); + + o.push_back(new NotifyMessage(SyncStartPayload())); + o.push_back(new NotifyMessage(SyncStartPayload(1, "sync_id"))); +} + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op) { + switch (op) { + case NOTIFY_OP_IMAGE_ACQUIRE: + out << "ImageAcquire"; + break; + case NOTIFY_OP_IMAGE_RELEASE: + out << "ImageRelease"; + break; + case NOTIFY_OP_PEER_IMAGE_REMOVED: + out << "PeerImageRemoved"; + break; + case NOTIFY_OP_SYNC_REQUEST: + out << "SyncRequest"; + break; + case NOTIFY_OP_SYNC_START: + out << "SyncStart"; + break; + default: + out << "Unknown (" << static_cast<uint32_t>(op) << ")"; + break; + } + return out; +} + +void NotifyAckPayload::encode(bufferlist &bl) const { + using ceph::encode; + encode(instance_id, bl); + encode(request_id, bl); + encode(ret_val, bl); +} + +void NotifyAckPayload::decode(bufferlist::const_iterator &iter) { + using ceph::decode; + decode(instance_id, iter); + decode(request_id, iter); + decode(ret_val, iter); +} + +void NotifyAckPayload::dump(Formatter *f) const { + f->dump_string("instance_id", instance_id); + f->dump_unsigned("request_id", request_id); + f->dump_int("request_id", ret_val); +} + +} // namespace instance_watcher +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/instance_watcher/Types.h b/src/tools/rbd_mirror/instance_watcher/Types.h new file mode 100644 index 000000000..b0b7b7791 --- /dev/null +++ b/src/tools/rbd_mirror/instance_watcher/Types.h @@ -0,0 +1,197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_INSTANCE_WATCHER_TYPES_H +#define RBD_MIRROR_INSTANCE_WATCHER_TYPES_H + +#include <string> +#include <set> +#include <boost/variant.hpp> + +#include "include/buffer_fwd.h" +#include "include/encoding.h" +#include "include/int_types.h" + +namespace ceph { 
class Formatter; } + +namespace rbd { +namespace mirror { +namespace instance_watcher { + +enum NotifyOp { + NOTIFY_OP_IMAGE_ACQUIRE = 0, + NOTIFY_OP_IMAGE_RELEASE = 1, + NOTIFY_OP_PEER_IMAGE_REMOVED = 2, + NOTIFY_OP_SYNC_REQUEST = 3, + NOTIFY_OP_SYNC_START = 4 +}; + +struct PayloadBase { + uint64_t request_id; + + PayloadBase() : request_id(0) { + } + + PayloadBase(uint64_t request_id) : request_id(request_id) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct ImagePayloadBase : public PayloadBase { + std::string global_image_id; + + ImagePayloadBase() : PayloadBase() { + } + + ImagePayloadBase(uint64_t request_id, const std::string &global_image_id) + : PayloadBase(request_id), global_image_id(global_image_id) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct ImageAcquirePayload : public ImagePayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_ACQUIRE; + + ImageAcquirePayload() { + } + ImageAcquirePayload(uint64_t request_id, const std::string &global_image_id) + : ImagePayloadBase(request_id, global_image_id) { + } +}; + +struct ImageReleasePayload : public ImagePayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_RELEASE; + + ImageReleasePayload() { + } + ImageReleasePayload(uint64_t request_id, const std::string &global_image_id) + : ImagePayloadBase(request_id, global_image_id) { + } +}; + +struct PeerImageRemovedPayload : public PayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_PEER_IMAGE_REMOVED; + + std::string global_image_id; + std::string peer_mirror_uuid; + + PeerImageRemovedPayload() { + } + PeerImageRemovedPayload(uint64_t request_id, + const std::string& global_image_id, + const std::string& peer_mirror_uuid) + : PayloadBase(request_id), + global_image_id(global_image_id), peer_mirror_uuid(peer_mirror_uuid) 
{ + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct SyncPayloadBase : public PayloadBase { + std::string sync_id; + + SyncPayloadBase() : PayloadBase() { + } + + SyncPayloadBase(uint64_t request_id, const std::string &sync_id) + : PayloadBase(request_id), sync_id(sync_id) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct SyncRequestPayload : public SyncPayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_REQUEST; + + SyncRequestPayload() : SyncPayloadBase() { + } + + SyncRequestPayload(uint64_t request_id, const std::string &sync_id) + : SyncPayloadBase(request_id, sync_id) { + } +}; + +struct SyncStartPayload : public SyncPayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_START; + + SyncStartPayload() : SyncPayloadBase() { + } + + SyncStartPayload(uint64_t request_id, const std::string &sync_id) + : SyncPayloadBase(request_id, sync_id) { + } +}; + +struct UnknownPayload { + static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1); + + UnknownPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +typedef boost::variant<ImageAcquirePayload, + ImageReleasePayload, + PeerImageRemovedPayload, + SyncRequestPayload, + SyncStartPayload, + UnknownPayload> Payload; + +struct NotifyMessage { + NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) { + } + + Payload payload; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; + + static void generate_test_instances(std::list<NotifyMessage *> &o); +}; + +WRITE_CLASS_ENCODER(NotifyMessage); + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op); + +struct NotifyAckPayload { + std::string 
instance_id;
  uint64_t request_id;  // ID of the request being acknowledged
  int ret_val;          // result code returned to the original requester

  NotifyAckPayload() : request_id(0), ret_val(0) {
  }

  NotifyAckPayload(const std::string &instance_id, uint64_t request_id,
                   int ret_val)
    : instance_id(instance_id), request_id(request_id), ret_val(ret_val) {
  }

  void encode(bufferlist &bl) const;
  void decode(bufferlist::const_iterator& it);
  void dump(Formatter *f) const;
};

WRITE_CLASS_ENCODER(NotifyAckPayload);

} // namespace instance_watcher
} // namespace mirror
} // namespace rbd

using rbd::mirror::instance_watcher::encode;
using rbd::mirror::instance_watcher::decode;

#endif // RBD_MIRROR_INSTANCE_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/instances/Types.h b/src/tools/rbd_mirror/instances/Types.h
new file mode 100644
index 000000000..8b0a68fc3
--- /dev/null
+++ b/src/tools/rbd_mirror/instances/Types.h
@@ -0,0 +1,28 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_RBD_MIRROR_INSTANCES_TYPES_H
#define CEPH_RBD_MIRROR_INSTANCES_TYPES_H

#include <string>
#include <vector>

namespace rbd {
namespace mirror {
namespace instances {

// Observer interface notified when rbd-mirror instance ids are added to or
// removed from the instances registry.
struct Listener {
  typedef std::vector<std::string> InstanceIds;

  virtual ~Listener() {
  }

  virtual void handle_added(const InstanceIds& instance_ids) = 0;
  virtual void handle_removed(const InstanceIds& instance_ids) = 0;
};

} // namespace instances
} // namespace mirror
} // namespace rbd

#endif // CEPH_RBD_MIRROR_INSTANCES_TYPES_H
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.cc b/src/tools/rbd_mirror/leader_watcher/Types.cc
new file mode 100644
index 000000000..d2fb7908f
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.cc
@@ -0,0 +1,161 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "Types.h"
#include "include/ceph_assert.h"
#include "include/stringify.h"
#include "common/Formatter.h"

namespace rbd
{ +namespace mirror { +namespace leader_watcher { + +namespace { + +class EncodePayloadVisitor : public boost::static_visitor<void> { +public: + explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + using ceph::encode; + encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl); + payload.encode(m_bl); + } + +private: + bufferlist &m_bl; +}; + +class DecodePayloadVisitor : public boost::static_visitor<void> { +public: + DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) {} + + template <typename Payload> + inline void operator()(Payload &payload) const { + payload.decode(m_version, m_iter); + } + +private: + __u8 m_version; + bufferlist::const_iterator &m_iter; +}; + +class DumpPayloadVisitor : public boost::static_visitor<void> { +public: + explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + NotifyOp notify_op = Payload::NOTIFY_OP; + m_formatter->dump_string("notify_op", stringify(notify_op)); + payload.dump(m_formatter); + } + +private: + ceph::Formatter *m_formatter; +}; + +} // anonymous namespace + +void HeartbeatPayload::encode(bufferlist &bl) const { +} + +void HeartbeatPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void HeartbeatPayload::dump(Formatter *f) const { +} + +void LockAcquiredPayload::encode(bufferlist &bl) const { +} + +void LockAcquiredPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void LockAcquiredPayload::dump(Formatter *f) const { +} + +void LockReleasedPayload::encode(bufferlist &bl) const { +} + +void LockReleasedPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void LockReleasedPayload::dump(Formatter *f) const { +} + +void UnknownPayload::encode(bufferlist &bl) const { + ceph_abort(); +} + +void 
UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) {
  // intentionally empty: unknown ops are skipped for forward compatibility
}

void UnknownPayload::dump(Formatter *f) const {
}

void NotifyMessage::encode(bufferlist& bl) const {
  ENCODE_START(1, 1, bl);
  boost::apply_visitor(EncodePayloadVisitor(bl), payload);
  ENCODE_FINISH(bl);
}

void NotifyMessage::decode(bufferlist::const_iterator& iter) {
  DECODE_START(1, iter);

  uint32_t notify_op;
  decode(notify_op, iter);

  // select the correct payload variant based upon the encoded op
  switch (notify_op) {
  case NOTIFY_OP_HEARTBEAT:
    payload = HeartbeatPayload();
    break;
  case NOTIFY_OP_LOCK_ACQUIRED:
    payload = LockAcquiredPayload();
    break;
  case NOTIFY_OP_LOCK_RELEASED:
    payload = LockReleasedPayload();
    break;
  default:
    payload = UnknownPayload();
    break;
  }

  apply_visitor(DecodePayloadVisitor(struct_v, iter), payload);
  DECODE_FINISH(iter);
}

void NotifyMessage::dump(Formatter *f) const {
  apply_visitor(DumpPayloadVisitor(f), payload);
}

void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
  o.push_back(new NotifyMessage(HeartbeatPayload()));
  o.push_back(new NotifyMessage(LockAcquiredPayload()));
  o.push_back(new NotifyMessage(LockReleasedPayload()));
}

// Pretty-print a NotifyOp for logging; unknown values are printed numerically.
std::ostream &operator<<(std::ostream &out, const NotifyOp &op) {
  switch (op) {
  case NOTIFY_OP_HEARTBEAT:
    out << "Heartbeat";
    break;
  case NOTIFY_OP_LOCK_ACQUIRED:
    out << "LockAcquired";
    break;
  case NOTIFY_OP_LOCK_RELEASED:
    out << "LockReleased";
    break;
  default:
    out << "Unknown (" << static_cast<uint32_t>(op) << ")";
    break;
  }
  return out;
}

} // namespace leader_watcher
} // namespace mirror
} // namespace rbd
diff --git a/src/tools/rbd_mirror/leader_watcher/Types.h b/src/tools/rbd_mirror/leader_watcher/Types.h
new file mode 100644
index 000000000..1278e54b7
--- /dev/null
+++ b/src/tools/rbd_mirror/leader_watcher/Types.h
@@ -0,0 +1,117 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2;
indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_LEADER_WATCHER_TYPES_H +#define RBD_MIRROR_LEADER_WATCHER_TYPES_H + +#include "include/int_types.h" +#include "include/buffer_fwd.h" +#include "include/encoding.h" +#include <string> +#include <vector> +#include <boost/variant.hpp> + +struct Context; + +namespace ceph { class Formatter; } + +namespace rbd { +namespace mirror { +namespace leader_watcher { + +struct Listener { + typedef std::vector<std::string> InstanceIds; + + virtual ~Listener() { + } + + virtual void post_acquire_handler(Context *on_finish) = 0; + virtual void pre_release_handler(Context *on_finish) = 0; + + virtual void update_leader_handler( + const std::string &leader_instance_id) = 0; + + virtual void handle_instances_added(const InstanceIds& instance_ids) = 0; + virtual void handle_instances_removed(const InstanceIds& instance_ids) = 0; +}; + +enum NotifyOp { + NOTIFY_OP_HEARTBEAT = 0, + NOTIFY_OP_LOCK_ACQUIRED = 1, + NOTIFY_OP_LOCK_RELEASED = 2, +}; + +struct HeartbeatPayload { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_HEARTBEAT; + + HeartbeatPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct LockAcquiredPayload { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_ACQUIRED; + + LockAcquiredPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct LockReleasedPayload { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_RELEASED; + + LockReleasedPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct UnknownPayload { + static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1); + + UnknownPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, 
bufferlist::const_iterator &iter);
  void dump(Formatter *f) const;
};

typedef boost::variant<HeartbeatPayload,
                       LockAcquiredPayload,
                       LockReleasedPayload,
                       UnknownPayload> Payload;

// Envelope carrying one of the leader-watcher notification payloads.
struct NotifyMessage {
  NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) {
  }

  Payload payload;

  void encode(bufferlist& bl) const;
  void decode(bufferlist::const_iterator& it);
  void dump(Formatter *f) const;

  static void generate_test_instances(std::list<NotifyMessage *> &o);
};

WRITE_CLASS_ENCODER(NotifyMessage);

std::ostream &operator<<(std::ostream &out, const NotifyOp &op);

} // namespace leader_watcher
} // namespace mirror
} // namespace rbd

using rbd::mirror::leader_watcher::encode;
using rbd::mirror::leader_watcher::decode;

#endif // RBD_MIRROR_LEADER_WATCHER_TYPES_H
diff --git a/src/tools/rbd_mirror/main.cc b/src/tools/rbd_mirror/main.cc
new file mode 100644
index 000000000..85e95e6b6
--- /dev/null
+++ b/src/tools/rbd_mirror/main.cc
@@ -0,0 +1,124 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "common/ceph_argparse.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/errno.h"
#include "common/perf_counters.h"
#include "global/global_init.h"
#include "global/signal_handler.h"
#include "Mirror.h"
#include "Types.h"

#include <vector>

// Daemon-global state: the mirror instance and its perf counter groups,
// created in main() and torn down on shutdown.
rbd::mirror::Mirror *mirror = nullptr;
PerfCounters *g_journal_perf_counters = nullptr;
PerfCounters *g_snapshot_perf_counters = nullptr;

void usage() {
  std::cout << "usage: rbd-mirror [options...]" << std::endl;
  std::cout << "options:\n";
  std::cout << " -m monaddress[:port] connect to specified monitor\n";
  std::cout << " --keyring=<path> path to keyring for local cluster\n";
  std::cout << " --log-file=<logfile> file to log debug output\n";
  std::cout << " --debug-rbd-mirror=<log-level>/<memory-level> set rbd-mirror debug level\n";
  generic_server_usage();
}

+static void handle_signal(int signum) +{ + if (mirror) + mirror->handle_signal(signum); +} + +int main(int argc, const char **argv) +{ + auto args = argv_to_vec(argc, argv); + if (args.empty()) { + std::cerr << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + if (ceph_argparse_need_usage(args)) { + usage(); + exit(0); + } + + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + + if (g_conf()->daemonize) { + global_init_daemonize(g_ceph_context); + } + + common_init_finish(g_ceph_context); + + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, handle_signal); + register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + auto cmd_args = argv_to_vec(argc, argv); + + // disable unnecessary librbd cache + g_ceph_context->_conf.set_val_or_die("rbd_cache", "false"); + + auto prio = + g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio"); + { + PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_journal", + rbd::mirror::l_rbd_mirror_journal_first, + rbd::mirror::l_rbd_mirror_journal_last); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_entries, "entries", + "Number of entries replayed", nullptr, prio); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_replay_bytes, + "replay_bytes", "Total bytes replayed", nullptr, prio, + unit_t(UNIT_BYTES)); + plb.add_time_avg(rbd::mirror::l_rbd_mirror_journal_replay_latency, + "replay_latency", "Replay latency", nullptr, prio); + g_journal_perf_counters = plb.create_perf_counters(); + } + { + PerfCountersBuilder plb( + g_ceph_context, "rbd_mirror_snapshot", + rbd::mirror::l_rbd_mirror_snapshot_first, + rbd::mirror::l_rbd_mirror_snapshot_remote_timestamp); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_snapshots, + "snapshots", "Number of snapshots synced", nullptr, + prio); + 
plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_sync_time, "sync_time", + "Average sync time", nullptr, prio); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_sync_bytes, + "sync_bytes", "Total bytes synced", nullptr, prio, + unit_t(UNIT_BYTES)); + g_snapshot_perf_counters = plb.create_perf_counters(); + } + g_ceph_context->get_perfcounters_collection()->add(g_journal_perf_counters); + g_ceph_context->get_perfcounters_collection()->add(g_snapshot_perf_counters); + + mirror = new rbd::mirror::Mirror(g_ceph_context, cmd_args); + int r = mirror->init(); + if (r < 0) { + std::cerr << "failed to initialize: " << cpp_strerror(r) << std::endl; + goto cleanup; + } + + mirror->run(); + + cleanup: + unregister_async_signal_handler(SIGHUP, handle_signal); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); + + g_ceph_context->get_perfcounters_collection()->remove(g_journal_perf_counters); + g_ceph_context->get_perfcounters_collection()->remove(g_snapshot_perf_counters); + + delete mirror; + delete g_journal_perf_counters; + delete g_snapshot_perf_counters; + + return r < 0 ? 
EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc new file mode 100644 index 000000000..a1d9c1b54 --- /dev/null +++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" +#include <map> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::pool_watcher::RefreshImagesRequest " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace pool_watcher { + +static const uint32_t MAX_RETURN = 1024; + +using librbd::util::create_rados_callback; + +template <typename I> +void RefreshImagesRequest<I>::send() { + m_image_ids->clear(); + mirror_image_list(); +} + +template <typename I> +void RefreshImagesRequest<I>::mirror_image_list() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + RefreshImagesRequest<I>, + &RefreshImagesRequest<I>::handle_mirror_image_list>(this); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void RefreshImagesRequest<I>::handle_mirror_image_list(int r) { + dout(10) << "r=" << r << dendl; + + std::map<std::string, std::string> ids; + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_list_finish(&it, &ids); + } + + if (r < 0 && r != -ENOENT) { + derr << "failed to list mirrored images: " << cpp_strerror(r) << 
dendl; + finish(r); + return; + } + + // store as global -> local image ids + for (auto &id : ids) { + m_image_ids->emplace(id.second, id.first); + } + + if (ids.size() == MAX_RETURN) { + m_start_after = ids.rbegin()->first; + mirror_image_list(); + return; + } + + finish(0); +} + +template <typename I> +void RefreshImagesRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace pool_watcher +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h new file mode 100644 index 000000000..8bfeabe29 --- /dev/null +++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h @@ -0,0 +1,73 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include <string> + +struct Context; + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace pool_watcher { + +template <typename ImageCtxT = librbd::ImageCtx> +class RefreshImagesRequest { +public: + static RefreshImagesRequest *create(librados::IoCtx &remote_io_ctx, + ImageIds *image_ids, Context *on_finish) { + return new RefreshImagesRequest(remote_io_ctx, image_ids, on_finish); + } + + RefreshImagesRequest(librados::IoCtx &remote_io_ctx, ImageIds *image_ids, + Context *on_finish) + : m_remote_io_ctx(remote_io_ctx), m_image_ids(image_ids), + m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * | /-------------\ + * | | | + * v v | (more images) + * MIRROR_IMAGE_LIST ---/ + * | + * v + * <finish> + * 
+ * @endverbatim + */ + + librados::IoCtx &m_remote_io_ctx; + ImageIds *m_image_ids; + Context *m_on_finish; + + bufferlist m_out_bl; + std::string m_start_after; + + void mirror_image_list(); + void handle_mirror_image_list(int r); + + void finish(int r); + +}; + +} // namespace pool_watcher +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H diff --git a/src/tools/rbd_mirror/pool_watcher/Types.h b/src/tools/rbd_mirror/pool_watcher/Types.h new file mode 100644 index 000000000..52dfc342d --- /dev/null +++ b/src/tools/rbd_mirror/pool_watcher/Types.h @@ -0,0 +1,27 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H + +#include "tools/rbd_mirror/Types.h" +#include <string> + +namespace rbd { +namespace mirror { +namespace pool_watcher { + +struct Listener { + virtual ~Listener() { + } + + virtual void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) = 0; +}; + +} // namespace pool_watcher +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H diff --git a/src/tools/rbd_mirror/service_daemon/Types.cc b/src/tools/rbd_mirror/service_daemon/Types.cc new file mode 100644 index 000000000..7dc6537c5 --- /dev/null +++ b/src/tools/rbd_mirror/service_daemon/Types.cc @@ -0,0 +1,29 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <iostream> + +namespace rbd { +namespace mirror { +namespace service_daemon { + +std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level) { + switch (callout_level) { + case CALLOUT_LEVEL_INFO: + os << "info"; + break; + case 
CALLOUT_LEVEL_WARNING: + os << "warning"; + break; + case CALLOUT_LEVEL_ERROR: + os << "error"; + break; + } + return os; +} + +} // namespace service_daemon +} // namespace mirror +} // namespace rbd + diff --git a/src/tools/rbd_mirror/service_daemon/Types.h b/src/tools/rbd_mirror/service_daemon/Types.h new file mode 100644 index 000000000..3aab72016 --- /dev/null +++ b/src/tools/rbd_mirror/service_daemon/Types.h @@ -0,0 +1,33 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H +#define CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H + +#include "include/int_types.h" +#include <iosfwd> +#include <string> +#include <boost/variant.hpp> + +namespace rbd { +namespace mirror { +namespace service_daemon { + +typedef uint64_t CalloutId; +const uint64_t CALLOUT_ID_NONE {0}; + +enum CalloutLevel { + CALLOUT_LEVEL_INFO, + CALLOUT_LEVEL_WARNING, + CALLOUT_LEVEL_ERROR +}; + +std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level); + +typedef boost::variant<bool, uint64_t, std::string> AttributeValue; + +} // namespace service_daemon +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H diff --git a/src/tools/rbd_nbd/CMakeLists.txt b/src/tools/rbd_nbd/CMakeLists.txt new file mode 100644 index 000000000..da758f514 --- /dev/null +++ b/src/tools/rbd_nbd/CMakeLists.txt @@ -0,0 +1,4 @@ +find_package(nl REQUIRED genl) +add_executable(rbd-nbd rbd-nbd.cc) +target_link_libraries(rbd-nbd librbd librados global nl::genl) +install(TARGETS rbd-nbd DESTINATION bin) diff --git a/src/tools/rbd_nbd/nbd-netlink.h b/src/tools/rbd_nbd/nbd-netlink.h new file mode 100644 index 000000000..2d0b90964 --- /dev/null +++ b/src/tools/rbd_nbd/nbd-netlink.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2017 Facebook. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#ifndef _UAPILINUX_NBD_NETLINK_H +#define _UAPILINUX_NBD_NETLINK_H + +#define NBD_GENL_FAMILY_NAME "nbd" +#define NBD_GENL_VERSION 0x1 +#define NBD_GENL_MCAST_GROUP_NAME "nbd_mc_group" + +/* Configuration policy attributes, used for CONNECT */ +enum { + NBD_ATTR_UNSPEC, + NBD_ATTR_INDEX, + NBD_ATTR_SIZE_BYTES, + NBD_ATTR_BLOCK_SIZE_BYTES, + NBD_ATTR_TIMEOUT, + NBD_ATTR_SERVER_FLAGS, + NBD_ATTR_CLIENT_FLAGS, + NBD_ATTR_SOCKETS, + NBD_ATTR_DEAD_CONN_TIMEOUT, + NBD_ATTR_DEVICE_LIST, + NBD_ATTR_BACKEND_IDENTIFIER, + __NBD_ATTR_MAX, +}; +#define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1) + +/* + * This is the format for multiple devices with NBD_ATTR_DEVICE_LIST + * + * [NBD_ATTR_DEVICE_LIST] + * [NBD_DEVICE_ITEM] + * [NBD_DEVICE_INDEX] + * [NBD_DEVICE_CONNECTED] + */ +enum { + NBD_DEVICE_ITEM_UNSPEC, + NBD_DEVICE_ITEM, + __NBD_DEVICE_ITEM_MAX, +}; +#define NBD_DEVICE_ITEM_MAX (__NBD_DEVICE_ITEM_MAX - 1) + +enum { + NBD_DEVICE_UNSPEC, + NBD_DEVICE_INDEX, + NBD_DEVICE_CONNECTED, + __NBD_DEVICE_MAX, +}; +#define NBD_DEVICE_ATTR_MAX (__NBD_DEVICE_MAX - 1) + +/* + * This is the format for multiple sockets with NBD_ATTR_SOCKETS + * + * [NBD_ATTR_SOCKETS] + * [NBD_SOCK_ITEM] + * [NBD_SOCK_FD] + * [NBD_SOCK_ITEM] + * [NBD_SOCK_FD] + */ +enum { + NBD_SOCK_ITEM_UNSPEC, + NBD_SOCK_ITEM, + __NBD_SOCK_ITEM_MAX, +}; +#define 
NBD_SOCK_ITEM_MAX (__NBD_SOCK_ITEM_MAX - 1) + +enum { + NBD_SOCK_UNSPEC, + NBD_SOCK_FD, + __NBD_SOCK_MAX, +}; +#define NBD_SOCK_MAX (__NBD_SOCK_MAX - 1) + +enum { + NBD_CMD_UNSPEC, + NBD_CMD_CONNECT, + NBD_CMD_DISCONNECT, + NBD_CMD_RECONFIGURE, + NBD_CMD_LINK_DEAD, + NBD_CMD_STATUS, + __NBD_CMD_MAX, +}; +#define NBD_CMD_MAX (__NBD_CMD_MAX - 1) + +#endif /* _UAPILINUX_NBD_NETLINK_H */ diff --git a/src/tools/rbd_nbd/rbd-nbd.cc b/src/tools/rbd_nbd/rbd-nbd.cc new file mode 100644 index 000000000..e348bd8fe --- /dev/null +++ b/src/tools/rbd_nbd/rbd-nbd.cc @@ -0,0 +1,2441 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * rbd-nbd - RBD in userspace + * + * Copyright (C) 2015 - 2016 Kylin Corporation + * + * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com> + * Li Wang <li.wang@kylin-cloud.com> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * +*/ + +#include "acconfig.h" +#include "include/int_types.h" +#include "include/scope_guard.h" + +#include <boost/endian/conversion.hpp> + +#include <libgen.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include <linux/nbd.h> +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/syscall.h> + +#include "nbd-netlink.h" +#include <libnl3/netlink/genl/genl.h> +#include <libnl3/netlink/genl/ctrl.h> +#include <libnl3/netlink/genl/mngt.h> + +#include <filesystem> +#include <fstream> +#include <iostream> +#include <memory> +#include <regex> +#include <boost/algorithm/string/predicate.hpp> +#include <boost/lexical_cast.hpp> + +#include "common/Formatter.h" +#include "common/Preforker.h" +#include "common/SubProcess.h" +#include "common/TextTable.h" +#include "common/ceph_argparse.h" +#include "common/config.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/event_socket.h" +#include "common/module.h" +#include "common/safe_io.h" +#include "common/version.h" + +#include "global/global_init.h" +#include "global/signal_handler.h" + +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" +#include "include/stringify.h" +#include "include/xlist.h" + +#include "mon/MonClient.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd-nbd: " + +using namespace std; +namespace fs = std::filesystem; + +using boost::endian::big_to_native; +using boost::endian::native_to_big; + +enum Command { + None, + Map, + Unmap, + Attach, + Detach, + List +}; + +struct Config { + int nbds_max = 0; + int max_part = 255; + int io_timeout = -1; + int reattach_timeout = 30; + + bool exclusive = false; + bool notrim = false; + bool quiesce = false; + bool readonly = false; + bool set_max_part = false; + 
bool try_netlink = false; + bool show_cookie = false; + + std::string poolname; + std::string nsname; + std::string imgname; + std::string snapname; + std::string devpath; + std::string quiesce_hook = CMAKE_INSTALL_LIBEXECDIR "/rbd-nbd/rbd-nbd_quiesce"; + + std::string format; + bool pretty_format = false; + + std::vector<librbd::encryption_format_t> encryption_formats; + std::vector<std::string> encryption_passphrase_files; + + Command command = None; + int pid = 0; + std::string cookie; + uint64_t snapid = CEPH_NOSNAP; + + std::string image_spec() const { + std::string spec = poolname + "/"; + + if (!nsname.empty()) { + spec += nsname + "/"; + } + spec += imgname; + + if (!snapname.empty()) { + spec += "@" + snapname; + } + + return spec; + } +}; + +static void usage() +{ + std::cout << "Usage: rbd-nbd [options] map <image-or-snap-spec> Map image to nbd device\n" + << " detach <device|image-or-snap-spec> Detach image from nbd device\n" + << " [options] attach <image-or-snap-spec> Attach image to nbd device\n" + << " unmap <device|image-or-snap-spec> Unmap nbd device\n" + << " [options] list-mapped List mapped nbd devices\n" + << "Map and attach options:\n" + << " --device <device path> Specify nbd device path (/dev/nbd{num})\n" + << " --encryption-format luks|luks1|luks2\n" + << " Image encryption format (default: luks)\n" + << " --encryption-passphrase-file Path of file containing passphrase for unlocking image encryption\n" + << " --exclusive Forbid writes by other clients\n" + << " --notrim Turn off trim/discard\n" + << " --io-timeout <sec> Set nbd IO timeout\n" + << " --max_part <limit> Override for module param max_part\n" + << " --nbds_max <limit> Override for module param nbds_max\n" + << " --quiesce Use quiesce callbacks\n" + << " --quiesce-hook <path> Specify quiesce hook path\n" + << " (default: " << Config().quiesce_hook << ")\n" + << " --read-only Map read-only\n" + << " --reattach-timeout <sec> Set nbd re-attach timeout\n" + << " (default: " << 
Config().reattach_timeout << ")\n" + << " --try-netlink Use the nbd netlink interface\n" + << " --show-cookie Show device cookie\n" + << " --cookie Specify device cookie\n" + << " --snap-id <snap-id> Specify snapshot by ID instead of by name\n" + << "\n" + << "Unmap and detach options:\n" + << " --device <device path> Specify nbd device path (/dev/nbd{num})\n" + << " --snap-id <snap-id> Specify snapshot by ID instead of by name\n" + << "\n" + << "List options:\n" + << " --format plain|json|xml Output format (default: plain)\n" + << " --pretty-format Pretty formatting (json and xml)\n" + << std::endl; + generic_server_usage(); +} + +static int nbd = -1; +static int nbd_index = -1; +static EventSocket terminate_event_sock; + +#define RBD_NBD_BLKSIZE 512UL + +#define HELP_INFO 1 +#define VERSION_INFO 2 + +static int parse_args(vector<const char*>& args, std::ostream *err_msg, + Config *cfg); +static int netlink_disconnect(int index); +static int netlink_resize(int nbd_index, uint64_t size); + +static int run_quiesce_hook(const std::string &quiesce_hook, + const std::string &devpath, + const std::string &command); + +static std::string get_cookie(const std::string &devpath); + +class NBDServer +{ +public: + uint64_t quiesce_watch_handle = 0; + +private: + int fd; + librbd::Image ℑ + Config *cfg; + +public: + NBDServer(int fd, librbd::Image& image, Config *cfg) + : fd(fd) + , image(image) + , cfg(cfg) + , reader_thread(*this, &NBDServer::reader_entry) + , writer_thread(*this, &NBDServer::writer_entry) + , quiesce_thread(*this, &NBDServer::quiesce_entry) + { + std::vector<librbd::config_option_t> options; + image.config_list(&options); + for (auto &option : options) { + if ((option.name == std::string("rbd_cache") || + option.name == std::string("rbd_cache_writethrough_until_flush")) && + option.value == "false") { + allow_internal_flush = true; + break; + } + } + } + + Config *get_cfg() const { + return cfg; + } + +private: + int terminate_event_fd = -1; + ceph::mutex 
disconnect_lock = + ceph::make_mutex("NBDServer::DisconnectLocker"); + ceph::condition_variable disconnect_cond; + std::atomic<bool> terminated = { false }; + std::atomic<bool> allow_internal_flush = { false }; + + struct IOContext + { + xlist<IOContext*>::item item; + NBDServer *server = nullptr; + struct nbd_request request; + struct nbd_reply reply; + bufferlist data; + int command = 0; + + IOContext() + : item(this) + {} + }; + + friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); + + ceph::mutex lock = ceph::make_mutex("NBDServer::Locker"); + ceph::condition_variable cond; + xlist<IOContext*> io_pending; + xlist<IOContext*> io_finished; + + void io_start(IOContext *ctx) + { + std::lock_guard l{lock}; + io_pending.push_back(&ctx->item); + } + + void io_finish(IOContext *ctx) + { + std::lock_guard l{lock}; + ceph_assert(ctx->item.is_on_list()); + ctx->item.remove_myself(); + io_finished.push_back(&ctx->item); + cond.notify_all(); + } + + IOContext *wait_io_finish() + { + std::unique_lock l{lock}; + cond.wait(l, [this] { + return !io_finished.empty() || + (io_pending.empty() && terminated); + }); + + if (io_finished.empty()) + return NULL; + + IOContext *ret = io_finished.front(); + io_finished.pop_front(); + + return ret; + } + + void wait_clean() + { + std::unique_lock l{lock}; + cond.wait(l, [this] { return io_pending.empty(); }); + + while(!io_finished.empty()) { + std::unique_ptr<IOContext> free_ctx(io_finished.front()); + io_finished.pop_front(); + } + } + + void assert_clean() + { + std::unique_lock l{lock}; + + ceph_assert(!reader_thread.is_started()); + ceph_assert(!writer_thread.is_started()); + ceph_assert(io_pending.empty()); + ceph_assert(io_finished.empty()); + } + + static void aio_callback(librbd::completion_t cb, void *arg) + { + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(cb); + + IOContext *ctx = reinterpret_cast<IOContext *>(arg); + int ret = 
aio_completion->get_return_value(); + + dout(20) << __func__ << ": " << *ctx << dendl; + + if (ret == -EINVAL) { + // if shrinking an image, a pagecache writeback might reference + // extents outside of the range of the new image extents + dout(0) << __func__ << ": masking IO out-of-bounds error" << dendl; + ctx->data.clear(); + ret = 0; + } + + if (ret < 0) { + ctx->reply.error = native_to_big<uint32_t>(-ret); + } else if ((ctx->command == NBD_CMD_READ) && + ret < static_cast<int>(ctx->request.len)) { + int pad_byte_count = static_cast<int> (ctx->request.len) - ret; + ctx->data.append_zero(pad_byte_count); + dout(20) << __func__ << ": " << *ctx << ": Pad byte count: " + << pad_byte_count << dendl; + ctx->reply.error = native_to_big<uint32_t>(0); + } else { + ctx->reply.error = native_to_big<uint32_t>(0); + } + ctx->server->io_finish(ctx); + + aio_completion->release(); + } + + void reader_entry() + { + struct pollfd poll_fds[2]; + memset(poll_fds, 0, sizeof(struct pollfd) * 2); + poll_fds[0].fd = fd; + poll_fds[0].events = POLLIN; + poll_fds[1].fd = terminate_event_fd; + poll_fds[1].events = POLLIN; + + while (true) { + std::unique_ptr<IOContext> ctx(new IOContext()); + ctx->server = this; + + dout(20) << __func__ << ": waiting for nbd request" << dendl; + + int r = poll(poll_fds, 2, -1); + if (r == -1) { + if (errno == EINTR) { + continue; + } + r = -errno; + derr << "failed to poll nbd: " << cpp_strerror(r) << dendl; + goto error; + } + + if ((poll_fds[1].revents & POLLIN) != 0) { + dout(0) << __func__ << ": terminate received" << dendl; + goto signal; + } + + if ((poll_fds[0].revents & POLLIN) == 0) { + dout(20) << __func__ << ": nothing to read" << dendl; + continue; + } + + r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request)); + if (r < 0) { + derr << "failed to read nbd request header: " << cpp_strerror(r) + << dendl; + goto error; + } + + if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) { + derr << "invalid nbd request header" << dendl; + 
goto signal; + } + + ctx->request.from = big_to_native(ctx->request.from); + ctx->request.type = big_to_native(ctx->request.type); + ctx->request.len = big_to_native(ctx->request.len); + + ctx->reply.magic = native_to_big<uint32_t>(NBD_REPLY_MAGIC); + memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle)); + + ctx->command = ctx->request.type & 0x0000ffff; + + dout(20) << *ctx << ": start" << dendl; + + switch (ctx->command) + { + case NBD_CMD_DISC: + // NBD_DO_IT will return when pipe is closed + dout(0) << "disconnect request received" << dendl; + goto signal; + case NBD_CMD_WRITE: + bufferptr ptr(ctx->request.len); + r = safe_read_exact(fd, ptr.c_str(), ctx->request.len); + if (r < 0) { + derr << *ctx << ": failed to read nbd request data: " + << cpp_strerror(r) << dendl; + goto error; + } + ctx->data.push_back(ptr); + break; + } + + IOContext *pctx = ctx.release(); + io_start(pctx); + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback); + switch (pctx->command) + { + case NBD_CMD_WRITE: + image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c); + break; + case NBD_CMD_READ: + image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c); + break; + case NBD_CMD_FLUSH: + image.aio_flush(c); + allow_internal_flush = true; + break; + case NBD_CMD_TRIM: + image.aio_discard(pctx->request.from, pctx->request.len, c); + break; + default: + derr << *pctx << ": invalid request command" << dendl; + c->release(); + goto signal; + } + } +error: + { + int r = netlink_disconnect(nbd_index); + if (r == 1) { + ioctl(nbd, NBD_DISCONNECT); + } + } +signal: + std::lock_guard l{lock}; + terminated = true; + cond.notify_all(); + + std::lock_guard disconnect_l{disconnect_lock}; + disconnect_cond.notify_all(); + + dout(20) << __func__ << ": terminated" << dendl; + } + + void writer_entry() + { + while (true) { + dout(20) << __func__ << ": waiting for io request" << dendl; + std::unique_ptr<IOContext> 
ctx(wait_io_finish()); + if (!ctx) { + dout(20) << __func__ << ": no io requests, terminating" << dendl; + goto done; + } + + dout(20) << __func__ << ": got: " << *ctx << dendl; + + int r = safe_write(fd, &ctx->reply, sizeof(struct nbd_reply)); + if (r < 0) { + derr << *ctx << ": failed to write reply header: " << cpp_strerror(r) + << dendl; + goto error; + } + if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) { + r = ctx->data.write_fd(fd); + if (r < 0) { + derr << *ctx << ": failed to write replay data: " << cpp_strerror(r) + << dendl; + goto error; + } + } + dout(20) << *ctx << ": finish" << dendl; + } + error: + wait_clean(); + done: + ::shutdown(fd, SHUT_RDWR); + + dout(20) << __func__ << ": terminated" << dendl; + } + + bool wait_quiesce() { + dout(20) << __func__ << dendl; + + std::unique_lock locker{lock}; + cond.wait(locker, [this] { return quiesce || terminated; }); + + if (terminated) { + return false; + } + + dout(20) << __func__ << ": got quiesce request" << dendl; + return true; + } + + void wait_unquiesce(std::unique_lock<ceph::mutex> &locker) { + dout(20) << __func__ << dendl; + + cond.wait(locker, [this] { return !quiesce || terminated; }); + + dout(20) << __func__ << ": got unquiesce request" << dendl; + } + + void wait_inflight_io() { + if (!allow_internal_flush) { + return; + } + + uint64_t features = 0; + image.features(&features); + if ((features & RBD_FEATURE_EXCLUSIVE_LOCK) != 0) { + bool is_owner = false; + image.is_exclusive_lock_owner(&is_owner); + if (!is_owner) { + return; + } + } + + dout(20) << __func__ << dendl; + + int r = image.flush(); + if (r < 0) { + derr << "flush failed: " << cpp_strerror(r) << dendl; + } + } + + void quiesce_entry() + { + ceph_assert(cfg->quiesce); + + while (wait_quiesce()) { + + int r = run_quiesce_hook(cfg->quiesce_hook, cfg->devpath, "quiesce"); + + wait_inflight_io(); + + { + std::unique_lock locker{lock}; + ceph_assert(quiesce == true); + + image.quiesce_complete(quiesce_watch_handle, 
r); + + if (r < 0) { + quiesce = false; + continue; + } + + wait_unquiesce(locker); + } + + run_quiesce_hook(cfg->quiesce_hook, cfg->devpath, "unquiesce"); + } + + dout(20) << __func__ << ": terminated" << dendl; + } + + class ThreadHelper : public Thread + { + public: + typedef void (NBDServer::*entry_func)(); + private: + NBDServer &server; + entry_func func; + public: + ThreadHelper(NBDServer &_server, entry_func _func) + :server(_server) + ,func(_func) + {} + protected: + void* entry() override + { + (server.*func)(); + return NULL; + } + } reader_thread, writer_thread, quiesce_thread; + + bool started = false; + bool quiesce = false; + +public: + void start() + { + if (!started) { + dout(10) << __func__ << ": starting" << dendl; + + started = true; + + terminate_event_fd = eventfd(0, EFD_NONBLOCK); + ceph_assert(terminate_event_fd > 0); + int r = terminate_event_sock.init(terminate_event_fd, + EVENT_SOCKET_TYPE_EVENTFD); + ceph_assert(r >= 0); + + reader_thread.create("rbd_reader"); + writer_thread.create("rbd_writer"); + if (cfg->quiesce) { + quiesce_thread.create("rbd_quiesce"); + } + } + } + + void wait_for_disconnect() + { + if (!started) + return; + + std::unique_lock l{disconnect_lock}; + disconnect_cond.wait(l); + } + + void notify_quiesce() { + dout(10) << __func__ << dendl; + + ceph_assert(cfg->quiesce); + + std::unique_lock locker{lock}; + ceph_assert(quiesce == false); + quiesce = true; + cond.notify_all(); + } + + void notify_unquiesce() { + dout(10) << __func__ << dendl; + + ceph_assert(cfg->quiesce); + + std::unique_lock locker{lock}; + ceph_assert(quiesce == true); + quiesce = false; + cond.notify_all(); + } + + ~NBDServer() + { + if (started) { + dout(10) << __func__ << ": terminating" << dendl; + + terminate_event_sock.notify(); + + reader_thread.join(); + writer_thread.join(); + if (cfg->quiesce) { + quiesce_thread.join(); + } + + assert_clean(); + + close(terminate_event_fd); + started = false; + } + } +}; + +std::ostream 
&operator<<(std::ostream &os, const NBDServer::IOContext &ctx) { + + os << "[" << std::hex << big_to_native(*((uint64_t *)ctx.request.handle)); + + switch (ctx.command) + { + case NBD_CMD_WRITE: + os << " WRITE "; + break; + case NBD_CMD_READ: + os << " READ "; + break; + case NBD_CMD_FLUSH: + os << " FLUSH "; + break; + case NBD_CMD_TRIM: + os << " TRIM "; + break; + case NBD_CMD_DISC: + os << " DISC "; + break; + default: + os << " UNKNOWN(" << ctx.command << ") "; + break; + } + + os << ctx.request.from << "~" << ctx.request.len << " " + << std::dec << big_to_native(ctx.reply.error) << "]"; + + return os; +} + +class NBDQuiesceWatchCtx : public librbd::QuiesceWatchCtx +{ +public: + NBDQuiesceWatchCtx(NBDServer *server) : server(server) { + } + + void handle_quiesce() override { + server->notify_quiesce(); + } + + void handle_unquiesce() override { + server->notify_unquiesce(); + } + +private: + NBDServer *server; +}; + +class NBDWatchCtx : public librbd::UpdateWatchCtx +{ +private: + int fd; + int nbd_index; + bool use_netlink; + librados::IoCtx &io_ctx; + librbd::Image ℑ + uint64_t size; + std::thread handle_notify_thread; + ceph::condition_variable cond; + ceph::mutex lock = ceph::make_mutex("NBDWatchCtx::Locker"); + bool notify = false; + bool terminated = false; + + bool wait_notify() { + dout(10) << __func__ << dendl; + + std::unique_lock locker{lock}; + cond.wait(locker, [this] { return notify || terminated; }); + + if (terminated) { + return false; + } + + dout(10) << __func__ << ": got notify request" << dendl; + notify = false; + return true; + } + + void handle_notify_entry() { + dout(10) << __func__ << dendl; + + while (wait_notify()) { + uint64_t new_size; + int ret = image.size(&new_size); + if (ret < 0) { + derr << "getting image size failed: " << cpp_strerror(ret) << dendl; + continue; + } + if (new_size == size) { + continue; + } + dout(5) << "resize detected" << dendl; + if (ioctl(fd, BLKFLSBUF, NULL) < 0) { + derr << "invalidate page cache 
failed: " << cpp_strerror(errno) + << dendl; + } + if (use_netlink) { + ret = netlink_resize(nbd_index, new_size); + } else { + ret = ioctl(fd, NBD_SET_SIZE, new_size); + if (ret < 0) { + derr << "resize failed: " << cpp_strerror(errno) << dendl; + } + } + if (!ret) { + size = new_size; + } + if (ioctl(fd, BLKRRPART, NULL) < 0) { + derr << "rescan of partition table failed: " << cpp_strerror(errno) + << dendl; + } + if (image.invalidate_cache() < 0) { + derr << "invalidate rbd cache failed" << dendl; + } + } + } + +public: + NBDWatchCtx(int _fd, + int _nbd_index, + bool _use_netlink, + librados::IoCtx &_io_ctx, + librbd::Image &_image, + unsigned long _size) + : fd(_fd) + , nbd_index(_nbd_index) + , use_netlink(_use_netlink) + , io_ctx(_io_ctx) + , image(_image) + , size(_size) + { + handle_notify_thread = make_named_thread("rbd_handle_notify", + &NBDWatchCtx::handle_notify_entry, + this); + } + + ~NBDWatchCtx() override + { + dout(10) << __func__ << ": terminating" << dendl; + std::unique_lock locker{lock}; + terminated = true; + cond.notify_all(); + locker.unlock(); + + handle_notify_thread.join(); + dout(10) << __func__ << ": finish" << dendl; + } + + void handle_notify() override + { + dout(10) << __func__ << dendl; + + std::unique_lock locker{lock}; + notify = true; + cond.notify_all(); + } +}; + +class NBDListIterator { +public: + bool get(Config *cfg) { + while (true) { + std::string nbd_path = "/sys/block/nbd" + stringify(m_index); + if(access(nbd_path.c_str(), F_OK) != 0) { + return false; + } + + *cfg = Config(); + cfg->devpath = "/dev/nbd" + stringify(m_index++); + + int pid; + std::ifstream ifs; + ifs.open(nbd_path + "/pid", std::ifstream::in); + if (!ifs.is_open()) { + continue; + } + ifs >> pid; + ifs.close(); + + // If the rbd-nbd is re-attached the pid may store garbage + // here. We are sure this is the case when it is negative or + // zero. Then we just try to find the attached process scanning + // /proc fs. 
If it is positive we check the process with this + // pid first and if it is not rbd-nbd fallback to searching the + // attached process. + do { + if (pid <= 0) { + pid = find_attached(cfg->devpath); + if (pid <= 0) { + break; + } + } + + if (get_mapped_info(pid, cfg) >= 0) { + return true; + } + pid = -1; + } while (true); + } + } + +private: + int m_index = 0; + std::map<int, Config> m_mapped_info_cache; + + int get_mapped_info(int pid, Config *cfg) { + ceph_assert(!cfg->devpath.empty()); + + auto it = m_mapped_info_cache.find(pid); + if (it != m_mapped_info_cache.end()) { + if (it->second.devpath != cfg->devpath) { + return -EINVAL; + } + *cfg = it->second; + return 0; + } + + m_mapped_info_cache[pid] = {}; + + int r; + std::string path = "/proc/" + stringify(pid) + "/comm"; + std::ifstream ifs; + std::string comm; + ifs.open(path.c_str(), std::ifstream::in); + if (!ifs.is_open()) + return -1; + ifs >> comm; + if (comm != "rbd-nbd") { + return -EINVAL; + } + ifs.close(); + + path = "/proc/" + stringify(pid) + "/cmdline"; + std::string cmdline; + std::vector<const char*> args; + + ifs.open(path.c_str(), std::ifstream::in); + if (!ifs.is_open()) + return -1; + ifs >> cmdline; + + if (cmdline.empty()) { + return -EINVAL; + } + + for (unsigned i = 0; i < cmdline.size(); i++) { + char *arg = &cmdline[i]; + if (i == 0) { + if (strcmp(basename(arg) , "rbd-nbd") != 0) { + return -EINVAL; + } + } else { + args.push_back(arg); + } + + while (cmdline[i] != '\0') { + i++; + } + } + + std::ostringstream err_msg; + Config c; + r = parse_args(args, &err_msg, &c); + if (r < 0) { + return r; + } + + if (c.command != Map && c.command != Attach) { + return -ENOENT; + } + + c.pid = pid; + m_mapped_info_cache.erase(pid); + if (!c.devpath.empty()) { + m_mapped_info_cache[pid] = c; + if (c.devpath != cfg->devpath) { + return -ENOENT; + } + } else { + c.devpath = cfg->devpath; + } + + c.cookie = get_cookie(cfg->devpath); + *cfg = c; + return 0; + } + + int find_attached(const 
std::string &devpath) { + for (auto &entry : fs::directory_iterator("/proc")) { + if (!fs::is_directory(entry.status())) { + continue; + } + + int pid; + try { + pid = boost::lexical_cast<uint64_t>(entry.path().filename().c_str()); + } catch (boost::bad_lexical_cast&) { + continue; + } + + Config cfg; + cfg.devpath = devpath; + if (get_mapped_info(pid, &cfg) >=0 && cfg.command == Attach) { + return cfg.pid; + } + } + + return -1; + } +}; + +struct EncryptionOptions { + std::vector<librbd::encryption_spec_t> specs; + + ~EncryptionOptions() { + for (auto& spec : specs) { + switch (spec.format) { + case RBD_ENCRYPTION_FORMAT_LUKS: { + auto opts = + static_cast<librbd::encryption_luks_format_options_t*>(spec.opts); + ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(), + opts->passphrase.size()); + delete opts; + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS1: { + auto opts = + static_cast<librbd::encryption_luks1_format_options_t*>(spec.opts); + ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(), + opts->passphrase.size()); + delete opts; + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS2: { + auto opts = + static_cast<librbd::encryption_luks2_format_options_t*>(spec.opts); + ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(), + opts->passphrase.size()); + delete opts; + break; + } + default: + ceph_abort(); + } + } + } +}; + +static std::string get_cookie(const std::string &devpath) +{ + std::string cookie; + std::ifstream ifs; + std::string path = "/sys/block/" + devpath.substr(sizeof("/dev/") - 1) + "/backend"; + + ifs.open(path, std::ifstream::in); + if (ifs.is_open()) { + std::getline(ifs, cookie); + ifs.close(); + } + return cookie; +} + +static int load_module(Config *cfg) +{ + ostringstream param; + int ret; + + if (cfg->nbds_max) + param << "nbds_max=" << cfg->nbds_max; + + if (cfg->max_part) + param << " max_part=" << cfg->max_part; + + if (!access("/sys/module/nbd", F_OK)) { + if (cfg->nbds_max || cfg->set_max_part) + cerr 
<< "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded"
           << std::endl;
    return 0;
  }

  // Module not yet loaded: load it, passing along any nbds_max/max_part
  // overrides collected above.
  ret = module_load("nbd", param.str().c_str());
  if (ret < 0)
    cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-ret)
         << std::endl;

  return ret;
}

// Compare the device size the kernel reports in sysfs against the size we
// just configured.  Guards against older-kernel overflow bugs with very
// large images (see comment below).  Returns 0 if the size matches (or
// cannot yet be determined), -EINVAL otherwise.
static int check_device_size(int nbd_index, unsigned long expected_size)
{
  // There are bugs with some older kernel versions that result in an
  // overflow for large image sizes. This check is to ensure we are
  // not affected.

  unsigned long size = 0;
  std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size";
  std::ifstream ifs;
  ifs.open(path.c_str(), std::ifstream::in);
  if (!ifs.is_open()) {
    cerr << "rbd-nbd: failed to open " << path << std::endl;
    return -EINVAL;
  }
  // sysfs reports the size in 512-byte sectors; convert to bytes.
  ifs >> size;
  size *= RBD_NBD_BLKSIZE;

  if (size == 0) {
    // Newer kernel versions will report real size only after nbd
    // connect. Assume this is the case and return success.
    return 0;
  }

  if (size != expected_size) {
    cerr << "rbd-nbd: kernel reported invalid device size (" << size
         << ", expected " << expected_size << ")" << std::endl;
    return -EINVAL;
  }

  return 0;
}

// Extract the numeric index from an nbd device path of the form
// "/dev/nbd<num>".  Returns the index (>= 0) on success or a negative
// error code if the path does not match.
static int parse_nbd_index(const std::string& devpath)
{
  int index, ret;

  ret = sscanf(devpath.c_str(), "/dev/nbd%d", &index);
  if (ret <= 0) {
    // mean an early matching failure. But some cases need a negative value.
+ if (ret == 0) + ret = -EINVAL; + cerr << "rbd-nbd: invalid device path: " << devpath + << " (expected /dev/nbd{num})" << std::endl; + return ret; + } + + return index; +} + +static int try_ioctl_setup(Config *cfg, int fd, uint64_t size, + uint64_t blksize, uint64_t flags) +{ + int index = 0, r; + + if (cfg->devpath.empty()) { + char dev[64]; + const char *path = "/sys/module/nbd/parameters/nbds_max"; + int nbds_max = -1; + if (access(path, F_OK) == 0) { + std::ifstream ifs; + ifs.open(path, std::ifstream::in); + if (ifs.is_open()) { + ifs >> nbds_max; + ifs.close(); + } + } + + while (true) { + snprintf(dev, sizeof(dev), "/dev/nbd%d", index); + + nbd = open(dev, O_RDWR); + if (nbd < 0) { + if (nbd == -EPERM && nbds_max != -1 && index < (nbds_max-1)) { + ++index; + continue; + } + r = nbd; + cerr << "rbd-nbd: failed to find unused device" << std::endl; + goto done; + } + + r = ioctl(nbd, NBD_SET_SOCK, fd); + if (r < 0) { + close(nbd); + ++index; + continue; + } + + cfg->devpath = dev; + break; + } + } else { + r = parse_nbd_index(cfg->devpath); + if (r < 0) + goto done; + index = r; + + nbd = open(cfg->devpath.c_str(), O_RDWR); + if (nbd < 0) { + r = nbd; + cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl; + goto done; + } + + r = ioctl(nbd, NBD_SET_SOCK, fd); + if (r < 0) { + r = -errno; + cerr << "rbd-nbd: the device " << cfg->devpath << " is busy" << std::endl; + close(nbd); + goto done; + } + } + + r = ioctl(nbd, NBD_SET_BLKSIZE, blksize); + if (r < 0) { + r = -errno; + cerr << "rbd-nbd: NBD_SET_BLKSIZE failed" << std::endl; + goto close_nbd; + } + + r = ioctl(nbd, NBD_SET_SIZE, size); + if (r < 0) { + cerr << "rbd-nbd: NBD_SET_SIZE failed" << std::endl; + r = -errno; + goto close_nbd; + } + + ioctl(nbd, NBD_SET_FLAGS, flags); + + if (cfg->io_timeout >= 0) { + r = ioctl(nbd, NBD_SET_TIMEOUT, (unsigned long)cfg->io_timeout); + if (r < 0) { + r = -errno; + cerr << "rbd-nbd: failed to set IO timeout: " << cpp_strerror(r) + << std::endl; + 
goto close_nbd; + } + } + + dout(10) << "ioctl setup complete for " << cfg->devpath << dendl; + nbd_index = index; + return 0; + +close_nbd: + if (r < 0) { + ioctl(nbd, NBD_CLEAR_SOCK); + cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl; + } + close(nbd); +done: + return r; +} + +static void netlink_cleanup(struct nl_sock *sock) +{ + if (!sock) + return; + + nl_close(sock); + nl_socket_free(sock); +} + +static struct nl_sock *netlink_init(int *id) +{ + struct nl_sock *sock; + int ret; + + sock = nl_socket_alloc(); + if (!sock) { + cerr << "rbd-nbd: Could not allocate netlink socket." << std::endl; + return NULL; + } + + ret = genl_connect(sock); + if (ret < 0) { + cerr << "rbd-nbd: Could not connect netlink socket. Error " << ret + << std::endl; + goto free_sock; + } + + *id = genl_ctrl_resolve(sock, "nbd"); + if (*id < 0) + // nbd netlink interface not supported. + goto close_sock; + + return sock; + +close_sock: + nl_close(sock); +free_sock: + nl_socket_free(sock); + return NULL; +} + +static int netlink_disconnect(int index) +{ + struct nl_sock *sock; + struct nl_msg *msg; + int ret, nl_id; + + sock = netlink_init(&nl_id); + if (!sock) + // Try ioctl + return 1; + + nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL); + + msg = nlmsg_alloc(); + if (!msg) { + cerr << "rbd-nbd: Could not allocate netlink message." << std::endl; + goto free_sock; + } + + if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0, + NBD_CMD_DISCONNECT, 0)) { + cerr << "rbd-nbd: Could not setup message." 
         << std::endl;
    goto nla_put_failure;
  }

  // NLA_PUT_* macros jump to the `nla_put_failure` label on error.
  NLA_PUT_U32(msg, NBD_ATTR_INDEX, index);

  // NOTE(review): nl_send_sync() is expected to consume `msg` — confirm
  // against the libnl documentation; on that assumption msg must not be
  // freed after this point.
  ret = nl_send_sync(sock, msg);
  netlink_cleanup(sock);
  if (ret < 0) {
    cerr << "rbd-nbd: netlink disconnect failed: " << nl_geterror(-ret)
         << std::endl;
    return -EIO;
  }

  return 0;

nla_put_failure:
  nlmsg_free(msg);
free_sock:
  netlink_cleanup(sock);
  return -EIO;
}

// Convenience wrapper: parse the nbd index out of `devpath` and issue a
// netlink disconnect for it.  Propagates parse_nbd_index() errors.
static int netlink_disconnect_by_path(const std::string& devpath)
{
  int index;

  index = parse_nbd_index(devpath);
  if (index < 0)
    return index;

  return netlink_disconnect(index);
}

// Tell the kernel the new size of nbd device `nbd_index` via the netlink
// NBD_CMD_RECONFIGURE command.  Returns 0 on success, 1 when netlink is
// unavailable, or -EIO on failure.
static int netlink_resize(int nbd_index, uint64_t size)
{
  struct nl_sock *sock;
  struct nl_msg *msg;
  int nl_id, ret;

  sock = netlink_init(&nl_id);
  if (!sock) {
    cerr << "rbd-nbd: Netlink interface not supported." << std::endl;
    return 1;
  }

  nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);

  msg = nlmsg_alloc();
  if (!msg) {
    cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
    goto free_sock;
  }

  if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
                   NBD_CMD_RECONFIGURE, 0)) {
    cerr << "rbd-nbd: Could not setup message."
<< std::endl; + goto free_msg; + } + + NLA_PUT_U32(msg, NBD_ATTR_INDEX, nbd_index); + NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size); + + ret = nl_send_sync(sock, msg); + if (ret < 0) { + cerr << "rbd-nbd: netlink resize failed: " << nl_geterror(ret) << std::endl; + goto free_sock; + } + + netlink_cleanup(sock); + dout(10) << "netlink resize complete for nbd" << nbd_index << dendl; + return 0; + +nla_put_failure: +free_msg: + nlmsg_free(msg); +free_sock: + netlink_cleanup(sock); + return -EIO; +} + +static int netlink_connect_cb(struct nl_msg *msg, void *arg) +{ + struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlmsg_hdr(msg)); + Config *cfg = (Config *)arg; + struct nlattr *msg_attr[NBD_ATTR_MAX + 1]; + uint32_t index; + int ret; + + ret = nla_parse(msg_attr, NBD_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + if (ret) { + cerr << "rbd-nbd: Unsupported netlink reply" << std::endl; + return -NLE_MSGTYPE_NOSUPPORT; + } + + if (!msg_attr[NBD_ATTR_INDEX]) { + cerr << "rbd-nbd: netlink connect reply missing device index." << std::endl; + return -NLE_MSGTYPE_NOSUPPORT; + } + + index = nla_get_u32(msg_attr[NBD_ATTR_INDEX]); + cfg->devpath = "/dev/nbd" + stringify(index); + nbd_index = index; + + return NL_OK; +} + +static int netlink_connect(Config *cfg, struct nl_sock *sock, int nl_id, int fd, + uint64_t size, uint64_t flags, bool reconnect) +{ + struct nlattr *sock_attr; + struct nlattr *sock_opt; + struct nl_msg *msg; + int ret; + + if (reconnect) { + dout(10) << "netlink try reconnect for " << cfg->devpath << dendl; + + nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL); + } else { + nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, netlink_connect_cb, + cfg); + } + + msg = nlmsg_alloc(); + if (!msg) { + cerr << "rbd-nbd: Could not allocate netlink message." << std::endl; + return -ENOMEM; + } + + if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0, + reconnect ? 
NBD_CMD_RECONFIGURE : NBD_CMD_CONNECT, 0)) { + cerr << "rbd-nbd: Could not setup message." << std::endl; + goto free_msg; + } + + if (!cfg->devpath.empty()) { + ret = parse_nbd_index(cfg->devpath); + if (ret < 0) + goto free_msg; + + NLA_PUT_U32(msg, NBD_ATTR_INDEX, ret); + if (reconnect) { + nbd_index = ret; + } + } + + if (cfg->io_timeout >= 0) + NLA_PUT_U64(msg, NBD_ATTR_TIMEOUT, cfg->io_timeout); + + NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size); + NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, RBD_NBD_BLKSIZE); + NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags); + NLA_PUT_U64(msg, NBD_ATTR_DEAD_CONN_TIMEOUT, cfg->reattach_timeout); + if (!cfg->cookie.empty()) + NLA_PUT_STRING(msg, NBD_ATTR_BACKEND_IDENTIFIER, cfg->cookie.c_str()); + + sock_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS); + if (!sock_attr) { + cerr << "rbd-nbd: Could not init sockets in netlink message." << std::endl; + goto free_msg; + } + + sock_opt = nla_nest_start(msg, NBD_SOCK_ITEM); + if (!sock_opt) { + cerr << "rbd-nbd: Could not init sock in netlink message." << std::endl; + goto free_msg; + } + + NLA_PUT_U32(msg, NBD_SOCK_FD, fd); + nla_nest_end(msg, sock_opt); + nla_nest_end(msg, sock_attr); + + ret = nl_send_sync(sock, msg); + if (ret < 0) { + cerr << "rbd-nbd: netlink connect failed: " << nl_geterror(ret) + << std::endl; + return -EIO; + } + + dout(10) << "netlink connect complete for " << cfg->devpath << dendl; + return 0; + +nla_put_failure: +free_msg: + nlmsg_free(msg); + return -EIO; +} + +static int try_netlink_setup(Config *cfg, int fd, uint64_t size, uint64_t flags, + bool reconnect) +{ + struct nl_sock *sock; + int nl_id, ret; + + sock = netlink_init(&nl_id); + if (!sock) { + cerr << "rbd-nbd: Netlink interface not supported. Using ioctl interface." + << std::endl; + return 1; + } + + dout(10) << "netlink interface supported." 
           << dendl;

  ret = netlink_connect(cfg, sock, nl_id, fd, size, flags, reconnect);
  netlink_cleanup(sock);

  if (ret != 0)
    return ret;

  // Keep the device open for the lifetime of the mapping (stored in the
  // file-scope `nbd` fd, as with the ioctl path).
  nbd = open(cfg->devpath.c_str(), O_RDWR);
  if (nbd < 0) {
    cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
    return nbd;
  }

  return 0;
}

// Execute the user-supplied quiesce hook as `<hook> <devpath> <command>`,
// capturing its stderr for logging.  Returns 0 on success or a negative
// value on spawn failure / non-zero hook exit status.
static int run_quiesce_hook(const std::string &quiesce_hook,
                            const std::string &devpath,
                            const std::string &command) {
  dout(10) << __func__ << ": " << quiesce_hook << " " << devpath << " "
           << command << dendl;

  SubProcess hook(quiesce_hook.c_str(), SubProcess::CLOSE, SubProcess::PIPE,
                  SubProcess::PIPE);
  hook.add_cmd_args(devpath.c_str(), command.c_str(), NULL);
  bufferlist err;
  int r = hook.spawn();
  if (r < 0) {
    err.append("subprocess spawn failed");
  } else {
    // Capture up to 16K of the hook's stderr for the log message below.
    err.read_fd(hook.get_stderr(), 16384);
    r = hook.join();
    if (r > 0) {
      // join() reports a positive exit status; normalize to negative error.
      r = -r;
    }
  }
  if (r < 0) {
    derr << __func__ << ": " << quiesce_hook << " " << devpath << " "
         << command << " failed: " << err.to_str() << dendl;
  } else {
    dout(10) << " succeeded: " << err.to_str() << dendl;
  }

  return r;
}

// SIGINT/SIGTERM handler: wake the server threads via the terminate event
// socket so the mapping shuts down cleanly.
static void handle_signal(int signum)
{
  ceph_assert(signum == SIGINT || signum == SIGTERM);
  derr << "*** Got signal " << sig_str(signum) << " ***" << dendl;

  dout(20) << __func__ << ": " << "notifying terminate" << dendl;

  // terminate_event_sock is initialized by NBDServer::start(), which runs
  // before the signal handlers are registered below.
  ceph_assert(terminate_event_sock.is_valid());
  terminate_event_sock.notify();
}

// Create and start the NBD server for `image`, then install the signal
// handlers that trigger a clean shutdown.  Caller owns the returned server.
static NBDServer *start_server(int fd, librbd::Image& image, Config *cfg)
{
  NBDServer *server;

  server = new NBDServer(fd, image, cfg);
  server->start();

  init_async_signal_handler();
  register_async_signal_handler(SIGHUP, sighup_handler);
  register_async_signal_handler_oneshot(SIGINT, handle_signal);
  register_async_signal_handler_oneshot(SIGTERM, handle_signal);

  return server;
}

// Block until the mapping is torn down: via the netlink disconnect path
// when netlink was used, otherwise inside the NBD_DO_IT ioctl.  Daemonizes
// first when configured to do so.
static void run_server(Preforker& forker, NBDServer *server, bool netlink_used)
{
  if (g_conf()->daemonize) {
global_init_postfork_finish(g_ceph_context); + forker.daemonize(); + } + + if (netlink_used) + server->wait_for_disconnect(); + else + ioctl(nbd, NBD_DO_IT); + + unregister_async_signal_handler(SIGHUP, sighup_handler); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); +} + +// Eventually it should be removed when pidfd_open is widely supported. + +static int wait_for_terminate_legacy(int pid, int timeout) +{ + for (int i = 0; ; i++) { + if (kill(pid, 0) == -1) { + if (errno == ESRCH) { + return 0; + } + int r = -errno; + cerr << "rbd-nbd: kill(" << pid << ", 0) failed: " + << cpp_strerror(r) << std::endl; + return r; + } + if (i >= timeout * 2) { + break; + } + usleep(500000); + } + + cerr << "rbd-nbd: waiting for process exit timed out" << std::endl; + return -ETIMEDOUT; +} + +// Eventually it should be replaced with glibc' pidfd_open +// when it is widely available. + +#ifdef __NR_pidfd_open +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} +#else +static int pidfd_open(pid_t pid, unsigned int flags) +{ + errno = ENOSYS; + return -1; +} +#endif + +static int wait_for_terminate(int pid, int timeout) +{ + int fd = pidfd_open(pid, 0); + if (fd == -1) { + if (errno == ENOSYS) { + return wait_for_terminate_legacy(pid, timeout); + } + if (errno == ESRCH) { + return 0; + } + int r = -errno; + cerr << "rbd-nbd: pidfd_open(" << pid << ") failed: " + << cpp_strerror(r) << std::endl; + return r; + } + + struct pollfd poll_fds[1]; + memset(poll_fds, 0, sizeof(struct pollfd)); + poll_fds[0].fd = fd; + poll_fds[0].events = POLLIN; + + int r = poll(poll_fds, 1, timeout * 1000); + if (r == -1) { + r = -errno; + cerr << "rbd-nbd: failed to poll rbd-nbd process: " << cpp_strerror(r) + << std::endl; + goto done; + } else { + r = 0; + } + + if ((poll_fds[0].revents & POLLIN) == 0) { + cerr << "rbd-nbd: waiting for process exit 
timed out" << std::endl; + r = -ETIMEDOUT; + } + +done: + close(fd); + + return r; +} + +static int do_map(int argc, const char *argv[], Config *cfg, bool reconnect) +{ + int r; + + librados::Rados rados; + librbd::RBD rbd; + librados::IoCtx io_ctx; + librbd::Image image; + + int read_only = 0; + unsigned long flags; + unsigned long size; + unsigned long blksize = RBD_NBD_BLKSIZE; + bool use_netlink; + + int fd[2]; + + librbd::image_info_t info; + + Preforker forker; + NBDServer *server; + + auto args = argv_to_vec(argc, argv); + if (args.empty()) { + cerr << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + if (ceph_argparse_need_usage(args)) { + usage(); + exit(0); + } + + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + g_ceph_context->_conf.set_val_or_die("pid_file", ""); + + if (global_init_prefork(g_ceph_context) >= 0) { + std::string err; + r = forker.prefork(err); + if (r < 0) { + cerr << err << std::endl; + return r; + } + if (forker.is_parent()) { + if (forker.parent_wait(err) != 0) { + return -ENXIO; + } + return 0; + } + global_init_postfork_start(g_ceph_context); + } + + common_init_finish(g_ceph_context); + global_init_chdir(g_ceph_context); + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) { + r = -errno; + goto close_ret; + } + + r = rados.init_with_context(g_ceph_context); + if (r < 0) + goto close_fd; + + r = rados.connect(); + if (r < 0) + goto close_fd; + + r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx); + if (r < 0) + goto close_fd; + + io_ctx.set_namespace(cfg->nsname); + + r = rbd.open(io_ctx, image, cfg->imgname.c_str()); + if (r < 0) + goto close_fd; + + if (cfg->exclusive) { + r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); + if (r < 0) { + cerr << "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r) + << std::endl; + goto close_fd; + } + } + + if (cfg->snapid != CEPH_NOSNAP) { + r = 
image.snap_set_by_id(cfg->snapid); + if (r < 0) { + cerr << "rbd-nbd: failed to set snap id: " << cpp_strerror(r) + << std::endl; + goto close_fd; + } + } else if (!cfg->snapname.empty()) { + r = image.snap_set(cfg->snapname.c_str()); + if (r < 0) { + cerr << "rbd-nbd: failed to set snap name: " << cpp_strerror(r) + << std::endl; + goto close_fd; + } + } + + if (!cfg->encryption_formats.empty()) { + EncryptionOptions encryption_options; + encryption_options.specs.reserve(cfg->encryption_formats.size()); + + for (size_t i = 0; i < cfg->encryption_formats.size(); ++i) { + std::ifstream file(cfg->encryption_passphrase_files[i], + std::ios::in | std::ios::binary); + if (file.fail()) { + r = -errno; + std::cerr << "rbd-nbd: unable to open passphrase file '" + << cfg->encryption_passphrase_files[i] << "': " + << cpp_strerror(r) << std::endl; + goto close_fd; + } + std::string passphrase((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); + file.close(); + + switch (cfg->encryption_formats[i]) { + case RBD_ENCRYPTION_FORMAT_LUKS: { + auto opts = new librbd::encryption_luks_format_options_t{ + std::move(passphrase)}; + encryption_options.specs.push_back( + {RBD_ENCRYPTION_FORMAT_LUKS, opts, sizeof(*opts)}); + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS1: { + auto opts = new librbd::encryption_luks1_format_options_t{ + .passphrase = std::move(passphrase)}; + encryption_options.specs.push_back( + {RBD_ENCRYPTION_FORMAT_LUKS1, opts, sizeof(*opts)}); + break; + } + case RBD_ENCRYPTION_FORMAT_LUKS2: { + auto opts = new librbd::encryption_luks2_format_options_t{ + .passphrase = std::move(passphrase)}; + encryption_options.specs.push_back( + {RBD_ENCRYPTION_FORMAT_LUKS2, opts, sizeof(*opts)}); + break; + } + default: + ceph_abort(); + } + } + + r = image.encryption_load2(encryption_options.specs.data(), + encryption_options.specs.size()); + if (r != 0) { + cerr << "rbd-nbd: failed to load encryption: " << cpp_strerror(r) + << std::endl; + goto close_fd; 
+ } + + // luks2 block size can vary upto 4096, while luks1 always uses 512 + // currently we don't have an rbd API for querying the loaded encryption + blksize = 4096; + } + + r = image.stat(info, sizeof(info)); + if (r < 0) + goto close_fd; + + flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_HAS_FLAGS; + if (!cfg->notrim) { + flags |= NBD_FLAG_SEND_TRIM; + } + if (!cfg->snapname.empty() || cfg->readonly) { + flags |= NBD_FLAG_READ_ONLY; + read_only = 1; + } + + if (info.size > ULONG_MAX) { + r = -EFBIG; + cerr << "rbd-nbd: image is too large (" << byte_u_t(info.size) + << ", max is " << byte_u_t(ULONG_MAX) << ")" << std::endl; + goto close_fd; + } + + size = info.size; + + r = load_module(cfg); + if (r < 0) + goto close_fd; + + server = start_server(fd[1], image, cfg); + + use_netlink = cfg->try_netlink || reconnect; + if (use_netlink) { + // generate when the cookie is not supplied at CLI + if (!reconnect && cfg->cookie.empty()) { + uuid_d uuid_gen; + uuid_gen.generate_random(); + cfg->cookie = uuid_gen.to_string(); + } + r = try_netlink_setup(cfg, fd[0], size, flags, reconnect); + if (r < 0) { + goto free_server; + } else if (r == 1) { + use_netlink = false; + } + } + + if (!use_netlink) { + r = try_ioctl_setup(cfg, fd[0], size, blksize, flags); + if (r < 0) + goto free_server; + } + + r = check_device_size(nbd_index, size); + if (r < 0) + goto close_nbd; + + r = ioctl(nbd, BLKROSET, (unsigned long) &read_only); + if (r < 0) { + r = -errno; + goto close_nbd; + } + + { + NBDQuiesceWatchCtx quiesce_watch_ctx(server); + if (cfg->quiesce) { + r = image.quiesce_watch(&quiesce_watch_ctx, + &server->quiesce_watch_handle); + if (r < 0) { + goto close_nbd; + } + } + + uint64_t handle; + + NBDWatchCtx watch_ctx(nbd, nbd_index, use_netlink, io_ctx, image, + info.size); + r = image.update_watch(&watch_ctx, &handle); + if (r < 0) + goto close_nbd; + + std::string cookie; + if (use_netlink) { + cookie = get_cookie(cfg->devpath); + ceph_assert(cookie == cfg->cookie || cookie.empty()); 
+ } + if (cfg->show_cookie && !cookie.empty()) { + cout << cfg->devpath << " " << cookie << std::endl; + } else { + cout << cfg->devpath << std::endl; + } + + run_server(forker, server, use_netlink); + + if (cfg->quiesce) { + r = image.quiesce_unwatch(server->quiesce_watch_handle); + ceph_assert(r == 0); + } + + r = image.update_unwatch(handle); + ceph_assert(r == 0); + } + +close_nbd: + if (r < 0) { + if (use_netlink) { + netlink_disconnect(nbd_index); + } else { + ioctl(nbd, NBD_CLEAR_SOCK); + cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) + << std::endl; + } + } + close(nbd); +free_server: + delete server; +close_fd: + close(fd[0]); + close(fd[1]); +close_ret: + image.close(); + io_ctx.close(); + rados.shutdown(); + + forker.exit(r < 0 ? EXIT_FAILURE : 0); + // Unreachable; + return r; +} + +static int do_detach(Config *cfg) +{ + int r = kill(cfg->pid, SIGTERM); + if (r == -1) { + r = -errno; + cerr << "rbd-nbd: failed to terminate " << cfg->pid << ": " + << cpp_strerror(r) << std::endl; + return r; + } + + return wait_for_terminate(cfg->pid, cfg->reattach_timeout); +} + +static int do_unmap(Config *cfg) +{ + /* + * The netlink disconnect call supports devices setup with netlink or ioctl, + * so we always try that first. 
+ */ + int r = netlink_disconnect_by_path(cfg->devpath); + if (r < 0) { + return r; + } + + if (r == 1) { + int nbd = open(cfg->devpath.c_str(), O_RDWR); + if (nbd < 0) { + cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl; + return nbd; + } + + r = ioctl(nbd, NBD_DISCONNECT); + if (r < 0) { + cerr << "rbd-nbd: the device is not used" << std::endl; + } + + close(nbd); + + if (r < 0) { + return r; + } + } + + if (cfg->pid > 0) { + r = wait_for_terminate(cfg->pid, cfg->reattach_timeout); + } + + return 0; +} + +static int parse_imgpath(const std::string &imgpath, Config *cfg, + std::ostream *err_msg) { + std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$"); + std::smatch match; + if (!std::regex_match(imgpath, match, pattern)) { + std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl; + return -EINVAL; + } + + if (match[1].matched) { + cfg->poolname = match[1]; + } + + if (match[2].matched) { + cfg->nsname = match[2]; + } + + cfg->imgname = match[3]; + + if (match[4].matched) + cfg->snapname = match[4]; + + return 0; +} + +static int do_list_mapped_devices(const std::string &format, bool pretty_format) +{ + bool should_print = false; + std::unique_ptr<ceph::Formatter> f; + TextTable tbl; + + if (format == "json") { + f.reset(new JSONFormatter(pretty_format)); + } else if (format == "xml") { + f.reset(new XMLFormatter(pretty_format)); + } else if (!format.empty() && format != "plain") { + std::cerr << "rbd-nbd: invalid output format: " << format << std::endl; + return -EINVAL; + } + + if (f) { + f->open_array_section("devices"); + } else { + tbl.define_column("id", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("image", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("device", TextTable::LEFT, 
TextTable::LEFT); + tbl.define_column("cookie", TextTable::LEFT, TextTable::LEFT); + } + + Config cfg; + NBDListIterator it; + while (it.get(&cfg)) { + std::string snap = (cfg.snapid != CEPH_NOSNAP ? + "@" + std::to_string(cfg.snapid) : cfg.snapname); + if (f) { + f->open_object_section("device"); + f->dump_int("id", cfg.pid); + f->dump_string("pool", cfg.poolname); + f->dump_string("namespace", cfg.nsname); + f->dump_string("image", cfg.imgname); + f->dump_string("snap", snap); + f->dump_string("device", cfg.devpath); + f->dump_string("cookie", cfg.cookie); + f->close_section(); + } else { + should_print = true; + tbl << cfg.pid << cfg.poolname << cfg.nsname << cfg.imgname + << (snap.empty() ? "-" : snap) << cfg.devpath << cfg.cookie + << TextTable::endrow; + } + } + + if (f) { + f->close_section(); // devices + f->flush(std::cout); + } + if (should_print) { + std::cout << tbl; + } + return 0; +} + +static bool find_mapped_dev_by_spec(Config *cfg, int skip_pid=-1) { + Config c; + NBDListIterator it; + while (it.get(&c)) { + if (c.pid != skip_pid && + c.poolname == cfg->poolname && c.nsname == cfg->nsname && + c.imgname == cfg->imgname && c.snapname == cfg->snapname && + (cfg->devpath.empty() || c.devpath == cfg->devpath) && + c.snapid == cfg->snapid) { + *cfg = c; + return true; + } + } + return false; +} + +static int find_proc_by_dev(Config *cfg) { + Config c; + NBDListIterator it; + while (it.get(&c)) { + if (c.devpath == cfg->devpath) { + *cfg = c; + return true; + } + } + return false; +} + +static int parse_args(vector<const char*>& args, std::ostream *err_msg, + Config *cfg) { + std::string conf_file_list; + std::string cluster; + CephInitParameters iparams = ceph_argparse_early_args( + args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list); + + ConfigProxy config{false}; + config->name = iparams.name; + config->cluster = cluster; + + if (!conf_file_list.empty()) { + config.parse_config_files(conf_file_list.c_str(), nullptr, 0); + } else { + 
config.parse_config_files(nullptr, nullptr, 0); + } + config.parse_env(CEPH_ENTITY_TYPE_CLIENT); + config.parse_argv(args); + cfg->poolname = config.get_val<std::string>("rbd_default_pool"); + + std::vector<const char*>::iterator i; + std::ostringstream err; + std::string arg_value; + long long snapid; + + for (i = args.begin(); i != args.end(); ) { + if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { + return HELP_INFO; + } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) { + return VERSION_INFO; + } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) { + } else if (ceph_argparse_witharg(args, i, &cfg->io_timeout, err, + "--io-timeout", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-nbd: " << err.str(); + return -EINVAL; + } + if (cfg->io_timeout < 0) { + *err_msg << "rbd-nbd: Invalid argument for io-timeout!"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, &cfg->nbds_max, err, "--nbds_max", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-nbd: " << err.str(); + return -EINVAL; + } + if (cfg->nbds_max < 0) { + *err_msg << "rbd-nbd: Invalid argument for nbds_max!"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, &cfg->max_part, err, "--max_part", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-nbd: " << err.str(); + return -EINVAL; + } + if ((cfg->max_part < 0) || (cfg->max_part > 255)) { + *err_msg << "rbd-nbd: Invalid argument for max_part(0~255)!"; + return -EINVAL; + } + cfg->set_max_part = true; + } else if (ceph_argparse_flag(args, i, "--quiesce", (char *)NULL)) { + cfg->quiesce = true; + } else if (ceph_argparse_witharg(args, i, &cfg->quiesce_hook, + "--quiesce-hook", (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { + cfg->readonly = true; + } else if (ceph_argparse_witharg(args, i, &cfg->reattach_timeout, err, + "--reattach-timeout", (char *)NULL)) { + if 
(!err.str().empty()) { + *err_msg << "rbd-nbd: " << err.str(); + return -EINVAL; + } + if (cfg->reattach_timeout < 0) { + *err_msg << "rbd-nbd: Invalid argument for reattach-timeout!"; + return -EINVAL; + } + } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { + cfg->exclusive = true; + } else if (ceph_argparse_flag(args, i, "--notrim", (char *)NULL)) { + cfg->notrim = true; + } else if (ceph_argparse_witharg(args, i, &cfg->io_timeout, err, + "--timeout", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-nbd: " << err.str(); + return -EINVAL; + } + if (cfg->io_timeout < 0) { + *err_msg << "rbd-nbd: Invalid argument for timeout!"; + return -EINVAL; + } + *err_msg << "rbd-nbd: --timeout is deprecated (use --io-timeout)"; + } else if (ceph_argparse_witharg(args, i, &cfg->format, err, "--format", + (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) { + cfg->pretty_format = true; + } else if (ceph_argparse_flag(args, i, "--try-netlink", (char *)NULL)) { + cfg->try_netlink = true; + } else if (ceph_argparse_flag(args, i, "--show-cookie", (char *)NULL)) { + cfg->show_cookie = true; + } else if (ceph_argparse_witharg(args, i, &cfg->cookie, "--cookie", (char *)NULL)) { + } else if (ceph_argparse_witharg(args, i, &snapid, err, + "--snap-id", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-nbd: " << err.str(); + return -EINVAL; + } + if (snapid < 0) { + *err_msg << "rbd-nbd: Invalid argument for snap-id!"; + return -EINVAL; + } + cfg->snapid = snapid; + } else if (ceph_argparse_witharg(args, i, &arg_value, + "--encryption-format", (char *)NULL)) { + if (arg_value == "luks1") { + cfg->encryption_formats.push_back(RBD_ENCRYPTION_FORMAT_LUKS1); + } else if (arg_value == "luks2") { + cfg->encryption_formats.push_back(RBD_ENCRYPTION_FORMAT_LUKS2); + } else if (arg_value == "luks") { + cfg->encryption_formats.push_back(RBD_ENCRYPTION_FORMAT_LUKS); + } else { + *err_msg << "rbd-nbd: Invalid 
encryption format"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, &arg_value, + "--encryption-passphrase-file", + (char *)NULL)) { + cfg->encryption_passphrase_files.push_back(arg_value); + } else { + ++i; + } + } + + if (cfg->encryption_formats.empty() && + !cfg->encryption_passphrase_files.empty()) { + cfg->encryption_formats.resize(cfg->encryption_passphrase_files.size(), + RBD_ENCRYPTION_FORMAT_LUKS); + } + + if (cfg->encryption_formats.size() != cfg->encryption_passphrase_files.size()) { + *err_msg << "rbd-nbd: Encryption formats count does not match " + << "passphrase files count"; + return -EINVAL; + } + + Command cmd = None; + if (args.begin() != args.end()) { + if (strcmp(*args.begin(), "map") == 0) { + cmd = Map; + } else if (strcmp(*args.begin(), "unmap") == 0) { + cmd = Unmap; + } else if (strcmp(*args.begin(), "attach") == 0) { + cmd = Attach; + } else if (strcmp(*args.begin(), "detach") == 0) { + cmd = Detach; + } else if (strcmp(*args.begin(), "list-mapped") == 0) { + cmd = List; + } else { + *err_msg << "rbd-nbd: unknown command: " << *args.begin(); + return -EINVAL; + } + args.erase(args.begin()); + } + + if (cmd == None) { + *err_msg << "rbd-nbd: must specify command"; + return -EINVAL; + } + + std::string cookie; + switch (cmd) { + case Attach: + if (cfg->devpath.empty()) { + *err_msg << "rbd-nbd: must specify device to attach"; + return -EINVAL; + } + // Allowing attach without --cookie option for kernel without + // NBD_ATTR_BACKEND_IDENTIFIER support for compatibility + cookie = get_cookie(cfg->devpath); + if (!cookie.empty()) { + if (cfg->cookie.empty()) { + *err_msg << "rbd-nbd: must specify cookie to attach"; + return -EINVAL; + } else if (cookie != cfg->cookie) { + *err_msg << "rbd-nbd: cookie mismatch"; + return -EINVAL; + } + } else if (!cfg->cookie.empty()) { + *err_msg << "rbd-nbd: kernel does not have cookie support"; + return -EINVAL; + } + [[fallthrough]]; + case Map: + if (args.begin() == args.end()) { + 
*err_msg << "rbd-nbd: must specify image-or-snap-spec"; + return -EINVAL; + } + if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) { + return -EINVAL; + } + args.erase(args.begin()); + break; + case Detach: + case Unmap: + if (args.begin() == args.end()) { + *err_msg << "rbd-nbd: must specify nbd device or image-or-snap-spec"; + return -EINVAL; + } + if (boost::starts_with(*args.begin(), "/dev/")) { + cfg->devpath = *args.begin(); + } else { + if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) { + return -EINVAL; + } + } + args.erase(args.begin()); + break; + default: + //shut up gcc; + break; + } + + if (cfg->snapid != CEPH_NOSNAP && !cfg->snapname.empty()) { + *err_msg << "rbd-nbd: use either snapname or snapid, not both"; + return -EINVAL; + } + + if (args.begin() != args.end()) { + *err_msg << "rbd-nbd: unknown args: " << *args.begin(); + return -EINVAL; + } + + cfg->command = cmd; + return 0; +} + +static int rbd_nbd(int argc, const char *argv[]) +{ + int r; + Config cfg; + auto args = argv_to_vec(argc, argv); + std::ostringstream err_msg; + r = parse_args(args, &err_msg, &cfg); + if (r == HELP_INFO) { + usage(); + return 0; + } else if (r == VERSION_INFO) { + std::cout << pretty_version_to_str() << std::endl; + return 0; + } else if (r < 0) { + cerr << err_msg.str() << std::endl; + return r; + } + + if (!err_msg.str().empty()) { + cerr << err_msg.str() << std::endl; + } + + switch (cfg.command) { + case Attach: + ceph_assert(!cfg.devpath.empty()); + if (find_mapped_dev_by_spec(&cfg, getpid())) { + cerr << "rbd-nbd: " << cfg.devpath << " has process " << cfg.pid + << " connected" << std::endl; + return -EBUSY; + } + [[fallthrough]]; + case Map: + if (cfg.imgname.empty()) { + cerr << "rbd-nbd: image name was not specified" << std::endl; + return -EINVAL; + } + + r = do_map(argc, argv, &cfg, cfg.command == Attach); + if (r < 0) + return -EINVAL; + break; + case Detach: + if (cfg.devpath.empty()) { + if (!find_mapped_dev_by_spec(&cfg)) { + cerr << "rbd-nbd: " 
<< cfg.image_spec() << " is not mapped" + << std::endl; + return -ENOENT; + } + } else if (!find_proc_by_dev(&cfg)) { + cerr << "rbd-nbd: no process attached to " << cfg.devpath << " found" + << std::endl; + return -ENOENT; + } + r = do_detach(&cfg); + if (r < 0) + return -EINVAL; + break; + case Unmap: + if (cfg.devpath.empty()) { + if (!find_mapped_dev_by_spec(&cfg)) { + cerr << "rbd-nbd: " << cfg.image_spec() << " is not mapped" + << std::endl; + return -ENOENT; + } + } else if (!find_proc_by_dev(&cfg)) { + // still try to send disconnect to the device + } + r = do_unmap(&cfg); + if (r < 0) + return -EINVAL; + break; + case List: + r = do_list_mapped_devices(cfg.format, cfg.pretty_format); + if (r < 0) + return -EINVAL; + break; + default: + usage(); + break; + } + + return 0; +} + +int main(int argc, const char *argv[]) +{ + int r = rbd_nbd(argc, argv); + if (r < 0) { + return EXIT_FAILURE; + } + return 0; +} diff --git a/src/tools/rbd_nbd/rbd-nbd_quiesce b/src/tools/rbd_nbd/rbd-nbd_quiesce new file mode 100755 index 000000000..a62a12b15 --- /dev/null +++ b/src/tools/rbd_nbd/rbd-nbd_quiesce @@ -0,0 +1,31 @@ +#!/bin/sh + +echo "$0 $@" >&2 + +if [ $# -lt 2 ]; then + echo "usage: $0 <dev> <cmd>" >&2 + exit 1 +fi + +dev=$1 +cmd=$2 + +export PATH=/usr/sbin:/usr/bin:/sbin:/bin + +findmnt -S "${dev}" -fno TARGET | +while read mnt; do + case "${cmd}" in + quiesce) + echo "freezing ${mnt}" >&2 + fsfreeze -f "${mnt}" + ;; + unquiesce) + echo "unfreezing ${mnt}" >&2 + fsfreeze -u "${mnt}" + ;; + *) + echo "unknown command ${cmd}" >&2 + exit 1 + ;; + esac +done diff --git a/src/tools/rbd_recover_tool/FAQ b/src/tools/rbd_recover_tool/FAQ new file mode 100644 index 000000000..1655e8530 --- /dev/null +++ b/src/tools/rbd_recover_tool/FAQ @@ -0,0 +1,16 @@ +# author: min chen(minchen@ubuntukylin.com) 2014 2015 + +1. error "get_image_metadata_v2: no meta_header_seq input" +cause: + database is old, refresh database +solution: + ./rbd-recover-tool database + +2. 
Error initializing leveldb: IO error: lock /var/lib/ceph/osd/ceph-0/current/omap/LOCK: Resource temporarily unavailable + ERROR: error flushing journal /var/lib/ceph/osd/ceph-0/journal for object store /var/lib/ceph/osd/ceph-0: (1) Operation not permitted +cause: + when ./rbd-recover-tool database is interrupted , but command has been sent to each osd node, and there is a process reading leveldb and it is LOCKED + if run ./rbd-recover-tool database again, all command are sent to osd nodes again, while previous process is locking leveldb, so all new command + are failed. +solution: + wait until all previous command finished. diff --git a/src/tools/rbd_recover_tool/README b/src/tools/rbd_recover_tool/README new file mode 100644 index 000000000..d289c11ca --- /dev/null +++ b/src/tools/rbd_recover_tool/README @@ -0,0 +1,97 @@ +# author: Min chen(minchen@ubuntukylin.com) 2014 2015 + +------------- ceph rbd recover tool ------------- + + ceph rbd recover tool is used for recovering ceph rbd image, when all ceph services are killed. +it is based on ceph-0.80.x (Firefly and newer) + currently, ceph service(ceph-mon, ceph-osd) evently are not available caused by bugs or sth else +, especially on large scale ceph cluster, so that the ceph cluster can not supply service +and rbd images can not be accessed. In this case, a tool to recover rbd image is necessary. + ceph rbd recover tool is just used for this, it can collect all objects of an image from distributed +osd nodes with the latest pg epoch, and splice objects by offset to a complete image. To make sure +object data is complete, this tool does flush osd journal on each osd node before recovering. + but, there are some limitions: +-need ssh service and unobstructed network +-osd data must be accessed on local disk +-clone image is not supported, while snapshot is supported +-only support relicated pool + +before you run this tool, you should make sure that: +1). 
all processes (ceph-osd, ceph-mon, ceph-mds) are shutdown +2). ssh daemon is running & network is ok (ssh to each node without password) +3). ceph-kvstore-tool is installed(for ubuntu: apt-get install ceph-test) +4). osd disk is not crashed and data can be accessed on local filesystem + +-architecture: + + +---- osd.0 + | +admin_node -----------+---- osd.1 + | + +---- osd.2 + | + ...... + +-files: +admin_node: {rbd-recover-tool common_h epoch_h metadata_h database_h} +osd: {osd_job common_h epoch_h metadata_h} #/var/rbd_tool/osd_job +in this architecture, admin_node acts as client, osds act as server. +so, they run different files: +on admin_node run: rbd-recover-tool <action> [<parameters>] +on osd node run: ./osd_job <function> <parameters> +admin_node will copy files: osd_job, common_h, epoch_h, metadata_h to remote osd node + + +-config file +before you run this tool, make sure write config files first +osd_host_path: osd hostnames and osd data path #user input + osdhost0 /var/lib/ceph/osd/ceph-0 + osdhost1 /var/lib/ceph/osd/ceph-1 + ...... +mon_host: all mon node hostname #user input + monhost0 + monhost1 + ...... +mds_host: all mds node hostname #user input + mdshost0 + mdshost1 + ...... +then, init_env_admin function will create file: osd_host +osd_host: all osd node hostname #generated by admin_job, user ignore it + osdhost0 + osdhost1 + ...... + + +-usage: +rbd-recovert-tool <operation> +<operation> : +database #generating offline database: hobject path, node hostname, pg_epoch and image metadata +list #list all images from offline database +lookup <pool_id>/<image_name>[@[<snap_name>]] #lookup image metadata in offline database +recover <pool_id><image_name>[@[<snap_name>]] [/path/to/store/image] #recover image data according to image metadata + +-steps: +1. stop all ceph services: ceph-mon, ceph-osd, ceph-mds +2. setup config files: osd_host_path, mon_host, mds_host +3. rbd-recover-tool database # wait a long time +4. rbd-recover-tool list +4. 
rbd-recover-tool recover <pool_id>/<image_name>[@[<image_name>]] [/path/to/store/image] + + +-debug & error check +if admin_node operation is failed, you can check it on osd node +cd /var/rbd_tool/osd_job +./osd_job <operation> +<operation> : +do_image_id <image_id_hobject> #get image id of image format v2 +do_image_id <image_header_hobject> #get image id of image format v1 +do_image_metadata_v1 <image_header_hobject> #get image metadata of image format v1, maybe pg epoch is not latest +do_image_metadata_v2 <image_header_hobject> #get image metadata of image format v2, maybe pg epoch is not latest +do_image_list #get all images on this osd(image head hobject) +do_pg_epoch #get all pg epoch and store it in /var/rbd_tool/single_node/node_pg_epoch +do_omap_list #list all omap headers and omap entries on this osd + + +-FAQ +file FAQ lists some common confusing cases while testing diff --git a/src/tools/rbd_recover_tool/TODO b/src/tools/rbd_recover_tool/TODO new file mode 100644 index 000000000..c36d4c947 --- /dev/null +++ b/src/tools/rbd_recover_tool/TODO @@ -0,0 +1,2 @@ + +1.support clone imag diff --git a/src/tools/rbd_recover_tool/common_h b/src/tools/rbd_recover_tool/common_h new file mode 100644 index 000000000..f2df662ad --- /dev/null +++ b/src/tools/rbd_recover_tool/common_h @@ -0,0 +1,412 @@ +#!/usr/bin/env bash +# file: common_h +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+#
+
+my_dir=$(dirname "$0")
+
+# admin node init path
+rbd_image=/var/rbd_tool/rbd_image
+database=$rbd_image/database
+image_coll_v1=$rbd_image/image_coll_v1
+image_coll_v2=$rbd_image/image_coll_v2
+pg_coll=$rbd_image/pg_coll
+images=$rbd_image/images
+images_meta=$rbd_image/images_meta
+default_backup_dir=/var/rbd_tool/default_backup_dir
+
+# admin node: image snap & nosnap
+nosnap= #$rbd_image/<image_name>/nosnap
+snap= #rbd_image/<image_name>/<snap_name>
+
+# osd node init path
+job_path=/var/rbd_tool/osd_job
+single_node=/var/rbd_tool/single_node
+
+# osd node vars
+osd_env= #single_node/$cluster$id/osd_env
+osd_data= #/var/lib/ceph/osd/$cluster-$id
+omap_path= #$osd_data/current/omap
+image_list_v1= #single_node/$cluster-$id/image_list_v1
+image_list_v2= #single_node/$cluster-$id/image_list_v2
+image_v1= #$single_node/$cluster-$id/image_v1
+image_v2= #$single_node/$cluster-$id/image_v2
+pgid_list= #$single_node/$cluster-$id/pgid_list
+node_pg_epoch= #$single_node/$cluster-$id/node_pg_epoch
+omap_list= #$single_node/$cluster-$id/omap_list
+
+# admin node config file
+osd_host_path=$my_dir/config/osd_host_path
+osd_host_mapping= #$pwd_path/config/osd_host_mapping # host --> host_remote: by init_env_admin()
+osd_host=$my_dir/config/osd_host #generated by function init_env_admin()
+mon_host=$my_dir/config/mon_host
+mds_host=$my_dir/config/mds_host
+
+# ssh option
+ssh_option="-o ConnectTimeout=1"
+
+# gen md5sum
+# md5 of $1 is used as a per-osd directory name (see init_env_osd)
+function gen_md5()
+{
+  echo $1|md5sum|awk '{print $1}'
+}
+
+# on each osd node
+# check ceph environment: ssh, ceph-kvstore-tool, osd_data_path
+# exits the whole script on the first failed probe
+function check_ceph_env()
+{
+  local func="check_ceph_env"
+  if [ $# -lt 2 ];then
+    echo "$func: parameters: <node> <data_path>"
+    exit
+  fi
+  local node=$1
+  local data_path=$2
+  local res=
+  local cmd=
+
+  trap 'echo [$node]: ssh failed; exit' INT HUP
+  ssh -o ConnectTimeout=1 $node "echo -n" </dev/null
+  res=$?
+  if [ $res -ne 0 ];then
+    echo "[$node]: ssh failed"
+    exit
+  fi
+
+  cmd=ceph-kvstore-tool
+  trap 'echo [$node]: $cmd failed; exit' INT HUP
+  ssh -o ConnectTimeout=1 $node "$cmd &>/dev/null;" </dev/null
+  res=$?
+  # ceph-kvstore-tool will return 1 with no parameters input
+  # (any other status means the tool is missing or broken)
+  if [ $res -ne 1 ];then
+    echo "[$node]: $cmd not installed"
+    exit
+  fi
+
+  trap 'echo [$node]: stat $data_path failed; exit' INT HUP
+  ssh -o ConnectTimeout=1 $node "stat $data_path &>/dev/null;" </dev/null
+  res=$?
+  if [ $res -ne 0 ];then
+    echo "[$node]: $data_path not exists"
+    exit
+  fi
+}
+
+# osd node context : osd_data_path
+# sets the osd_* / image_* / pgid_list / omap_list globals for one osd,
+# keyed by md5 of the osd data path so several osds can share $single_node
+function init_env_osd()
+{
+  local func="init_env_osd"
+  if [ "$1"x = ""x ];then
+    echo "$func: no osd_data_path input"
+    exit
+  fi
+  osd_data=$1
+  omap_path=$osd_data/current/omap
+
+  if [ ! -e $single_node ];then
+    mkdir -p $single_node
+  fi
+
+  local osd_id=`gen_md5 $osd_data`
+  local osd_dir=$single_node/$osd_id
+
+  if [ ! -e $osd_dir ];then
+    mkdir -p $osd_dir
+  fi
+
+  image_list_v1=$osd_dir/image_list_v1
+  image_list_v2=$osd_dir/image_list_v2
+  image_v1=$osd_dir/image_v1
+  image_v2=$osd_dir/image_v2
+  pgid_list=$osd_dir/pgid_list
+  node_pg_epoch=$osd_dir/node_pg_epoch
+  omap_list=$osd_dir/omap_list
+}
+
+# admin node process file: osd_host_path
+# validates config files, then (re)builds osd_host and osd_host_mapping
+# from osd_host_path, probing every osd node via ssh
+function init_env_admin()
+{
+  local func="init_env_admin"
+  local pwd_path=`pwd`
+  osd_host_mapping=$pwd_path/config/osd_host_mapping
+  if [ ! -s $osd_host_path ];then
+    echo "$func: config/osd_host_path not exists or empty"
+    exit
+  fi
+  if [ ! -e $rbd_image ];then
+    mkdir -p $rbd_image
+  fi
+  if [ ! -e $images ];then
+    mkdir -p $images
+  fi
+
+  if [ ! -s $mon_host ];then
+    echo "$func: config/mon_host not exists or empty"
+    exit
+  fi
+  if [ ! -e $mds_host ];then
+    echo "$func: config/mds_host not exists"
+    exit
+  fi
+
+  # we just judge if osd_host is needed to be updated
+  # (-nt: only regenerate when osd_host_path is newer than osd_host)
+  if [ -s $osd_host ] && [ $osd_host -nt $osd_host_path ];then
+    return
+  fi
+  echo "$func: create osd_host ..."
+  # create file: osd_host and osd_host_mapping
+  >$osd_host
+  >$osd_host_mapping
+  local lines=0
+  local lineno=0
+  while read line
+  do
+    lineno=$(($lineno + 1))
+    if [ "$line"x = ""x ];then
+      continue;
+    fi
+    local node=`echo $line|awk '{print $1}'`
+    if [ "$node"x = ""x ];then
+      echo "$func: osd_host_path : line $lineno: osd hostname not input"
+      rm -rf $osd_host $osd_host_mapping
+      exit
+    fi
+    local data_path=`echo $line|awk '{print $2}'`
+    if [ "$data_path"x = ""x ];then
+      echo "$func: osd_host_path : line $lineno: osd data_path not input"
+      rm -rf $osd_host $osd_host_mapping
+      exit
+    fi
+    lines=$(($lines + 1))
+    # in case : there are servral hostnames on the same node
+    # just need output of `hostname`
+    local hostname_alias=
+    hostname_alias=`ssh $ssh_option $node "hostname" 2>/dev/null </dev/null`
+    if [ "$hostname_alias"x = ""x ];then
+      echo "$func: osd_host_path: line $lineno: $node: get remote hostname alias failed"
+      rm -rf $osd_host $osd_host_mapping
+      exit
+    fi
+    echo "$node $hostname_alias" >>$osd_host_mapping
+    echo $node >> $osd_host
+    # check ceph env on remote osd
+    check_ceph_env $node $data_path
+  done < $osd_host_path
+
+  if [ $lines = 0 ];then
+    echo "$func: no osd host path valid"
+    exit
+  fi
+}
+
+# like init_env_admin but offline: rebuild osd_host from osd_host_path
+# without any ssh probing; no-op when osd_host already exists
+function admin_parse_osd()
+{
+  local func="admin_parse_osd"
+  if [ -s $osd_host ];then
+    return
+  fi
+  # create file: osd_host
+  >$osd_host
+  local lines=0
+  local lineno=0
+  while read line
+  do
+    lineno=$(($lineno + 1))
+    if [ "$line"x = ""x ];then
+      continue;
+    fi
+    local node=`echo $line|awk '{print $1}'`
+    if [ "$node"x = ""x ];then
+      echo "$func: osd_host_path : line $lineno: osd_host not input"
+      exit
+    fi
+    local data_path=`echo $line|awk '{print $2}'`
+    if [ "$data_path"x = ""x ];then
+      echo "$func: osd_host_path : line $lineno: osd_data not input"
+      exit
+    fi
+    lines=$(($lines + 1))
+    echo $node >> $osd_host
+  done < $osd_host_path
+}
+
+# for osd node
+# dump all omap keys of this osd into $omap_list
+function get_omap_list()
+{
+  ceph-kvstore-tool $omap_path list > $omap_list
+}
+
+# replace '_' with '\u' (ceph's escaping of '_' in omap object keys --
+# presumably matches the OSD's key mangling; verify against ceph source).
+# sed uses both the g substitute and the p flag, so the line is printed
+# twice; head -n 1 keeps a single copy.
+function convert_underline()
+{
+  if [ "$1"x = ""x ];then
+    return
+  fi
+
+  echo $1|sed -e 's/_/\\u/gp'|head -n 1
+}
+
+# escape each backslash once (\ -> \\)
+function dump_backslash()
+{
+  echo $*|sed -e 's/\\/\\\\/gp'|head -n 1
+}
+
+# escape each backslash twice (\ -> \\\\), for strings that pass
+# through two levels of shell/awk unquoting
+function dump_dump_backslash()
+{
+  echo $*|sed -e 's/\\/\\\\\\\\/gp'|head -n 1
+}
+
+# escape '_' '.' and '%' ( _ -> \u, . -> %e, % -> %p )
+function char_convert()
+{
+  if [ "$1"x = ""x ];then
+    return
+  fi
+
+  echo $1|sed -e 's/_/\\u/gp' -e 's/\./%e/gp' -e 's/%/%p/gp'|head -n 1
+}
+
+# abort unless every ceph-osd process on $1 has been stopped
+# (recovery reads omap/object files directly and must not race a live osd)
+function check_osd_process()
+{
+  local func="check_osd_process"
+  local host=$1
+  if [ "$1"x = ""x ];then
+    exit
+  fi
+  local cmds="ps aux|grep ceph-osd|grep -v grep"
+  local ret=/tmp/ret.$$$$
+  ssh $ssh_option $host $cmds |tee $ret
+  if [ -s $ret ];then
+    echo "$func: [$host] ceph-osd process is not killed"
+    exit
+  fi
+  rm -f $ret
+}
+
+# leveldb key prefix of the hobject->seq map
+function get_map_header_prefix()
+{
+  echo "_HOBJTOSEQ_"
+}
+
+# grep the omap key matching $1 out of $omap_list; prints the part
+# after the first ':' of the matching line
+function get_map_header_key()
+{
+  local func="get_map_header_key"
+  if [ "$1"x = ""x ];then
+    #echo $func': no keyword input'
+    exit
+  fi
+  local keyword=$1
+  local res=`cat $omap_list| grep $keyword`
+  if [ "$res"x = ""x ];then
+    #echo "$func: map_header_key = $keyword not exists"
+    exit
+  fi
+  echo $res|awk -F ":" '{print $2}'
+}
+
+# fetch <prefix,key> via ceph-kvstore-tool and decode the little-endian
+# uint64 header seq from the hex dump; prints it as a decimal number
+function get_header_seq()
+{
+  local func="get_header_seq"
+  if [ "$1"x == ""x ];then
+    #echo "$func: no prefix input"
+    exit;
+  elif [ "$2"x == ""x ];then
+    #echo "$func: no key input"
+    exit;
+  fi
+  local prefix=$1;
+  local key=$2;
+  local res=/tmp/header_seq.$$$$
+
+  ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res
+  if [ $? != 0 ]; then
+    #echo "$func: <$prefix , $key> not exists" ;
+    exit;
+  fi
+
+  # ceph-kvstore-tool get result like this:
+  # 02 01 7e 00 00 00 12 44 00 00 00 00 00 00 00 00
+  # get header seq bytes:
+  # 12 44 00 00 00 00 00 00
+  # -> 00 00 00 00 00 00 44 12
+  # echo $((16#0000000000004412)) -> 17426 == header_seq
+  local seq=`cat $res |head -n 2|tail -n 1| \
+  awk '
+  BEGIN {
+    FS=":"
+    seq="";
+    i=7;
+  } {
+    split($2, arr, " ")
+    # header_seq uint64 : 8 bytes
+    for (x=7; x>=0; --x) {
+      seq=seq""arr[i+x];
+    }
+  }
+  END {
+    print seq
+  }'`
+  if [ "$seq"x = ""x ];then
+    #echo "$func: get <$prefix , $key> failed"
+    exit;
+  fi
+  rm -f $res
+  echo $((16#$seq))
+}
+
+# get header info key/value
+# $3 selects decoding: "string" strips the 4-byte length prefix,
+# "int" reverses the little-endian byte dump and prints it in decimal
+function get_header_kv()
+{
+  local func="get_header_kv"
+  if [ "$1"x = ""x ];then
+    #echo "$func: no prefix input"
+    exit
+  elif [ "$2"x = ""x ];then
+    #echo "$func: no key input"
+    exit
+  elif [ "$3"x != "string"x ] && [ "$3"x != "int"x ];then
+    #echo "$func: no valid type input, use type (string|int)"
+    exit
+  fi
+
+  local prefix=$1
+  local key=$2
+  local types=$3
+  local res=/tmp/kv.$$$$
+
+  ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res
+  if [ $? != 0 ];then
+    #echo "$func: <$prefix , $key> not exists"
+    exit
+  fi
+
+  if [ "$types"x = "string"x ];then
+    local value=`cat $res |tail -n +2|head -n -1|awk -F ": " '{printf $3}'|sed -n 's/^\.\{4\}//p'`
+    echo $value
+  elif [ "$types"x = "int"x ];then
+    local value=`cat $res |tail -n +2|head -n -1| \
+    awk '
+    BEGIN{
+      FS=":"
+    } {
+      split($2, arr, " ");
+      len=length(arr)
+      for (i=len; i>0; --i) {
+        printf arr[i];
+      }
+    }'`
+    echo $((16#$value))
+  fi
+  rm -f $res
+}
diff --git a/src/tools/rbd_recover_tool/config/mds_host b/src/tools/rbd_recover_tool/config/mds_host
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/tools/rbd_recover_tool/config/mds_host
diff --git a/src/tools/rbd_recover_tool/config/mon_host b/src/tools/rbd_recover_tool/config/mon_host
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/tools/rbd_recover_tool/config/mon_host
diff --git a/src/tools/rbd_recover_tool/config/osd_host_path b/src/tools/rbd_recover_tool/config/osd_host_path
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/tools/rbd_recover_tool/config/osd_host_path
diff --git a/src/tools/rbd_recover_tool/database_h b/src/tools/rbd_recover_tool/database_h
new file mode 100644
index 000000000..4ff20425a
--- /dev/null
+++ b/src/tools/rbd_recover_tool/database_h
@@ -0,0 +1,1134 @@
+#!/usr/bin/env bash
+# file: database_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+
+. 
$my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+
+# globals filled in by lookup_image() and consumed by the copy/rollback steps
+db_image_prefix=
+db_image_size=
+db_order=
+db_snap_id=
+db_snap_image_size=
+found=0
+
+#init osd_data and get all objects path
+# for every osd in $osd_host_path, list all object files under
+# <data_path>/current (one background ssh per osd) into $database/<host>
+function gen_database()
+{
+  local func="gen_database"
+  rm -rf $database/*
+  rm -rf $images
+  rm -rf $raw
+  # NOTE(review): $raw is not assigned anywhere in this file -- the rm
+  # above is a no-op; presumably $rbd_image/raw was intended. Confirm.
+  mkdir -p $database
+  local host=
+  local data_path=
+
+  trap 'echo $func failed; exit;' INT HUP
+  while read line
+  do
+    {
+      host=`echo $line|awk '{print $1}'`
+      data_path=`echo $line|awk '{print $2}'`
+      if [ "$host"x = ""x ] || [ "$data_path"x = ""x ];then
+        continue
+      fi
+      local cmds="find $data_path/current -type f"
+      ssh $ssh_option $host $cmds > $database/$host
+    } &
+  done < $osd_host_path
+  wait
+  echo "$func: finish"
+}
+
+# collect hobjects from database
+# and choose the object whose epoch is latest
+# then, sort the objects by their offsets in image
+# $1=pool_id $2=image_prefix [$3=snap_id]; without $3 the "head"
+# (NOSNAP = uint64(-2)) objects are collected.
+# returns 1 when no object of this image/snap exists in $database.
+function gather_hobject_common()
+{
+  func="gather_hobject_common"
+
+  trap 'echo $func failed; exit;' INT HUP
+  if [ $# -lt 2 ];then
+    echo "$func: parameters: <pool_id> <image_prefix> [<snap_id>]"
+    exit
+  fi
+
+  local pool_id=$1
+  local image_prefix=$2
+  pool_id=$(($pool_id))
+  local hex_pool_id=`printf "%x" $pool_id`
+  # NOSNAP = uint64(-2)
+  local snap_id=`printf "%u" -2`
+  local hex_snap_id="head"
+  local psuffix=
+  local fsuffix="_head"
+  if [ $# = 3 ];then
+    snap_id=$(($3))
+    hex_snap_id=`printf "%x" $snap_id`
+    psuffix="_"$snap_id
+    fsuffix="_"$snap_id
+  fi
+  local underline_image_prefix=`convert_underline $image_prefix`
+  local dump_image_prefix=`dump_backslash $underline_image_prefix`
+  local ddump_image_prefix=`dump_dump_backslash $underline_image_prefix`
+  local images_raw_dir=$rbd_image/raw
+  local image_hobjects_dir=$images/pool_$pool_id/$image_prefix
+  # $images/raw/$image_prefix"_head"
+  local image_hobjects_raw=$images_raw_dir/$image_prefix"$fsuffix"
+  # $images/$image_prefix/$image_prefix"_head"
+  local image_hobjects_stable=$image_hobjects_dir/$image_prefix"$fsuffix"
+
+  if [ ! -e $images_raw_dir ];then
+    mkdir -p $images_raw_dir
+  fi
+  if [ ! -e $image_hobjects_dir ];then
+    local image_metadata=$images_meta/$image_name_in
+    mkdir -p $image_hobjects_dir
+  fi
+
+  pushd $database >/dev/null
+  local pattern="\.[0-9a-f]+__"$hex_snap_id"_[0-9A-F]{8}__"$hex_pool_id
+  >$image_hobjects_raw
+  grep -r -E $dump_image_prefix""$pattern * >$image_hobjects_raw
+  if [ ! -s $image_hobjects_raw ];then
+    echo "$func: image snap [ $image_prefix"$psuffix" ] is empty"
+    return 1 #no data available
+  fi
+  popd >/dev/null
+
+  local offset_dir_temp=$images_raw_dir/$image_prefix"$fsuffix""_dir_temp"
+  rm -rf $offset_dir_temp
+  mkdir -p $offset_dir_temp
+
+  echo "gather hobjects from database: snapid=$snap_id ..."
+
+  # format: ceph2:/var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
+  local tmp_image=$offset_dir_temp/tmpimage.$$$$
+  >$tmp_image
+  cat $image_hobjects_raw |
+  awk -F ':' '
+  BEGIN {
+    pg_coll="'$pg_coll'"
+    tmp_image="'$tmp_image'"
+    osd_host_mapping="'$osd_host_mapping'"
+    snapid="'$snap_id'"
+  }{
+    # $2 = /var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
+
+    split($2, arr1, "/current/"); # {/var/lib/ceph/osd/ceph-1/, 2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2}
+    split(arr1[2], arr2, "/"); # {2.d3_head, rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2}
+    split(arr2[1], arr3, "_head"); # {2.d3,}
+
+    hobject=$2;
+    data_path=arr1[1];
+    gsub(/\\u/, "\\\\\\\\u", hobject); # dump backslash to delay escape (\ -> \\)
+    "awk \"\\$1 == \\\""$1"\\\" {print \\$2}\" "osd_host_mapping" | head -n 1" | getline node
+    pgid = arr3[1];
+
+    len=length(arr2);
+    offset_hobject=arr2[len] # rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
+    split(offset_hobject, offarr1, "."); # {rb, 0, 1293, 6b8b4567, 000000000002__head_FB425CD3__2}
+    len1=length(offarr1)
+    offset_p=offarr1[len1] # 000000000002__head_FB425CD3__2
+    split(offset_p, offarr2, "__"); # {000000000002, head_FB425CD3, 2}
+    offset=offarr2[1]; # 000000000002
+
+    system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \" >>"tmp_image);
+    #system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \"");
+    #print node" "pgid" "hobject" "offset" "snapid
+
+    # find pg_epoch from pg_coll database
+    system("awk \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll" >>"tmp_image);
+    #system("awk \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll);
+  }'
+
+  # sort by offset (field 4), then pg_epoch descending (field 6), then
+  # node; the second -u sort keeps only the newest replica per offset
+  local sort_image=$offset_dir_temp/sortimage.$$$$
+  >$sort_image
+  sort -t ' ' -k 4.1,4 -k 6.1nr -k 1.1,1 $tmp_image >$sort_image
+  sort -t ' ' -k 4.1,4 -u $sort_image > $image_hobjects_stable
+
+  #rm -rf $offset_dir_temp
+  return 0
+}
+
+# gather the head (nosnap) hobjects of an image
+function gather_hobject_nosnap()
+{
+  gather_hobject_common $1 $2
+}
+
+# gather the hobjects of one snapshot of an image
+function gather_hobject_snap()
+{
+  gather_hobject_common $1 $2 $3
+}
+
+# select the max pg_epoch item of the same $field
+# if no same $field, choose the first
+# format : "node $field pg_epoch"
+function choose_epoch()
+{
+  cat $1|sort -t ' ' -k 3.1,3nr -k 2.1,2n |head -n 1;
+}
+
+# lookup image info , after scatter_node_jobs & gather_node_infos
+function lookup_image()
+{
+  local func="lookup_image"
+  if [ $# -lt 2 ];then
+    echo "$func: parameters error <pool_id> <image_name> [<snap_name>]"
+  fi
+  local pool_id=$1
+  local image_name=$2
+  local snap_name=$3
+  pool_id=$((pool_id))
+  echo -e "$func: pool_id = $pool_id\timage_name = $image_name\tsnap_name = $snap_name"
+  if [ $pool_id -lt 0 ];then
+    echo "$func: pool_id must great than zero"
+    exit
+  fi
+  local hex_pool_id=`printf "%x" $pool_id`
+  input_image $image_name
+  local node=
+  local item=/tmp/item.$$$$
+  local img_name=`dump_backslash $image_name`
+
+  local image_format=0
+  local image_id_hobject=
+  local image_header_hobject=
+  local result=/tmp/tmp_result.$$$$
+  local res1=/tmp/tmp_res1.$$$$
+  local res2=/tmp/tmp_res2.$$$$
+  local data_path=
+
+  
# image format v1 + { + cat $image_coll_v1|grep -E "/$img_name\.rbd__head_[0-9A-F]{8}__$hex_pool_id" >$res1 + if [ -s $res1 ];then + echo -n "$func: rbd_header_hobject = " + choose_epoch $res1| tee $item + #choose_epoch $res1 > $item + + if [ -e $item ];then + node=`cat $item|awk '{print $1}'` + image_header_hobject=`cat $item|awk '{print $2}'` + if [ "$node"x = ""x ];then + echo "$func: v1 node is NULL" + exit + fi + if [ "$image_header_hobject"x = ""x ];then + echo "$func: v1 image_header_hobject is NULL" + exit + fi + rm -f $item + fi + + image_format=1 + echo -e "image_name:\t$image_name_in" + echo -e "image_format:\t$image_format" + data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'` + + >$result + cmds="bash $job_path/osd_job do_image_metadata_v1 $data_path `dump_backslash $image_header_hobject` $snap_name" + ssh $ssh_option $node $cmds | tee $result + fi + } + + # image format v2 + { + cat $image_coll_v2|grep -E "/rbd\\\\uid\."$img_name"__head_[0-9A-F]{8}__$hex_pool_id" >$res2 + if [ -s $res2 ];then + echo -n "$func: rbd_id_hobject = " + choose_epoch $res2 | tee $item + #choose_epoch $res2 > $item + + if [ -e $item ];then + node=`cat $item|awk '{print $1}'` + image_id_hobject=`cat $item|awk '{print $2}'` + if [ "$node"x = ""x ];then + echo "$func: v2 node is NULL(to get image_id_hobject)" + exit + fi + if [ "$image_id_hobject"x = ""x ];then + echo "$func: v2 image_id_hobject is NULL" + exit + fi + rm -f $item + fi + + check_osd_process $node + image_format=2 + + local tid=/tmp/image_id.$$$$ + data_path=`echo $image_id_hobject|awk -F "/current" '{print $1}'` + >$tid + cmds="bash $job_path/osd_job do_image_id $data_path `dump_backslash $image_id_hobject`" + ssh $ssh_option $node $cmds > $tid + + local image_id=`cat $tid` + rm -f $tid + + #get image_header_hobject + pushd $database >/dev/null + local pattern="header\."$image_id"__head_[0-9A-F]{8}__$hex_pool_id" + local tcoll=/tmp/tmp_image_head_coll.$$$$ + + # hostname(by command hostname) in 
$pg_coll maybe different from hostname in tcoll(input by user) + # t_host: hostname read from config file ($tcoll) + # t_host_remote: $(hostname) on osd node ($pg_coll) + grep -r -E $pattern * >$tcoll + popd >/dev/null + + local t_host=(`cat $tcoll|awk -F ":" '{print $1}'`) + local t_pgid=(`cat $tcoll|awk -F ":" '{print $2}'|sed -n 's/.*\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\/.*/\1/p'`) + local t_hobject=(`cat $tcoll|awk -F ":" '{print $2}'`) + local t_data_path=(`cat $tcoll|awk -F ":" '{split($2, arr, "/current/"); print arr[1];}'`) + rm -f $tcoll + declare -a t_host_remote + + #if there is no failed pg migration, number of t_host is replica num + #replica num : 3, 4, 5 ... + local t_hostname=/tmp/t_hostname.$$$$ + for ((i=0; i<${#t_host[*]}; i++)) + do + ssh $ssh_option ${t_host[$i]} "hostname" >$t_hostname + if [ $? != 0 ];then + echo "$func: ${t_host[$i]} get host_remote failed" + exit + fi + t_host_remote[$i]=`cat $t_hostname` + done + rm -f $t_hostname + + local t_item=/tmp/tmp_item.$$$$ + local tmp_item=/tmp/tmp_tmp_item.$$$$ + + >$tmp_item + for ((i=0; i<${#t_host_remote[*]}; i++ )) + do + local node=${t_host_remote[$i]} + local pgid=${t_pgid[$i]} + awk '$1 == "'"$node"'" && $2 == "'"$pgid"'" {print}' $pg_coll >>$tmp_item + done + + # t_item: <remote_hostname> <pgid> <epoch> <data_path> + sort -u $tmp_item >$t_item + rm -f $tmp_item + + local entry=`choose_epoch $t_item` #t_host_remote + rm -f $t_item + + node=`echo $entry|awk '{print $1}'` + data_path=`echo $entry|awk '{print $4}'` + if [ "$node"x = ""x ];then + echo "$func: v2 node is NULL (to get image_header_hobject)" + exit + fi + + for ((i=0; i<${#t_host_remote[*]}; i++)) + do + if [ "${t_host_remote[$i]}"x = "$node"x ] && [ "${t_data_path[$i]}"x = "$data_path"x ];then + image_header_hobject=${t_hobject[$i]} + break + fi + done + + if [ "$image_id_hobject"x = ""x ];then + echo "$func: v2 image_header_hobject is NULL" + exit + fi + + check_osd_process $node + + echo "$func: rbd_header_hobject = $node 
$image_header_hobject" + echo -e "image_name:\t$image_name_in" + echo -e "image_format:\t$image_format" + + #data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'` + >$result + cmds="bash $job_path/osd_job do_image_metadata_v2 $data_path $image_id `dump_backslash $image_header_hobject` $snap_name" + ssh $ssh_option $node $cmds | tee $result + fi + } + + if [ ! -s $result ];then + echo "$func: $image_name_in not exists" + exit + fi + + # to assign value to global variable + db_image_prefix=`cat $result|awk '/^(object_prefix|block_name):/{print $2}'` + if [ "$db_image_prefix"x = ""x ];then + echo "$func: image_prefix is NULL" + exit + fi + + db_image_size=`cat $result|awk '/^image_size:/{print $2}'` + db_order=`cat $result|awk '/^order:/{print $2}'` + if [ "$snap_name"x != ""x ];then + db_snap_id=`cat $result|awk '/^snapshot:/{print $2}'` + if [ "$db_snap_id"x = ""x ];then + echo "$func: $image_name_in@$snap_name NOT EXISTS" + exit + fi + db_snap_image_size=`cat $result|awk '/^snapshot:/{print $4}'` + else + #save snaplist + local image_snaplist=$images/pool_$pool_id/$image_name_in/@snaplist + local image_dir=$images/pool_$pool_id/$image_name_in + if [ ! 
-e $image_dir ];then + mkdir -p $image_dir + fi + cat $result|awk '/^snapshot:/{print $2" "$3" "$4}' >$image_snaplist + fi + found=1 + rm -f $result +} + +function list_images() +{ + echo "=============== format ==============" + echo "format: <pool_id>/<image_name>" + echo "================ v1: ================" + #sed -n 's/\(.*\)\/\(.*\)\.rbd__\(.*\)/\2/p' $image_coll_v1|sort -u|sed -e 's/\\u/_/g' + sed -n 's/.*\/\(.*\)\.rbd__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v1|sort -u|awk '{print strtonum("0x"$1)"/"$2;}'|sed -e 's/\\u/_/g' + echo "================ v2: ================" + #sed -n 's/\(.*\)\/rbd\\uid.\(.*\)__\(head.*\)/\2/p' $image_coll_v2|sort -u|sed 's/\\u/_/g' + sed -n 's/.*\/rbd\\uid.\(.*\)__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v2|sort -u|awk '{print strtonum("0x"$1)"/"$2}'|sed 's/\\u/_/g' +} + +# lookup image metadata +# and +# collect hobjects of image with the latest pg epoch +function discover_image_nosnap() +{ + local func="discover_image_nosnap" + echo "$func ..." + local pool_id=$1 + local image_name=$2 + pool_id=$(($pool_id)) + lookup_image $pool_id $image_name # assign $image_prefix + gather_hobject_nosnap $pool_id $db_image_prefix + if [ $? -ne 0 ];then + exit + fi + local image_hobjects_stable_nosnap=$images/pool_$pool_id/$db_image_prefix/$db_image_prefix"_head" + local image_hobjects_dir=$images/pool_$pool_id/$image_name_in + if [ ! 
-e $image_hobjects_dir ];then
+    mkdir -p $image_hobjects_dir
+  fi
+  # mv image_prefix to image_name
+  mv $image_hobjects_stable_nosnap $image_hobjects_dir/$image_name_in
+  rm -rf $images/pool_$pool_id/$db_image_prefix
+}
+
+# get the offset snapid object
+# if there is no object, choose the smallest snapid which is greater than current snapid
+# $1=offset string, $2=snapid, $3=snaplist file, $4=snapset dir of @<snapid> files
+function get_object_clone()
+{
+  local func="get_object_clone"
+  if [ $# -lt 4 ];then
+    exit
+  fi
+
+  local object_offset_string=$1
+  local snapid=$2
+  local snaplist_path=$3
+  local snapset_output_dir=$4
+
+  # snapid in desc
+  local snap_coll_arr=(`
+  cat $snaplist_path|awk '{ if ($1 >= '"$snapid"') print "'"$snapset_output_dir"'/@"$1}'`)
+
+  local hex_snapid=`printf "%x" $snapid`
+  pushd $snapset_output_dir >/dev/null
+  # get object with the smallest snapid greater than current snapid
+  awk '$4 == "'"$object_offset_string"'" && $5 >= '$snapid' {print}' `echo ${snap_coll_arr[@]}` |tail -n 1
+  popd >/dev/null
+}
+
+# gather hobject for each snapid
+# for every snapshot listed in $3, collect its hobjects (via
+# gather_hobject_snap) into $4/@<snapid>; an empty file marks an
+# empty snapshot
+function gen_snapset_hobject()
+{
+  # bugfix: report the real function name in messages
+  # (was "gen_image_snapset", which does not exist)
+  local func="gen_snapset_hobject"
+  echo "$func ..."
+  if [ $# -lt 4 ];then
+    echo "$func: parameters: <pool_id> <image_prefix> <snaplist_path> <snapset_output_dir>"
+    exit
+  fi
+  local pool_id=$1
+  local image_prefix=$2
+  local snaplist_path=$3
+  local snapset_output_dir=$4
+  pool_id=$(($pool_id))
+  OIFS=$IFS
+  IFS=$'\n'
+  local snaparr=(`cat $snaplist_path`)
+  # gather hobject for each snapshot
+  trap 'echo $func failed; exit;' INT HUP
+  for line in ${snaparr[@]}
+  do
+    OOIFS=$IFS
+    IFS=$' '
+    local field=(`echo $line`)
+    local snapid=${field[0]}
+    local image_hobjects_stable_snap=$images/pool_$pool_id/$image_prefix/$image_prefix"_"$snapid
+    local image_snap=$snapset_output_dir/@$snapid
+    gather_hobject_snap $pool_id $image_prefix $snapid
+    local res=$?
+    if [ $res -ne 0 ];then
+      touch $image_snap
+    else
+      mv $image_hobjects_stable_snap $image_snap
+    fi
+    IFS=$OOIFS
+  done
+  IFS=$OIFS
+}
+
+# lookup image metadata and get snapid hobjects
+# builds <image>@<snapid> (snap objects) and <image>@<snapid>@head
+# (their head counterparts); requires the nosnap pass to have run first
+function discover_image_snap()
+{
+  local func="discover_image_snap"
+  echo "$func ..."
+  if [ $# -lt 3 ];then
+    echo "$func: parameters: <pool_id> <image_name> [<snap_name>]"
+    exit
+  fi
+  local pool_id=$1
+  local image_name=$2
+  local snap_name=$3
+  pool_id=$(($pool_id))
+  #mkdir -p $images/$image_prefix
+  lookup_image $pool_id $image_name $snap_name # input image_name and snap_name to lookup metadata and snap_id
+  if [ "$db_snap_id"x = ""x ];then
+    echo "$func: lookup image failed to gen snapid"
+    exit
+  fi
+  local image_hobjects_dir_prefix=$images/pool_$pool_id/$db_image_prefix
+  local image_nosnap=$images/pool_$pool_id/$image_name_in
+  #check if image nosnap recovered
+  if [ ! -s $image_nosnap ];then
+    echo "$func: please recover image nosnap before recover with snap"
+    rm -rf $image_hobjects_dir_prefix
+    exit
+  fi
+  local image_hobject_dir=$images/pool_$pool_id/$image_name_in
+  local image_snap_hobject=$image_hobject_dir/$image_name_in@$db_snap_id
+  local image_snap_hobject_head=$image_hobject_dir/$image_name_in@$db_snap_id@head
+  local image_snaplist=$image_hobject_dir/@snaplist
+  local image_snapset_dir=$image_hobject_dir/@snapset_dir
+  local image_head=$image_hobject_dir/$image_name_in
+  if [ ! -e $image_hobject_dir ];then
+    mkdir -p $image_hobject_dir
+  fi
+  # only gen snapset one time
+  if [ ! -e $image_snapset_dir ];then
+    mkdir -p $image_snapset_dir
+    gen_snapset_hobject $pool_id $db_image_prefix $image_snaplist $image_snapset_dir
+
+  fi
+
+  echo "$func: will get object clone ..."
+  >$image_snap_hobject
+  >$image_snap_hobject_head
+
+  trap 'echo $func failed; exit;' INT HUP
+  # get each offset 's snapid hobject
+  while read line
+  do
+    #echo $line
+    OOIFS=$IFS
+    IFS=$' '
+    local field=(`echo $line`)
+    local offset_string=${field[3]}
+    IFS=$OOIFS
+    local entry=`get_object_clone $offset_string $db_snap_id $image_snaplist $image_snapset_dir`
+    if [ "$entry"x != ""x ];then
+      echo $entry >> $image_snap_hobject
+      echo `dump_backslash $line` >> $image_snap_hobject_head
+    fi
+  done < $image_head
+  rm -rf $image_hobjects_dir_prefix
+}
+
+# after discover_image_nosnap
+# collect objects from osds one by one in sequence
+function copy_image_nosnap_single_thread()
+{
+  local func="copy_image_nosnap_single_thread"
+  echo "$func ..."
+  if [ $# -lt 3 ];then
+    echo "$func: parameters: <pool_id> <image_hobjects> <backup_dir>"
+    exit
+  fi
+  local pool_id=$1
+  local image_hobjects=$2
+  local backup_dir=$3
+  pool_id=$(($pool_id))
+
+  # make sure lookup_image first
+  if [ $found = 0 ];then
+    echo "$func: image not found, maybe forget to discover_image"
+    exit
+  fi
+  if [ ! -e $backup_dir ];then
+    mkdir -p $backup_dir
+  fi
+
+  local image_dir=$backup_dir/pool_$pool_id/$image_name_in
+  local image_file=$image_dir/$image_name_in
+  local CURRENT=$image_dir/@CURRENT
+  local LOCK=$image_dir/@LOCK
+  if [ ! 
-e $image_dir ];then
+    mkdir -p $image_dir
+  fi
+  if [ -e $LOCK ];then
+    echo "$func: $LOCK is locked by other process"
+    exit
+  else
+    touch $LOCK
+  fi
+
+  >$image_file
+  truncate -s $db_image_size $image_file
+  echo "head">$CURRENT
+
+  local count=$(($db_image_size >> $db_order))
+  local start=`cat $image_hobjects|head -n 1|awk '{print $4}'`
+  local end=`cat $image_hobjects|tail -n 1|awk '{print $4}'`
+  local entry_count=`cat $image_hobjects|wc -l`
+
+  local char_bits=$((`echo $start|wc -c` -1 ))
+  local format="%0"$char_bits"x"
+
+  local expect_start=`printf $format 0`
+  local expect_end=`printf $format $(($count -1 ))`
+
+  echo -e "object_count\t$entry_count"
+  echo -e "expect\t\t[$expect_start ~ $expect_end] count:$count"
+  echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+  local icount=0
+  local istart=
+  local iend=
+  local percent=
+
+  trap 'echo $func failed; exit;' INT HUP
+  local unit=$((1<<$db_order))
+  while read line
+  do
+    {
+      icount=$(($icount+1))
+      node=`echo $line|awk '{print $1}'`
+      hobject=`echo $line|awk '{print $3}'`
+      offset=`echo $line|awk '{print $4}'`
+      off=$((16#$offset))
+      if [ $icount = 1 ];then
+        istart=$offset
+      fi
+      hobject=`dump_backslash $hobject`
+      iend=$offset
+      sshcmd="cat $hobject"
+      ssh $ssh_option $node $sshcmd < /dev/null | dd of=$image_file bs=$unit seek=$off conv=notrunc 2>/dev/null
+      percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+      tput sc #record current cursor
+      echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+      if [ $icount != $entry_count ];then
+        tput rc # backport most recent cursor
+      fi
+    }
+  done < $image_hobjects
+
+  echo
+  echo -n "size: "
+  ls -lh $image_file|awk '{print $5"\t"$9}'
+  echo -n "du: "
+  du -h $image_file
+  #unlock
+  rm -f $LOCK
+}
+
+
+# ssh copy snap_object & head_object from osd to admin node
+# copy all snapshot objects
+# and
+# all head objects which have the same offset as snapshot objects
+function collect_image_snap_objects()
+{
+  local func="collect_image_snap_objects"
+  #$1=backup_dir, $2=snap_name, $3=snap_hobjects, $4=head_hobjects
+  if [ $# -lt 6 ];then
+    echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>"
+    exit
+  fi
+
+  local pool_id=$1
+  local image_name=$2
+  local snap_id=$3
+  local snap_hobjects=$4 #snap hobjects info
+  local head_hobjects=$5 #head hobjects info
+  local backup_dir=$6
+  pool_id=$(($pool_id))
+
+  local head_dir=$backup_dir/pool_$pool_id/$image_name/@head
+  local snap_dir=$backup_dir/pool_$pool_id/$image_name/@$snap_id
+  local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
+
+  if [ ! -e $head_dir ];then
+    mkdir -p $head_dir
+  fi
+  if [ ! -e $snap_dir ];then
+    mkdir -p $snap_dir
+  fi
+
+  local snap_node= #osd node
+  local snap_hobject= #hobject path with snapid on osd
+  local snap_offset=
+  local snap_filename=
+
+  local head_node=
+  local head_hobject=
+  local head_offset=
+  local head_filename=
+
+  # ignore if there is no object in snapshot(empty )
+  if [ ! -s $snap_hobjects ];then
+    echo "$func: $snap_hobjects is empty"
+    return 0
+  fi
+  local start=`head -n 1 $snap_hobjects|awk '{print $4}'`
+  local end=`tail -n 1 $snap_hobjects|awk '{print $4}'`
+  local entry_count=`cat $snap_hobjects|wc -l`
+  # bugfix: the sorted-order check compared $first_offset/$last_offset,
+  # which are never assigned anywhere in this file, so the arithmetic
+  # expansion failed on empty strings and the guard never worked; the
+  # values just computed above are $start and $end.
+  if [ $((16#$start)) -gt $((16#$end)) ];then
+    echo "$func: $snap_hobjects not sorted"
+    return 1
+  fi
+
+  # just assert if ignored empty snapshot
+  if [ "$start"x = ""x ] || [ "$end"x = ""x ];then
+    return 1
+  fi
+
+  # speed up copy snapshot
+  # lookup the corresponding head hobject of snap hobject
+  # use command: grep <offset> <head hobjects>
+  #
+  # eg.
+  # head hobjects: (32 objects, snapid = uint64(-2) = 18446744073709551614)
+  # ceph1 29.4d /var/lib/ceph/osd/ceph-0/current/29.4d_head/rb.0.1c414.6b8b4567.000000000000__head_EC2C1C4D__1d 000000000000 18446744073709551614 869
+  # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__head_0F439A8C__1d 000000000001 18446744073709551614 867
+  # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__head_FC55706A__1d 000000000002 18446744073709551614 869
+  # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__head_20A6328B__1d 000000000003 18446744073709551614 869
+  # ceph2 29.75 /var/lib/ceph/osd/ceph-1/current/29.75_head/rb.0.1c414.6b8b4567.000000000004__head_AC5ADB75__1d 000000000004 18446744073709551614 867
+  # ceph2 29.23 /var/lib/ceph/osd/ceph-1/current/29.23_head/rb.0.1c414.6b8b4567.000000000005__head_1FDEA823__1d 000000000005 18446744073709551614 867
+  # ......
+  # ceph1 29.34 /var/lib/ceph/osd/ceph-0/current/29.34_head/rb.0.1c414.6b8b4567.00000000001f__head_52373734__1d 00000000001f 18446744073709551614 869
+  #
+  # snap hobjects: (3 objects, snapid >= 29)
+  # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__1f_0F439A8C__1d 000000000001 31 867
+  # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__1e_FC55706A__1d 000000000002 30 869
+  # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__1d_20A6328B__1d 000000000003 29 869
+  #
+  # so find out offset in head hobjects line number:
+  # snap hobjects: 000000000001 ---> head hobjects: 2 (n1)
+  # snap hobjects: 000000000003 ---> head hobjects: 4 (n2)
+  #
+  # finally , grep range from the whole file [1 ~ N] shranked to part of file [n1 ~ n2]
+  # the worst case : [n1 ~ n2] = [1 ~ N], means no shranking
+
+  # get the line number of the start offset in head hobjects
+  local n1=`grep -n $start $head_hobjects|head -n 1|cut -d ":" -f 1`
+  # get the line number of the end offset in head hobjects
+  local n2=`grep -n $end $head_hobjects|head -n 1|cut -d ":" -f 1`
+
+  local icount=0
+  local istart=
+  local iend=
+  local percent=
+
+  OIFS=$IFS
+  IFS=$'\n'
+
+  #assume file:snap_hobjects is not very large, and can be loaded into memory
+  local snap_arr=(`cat $snap_hobjects`)
+  local snap_tmp=/tmp/snaptmp.$$$$
+
+  # snap_tmp:
+  # consists of snap hobject or head hobject
+  # select lineno range: [n1 ~ n2]
+  head -n $n2 $head_hobjects|tail -n $(($n2-$n1+1)) >$snap_tmp
+
+  echo "copy image snap/head objects from osd ..."
+  echo -e "object_count\t$entry_count"
+  echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+  trap 'echo $func failed; exit;' INT HUP
+  for line in ${snap_arr[*]}
+  do
+    icount=$(($icount+1))
+
+    OOIFS=$IFS
+    IFS=$' '
+
+    local arr=(`echo $line`)
+    snap_node=${arr[0]}
+    snap_hobject=${arr[2]}
+    snap_offset=${arr[3]}
+    snap_filename=$snap_dir/$snap_offset
+
+    if [ $icount = 1 ];then
+      istart=$snap_offset
+    fi
+    iend=$snap_offset
+
+    #lookup corresponding head hobject of snap hobject
+    local res=`grep $snap_offset $snap_tmp|head -n 1`
+    if [ "$res"x = ""x ];then
+      echo "$func: image object[ $snap_offset ] missing"
+      exit
+    fi
+
+    local arr2=(`echo $res`)
+    head_node=${arr2[0]}
+    head_hobject=${arr2[2]}
+    head_offset=${arr2[3]}
+    head_filename=$head_dir/$head_offset
+
+    # just copy object(snap/head) if it does not exist
+    if [ ! -e $snap_filename ];then
+      ssh $ssh_option $snap_node "cat $snap_hobject" > $snap_filename
+    fi
+    if [ ! -e $head_filename ];then
+      ssh $ssh_option $head_node "cat $head_hobject" > $head_filename
+    fi
+    IFS=$OOIFS
+
+    percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+    tput sc #record current cursor
+    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+    if [ $icount != $entry_count ];then
+      tput rc # backport most recent cursor
+    fi
+  done
+  echo
+  IFS=$OIFS
+  rm -f $snap_tmp
+  return 0
+}
+
+# copy all snap objects and corresponding head objects from osds
+# in single process
+function copy_image_snap_single_thread()
+{
+  local func="copy_image_snap_single_thread"
+  if [ $# -lt 6 ];then
+    echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>"
+    exit
+  fi
+  local pool_id=$1
+  local image_name=$2
+  local snap_id=$3
+  local snap_hobjects=$4
+  local head_hobjects=$5
+  local backup_dir=$6
+  pool_id=$(($pool_id))
+
+  local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
+  local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK
+  #lock
+  if [ -e $LOCK ];then
+    echo "$func: $LOCK is locked by other process"
+    exit
+  else
+    touch $LOCK
+  fi
+  collect_image_snap_objects $pool_id $image_name $snap_id $snap_hobjects $head_hobjects $backup_dir
+  #unlock
+  rm -f $LOCK
+}
+
+# after all snap objects and necessary head objects are copied,
+# just pick appropriate head objects and snap objects and write them to image
+# in order to rollback image to snapshot
+#
+# init: image is created by copy_image_nosnap_single_thread firstly
+#
+# all output include 3 parts:
+# <image> <head objects> <snap objects>
+#
+# head objects1 --- snap1 objects
+# head objects2 --- snap2 objects
+# image head objects3 --- snap3 objects
+# ......
# head objectsN --- snapN objects
#
# how to rollback:
# firstly rollback to head, secondly write <snapX objects>
# head  = <image> + <head objects>
# snap1 = <image> + <head objects> + <snap1 objects>
# snap2 = <image> + <head objects> + <snap2 objects>
# snap3 = <image> + <head objects> + <snap3 objects>
# ......
# snapN = <image> + <head objects> + <snapN objects>
#
# improve rollback:
# there is intersection of head objects and snapX objects, if snapX objects are not empty
# and need to deduplicate the intersection.
# deduplicate steps:
#  - get difference set of head objects and snapX objects
#  - write the difference set objects to image
#  - write the snapX objects to image
#
# Parameters: <pool_id> <image_name> <snap_id> <snap_object_dir> <backup_dir> <image_unit>
# snap_id == -2 is the sentinel for "rollback to head" (see recover_image).
# Each backup object file is named by its hex offset; it is written into the
# image file with dd at seek = 16#<name>, block size = <image_unit>.
function rollback_image_snap()
{
  local func="rollback_image_snap"

  echo "$func ..."

  trap 'echo $func failed; exit;' INT HUP
  if [ $# -lt 6 ];then
    echo "$func: parameters <pool_id> <image_name> <snap_id> <snap_object_dir> <backup_dir> <image_unit>"
    exit
  fi
  local pool_id=$1
  local image_name=$2
  local snap_id=$3
  local snap_object_dir=$4
  local backup_dir=$5
  local image_unit=$6

  local need_diff_set=0

  local image_path=$backup_dir/pool_$pool_id/$image_name/$image_name
  local head_object_dir=$backup_dir/pool_$pool_id/$image_name/@head
  local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
  local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK
  # best-effort lock: refuse to run if another rollback is in flight
  if [ -e $LOCK ];then
    echo "$func: $LOCK is locked by other process"
    exit
  else
    touch $LOCK
  fi
  # record which snapshot (or "head") the image file now represents
  if [ $snap_id -ne -2 ];then
    echo $snap_id > $CURRENT
  else
    echo "head" > $CURRENT
  fi

  # nothing to apply; NOTE(review): this early return leaves $LOCK behind —
  # TODO confirm whether callers are expected to clean it up
  if [ ! -e $snap_object_dir ];then
    return 0
  fi

  # rolling back to a snapshot (dir differs from @head) needs the
  # (head - snap) difference set applied first; rolling back to head does not
  if [ "$snap_object_dir"x != "$head_object_dir"x ];then
    echo "$func: need to compute diff_set of head"
    need_diff_set=1
  else
    echo "$func: NO diff_set"
    need_diff_set=0
  fi

  local entry_count=0
  local start=
  local end=
  local offset=
  local icount=0
  local istart=
  local iend=
  local percent=

  local snap_objects=
  local head_objects=
  local diff_set=

  snap_objects=(`ls $snap_object_dir`)

  # if need to compute difference set of head_objects and snap_objects
  if [ $need_diff_set -ne 0 ];then
    head_objects=(`ls $head_object_dir`)

    #get the difference set: ( head_objects - snap_objects )
    # snap_objects is fed in twice so that every name present in it appears
    # at least twice in the merged stream and is dropped by `uniq -u`
    diff_set=(`
    sort -m <(echo ${head_objects[@]}|xargs -n 1 echo) <(echo ${snap_objects[@]}|xargs -n 1 echo) \
      <(echo ${snap_objects[@]}|xargs -n 1 echo) |uniq -u`)

    # copy diff_set of head object to image
    pushd $head_object_dir >/dev/null

    echo "$func: copy diff_set head objects ..."
    entry_count=${#diff_set[@]}
    start=${diff_set[0]}
    end=
    if [ $entry_count -gt 0 ];then
      end=${diff_set[$(($entry_count - 1))]}
    fi
    offset=
    icount=0
    istart=
    iend=
    percent=

    echo -e "object_count\t$entry_count"
    echo -e "range\t\t[$start ~ $end] count:$entry_count"

    for object in ${diff_set[@]}
    do
      icount=$(($icount+1))
      if [ $icount = 1 ];then
        istart=$object
      fi
      iend=$object

      # file name is the hex object offset within the image
      local offset=$((16#$object))
      dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null

      percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
      tput sc #record current cursor
      echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
      if [ $icount != $entry_count ];then
        tput rc # backport most recent cursor
      fi
    done
    if [ $entry_count -gt 0 ];then
      echo
    fi
    popd >/dev/null

    if [ $snap_id -ne -2 ];then
      echo -e "$image_name already rollback diff_set: (head - snap)"
    fi
  fi

  # copy snap object to image
  pushd $snap_object_dir >/dev/null

  if [ $need_diff_set -ne 0 ];then
    echo "$func: copy snap objects ..."
  else
    echo "$func: copy head objects ..."
  fi
  entry_count=${#snap_objects[@]}
  start=${snap_objects[0]}
  end=
  if [ $entry_count -gt 0 ];then
    end=${snap_objects[$(($entry_count - 1))]}
  fi
  offset=
  icount=0
  istart=
  iend=
  percent=

  echo -e "object_count\t$entry_count"
  echo -e "range\t\t[$start ~ $end] count:$entry_count"

  for object in ${snap_objects[@]}
  do
    icount=$(($icount+1))
    if [ $icount = 1 ];then
      istart=$object
    fi
    iend=$object

    # file name is the hex object offset within the image
    local offset=$((16#$object))
    dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null

    percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
    tput sc #record current cursor
    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
    if [ $icount != $entry_count ];then
      tput rc # backport most recent cursor
    fi
  done
  if [ $entry_count -gt 0 ];then
    echo
  fi
  popd >/dev/null

  rm -f $LOCK
  if [ $snap_id -ne -2 ];then
    echo "$image_name rollback to snapid: $snap_id"
  else
    echo "$image_name rollback to head"
  fi
}

function recover_image()
{
  local func="recover_image"
  echo "$func ..."
+ + if [ $# -lt 3 ];then + echo "$func: parameters: <pool_id> <image_name> <snap_name> [<backup_dir>]" + exit + fi + + local pool_id=$1 + local img_name=$2 + local snap_name=$3 + local backup_dir=$4 + pool_id=$(($pool_id)) + if [ "$snap_name"x = "@"x ];then + snap_name= + fi + if [ "$backup_dir"x = ""x ];then + backup_dir=$default_backup_dir + fi + + #recover image with nosnap + if [ "$snap_name"x = ""x ];then + discover_image_nosnap $pool_id $img_name #input image_name + local image_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in + copy_image_nosnap_single_thread $pool_id $image_hobjects $backup_dir + + #recover image with snap + else + + # check if recovered head already + local img_hobjects_path=$images/pool_$pool_id/$img_name/$img_name + local img_file_path=$backup_dir/pool_$pool_id/$img_name/$img_name + if [ ! -e $img_hobjects_path ] || [ ! -e $img_file_path ];then + echo "$func: $img_name@$snap_name : can not rollback to snapshot, please recover image head first" + exit + fi + + # rollback to head + if [ "$snap_name"x = "@@"x ];then + local head_dir=$backup_dir/pool_$pool_id/$img_name/@head + if [ -e $head_dir ];then + local unit=`pushd $head_dir >/dev/null; ls|head -n 1|xargs -n 1 stat|awk '/Size:/{print $2}'` + # rollback to head + rollback_image_snap $pool_id $img_name -2 $backup_dir/$img_name/@head $backup_dir $unit + echo "$image_name_in head : $backup_dir/$img_name/$img_name" + else + echo "$func: no need to rollback to head" + fi + return 0 + fi + + # rollback to snap + discover_image_snap $pool_id $img_name $snap_name # get image meta & get snapid object + local snap_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id + local head_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id@head + local snap_object_dir=$backup_dir/pool_$pool_id/$image_name_in/@$db_snap_id + local image_path=$backup_dir/pool_$pool_id/$image_name_in/$image_name_in + local image_unit=$((1<<$db_order)) + 
copy_image_snap_single_thread $pool_id $image_name_in $db_snap_id $snap_hobjects $head_hobjects $backup_dir + rollback_image_snap $pool_id $image_name_in $db_snap_id $snap_object_dir $backup_dir $image_unit + echo "$image_name_in@$snap_name : $image_path" + fi +} diff --git a/src/tools/rbd_recover_tool/epoch_h b/src/tools/rbd_recover_tool/epoch_h new file mode 100644 index 000000000..e268eafa7 --- /dev/null +++ b/src/tools/rbd_recover_tool/epoch_h @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# file: epoch_h +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") +. $my_dir/common_h + +#pgid_list=$single_node/$cluster-$id/pgid_list +function get_pgid_list() +{ + find $osd_data/current/ -type d -name "*_head"|\ + sed -n 's/\(.*\)\/current\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head/\2 \1/p'|\ + sort -t ' ' -k 1.1,1h -k 2.1,2 > $pgid_list; +} + +function get_pgid() +{ + hobject_path=$1 + echo $hobject_path| sed -n 's/\(.*\)\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\(.*\)/\2/p' +} + +infos_seq= +function get_infos_seq() +{ + local func="get_infos_seq" + + local keyword=":infos." 
+ local infos_key=`get_map_header_key $keyword` + + if [ "$infos_key"x = ""x ];then + echo "$func: keyword not input or infos_key not exists" + exit + fi + local prefix=`get_map_header_prefix` + local key=$infos_key + + infos_seq=`get_header_seq $prefix $key` + if [ "$infos_seq"x = ""x ];then + echo "$func: infos_seq not exists" + exit + fi +} + +pg_epoch= +function get_pg_epoch() +{ + local func="get_pg_epoch" + if [ "$1"x = ""x ];then + echo "$func: no pgid input" + exit + fi + + get_pg_epoch_firefly "$1" + if [ "$pg_epoch"x != ""x ]; then + # echo "Epoch for $1: $pg_epoch (firefly)" + return + fi + + get_pg_epoch_hammer "$1" + if [ "$pg_epoch"x != ""x ]; then + # echo "Epoch for $1: $pg_epoch (hammer)" + return + fi + + echo "$func: Couldn't find epoch for $1" + exit +} + +function get_pg_epoch_firefly() +{ + local func="get_pg_epoch_firefly" + if [ "$1"x = ""x ];then + echo "$func: no pgid input" + exit + fi + local pgid=$1 + local key=$pgid"_epoch" + + #get_infos_seq; + # infos_seq default to 1 + infos_seq=1 + local infos_seq=`printf "%016d" $infos_seq` + local prefix="_USER_"$infos_seq"_USER_" + + pg_epoch=`get_header_kv $prefix $key int` +} + +function get_pg_epoch_hammer() +{ + local func="get_pg_epoch_hammer" + if [ "$1"x = ""x ];then + echo "$func: no pgid input" + exit + fi + local pgid="$1" + local hkey_prefix="$(get_map_header_prefix)" + local hkey="$(printf '...head.%x.%08X' "$(echo "$pgid"|cut -d'.' -f1)" "$((0x$(echo "$pgid"|cut -d'.' 
-f2)))")" + + local infos_seq="$(get_header_seq "$hkey_prefix" "$hkey")" + local infos_seq=`printf "%016d" $infos_seq` + local prefix="_USER_"$infos_seq"_USER_" + local key="_epoch" + + pg_epoch=`get_header_kv $prefix $key int` +} diff --git a/src/tools/rbd_recover_tool/metadata_h b/src/tools/rbd_recover_tool/metadata_h new file mode 100644 index 000000000..b736ceea7 --- /dev/null +++ b/src/tools/rbd_recover_tool/metadata_h @@ -0,0 +1,368 @@ +#!/usr/bin/env bash +# file: metadata_h +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") +. $my_dir/common_h +. $my_dir/epoch_h + +# put origin name in $image_name_in: for output +# put convert "_" name in $image_name: for grep image hobjects from database +image_name_in= +image_name= +function input_image() +{ + local func="input_image" + if [ "$1"x = ""x ];then + echo "$func: no image name input" + exit + fi + + image_name_in=$1 + # "_" -> "\u" + image_name=`convert_underline $image_name_in` +} + +#======================================== distinguish v1 or v2 =================================== +#image_list_v1=$single_node/$cluster-$id/image_list_v1 +#image_list_v2=$single_node/$cluster-$id/image_list_v2 +function get_image_list() +{ + find $osd_data/current/ -type f|grep ".rbd__" >$image_list_v1 + find $osd_data/current/ -type f|grep "rbd\\\\uid." 
>$image_list_v2
}

# Decide whether a head hobject path belongs to a format-1 or format-2 image
# by matching it against the previously generated image_list_v1/v2 files.
# Echoes "1" or "2"; echoes nothing when the path is found in image_list_v2.
# NOTE(review): the v2 branch prints "2" when the path is NOT in
# image_list_v2 (and prints nothing when it IS) — looks inverted; verify
# against the callers before changing. Also note this function is invoked via
# command substitution (see get_image_metadata_v1), so the bare `exit`s only
# terminate the subshell, not the calling script.
function get_image_format_by_hobject()
{
  local func="get_image_format"
  if [ "$1"x = ""x ];then
    exit
  fi
  local res1=`cat $image_list_v1|grep $1`
  if [ "$res1"x != ""x ];then
    echo 1
    exit
  fi

  local res2=`cat $image_list_v2|grep $1`
  if [ "$res2"x = ""x ];then
    echo 2
    exit
  fi
}

#======================================== image format v1 ========================================
# <image_name>.rbd include 3 parts:
# header + snap_count*snapshot + snap_count*snap_name
#
# struct rbd_obj_header_ondisk {
# 40     char text[40];
# 24     char block_name[RBD_MAX_BLOCK_NAME_SIZE];
# 4      char signature[4];
# 8      char version[8];
#        struct {
# 1         __u8 order;
# 1         __u8 crypt_type;
# 1         __u8 comp_type;
# 1         __u8 unused;
#        } __attribute__((packed)) options;
# 8      ceph_le64 image_size;//hexdump -C s=80 n=8
# 8      ceph_le64 snap_seq; //hexdump -C s=88 n=8
# 4      ceph_le32 snap_count;//hexdump -C s=96 n=4
# 4      ceph_le32 reserved;
# 8      ceph_le64 snap_names_len;//hexdump -C s=104 n=8
#        struct rbd_obj_snap_ondisk snaps[0];
# } __attribute__((packed));
#
# sizeof(rbd_obj_header_ondisk): 112
#
# struct rbd_obj_snap_ondisk {
# 8      ceph_le64 id;        //hexdump -C s=112+i*16 n=8 , i=[0, snap_count)
# 8      ceph_le64 image_size;//hexdump -C s=112+i*16+8 n=8, i=[0, snap_count)
# } __attribute__((packed));
# sizeof(rbd_obj_snap_ondisk): 16
#
# get snap_names form <image_nane>.rbd
# hexdump -e '10/1 "%_c"' -s $((112 + $snap_count*16)) -n $snap_names_len <image_name>.rbd
# then split snap_names into array

# Decode a format-1 image header object in place with hexdump, printing
# block_name/order/size/snap_seq and the snapshot table.
# $1 = path to the <image_name>.rbd head hobject, $2 = optional snap name
# to filter the snapshot listing down to a single entry.
function get_image_metadata_v1()
{
  local func="get_image_metadata_v1"
  if [ "$1"x = ""x ];then
    echo "$func: no image head object input"
    exit
  fi
  local snap_name=
  if [ "$2"x != ""x ];then
    snap_name=$2
  fi

  if [ ! 
-e $1 ];then + echo "$func: $1 not exists" + exit + fi + local hobject_path=$1 + d_hobject_path=`dump_backslash $1` + local image_format=`get_image_format_by_hobject $d_hobject_path` + if [ $image_format != 1 ];then + echo "$func: image_format must be 1" + exit + fi + + if [ ! -e $hobject_path ];then + echo "$func: $hobject_path not exists" + exit + fi + + # decode rbd_obj_header_ondisk of <image_name>.rbd + local block_name=`hexdump -e '10/1 "%c"' -s 40 -n 24 $hobject_path` + local order=`hexdump -e '10/4 "%u"' -s 76 -n 1 $hobject_path` + local image_size=`hexdump -C -s 80 -n 8 $hobject_path|head -n 1|awk '{for (i=9; i>1; i--) {printf $i}}'` + image_size=$((16#$image_size)) + local snap_seq=`hexdump -C -s 88 -n 8 $hobject_path|head -n 1| + awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'` + local snap_count=`hexdump -C -s 96 -n 4 $hobject_path|head -n 1| + awk '{num=""; for(i=5; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'` + local snap_names_len=`hexdump -C -s 104 -n 8 $hobject_path|head -n 1| + awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'` + + echo -e "block_name:\t$block_name" + echo -e "order:\t\t$order" + echo -e "image_size:\t$image_size" + echo -e "snap_seq:\t$snap_seq" + + # decode N rbd_obj_snap_ondisk of <image_name>.rbd + declare -a snap_ids + declare -a snap_names + declare -a snap_image_sizes + local size_header=112 #sizeof(rbd_obj_header_ondisk) + local size_snap=16 #sizeof(rbd_obj_snap_ondisk) + local offset=0 + local id_off=0 + local size_off=0 + for ((i=0; i<$snap_count; i++)) + do + offset=$(($size_header + $i * $size_snap)) + id_off=$offset + size_off=$(($offset + 8)) + snap_ids[$i]=`hexdump -C -s $id_off -n 8 $hobject_path|head -n 1| + awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'` + snap_image_sizes[$i]=`hexdump -C -s $size_off -n 8 $hobject_path|head -n 1| + awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'` + done + 
offset=$(($size_header + $snap_count * $size_snap)) + snap_names=(`hexdump -e '10/1 "%_c"' -s $offset -n $snap_names_len $hobject_path| + awk -F "\\\\\\\\\\\\\\\\0" '{for(i=1; i<=NF; i++) {print $i" "} }'`); + + echo -e "\t\tID\tNAME\t\tSIZE" + for ((i=0; i<$snap_count; i++)) + do + if [ "$snap_name"x = ""x ];then + echo -n -e "snapshot:\t" + echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}" + continue + fi + if [ "$snap_name"x = "${snap_names[$i]}"x ];then + echo -n -e "snapshot:\t" + echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}" + return + fi + done +} + +#======================================== end image format v1 ======================================== + +#======================================== image format v2 ======================================== + +# map_header, header_seq, header, key/value +# eg. +# map_header _HOBJTOSEQ_:rbd%uheader%e139a6b8b4567...head.2.68E826B6 +# meta_header_seq 17426 +# header: _USER_0000000000017426_USER_:object_prefix +# _USER_0000000000017426_USER_:order +# _USER_0000000000017426_USER_:size +# _USER_0000000000017426_USER_:snap_seq +# key/value ceph-kvstore-tool /storepath get _USER_0000000000017426_USER_ (object_prefix|order|size|snap_seq) + +# decode image id from image_id_hobject +function get_image_id() +{ + local func="get_image_id" + if [ "$1"x = ""x ];then + exit; + fi + local image_id_hobject=$1 #from admin node's database + + if [ ! 
-e $image_id_hobject ];then + #echo "$func: $image_id_hobject not exists" + exit; + fi + + # get len of string + local n=`hexdump -e '10/4 "%u"' -s 0 -n 4 $image_id_hobject` + # get string + hexdump -e '10/1 "%c"' -s 4 -n $n $image_id_hobject +} + +#find image_id omap entry in omaplist +map_header_prefix= +map_header_key= +function get_map_header() +{ + local func="get_map_header" + local image_id=$1 + if [ "$image_id"x = ""x ];then + echo "$func: no image_id input" + exit; + fi + map_header_prefix=`get_map_header_prefix` + local keyword="header%e"$image_id + map_header_key=`get_map_header_key $keyword` + if [ "$map_header_key"x = ""x ];then + echo "$func: map_header_key is NULL(not in omaplist)" + exit + fi +} + +#get meta header seq from map_header +meta_header_seq= +function get_meta_header_seq() +{ + local func="get_meta_header_seq" + if [ "$1"x == ""x ];then + echo "$func: no prefix input" + exit; + elif [ "$2"x == ""x ];then + echo "$func: no key input" + exit; + fi + local prefix=$1; + local key=$2; + meta_header_seq=`get_header_seq $prefix $key` +} + +# get image metadata : object_prefix, order, image_size, snap_seq +object_prefix= +order= +image_size= +snap_seq= +function get_image_metadata_v2() +{ + local func="get_image_metadata_v2" + if [ "$1"x = ""x ];then + echo "$func: no meta_header_seq input" + exit; + fi + local meta_header_seq=`printf "%016d" $1` + #echo "$func: meta_header_seq = "$meta_header_seq + local ghobject_key="_USER_"$meta_header_seq"_USER_" + local prefix=$ghobject_key + + object_prefix=`get_header_kv $prefix object_prefix string` + #object_prefix="rbd_data.$image_id" + order=`get_header_kv $prefix order int` + image_size=`get_header_kv $prefix size int` + snap_seq=`get_header_kv $prefix snap_seq int` + + echo -e "object_prefix:\t$object_prefix" + echo -e "order:\t\t$order" + echo -e "image_size:\t$image_size" + echo -e "snap_seq:\t$snap_seq" + + # list snapshot + list_snaps_v2 $1 $2 +} + +# struct cls_rbd_snap { +# snapid_t id; +# 
string name;
#       uint64_t image_size;
#       uint64_t features;
#       uint8_t protection_status;
#       cls_rbd_parent parent;
# }
# decode cls_rbd_snap
# 1     u8      struct_v
# 1     u8      struct_compat
# 4     u32     struct_len
# 8     u64     snapid_t id //s=6 n=8
# 4     u32     len of name //s=14 n=4
# len   char    name //s=18 n=len
# 8     u64     image_size
# 8     u64     features
# ......
#
# List the snapshots of a format-2 image by decoding its "snapshot_*" omap
# entries (layout per the cls_rbd_snap byte map above; multi-byte fields are
# little endian, hence the descending-index loops that reverse byte order).
# $1 = meta header seq (zero-padded into the _USER_<seq>_USER_ omap prefix),
# $2 = optional snap name: when given, only that snapshot is printed.
function list_snaps_v2()
{
  local func="list_snaps_v2"
  if [ "$1"x = ""x ];then
    exit
  fi
  local sname=
  if [ $# -eq 2 ];then
    sname=$2
  fi
  local meta_header_seq=`printf "%016d" $1`
  local prefix="_USER_"$meta_header_seq"_USER_"
  # collect the snapshot keys ("snapshot_<id>") recorded for this header
  local keys=(`awk -F ":" '/snapshot_/ && $1 == "'"$prefix"'" {if ($2 == "") exit; split($2, arr, "_");
    print arr[2];}' $omap_list|sort -r`)
  echo -e "\t\tID\tNAME\t\tSIZE"
  for key in ${keys[@]}
  do
    key="snapshot_$key"
    # arr = the value bytes of this omap entry, one hex byte per element
    local arr=(`ceph-kvstore-tool $omap_path get $prefix $key|awk -F ":" '{print $2}'`);
    # get snap_name
    # name length: u32 at offset 14..17, stored little endian
    tmp=
    for ((i=17; i>13; i--))
    do
      tmp="$tmp${arr[$i]}"
    done
    local len=$((16#$tmp))
    local snap_name=
    for ((i=18; i<$((18+$len)); i++))
    do
      # convert ascii to char
      local char=`echo -e "\x${arr[$i]}"`
      snap_name="$snap_name$char"
    done
    # get snap_id (little endian)
    # u64 at offset 6..13
    local tmp=
    for ((i=13; i>5; i--))
    do
      tmp="$tmp${arr[$i]}"
    done
    local snap_id=$((16#$tmp))
    # get image_size of current snap (little endian)
    # u64 immediately after the name: offset 18+len .. 25+len
    tmp=
    for ((i=$((25+$len)); i>$((17+$len)); i--))
    do
      tmp="$tmp${arr[$i]}"
    done
    local image_size=$((16#$tmp))
    if [ "$sname"x = ""x ];then
      echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size"
      continue
    fi
    if [ "$sname"x = "$snap_name"x ];then
      echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size"
      return
    fi
  done
}

#======================================== end image format v2 ========================================
diff --git a/src/tools/rbd_recover_tool/osd_job b/src/tools/rbd_recover_tool/osd_job
new file mode 100755
index 000000000..b4b80be8a
--- /dev/null
+++ 
b/src/tools/rbd_recover_tool/osd_job @@ -0,0 +1,170 @@ +#!/usr/bin/env bash +# file: osd_job +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") + +. $my_dir/common_h +. $my_dir/metadata_h +. $my_dir/epoch_h + +function check_ceph_osd() +{ + local func="check_ceph_osd" + local host=`hostname` + # if ceph-osd service is still running, except flush-journal + if [ "`ps aux|grep ceph-osd|grep -v flush-journal|grep -v grep`"x != ""x ];then + echo "[$host]: $func: ceph-osd is running..., stop it" + exit + fi +} + +function cat_pg_epoch() +{ + local func="cat_pg_epoch" + init_env_osd $1 + if [ -e $node_pg_epoch ];then + cat $node_pg_epoch + fi +} + +function cat_image_v1() +{ + local func="cat_image_v1" + init_env_osd $1 + if [ -e $image_v1 ];then + cat $image_v1 + fi +} + +function cat_image_v2() +{ + local func="cat_image_v2" + init_env_osd $1 + if [ -e $image_v2 ];then + cat $image_v2 + fi +} + +function flush_osd_journal() +{ + local func="flush_osd_journal" + init_env_osd $1 + local osd_data_path=$osd_data + local osd_journal_path=$osd_data/journal + local whoami_path=$osd_data/whoami + local host=`hostname` + if [ ! -e $whoami_path ];then + echo "[$host]: $func: $whoami_path not exists" + exit + fi + local whoami=`cat $whoami_path` + echo "[$host]: $func ..." + ceph-osd -i $whoami --osd-data $osd_data_path --osd-journal $osd_journal_path --flush-journal >/dev/null + if [ $? 
-ne 0 ];then + echo "[$host]: $func: flush osd journal failed" + exit + fi +} + +function do_omap_list() +{ + local func="do_omap_list" + init_env_osd $1 + local host=`hostname` + echo "[$host]: $func ..." + get_omap_list +} + +# get all pgs epoch +function do_pg_epoch() +{ + local func="do_pg_epoch" + init_env_osd $1 + local node=`hostname` + get_pgid_list + >$node_pg_epoch + local pgid= + local data_path= + local host=`hostname` + echo "[$host]: $func ..." + while read line + do + { + pgid=`echo $line|awk '{print $1}'` + data_path=`echo $line|awk '{print $2}'` + get_pg_epoch $pgid + echo -e "$node $pgid $pg_epoch $data_path" >>$node_pg_epoch + } + done < $pgid_list +} + +# get an list of image in this osd node, pg epoch maybe not the latest, the admin node will do distinguish +function do_image_list() +{ + local func="do_image_list" + init_env_osd $1 + get_image_list + local node=`hostname` + >$image_v1 + >$image_v2 + local host=`hostname` + echo "[$host]: $func ..." + for line in `cat $image_list_v1` + do + pgid=`get_pgid $line` + get_pg_epoch $pgid + echo "$node $line $pg_epoch" >> $image_v1 + done + for line in `cat $image_list_v2` + do + pgid=`get_pgid $line` + get_pg_epoch $pgid + echo "$node $line $pg_epoch" >> $image_v2 + done +} + +function do_image_id() +{ + local func="do_image_id" + init_env_osd $1 + get_image_id $2 +} + +function do_image_metadata_v1() +{ + local func="do_image_metadata_v1" + init_env_osd $1 + local image_header_hobject=$2 + local snap_name=$3 + get_image_metadata_v1 $image_header_hobject $snap_name +} + +function do_image_metadata_v2() +{ + local func="do_image_metadata_v2" + init_env_osd $1 + local image_id=$2 + local image_header_hobject=$3 + local snap_name=$4 + get_map_header $image_id + get_meta_header_seq $map_header_prefix $map_header_key + get_image_metadata_v2 $meta_header_seq $snap_name +} + +check_ceph_osd +$* diff --git a/src/tools/rbd_recover_tool/rbd-recover-tool b/src/tools/rbd_recover_tool/rbd-recover-tool new file 
mode 100755 index 000000000..b7a258650 --- /dev/null +++ b/src/tools/rbd_recover_tool/rbd-recover-tool @@ -0,0 +1,327 @@ +#!/usr/bin/env bash +# file: rbd-recover-tool +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +# rbd-recover-tool is an offline recover tool for rbd image in replicated pool +# when ceph cluster is stopped. +# it is a simple disater recovery policy, just for urgent condition + +my_dir=$(dirname "$0") + +. $my_dir/common_h +. $my_dir/metadata_h +. $my_dir/epoch_h +. $my_dir/database_h + +#scp files from admin node to osd node +file1=common_h +file2=metadata_h +file3=epoch_h +file4=osd_job + +#------------ admin node's action ------------- + +function scp_file() +{ + local func="scp_file" + file=$1 + if [ "$1"x = ""x ];then + echo "$func: not file input" + exit + fi + for host in `cat $osd_host` + do + { + echo "$func: $host" + scp $ssh_option $file $host:$job_path 1>/dev/null + } & + done +} + +function scp_files() +{ + local func="scp_files" + for host in `cat $osd_host` + do + { + echo "$func: $host" + scp $ssh_option $file1 $host:$job_path + scp $ssh_option $file2 $host:$job_path + scp $ssh_option $file3 $host:$job_path + scp $ssh_option $file4 $host:$job_path + } & + done + wait + echo "$func: finish" +} + +function scatter_node_jobs() +{ + local func="scatter_node_jobs" + local host= + local data_path= + echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..." 
+ + trap 'echo $func failed; exit' INT HUP + while read line + do + { + host=`echo $line|awk '{print $1}'` + data_path=`echo $line|awk '{print $2}'` + check_osd_process $host + + cmd="mkdir -p $job_path" + ssh $ssh_option $host $cmd + scp $ssh_option $file1 $host:$job_path >/dev/null + scp $ssh_option $file2 $host:$job_path >/dev/null + scp $ssh_option $file3 $host:$job_path >/dev/null + scp $ssh_option $file4 $host:$job_path >/dev/null + + cmd="bash $job_path/osd_job flush_osd_journal $data_path;" + cmd="$cmd $job_path/osd_job do_omap_list $data_path;" + cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;" + cmd="$cmd bash $job_path/osd_job do_image_list $data_path;" + + ssh $ssh_option $host $cmd </dev/null + } & + done < $osd_host_path + wait + echo "$func: finish" +} + +function gather_node_infos() +{ + local func="gather_node_infos" + echo "$func ..." + >$pg_coll + >$image_coll_v1 + >$image_coll_v2 + trap 'echo $func failed; exit' INT HUP + while read line + do + { + host=`echo $line|awk '{print $1}'` + data_path=`echo $line|awk '{print $2}'` + echo "$func: $host" + check_osd_process $host + + #pg epoch + cmd1="bash $job_path/osd_job cat_pg_epoch $data_path" + ssh $ssh_option $host $cmd1 >> $pg_coll + #image v1 + cmd2="bash $job_path/osd_job cat_image_v1 $data_path" + ssh $ssh_option $host $cmd2 >> $image_coll_v1 + #image v2 + cmd3="bash $job_path/osd_job cat_image_v2 $data_path" + ssh $ssh_option $host $cmd3 >> $image_coll_v2 + } & + done < $osd_host_path + wait + echo "$func: finish" +} + +function scatter_gather() +{ + local func="scatter_gather" + if [ ! -s $osd_host ];then + echo "$func: no osd_host input" + exit + fi + if [ ! 
-s $mon_host ];then + echo "$func: no mon_host input" + exit + fi + scatter_node_jobs + gather_node_infos +} + + +#------------- operations -------------- + +function database() +{ + scatter_gather + gen_database +} + +function list() +{ + list_images +} + +function lookup() +{ + lookup_image $1 $2 $3 +} + +function recover() +{ + recover_image $1 $2 $3 $4 +} + +#------------- helper ------------- + +function usage() +{ + local cmd_name="rbd-recover-tool" + echo + echo "$cmd_name is used to recover rbd image of replicated pool, + when all ceph services are stopped" + echo "Usage:" + echo "$cmd_name database + gather pg info, object info, image metadata, + and epoch info from all osd nodes, + this will cosume a long time, just be patient, + especially when scale up to 1000+ osds" + echo "$cmd_name list + list all rbd images of all replicated pools, + before to lookup & recover" + echo "$cmd_name lookup <pool_id>/<image_name>[@[<snap_name>]] + show image metadata: image format, rbd id, size, order, snapseq + In addition, for image with snapshots, + this will list all snapshot infomations" + echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>] + all snapshots share one image head, to economize disk space + so there is only one snapshot at any time, + image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name + cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT, + will show snapid + recover to raw image/nosnap/head: <image_name> + rollback to image head: <image_name>@ + rollback to image snap: <image_name>@<snap_name> + recover steps: + 1. recover image nosnap (only one time) + 2. 
rollback to image snap" +} + +function get_path() +{ + local func="get_path" + if [ $# -lt 1 ];then + return + fi + if [[ $1 =~ // ]];then + return # "/path//to" is invalid + fi + local parent=`dirname $1` + local name=`basename $1` + if [ "$parent"x = "/"x ];then + echo "$parent$name" + else + echo -n "$parent/$name" + fi +} + +function admin_cmd() +{ + local func="admin_cmd" + if [ $# -lt 1 ];then + usage + exit + fi + if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then + usage + exit + fi + + if [ "$1"x = "database"x ];then + if [ $# -gt 1 ];then + usage + exit + fi + # remove osd_host to refresh osd_host and osd_host_mapping + rm -f $osd_host + init_env_admin + database + elif [ "$1"x = "list"x ];then + if [ $# -gt 1 ];then + usage + exit + fi + init_env_admin + list + elif [ "$1"x = "lookup"x ];then + if [ $# -gt 2 ];then + usage + exit + fi + local pool_id=-1 + local image_name= + local snap_name= + if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then + pool_id="${BASH_REMATCH[1]}" + image_name="${BASH_REMATCH[2]}" + elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then + pool_id="${BASH_REMATCH[1]}" + image_name="${BASH_REMATCH[2]}" + snap_name="${BASH_REMATCH[3]}" + else + echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]" + exit + fi + init_env_admin + lookup $pool_id $image_name $snap_name + elif [ "$1"x = "recover"x ];then + if [ $# -lt 2 ] || [ $# -gt 3 ];then + usage + exit + fi + local pool_id=-1 + local image_name= + local snap_name=@ + local image_dir= + if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then + pool_id="${BASH_REMATCH[1]}" + image_name="${BASH_REMATCH[2]}" + elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then + pool_id="${BASH_REMATCH[1]}" + image_name="${BASH_REMATCH[2]}" + snap_name="${BASH_REMATCH[3]}" + if [ "$snap_name"x = ""x ];then + snap_name=@@ + fi + else + echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]" + exit + fi + if [ $# = 3 ];then + image_dir=`get_path $3` + if [ "image_dir"x = ""x ];then + echo "$3 
invalid" + exit + fi + fi + init_env_admin + recover $pool_id $image_name $snap_name $image_dir + elif [ "$1"x = "scp_files"x ];then + if [ $# -gt 1 ];then + exit + fi + admin_parse_osd + scp_files + elif [ "$1"x = "scp_file"x ];then + if [ $# -gt 2 ];then + exit + fi + admin_parse_osd + scp_file $2 + else + echo "$func: $1: command not found" + fi +} + +admin_cmd $* diff --git a/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh b/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh new file mode 100755 index 000000000..876b47b90 --- /dev/null +++ b/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh @@ -0,0 +1,542 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +# unit test case for rbd-recover-tool + +#prepare: +# - write config files: config/osd_host, config/mon_host, config/storage_path, config/mds_host if exist mds +#step 1. rbd export all images as you need +#step 2. stop all ceph services +#step 3. use ceph_rbd_recover_tool to recover all images +#step 4. 
compare md5sum of recover image with that of export image who has the same image name + +ssh_opt="-o ConnectTimeout=1" +my_dir=$(dirname "$0") +tool_dir=$my_dir + +#storage_path=$my_dir/config/storage_path +mon_host=$my_dir/config/mon_host +osd_host=$my_dir/config/osd_host +mds_host=$my_dir/config/mds_host + +test_dir= # `cat $storage_path` +export_dir= #$test_dir/export +recover_dir= #$test_dir/recover +image_names= #$test_dir/image_names +online_images= #$test_dir/online_images, all images on ceph rbd pool +gen_db= #$test_dir/gen_db, label database if exist +pool=rbd +pool_id=2 + +function get_pool_id() +{ + local pool_id_file=/tmp/pool_id_file.$$$$ + ceph osd pool stats $pool|head -n 1|awk '{print $4}' >$pool_id_file + if [ $? -ne 0 ];then + echo "$func: get pool id failed: pool = $pool" + rm -f $pool_id_file + exit + fi + pool_id=`cat $pool_id_file` + echo "$func: pool_id = $pool_id" + rm -f $pool_id_file +} + +function init() +{ + local func="init" + if [ $# -eq 0 ];then + echo "$func: must input <path> to storage images, enough disk space is good" + exit + fi + if [ ! -s $osd_host ];then + echo "$func: config/osd_host not exists or empty" + exit + fi + if [ ! -s $mon_host ];then + echo "$func: config/mon_host not exists or empty" + exit + fi + if [ ! 
-e $mds_host ];then
    echo "$func: config/mds_host not exists"
    exit
  fi
  test_dir=$1
  export_dir=$test_dir/export
  recover_dir=$test_dir/recover
  image_names=$test_dir/image_names
  online_images=$test_dir/online_images
  gen_db=$test_dir/gen_db

  trap 'echo "ceph cluster is stopped ..."; exit;' INT
  ceph -s >/dev/null
  get_pool_id

  mkdir -p $test_dir
  mkdir -p $export_dir
  mkdir -p $recover_dir
  rm -rf $export_dir/*
  rm -rf $recover_dir/*
}

# run "rbd-recover-tool database" once; $gen_db caches completion
function do_gen_database()
{
  local func="do_gen_database"
  if [ -s $gen_db ] && [ `cat $gen_db` = 1 ];then
    echo "$func: database already existed"
    exit
  fi
  bash $tool_dir/rbd-recover-tool database
  echo 1 >$gen_db
}

#check if all ceph processes are stopped
function check_ceph_service()
{
  local func="check_ceph_service"
  local res=`cat $osd_host $mon_host $mds_host|sort -u|tr -d [:blank:]|xargs -n 1 -I @ ssh $ssh_opt @ "ps aux|grep -E \"(ceph-osd|ceph-mon|ceph-mds)\"|grep -v grep"`
  if [ "$res"x != ""x ];then
    echo "$func: NOT all ceph services are stopped"
    # BUGFIX: an unreachable "exit" followed this "return" and was removed
    return 1
  fi
  echo "$func: all ceph services are stopped"
  return 0
}

# kill ceph-osd on every osd host in parallel, then ceph-mon/ceph-mds
function stop_ceph()
{
  local func="stop_ceph"
  #cat osd_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-osd"
  while read osd
  do
  {
    osd=`echo $osd|tr -d [:blank:]`
    if [ "$osd"x = ""x ];then
      continue
    fi
    #ssh $ssh_opt $osd "killall ceph-osd ceph-mon ceph-mds" </dev/null
    ssh $ssh_opt $osd "killall ceph-osd" </dev/null
  } &
  done < $osd_host
  wait
  echo "waiting kill all osd ..." 
+ sleep 1 + #cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon ceph-osd ceph-mds" + cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon" + #cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds ceph-mon ceph-osd" + cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds" +} + +function create_image() +{ + local func="create_image" + if [ ${#} -lt 3 ];then + echo "create_image: parameters: <image_name> <size> <image_format>" + exit + fi + local image_name=$1 + local size=$2 + local image_format=$3 + if [ $image_format -lt 1 ] || [ $image_format -gt 2 ];then + echo "$func: image_format must be 1 or 2" + exit + fi + local res=`rbd list|grep -E "^$1$"` + echo "$func $image_name ..." + if [ "$res"x = ""x ];then + rbd -p $pool create $image_name --size $size --image_format $image_format + else + if [ $image_format -eq 2 ];then + rbd snap ls $image_name|tail -n +2|awk '{print $2}'|xargs -n 1 -I % rbd snap unprotect $image_name@% + fi + rbd snap purge $image_name + #rbd rm $image_name + rbd -p $pool resize --allow-shrink --size $size $image_name + fi +} + +function export_image() +{ + local func="export_image" + + if [ $# -lt 2 ];then + echo "$func: parameters: <image_name> <image_format> [<image_size>]" + exit + fi + + local image_name=$1 + local format=$(($2)) + local size=$(($3)) #MB + + if [ $format -ne 1 ] && [ $format -ne 2 ];then + echo "$func: image format must be 1 or 2" + exit + fi + + if [ $size -eq 0 ];then + size=24 #MB + echo "$func: size = $size" + fi + local mnt=/rbdfuse + + mount |grep "rbd-fuse on /rbdfuse" &>/dev/null + if [ $? 
-ne 0 ];then
    rbd-fuse $mnt
  fi

  create_image $image_name $size $format

  dd conv=notrunc if=/dev/urandom of=$mnt/$image_name bs=4M count=$(($size/4))

  local export_image_dir=$export_dir/pool_$pool_id/$image_name
  mkdir -p $export_image_dir
  local export_md5_nosnap=$export_image_dir/@md5_nosnap
  >$export_md5_nosnap

  local export_image_path=$export_image_dir/$image_name
  rm -f $export_image_path

  rbd export $pool/$image_name $export_image_path
  md5sum $export_image_path |awk '{print $1}' >$export_md5_nosnap
}

# recover the image head with rbd-recover-tool and record its md5sum
function recover_image()
{
  # BUGFIX: $func was copy-pasted as "recover_snapshots",
  # making error messages misreport their origin
  local func="recover_image"
  if [ $# -lt 1 ];then
    echo "$func: parameters: <image_name>"
    exit
  fi

  local image_name=$1
  #pool_id=29

  local recover_image_dir=$recover_dir/pool_$pool_id/$image_name
  mkdir -p $recover_image_dir
  local recover_md5_nosnap=$recover_image_dir/@md5_nosnap
  >$recover_md5_nosnap
  local snapshot=

  bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir
  md5sum $recover_image_dir/$image_name|awk '{print $1}' >$recover_md5_nosnap
}

# write <count>MB of random data at offset <seek>MB, snapshot the image,
# export it and append its md5sum to @md5 in <export_image_dir>
function make_snapshot()
{
  local func="make_snapshot"
  if [ $# -lt 5 ];then
    echo "$func: parameters: <ofile> <seek> <count> <snap> <export_image_dir>"
    exit
  fi
  local ofile=$1
  local seek=$(($2))
  local count=$(($3))
  local snap=$4
  local export_image_dir=$5

  if [ $seek -lt 0 ];then
    echo "$func: seek can not be minus"
    exit
  fi

  if [ $count -lt 1 ];then
    echo "$func: count must great than zero"
    exit
  fi

  echo "[$snap] $func ..."
  echo "$1 $2 $3 $4"
  rbd snap ls $image_name|grep $snap;

  local res=$? 
+ if [ $res -eq 0 ];then + return $res + fi + + dd conv=notrunc if=/dev/urandom of=$ofile bs=1M count=$count seek=$seek 2>/dev/null + snapshot=$image_name@$snap + rbd snap create $snapshot + rm -f $export_image_dir/$snapshot + rbd export $pool/$image_name $export_image_dir/$snapshot + pushd $export_image_dir >/dev/null + md5sum $snapshot >> @md5 + popd >/dev/null +} + +function recover_snapshots() +{ + local func="recover_snapshots" + if [ $# -lt 1 ];then + echo "$func: parameters: <image_name>" + exit + fi + + local image_name=$1 + #pool_id=29 + + local recover_image_dir=$recover_dir/pool_$pool_id/$image_name + mkdir -p $recover_image_dir + local recover_md5=$recover_image_dir/@md5 + >$recover_md5 + local snapshot= + + + # recover head + bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir + + # recover snapshots + for((i=1; i<10; i++)) + do + snapshot=snap$i + bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name@$snapshot $recover_dir + pushd $recover_image_dir >/dev/null + local chksum=`md5sum $image_name|awk '{print $1}'` + echo "$chksum $image_name@$snapshot" >>@md5 + popd >/dev/null + done +} + +function export_snapshots() +{ + local func="export_snapshots" + + if [ $# -lt 2 ];then + echo "$func: parameters: <image_name> <image_format> [<image_size>]" + exit + fi + + local image_name=$1 + local format=$(($2)) + local size=$(($3)) #MB + + if [ $format -ne 1 ] && [ $format -ne 2 ];then + echo "$func: image format must be 1 or 2" + exit + fi + + if [ $size -eq 0 ];then + size=24 #MB + echo "$func: size = $size" + fi + local mnt=/rbdfuse + + mount |grep "rbd-fuse on /rbdfuse" &>/dev/null + if [ $? 
-ne 0 ];then + rbd-fuse $mnt + fi + + create_image $image_name $size $format + + local export_image_dir=$export_dir/pool_$pool_id/$image_name + mkdir -p $export_image_dir + local export_md5=$export_image_dir/@md5 + >$export_md5 + + # create 9 snapshots + # image = {object0, object1, object2, object3, object4, object5, ...} + # + # snap1 : init/write all objects + # snap2 : write object0 + # snap3 : write object1 + # snap4 : write object2 + # snap5 : write object3 + # snap6 : write object4 + # snap7 : write object5 + # snap8 : write object0 + # snap9 : write object3 + + make_snapshot $mnt/$image_name 0 $size snap1 $export_image_dir + make_snapshot $mnt/$image_name 0 1 snap2 $export_image_dir + make_snapshot $mnt/$image_name 4 1 snap3 $export_image_dir + make_snapshot $mnt/$image_name 8 1 snap4 $export_image_dir + make_snapshot $mnt/$image_name 12 1 snap5 $export_image_dir + make_snapshot $mnt/$image_name 16 1 snap6 $export_image_dir + make_snapshot $mnt/$image_name 20 1 snap7 $export_image_dir + make_snapshot $mnt/$image_name 1 1 snap8 $export_image_dir + make_snapshot $mnt/$image_name 13 1 snap9 $export_image_dir +} + +function check_recover_nosnap() +{ + local func="check_recover_nosnap" + if [ $# -lt 3 ];then + echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>" + fi + local export_md5=$1 + local recover_md5=$2 + local image_name=$3 + + local ifpassed="FAILED" + + echo "================ < $image_name nosnap > ================" + + local export_md5sum=`cat $export_md5` + local recover_md5sum=`cat $recover_md5` + + if [ "$export_md5sum"x != ""x ] && [ "$export_md5sum"x = "$recover_md5sum"x ];then + ifpassed="PASSED" + fi + echo "export: $export_md5sum" + echo "recover: $recover_md5sum $ifpassed" +} + +function check_recover_snapshots() +{ + local func="check_recover_snapshots" + if [ $# -lt 3 ];then + echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>" + fi + local export_md5=$1 + local recover_md5=$2 + local 
image_name=$3 + + local ifpassed="FAILED" + + echo "================ < $image_name snapshots > ================" + + OIFS=$IFS + IFS=$'\n' + local export_md5s=(`cat $export_md5`) + local recover_md5s=(`cat $recover_md5`) + for((i=0; i<9; i++)) + do + OOIFS=$IFS + IFS=$' ' + local x=$(($i+1)) + snapshot=snap$x + + local export_arr=(`echo ${export_md5s[$i]}`) + local recover_arr=(`echo ${recover_md5s[$i]}`) + echo "export: ${export_md5s[$i]}" + if [ "${export_arr[1]}"x != ""x ] && [ "${export_arr[1]}"x = "${recover_arr[1]}"x ];then + ifpassed="PASSED" + fi + echo "recover: ${recover_md5s[$i]} $ifpassed" + IFS=$OOIFS + done + IFS=$OIFS +} + +# step 1: export image, snapshot +function do_export_nosnap() +{ + export_image image_v1_nosnap 1 + export_image image_v2_nosnap 2 +} + +function do_export_snap() +{ + export_snapshots image_v1_snap 1 + export_snapshots image_v2_snap 2 +} + +# step 2: stop ceph cluster and gen database +function stop_cluster_gen_database() +{ + trap 'echo stop ceph cluster failed; exit;' INT HUP + stop_ceph + sleep 2 + check_ceph_service + local res=$? + while [ $res -ne 0 ] + do + stop_ceph + sleep 2 + check_ceph_service + res=$? 
+ done + + echo 0 >$gen_db + do_gen_database +} + +# step 3: recover image,snapshot +function do_recover_nosnap() +{ + recover_image image_v1_nosnap + recover_image image_v2_nosnap +} + +function do_recover_snap() +{ + recover_snapshots image_v1_snap + recover_snapshots image_v2_snap +} + +# step 4: check md5sum pair<export_md5sum, recover_md5sum> +function do_check_recover_nosnap() +{ + local image1=image_v1_nosnap + local image2=image_v2_nosnap + + local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5_nosnap + local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5_nosnap + local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5_nosnap + local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5_nosnap + + check_recover_nosnap $export_md5_1 $recover_md5_1 $image1 + check_recover_nosnap $export_md5_2 $recover_md5_2 $image2 +} + +function do_check_recover_snap() +{ + local image1=image_v1_snap + local image2=image_v2_snap + + local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5 + local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5 + local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5 + local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5 + + check_recover_snapshots $export_md5_1 $recover_md5_1 $image1 + check_recover_snapshots $export_md5_2 $recover_md5_2 $image2 +} + +function test_case_1() +{ + do_export_nosnap + stop_cluster_gen_database + do_recover_nosnap + do_check_recover_nosnap +} + +function test_case_2() +{ + do_export_snap + stop_cluster_gen_database + do_recover_snap + do_check_recover_snap +} + +function test_case_3() +{ + do_export_nosnap + do_export_snap + + stop_cluster_gen_database + + do_recover_nosnap + do_recover_snap + + do_check_recover_nosnap + do_check_recover_snap +} + + +init $* +test_case_3 diff --git a/src/tools/rbd_wnbd/CMakeLists.txt b/src/tools/rbd_wnbd/CMakeLists.txt new file mode 100644 index 000000000..86c41b2ee --- /dev/null +++ b/src/tools/rbd_wnbd/CMakeLists.txt @@ -0,0 +1,11 @@ 
+add_executable(rbd-wnbd rbd_wnbd.cc wnbd_handler.cc wnbd_wmi.cc) +set_target_properties( + rbd-wnbd PROPERTIES COMPILE_FLAGS + "-fpermissive -I${WNBD_INCLUDE_DIRS}") +target_link_libraries( + rbd-wnbd setupapi rpcrt4 + wbemuuid oleaut32 + ${WNBD_LIBRARIES} + ${Boost_FILESYSTEM_LIBRARY} + librbd librados global) +install(TARGETS rbd-wnbd DESTINATION bin) diff --git a/src/tools/rbd_wnbd/rbd_wnbd.cc b/src/tools/rbd_wnbd/rbd_wnbd.cc new file mode 100644 index 000000000..a9e160456 --- /dev/null +++ b/src/tools/rbd_wnbd/rbd_wnbd.cc @@ -0,0 +1,1871 @@ +/* + * rbd-wnbd - RBD in userspace + * + * Copyright (C) 2020 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * +*/ + +#include <objidl.h> +// LOCK_WRITE is also defined by objidl.h, we have to avoid +// a collision. +#undef LOCK_WRITE + +#include "include/int_types.h" + +#include <atomic> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +#include "wnbd_handler.h" +#include "wnbd_wmi.h" +#include "rbd_wnbd.h" + +#include <fstream> +#include <memory> +#include <regex> + +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "common/ceph_argparse.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/version.h" +#include "common/win32/service.h" +#include "common/win32/wstring.h" +#include "common/admin_socket_client.h" + +#include "global/global_init.h" + +#include "include/uuid.h" +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" + +#include <shellapi.h> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd-wnbd: " + +using namespace std; + +// 
Wait 2s before recreating the wmi subscription in case of errors +#define WMI_SUBSCRIPTION_RETRY_INTERVAL 2 +// SCSI adapter modification events aren't received until the entire polling +// interval has elapsed (unlike other WMI classes, such as Msvm_ComputerSystem). +// With longer intervals, it even seems to miss events. For this reason, +// we're using a relatively short interval but have adapter state monitoring +// as an optional feature, mainly used for dev / driver certification purposes. +#define WNBD_ADAPTER_WMI_POLL_INTERVAL 2 +// Wait for wmi events up to two seconds +#define WMI_EVENT_TIMEOUT 2 + +bool is_process_running(DWORD pid) +{ + HANDLE process = OpenProcess(SYNCHRONIZE, FALSE, pid); + DWORD ret = WaitForSingleObject(process, 0); + CloseHandle(process); + return ret == WAIT_TIMEOUT; +} + +DWORD WNBDActiveDiskIterator::fetch_list( + PWNBD_CONNECTION_LIST* conn_list) +{ + DWORD curr_buff_sz = 0; + DWORD buff_sz = 0; + DWORD err = 0; + PWNBD_CONNECTION_LIST tmp_list = NULL; + + // We're using a loop because other connections may show up by the time + // we retry. + do { + if (tmp_list) + free(tmp_list); + + if (buff_sz) { + tmp_list = (PWNBD_CONNECTION_LIST) calloc(1, buff_sz); + if (!tmp_list) { + derr << "Could not allocate " << buff_sz << " bytes." << dendl; + err = ERROR_NOT_ENOUGH_MEMORY; + break; + } + } + + curr_buff_sz = buff_sz; + // If the buffer is too small, the return value is 0 and "BufferSize" + // will contain the required size. This is counterintuitive, but + // Windows drivers can't return a buffer as well as a non-zero status. 
+ err = WnbdList(tmp_list, &buff_sz); + if (err) + break; + } while (curr_buff_sz < buff_sz); + + if (err) { + if (tmp_list) + free(tmp_list); + } else { + *conn_list = tmp_list; + } + return err; +} + +WNBDActiveDiskIterator::WNBDActiveDiskIterator() +{ + DWORD status = WNBDActiveDiskIterator::fetch_list(&conn_list); + switch (status) { + case 0: + // no error + break; + case ERROR_OPEN_FAILED: + error = -ENOENT; + break; + default: + error = -EINVAL; + break; + } +} + +WNBDActiveDiskIterator::~WNBDActiveDiskIterator() +{ + if (conn_list) { + free(conn_list); + conn_list = NULL; + } +} + +bool WNBDActiveDiskIterator::get(Config *cfg) +{ + index += 1; + *cfg = Config(); + + if (!conn_list || index >= (int)conn_list->Count) { + return false; + } + + auto conn_info = conn_list->Connections[index]; + auto conn_props = conn_info.Properties; + + if (strncmp(conn_props.Owner, RBD_WNBD_OWNER_NAME, WNBD_MAX_OWNER_LENGTH)) { + dout(10) << "Ignoring disk: " << conn_props.InstanceName + << ". Owner: " << conn_props.Owner << dendl; + return this->get(cfg); + } + + error = load_mapping_config_from_registry(conn_props.InstanceName, cfg); + if (error) { + derr << "Could not load registry disk info for: " + << conn_props.InstanceName << ". 
Error: " << error << dendl; + return false; + } + + cfg->disk_number = conn_info.DiskNumber; + cfg->serial_number = std::string(conn_props.SerialNumber); + cfg->pid = conn_props.Pid; + cfg->active = cfg->disk_number > 0 && is_process_running(conn_props.Pid); + cfg->wnbd_mapped = true; + + return true; +} + +RegistryDiskIterator::RegistryDiskIterator() +{ + reg_key = new RegistryKey(g_ceph_context, HKEY_LOCAL_MACHINE, + SERVICE_REG_KEY, false); + if (!reg_key->hKey) { + if (!reg_key->missingKey) + error = -EINVAL; + return; + } + + if (RegQueryInfoKey(reg_key->hKey, NULL, NULL, NULL, &subkey_count, + NULL, NULL, NULL, NULL, NULL, NULL, NULL)) { + derr << "Could not query registry key: " << SERVICE_REG_KEY << dendl; + error = -EINVAL; + return; + } +} + +bool RegistryDiskIterator::get(Config *cfg) +{ + index += 1; + *cfg = Config(); + + if (!reg_key->hKey || !subkey_count) { + return false; + } + + char subkey_name[MAX_PATH] = {0}; + DWORD subkey_name_sz = MAX_PATH; + int err = RegEnumKeyEx( + reg_key->hKey, index, subkey_name, &subkey_name_sz, + NULL, NULL, NULL, NULL); + if (err == ERROR_NO_MORE_ITEMS) { + return false; + } else if (err) { + derr << "Could not enumerate registry. Error: " << err << dendl; + error = -EINVAL; + return false; + } + + if (load_mapping_config_from_registry(subkey_name, cfg)) { + error = -EINVAL; + return false; + }; + + return true; +} + +// Iterate over all RBD mappings, getting info from the registry and the driver. +bool WNBDDiskIterator::get(Config *cfg) +{ + *cfg = Config(); + + bool found_active = active_iterator.get(cfg); + if (found_active) { + active_devices.insert(cfg->devpath); + return true; + } + + error = active_iterator.get_error(); + if (error) { + dout(5) << ": WNBD iterator error: " << error << dendl; + return false; + } + + while(registry_iterator.get(cfg)) { + if (active_devices.find(cfg->devpath) != active_devices.end()) { + // Skip active devices that were already yielded. 
+ continue; + } + return true; + } + + error = registry_iterator.get_error(); + if (error) { + dout(5) << ": Registry iterator error: " << error << dendl; + } + return false; +} + +int get_exe_path(std::string& path) { + char buffer[MAX_PATH]; + DWORD err = 0; + + int ret = GetModuleFileNameA(NULL, buffer, MAX_PATH); + if (!ret || ret == MAX_PATH) { + err = GetLastError(); + derr << "Could not retrieve executable path. " + << "Error: " << win32_strerror(err) << dendl; + return -EINVAL; + } + + path = buffer; + return 0; +} + +std::string get_cli_args() { + std::ostringstream cmdline; + for (int i=1; i<__argc; i++) { + if (i > 1) + cmdline << " "; + cmdline << std::quoted(__argv[i]); + } + return cmdline.str(); +} + +int send_map_request(std::string arguments) { + dout(15) << __func__ << ": command arguments: " << arguments << dendl; + + BYTE request_buff[SERVICE_PIPE_BUFFSZ] = { 0 }; + ServiceRequest* request = (ServiceRequest*) request_buff; + request->command = Connect; + arguments.copy( + (char*)request->arguments, + SERVICE_PIPE_BUFFSZ - FIELD_OFFSET(ServiceRequest, arguments)); + ServiceReply reply = { 0 }; + + DWORD bytes_read = 0; + BOOL success = CallNamedPipe( + SERVICE_PIPE_NAME, + request_buff, + SERVICE_PIPE_BUFFSZ, + &reply, + sizeof(reply), + &bytes_read, + DEFAULT_MAP_TIMEOUT_MS); + if (!success) { + DWORD err = GetLastError(); + derr << "Could not send device map request. " + << "Make sure that the ceph service is running. " + << "Error: " << win32_strerror(err) << dendl; + return -EINVAL; + } + if (reply.status) { + derr << "The ceph service failed to map the image. " + << "Check the log file or pass '-f' (foreground mode) " + << "for additional information. " + << "Error: " << cpp_strerror(reply.status) + << dendl; + } + + return reply.status; +} + +// Spawn a subprocess using the specified "rbd-wnbd" command +// arguments. 
A pipe is passed to the child process, +// which will allow it to communicate the mapping status +int map_device_using_suprocess(std::string arguments, int timeout_ms) +{ + STARTUPINFO si; + PROCESS_INFORMATION pi; + char ch; + DWORD err = 0, status = 0; + int exit_code = 0; + std::ostringstream command_line; + std::string exe_path; + // Windows async IO context + OVERLAPPED connect_o, read_o; + HANDLE connect_event = NULL, read_event = NULL; + // Used for waiting on multiple events that are going to be initialized later. + HANDLE wait_events[2] = { INVALID_HANDLE_VALUE, INVALID_HANDLE_VALUE}; + DWORD bytes_read = 0; + // We may get a command line containing an old pipe handle when + // recreating mappings, so we'll have to replace it. + std::regex pipe_pattern("([\'\"]?--pipe-name[\'\"]? +[\'\"]?[^ ]+[\'\"]?)"); + + uuid_d uuid; + uuid.generate_random(); + std::ostringstream pipe_name; + pipe_name << "\\\\.\\pipe\\rbd-wnbd-" << uuid; + + // Create an unique named pipe to communicate with the child. 
*/ + HANDLE pipe_handle = CreateNamedPipe( + pipe_name.str().c_str(), + PIPE_ACCESS_INBOUND | FILE_FLAG_FIRST_PIPE_INSTANCE | + FILE_FLAG_OVERLAPPED, + PIPE_WAIT, + 1, // Only accept one instance + SERVICE_PIPE_BUFFSZ, + SERVICE_PIPE_BUFFSZ, + SERVICE_PIPE_TIMEOUT_MS, + NULL); + if (pipe_handle == INVALID_HANDLE_VALUE) { + err = GetLastError(); + derr << "CreateNamedPipe failed: " << win32_strerror(err) << dendl; + exit_code = -ECHILD; + goto finally; + } + connect_event = CreateEvent(0, TRUE, FALSE, NULL); + read_event = CreateEvent(0, TRUE, FALSE, NULL); + if (!connect_event || !read_event) { + err = GetLastError(); + derr << "CreateEvent failed: " << win32_strerror(err) << dendl; + exit_code = -ECHILD; + goto finally; + } + connect_o.hEvent = connect_event; + read_o.hEvent = read_event; + + status = ConnectNamedPipe(pipe_handle, &connect_o); + err = GetLastError(); + if (status || err != ERROR_IO_PENDING) { + if (status) + err = status; + derr << "ConnectNamedPipe failed: " << win32_strerror(err) << dendl; + exit_code = -ECHILD; + goto finally; + } + err = 0; + + dout(5) << __func__ << ": command arguments: " << arguments << dendl; + + // We'll avoid running arbitrary commands, instead using the executable + // path of this process (expected to be the full rbd-wnbd.exe path). 
+ err = get_exe_path(exe_path); + if (err) { + exit_code = -EINVAL; + goto finally; + } + command_line << std::quoted(exe_path) + << " " << std::regex_replace(arguments, pipe_pattern, "") + << " --pipe-name " << pipe_name.str(); + + dout(5) << __func__ << ": command line: " << command_line.str() << dendl; + + GetStartupInfo(&si); + // Create a detached child + if (!CreateProcess(NULL, (char*)command_line.str().c_str(), + NULL, NULL, FALSE, DETACHED_PROCESS, + NULL, NULL, &si, &pi)) { + err = GetLastError(); + derr << "CreateProcess failed: " << win32_strerror(err) << dendl; + exit_code = -ECHILD; + goto finally; + } + + wait_events[0] = connect_event; + wait_events[1] = pi.hProcess; + status = WaitForMultipleObjects(2, wait_events, FALSE, timeout_ms); + switch(status) { + case WAIT_OBJECT_0: + if (!GetOverlappedResult(pipe_handle, &connect_o, &bytes_read, TRUE)) { + err = GetLastError(); + derr << "Couln't establish a connection with the child process. " + << "Error: " << win32_strerror(err) << dendl; + exit_code = -ECHILD; + goto clean_process; + } + // We have an incoming connection. + break; + case WAIT_OBJECT_0 + 1: + // The process has exited prematurely. + goto clean_process; + case WAIT_TIMEOUT: + derr << "Timed out waiting for child process connection." << dendl; + goto clean_process; + default: + derr << "Failed waiting for child process. Status: " << status << dendl; + goto clean_process; + } + // Block and wait for child to say it is ready. + dout(5) << __func__ << ": waiting for child notification." << dendl; + if (!ReadFile(pipe_handle, &ch, 1, NULL, &read_o)) { + err = GetLastError(); + if (err != ERROR_IO_PENDING) { + derr << "Receiving child process reply failed with: " + << win32_strerror(err) << dendl; + exit_code = -ECHILD; + goto clean_process; + } + } + wait_events[0] = read_event; + wait_events[1] = pi.hProcess; + // The RBD daemon is expected to write back right after opening the + // pipe. We'll use the same timeout value for now. 
+ status = WaitForMultipleObjects(2, wait_events, FALSE, timeout_ms); + switch(status) { + case WAIT_OBJECT_0: + if (!GetOverlappedResult(pipe_handle, &read_o, &bytes_read, TRUE)) { + err = GetLastError(); + derr << "Receiving child process reply failed with: " + << win32_strerror(err) << dendl; + exit_code = -ECHILD; + goto clean_process; + } + break; + case WAIT_OBJECT_0 + 1: + // The process has exited prematurely. + goto clean_process; + case WAIT_TIMEOUT: + derr << "Timed out waiting for child process message." << dendl; + goto clean_process; + default: + derr << "Failed waiting for child process. Status: " << status << dendl; + goto clean_process; + } + + dout(5) << __func__ << ": received child notification." << dendl; + goto finally; + + clean_process: + if (!is_process_running(pi.dwProcessId)) { + GetExitCodeProcess(pi.hProcess, (PDWORD)&exit_code); + derr << "Daemon failed with: " << cpp_strerror(exit_code) << dendl; + } else { + // The process closed the pipe without notifying us or exiting. + // This is quite unlikely, but we'll terminate the process. + dout(0) << "Terminating unresponsive process." << dendl; + TerminateProcess(pi.hProcess, 1); + exit_code = -EINVAL; + } + + finally: + if (exit_code) + derr << "Could not start RBD daemon." << dendl; + if (pipe_handle) + CloseHandle(pipe_handle); + if (connect_event) + CloseHandle(connect_event); + if (read_event) + CloseHandle(read_event); + return exit_code; +} + +BOOL WINAPI console_handler_routine(DWORD dwCtrlType) +{ + dout(0) << "Received control signal: " << dwCtrlType + << ". Exiting." 
<< dendl; + + std::unique_lock l{shutdown_lock}; + if (handler) + handler->shutdown(); + + return true; +} + +int save_config_to_registry(Config* cfg) +{ + std::string strKey{ SERVICE_REG_KEY }; + strKey.append("\\"); + strKey.append(cfg->devpath); + auto reg_key = RegistryKey( + g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str(), true); + if (!reg_key.hKey) { + return -EINVAL; + } + + int ret_val = 0; + // Registry writes are immediately available to other processes. + // Still, we'll do a flush to ensure that the mapping can be + // recreated after a system crash. + if (reg_key.set("pid", getpid()) || + reg_key.set("devpath", cfg->devpath) || + reg_key.set("poolname", cfg->poolname) || + reg_key.set("nsname", cfg->nsname) || + reg_key.set("imgname", cfg->imgname) || + reg_key.set("snapname", cfg->snapname) || + reg_key.set("command_line", get_cli_args()) || + reg_key.set("persistent", cfg->persistent) || + reg_key.set("admin_sock_path", g_conf()->admin_socket) || + reg_key.flush()) { + ret_val = -EINVAL; + } + + return ret_val; +} + +int remove_config_from_registry(Config* cfg) +{ + std::string strKey{ SERVICE_REG_KEY }; + strKey.append("\\"); + strKey.append(cfg->devpath); + return RegistryKey::remove( + g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str()); +} + +int load_mapping_config_from_registry(string devpath, Config* cfg) +{ + std::string strKey{ SERVICE_REG_KEY }; + strKey.append("\\"); + strKey.append(devpath); + auto reg_key = RegistryKey( + g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str(), false); + if (!reg_key.hKey) { + if (reg_key.missingKey) + return -ENOENT; + else + return -EINVAL; + } + + reg_key.get("devpath", cfg->devpath); + reg_key.get("poolname", cfg->poolname); + reg_key.get("nsname", cfg->nsname); + reg_key.get("imgname", cfg->imgname); + reg_key.get("snapname", cfg->snapname); + reg_key.get("command_line", cfg->command_line); + reg_key.get("persistent", cfg->persistent); + reg_key.get("admin_sock_path", cfg->admin_sock_path); + + 
return 0; +} + +int restart_registered_mappings( + int worker_count, + int total_timeout, + int image_map_timeout) +{ + Config cfg; + WNBDDiskIterator iterator; + int r; + std::atomic<int> err = 0; + + dout(0) << "remounting persistent disks" << dendl; + + int total_timeout_ms = max(total_timeout, total_timeout * 1000); + int image_map_timeout_ms = max(image_map_timeout, image_map_timeout * 1000); + + LARGE_INTEGER start_t, counter_freq; + QueryPerformanceFrequency(&counter_freq); + QueryPerformanceCounter(&start_t); + + boost::asio::thread_pool pool(worker_count); + while (iterator.get(&cfg)) { + if (cfg.command_line.empty()) { + derr << "Could not recreate mapping, missing command line: " + << cfg.devpath << dendl; + err = -EINVAL; + continue; + } + if (cfg.wnbd_mapped) { + dout(1) << __func__ << ": device already mapped: " + << cfg.devpath << dendl; + continue; + } + if (!cfg.persistent) { + dout(1) << __func__ << ": cleaning up non-persistent mapping: " + << cfg.devpath << dendl; + r = remove_config_from_registry(&cfg); + if (r) { + derr << __func__ << ": could not clean up non-persistent mapping: " + << cfg.devpath << dendl; + } + continue; + } + + boost::asio::post(pool, + [cfg, start_t, counter_freq, total_timeout_ms, + image_map_timeout_ms, &err]() + { + LARGE_INTEGER curr_t, elapsed_ms; + QueryPerformanceCounter(&curr_t); + elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart; + elapsed_ms.QuadPart *= 1000; + elapsed_ms.QuadPart /= counter_freq.QuadPart; + + int time_left_ms = max( + 0, + total_timeout_ms - (int)elapsed_ms.QuadPart); + time_left_ms = min(image_map_timeout_ms, time_left_ms); + if (!time_left_ms) { + err = -ETIMEDOUT; + return; + } + + dout(1) << "Remapping: " << cfg.devpath + << ". Timeout: " << time_left_ms << " ms." << dendl; + + // We'll try to map all devices and return a non-zero value + // if any of them fails. 
+ int r = map_device_using_suprocess(cfg.command_line, time_left_ms); + if (r) { + err = r; + derr << "Could not create mapping: " + << cfg.devpath << ". Error: " << r << dendl; + } else { + dout(1) << "Successfully remapped: " << cfg.devpath << dendl; + } + }); + } + pool.join(); + + r = iterator.get_error(); + if (r) { + derr << "Could not fetch all mappings. Error: " << r << dendl; + err = r; + } + + return err; +} + +int disconnect_all_mappings( + bool unregister, + bool hard_disconnect, + int soft_disconnect_timeout, + int worker_count) +{ + // Although not generally recommended, soft_disconnect_timeout can be 0, + // which means infinite timeout. + ceph_assert(soft_disconnect_timeout >= 0); + ceph_assert(worker_count > 0); + int64_t timeout_ms = soft_disconnect_timeout * 1000; + + Config cfg; + WNBDActiveDiskIterator iterator; + int r; + std::atomic<int> err = 0; + + boost::asio::thread_pool pool(worker_count); + LARGE_INTEGER start_t, counter_freq; + QueryPerformanceFrequency(&counter_freq); + QueryPerformanceCounter(&start_t); + while (iterator.get(&cfg)) { + boost::asio::post(pool, + [cfg, start_t, counter_freq, timeout_ms, + hard_disconnect, unregister, &err]() mutable + { + LARGE_INTEGER curr_t, elapsed_ms; + QueryPerformanceCounter(&curr_t); + elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart; + elapsed_ms.QuadPart *= 1000; + elapsed_ms.QuadPart /= counter_freq.QuadPart; + + int64_t time_left_ms = max((int64_t)0, timeout_ms - elapsed_ms.QuadPart); + + cfg.hard_disconnect = hard_disconnect || !time_left_ms; + cfg.hard_disconnect_fallback = true; + cfg.soft_disconnect_timeout = time_left_ms / 1000; + + dout(1) << "Removing mapping: " << cfg.devpath + << ". Timeout: " << cfg.soft_disconnect_timeout + << "s. Hard disconnect: " << cfg.hard_disconnect + << dendl; + + int r = do_unmap(&cfg, unregister); + if (r) { + err = r; + derr << "Could not remove mapping: " << cfg.devpath + << ". 
Error: " << r << dendl; + } else { + dout(1) << "Successfully removed mapping: " << cfg.devpath << dendl; + } + }); + } + pool.join(); + + r = iterator.get_error(); + if (r == -ENOENT) { + dout(0) << __func__ << ": wnbd adapter unavailable, " + << "assuming that no wnbd mappings exist." << dendl; + err = 0; + } else if (r) { + derr << "Could not fetch all mappings. Error: " << r << dendl; + err = r; + } + + return err; +} + +class RBDService : public ServiceBase { + private: + bool hard_disconnect; + int soft_disconnect_timeout; + int thread_count; + int service_start_timeout; + int image_map_timeout; + bool remap_failure_fatal; + bool adapter_monitoring_enabled; + + std::thread adapter_monitor_thread; + + ceph::mutex start_lock = ceph::make_mutex("RBDService::StartLocker"); + ceph::mutex shutdown_lock = ceph::make_mutex("RBDService::ShutdownLocker"); + bool started = false; + std::atomic<bool> stop_requsted = false; + + public: + RBDService(bool _hard_disconnect, + int _soft_disconnect_timeout, + int _thread_count, + int _service_start_timeout, + int _image_map_timeout, + bool _remap_failure_fatal, + bool _adapter_monitoring_enabled) + : ServiceBase(g_ceph_context) + , hard_disconnect(_hard_disconnect) + , soft_disconnect_timeout(_soft_disconnect_timeout) + , thread_count(_thread_count) + , service_start_timeout(_service_start_timeout) + , image_map_timeout(_image_map_timeout) + , remap_failure_fatal(_remap_failure_fatal) + , adapter_monitoring_enabled(_adapter_monitoring_enabled) + { + } + + static int execute_command(ServiceRequest* request) + { + switch(request->command) { + case Connect: + dout(1) << "Received device connect request. Command line: " + << (char*)request->arguments << dendl; + // TODO: use the configured service map timeout. + // TODO: add ceph.conf options. 
+ return map_device_using_suprocess( + (char*)request->arguments, DEFAULT_MAP_TIMEOUT_MS); + default: + dout(1) << "Received unsupported command: " + << request->command << dendl; + return -ENOSYS; + } + } + + static DWORD handle_connection(HANDLE pipe_handle) + { + PBYTE message[SERVICE_PIPE_BUFFSZ] = { 0 }; + DWORD bytes_read = 0, bytes_written = 0; + DWORD err = 0; + DWORD reply_sz = 0; + ServiceReply reply = { 0 }; + + dout(20) << __func__ << ": Receiving message." << dendl; + BOOL success = ReadFile( + pipe_handle, message, SERVICE_PIPE_BUFFSZ, + &bytes_read, NULL); + if (!success || !bytes_read) { + err = GetLastError(); + derr << "Could not read service command: " + << win32_strerror(err) << dendl; + goto exit; + } + + dout(20) << __func__ << ": Executing command." << dendl; + reply.status = execute_command((ServiceRequest*) message); + reply_sz = sizeof(reply); + + dout(20) << __func__ << ": Sending reply. Status: " + << reply.status << dendl; + success = WriteFile( + pipe_handle, &reply, reply_sz, &bytes_written, NULL); + if (!success || reply_sz != bytes_written) { + err = GetLastError(); + derr << "Could not send service command result: " + << win32_strerror(err) << dendl; + } + +exit: + dout(20) << __func__ << ": Cleaning up connection." << dendl; + FlushFileBuffers(pipe_handle); + DisconnectNamedPipe(pipe_handle); + CloseHandle(pipe_handle); + + return err; + } + + // We have to support Windows server 2016. Unix sockets only work on + // WS 2019, so we can't use the Ceph admin socket abstraction. + // Getting the Ceph admin sockets to work with Windows named pipes + // would require quite a few changes. + static DWORD accept_pipe_connection() { + DWORD err = 0; + // We're currently using default ACLs, which grant full control to the + // LocalSystem account and administrator as well as the owner. 
+ dout(20) << __func__ << ": opening new pipe instance" << dendl; + HANDLE pipe_handle = CreateNamedPipe( + SERVICE_PIPE_NAME, + PIPE_ACCESS_DUPLEX, + PIPE_TYPE_MESSAGE | PIPE_READMODE_MESSAGE | PIPE_WAIT, + PIPE_UNLIMITED_INSTANCES, + SERVICE_PIPE_BUFFSZ, + SERVICE_PIPE_BUFFSZ, + SERVICE_PIPE_TIMEOUT_MS, + NULL); + if (pipe_handle == INVALID_HANDLE_VALUE) { + err = GetLastError(); + derr << "CreatePipe failed: " << win32_strerror(err) << dendl; + return -EINVAL; + } + + dout(20) << __func__ << ": waiting for connections." << dendl; + BOOL connected = ConnectNamedPipe(pipe_handle, NULL); + if (!connected) { + err = GetLastError(); + if (err != ERROR_PIPE_CONNECTED) { + derr << "Pipe connection failed: " << win32_strerror(err) << dendl; + + CloseHandle(pipe_handle); + return err; + } + } + + dout(20) << __func__ << ": Connection received." << dendl; + // We'll handle the connection in a separate thread and at the same time + // accept a new connection. + HANDLE handler_thread = CreateThread( + NULL, 0, (LPTHREAD_START_ROUTINE) handle_connection, pipe_handle, 0, 0); + if (!handler_thread) { + err = GetLastError(); + derr << "Could not start pipe connection handler thread: " + << win32_strerror(err) << dendl; + CloseHandle(pipe_handle); + } else { + CloseHandle(handler_thread); + } + + return err; + } + + static int pipe_server_loop(LPVOID arg) + { + dout(5) << "Accepting admin pipe connections." << dendl; + while (1) { + // This call will block until a connection is received, which will + // then be handled in a separate thread. The function returns, allowing + // us to accept another simultaneous connection. 
+ accept_pipe_connection(); + } + return 0; + } + + int create_pipe_server() { + HANDLE handler_thread = CreateThread( + NULL, 0, (LPTHREAD_START_ROUTINE) pipe_server_loop, NULL, 0, 0); + DWORD err = 0; + + if (!handler_thread) { + err = GetLastError(); + derr << "Could not start pipe server: " << win32_strerror(err) << dendl; + } else { + CloseHandle(handler_thread); + } + + return err; + } + + void monitor_wnbd_adapter() + { + dout(5) << __func__ << ": initializing COM" << dendl; + // Initialize the Windows COM library for this thread. + COMBootstrapper com_bootstrapper; + HRESULT hres = com_bootstrapper.initialize(); + if (FAILED(hres)) { + return; + } + + WmiSubscription subscription = subscribe_wnbd_adapter_events( + WNBD_ADAPTER_WMI_POLL_INTERVAL); + dout(5) << __func__ << ": initializing wmi subscription" << dendl; + hres = subscription.initialize(); + + dout(0) << "monitoring wnbd adapter state changes" << dendl; + // The event watcher will wait at most WMI_EVENT_TIMEOUT (2s) + // and exit the loop if the service is being stopped. + while (!stop_requsted) { + IWbemClassObject* object; + ULONG returned = 0; + + if (FAILED(hres)) { + derr << "couldn't retrieve wnbd adapter events, wmi hresult: " + << hres << ". Reestablishing wmi listener in " + << WMI_SUBSCRIPTION_RETRY_INTERVAL << " seconds." 
<< dendl; + subscription.close(); + Sleep(WMI_SUBSCRIPTION_RETRY_INTERVAL * 1000); + + dout(20) << "recreating wnbd adapter wmi subscription" << dendl; + subscription = subscribe_wnbd_adapter_events( + WNBD_ADAPTER_WMI_POLL_INTERVAL); + hres = subscription.initialize(); + continue; + } + + dout(20) << "fetching wnbd adapter events" << dendl; + hres = subscription.next( + WMI_EVENT_TIMEOUT * 1000, + 1, // we'll process one event at a time + &object, + &returned); + + if (!FAILED(hres) && returned) { + if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceCreationEvent")) { + dout(0) << "wnbd adapter (re)created, remounting disks" << dendl; + restart_registered_mappings( + thread_count, service_start_timeout, image_map_timeout); + } else if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceDeletionEvent")) { + dout(0) << "wnbd adapter removed" << dendl; + // nothing to do here + } else if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceModificationEvent")) { + dout(0) << "wnbd adapter changed" << dendl; + // TODO: look for state changes and log the availability/status + } + + object->Release(); + } + } + + dout(10) << "service stop requested, wnbd event monitor exited" << dendl; + } + + int run_hook() override { + std::unique_lock l{start_lock}; + if (started) { + // The run hook is only supposed to be called once per process, + // however we're staying cautious. + derr << "Service already running." << dendl; + return -EALREADY; + } + + started = true; + // Restart registered mappings before accepting new ones. + int r = restart_registered_mappings( + thread_count, service_start_timeout, image_map_timeout); + if (r) { + if (remap_failure_fatal) { + derr << "Couldn't remap all images. Cleaning up." << dendl; + return r; + } else { + dout(0) << "Ignoring image remap failure." << dendl; + } + } + + if (adapter_monitoring_enabled) { + adapter_monitor_thread = std::thread(&monitor_wnbd_adapter, this); + } else { + dout(0) << "WNBD adapter monitoring disabled." 
<< dendl; + } + + return create_pipe_server(); + } + + // Invoked when the service is requested to stop. + int stop_hook() override { + std::unique_lock l{shutdown_lock}; + + stop_requsted = true; + + int r = disconnect_all_mappings( + false, hard_disconnect, soft_disconnect_timeout, thread_count); + + if (adapter_monitor_thread.joinable()) { + dout(10) << "waiting for wnbd event monitor thread" << dendl; + adapter_monitor_thread.join(); + dout(10) << "wnbd event monitor stopped" << dendl; + } + + return r; + } + + // Invoked when the system is shutting down. + int shutdown_hook() override { + return stop_hook(); + } +}; + +class WNBDWatchCtx : public librbd::UpdateWatchCtx +{ +private: + librados::IoCtx &io_ctx; + WnbdHandler* handler; + librbd::Image ℑ + uint64_t size; +public: + WNBDWatchCtx(librados::IoCtx& io_ctx, WnbdHandler* handler, + librbd::Image& image, uint64_t size) + : io_ctx(io_ctx) + , handler(handler) + , image(image) + , size(size) + { } + + ~WNBDWatchCtx() override {} + + void handle_notify() override + { + uint64_t new_size; + + if (image.size(&new_size) == 0 && new_size != size && + handler->resize(new_size) == 0) { + size = new_size; + } + } +}; + +static void usage() +{ + const char* usage_str =R"( +Usage: rbd-wnbd [options] map <image-or-snap-spec> Map an image to wnbd device + [options] unmap <device|image-or-snap-spec> Unmap wnbd device + [options] list List mapped wnbd devices + [options] show <image-or-snap-spec> Show mapped wnbd device + stats <image-or-snap-spec> Show IO counters + [options] service Windows service entrypoint, + handling device lifecycle + +Map options: + --device <device path> Optional mapping unique identifier + --exclusive Forbid writes by other clients + --read-only Map read-only + --non-persistent Do not recreate the mapping when the Ceph service + restarts. By default, mappings are persistent + --io-req-workers The number of workers that dispatch IO requests. 
+ Default: 4 + --io-reply-workers The number of workers that dispatch IO replies. + Default: 4 + +Unmap options: + --hard-disconnect Skip attempting a soft disconnect + --no-hard-disconnect-fallback Immediately return an error if the soft + disconnect fails instead of attempting a hard + disconnect as fallback + --soft-disconnect-timeout Soft disconnect timeout in seconds. The soft + disconnect operation uses PnP to notify the + Windows storage stack that the device is going to + be disconnectd. Storage drivers can block this + operation if there are pending operations, + unflushed caches or open handles. Default: 15 + +Service options: + --hard-disconnect Skip attempting a soft disconnect + --soft-disconnect-timeout Cummulative soft disconnect timeout in seconds, + used when disconnecting existing mappings. A hard + disconnect will be issued when hitting the timeout + --service-thread-count The number of workers used when mapping or + unmapping images. Default: 8 + --start-timeout The service start timeout in seconds. Default: 120 + --map-timeout Individual image map timeout in seconds. Default: 20 + --remap-failure-fatal If set, the service will stop when failing to remap + an image at start time, unmapping images that have + been mapped so far. + --adapter-monitoring-enabled If set, the service will monitor WNBD adapter WMI + events and remount the images when the adapter gets + recreated. Mainly used for development and driver + certification purposes. + +Show|List options: + --format plain|json|xml Output format (default: plain) + --pretty-format Pretty formatting (json and xml) + +Common options: + --wnbd-log-level libwnbd.dll log level + +)"; + + std::cout << usage_str; + generic_server_usage(); +} + + +static Command cmd = None; + +int construct_devpath_if_missing(Config* cfg) +{ + // Windows doesn't allow us to request specific disk paths when mapping an + // image. This will just be used by rbd-wnbd and wnbd as an identifier. 
+ if (cfg->devpath.empty()) { + if (cfg->imgname.empty()) { + derr << "Missing image name." << dendl; + return -EINVAL; + } + + if (!cfg->poolname.empty()) { + cfg->devpath += cfg->poolname; + cfg->devpath += '/'; + } + if (!cfg->nsname.empty()) { + cfg->devpath += cfg->nsname; + cfg->devpath += '/'; + } + + cfg->devpath += cfg->imgname; + + if (!cfg->snapname.empty()) { + cfg->devpath += '@'; + cfg->devpath += cfg->snapname; + } + } + + return 0; +} + +boost::intrusive_ptr<CephContext> do_global_init( + int argc, const char *argv[], Config *cfg) +{ + auto args = argv_to_vec(argc, argv); + + code_environment_t code_env; + int flags; + + switch(cmd) { + case Connect: + code_env = CODE_ENVIRONMENT_DAEMON; + flags = CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS; + break; + case Service: + code_env = CODE_ENVIRONMENT_DAEMON; + flags = CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS | + CINIT_FLAG_NO_MON_CONFIG | + CINIT_FLAG_NO_DAEMON_ACTIONS; + break; + default: + code_env = CODE_ENVIRONMENT_UTILITY; + flags = CINIT_FLAG_NO_MON_CONFIG; + break; + } + + global_pre_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, code_env, flags); + // Avoid cluttering the console when spawning a mapping that will run + // in the background. + if (g_conf()->daemonize && cfg->parent_pipe.empty()) { + flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; + } + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + code_env, flags, FALSE); + + // There's no fork on Windows, we should be safe calling this anytime. 
+ common_init_finish(g_ceph_context); + global_init_chdir(g_ceph_context); + + return cct; +} + +static int do_map(Config *cfg) +{ + int r; + + librados::Rados rados; + librbd::RBD rbd; + librados::IoCtx io_ctx; + librbd::Image image; + librbd::image_info_t info; + HANDLE parent_pipe_handle = INVALID_HANDLE_VALUE; + int err = 0; + + if (g_conf()->daemonize && cfg->parent_pipe.empty()) { + return send_map_request(get_cli_args()); + } + + dout(0) << "Mapping RBD image: " << cfg->devpath << dendl; + + r = rados.init_with_context(g_ceph_context); + if (r < 0) { + derr << "rbd-wnbd: couldn't initialize rados: " << cpp_strerror(r) + << dendl; + goto close_ret; + } + + r = rados.connect(); + if (r < 0) { + derr << "rbd-wnbd: couldn't connect to rados: " << cpp_strerror(r) + << dendl; + goto close_ret; + } + + r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx); + if (r < 0) { + derr << "rbd-wnbd: couldn't create IO context: " << cpp_strerror(r) + << dendl; + goto close_ret; + } + + io_ctx.set_namespace(cfg->nsname); + + r = rbd.open(io_ctx, image, cfg->imgname.c_str()); + if (r < 0) { + derr << "rbd-wnbd: couldn't open rbd image: " << cpp_strerror(r) + << dendl; + goto close_ret; + } + + if (cfg->exclusive) { + r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); + if (r < 0) { + derr << "rbd-wnbd: failed to acquire exclusive lock: " << cpp_strerror(r) + << dendl; + goto close_ret; + } + } + + if (!cfg->snapname.empty()) { + r = image.snap_set(cfg->snapname.c_str()); + if (r < 0) { + derr << "rbd-wnbd: couldn't use snapshot: " << cpp_strerror(r) + << dendl; + goto close_ret; + } + } + + r = image.stat(info, sizeof(info)); + if (r < 0) + goto close_ret; + + if (info.size > _UI64_MAX) { + r = -EFBIG; + derr << "rbd-wnbd: image is too large (" << byte_u_t(info.size) + << ", max is " << byte_u_t(_UI64_MAX) << ")" << dendl; + goto close_ret; + } + + // We're storing mapping details in the registry even for non-persistent + // mappings. 
This allows us to easily retrieve mapping details such + // as the rbd pool or admin socket path. + // We're cleaning up the registry entry when the non-persistent mapping + // gets disconnected or when the ceph service restarts. + r = save_config_to_registry(cfg); + if (r < 0) + goto close_ret; + + handler = new WnbdHandler(image, cfg->devpath, + info.size / RBD_WNBD_BLKSIZE, + RBD_WNBD_BLKSIZE, + !cfg->snapname.empty() || cfg->readonly, + g_conf().get_val<bool>("rbd_cache"), + cfg->io_req_workers, + cfg->io_reply_workers); + r = handler->start(); + if (r) { + r = r == ERROR_ALREADY_EXISTS ? -EEXIST : -EINVAL; + goto close_ret; + } + + // We're informing the parent processes that the initialization + // was successful. + if (!cfg->parent_pipe.empty()) { + parent_pipe_handle = CreateFile( + cfg->parent_pipe.c_str(), GENERIC_WRITE, 0, NULL, + OPEN_EXISTING, 0, NULL); + if (parent_pipe_handle == INVALID_HANDLE_VALUE) { + derr << "Could not open parent pipe: " << win32_strerror(err) << dendl; + } else if (!WriteFile(parent_pipe_handle, "a", 1, NULL, NULL)) { + // TODO: consider exiting in this case. The parent didn't wait for us, + // maybe it was killed after a timeout. + err = GetLastError(); + derr << "Failed to communicate with the parent: " + << win32_strerror(err) << dendl; + } else { + dout(5) << __func__ << ": submitted parent notification." 
<< dendl; + } + + if (parent_pipe_handle != INVALID_HANDLE_VALUE) + CloseHandle(parent_pipe_handle); + + global_init_postfork_finish(g_ceph_context); + } + + { + uint64_t watch_handle; + WNBDWatchCtx watch_ctx(io_ctx, handler, image, info.size); + r = image.update_watch(&watch_ctx, &watch_handle); + if (r < 0) { + derr << __func__ << ": update_watch failed with error: " + << cpp_strerror(r) << dendl; + + handler->shutdown(); + goto close_ret; + } + + handler->wait(); + + r = image.update_unwatch(watch_handle); + if (r < 0) + derr << __func__ << ": update_unwatch failed with error: " + << cpp_strerror(r) << dendl; + + handler->shutdown(); + } + +close_ret: + // The registry record shouldn't be removed for (already) running mappings. + if (!cfg->persistent) { + dout(5) << __func__ << ": cleaning up non-persistent mapping: " + << cfg->devpath << dendl; + r = remove_config_from_registry(cfg); + if (r) { + derr << __func__ << ": could not clean up non-persistent mapping: " + << cfg->devpath << dendl; + } + } + + std::unique_lock l{shutdown_lock}; + + image.close(); + io_ctx.close(); + rados.shutdown(); + if (handler) { + delete handler; + handler = nullptr; + } + + return r; +} + +static int do_unmap(Config *cfg, bool unregister) +{ + WNBD_REMOVE_OPTIONS remove_options = {0}; + remove_options.Flags.HardRemove = cfg->hard_disconnect; + remove_options.Flags.HardRemoveFallback = cfg->hard_disconnect_fallback; + remove_options.SoftRemoveTimeoutMs = cfg->soft_disconnect_timeout * 1000; + remove_options.SoftRemoveRetryIntervalMs = SOFT_REMOVE_RETRY_INTERVAL * 1000; + + int err = WnbdRemoveEx(cfg->devpath.c_str(), &remove_options); + if (err && err != ERROR_FILE_NOT_FOUND) { + return -EINVAL; + } + + if (unregister) { + err = remove_config_from_registry(cfg); + if (err) { + derr << "rbd-wnbd: failed to unregister device: " + << cfg->devpath << ". 
Error: " << err << dendl; + return -EINVAL; + } + } + return 0; +} + +static int parse_imgpath(const std::string &imgpath, Config *cfg, + std::ostream *err_msg) +{ + std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$"); + std::smatch match; + if (!std::regex_match(imgpath, match, pattern)) { + derr << "rbd-wnbd: invalid spec '" << imgpath << "'" << dendl; + return -EINVAL; + } + + if (match[1].matched) { + cfg->poolname = match[1]; + } + + if (match[2].matched) { + cfg->nsname = match[2]; + } + + cfg->imgname = match[3]; + + if (match[4].matched) + cfg->snapname = match[4]; + + return 0; +} + +static int do_list_mapped_devices(const std::string &format, bool pretty_format) +{ + std::unique_ptr<ceph::Formatter> f; + TextTable tbl; + + if (format == "json") { + f.reset(new JSONFormatter(pretty_format)); + } else if (format == "xml") { + f.reset(new XMLFormatter(pretty_format)); + } else if (!format.empty() && format != "plain") { + derr << "rbd-wnbd: invalid output format: " << format << dendl; + return -EINVAL; + } + + if (f) { + f->open_array_section("devices"); + } else { + tbl.define_column("id", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("image", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("device", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("disk_number", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("status", TextTable::LEFT, TextTable::LEFT); + } + + Config cfg; + WNBDDiskIterator wnbd_disk_iterator; + + while (wnbd_disk_iterator.get(&cfg)) { + const char* status = cfg.active ? + WNBD_STATUS_ACTIVE : WNBD_STATUS_INACTIVE; + + if (f) { + f->open_object_section("device"); + f->dump_int("id", cfg.pid ? 
cfg.pid : -1); + f->dump_string("device", cfg.devpath); + f->dump_string("pool", cfg.poolname); + f->dump_string("namespace", cfg.nsname); + f->dump_string("image", cfg.imgname); + f->dump_string("snap", cfg.snapname); + f->dump_int("disk_number", cfg.disk_number ? cfg.disk_number : -1); + f->dump_string("status", status); + f->close_section(); + } else { + if (cfg.snapname.empty()) { + cfg.snapname = "-"; + } + tbl << (cfg.pid ? cfg.pid : -1) << cfg.poolname << cfg.nsname + << cfg.imgname << cfg.snapname << cfg.devpath + << cfg.disk_number << status << TextTable::endrow; + } + } + int error = wnbd_disk_iterator.get_error(); + if (error) { + derr << "Could not get disk list: " << error << dendl; + return error; + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << tbl; + } + + return 0; +} + +static int do_show_mapped_device(std::string format, bool pretty_format, + std::string devpath) +{ + std::unique_ptr<ceph::Formatter> f; + TextTable tbl; + + if (format.empty() || format == "plain") { + format = "json"; + pretty_format = true; + } + if (format == "json") { + f.reset(new JSONFormatter(pretty_format)); + } else if (format == "xml") { + f.reset(new XMLFormatter(pretty_format)); + } else { + derr << "rbd-wnbd: invalid output format: " << format << dendl; + return -EINVAL; + } + + Config cfg; + int error = load_mapping_config_from_registry(devpath, &cfg); + if (error) { + derr << "Could not load registry disk info for: " + << devpath << ". Error: " << error << dendl; + return error; + } + + WNBD_CONNECTION_INFO conn_info = { 0 }; + // If the device is currently disconnected but there is a persistent + // mapping record, we'll show that. 
+ DWORD ret = WnbdShow(devpath.c_str(), &conn_info); + if (ret && ret != ERROR_FILE_NOT_FOUND) { + return -EINVAL; + } + + auto conn_props = conn_info.Properties; + cfg.active = conn_info.DiskNumber > 0 && is_process_running(conn_props.Pid); + f->open_object_section("device"); + f->dump_int("id", conn_props.Pid ? conn_props.Pid : -1); + f->dump_string("device", cfg.devpath); + f->dump_string("pool", cfg.poolname); + f->dump_string("namespace", cfg.nsname); + f->dump_string("image", cfg.imgname); + f->dump_string("snap", cfg.snapname); + f->dump_int("persistent", cfg.persistent); + f->dump_int("disk_number", conn_info.DiskNumber ? conn_info.DiskNumber : -1); + f->dump_string("status", cfg.active ? WNBD_STATUS_ACTIVE : WNBD_STATUS_INACTIVE); + f->dump_string("pnp_device_id", to_string(conn_info.PNPDeviceID)); + f->dump_int("readonly", conn_props.Flags.ReadOnly); + f->dump_int("block_size", conn_props.BlockSize); + f->dump_int("block_count", conn_props.BlockCount); + f->dump_int("flush_enabled", conn_props.Flags.FlushSupported); + f->close_section(); + f->flush(std::cout); + + return 0; +} + +static int do_stats(std::string search_devpath) +{ + Config cfg; + WNBDDiskIterator wnbd_disk_iterator; + + while (wnbd_disk_iterator.get(&cfg)) { + if (cfg.devpath != search_devpath) + continue; + + AdminSocketClient client = AdminSocketClient(cfg.admin_sock_path); + std::string output; + std::string result = client.do_request("{\"prefix\":\"wnbd stats\"}", + &output); + if (!result.empty()) { + std::cerr << "Admin socket error: " << result << std::endl; + return -EINVAL; + } + + std::cout << output << std::endl; + return 0; + } + int error = wnbd_disk_iterator.get_error(); + if (!error) { + error = -ENOENT; + } + + derr << "Could not find the specified disk." 
<< dendl; + return error; +} + +static int parse_args(std::vector<const char*>& args, + std::ostream *err_msg, + Command *command, Config *cfg) +{ + std::string conf_file_list; + std::string cluster; + CephInitParameters iparams = ceph_argparse_early_args( + args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list); + + ConfigProxy config{false}; + config->name = iparams.name; + config->cluster = cluster; + + if (!conf_file_list.empty()) { + config.parse_config_files(conf_file_list.c_str(), nullptr, 0); + } else { + config.parse_config_files(nullptr, nullptr, 0); + } + config.parse_env(CEPH_ENTITY_TYPE_CLIENT); + config.parse_argv(args); + cfg->poolname = config.get_val<std::string>("rbd_default_pool"); + + std::vector<const char*>::iterator i; + std::ostringstream err; + + // TODO: consider using boost::program_options like Device.cc does. + // This should simplify argument parsing. Also, some arguments must be tied + // to specific commands, for example the disconnect timeout. Luckily, + // this is enforced by the "rbd device" wrapper. 
+ for (i = args.begin(); i != args.end(); ) { + if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { + return HELP_INFO; + } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) { + return VERSION_INFO; + } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) { + } else if (ceph_argparse_witharg(args, i, &cfg->format, err, "--format", + (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { + cfg->readonly = true; + } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { + cfg->exclusive = true; + } else if (ceph_argparse_flag(args, i, "--non-persistent", (char *)NULL)) { + cfg->persistent = false; + } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) { + cfg->pretty_format = true; + } else if (ceph_argparse_flag(args, i, "--remap-failure-fatal", (char *)NULL)) { + cfg->remap_failure_fatal = true; + } else if (ceph_argparse_flag(args, i, "--adapter-monitoring-enabled", (char *)NULL)) { + cfg->adapter_monitoring_enabled = true; + } else if (ceph_argparse_witharg(args, i, &cfg->parent_pipe, err, + "--pipe-name", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-wnbd: " << err.str(); + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, (int*)&cfg->wnbd_log_level, + err, "--wnbd-log-level", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-wnbd: " << err.str(); + return -EINVAL; + } + if (cfg->wnbd_log_level < 0) { + *err_msg << "rbd-wnbd: Invalid argument for wnbd-log-level"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, (int*)&cfg->io_req_workers, + err, "--io-req-workers", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-wnbd: " << err.str(); + return -EINVAL; + } + if (cfg->io_req_workers <= 0) { + *err_msg << "rbd-wnbd: Invalid argument for io-req-workers"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, (int*)&cfg->io_reply_workers, + err, 
"--io-reply-workers", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-wnbd: " << err.str(); + return -EINVAL; + } + if (cfg->io_reply_workers <= 0) { + *err_msg << "rbd-wnbd: Invalid argument for io-reply-workers"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, (int*)&cfg->service_thread_count, + err, "--service-thread-count", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-wnbd: " << err.str(); + return -EINVAL; + } + if (cfg->service_thread_count <= 0) { + *err_msg << "rbd-wnbd: Invalid argument for service-thread-count"; + return -EINVAL; + } + } else if (ceph_argparse_flag(args, i, "--hard-disconnect", (char *)NULL)) { + cfg->hard_disconnect = true; + } else if (ceph_argparse_flag(args, i, + "--no-hard-disconnect-fallback", (char *)NULL)) { + cfg->hard_disconnect_fallback = false; + } else if (ceph_argparse_witharg(args, i, + (int*)&cfg->soft_disconnect_timeout, + err, "--soft-disconnect-timeout", + (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-wnbd: " << err.str(); + return -EINVAL; + } + if (cfg->soft_disconnect_timeout < 0) { + *err_msg << "rbd-wnbd: Invalid argument for soft-disconnect-timeout"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, + (int*)&cfg->service_start_timeout, + err, "--start-timeout", + (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-wnbd: " << err.str(); + return -EINVAL; + } + if (cfg->service_start_timeout <= 0) { + *err_msg << "rbd-wnbd: Invalid argument for start-timeout"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, + (int*)&cfg->image_map_timeout, + err, "--map-timeout", + (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-wnbd: " << err.str(); + return -EINVAL; + } + if (cfg->image_map_timeout <= 0) { + *err_msg << "rbd-wnbd: Invalid argument for map-timeout"; + return -EINVAL; + } + } else { + ++i; + } + } + + Command cmd = None; + if (args.begin() != args.end()) { + if (strcmp(*args.begin(), 
"map") == 0) { + cmd = Connect; + } else if (strcmp(*args.begin(), "unmap") == 0) { + cmd = Disconnect; + } else if (strcmp(*args.begin(), "list") == 0) { + cmd = List; + } else if (strcmp(*args.begin(), "show") == 0) { + cmd = Show; + } else if (strcmp(*args.begin(), "service") == 0) { + cmd = Service; + } else if (strcmp(*args.begin(), "stats") == 0) { + cmd = Stats; + } else if (strcmp(*args.begin(), "help") == 0) { + return HELP_INFO; + } else { + *err_msg << "rbd-wnbd: unknown command: " << *args.begin(); + return -EINVAL; + } + args.erase(args.begin()); + } + + if (cmd == None) { + *err_msg << "rbd-wnbd: must specify command"; + return -EINVAL; + } + + switch (cmd) { + case Connect: + case Disconnect: + case Show: + case Stats: + if (args.begin() == args.end()) { + *err_msg << "rbd-wnbd: must specify wnbd device or image-or-snap-spec"; + return -EINVAL; + } + if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) { + return -EINVAL; + } + args.erase(args.begin()); + break; + default: + //shut up gcc; + break; + } + + if (args.begin() != args.end()) { + *err_msg << "rbd-wnbd: unknown args: " << *args.begin(); + return -EINVAL; + } + + *command = cmd; + return 0; +} + +static int rbd_wnbd(int argc, const char *argv[]) +{ + Config cfg; + auto args = argv_to_vec(argc, argv); + + // Avoid using dout before calling "do_global_init" + if (args.empty()) { + std::cout << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + + std::ostringstream err_msg; + int r = parse_args(args, &err_msg, &cmd, &cfg); + if (r == HELP_INFO) { + usage(); + return 0; + } else if (r == VERSION_INFO) { + std::cout << pretty_version_to_str() << std::endl; + return 0; + } else if (r < 0) { + std::cout << err_msg.str() << std::endl; + return r; + } + + auto cct = do_global_init(argc, argv, &cfg); + + WnbdSetLogger(WnbdHandler::LogMessage); + WnbdSetLogLevel(cfg.wnbd_log_level); + + switch (cmd) { + case Connect: + if (construct_devpath_if_missing(&cfg)) { + return -EINVAL; + } + 
r = do_map(&cfg); + if (r < 0) + return r; + break; + case Disconnect: + if (construct_devpath_if_missing(&cfg)) { + return -EINVAL; + } + r = do_unmap(&cfg, true); + if (r < 0) + return r; + break; + case List: + r = do_list_mapped_devices(cfg.format, cfg.pretty_format); + if (r < 0) + return r; + break; + case Show: + if (construct_devpath_if_missing(&cfg)) { + return r; + } + r = do_show_mapped_device(cfg.format, cfg.pretty_format, cfg.devpath); + if (r < 0) + return r; + break; + case Service: + { + RBDService service(cfg.hard_disconnect, cfg.soft_disconnect_timeout, + cfg.service_thread_count, + cfg.service_start_timeout, + cfg.image_map_timeout, + cfg.remap_failure_fatal, + cfg.adapter_monitoring_enabled); + // This call will block until the service stops. + r = RBDService::initialize(&service); + if (r < 0) + return r; + break; + } + case Stats: + if (construct_devpath_if_missing(&cfg)) { + return -EINVAL; + } + return do_stats(cfg.devpath); + default: + usage(); + break; + } + + return 0; +} + +int main(int argc, const char *argv[]) +{ + SetConsoleCtrlHandler(console_handler_routine, true); + // Avoid the Windows Error Reporting dialog. + SetErrorMode(GetErrorMode() | SEM_NOGPFAULTERRORBOX); + int r = rbd_wnbd(argc, argv); + if (r < 0) { + return r; + } + return 0; +} diff --git a/src/tools/rbd_wnbd/rbd_wnbd.h b/src/tools/rbd_wnbd/rbd_wnbd.h new file mode 100644 index 000000000..ac298e318 --- /dev/null +++ b/src/tools/rbd_wnbd/rbd_wnbd.h @@ -0,0 +1,193 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef RBD_WNBD_H +#define RBD_WNBD_H + +#include <string.h> +#include <iostream> +#include <vector> + +#include "include/compat.h" +#include "common/win32/registry.h" + +#include "wnbd_handler.h" + +#define SERVICE_REG_KEY "SYSTEM\\CurrentControlSet\\Services\\rbd-wnbd" +#define SERVICE_PIPE_NAME "\\\\.\\pipe\\rbd-wnbd" +#define SERVICE_PIPE_TIMEOUT_MS 5000 +#define SERVICE_PIPE_BUFFSZ 4096 + +#define DEFAULT_MAP_TIMEOUT_MS 30000 + +#define RBD_WNBD_BLKSIZE 512UL + +#define DEFAULT_SERVICE_START_TIMEOUT 120 +#define DEFAULT_IMAGE_MAP_TIMEOUT 20 + +#define HELP_INFO 1 +#define VERSION_INFO 2 + +#define WNBD_STATUS_ACTIVE "active" +#define WNBD_STATUS_INACTIVE "inactive" + +#define DEFAULT_SERVICE_THREAD_COUNT 8 + +static WnbdHandler* handler = nullptr; +ceph::mutex shutdown_lock = ceph::make_mutex("RbdWnbd::ShutdownLock"); + +struct Config { + bool exclusive = false; + bool readonly = false; + + std::string parent_pipe; + + std::string poolname; + std::string nsname; + std::string imgname; + std::string snapname; + std::string devpath; + + std::string format; + bool pretty_format = false; + + bool hard_disconnect = false; + int soft_disconnect_timeout = DEFAULT_SOFT_REMOVE_TIMEOUT; + bool hard_disconnect_fallback = true; + + int service_start_timeout = DEFAULT_SERVICE_START_TIMEOUT; + int image_map_timeout = DEFAULT_IMAGE_MAP_TIMEOUT; + bool remap_failure_fatal = false; + bool adapter_monitoring_enabled = false; + + // TODO: consider moving those fields to a separate structure. Those + // provide connection information without actually being configurable. + // The disk number is provided by Windows. 
+ int disk_number = -1; + int pid = 0; + std::string serial_number; + bool active = false; + bool wnbd_mapped = false; + std::string command_line; + std::string admin_sock_path; + + WnbdLogLevel wnbd_log_level = WnbdLogLevelInfo; + int io_req_workers = DEFAULT_IO_WORKER_COUNT; + int io_reply_workers = DEFAULT_IO_WORKER_COUNT; + int service_thread_count = DEFAULT_SERVICE_THREAD_COUNT; + + // register the mapping, recreating it when the Ceph service starts. + bool persistent = true; +}; + +enum Command { + None, + Connect, + Disconnect, + List, + Show, + Service, + Stats +}; + +typedef struct { + Command command; + BYTE arguments[1]; +} ServiceRequest; + +typedef struct { + int status; +} ServiceReply; + +bool is_process_running(DWORD pid); +void unmap_at_exit(); + +int disconnect_all_mappings( + bool unregister, + bool hard_disconnect, + int soft_disconnect_timeout, + int worker_count); +int restart_registered_mappings( + int worker_count, int total_timeout, int image_map_timeout); +int map_device_using_suprocess(std::string command_line); + +int construct_devpath_if_missing(Config* cfg); +int save_config_to_registry(Config* cfg); +int remove_config_from_registry(Config* cfg); +int load_mapping_config_from_registry(std::string devpath, Config* cfg); + +BOOL WINAPI console_handler_routine(DWORD dwCtrlType); + +static int parse_args(std::vector<const char*>& args, + std::ostream *err_msg, + Command *command, Config *cfg); +static int do_unmap(Config *cfg, bool unregister); + + +class BaseIterator { + public: + virtual ~BaseIterator() {}; + virtual bool get(Config *cfg) = 0; + + int get_error() { + return error; + } + protected: + int error = 0; + int index = -1; +}; + +// Iterate over mapped devices, retrieving info from the driver. 
+class WNBDActiveDiskIterator : public BaseIterator { + public: + WNBDActiveDiskIterator(); + ~WNBDActiveDiskIterator(); + + bool get(Config *cfg); + + private: + PWNBD_CONNECTION_LIST conn_list = NULL; + + static DWORD fetch_list(PWNBD_CONNECTION_LIST* conn_list); +}; + + +// Iterate over the Windows registry key, retrieving registered mappings. +class RegistryDiskIterator : public BaseIterator { + public: + RegistryDiskIterator(); + ~RegistryDiskIterator() { + delete reg_key; + } + + bool get(Config *cfg); + private: + DWORD subkey_count = 0; + char subkey_name[MAX_PATH]; + + RegistryKey* reg_key = NULL; +}; + +// Iterate over all RBD mappings, getting info from the registry and driver. +class WNBDDiskIterator : public BaseIterator { + public: + bool get(Config *cfg); + + private: + // We'll keep track of the active devices. + std::set<std::string> active_devices; + + WNBDActiveDiskIterator active_iterator; + RegistryDiskIterator registry_iterator; +}; + +#endif // RBD_WNBD_H diff --git a/src/tools/rbd_wnbd/wnbd_handler.cc b/src/tools/rbd_wnbd/wnbd_handler.cc new file mode 100644 index 000000000..f6a489836 --- /dev/null +++ b/src/tools/rbd_wnbd/wnbd_handler.cc @@ -0,0 +1,456 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd + +#include "wnbd_handler.h" + +#define _NTSCSI_USER_MODE_ +#include <rpc.h> +#include <ddk/scsi.h> + +#include <boost/thread/tss.hpp> + +#include "common/debug.h" +#include "common/errno.h" +#include "common/safe_io.h" +#include "common/SubProcess.h" +#include "common/Formatter.h" + +#include "global/global_context.h" + +#include "rbd_wnbd.h" + +WnbdHandler::~WnbdHandler() +{ + if (started && wnbd_disk) { + dout(10) << __func__ << ": terminating" << dendl; + + shutdown(); + reply_tpool->join(); + + WnbdClose(wnbd_disk); + + started = false; + + delete reply_tpool; + delete admin_hook; + } +} + +int WnbdHandler::wait() +{ + int err = 0; + if (started && wnbd_disk) { + dout(10) << __func__ << ": waiting" << dendl; + + err = WnbdWaitDispatcher(wnbd_disk); + if (err) { + derr << __func__ << " failed waiting for dispatcher to stop: " + << err << dendl; + } + } + + return err; +} + +int WnbdAdminHook::call ( + std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& errss, + bufferlist& out) +{ + if (command == "wnbd stats") { + return m_handler->dump_stats(f); + } + return -ENOSYS; +} + +int WnbdHandler::dump_stats(Formatter *f) +{ + if (!f) { + return -EINVAL; + } + + WNBD_USR_STATS stats = { 0 }; + DWORD err = WnbdGetUserspaceStats(wnbd_disk, &stats); + if (err) { + derr << "Failed to retrieve WNBD userspace stats. 
Error: " << err << dendl; + return -EINVAL; + } + + f->open_object_section("stats"); + f->dump_int("TotalReceivedRequests", stats.TotalReceivedRequests); + f->dump_int("TotalSubmittedRequests", stats.TotalSubmittedRequests); + f->dump_int("TotalReceivedReplies", stats.TotalReceivedReplies); + f->dump_int("UnsubmittedRequests", stats.UnsubmittedRequests); + f->dump_int("PendingSubmittedRequests", stats.PendingSubmittedRequests); + f->dump_int("PendingReplies", stats.PendingReplies); + f->dump_int("ReadErrors", stats.ReadErrors); + f->dump_int("WriteErrors", stats.WriteErrors); + f->dump_int("FlushErrors", stats.FlushErrors); + f->dump_int("UnmapErrors", stats.UnmapErrors); + f->dump_int("InvalidRequests", stats.InvalidRequests); + f->dump_int("TotalRWRequests", stats.TotalRWRequests); + f->dump_int("TotalReadBlocks", stats.TotalReadBlocks); + f->dump_int("TotalWrittenBlocks", stats.TotalWrittenBlocks); + + f->close_section(); + return 0; +} + +void WnbdHandler::shutdown() +{ + std::unique_lock l{shutdown_lock}; + if (!terminated && wnbd_disk) { + // We're requesting the disk to be removed but continue serving IO + // requests until the driver sends us the "Disconnect" event. + // TODO: expose PWNBD_REMOVE_OPTIONS, we're using the defaults ATM. 
+ WnbdRemove(wnbd_disk, NULL); + wait(); + terminated = true; + } +} + +void WnbdHandler::aio_callback(librbd::completion_t cb, void *arg) +{ + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(cb); + + WnbdHandler::IOContext* ctx = static_cast<WnbdHandler::IOContext*>(arg); + int ret = aio_completion->get_return_value(); + + dout(20) << __func__ << ": " << *ctx << dendl; + + if (ret == -EINVAL) { + // if shrinking an image, a pagecache writeback might reference + // extents outside of the range of the new image extents + dout(0) << __func__ << ": masking IO out-of-bounds error" << *ctx << dendl; + ctx->data.clear(); + ret = 0; + } + + if (ret < 0) { + ctx->err_code = -ret; + // TODO: check the actual error. + ctx->set_sense(SCSI_SENSE_MEDIUM_ERROR, + SCSI_ADSENSE_UNRECOVERED_ERROR); + } else if ((ctx->req_type == WnbdReqTypeRead) && + ret < static_cast<int>(ctx->req_size)) { + int pad_byte_count = static_cast<int> (ctx->req_size) - ret; + ctx->data.append_zero(pad_byte_count); + dout(20) << __func__ << ": " << *ctx << ": Pad byte count: " + << pad_byte_count << dendl; + ctx->err_code = 0; + } else { + ctx->err_code = 0; + } + + boost::asio::post( + *ctx->handler->reply_tpool, + [&, ctx]() + { + ctx->handler->send_io_response(ctx); + }); + + aio_completion->release(); +} + +void WnbdHandler::send_io_response(WnbdHandler::IOContext *ctx) { + std::unique_ptr<WnbdHandler::IOContext> pctx{ctx}; + ceph_assert(WNBD_DEFAULT_MAX_TRANSFER_LENGTH >= pctx->data.length()); + + WNBD_IO_RESPONSE wnbd_rsp = {0}; + wnbd_rsp.RequestHandle = pctx->req_handle; + wnbd_rsp.RequestType = pctx->req_type; + wnbd_rsp.Status = pctx->wnbd_status; + int err = 0; + + // Use TLS to store an overlapped structure so that we avoid + // recreating one each time we send a reply. 
+ static boost::thread_specific_ptr<OVERLAPPED> overlapped_tls( + // Cleanup routine + [](LPOVERLAPPED p_overlapped) + { + if (p_overlapped->hEvent) { + CloseHandle(p_overlapped->hEvent); + } + delete p_overlapped; + }); + + LPOVERLAPPED overlapped = overlapped_tls.get(); + if (!overlapped) + { + overlapped = new OVERLAPPED{0}; + HANDLE overlapped_evt = CreateEventA(0, TRUE, TRUE, NULL); + if (!overlapped_evt) { + err = GetLastError(); + derr << "Could not create event. Error: " << err << dendl; + return; + } + + overlapped->hEvent = overlapped_evt; + overlapped_tls.reset(overlapped); + } + + if (!ResetEvent(overlapped->hEvent)) { + err = GetLastError(); + derr << "Could not reset event. Error: " << err << dendl; + return; + } + + err = WnbdSendResponseEx( + pctx->handler->wnbd_disk, + &wnbd_rsp, + pctx->data.c_str(), + pctx->data.length(), + overlapped); + if (err == ERROR_IO_PENDING) { + DWORD returned_bytes = 0; + err = 0; + // We've got ERROR_IO_PENDING, which means that the operation is in + // progress. We'll use GetOverlappedResult to wait for it to complete + // and then retrieve the result. + if (!GetOverlappedResult(pctx->handler->wnbd_disk, overlapped, + &returned_bytes, TRUE)) { + err = GetLastError(); + derr << "Could not send response. Request id: " << wnbd_rsp.RequestHandle + << ". 
Error: " << err << dendl; + } + } +} + +void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc, uint64_t info) +{ + WnbdSetSenseEx(&wnbd_status, sense_key, asc, info); +} + +void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc) +{ + WnbdSetSense(&wnbd_status, sense_key, asc); +} + +void WnbdHandler::Read( + PWNBD_DISK Disk, + UINT64 RequestHandle, + PVOID Buffer, + UINT64 BlockAddress, + UINT32 BlockCount, + BOOLEAN ForceUnitAccess) +{ + WnbdHandler* handler = nullptr; + ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); + + WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); + ctx->handler = handler; + ctx->req_handle = RequestHandle; + ctx->req_type = WnbdReqTypeRead; + ctx->req_size = BlockCount * handler->block_size; + ctx->req_from = BlockAddress * handler->block_size; + ceph_assert(ctx->req_size <= WNBD_DEFAULT_MAX_TRANSFER_LENGTH); + + int op_flags = 0; + if (ForceUnitAccess) { + op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA; + } + + dout(20) << *ctx << ": start" << dendl; + + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); + handler->image.aio_read2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags); + + dout(20) << *ctx << ": submitted" << dendl; +} + +void WnbdHandler::Write( + PWNBD_DISK Disk, + UINT64 RequestHandle, + PVOID Buffer, + UINT64 BlockAddress, + UINT32 BlockCount, + BOOLEAN ForceUnitAccess) +{ + WnbdHandler* handler = nullptr; + ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); + + WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); + ctx->handler = handler; + ctx->req_handle = RequestHandle; + ctx->req_type = WnbdReqTypeWrite; + ctx->req_size = BlockCount * handler->block_size; + ctx->req_from = BlockAddress * handler->block_size; + + bufferptr ptr((char*)Buffer, ctx->req_size); + ctx->data.push_back(ptr); + + int op_flags = 0; + if (ForceUnitAccess) { + op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA; + } + + dout(20) << *ctx << ": start" << dendl; + + 
librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); + handler->image.aio_write2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags); + + dout(20) << *ctx << ": submitted" << dendl; +} + +void WnbdHandler::Flush( + PWNBD_DISK Disk, + UINT64 RequestHandle, + UINT64 BlockAddress, + UINT32 BlockCount) +{ + WnbdHandler* handler = nullptr; + ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); + + WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); + ctx->handler = handler; + ctx->req_handle = RequestHandle; + ctx->req_type = WnbdReqTypeFlush; + ctx->req_size = BlockCount * handler->block_size; + ctx->req_from = BlockAddress * handler->block_size; + + dout(20) << *ctx << ": start" << dendl; + + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); + handler->image.aio_flush(c); + + dout(20) << *ctx << ": submitted" << dendl; +} + +void WnbdHandler::Unmap( + PWNBD_DISK Disk, + UINT64 RequestHandle, + PWNBD_UNMAP_DESCRIPTOR Descriptors, + UINT32 Count) +{ + WnbdHandler* handler = nullptr; + ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); + ceph_assert(1 == Count); + + WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); + ctx->handler = handler; + ctx->req_handle = RequestHandle; + ctx->req_type = WnbdReqTypeUnmap; + ctx->req_size = Descriptors[0].BlockCount * handler->block_size; + ctx->req_from = Descriptors[0].BlockAddress * handler->block_size; + + dout(20) << *ctx << ": start" << dendl; + + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); + handler->image.aio_discard(ctx->req_from, ctx->req_size, c); + + dout(20) << *ctx << ": submitted" << dendl; +} + +void WnbdHandler::LogMessage( + WnbdLogLevel LogLevel, + const char* Message, + const char* FileName, + UINT32 Line, + const char* FunctionName) +{ + // We're already passing the log level to WNBD, so we'll use the highest + // log level here. + dout(0) << "libwnbd.dll!" 
<< FunctionName << " " + << WnbdLogLevelToStr(LogLevel) << " " << Message << dendl; +} + +int WnbdHandler::resize(uint64_t new_size) +{ + int err = 0; + + uint64_t new_block_count = new_size / block_size; + + dout(5) << "Resizing disk. Block size: " << block_size + << ". New block count: " << new_block_count + << ". Old block count: " + << wnbd_disk->Properties.BlockCount << "." << dendl; + err = WnbdSetDiskSize(wnbd_disk, new_block_count); + if (err) { + derr << "WNBD: Setting disk size failed with error: " + << win32_strerror(err) << dendl; + return -EINVAL; + } + + dout(5) << "Successfully resized disk to: " << new_block_count << " blocks" + << dendl; + return 0; +} + +int WnbdHandler::start() +{ + int err = 0; + WNBD_PROPERTIES wnbd_props = {0}; + + instance_name.copy(wnbd_props.InstanceName, sizeof(wnbd_props.InstanceName)); + ceph_assert(strlen(RBD_WNBD_OWNER_NAME) < WNBD_MAX_OWNER_LENGTH); + strncpy(wnbd_props.Owner, RBD_WNBD_OWNER_NAME, WNBD_MAX_OWNER_LENGTH); + + wnbd_props.BlockCount = block_count; + wnbd_props.BlockSize = block_size; + wnbd_props.MaxUnmapDescCount = 1; + + wnbd_props.Flags.ReadOnly = readonly; + wnbd_props.Flags.UnmapSupported = 1; + if (rbd_cache_enabled) { + wnbd_props.Flags.FUASupported = 1; + wnbd_props.Flags.FlushSupported = 1; + } + + err = WnbdCreate(&wnbd_props, &RbdWnbdInterface, this, &wnbd_disk); + if (err) + goto exit; + + started = true; + + err = WnbdStartDispatcher(wnbd_disk, io_req_workers); + if (err) { + derr << "Could not start WNBD dispatcher. 
Error: " << err << dendl; + } + +exit: + return err; +} + +std::ostream &operator<<(std::ostream &os, const WnbdHandler::IOContext &ctx) { + + os << "[" << std::hex << ctx.req_handle; + + switch (ctx.req_type) + { + case WnbdReqTypeRead: + os << " READ "; + break; + case WnbdReqTypeWrite: + os << " WRITE "; + break; + case WnbdReqTypeFlush: + os << " FLUSH "; + break; + case WnbdReqTypeUnmap: + os << " TRIM "; + break; + default: + os << " UNKNOWN(" << ctx.req_type << ") "; + break; + } + + os << ctx.req_from << "~" << ctx.req_size << " " + << std::dec << ntohl(ctx.err_code) << "]"; + + return os; +} diff --git a/src/tools/rbd_wnbd/wnbd_handler.h b/src/tools/rbd_wnbd/wnbd_handler.h new file mode 100644 index 000000000..c1ab5676b --- /dev/null +++ b/src/tools/rbd_wnbd/wnbd_handler.h @@ -0,0 +1,188 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef WNBD_HANDLER_H +#define WNBD_HANDLER_H + +#include <wnbd.h> + +#include "common/admin_socket.h" +#include "common/ceph_context.h" +#include "common/Thread.h" + +#include "include/rbd/librbd.hpp" +#include "include/xlist.h" + +#include "global/global_context.h" + +// TODO: make this configurable. +#define RBD_WNBD_MAX_TRANSFER 2 * 1024 * 1024 +#define SOFT_REMOVE_RETRY_INTERVAL 2 +#define DEFAULT_SOFT_REMOVE_TIMEOUT 15 +#define DEFAULT_IO_WORKER_COUNT 4 + +// Not defined by mingw. +#ifndef SCSI_ADSENSE_UNRECOVERED_ERROR +#define SCSI_ADSENSE_UNRECOVERED_ERROR 0x11 +#endif + +// The following will be assigned to the "Owner" field of the WNBD +// parameters, which can be used to determine the application managing +// a disk. We'll ignore other disks. 
+#define RBD_WNBD_OWNER_NAME "ceph-rbd-wnbd" + +class WnbdHandler; + +class WnbdAdminHook : public AdminSocketHook { + WnbdHandler *m_handler; + +public: + explicit WnbdAdminHook(WnbdHandler *handler) : + m_handler(handler) { + g_ceph_context->get_admin_socket()->register_command( + "wnbd stats", this, "get WNBD stats"); + } + ~WnbdAdminHook() override { + g_ceph_context->get_admin_socket()->unregister_commands(this); + } + + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, std::ostream& errss, bufferlist& out) override; +}; + + +class WnbdHandler +{ +private: + librbd::Image ℑ + std::string instance_name; + uint64_t block_count; + uint32_t block_size; + bool readonly; + bool rbd_cache_enabled; + uint32_t io_req_workers; + uint32_t io_reply_workers; + WnbdAdminHook* admin_hook; + boost::asio::thread_pool* reply_tpool; + +public: + WnbdHandler(librbd::Image& _image, std::string _instance_name, + uint64_t _block_count, uint32_t _block_size, + bool _readonly, bool _rbd_cache_enabled, + uint32_t _io_req_workers, + uint32_t _io_reply_workers) + : image(_image) + , instance_name(_instance_name) + , block_count(_block_count) + , block_size(_block_size) + , readonly(_readonly) + , rbd_cache_enabled(_rbd_cache_enabled) + , io_req_workers(_io_req_workers) + , io_reply_workers(_io_reply_workers) + { + admin_hook = new WnbdAdminHook(this); + // Instead of relying on librbd's own thread pool, we're going to use a + // separate one. This allows us to make assumptions on the threads that + // are going to send the IO replies and thus be able to cache Windows + // OVERLAPPED structures. + reply_tpool = new boost::asio::thread_pool(_io_reply_workers); + } + + int resize(uint64_t new_size); + int start(); + // Wait for the handler to stop, which normally happens when the driver + // passes the "Disconnect" request. 
+ int wait(); + void shutdown(); + + int dump_stats(Formatter *f); + + ~WnbdHandler(); + + static VOID LogMessage( + WnbdLogLevel LogLevel, + const char* Message, + const char* FileName, + UINT32 Line, + const char* FunctionName); + +private: + ceph::mutex shutdown_lock = ceph::make_mutex("WnbdHandler::DisconnectLocker"); + bool started = false; + bool terminated = false; + WNBD_DISK* wnbd_disk = nullptr; + + struct IOContext + { + xlist<IOContext*>::item item; + WnbdHandler *handler = nullptr; + WNBD_STATUS wnbd_status = {0}; + WnbdRequestType req_type = WnbdReqTypeUnknown; + uint64_t req_handle = 0; + uint32_t err_code = 0; + size_t req_size; + uint64_t req_from; + bufferlist data; + + IOContext() + : item(this) + {} + + void set_sense(uint8_t sense_key, uint8_t asc, uint64_t info); + void set_sense(uint8_t sense_key, uint8_t asc); + }; + + friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); + + void send_io_response(IOContext *ctx); + + static void aio_callback(librbd::completion_t cb, void *arg); + + // WNBD IO entry points + static void Read( + PWNBD_DISK Disk, + UINT64 RequestHandle, + PVOID Buffer, + UINT64 BlockAddress, + UINT32 BlockCount, + BOOLEAN ForceUnitAccess); + static void Write( + PWNBD_DISK Disk, + UINT64 RequestHandle, + PVOID Buffer, + UINT64 BlockAddress, + UINT32 BlockCount, + BOOLEAN ForceUnitAccess); + static void Flush( + PWNBD_DISK Disk, + UINT64 RequestHandle, + UINT64 BlockAddress, + UINT32 BlockCount); + static void Unmap( + PWNBD_DISK Disk, + UINT64 RequestHandle, + PWNBD_UNMAP_DESCRIPTOR Descriptors, + UINT32 Count); + + static constexpr WNBD_INTERFACE RbdWnbdInterface = + { + Read, + Write, + Flush, + Unmap, + }; +}; + +std::ostream &operator<<(std::ostream &os, const WnbdHandler::IOContext &ctx); + +#endif // WNBD_HANDLER_H diff --git a/src/tools/rbd_wnbd/wnbd_wmi.cc b/src/tools/rbd_wnbd/wnbd_wmi.cc new file mode 100644 index 000000000..f49fa4cc6 --- /dev/null +++ b/src/tools/rbd_wnbd/wnbd_wmi.cc @@ -0,0 +1,261 
@@ +/* + * Ceph - scalable distributed file system + * + * Copyright (c) 2019 SUSE LLC + * Copyright (C) 2022 Cloudbase Solutions + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "wnbd_wmi.h" + +#include "common/debug.h" +#include "common/win32/wstring.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd-wnbd: " + +// Initializes the COM library for use by the calling thread using +// COINIT_MULTITHREADED. +static HRESULT co_initialize_basic() +{ + dout(10) << "initializing COM library" << dendl; + + HRESULT hres = CoInitializeEx(0, COINIT_MULTITHREADED); + if (FAILED(hres)) { + derr << "CoInitializeEx failed. HRESULT: " << hres << dendl; + return hres; + } + + // CoInitializeSecurity must be called once per process. + static bool com_security_flags_set = false; + + if (!com_security_flags_set) { + hres = CoInitializeSecurity( + NULL, -1, NULL, NULL, + RPC_C_AUTHN_LEVEL_DEFAULT, + RPC_C_IMP_LEVEL_IMPERSONATE, + NULL, + EOAC_NONE, + NULL); + if (FAILED(hres)) { + derr << "CoInitializeSecurity failed. HRESULT: " << hres << dendl; + CoUninitialize(); + return hres; + } + + com_security_flags_set = true; + } + + return 0; +} + +// co_uninitialize must be called once for every successful +// co_initialize_basic call. Any WMI objects (including connections, +// event subscriptions, etc) must be released beforehand. 
+static void co_uninitialize() +{ + dout(10) << "closing COM library" << dendl; + CoUninitialize(); +} + +HRESULT COMBootstrapper::initialize() +{ + std::unique_lock l{init_lock}; + + HRESULT hres = co_initialize_basic(); + if (!FAILED(hres)) { + initialized = true; + } + return hres; +} + +void COMBootstrapper::cleanup() +{ + if (initialized) { + co_uninitialize(); + initialized = false; + } +} + +void WmiConnection::close() +{ + dout(20) << "closing wmi conn: " << this + << ", svc: " << wbem_svc + << ", loc: " << wbem_loc << dendl; + if (wbem_svc != NULL) { + wbem_svc->Release(); + wbem_svc = NULL; + } + if (wbem_loc != NULL) { + wbem_loc->Release(); + wbem_loc = NULL; + } +} + +HRESULT WmiConnection::initialize() +{ + HRESULT hres = CoCreateInstance( + CLSID_WbemLocator, 0, CLSCTX_INPROC_SERVER, + IID_IWbemLocator, (LPVOID*)&wbem_loc); + if (FAILED(hres)) { + derr << "CoCreateInstance failed. HRESULT: " << hres << dendl; + return hres; + } + + hres = wbem_loc->ConnectServer( + _bstr_t(ns.c_str()).GetBSTR(), NULL, NULL, NULL, + WBEM_FLAG_CONNECT_USE_MAX_WAIT, NULL, NULL, + &wbem_svc); + if (FAILED(hres)) { + derr << "Could not connect to WMI service. HRESULT: " << hres << dendl; + return hres; + } + + if (!wbem_svc) { + hres = MAKE_HRESULT(SEVERITY_ERROR, FACILITY_WIN32, + ERROR_INVALID_HANDLE); + derr << "WMI connection failed, no WMI service object received." << dendl; + return hres; + } + + hres = CoSetProxyBlanket( + wbem_svc, RPC_C_AUTHN_WINNT, RPC_C_AUTHZ_NONE, NULL, + RPC_C_AUTHN_LEVEL_CALL, RPC_C_IMP_LEVEL_IMPERSONATE, NULL, EOAC_NONE); + if (FAILED(hres)) { + derr << "CoSetProxyBlanket failed. 
HRESULT:" << hres << dendl; + } + + return hres; +} + +HRESULT get_property_str( + IWbemClassObject* cls_obj, + const std::wstring& property, + std::wstring& value) +{ + VARIANT vt_prop; + VariantInit(&vt_prop); + HRESULT hres = cls_obj->Get(property.c_str(), 0, &vt_prop, 0, 0); + if (!FAILED(hres)) { + VARIANT vt_bstr_prop; + VariantInit(&vt_bstr_prop); + hres = VariantChangeType(&vt_bstr_prop, &vt_prop, 0, VT_BSTR); + if (!FAILED(hres)) { + value = vt_bstr_prop.bstrVal; + } + VariantClear(&vt_bstr_prop); + } + VariantClear(&vt_prop); + + if (FAILED(hres)) { + derr << "Could not get WMI property: " << to_string(property) + << ". HRESULT: " << hres << dendl; + } + return hres; +} + +HRESULT get_property_int( + IWbemClassObject* cls_obj, + const std::wstring& property, + uint32_t& value) +{ + VARIANT vt_prop; + VariantInit(&vt_prop); + HRESULT hres = cls_obj->Get(property.c_str(), 0, &vt_prop, 0, 0); + if (!FAILED(hres)) { + VARIANT vt_uint_prop; + VariantInit(&vt_uint_prop); + hres = VariantChangeType(&vt_uint_prop, &vt_prop, 0, VT_UINT); + if (!FAILED(hres)) { + value = vt_uint_prop.intVal; + } + VariantClear(&vt_uint_prop); + } + VariantClear(&vt_prop); + + if (FAILED(hres)) { + derr << "Could not get WMI property: " << to_string(property) + << ". HRESULT: " << hres << dendl; + } + return hres; +} + +HRESULT WmiSubscription::initialize() +{ + HRESULT hres = conn.initialize(); + if (FAILED(hres)) { + derr << "Could not create WMI connection" << dendl; + return hres; + } + + hres = conn.wbem_svc->ExecNotificationQuery( + _bstr_t(L"WQL").GetBSTR(), + _bstr_t(query.c_str()).GetBSTR(), + WBEM_FLAG_FORWARD_ONLY | WBEM_FLAG_RETURN_IMMEDIATELY, + NULL, + &event_enum); + + if (FAILED(hres)) { + derr << "Notification query failed, unable to subscribe to " + << "WMI events. 
HRESULT: " << hres << dendl; + } else { + dout(20) << "wmi subscription initialized: " << this + << ", event enum: " << event_enum + << ", conn: " << &conn << ", conn svc: " << conn.wbem_svc << dendl; + } + + return hres; +} + +void WmiSubscription::close() +{ + dout(20) << "closing wmi subscription: " << this + << ", event enum: " << event_enum << dendl; + if (event_enum != NULL) { + event_enum->Release(); + event_enum = NULL; + } +} + +HRESULT WmiSubscription::next( + long timeout, + ULONG count, + IWbemClassObject **objects, + ULONG *returned) +{ + if (!event_enum) { + HRESULT hres = MAKE_HRESULT( + SEVERITY_ERROR, FACILITY_WIN32, + ERROR_INVALID_HANDLE); + derr << "WMI subscription uninitialized." << dendl; + return hres; + } + + HRESULT hres = event_enum->Next(timeout, count, objects, returned); + if (FAILED(hres)) { + derr << "Unable to retrieve WMI events. HRESULT: " + << hres << dendl; + } + return hres; +} + +WmiSubscription subscribe_wnbd_adapter_events( + uint32_t interval) +{ + std::wostringstream query_stream; + query_stream + << L"SELECT * FROM __InstanceOperationEvent " + << L"WITHIN " << interval + << L"WHERE TargetInstance ISA 'Win32_ScsiController' " + << L"AND TargetInstance.Description=" + << L"'WNBD SCSI Virtual Adapter'"; + + return WmiSubscription(L"root\\cimv2", query_stream.str()); +} diff --git a/src/tools/rbd_wnbd/wnbd_wmi.h b/src/tools/rbd_wnbd/wnbd_wmi.h new file mode 100644 index 000000000..4d802d986 --- /dev/null +++ b/src/tools/rbd_wnbd/wnbd_wmi.h @@ -0,0 +1,109 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (c) 2019 SUSE LLC + * Copyright (C) 2022 Cloudbase Solutions + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once +#include <comutil.h> + +#define _WIN32_DCOM +#include <wbemcli.h> + +#include <string> +#include <vector> + +#include "common/ceph_mutex.h" + +// Convenience helper for initializing and cleaning up the +// Windows COM library using "COINIT_MULTITHREADED" concurrency mode. +// Any WMI objects (including connections, event subscriptions, etc) +// must be released before the COM library gets closed. +class COMBootstrapper +{ +private: + bool initialized = false; + + ceph::mutex init_lock = ceph::make_mutex("COMBootstrapper::InitLocker"); + +public: + HRESULT initialize(); + void cleanup(); + + ~COMBootstrapper() + { + cleanup(); + } +}; + +class WmiConnection +{ +private: + std::wstring ns; +public: + IWbemLocator* wbem_loc; + IWbemServices* wbem_svc; + + WmiConnection(std::wstring ns) + : ns(ns) + , wbem_loc(nullptr) + , wbem_svc(nullptr) + { + } + ~WmiConnection() + { + close(); + } + + HRESULT initialize(); + void close(); +}; + +HRESULT get_property_str( + IWbemClassObject* cls_obj, + const std::wstring& property, + std::wstring& value); +HRESULT get_property_int( + IWbemClassObject* cls_obj, + const std::wstring& property, + uint32_t& value); + +class WmiSubscription +{ +private: + std::wstring query; + + WmiConnection conn; + IEnumWbemClassObject *event_enum; + +public: + WmiSubscription(std::wstring ns, std::wstring query) + : query(query) + , conn(WmiConnection(ns)) + , event_enum(nullptr) + { + } + ~WmiSubscription() + { + close(); + } + + HRESULT initialize(); + void close(); + + // IEnumWbemClassObject::Next wrapper + HRESULT next( + long timeout, + ULONG count, + IWbemClassObject **objects, + ULONG *returned); +}; + +WmiSubscription subscribe_wnbd_adapter_events(uint32_t interval); |